Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/Makefile | 12
-rw-r--r--  drivers/infiniband/hw/amso1100/Kbuild | 4
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.c | 90
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.h | 13
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_ae.c | 28
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_alloc.c | 5
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_cm.c | 18
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_cq.c | 10
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_intr.c | 12
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_mm.c | 2
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_mq.h | 2
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_pd.c | 5
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.c | 97
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.h | 2
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_qp.c | 39
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_rnic.c | 27
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_vq.c | 6
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_wr.h | 4
-rw-r--r--  drivers/infiniband/hw/cxgb3/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/Makefile | 6
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_dbg.c | 1
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c | 92
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h | 26
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c | 79
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_wr.h | 53
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.c | 117
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.h | 31
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c | 469
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.h | 8
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_ev.c | 30
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c | 22
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 198
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 12
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 235
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_user.h | 8
-rw-r--r--  drivers/infiniband/hw/cxgb4/Kconfig | 18
-rw-r--r--  drivers/infiniband/hw/cxgb4/Makefile | 5
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 3933
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 1011
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 1362
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c | 191
-rw-r--r--  drivers/infiniband/hw/cxgb4/id_table.c | 112
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1000
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 955
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 586
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 1808
-rw-r--r--  drivers/infiniband/hw/cxgb4/resource.c | 453
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 680
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 854
-rw-r--r--  drivers/infiniband/hw/cxgb4/user.h | 80
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_av.c | 2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 42
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 28
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_cq.c | 55
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_eq.c | 25
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 12
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c | 337
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.h | 6
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_iverbs.h | 4
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 176
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mcast.c | 4
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c | 901
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.h | 13
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_pd.c | 2
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qes.h | 5
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c | 403
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_reqs.c | 313
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_sqp.c | 59
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_tools.h | 6
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_uverbs.c | 10
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.c | 122
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.h | 4
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_phyp.c | 16
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_phyp.h | 4
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.c | 26
-rw-r--r--  drivers/infiniband/hw/ipath/Kconfig | 8
-rw-r--r--  drivers/infiniband/hw/ipath/Makefile | 8
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_diag.c | 73
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_dma.c | 44
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c | 171
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_eeprom.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c | 78
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c | 52
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c | 9
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c | 1801
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba7220.c | 2558
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c | 23
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c | 20
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h | 19
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_keys.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c | 243
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mmap.c | 3
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c | 41
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c | 36
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c | 10
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c | 14
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sd7220_img.c | 1082
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sdma.c | 30
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_srq.c | 6
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_stats.c | 8
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sysfs.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_uc.c | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c | 36
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_pages.c | 25
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_user_sdma.c | 22
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c | 43
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 11
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 145
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c | 688
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c | 478
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 226
-rw-r--r--  drivers/infiniband/hw/mlx4/doorbell.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 1939
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 1980
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c | 1257
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 501
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 208
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 1700
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c | 27
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c | 906
-rw-r--r--  drivers/infiniband/hw/mlx4/user.h | 13
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig | 10
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 92
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 1187
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c | 98
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 139
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 1543
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 162
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 574
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 1250
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 3048
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 485
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h | 133
-rw-r--r--  drivers/infiniband/hw/mthca/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_allocator.c | 8
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_catas.c | 33
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 290
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.h | 93
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_config_reg.h | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c | 19
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 3
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_eq.c | 128
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 42
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 290
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mcg.c | 126
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c | 44
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c | 92
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 131
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h | 3
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 72
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_reset.c | 11
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c | 33
-rw-r--r--  drivers/infiniband/hw/nes/Kconfig | 9
-rw-r--r--  drivers/infiniband/hw/nes/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/nes/nes.c | 265
-rw-r--r--  drivers/infiniband/hw/nes/nes.h | 83
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c | 3706
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.h | 124
-rw-r--r--  drivers/infiniband/hw/nes/nes_context.h | 2
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c | 1790
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.h | 199
-rw-r--r--  drivers/infiniband/hw/nes/nes_mgt.c | 1160
-rw-r--r--  drivers/infiniband/hw/nes/nes_mgt.h | 97
-rw-r--r--  drivers/infiniband/hw/nes/nes_nic.c | 806
-rw-r--r--  drivers/infiniband/hw/nes/nes_user.h | 8
-rw-r--r--  drivers/infiniband/hw/nes/nes_utils.c | 94
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c | 1804
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.h | 52
-rw-r--r--  drivers/infiniband/hw/ocrdma/Kconfig | 8
-rw-r--r--  drivers/infiniband/hw/ocrdma/Makefile | 5
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h | 521
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_abi.h | 134
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 175
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 42
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2737
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.h | 138
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 585
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 1953
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 616
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.h | 54
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3054
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 99
-rw-r--r--  drivers/infiniband/hw/qib/Kconfig | 15
-rw-r--r--  drivers/infiniband/hw/qib/Makefile | 16
-rw-r--r--  drivers/infiniband/hw/qib/qib.h | 1548
-rw-r--r--  drivers/infiniband/hw/qib/qib_6120_regs.h | 977
-rw-r--r--  drivers/infiniband/hw/qib/qib_7220.h | 149
-rw-r--r--  drivers/infiniband/hw/qib/qib_7220_regs.h | 1496
-rw-r--r--  drivers/infiniband/hw/qib/qib_7322_regs.h | 3163
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h | 812
-rw-r--r--  drivers/infiniband/hw/qib/qib_cq.c | 540
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.c | 283
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.h | 45
-rw-r--r--  drivers/infiniband/hw/qib/qib_diag.c | 911
-rw-r--r--  drivers/infiniband/hw/qib/qib_dma.c | 169
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c | 817
-rw-r--r--  drivers/infiniband/hw/qib/qib_eeprom.c | 456
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 2410
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 621
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c | 3599
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 4659
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 8554
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 1857
-rw-r--r--  drivers/infiniband/hw/qib/qib_intr.c | 241
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c | 387
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 2533
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.h | 431
-rw-r--r--  drivers/infiniband/hw/qib/qib_mmap.c | 174
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c | 532
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c | 715
-rw-r--r--  drivers/infiniband/hw/qib/qib_pio_copy.c | 64
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 1384
-rw-r--r--  drivers/infiniband/hw/qib/qib_qsfp.c | 556
-rw-r--r--  drivers/infiniband/hw/qib/qib_qsfp.h | 189
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c | 2290
-rw-r--r--  drivers/infiniband/hw/qib/qib_ruc.c | 819
-rw-r--r--  drivers/infiniband/hw/qib/qib_sd7220.c (renamed from drivers/infiniband/hw/ipath/ipath_sd7220.c) | 889
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c | 1039
-rw-r--r--  drivers/infiniband/hw/qib/qib_srq.c | 380
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 842
-rw-r--r--  drivers/infiniband/hw/qib/qib_twsi.c | 500
-rw-r--r--  drivers/infiniband/hw/qib/qib_tx.c | 571
-rw-r--r--  drivers/infiniband/hw/qib/qib_uc.c | 536
-rw-r--r--  drivers/infiniband/hw/qib/qib_ud.c | 590
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_pages.c | 157
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.c | 1465
-rw-r--r--  drivers/infiniband/hw/qib/qib_user_sdma.h | 52
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 2336
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 1173
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs_mcast.c | 368
-rw-r--r--  drivers/infiniband/hw/qib/qib_wc_ppc64.c (renamed from drivers/infiniband/hw/ipath/ipath_7220.h) | 49
-rw-r--r--  drivers/infiniband/hw/qib/qib_wc_x86_64.c | 171
-rw-r--r--  drivers/infiniband/hw/usnic/Kconfig | 10
-rw-r--r--  drivers/infiniband/hw/usnic/Makefile | 15
-rw-r--r--  drivers/infiniband/hw/usnic/usnic.h | 29
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_abi.h | 73
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h | 27
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_common_util.h | 68
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_debugfs.c | 154
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_debugfs.h | 29
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_fwd.c | 350
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_fwd.h | 113
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib.h | 118
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 682
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 761
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h | 117
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 341
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.h | 29
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 768
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 72
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_log.h | 58
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_transport.c | 202
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_transport.h | 51
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 604
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.h | 80
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c | 254
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h | 73
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_vnic.c | 467
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_vnic.h | 103
265 files changed, 112060 insertions, 12543 deletions
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644
index 00000000000..e900b03531a
--- /dev/null
+++ b/drivers/infiniband/hw/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
+obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
+obj-$(CONFIG_INFINIBAND_QIB) += qib/
+obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
+obj-$(CONFIG_INFINIBAND_NES) += nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
index 06964c4af84..950dfabcd89 100644
--- a/drivers/infiniband/hw/amso1100/Kbuild
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -1,6 +1,4 @@
-ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 113f3c03c5b..00400c352c1 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -36,6 +36,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/inetdevice.h>
+#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ethtool.h>
#include <linux/mii.h>
@@ -46,6 +47,8 @@
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -76,7 +79,6 @@ static irqreturn_t c2_interrupt(int irq, void *dev_id);
static void c2_tx_timeout(struct net_device *netdev);
static int c2_change_mtu(struct net_device *netdev, int new_mtu);
static void c2_reset(struct c2_port *c2_port);
-static struct net_device_stats *c2_get_stats(struct net_device *netdev);
static struct pci_device_id c2_pci_table[] = {
{ PCI_DEVICE(0x18b8, 0xb001) },
@@ -87,11 +89,7 @@ MODULE_DEVICE_TABLE(pci, c2_pci_table);
static void c2_print_macaddr(struct net_device *netdev)
{
- pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
- "IRQ %u\n", netdev->name,
- netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
- netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
- netdev->irq);
+ pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);
}
static void c2_set_rxbufsize(struct c2_port *c2_port)
@@ -349,7 +347,7 @@ static void c2_tx_clean(struct c2_port *c2_port)
elem->hw_desc + C2_TXP_ADDR);
__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
elem->hw_desc + C2_TXP_FLAGS);
- c2_port->netstats.tx_dropped++;
+ c2_port->netdev->stats.tx_dropped++;
break;
} else {
__raw_writew(0,
@@ -457,7 +455,7 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
elem->hw_desc + C2_RXP_FLAGS);
pr_debug("packet dropped\n");
- c2_port->netstats.rx_dropped++;
+ c2_port->netdev->stats.rx_dropped++;
}
static void c2_rx_interrupt(struct net_device *netdev)
@@ -531,9 +529,8 @@ static void c2_rx_interrupt(struct net_device *netdev)
netif_rx(skb);
- netdev->last_rx = jiffies;
- c2_port->netstats.rx_packets++;
- c2_port->netstats.rx_bytes += buflen;
+ netdev->stats.rx_packets++;
+ netdev->stats.rx_bytes += buflen;
}
/* Save where we left off */
@@ -797,19 +794,16 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
elem->hw_desc + C2_TXP_FLAGS);
- c2_port->netstats.tx_packets++;
- c2_port->netstats.tx_bytes += maplen;
+ netdev->stats.tx_packets++;
+ netdev->stats.tx_bytes += maplen;
/* Loop thru additional data fragments and queue them */
if (skb_shinfo(skb)->nr_frags) {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
- maplen = frag->size;
- mapaddr =
- pci_map_page(c2dev->pcidev, frag->page,
- frag->page_offset, maplen,
- PCI_DMA_TODEVICE);
-
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ maplen = skb_frag_size(frag);
+ mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
+ 0, maplen, DMA_TO_DEVICE);
elem = elem->next;
elem->skb = NULL;
elem->mapaddr = mapaddr;
@@ -823,8 +817,8 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
elem->hw_desc + C2_TXP_FLAGS);
- c2_port->netstats.tx_packets++;
- c2_port->netstats.tx_bytes += maplen;
+ netdev->stats.tx_packets++;
+ netdev->stats.tx_bytes += maplen;
}
}
@@ -845,13 +839,6 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return NETDEV_TX_OK;
}
-static struct net_device_stats *c2_get_stats(struct net_device *netdev)
-{
- struct c2_port *c2_port = netdev_priv(netdev);
-
- return &c2_port->netstats;
-}
-
static void c2_tx_timeout(struct net_device *netdev)
{
struct c2_port *c2_port = netdev_priv(netdev);
@@ -880,6 +867,16 @@ static int c2_change_mtu(struct net_device *netdev, int new_mtu)
return ret;
}
+static const struct net_device_ops c2_netdev = {
+ .ndo_open = c2_up,
+ .ndo_stop = c2_down,
+ .ndo_start_xmit = c2_xmit_frame,
+ .ndo_tx_timeout = c2_tx_timeout,
+ .ndo_change_mtu = c2_change_mtu,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
/* Initialize network device */
static struct net_device *c2_devinit(struct c2_dev *c2dev,
void __iomem * mmio_addr)
@@ -894,12 +891,7 @@ static struct net_device *c2_devinit(struct c2_dev *c2dev,
SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
- netdev->open = c2_up;
- netdev->stop = c2_down;
- netdev->hard_start_xmit = c2_xmit_frame;
- netdev->get_stats = c2_get_stats;
- netdev->tx_timeout = c2_tx_timeout;
- netdev->change_mtu = c2_change_mtu;
+ netdev->netdev_ops = &c2_netdev;
netdev->watchdog_timeo = C2_TX_TIMEOUT;
netdev->irq = c2dev->pcidev->irq;
@@ -928,8 +920,7 @@ static struct net_device *c2_devinit(struct c2_dev *c2dev,
return netdev;
}
-static int __devinit c2_probe(struct pci_dev *pcidev,
- const struct pci_device_id *ent)
+static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
{
int ret = 0, i;
unsigned long reg0_start, reg0_flags, reg0_len;
@@ -992,13 +983,13 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
}
if ((sizeof(dma_addr_t) > 4)) {
- ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+ ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
if (ret < 0) {
printk(KERN_ERR PFX "64b DMA configuration failed\n");
goto bail2;
}
} else {
- ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+ ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
if (ret < 0) {
printk(KERN_ERR PFX "32b DMA configuration failed\n");
goto bail2;
@@ -1091,6 +1082,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
/* Initialize network device */
if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+ ret = -ENOMEM;
iounmap(mmio_regs);
goto bail4;
}
@@ -1160,7 +1152,8 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
goto bail10;
}
- if (c2_register_device(c2dev))
+ ret = c2_register_device(c2dev);
+ if (ret)
goto bail10;
return 0;
@@ -1199,7 +1192,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
return ret;
}
-static void __devexit c2_remove(struct pci_dev *pcidev)
+static void c2_remove(struct pci_dev *pcidev)
{
struct c2_dev *c2dev = pci_get_drvdata(pcidev);
struct net_device *netdev = c2dev->netdev;
@@ -1244,18 +1237,7 @@ static struct pci_driver c2_pci_driver = {
.name = DRV_NAME,
.id_table = c2_pci_table,
.probe = c2_probe,
- .remove = __devexit_p(c2_remove),
+ .remove = c2_remove,
};
-static int __init c2_init_module(void)
-{
- return pci_register_driver(&c2_pci_driver);
-}
-
-static void __exit c2_exit_module(void)
-{
- pci_unregister_driver(&c2_pci_driver);
-}
-
-module_init(c2_init_module);
-module_exit(c2_exit_module);
+module_pci_driver(c2_pci_driver);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
index d12a24a84fd..d619d735838 100644
--- a/drivers/infiniband/hw/amso1100/c2.h
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -250,7 +250,7 @@ struct c2_array {
struct sp_chunk {
struct sp_chunk *next;
dma_addr_t dma_addr;
- DECLARE_PCI_UNMAP_ADDR(mapping);
+ DEFINE_DMA_UNMAP_ADDR(mapping);
u16 head;
u16 shared_ptr[0];
};
@@ -265,7 +265,6 @@ struct c2_pd_table {
struct c2_qp_table {
struct idr idr;
spinlock_t lock;
- int last;
};
struct c2_element {
@@ -369,8 +368,6 @@ struct c2_port {
unsigned long mem_size;
u32 rx_buf_size;
-
- struct net_device_stats netstats;
};
/*
@@ -500,16 +497,16 @@ extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
struct ib_send_wr **bad_wr);
extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
struct ib_recv_wr **bad_wr);
-extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
-extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_init_qp_table(struct c2_dev *c2dev);
+extern void c2_cleanup_qp_table(struct c2_dev *c2dev);
extern void c2_set_qp_state(struct c2_qp *, int);
extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
/* PDs */
extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
-extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+extern int c2_init_pd_table(struct c2_dev *c2dev);
+extern void c2_cleanup_pd_table(struct c2_dev *c2dev);
/* CQs */
extern int c2_init_cq(struct c2_dev *c2dev, int entries,
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 62af74295db..cedda25232b 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -141,7 +141,7 @@ static const char *to_qp_state_str(int state)
return "C2_QP_STATE_ERROR";
default:
return "<invalid QP state>";
- };
+ }
}
void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
@@ -155,9 +155,11 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
enum c2_event_id event_id;
unsigned long flags;
int status;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
/*
- * retreive the message
+ * retrieve the message
*/
wr = c2_mq_consume(mq);
if (!wr)
@@ -206,10 +208,10 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
case CCAE_ACTIVE_CONNECT_RESULTS:
res = &wr->ae.ae_active_connect_results;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.local_addr.sin_addr.s_addr = res->laddr;
- cm_event.remote_addr.sin_addr.s_addr = res->raddr;
- cm_event.local_addr.sin_port = res->lport;
- cm_event.remote_addr.sin_port = res->rport;
+ laddr->sin_addr.s_addr = res->laddr;
+ raddr->sin_addr.s_addr = res->raddr;
+ laddr->sin_port = res->lport;
+ raddr->sin_port = res->rport;
if (status == 0) {
cm_event.private_data_len =
be32_to_cpu(res->private_data_length);
@@ -281,13 +283,18 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
}
cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
- cm_event.local_addr.sin_addr.s_addr = req->laddr;
- cm_event.remote_addr.sin_addr.s_addr = req->raddr;
- cm_event.local_addr.sin_port = req->lport;
- cm_event.remote_addr.sin_port = req->rport;
+ laddr->sin_addr.s_addr = req->laddr;
+ raddr->sin_addr.s_addr = req->raddr;
+ laddr->sin_port = req->lport;
+ raddr->sin_port = req->rport;
cm_event.private_data_len =
be32_to_cpu(req->private_data_length);
cm_event.private_data = req->private_data;
+ /*
+ * Until ird/ord negotiation via MPAv2 support is added, send
+ * max supported values
+ */
+ cm_event.ird = cm_event.ord = 128;
if (cm_id->event_handler)
cm_id->event_handler(cm_id, &cm_event);
@@ -306,6 +313,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
if (cq->ibcq.event_handler)
cq->ibcq.event_handler(&ib_event,
cq->ibcq.cq_context);
+ break;
}
default:
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
index e9110163aef..78d247ec696 100644
--- a/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -32,7 +32,6 @@
*/
#include <linux/errno.h>
-#include <linux/slab.h>
#include <linux/bitmap.h>
#include "c2.h"
@@ -50,7 +49,7 @@ static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
return -ENOMEM;
new_head->dma_addr = dma_addr;
- pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+ dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
new_head->next = NULL;
new_head->head = 0;
@@ -82,7 +81,7 @@ void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
while (root) {
next = root->next;
dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
- pci_unmap_addr(root, mapping));
+ dma_unmap_addr(root, mapping));
root = next;
}
}
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
index 75b93e9b881..23bfa94fbd4 100644
--- a/drivers/infiniband/hw/amso1100/c2_cm.c
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -31,6 +31,8 @@
* SOFTWARE.
*
*/
+#include <linux/slab.h>
+
#include "c2.h"
#include "c2_wr.h"
#include "c2_vq.h"
@@ -44,6 +46,10 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
struct c2_vq_req *vq_req;
int err;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+ if (cm_id->remote_addr.ss_family != AF_INET)
+ return -ENOSYS;
ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
if (!ibqp)
@@ -89,8 +95,8 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
wr->rnic_handle = c2dev->adapter_handle;
wr->qp_handle = qp->adapter_handle;
- wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
- wr->remote_port = cm_id->remote_addr.sin_port;
+ wr->remote_addr = raddr->sin_addr.s_addr;
+ wr->remote_port = raddr->sin_port;
/*
* Move any private data from the callers's buf into
@@ -133,6 +139,10 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
struct c2wr_ep_listen_create_rep *reply;
struct c2_vq_req *vq_req;
int err;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+
+ if (cm_id->local_addr.ss_family != AF_INET)
+ return -ENOSYS;
c2dev = to_c2dev(cm_id->device);
if (c2dev == NULL)
@@ -151,8 +161,8 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
wr.hdr.context = (u64) (unsigned long) vq_req;
wr.rnic_handle = c2dev->adapter_handle;
- wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
- wr.local_port = cm_id->local_addr.sin_port;
+ wr.local_addr = laddr->sin_addr.s_addr;
+ wr.local_port = laddr->sin_port;
wr.backlog = cpu_to_be32(backlog);
wr.user_context = (u64) (unsigned long) cm_id;
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index bb17cce3cb5..49e0e8533f7 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -35,6 +35,8 @@
* SOFTWARE.
*
*/
+#include <linux/gfp.h>
+
#include "c2.h"
#include "c2_vq.h"
#include "c2_status.h"
@@ -133,7 +135,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
struct c2_qp *qp;
int is_recv = 0;
- ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+ ce = c2_mq_consume(&cq->mq);
if (!ce) {
return -EAGAIN;
}
@@ -146,7 +148,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
while ((qp =
(struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
c2_mq_free(&cq->mq);
- ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+ ce = c2_mq_consume(&cq->mq);
if (!ce)
return -EAGAIN;
}
@@ -255,7 +257,7 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
{
dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
- mq->msg_pool.host, pci_unmap_addr(mq, mapping));
+ mq->msg_pool.host, dma_unmap_addr(mq, mapping));
}
static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
@@ -276,7 +278,7 @@ static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
NULL, /* peer (currently unknown) */
C2_MQ_HOST_TARGET);
- pci_unmap_addr_set(mq, mapping, mq->host_dma);
+ dma_unmap_addr_set(mq, mapping, mq->host_dma);
return 0;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 3b5095470cb..3a17d9b36db 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -62,8 +62,8 @@ void c2_rnic_interrupt(struct c2_dev *c2dev)
static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
{
if (c2dev->qptr_array[mq_index] == NULL) {
- pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n",
- mq_index);
+ pr_debug("handle_mq: stray activity for mq_index=%d\n",
+ mq_index);
return;
}
@@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
* We should never get here, as the adapter should
* never send us a reply that we're not expecting.
*/
- vq_repbuf_free(c2dev, host_msg);
+ if (reply_msg != NULL)
+ vq_repbuf_free(c2dev, host_msg);
pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
return;
}
@@ -183,6 +184,11 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
case IW_CM_EVENT_ESTABLISHED:
c2_set_qp_state(req->qp,
C2_QP_STATE_RTS);
+ /*
+ * Until ird/ord negotiation via MPAv2 support is added, send
+ * max supported values
+ */
+ cm_event.ird = cm_event.ord = 128;
case IW_CM_EVENT_CLOSE:
/*
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
index b506fe22b4d..119c4f3d979 100644
--- a/drivers/infiniband/hw/amso1100/c2_mm.c
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -30,6 +30,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include <linux/slab.h>
+
#include "c2.h"
#include "c2_vq.h"
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
index acede007b94..fc1b9a7cec4 100644
--- a/drivers/infiniband/hw/amso1100/c2_mq.h
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -71,7 +71,7 @@ struct c2_mq {
u8 __iomem *adapter;
} msg_pool;
dma_addr_t host_dma;
- DECLARE_PCI_UNMAP_ADDR(mapping);
+ DEFINE_DMA_UNMAP_ADDR(mapping);
u16 hint_count;
u16 priv;
struct c2_mq_shared __iomem *peer;
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
index 00c709926c8..f3e81dc357b 100644
--- a/drivers/infiniband/hw/amso1100/c2_pd.c
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -34,6 +34,7 @@
*/
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/errno.h>
#include "c2.h"
@@ -69,7 +70,7 @@ void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
spin_unlock(&c2dev->pd_table.lock);
}
-int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+int c2_init_pd_table(struct c2_dev *c2dev)
{
c2dev->pd_table.last = 0;
@@ -83,7 +84,7 @@ int __devinit c2_init_pd_table(struct c2_dev *c2dev)
return 0;
}
-void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+void c2_cleanup_pd_table(struct c2_dev *c2dev)
{
kfree(c2dev->pd_table.table);
}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 2acf9b62cf9..8af33cf1fc4 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -50,6 +50,7 @@
#include <linux/dma-mapping.h>
#include <linux/if_arp.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -93,19 +94,11 @@ static int c2_query_port(struct ib_device *ibdev,
props->pkey_tbl_len = 1;
props->qkey_viol_cntr = 0;
props->active_width = 1;
- props->active_speed = 1;
+ props->active_speed = IB_SPEED_SDR;
return 0;
}
-static int c2_modify_port(struct ib_device *ibdev,
- u8 port, int port_modify_mask,
- struct ib_port_modify *props)
-{
- pr_debug("%s:%u\n", __func__, __LINE__);
- return 0;
-}
-
static int c2_query_pkey(struct ib_device *ibdev,
u8 port, u16 index, u16 * pkey)
{
@@ -272,7 +265,6 @@ static struct ib_qp *c2_create_qp(struct ib_pd *pd,
pr_debug("%s: Invalid QP type: %d\n", __func__,
init_attr->qp_type);
return ERR_PTR(-EINVAL);
- break;
}
if (err) {
@@ -439,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 *pages;
u64 kva = 0;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c2_pd *c2pd = to_c2pd(pd);
struct c2_mr *c2mr;
@@ -460,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(c2mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = c2mr->umem->nmap;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
if (!pages) {
@@ -472,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
i = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] =
- sg_dma_address(&chunk->page_list[j]) +
- (c2mr->umem->page_size * k);
- }
+ for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] =
+ sg_dma_address(sg) +
+ (c2mr->umem->page_size * k);
}
}
@@ -654,7 +641,7 @@ static int c2_service_destroy(struct iw_cm_id *cm_id)
static int c2_pseudo_up(struct net_device *netdev)
{
struct in_device *ind;
- struct c2_dev *c2dev = netdev->priv;
+ struct c2_dev *c2dev = netdev->ml_priv;
ind = in_dev_get(netdev);
if (!ind)
@@ -679,7 +666,7 @@ static int c2_pseudo_up(struct net_device *netdev)
static int c2_pseudo_down(struct net_device *netdev)
{
struct in_device *ind;
- struct c2_dev *c2dev = netdev->priv;
+ struct c2_dev *c2dev = netdev->ml_priv;
ind = in_dev_get(netdev);
if (!ind)
@@ -709,26 +696,27 @@ static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
{
- int ret = 0;
-
if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
return -EINVAL;
netdev->mtu = new_mtu;
/* TODO: Tell rnic about new rmda interface mtu */
- return ret;
+ return 0;
}
+static const struct net_device_ops c2_pseudo_netdev_ops = {
+ .ndo_open = c2_pseudo_up,
+ .ndo_stop = c2_pseudo_down,
+ .ndo_start_xmit = c2_pseudo_xmit_frame,
+ .ndo_change_mtu = c2_pseudo_change_mtu,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
static void setup(struct net_device *netdev)
{
- netdev->open = c2_pseudo_up;
- netdev->stop = c2_pseudo_down;
- netdev->hard_start_xmit = c2_pseudo_xmit_frame;
- netdev->get_stats = NULL;
- netdev->tx_timeout = NULL;
- netdev->set_mac_address = NULL;
- netdev->change_mtu = c2_pseudo_change_mtu;
+ netdev->netdev_ops = &c2_pseudo_netdev_ops;
+
netdev->watchdog_timeo = 0;
netdev->type = ARPHRD_ETHER;
netdev->mtu = 1500;
@@ -736,7 +724,6 @@ static void setup(struct net_device *netdev)
netdev->addr_len = ETH_ALEN;
netdev->tx_queue_len = 0;
netdev->flags |= IFF_NOARP;
- return;
}
static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
@@ -747,24 +734,21 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
/* change ethxxx to iwxxx */
strcpy(name, "iw");
strcat(name, &c2dev->netdev->name[3]);
- netdev = alloc_netdev(sizeof(*netdev), name, setup);
+ netdev = alloc_netdev(0, name, setup);
if (!netdev) {
printk(KERN_ERR PFX "%s - etherdev alloc failed",
__func__);
return NULL;
}
- netdev->priv = c2dev;
+ netdev->ml_priv = c2dev;
SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
/* Print out the MAC address */
- pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
- netdev->name,
- netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
- netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+ pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
#if 0
/* Disable network packets */
@@ -781,11 +765,11 @@ int c2_register_device(struct c2_dev *dev)
/* Register pseudo network device */
dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
if (!dev->pseudo_netdev)
- goto out3;
+ goto out;
ret = register_netdev(dev->pseudo_netdev);
if (ret)
- goto out2;
+ goto out_free_netdev;
pr_debug("%s:%u\n", __func__, __LINE__);
strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
@@ -817,7 +801,6 @@ int c2_register_device(struct c2_dev *dev)
dev->ibdev.dma_device = &dev->pcidev->dev;
dev->ibdev.query_device = c2_query_device;
dev->ibdev.query_port = c2_query_port;
- dev->ibdev.modify_port = c2_modify_port;
dev->ibdev.query_pkey = c2_query_pkey;
dev->ibdev.query_gid = c2_query_gid;
dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
@@ -852,6 +835,10 @@ int c2_register_device(struct c2_dev *dev)
dev->ibdev.post_recv = c2_post_receive;
dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+ if (dev->ibdev.iwcm == NULL) {
+ ret = -ENOMEM;
+ goto out_unregister_netdev;
+ }
dev->ibdev.iwcm->add_ref = c2_add_ref;
dev->ibdev.iwcm->rem_ref = c2_rem_ref;
dev->ibdev.iwcm->get_qp = c2_get_qp;
@@ -861,25 +848,27 @@ int c2_register_device(struct c2_dev *dev)
dev->ibdev.iwcm->create_listen = c2_service_create;
dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
- ret = ib_register_device(&dev->ibdev);
+ ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
- goto out1;
+ goto out_free_iwcm;
for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
c2_dev_attributes[i]);
if (ret)
- goto out0;
+ goto out_unregister_ibdev;
}
- goto out3;
+ goto out;
-out0:
+out_unregister_ibdev:
ib_unregister_device(&dev->ibdev);
-out1:
+out_free_iwcm:
+ kfree(dev->ibdev.iwcm);
+out_unregister_netdev:
unregister_netdev(dev->pseudo_netdev);
-out2:
+out_free_netdev:
free_netdev(dev->pseudo_netdev);
-out3:
+out:
pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
return ret;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index 1076df2ee96..bf189987711 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -50,7 +50,7 @@
struct c2_buf_list {
void *buf;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ DEFINE_DMA_UNMAP_ADDR(mapping);
};
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index a6d89440ad2..86708dee58b 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -36,6 +36,7 @@
*/
#include <linux/delay.h>
+#include <linux/gfp.h>
#include "c2.h"
#include "c2_vq.h"
@@ -381,14 +382,16 @@ static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
{
int ret;
- do {
- spin_lock_irq(&c2dev->qp_table.lock);
- ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
- c2dev->qp_table.last++, &qp->qpn);
- spin_unlock_irq(&c2dev->qp_table.lock);
- } while ((ret == -EAGAIN) &&
- idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
- return ret;
+ idr_preload(GFP_KERNEL);
+ spin_lock_irq(&c2dev->qp_table.lock);
+
+ ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
+ if (ret >= 0)
+ qp->qpn = ret;
+
+ spin_unlock_irq(&c2dev->qp_table.lock);
+ idr_preload_end();
+ return ret < 0 ? ret : 0;
}
static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
@@ -611,7 +614,7 @@ void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
c2_unlock_cqs(send_cq, recv_cq);
/*
- * Destory qp in the rnic...
+ * Destroy qp in the rnic...
*/
destroy_qp(c2dev, qp);
@@ -798,8 +801,10 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
u8 actual_sge_count;
u32 msg_size;
- if (qp->state > IB_QPS_RTS)
- return -EINVAL;
+ if (qp->state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
while (ib_wr) {
@@ -930,6 +935,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
ib_wr = ib_wr->next;
}
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -944,8 +950,10 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
unsigned long lock_flags;
int err = 0;
- if (qp->state > IB_QPS_RTS)
- return -EINVAL;
+ if (qp->state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
/*
* Try and post each work request
@@ -998,18 +1006,19 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
ib_wr = ib_wr->next;
}
+out:
if (err)
*bad_wr = ib_wr;
return err;
}
-void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+void c2_init_qp_table(struct c2_dev *c2dev)
{
spin_lock_init(&c2dev->qp_table.lock);
idr_init(&c2dev->qp_table.idr);
}
-void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+void c2_cleanup_qp_table(struct c2_dev *c2dev)
{
idr_destroy(&c2dev->qp_table.idr);
}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index dd05c483564..d2a6d961344 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -51,6 +51,7 @@
#include <linux/mm.h>
#include <linux/inet.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/route.h>
@@ -438,10 +439,10 @@ static int c2_rnic_close(struct c2_dev *c2dev)
/*
* Called by c2_probe to initialize the RNIC. This principally
- * involves initalizing the various limits and resouce pools that
+ * involves initializing the various limits and resource pools that
* comprise the RNIC instance.
*/
-int __devinit c2_rnic_init(struct c2_dev *c2dev)
+int c2_rnic_init(struct c2_dev *c2dev)
{
int err;
u32 qsize, msgsize;
@@ -458,13 +459,12 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
IB_DEVICE_MEM_WINDOW);
/* Allocate the qptr_array */
- c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+ c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
if (!c2dev->qptr_array) {
return -ENOMEM;
}
- /* Inialize the qptr_array */
- memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+ /* Initialize the qptr_array */
c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
c2dev->qptr_array[2] = (void *) &c2dev->aeq;
@@ -523,7 +523,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
err = -ENOMEM;
goto bail1;
}
- pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+ dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
(unsigned long long) c2dev->rep_vq.host_dma);
c2_mq_rep_init(&c2dev->rep_vq,
@@ -544,7 +544,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
err = -ENOMEM;
goto bail2;
}
- pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+ dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
(unsigned long long) c2dev->aeq.host_dma);
c2_mq_rep_init(&c2dev->aeq,
@@ -576,7 +576,8 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
goto bail4;
/* Initialize cached the adapter limits */
- if (c2_rnic_query(c2dev, &c2dev->props))
+ err = c2_rnic_query(c2dev, &c2dev->props);
+ if (err)
goto bail5;
/* Initialize the PD pool */
@@ -595,11 +596,11 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
bail3:
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->aeq.q_size * c2dev->aeq.msg_size,
- q2_pages, pci_unmap_addr(&c2dev->aeq, mapping));
+ q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
bail2:
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
- q1_pages, pci_unmap_addr(&c2dev->rep_vq, mapping));
+ q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
bail1:
c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
bail0:
@@ -611,7 +612,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
/*
* Called by c2_remove to cleanup the RNIC resources.
*/
-void __devexit c2_rnic_term(struct c2_dev *c2dev)
+void c2_rnic_term(struct c2_dev *c2dev)
{
/* Close the open adapter instance */
@@ -636,13 +637,13 @@ void __devexit c2_rnic_term(struct c2_dev *c2dev)
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->aeq.q_size * c2dev->aeq.msg_size,
c2dev->aeq.msg_pool.host,
- pci_unmap_addr(&c2dev->aeq, mapping));
+ dma_unmap_addr(&c2dev->aeq, mapping));
/* Free the verbs reply queue */
dma_free_coherent(&c2dev->pcidev->dev,
c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
c2dev->rep_vq.msg_pool.host,
- pci_unmap_addr(&c2dev->rep_vq, mapping));
+ dma_unmap_addr(&c2dev->rep_vq, mapping));
/* Free the MQ shared pointer pool */
c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
index 9ce7819b7b2..2ec716fb2ed 100644
--- a/drivers/infiniband/hw/amso1100/c2_vq.c
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -107,7 +107,7 @@ struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
if (r) {
init_waitqueue_head(&r->wait_object);
- r->reply_msg = (u64) NULL;
+ r->reply_msg = 0;
r->event = 0;
r->cm_id = NULL;
r->qp = NULL;
@@ -123,7 +123,7 @@ struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
*/
void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
{
- r->reply_msg = (u64) NULL;
+ r->reply_msg = 0;
if (atomic_dec_and_test(&r->refcnt)) {
kfree(r);
}
@@ -151,7 +151,7 @@ void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
{
if (atomic_dec_and_test(&r->refcnt)) {
- if (r->reply_msg != (u64) NULL)
+ if (r->reply_msg != 0)
vq_repbuf_free(c2dev,
(void *) (unsigned long) r->reply_msg);
kfree(r);
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
index c65fbdd6e46..8d4b4ca463c 100644
--- a/drivers/infiniband/hw/amso1100/c2_wr.h
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -131,7 +131,7 @@ enum c2wr_ids {
* All the preceding IDs are fixed, and must not change.
* You can add new IDs, but must not remove or reorder
* any IDs. If you do, YOU will ruin any hope of
- * compatability between versions.
+ * compatibility between versions.
*/
CCWR_LAST,
@@ -242,7 +242,7 @@ enum c2_acf {
/*
* to fix bug 1815 we define the max size allowable of the
* terminate message (per the IETF spec).Refer to the IETF
- * protocal specification, section 12.1.6, page 64)
+ * protocol specification, section 12.1.6, page 64)
* The message is prefixed by 20 types of DDP info.
*
* Then the message has 6 bytes for the terminate control
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
index 2acec3fadf6..2b6352b8548 100644
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -10,7 +10,7 @@ config INFINIBAND_CXGB3
our website at <http://www.chelsio.com>.
For customer support, please visit our customer support page at
- <http://www.chelsio.com/support.htm>.
+ <http://www.chelsio.com/support.html>.
Please send feedback to <linux-bugs@chelsio.com>.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 7e7b5a66f04..2761364185a 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -1,10 +1,8 @@
-EXTRA_CFLAGS += -Idrivers/net/cxgb3
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3
obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
-ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index a8d24d53f30..8bca6b4ec9a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -31,6 +31,7 @@
*/
#ifdef DEBUG
#include <linux/types.h>
+#include <linux/slab.h>
#include "common.h"
#include "cxgb3_ioctl.h"
#include "cxio_hal.h"
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index f6d5747153a..de1c61b417d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -37,6 +37,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/slab.h>
#include <net/net_namespace.h>
#include "cxio_resource.h"
@@ -109,7 +110,6 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
udelay(1);
if (i++ > 1000000) {
- BUG_ON(1);
printk(KERN_ERR "%s: stalled rnic\n",
rdev_p->dev_name);
return -EIO;
@@ -152,29 +152,30 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
sge_cmd = qpid << 8 | 3;
wqe->sge_cmd = cpu_to_be64(sge_cmd);
skb->priority = CPL_PRIORITY_CONTROL;
- return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+ return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
}
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
{
struct rdma_cq_setup setup;
int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
+ size += 1; /* one extra page for storing cq-in-err state */
cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
if (!cq->cqid)
return -ENOMEM;
- cq->sw_queue = kzalloc(size, GFP_KERNEL);
- if (!cq->sw_queue)
- return -ENOMEM;
- cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (cq->size_log2)) *
- sizeof(struct t3_cqe),
+ if (kernel) {
+ cq->sw_queue = kzalloc(size, GFP_KERNEL);
+ if (!cq->sw_queue)
+ return -ENOMEM;
+ }
+ cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
&(cq->dma_addr), GFP_KERNEL);
if (!cq->queue) {
kfree(cq->sw_queue);
return -ENOMEM;
}
- pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+ dma_unmap_addr_set(cq, mapping, cq->dma_addr);
memset(cq->queue, 0, size);
setup.id = cq->cqid;
setup.base_addr = (u64) (cq->dma_addr);
@@ -188,6 +189,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
+#ifdef notyet
int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
{
struct rdma_cq_setup setup;
@@ -199,6 +201,7 @@ int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
setup.ovfl_mode = 1;
return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
}
+#endif
static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
{
@@ -297,7 +300,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
goto err4;
memset(wq->queue, 0, depth * sizeof(union t3_wr));
- pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+ dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
if (!kernel_domain)
wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
@@ -325,7 +328,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
(1UL << (cq->size_log2))
* sizeof(struct t3_cqe), cq->queue,
- pci_unmap_addr(cq, mapping));
+ dma_unmap_addr(cq, mapping));
cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
return err;
}
@@ -336,7 +339,7 @@ int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
(1UL << (wq->size_log2))
* sizeof(union t3_wr), wq->queue,
- pci_unmap_addr(wq, mapping));
+ dma_unmap_addr(wq, mapping));
kfree(wq->sq);
cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
kfree(wq->rq);
@@ -410,6 +413,7 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
ptr = wq->sq_rptr + count;
sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
while (ptr != wq->sq_wptr) {
+ sqp->signaled = 0;
insert_sq_cqe(wq, cq, sqp);
ptr++;
sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
@@ -450,7 +454,7 @@ static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
return 0;
- if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+ if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
return 0;
@@ -536,7 +540,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
err = -ENOMEM;
goto err;
}
- pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+ dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
rdev_p->ctrl_qp.dma_addr);
rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
memset(rdev_p->ctrl_qp.workq, 0,
@@ -571,7 +575,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
(unsigned long long) rdev_p->ctrl_qp.dma_addr,
rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
skb->priority = CPL_PRIORITY_CONTROL;
- return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+ return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
err:
kfree_skb(skb);
return err;
@@ -582,13 +586,13 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
(1UL << T3_CTRL_QP_SIZE_LOG2)
* sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
- pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
+ dma_unmap_addr(&rdev_p->ctrl_qp, mapping));
return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
}
/* write len bytes of data into addr (32B aligned address)
* If data is NULL, clear len byte of memory to zero.
- * caller aquires the ctrl_qp lock before the call
+ * caller acquires the ctrl_qp lock before the call
*/
static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
u32 len, void *data)
@@ -701,6 +705,9 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
u32 stag_idx;
u32 wptr;
+ if (cxio_fatal_error(rdev_p))
+ return -EIO;
+
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -725,17 +732,15 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
BUG_ON(page_size >= 28);
tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
- F_TPT_MW_BIND_ENABLE |
- V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
- V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+ ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
+ V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+ V_TPT_PAGE_SIZE(page_size));
+ tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+ tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
@@ -848,14 +853,16 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
wqe->qpcaps = attr->qpcaps;
wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
wqe->rqe_count = cpu_to_be16(attr->rqe_count);
- wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
+ wqe->flags_rtr_type = cpu_to_be16(attr->flags |
+ V_RTR_TYPE(attr->rtr_type) |
+ V_CHAN(attr->chan));
wqe->ord = cpu_to_be32(attr->ord);
wqe->ird = cpu_to_be32(attr->ird);
wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
wqe->irs = cpu_to_be32(attr->irs);
skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
- return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+ return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
}
void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
@@ -938,6 +945,23 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
if (!rdev_p->t3cdev_p)
rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+
+ err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
+ &(rdev_p->fw_info));
+ if (err) {
+ printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+ __func__, rdev_p->t3cdev_p, err);
+ goto err1;
+ }
+ if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
+ printk(KERN_ERR MOD "fatal firmware version mismatch: "
+ "need version %u but adapter has version %u\n",
+ CXIO_FW_MAJ,
+ G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
+ err = -EINVAL;
+ goto err1;
+ }
+
err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
&(rdev_p->rnic_info));
if (err) {
@@ -1011,6 +1035,7 @@ err3:
err2:
cxio_hal_destroy_ctrl_qp(rdev_p);
err1:
+ rdev_p->t3cdev_p->ulp = NULL;
list_del(&rdev_p->entry);
return err;
}
@@ -1021,9 +1046,9 @@ void cxio_rdev_close(struct cxio_rdev *rdev_p)
cxio_hal_pblpool_destroy(rdev_p);
cxio_hal_rqtpool_destroy(rdev_p);
list_del(&rdev_p->entry);
- rdev_p->t3cdev_p->ulp = NULL;
cxio_hal_destroy_ctrl_qp(rdev_p);
cxio_hal_destroy_resource(rdev_p->rscp);
+ rdev_p->t3cdev_p->ulp = NULL;
}
}
@@ -1204,11 +1229,12 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
}
/* incoming SEND with no receive posted failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+ if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
ret = -1;
goto skip_cqe;
}
+ BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
goto proc_cqe;
}
@@ -1223,6 +1249,13 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
* then we complete this with TPT_ERR_MSN and mark the wq in
* error.
*/
+
+ if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+ wq->error = 1;
+ ret = -1;
+ goto skip_cqe;
+ }
+
if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
wq->error = 1;
hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
@@ -1277,6 +1310,7 @@ proc_cqe:
cxio_hal_pblpool_free(wq->rdev,
wq->rq[Q_PTR2IDX(wq->rq_rptr,
wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
+ BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
wq->rq_rptr++;
}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 656fe47bc84..78fbe9ffe7f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -34,6 +34,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/kfifo.h>
#include "t3_cpl.h"
#include "t3cdev.h"
@@ -52,7 +53,7 @@
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 8192
+#define T3_MAX_CQ_DEPTH 65536
#define T3_MAX_NUM_STAG (1<<15)
#define T3_MAX_MR_SIZE 0x100000000ULL
#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
@@ -61,6 +62,8 @@
#define T3_MAX_DEV_NAME_LEN 32
+#define CXIO_FW_MAJ 7
+
struct cxio_hal_ctrl_qp {
u32 wptr;
u32 rptr;
@@ -68,18 +71,18 @@ struct cxio_hal_ctrl_qp {
wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
union t3_wr *workq; /* the work request queue */
dma_addr_t dma_addr; /* pci bus address of the workq */
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ DEFINE_DMA_UNMAP_ADDR(mapping);
void __iomem *doorbell;
};
struct cxio_hal_resource {
- struct kfifo *tpt_fifo;
+ struct kfifo tpt_fifo;
spinlock_t tpt_fifo_lock;
- struct kfifo *qpid_fifo;
+ struct kfifo qpid_fifo;
spinlock_t qpid_fifo_lock;
- struct kfifo *cqid_fifo;
+ struct kfifo cqid_fifo;
spinlock_t cqid_fifo_lock;
- struct kfifo *pdid_fifo;
+ struct kfifo pdid_fifo;
spinlock_t pdid_fifo_lock;
};
@@ -108,8 +111,16 @@ struct cxio_rdev {
struct gen_pool *pbl_pool;
struct gen_pool *rqt_pool;
struct list_head entry;
+ struct ch_embedded_info fw_info;
+ u32 flags;
+#define CXIO_ERROR_FATAL 1
};
+static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
+{
+ return rdev_p->flags & CXIO_ERROR_FATAL;
+}
+
static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
{
return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
@@ -146,7 +157,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
void cxio_rdev_close(struct cxio_rdev *rdev);
int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
@@ -183,6 +194,7 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
void cxio_flush_hw_cq(struct t3_cq *cq);
int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
u8 *cqe_flushed, u64 *cookie, u32 *credit);
+int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
#define MOD "iw_cxgb3: "
#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index bd233c08765..c40088ecf9f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -39,12 +39,12 @@
#include "cxio_resource.h"
#include "cxio_hal.h"
-static struct kfifo *rhdl_fifo;
+static struct kfifo rhdl_fifo;
static spinlock_t rhdl_fifo_lock;
#define RANDOM_SIZE 16
-static int __cxio_init_resource_fifo(struct kfifo **fifo,
+static int __cxio_init_resource_fifo(struct kfifo *fifo,
spinlock_t *fifo_lock,
u32 nr, u32 skip_low,
u32 skip_high,
@@ -55,50 +55,51 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo,
u32 rarray[16];
spin_lock_init(fifo_lock);
- *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
- if (IS_ERR(*fifo))
+ if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
return -ENOMEM;
for (i = 0; i < skip_low + skip_high; i++)
- __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+ kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
if (random) {
j = 0;
- random_bytes = random32();
+ random_bytes = prandom_u32();
for (i = 0; i < RANDOM_SIZE; i++)
rarray[i] = i + skip_low;
for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
if (j >= RANDOM_SIZE) {
j = 0;
- random_bytes = random32();
+ random_bytes = prandom_u32();
}
idx = (random_bytes >> (j * 2)) & 0xF;
- __kfifo_put(*fifo,
+ kfifo_in(fifo,
(unsigned char *) &rarray[idx],
sizeof(u32));
rarray[idx] = i;
j++;
}
for (i = 0; i < RANDOM_SIZE; i++)
- __kfifo_put(*fifo,
+ kfifo_in(fifo,
(unsigned char *) &rarray[i],
sizeof(u32));
} else
for (i = skip_low; i < nr - skip_high; i++)
- __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+ kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
for (i = 0; i < skip_low + skip_high; i++)
- kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+ if (kfifo_out_locked(fifo, (unsigned char *) &entry,
+ sizeof(u32), fifo_lock) != sizeof(u32))
+ break;
return 0;
}
-static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
u32 nr, u32 skip_low, u32 skip_high)
{
return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
skip_high, 0));
}
-static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+static int cxio_init_resource_fifo_random(struct kfifo *fifo,
spinlock_t * fifo_lock,
u32 nr, u32 skip_low, u32 skip_high)
{
@@ -113,15 +114,13 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
- rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
- GFP_KERNEL,
- &rdev_p->rscp->qpid_fifo_lock);
- if (IS_ERR(rdev_p->rscp->qpid_fifo))
+ if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
+ GFP_KERNEL))
return -ENOMEM;
for (i = 16; i < T3_MAX_NUM_QP; i++)
if (!(i & rdev_p->qpmask))
- __kfifo_put(rdev_p->rscp->qpid_fifo,
+ kfifo_in(&rdev_p->rscp->qpid_fifo,
(unsigned char *) &i, sizeof(u32));
return 0;
}
@@ -134,7 +133,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
void cxio_hal_destroy_rhdl_resource(void)
{
- kfifo_free(rhdl_fifo);
+ kfifo_free(&rhdl_fifo);
}
/* nr_* must be power of 2 */
@@ -167,11 +166,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
goto pdid_err;
return 0;
pdid_err:
- kfifo_free(rscp->cqid_fifo);
+ kfifo_free(&rscp->cqid_fifo);
cqid_err:
- kfifo_free(rscp->qpid_fifo);
+ kfifo_free(&rscp->qpid_fifo);
qpid_err:
- kfifo_free(rscp->tpt_fifo);
+ kfifo_free(&rscp->tpt_fifo);
tpt_err:
return -ENOMEM;
}
@@ -179,33 +178,37 @@ tpt_err:
/*
* returns 0 if no resource available
*/
-static u32 cxio_hal_get_resource(struct kfifo *fifo)
+static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
{
u32 entry;
- if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+ if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
return entry;
else
return 0; /* fifo empty */
}
-static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
+ u32 entry)
{
- BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+ BUG_ON(
+ kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
+ == 0);
}
u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
{
- return cxio_hal_get_resource(rscp->tpt_fifo);
+ return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
}
void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
{
- cxio_hal_put_resource(rscp->tpt_fifo, stag);
+ cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
}
u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
{
- u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+ u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
+ &rscp->qpid_fifo_lock);
PDBG("%s qpid 0x%x\n", __func__, qpid);
return qpid;
}
@@ -213,35 +216,35 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
{
PDBG("%s qpid 0x%x\n", __func__, qpid);
- cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+ cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
}
u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
{
- return cxio_hal_get_resource(rscp->cqid_fifo);
+ return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
}
void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
{
- cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+ cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
}
u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
{
- return cxio_hal_get_resource(rscp->pdid_fifo);
+ return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
}
void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
{
- cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+ cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
}
void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
{
- kfifo_free(rscp->tpt_fifo);
- kfifo_free(rscp->cqid_fifo);
- kfifo_free(rscp->qpid_fifo);
- kfifo_free(rscp->pdid_fifo);
+ kfifo_free(&rscp->tpt_fifo);
+ kfifo_free(&rscp->cqid_fifo);
+ kfifo_free(&rscp->qpid_fifo);
+ kfifo_free(&rscp->pdid_fifo);
kfree(rscp);
}
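The cxio_resource.c changes above move each resource fifo from a heap-allocated struct kfifo * to a kfifo embedded in its owning structure, with the locking made explicit through the kfifo_in_locked()/kfifo_out_locked() helpers. A minimal sketch of that pattern, using a hypothetical id_pool rather than the driver's structures:

    #include <linux/kfifo.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct id_pool {
            struct kfifo fifo;      /* fifo storage is embedded, not pointed to */
            spinlock_t lock;        /* passed to the *_locked helpers by callers */
    };

    static int id_pool_init(struct id_pool *p, u32 nr)
    {
            u32 i;

            spin_lock_init(&p->lock);
            if (kfifo_alloc(&p->fifo, nr * sizeof(u32), GFP_KERNEL))
                    return -ENOMEM;
            for (i = 1; i <= nr; i++)       /* 0 is reserved to mean "empty" */
                    kfifo_in(&p->fifo, (unsigned char *)&i, sizeof(u32));
            return 0;
    }

    static u32 id_pool_get(struct id_pool *p)  /* returns 0 when exhausted */
    {
            u32 id;

            if (kfifo_out_locked(&p->fifo, (unsigned char *)&id,
                                 sizeof(u32), &p->lock) != sizeof(u32))
                    return 0;
            return id;
    }

    static void id_pool_put(struct id_pool *p, u32 id)
    {
            kfifo_in_locked(&p->fifo, (unsigned char *)&id, sizeof(u32),
                            &p->lock);
    }

    static void id_pool_destroy(struct id_pool *p)
    {
            kfifo_free(&p->fifo);
    }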
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 04618f7bfbb..83d2e19d31a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -176,7 +176,7 @@ struct t3_send_wr {
struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
};
-#define T3_MAX_FASTREG_DEPTH 24
+#define T3_MAX_FASTREG_DEPTH 10
#define T3_MAX_FASTREG_FRAG 10
struct t3_fastreg_wr {
@@ -327,6 +327,11 @@ enum rdma_init_rtr_types {
#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+#define S_CHAN 4
+#define M_CHAN 0x3
+#define V_CHAN(x) ((x) << S_CHAN)
+#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
+
struct t3_rdma_init_attr {
u32 tid;
u32 qpid;
@@ -346,6 +351,7 @@ struct t3_rdma_init_attr {
u16 flags;
u16 rqe_count;
u32 irs;
+ u32 chan;
};
struct t3_rdma_init_wr {
@@ -604,6 +610,12 @@ struct t3_cqe {
#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
+#define CQE_SEND_OPCODE(x)( \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
+ (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
+
#define CQE_LEN(x) (be32_to_cpu((x).len))
/* used for RQ completion processing */
@@ -677,9 +689,9 @@ struct t3_swrq {
* A T3 WQ implements both the SQ and RQ.
*/
struct t3_wq {
- union t3_wr *queue; /* DMA accessable memory */
+ union t3_wr *queue; /* DMA accessible memory */
dma_addr_t dma_addr; /* DMA address for HW */
- DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */
+ DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */
u32 error; /* 1 once we go to ERROR */
u32 qpid;
u32 wptr; /* idx to next available WR slot */
@@ -706,7 +718,7 @@ struct t3_cq {
u32 wptr;
u32 size_log2;
dma_addr_t dma_addr;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ DEFINE_DMA_UNMAP_ADDR(mapping);
struct t3_cqe *queue;
struct t3_cqe *sw_queue;
u32 sw_rptr;
@@ -716,9 +728,40 @@ struct t3_cq {
#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
CQE_GENBIT(*cqe))
+struct t3_cq_status_page {
+ u32 cq_err;
+};
+
+static inline int cxio_cq_in_error(struct t3_cq *cq)
+{
+ return ((struct t3_cq_status_page *)
+ &cq->queue[1 << cq->size_log2])->cq_err;
+}
+
+static inline void cxio_set_cq_in_error(struct t3_cq *cq)
+{
+ ((struct t3_cq_status_page *)
+ &cq->queue[1 << cq->size_log2])->cq_err = 1;
+}
+
static inline void cxio_set_wq_in_error(struct t3_wq *wq)
{
- wq->queue->wq_in_err.err = 1;
+ wq->queue->wq_in_err.err |= 1;
+}
+
+static inline void cxio_disable_wq_db(struct t3_wq *wq)
+{
+ wq->queue->wq_in_err.err |= 2;
+}
+
+static inline void cxio_enable_wq_db(struct t3_wq *wq)
+{
+ wq->queue->wq_in_err.err &= ~2;
+}
+
+static inline int cxio_wq_db_enabled(struct t3_wq *wq)
+{
+ return !(wq->queue->wq_in_err.err & 2);
}
static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
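The wq_in_err.err word now carries two independent bits: bit 0 keeps its old meaning (the WQ has entered the error state), while bit 1 gates doorbell rings so queues can be throttled when the adapter reports doorbell FIFO pressure (see the OFFLOAD_DB_* handling added to iwch.c below). A hedged sketch of how a post path could honour the gate, reusing the driver's ring_doorbell() helper; post_one_wr() is an invented name, not a driver function:

    static void post_one_wr(struct t3_wq *wq, void __iomem *doorbell)
    {
            /* ...WR already copied into wq->queue at Q_PTR2IDX(wq->wptr, ...)... */
            wq->wptr++;
            if (cxio_wq_db_enabled(wq))     /* skip the ring while DBs are gated */
                    ring_doorbell(doorbell, wq->qpid);
            /*
             * While doorbells are gated the ring is deferred; iwch.c replays it
             * when enable_dbs(rnicp, 1) re-enables the queue.
             */
    }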
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 4489c89d671..8e77dc543dd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -47,22 +47,62 @@ MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
-cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-
static void open_rnic_dev(struct t3cdev *);
static void close_rnic_dev(struct t3cdev *);
+static void iwch_event_handler(struct t3cdev *, u32, u32);
struct cxgb3_client t3c_client = {
.name = "iw_cxgb3",
.add = open_rnic_dev,
.remove = close_rnic_dev,
.handlers = t3c_handlers,
- .redirect = iwch_ep_redirect
+ .redirect = iwch_ep_redirect,
+ .event_handler = iwch_event_handler
};
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(dev_mutex);
+static int disable_qp_db(int id, void *p, void *data)
+{
+ struct iwch_qp *qhp = p;
+
+ cxio_disable_wq_db(&qhp->wq);
+ return 0;
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+ struct iwch_qp *qhp = p;
+
+ if (data)
+ ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid);
+ cxio_enable_wq_db(&qhp->wq);
+ return 0;
+}
+
+static void disable_dbs(struct iwch_dev *rnicp)
+{
+ spin_lock_irq(&rnicp->lock);
+ idr_for_each(&rnicp->qpidr, disable_qp_db, NULL);
+ spin_unlock_irq(&rnicp->lock);
+}
+
+static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
+{
+ spin_lock_irq(&rnicp->lock);
+ idr_for_each(&rnicp->qpidr, enable_qp_db,
+ (void *)(unsigned long)ring_db);
+ spin_unlock_irq(&rnicp->lock);
+}
+
+static void iwch_db_drop_task(struct work_struct *work)
+{
+ struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
+ db_drop_task.work);
+ enable_dbs(rnicp, 1);
+}
+
static void rnic_init(struct iwch_dev *rnicp)
{
PDBG("%s iwch_dev %p\n", __func__, rnicp);
@@ -70,6 +110,7 @@ static void rnic_init(struct iwch_dev *rnicp)
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
spin_lock_init(&rnicp->lock);
+ INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
@@ -103,11 +144,9 @@ static void rnic_init(struct iwch_dev *rnicp)
static void open_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *rnicp;
- static int vers_printed;
PDBG("%s t3cdev %p\n", __func__, tdev);
- if (!vers_printed++)
- printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+ printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
if (!rnicp) {
@@ -147,6 +186,9 @@ static void close_rnic_dev(struct t3cdev *tdev)
mutex_lock(&dev_mutex);
list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
if (dev->rdev.t3cdev_p == tdev) {
+ dev->rdev.flags = CXIO_ERROR_FATAL;
+ synchronize_net();
+ cancel_delayed_work_sync(&dev->db_drop_task);
list_del(&dev->entry);
iwch_unregister_device(dev);
cxio_rdev_close(&dev->rdev);
@@ -160,6 +202,69 @@ static void close_rnic_dev(struct t3cdev *tdev)
mutex_unlock(&dev_mutex);
}
+static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
+{
+ struct cxio_rdev *rdev = tdev->ulp;
+ struct iwch_dev *rnicp;
+ struct ib_event event;
+ u32 portnum = port_id + 1;
+ int dispatch = 0;
+
+ if (!rdev)
+ return;
+ rnicp = rdev_to_iwch_dev(rdev);
+ switch (evt) {
+ case OFFLOAD_STATUS_DOWN: {
+ rdev->flags = CXIO_ERROR_FATAL;
+ synchronize_net();
+ event.event = IB_EVENT_DEVICE_FATAL;
+ dispatch = 1;
+ break;
+ }
+ case OFFLOAD_PORT_DOWN: {
+ event.event = IB_EVENT_PORT_ERR;
+ dispatch = 1;
+ break;
+ }
+ case OFFLOAD_PORT_UP: {
+ event.event = IB_EVENT_PORT_ACTIVE;
+ dispatch = 1;
+ break;
+ }
+ case OFFLOAD_DB_FULL: {
+ disable_dbs(rnicp);
+ break;
+ }
+ case OFFLOAD_DB_EMPTY: {
+ enable_dbs(rnicp, 1);
+ break;
+ }
+ case OFFLOAD_DB_DROP: {
+ unsigned long delay = 1000;
+ unsigned short r;
+
+ disable_dbs(rnicp);
+ get_random_bytes(&r, 2);
+ delay += r & 1023;
+
+ /*
+ * delay is between 1000-2023 usecs.
+ */
+ schedule_delayed_work(&rnicp->db_drop_task,
+ usecs_to_jiffies(delay));
+ break;
+ }
+ }
+
+ if (dispatch) {
+ event.device = &rnicp->ibdev;
+ event.element.port_num = portnum;
+ ib_dispatch_event(&event);
+ }
+
+ return;
+}
+
static int __init iwch_init_module(void)
{
int err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 3773453b2cf..837862287a2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
+#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
@@ -110,6 +111,7 @@ struct iwch_dev {
struct idr mmidr;
spinlock_t lock;
struct list_head entry;
+ struct delayed_work db_drop_task;
};
static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
@@ -117,6 +119,11 @@ static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
return container_of(ibdev, struct iwch_dev, ibdev);
}
+static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev)
+{
+ return container_of(rdev, struct iwch_dev, rdev);
+}
+
static inline int t3b_device(const struct iwch_dev *rhp)
{
return rhp->rdev.t3cdev_p->type == T3B;
@@ -146,19 +153,17 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
void *handle, u32 id)
{
int ret;
- int newid;
-
- do {
- if (!idr_pre_get(idr, GFP_KERNEL)) {
- return -ENOMEM;
- }
- spin_lock_irq(&rhp->lock);
- ret = idr_get_new_above(idr, handle, id, &newid);
- BUG_ON(newid != id);
- spin_unlock_irq(&rhp->lock);
- } while (ret == -EAGAIN);
-
- return ret;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irq(&rhp->lock);
+
+ ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT);
+
+ spin_unlock_irq(&rhp->lock);
+ idr_preload_end();
+
+ BUG_ON(ret == -ENOSPC);
+ return ret < 0 ? ret : 0;
}
static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
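One behavioural consequence of the idr_preload()/idr_alloc() form above: insert_handle() asks for exactly the id in the range [id, id + 1), so it can now fail (typically with -ENOMEM) instead of looping, and callers must unwind. A hypothetical helper sketch mirroring the pattern the iwch_provider.c hunk below adopts (claim_cqid() is an invented name, not driver code):

    static struct iwch_cq *claim_cqid(struct iwch_dev *rhp, struct iwch_cq *chp)
    {
            if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
                    cxio_destroy_cq(&rhp->rdev, &chp->cq); /* undo HW setup */
                    kfree(chp);
                    return ERR_PTR(-ENOMEM);
            }
            return chp;
    }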
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index c325c44807e..cb78b1e9bcd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
#include <linux/list.h>
+#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
@@ -101,12 +102,9 @@ static unsigned int cong_flavor = 1;
module_param(cong_flavor, uint, 0644);
MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
-static void process_work(struct work_struct *work);
static struct workqueue_struct *workq;
-static DECLARE_WORK(skb_work, process_work);
static struct sk_buff_head rxq;
-static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS];
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
static void ep_timeout(unsigned long arg);
@@ -130,15 +128,46 @@ static void stop_ep_timer(struct iwch_ep *ep)
{
PDBG("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
- printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
+ WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
- WARN_ON(1);
return;
}
del_timer_sync(&ep->timer);
put_ep(&ep->com);
}
+static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
+{
+ int error = 0;
+ struct cxio_rdev *rdev;
+
+ rdev = (struct cxio_rdev *)tdev->ulp;
+ if (cxio_fatal_error(rdev)) {
+ kfree_skb(skb);
+ return -EIO;
+ }
+ error = l2t_send(tdev, skb, l2e);
+ if (error < 0)
+ kfree_skb(skb);
+ return error;
+}
+
+int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
+{
+ int error = 0;
+ struct cxio_rdev *rdev;
+
+ rdev = (struct cxio_rdev *)tdev->ulp;
+ if (cxio_fatal_error(rdev)) {
+ kfree_skb(skb);
+ return -EIO;
+ }
+ error = cxgb3_ofld_send(tdev, skb);
+ if (error < 0)
+ kfree_skb(skb);
+ return error;
+}
+
static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
{
struct cpl_tid_release *req;
@@ -150,7 +179,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
skb->priority = CPL_PRIORITY_SETUP;
- cxgb3_ofld_send(tdev, skb);
+ iwch_cxgb3_ofld_send(tdev, skb);
return;
}
@@ -172,8 +201,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep)
req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
skb->priority = CPL_PRIORITY_DATA;
- cxgb3_ofld_send(ep->com.tdev, skb);
- return 0;
+ return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}
int iwch_resume_tid(struct iwch_ep *ep)
@@ -194,8 +222,7 @@ int iwch_resume_tid(struct iwch_ep *ep)
req->val = 0;
skb->priority = CPL_PRIORITY_DATA;
- cxgb3_ofld_send(ep->com.tdev, skb);
- return 0;
+ return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}
static void set_emss(struct iwch_ep *ep, u16 opt)
@@ -252,42 +279,25 @@ static void *alloc_ep(int size, gfp_t gfp)
void __free_ep(struct kref *kref)
{
- struct iwch_ep_common *epc;
- epc = container_of(kref, struct iwch_ep_common, kref);
- PDBG("%s ep %p state %s\n", __func__, epc, states[state_read(epc)]);
- kfree(epc);
+ struct iwch_ep *ep;
+ ep = container_of(container_of(kref, struct iwch_ep_common, kref),
+ struct iwch_ep, com);
+ PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
+ cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+ dst_release(ep->dst);
+ l2t_release(ep->com.tdev, ep->l2t);
+ }
+ kfree(ep);
}
static void release_ep_resources(struct iwch_ep *ep)
{
PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
- dst_release(ep->dst);
- l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ set_bit(RELEASE_RESOURCES, &ep->com.flags);
put_ep(&ep->com);
}
-static void process_work(struct work_struct *work)
-{
- struct sk_buff *skb = NULL;
- void *ep;
- struct t3cdev *tdev;
- int ret;
-
- while ((skb = skb_dequeue(&rxq))) {
- ep = *((void **) (skb->cb));
- tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
- ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
- if (ret & CPL_RET_BUF_DONE)
- kfree_skb(skb);
-
- /*
- * ep was referenced in sched(), and is freed here.
- */
- put_ep((struct iwch_ep_common *)ep);
- }
-}
-
static int status2errno(int status)
{
switch (status) {
@@ -327,23 +337,12 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
__be16 peer_port, u8 tos)
{
struct rtable *rt;
- struct flowi fl = {
- .oif = 0,
- .nl_u = {
- .ip4_u = {
- .daddr = peer_ip,
- .saddr = local_ip,
- .tos = tos}
- },
- .proto = IPPROTO_TCP,
- .uli_u = {
- .ports = {
- .sport = local_port,
- .dport = peer_port}
- }
- };
-
- if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0))
+ struct flowi4 fl4;
+
+ rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
+ peer_port, local_port, IPPROTO_TCP,
+ tos, 0);
+ if (IS_ERR(rt))
return NULL;
return rt;
}
@@ -382,7 +381,7 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
PDBG("%s t3cdev %p\n", __func__, dev);
req->cmd = CPL_ABORT_NO_RST;
- cxgb3_ofld_send(dev, skb);
+ iwch_cxgb3_ofld_send(dev, skb);
}
static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
@@ -402,8 +401,7 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
- l2t_send(ep->com.tdev, skb, ep->l2t);
- return 0;
+ return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}
static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
@@ -420,12 +418,12 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
req->cmd = CPL_ABORT_SEND_RST;
- l2t_send(ep->com.tdev, skb, ep->l2t);
- return 0;
+ return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}
static int send_connect(struct iwch_ep *ep)
@@ -454,7 +452,8 @@ static int send_connect(struct iwch_ep *ep)
V_MSS_IDX(mtu_idx) |
V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+ opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
+ V_CONG_CONTROL_FLAVOR(cong_flavor);
skb->priority = CPL_PRIORITY_SETUP;
set_arp_failure_handler(skb, act_open_req_arp_failure);
@@ -469,8 +468,7 @@ static int send_connect(struct iwch_ep *ep)
req->opt0l = htonl(opt0l);
req->params = 0;
req->opt2 = htonl(opt2);
- l2t_send(ep->com.tdev, skb, ep->l2t);
- return 0;
+ return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}
static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
@@ -527,7 +525,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
req->sndseq = htonl(ep->snd_seq);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
- l2t_send(ep->com.tdev, skb, ep->l2t);
+ iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
start_ep_timer(ep);
state_set(&ep->com, MPA_REQ_SENT);
return;
@@ -578,8 +576,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
req->sndseq = htonl(ep->snd_seq);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
- l2t_send(ep->com.tdev, skb, ep->l2t);
- return 0;
+ return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}
static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
@@ -630,8 +627,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
req->sndseq = htonl(ep->snd_seq);
ep->mpa_skb = skb;
state_set(&ep->com, MPA_REP_SENT);
- l2t_send(ep->com.tdev, skb, ep->l2t);
- return 0;
+ return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
}
static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
@@ -726,8 +722,10 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status)
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
- event.local_addr = ep->com.local_addr;
- event.remote_addr = ep->com.remote_addr;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
if ((status == 0) || (status == -ECONNREFUSED)) {
event.private_data_len = ep->plen;
@@ -752,15 +750,24 @@ static void connect_request_upcall(struct iwch_ep *ep)
PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
- event.local_addr = ep->com.local_addr;
- event.remote_addr = ep->com.remote_addr;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.local_addr));
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
event.provider_data = ep;
- if (state_read(&ep->parent_ep->com) != DEAD)
+ /*
+ * Until ird/ord negotiation via MPAv2 support is added, send max
+ * supported values
+ */
+ event.ird = event.ord = 8;
+ if (state_read(&ep->parent_ep->com) != DEAD) {
+ get_ep(&ep->com);
ep->parent_ep->com.cm_id->event_handler(
ep->parent_ep->com.cm_id,
&event);
+ }
put_ep(&ep->parent_ep->com);
ep->parent_ep = NULL;
}
@@ -772,6 +779,11 @@ static void established_upcall(struct iwch_ep *ep)
PDBG("%s ep %p\n", __func__, ep);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
+ /*
+ * Until ird/ord negotiation via MPAv2 support is added, send max
+ * supported values
+ */
+ event.ird = event.ord = 8;
if (ep->com.cm_id) {
PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -795,7 +807,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits)
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
skb->priority = CPL_PRIORITY_ACK;
- cxgb3_ofld_send(ep->com.tdev, skb);
+ iwch_cxgb3_ofld_send(ep->com.tdev, skb);
return credits;
}
@@ -916,7 +928,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
goto err;
if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
- iwch_post_zb_read(ep->com.qp);
+ iwch_post_zb_read(ep);
}
goto out;
@@ -1080,37 +1092,45 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
struct iwch_ep *ep = ctx;
struct cpl_wr_ack *hdr = cplhdr(skb);
unsigned int credits = ntohs(hdr->credits);
+ unsigned long flags;
+ int post_zb = 0;
PDBG("%s ep %p credits %u\n", __func__, ep, credits);
if (credits == 0) {
- PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
+ PDBG("%s 0 credit ack ep %p state %u\n",
+ __func__, ep, state_read(&ep->com));
return CPL_RET_BUF_DONE;
}
+ spin_lock_irqsave(&ep->com.lock, flags);
BUG_ON(credits != 1);
dst_confirm(ep->dst);
if (!ep->mpa_skb) {
PDBG("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
+ __func__, ep, ep->com.state);
if (ep->mpa_attr.initiator) {
PDBG("%s initiator ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
- if (peer2peer)
- iwch_post_zb_read(ep->com.qp);
+ __func__, ep, ep->com.state);
+ if (peer2peer && ep->com.state == FPDU_MODE)
+ post_zb = 1;
} else {
PDBG("%s responder ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
+ __func__, ep, ep->com.state);
+ if (ep->com.state == MPA_REQ_RCVD) {
+ ep->com.rpl_done = 1;
+ wake_up(&ep->com.waitq);
+ }
}
} else {
PDBG("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, state_read(&ep->com));
+ __func__, ep, ep->com.state);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
}
+ spin_unlock_irqrestore(&ep->com.lock, flags);
+ if (post_zb)
+ iwch_post_zb_read(ep);
return CPL_RET_BUF_DONE;
}
@@ -1127,8 +1147,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
* We get 2 abort replies from the HW. The first one must
* be ignored except for scribbling that we need one more.
*/
- if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) {
- ep->flags |= ABORT_REQ_IN_PROGRESS;
+ if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
return CPL_RET_BUF_DONE;
}
@@ -1173,7 +1192,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
release_tid(ep->com.tdev, GET_TID(rpl), NULL);
cxgb3_free_atid(ep->com.tdev, ep->atid);
dst_release(ep->dst);
- l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ l2t_release(ep->com.tdev, ep->l2t);
put_ep(&ep->com);
return CPL_RET_BUF_DONE;
}
@@ -1203,8 +1222,7 @@ static int listen_start(struct iwch_listen_ep *ep)
req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
skb->priority = 1;
- cxgb3_ofld_send(ep->com.tdev, skb);
- return 0;
+ return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}
static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
@@ -1237,8 +1255,7 @@ static int listen_stop(struct iwch_listen_ep *ep)
req->cpu_idx = 0;
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
skb->priority = 1;
- cxgb3_ofld_send(ep->com.tdev, skb);
- return 0;
+ return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
}
static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
@@ -1275,7 +1292,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
V_MSS_IDX(mtu_idx) |
V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
+ opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
+ V_CONG_CONTROL_FLAVOR(cong_flavor);
rpl = cplhdr(skb);
rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
@@ -1286,7 +1304,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
rpl->opt2 = htonl(opt2);
rpl->rsvd = rpl->opt2; /* workaround for HW bug */
skb->priority = CPL_PRIORITY_SETUP;
- l2t_send(ep->com.tdev, skb, ep->l2t);
+ iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
return;
}
@@ -1315,7 +1333,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
rpl->opt2 = 0;
rpl->rsvd = rpl->opt2;
- cxgb3_ofld_send(tdev, skb);
+ iwch_cxgb3_ofld_send(tdev, skb);
}
}
@@ -1343,15 +1361,8 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
tim.mac_addr = req->dst_mac;
tim.vlan_tag = ntohs(req->vlan_tag);
if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- printk(KERN_ERR
- "%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
- __func__,
- req->dst_mac[0],
- req->dst_mac[1],
- req->dst_mac[2],
- req->dst_mac[3],
- req->dst_mac[4],
- req->dst_mac[5]);
+ printk(KERN_ERR "%s bad dst mac %pM\n",
+ __func__, req->dst_mac);
goto reject;
}
@@ -1366,8 +1377,8 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
__func__);
goto reject;
}
- dst = &rt->u.dst;
- l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
+ dst = &rt->dst;
+ l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
if (!l2t) {
printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
__func__);
@@ -1378,7 +1389,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
if (!child_ep) {
printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
__func__);
- l2t_release(L2DATA(tdev), l2t);
+ l2t_release(tdev, l2t);
dst_release(dst);
goto reject;
}
@@ -1450,10 +1461,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
/*
* We're gonna mark this puppy DEAD, but keep
* the reference on it until the ULP accepts or
- * rejects the CR.
+ * rejects the CR. Also wake up anyone waiting
+ * in rdma connection migration (see iwch_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
- get_ep(&ep->com);
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
@@ -1534,8 +1549,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
* We get 2 peer aborts from the HW. The first one must
* be ignored except for scribbling that we need one more.
*/
- if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) {
- ep->flags |= PEER_ABORT_IN_PROGRESS;
+ if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
return CPL_RET_BUF_DONE;
}
@@ -1562,9 +1576,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
/*
* We're gonna mark this puppy DEAD, but keep
* the reference on it until the ULP accepts or
- * rejects the CR.
+ * rejects the CR. Also wake up anyone waiting
+ * in rdma connection migration (see iwch_accept_cr()).
*/
- get_ep(&ep->com);
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
break;
case MORIBUND:
case CLOSING:
@@ -1613,7 +1631,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
rpl->cmd = CPL_ABORT_NO_RST;
- cxgb3_ofld_send(ep->com.tdev, rpl_skb);
+ iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
out:
if (release)
release_ep_resources(ep);
@@ -1666,7 +1684,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
* T3A does 3 things when a TERM is received:
* 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
* 2) generate an async event on the QP with the TERMINATE opcode
- * 3) post a TERMINATE opcde cqe into the associated CQ.
+ * 3) post a TERMINATE opcode cqe into the associated CQ.
*
* For (1), we save the message in the qp for later consumer consumption.
* For (2), we move the QP into TERMINATE, post a QP event and disconnect.
@@ -1678,6 +1696,9 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
struct iwch_ep *ep = ctx;
+ if (state_read(&ep->com) != FPDU_MODE)
+ return CPL_RET_BUF_DONE;
+
PDBG("%s ep %p\n", __func__, ep);
skb_pull(skb, sizeof(struct cpl_rdma_terminate));
PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
@@ -1739,9 +1760,8 @@ static void ep_timeout(unsigned long arg)
__state_set(&ep->com, ABORTING);
break;
default:
- printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+ WARN(1, "%s unexpected state ep %p state %u\n",
__func__, ep, ep->com.state);
- WARN_ON(1);
abort = 0;
}
spin_unlock_irqrestore(&ep->com.lock, flags);
@@ -1767,6 +1787,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
err = send_mpa_reject(ep, pdata, pdata_len);
err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
}
+ put_ep(&ep->com);
return 0;
}
@@ -1780,8 +1801,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD)
- return -ECONNRESET;
+ if (state_read(&ep->com) == DEAD) {
+ err = -ECONNRESET;
+ goto err;
+ }
BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
BUG_ON(!qp);
@@ -1789,20 +1812,21 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
(conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
abort_connection(ep, NULL, GFP_KERNEL);
- return -EINVAL;
+ err = -EINVAL;
+ goto err;
}
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
ep->com.qp = qp;
- ep->com.rpl_done = 0;
- ep->com.rpl_err = 0;
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
- get_ep(&ep->com);
+ if (peer2peer && ep->ird == 0)
+ ep->ird = 1;
+
+ PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
@@ -1821,30 +1845,31 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
if (err)
- goto err;
+ goto err1;
/* if needed, wait for wr_ack */
if (iwch_rqes_posted(qp)) {
wait_event(ep->com.waitq, ep->com.rpl_done);
err = ep->com.rpl_err;
if (err)
- goto err;
+ goto err1;
}
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
- goto err;
+ goto err1;
state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
put_ep(&ep->com);
return 0;
-err:
+err1:
ep->com.cm_id = NULL;
ep->com.qp = NULL;
cm_id->rem_ref(cm_id);
+err:
put_ep(&ep->com);
return err;
}
@@ -1852,8 +1877,9 @@ err:
static int is_loopback_dst(struct iw_cm_id *cm_id)
{
struct net_device *dev;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
- dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr);
+ dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
if (!dev)
return 0;
dev_put(dev);
@@ -1862,10 +1888,17 @@ static int is_loopback_dst(struct iw_cm_id *cm_id)
int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
- int err = 0;
struct iwch_dev *h = to_iwch_dev(cm_id->device);
struct iwch_ep *ep;
struct rtable *rt;
+ int err = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+ if (cm_id->remote_addr.ss_family != PF_INET) {
+ err = -ENOSYS;
+ goto out;
+ }
if (is_loopback_dst(cm_id)) {
err = -ENOSYS;
@@ -1885,6 +1918,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
conn_param->private_data, ep->plen);
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
+
+ if (peer2peer && ep->ord == 0)
+ ep->ord = 1;
+
ep->com.tdev = h->rdev.t3cdev_p;
cm_id->add_ref(cm_id);
@@ -1905,21 +1942,17 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
/* find a route */
- rt = find_route(h->rdev.t3cdev_p,
- cm_id->local_addr.sin_addr.s_addr,
- cm_id->remote_addr.sin_addr.s_addr,
- cm_id->local_addr.sin_port,
- cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
+ rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr, laddr->sin_port,
+ raddr->sin_port, IPTOS_LOWDELAY);
if (!rt) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
- ep->dst = &rt->u.dst;
-
- /* get a l2t entry */
- ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
- ep->dst->neighbour->dev);
+ ep->dst = &rt->dst;
+ ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
+ &raddr->sin_addr.s_addr);
if (!ep->l2t) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
err = -ENOMEM;
@@ -1928,20 +1961,23 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
state_set(&ep->com, CONNECTING);
ep->tos = IPTOS_LOWDELAY;
- ep->com.local_addr = cm_id->local_addr;
- ep->com.remote_addr = cm_id->remote_addr;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.remote_addr));
/* send connect request to rnic */
err = send_connect(ep);
if (!err)
goto out;
- l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
+ l2t_release(h->rdev.t3cdev_p, ep->l2t);
fail4:
dst_release(ep->dst);
fail3:
cxgb3_free_atid(ep->com.tdev, ep->atid);
fail2:
+ cm_id->rem_ref(cm_id);
put_ep(&ep->com);
out:
return err;
@@ -1956,6 +1992,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
might_sleep();
+ if (cm_id->local_addr.ss_family != PF_INET) {
+ err = -ENOSYS;
+ goto fail1;
+ }
+
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
@@ -1967,7 +2008,8 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
ep->backlog = backlog;
- ep->com.local_addr = cm_id->local_addr;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
/*
* Allocate a server TID.
@@ -2013,8 +2055,11 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id)
ep->com.rpl_done = 0;
ep->com.rpl_err = 0;
err = listen_stop(ep);
+ if (err)
+ goto done;
wait_event(ep->com.waitq, ep->com.rpl_done);
cxgb3_free_stid(ep->com.tdev, ep->stid);
+done:
err = ep->com.rpl_err;
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
@@ -2026,12 +2071,22 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
int ret=0;
unsigned long flags;
int close = 0;
+ int fatal = 0;
+ struct t3cdev *tdev;
+ struct cxio_rdev *rdev;
spin_lock_irqsave(&ep->com.lock, flags);
PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
states[ep->com.state], abrupt);
+ tdev = (struct t3cdev *)ep->com.tdev;
+ rdev = (struct cxio_rdev *)tdev->ulp;
+ if (cxio_fatal_error(rdev)) {
+ fatal = 1;
+ close_complete_upcall(ep);
+ ep->com.state = DEAD;
+ }
switch (ep->com.state) {
case MPA_REQ_WAIT:
case MPA_REQ_SENT:
@@ -2045,14 +2100,17 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
ep->com.state = CLOSING;
start_ep_timer(ep);
}
+ set_bit(CLOSE_SENT, &ep->com.flags);
break;
case CLOSING:
- close = 1;
- if (abrupt) {
- stop_ep_timer(ep);
- ep->com.state = ABORTING;
- } else
- ep->com.state = MORIBUND;
+ if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+ close = 1;
+ if (abrupt) {
+ stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
+ }
break;
case MORIBUND:
case ABORTING:
@@ -2071,7 +2129,11 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
ret = send_abort(ep, NULL, gfp);
else
ret = send_halfclose(ep, gfp);
+ if (ret)
+ fatal = 1;
}
+ if (fatal)
+ release_ep_resources(ep);
return ret;
}
@@ -2086,7 +2148,7 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
l2t);
dst_hold(new);
- l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+ l2t_release(ep->com.tdev, ep->l2t);
ep->l2t = l2t;
dst_release(old);
ep->dst = new;
@@ -2095,7 +2157,49 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
/*
* All the CM events are handled on a work queue to have a safe context.
+ * These are the real handlers that are called from the work queue.
*/
+static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
+ [CPL_ACT_ESTABLISH] = act_establish,
+ [CPL_ACT_OPEN_RPL] = act_open_rpl,
+ [CPL_RX_DATA] = rx_data,
+ [CPL_TX_DMA_ACK] = tx_ack,
+ [CPL_ABORT_RPL_RSS] = abort_rpl,
+ [CPL_ABORT_RPL] = abort_rpl,
+ [CPL_PASS_OPEN_RPL] = pass_open_rpl,
+ [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
+ [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
+ [CPL_PASS_ESTABLISH] = pass_establish,
+ [CPL_PEER_CLOSE] = peer_close,
+ [CPL_ABORT_REQ_RSS] = peer_abort,
+ [CPL_CLOSE_CON_RPL] = close_con_rpl,
+ [CPL_RDMA_TERMINATE] = terminate,
+ [CPL_RDMA_EC_STATUS] = ec_status,
+};
+
+static void process_work(struct work_struct *work)
+{
+ struct sk_buff *skb = NULL;
+ void *ep;
+ struct t3cdev *tdev;
+ int ret;
+
+ while ((skb = skb_dequeue(&rxq))) {
+ ep = *((void **) (skb->cb));
+ tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
+ ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
+ if (ret & CPL_RET_BUF_DONE)
+ kfree_skb(skb);
+
+ /*
+ * ep was referenced in sched(), and is freed here.
+ */
+ put_ep((struct iwch_ep_common *)ep);
+ }
+}
+
+static DECLARE_WORK(skb_work, process_work);
+
static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
{
struct iwch_ep_common *epc = ctx;
@@ -2127,6 +2231,29 @@ static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
return CPL_RET_BUF_DONE;
}
+/*
+ * All upcalls from the T3 Core go to sched() to schedule the
+ * processing on a work queue.
+ */
+cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
+ [CPL_ACT_ESTABLISH] = sched,
+ [CPL_ACT_OPEN_RPL] = sched,
+ [CPL_RX_DATA] = sched,
+ [CPL_TX_DMA_ACK] = sched,
+ [CPL_ABORT_RPL_RSS] = sched,
+ [CPL_ABORT_RPL] = sched,
+ [CPL_PASS_OPEN_RPL] = sched,
+ [CPL_CLOSE_LISTSRV_RPL] = sched,
+ [CPL_PASS_ACCEPT_REQ] = sched,
+ [CPL_PASS_ESTABLISH] = sched,
+ [CPL_PEER_CLOSE] = sched,
+ [CPL_CLOSE_CON_RPL] = sched,
+ [CPL_ABORT_REQ_RSS] = sched,
+ [CPL_RDMA_TERMINATE] = sched,
+ [CPL_RDMA_EC_STATUS] = sched,
+ [CPL_SET_TCB_RPL] = set_tcb_rpl,
+};
+
int __init iwch_cm_init(void)
{
skb_queue_head_init(&rxq);
@@ -2135,46 +2262,6 @@ int __init iwch_cm_init(void)
if (!workq)
return -ENOMEM;
- /*
- * All upcalls from the T3 Core go to sched() to
- * schedule the processing on a work queue.
- */
- t3c_handlers[CPL_ACT_ESTABLISH] = sched;
- t3c_handlers[CPL_ACT_OPEN_RPL] = sched;
- t3c_handlers[CPL_RX_DATA] = sched;
- t3c_handlers[CPL_TX_DMA_ACK] = sched;
- t3c_handlers[CPL_ABORT_RPL_RSS] = sched;
- t3c_handlers[CPL_ABORT_RPL] = sched;
- t3c_handlers[CPL_PASS_OPEN_RPL] = sched;
- t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched;
- t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched;
- t3c_handlers[CPL_PASS_ESTABLISH] = sched;
- t3c_handlers[CPL_PEER_CLOSE] = sched;
- t3c_handlers[CPL_CLOSE_CON_RPL] = sched;
- t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
- t3c_handlers[CPL_RDMA_TERMINATE] = sched;
- t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
- t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl;
-
- /*
- * These are the real handlers that are called from a
- * work queue.
- */
- work_handlers[CPL_ACT_ESTABLISH] = act_establish;
- work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl;
- work_handlers[CPL_RX_DATA] = rx_data;
- work_handlers[CPL_TX_DMA_ACK] = tx_ack;
- work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl;
- work_handlers[CPL_ABORT_RPL] = abort_rpl;
- work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl;
- work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl;
- work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req;
- work_handlers[CPL_PASS_ESTABLISH] = pass_establish;
- work_handlers[CPL_PEER_CLOSE] = peer_close;
- work_handlers[CPL_ABORT_REQ_RSS] = peer_abort;
- work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl;
- work_handlers[CPL_RDMA_TERMINATE] = terminate;
- work_handlers[CPL_RDMA_EC_STATUS] = ec_status;
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index d7c7e09f099..b9efadfffb4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -145,8 +145,10 @@ enum iwch_ep_state {
};
enum iwch_ep_flags {
- PEER_ABORT_IN_PROGRESS = (1 << 0),
- ABORT_REQ_IN_PROGRESS = (1 << 1),
+ PEER_ABORT_IN_PROGRESS = 0,
+ ABORT_REQ_IN_PROGRESS = 1,
+ RELEASE_RESOURCES = 2,
+ CLOSE_SENT = 3,
};
struct iwch_ep_common {
@@ -161,6 +163,7 @@ struct iwch_ep_common {
wait_queue_head_t waitq;
int rpl_done;
int rpl_err;
+ unsigned long flags;
};
struct iwch_listen_ep {
@@ -188,7 +191,6 @@ struct iwch_ep {
u16 plen;
u32 ird;
u32 ord;
- u32 flags;
};
static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index 7b67a677172..abcc9e76962 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -29,7 +29,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include <linux/slab.h>
+#include <linux/gfp.h>
#include <linux/mman.h>
#include <net/sock.h>
#include "iwch_provider.h"
@@ -46,6 +46,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
struct ib_event event;
struct iwch_qp_attributes attrs;
struct iwch_qp *qhp;
+ unsigned long flag;
spin_lock(&rnicp->lock);
qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -76,6 +77,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
atomic_inc(&qhp->refcnt);
spin_unlock(&rnicp->lock);
+ if (qhp->attr.state == IWCH_QP_STATE_RTS) {
+ attrs.next_state = IWCH_QP_STATE_TERMINATE;
+ iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (send_term)
+ iwch_post_terminate(qhp, rsp_msg);
+ }
+
event.event = ib_event;
event.device = chp->ibcq.device;
if (ib_event == IB_EVENT_CQ_ERR)
@@ -86,13 +95,9 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
if (qhp->ibqp.event_handler)
(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
- if (qhp->attr.state == IWCH_QP_STATE_RTS) {
- attrs.next_state = IWCH_QP_STATE_TERMINATE;
- iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (send_term)
- iwch_post_terminate(qhp, rsp_msg);
- }
+ spin_lock_irqsave(&chp->comp_handler_lock, flag);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
if (atomic_dec_and_test(&qhp->refcnt))
wake_up(&qhp->wait);
@@ -105,6 +110,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
struct iwch_cq *chp;
struct iwch_qp *qhp;
u32 cqid = RSPQ_CQID(rsp_msg);
+ unsigned long flag;
rnicp = (struct iwch_dev *) rdev_p->ulp;
spin_lock(&rnicp->lock);
@@ -168,7 +174,9 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
*/
if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
dst_confirm(qhp->ep->dst);
+ spin_lock_irqsave(&chp->comp_handler_lock, flag);
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
break;
case TPT_ERR_STAG:
@@ -179,12 +187,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
case TPT_ERR_BOUND:
case TPT_ERR_INVALIDATE_SHARED_MR:
case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
- CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
- CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index ec49a5cbdeb..5c36ee2809a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -29,6 +29,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include <linux/slab.h>
#include <asm/byteorder.h>
#include <rdma/iw_cm.h>
@@ -39,7 +40,7 @@
#include "iwch.h"
#include "iwch_provider.h"
-static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
{
u32 mmid;
@@ -47,14 +48,15 @@ static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift)
{
u32 stag;
+ int ret;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
@@ -66,9 +68,11 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- iwch_finish_mem_reg(mhp, stag);
-
- return 0;
+ ret = iwch_finish_mem_reg(mhp, stag);
+ if (ret)
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ return ret;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
@@ -77,6 +81,7 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
int npages)
{
u32 stag;
+ int ret;
/* We could support this... */
if (npages > mhp->attr.pbl_size)
@@ -93,9 +98,12 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- iwch_finish_mem_reg(mhp, stag);
+ ret = iwch_finish_mem_reg(mhp, stag);
+ if (ret)
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
- return 0;
+ return ret;
}
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b89640aa6e1..811b24a539c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -37,9 +37,12 @@
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/list.h>
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+#include <linux/slab.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -58,13 +61,6 @@
#include "iwch_user.h"
#include "common.h"
-static int iwch_modify_port(struct ib_device *ibdev,
- u8 port, int port_modify_mask,
- struct ib_port_modify *props)
-{
- return -ENOSYS;
-}
-
static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
struct ib_ah_attr *ah_attr)
{
@@ -151,6 +147,8 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
struct iwch_create_cq_resp uresp;
struct iwch_create_cq_req ureq;
struct iwch_ucontext *ucontext = NULL;
+ static int warned;
+ size_t resplen;
PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
rhp = to_iwch_dev(ibdev);
@@ -185,16 +183,21 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
entries = roundup_pow_of_two(entries);
chp->cq.size_log2 = ilog2(entries);
- if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+ if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
kfree(chp);
return ERR_PTR(-ENOMEM);
}
chp->rhp = rhp;
chp->ibcq.cqe = 1 << chp->cq.size_log2;
spin_lock_init(&chp->lock);
+ spin_lock_init(&chp->comp_handler_lock);
atomic_set(&chp->refcnt, 1);
init_waitqueue_head(&chp->wait);
- insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+ if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
+ cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+ kfree(chp);
+ return ERR_PTR(-ENOMEM);
+ }
if (ucontext) {
struct iwch_mm_entry *mm;
@@ -210,15 +213,27 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
uresp.key = ucontext->key;
ucontext->key += PAGE_SIZE;
spin_unlock(&ucontext->mmap_lock);
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
+ mm->key = uresp.key;
+ mm->addr = virt_to_phys(chp->cq.queue);
+ if (udata->outlen < sizeof uresp) {
+ if (!warned++)
+ printk(KERN_WARNING MOD "Warning - "
+ "downlevel libcxgb3 (non-fatal).\n");
+ mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
+ sizeof(struct t3_cqe));
+ resplen = sizeof(struct iwch_create_cq_resp_v0);
+ } else {
+ mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
+ sizeof(struct t3_cqe));
+ uresp.memsize = mm->len;
+ uresp.reserved = 0;
+ resplen = sizeof uresp;
+ }
+ if (ib_copy_to_udata(udata, &uresp, resplen)) {
kfree(mm);
iwch_destroy_cq(&chp->ibcq);
return ERR_PTR(-EFAULT);
}
- mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
- mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
- sizeof (struct t3_cqe));
insert_mmap(ucontext, mm);
}
PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
@@ -545,7 +560,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
__be64 *page_list = NULL;
int shift = 0;
u64 total_size;
- int npages;
+ int npages = 0;
int ret;
PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
@@ -603,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
-
+ struct scatterlist *sg;
PDBG("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
@@ -630,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
+ n = mhp->umem->nmap;
err = iwch_alloc_pbl(mhp, n);
if (err)
@@ -646,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
@@ -661,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = 0;
}
}
- }
+ }
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
@@ -724,7 +734,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
return ibmr;
}
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
struct iwch_pd *php;
@@ -733,6 +743,9 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
u32 stag = 0;
int ret;
+ if (type != IB_MW_TYPE_1)
+ return ERR_PTR(-EINVAL);
+
php = to_iwch_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
@@ -749,7 +762,11 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
mhp->attr.stag = stag;
mmid = (stag) >> 8;
mhp->ibmw.rkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+ kfree(mhp);
+ return ERR_PTR(-ENOMEM);
+ }
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
}
@@ -765,8 +782,8 @@ static int iwch_dealloc_mw(struct ib_mw *mw)
mmid = (mw->rkey) >> 8;
cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
remove_handle(rhp, &rhp->mmidr, mmid);
- kfree(mhp);
PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ kfree(mhp);
return 0;
}
@@ -777,37 +794,43 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
struct iwch_mr *mhp;
u32 mmid;
u32 stag = 0;
- int ret;
+ int ret = 0;
php = to_iwch_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
- return ERR_PTR(-ENOMEM);
+ goto err;
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, pbl_depth);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err1;
mhp->attr.pbl_size = pbl_depth;
ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret) {
- iwch_free_pbl(mhp);
- kfree(mhp);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err2;
mhp->attr.pdid = php->pdid;
mhp->attr.type = TPT_NON_SHARED_MR;
mhp->attr.stag = stag;
mhp->attr.state = 1;
mmid = (stag) >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
+ goto err3;
+
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
+err3:
+ cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+err2:
+ iwch_free_pbl(mhp);
+err1:
+ kfree(mhp);
+err:
+ return ERR_PTR(ret);
}
static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
@@ -960,7 +983,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
spin_lock_init(&qhp->lock);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
- insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+ if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
+ cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ kfree(qhp);
+ return ERR_PTR(-ENOMEM);
+ }
if (udata) {
@@ -1102,9 +1131,7 @@ static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
char *cp, *next;
unsigned fw_maj, fw_min, fw_mic;
- rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
- rtnl_unlock();
next = info.fw_version + 1;
cp = strsep(&next, ".");
@@ -1154,14 +1181,42 @@ static int iwch_query_device(struct ib_device *ibdev,
static int iwch_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
+ struct iwch_dev *dev;
+ struct net_device *netdev;
+ struct in_device *inetdev;
+
PDBG("%s ibdev %p\n", __func__, ibdev);
+
+ dev = to_iwch_dev(ibdev);
+ netdev = dev->rdev.port_info.lldevs[port-1];
+
+ memset(props, 0, sizeof(struct ib_port_attr));
props->max_mtu = IB_MTU_4096;
- props->lid = 0;
- props->lmc = 0;
- props->sm_lid = 0;
- props->sm_sl = 0;
- props->state = IB_PORT_ACTIVE;
- props->phys_state = 0;
+ if (netdev->mtu >= 4096)
+ props->active_mtu = IB_MTU_4096;
+ else if (netdev->mtu >= 2048)
+ props->active_mtu = IB_MTU_2048;
+ else if (netdev->mtu >= 1024)
+ props->active_mtu = IB_MTU_1024;
+ else if (netdev->mtu >= 512)
+ props->active_mtu = IB_MTU_512;
+ else
+ props->active_mtu = IB_MTU_256;
+
+ if (!netif_carrier_ok(netdev))
+ props->state = IB_PORT_DOWN;
+ else {
+ inetdev = in_dev_get(netdev);
+ if (inetdev) {
+ if (inetdev->ifa_list)
+ props->state = IB_PORT_ACTIVE;
+ else
+ props->state = IB_PORT_INIT;
+ in_dev_put(inetdev);
+ } else
+ props->state = IB_PORT_INIT;
+ }
+
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_SNMP_TUNNEL_SUP |
@@ -1170,9 +1225,8 @@ static int iwch_query_port(struct ib_device *ibdev,
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
- props->qkey_viol_cntr = 0;
props->active_width = 2;
- props->active_speed = 2;
+ props->active_speed = IB_SPEED_DDR;
props->max_msg_sz = -1;
return 0;
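As a worked illustration of the MTU ladder above (netdev MTU values chosen purely for illustration):

	/*
	 *   netdev->mtu 9000  ->  props->active_mtu = IB_MTU_4096
	 *   netdev->mtu 1500  ->  props->active_mtu = IB_MTU_1024  (1500 < 2048)
	 *   netdev->mtu  576  ->  props->active_mtu = IB_MTU_512
	 */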
@@ -1187,28 +1241,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
}
-static int fw_supports_fastreg(struct iwch_dev *iwch_dev)
-{
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- char *cp, *next;
- unsigned fw_maj, fw_min;
-
- rtnl_lock();
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- rtnl_unlock();
-
- next = info.fw_version+1;
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_maj);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_min);
-
- PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min);
-
- return fw_maj > 6 || (fw_maj == 6 && fw_min > 0);
-}
-
static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf)
{
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
@@ -1217,9 +1249,7 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
- rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
- rtnl_unlock();
return sprintf(buf, "%s\n", info.fw_version);
}
@@ -1232,9 +1262,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
PDBG("%s dev 0x%p\n", __func__, dev);
- rtnl_lock();
lldev->ethtool_ops->get_drvinfo(lldev, &info);
- rtnl_unlock();
return sprintf(buf, "%s\n", info.driver);
}
@@ -1325,12 +1353,12 @@ int iwch_register_device(struct iwch_dev *dev)
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE;
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+ dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
/* cxgb3 supports STag 0. */
dev->ibdev.local_dma_lkey = 0;
- if (fw_supports_fastreg(dev))
- dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
dev->ibdev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1357,7 +1385,6 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
- dev->ibdev.modify_port = iwch_modify_port;
dev->ibdev.query_pkey = iwch_query_pkey;
dev->ibdev.query_gid = iwch_query_gid;
dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
@@ -1392,6 +1419,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.post_send = iwch_post_send;
dev->ibdev.post_recv = iwch_post_receive;
dev->ibdev.get_protocol_stats = iwch_get_mib;
+ dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
@@ -1406,7 +1434,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
- ret = ib_register_device(&dev->ibdev);
+ ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto bail1;
@@ -1421,6 +1449,7 @@ int iwch_register_device(struct iwch_dev *dev)
bail2:
ib_unregister_device(&dev->ibdev);
bail1:
+ kfree(dev->ibdev.iwcm);
return ret;
}
@@ -1433,5 +1462,6 @@ void iwch_unregister_device(struct iwch_dev *dev)
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
+ kfree(dev->ibdev.iwcm);
return;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index f5ceca05c43..87c14b0c5ac 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -103,6 +103,7 @@ struct iwch_cq {
struct iwch_dev *rhp;
struct t3_cq cq;
spinlock_t lock;
+ spinlock_t comp_handler_lock;
atomic_t refcnt;
wait_queue_head_t wait;
u32 __user *user_rptr_addr;
@@ -293,9 +294,16 @@ static inline u32 iwch_ib_to_tpt_access(int acc)
return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
+ (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) |
TPT_LOCAL_READ;
}
+static inline u32 iwch_ib_to_tpt_bind_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0);
+}
+
enum iwch_mmid_state {
IWCH_STAG_STATE_VALID,
IWCH_STAG_STATE_INVALID
@@ -325,11 +333,9 @@ int iwch_bind_mw(struct ib_qp *qp,
struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
-int iwch_post_zb_read(struct iwch_qp *qhp);
+int iwch_post_zb_read(struct iwch_ep *ep);
int iwch_register_device(struct iwch_dev *dev);
void iwch_unregister_device(struct iwch_dev *dev);
-int iwch_quiesce_qps(struct iwch_cq *chp);
-int iwch_resume_qps(struct iwch_cq *chp);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 9a3be3a9d5d..b57c0befd96 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -29,6 +29,8 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include <linux/sched.h>
+#include <linux/gfp.h>
#include "iwch_provider.h"
#include "iwch.h"
#include "iwch_cm.h"
@@ -99,8 +101,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
plen = 4;
wqe->write.sgl[0].stag = wr->ex.imm_data;
- wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
- wqe->write.num_sgle = __constant_cpu_to_be32(0);
+ wqe->write.sgl[0].len = cpu_to_be32(0);
+ wqe->write.num_sgle = cpu_to_be32(0);
*flit_cnt = 6;
} else {
plen = 0;
@@ -195,15 +197,12 @@ static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
return 0;
}
-/*
- * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
- */
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
u32 num_sgle, u32 * pbl_addr, u8 * page_size)
{
int i;
struct iwch_mr *mhp;
- u32 offset;
+ u64 offset;
for (i = 0; i < num_sgle; i++) {
mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
@@ -235,8 +234,8 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
return -EINVAL;
}
offset = sg_list[i].addr - mhp->attr.va_fbo;
- offset += ((u32) mhp->attr.va_fbo) %
- (1UL << (12 + mhp->attr.page_size));
+ offset += mhp->attr.va_fbo &
+ ((1UL << (12 + mhp->attr.page_size)) - 1);
pbl_addr[i] = ((mhp->attr.pbl_addr -
rhp->rdev.rnic_info.pbl_base) >> 3) +
(offset >> (12 + mhp->attr.page_size));
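A minimal worked example of the offset arithmetic above, assuming 4 KB pages (page_size index 0); the addresses are made up for illustration only:

	/*
	 *   va_fbo          = 0x7f0000001234
	 *   sg_list[i].addr = 0x7f0000101234
	 *   offset  = (addr - va_fbo) + (va_fbo & 0xfff)
	 *           = 0x100000 + 0x234 = 0x100234
	 *   offset >> 12 = 0x100 PBL entries past pbl_addr
	 * Keeping offset in a u64 stays correct even when the SGE lies more
	 * than 4 GB past va_fbo, which a 32-bit offset would have wrapped.
	 */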
@@ -266,8 +265,8 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
/* to in the WQE == the offset into the page */
- wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
- (1UL << (12 + page_size[i])));
+ wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) &
+ ((1UL << (12 + page_size[i])) - 1));
/* pbl_addr is the adapters address in the PBL */
wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
@@ -367,18 +366,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
- if (num_wrs <= 0) {
+ if (num_wrs == 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -430,10 +430,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wr->opcode);
err = -EINVAL;
}
- if (err) {
- *bad_wr = wr;
+ if (err)
break;
- }
wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
sqp->wr_id = wr->wr_id;
sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -455,7 +453,12 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
++(qhp->wq.sq_wptr);
}
spin_unlock_irqrestore(&qhp->lock, flag);
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ if (cxio_wq_db_enabled(&qhp->wq))
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
@@ -473,18 +476,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qhp->lock, flag);
if (qhp->attr.state > IWCH_QP_STATE_RTS) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -EINVAL;
+ goto out;
}
num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
qhp->wq.rq_size_log2) - 1;
if (!wr) {
spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
+ err = -ENOMEM;
+ goto out;
}
while (wr) {
if (wr->num_sge > T3_MAX_SGE) {
err = -EINVAL;
- *bad_wr = wr;
break;
}
idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -496,10 +500,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
err = build_zero_stag_recv(qhp, wqe, wr);
else
err = -ENOMEM;
- if (err) {
- *bad_wr = wr;
+
+ if (err)
break;
- }
+
build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@@ -512,7 +516,12 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
num_wrs--;
}
spin_unlock_irqrestore(&qhp->lock, flag);
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ if (cxio_wq_db_enabled(&qhp->wq))
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+ if (err)
+ *bad_wr = wr;
return err;
}
@@ -545,7 +554,7 @@ int iwch_bind_mw(struct ib_qp *qp,
}
num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
qhp->wq.sq_size_log2);
- if ((num_wrs) <= 0) {
+ if (num_wrs == 0) {
spin_unlock_irqrestore(&qhp->lock, flag);
return -ENOMEM;
}
@@ -558,18 +567,19 @@ int iwch_bind_mw(struct ib_qp *qp,
if (mw_bind->send_flags & IB_SEND_SIGNALED)
t3_wr_flags = T3_COMPLETION_FLAG;
- sgl.addr = mw_bind->addr;
- sgl.lkey = mw_bind->mr->lkey;
- sgl.length = mw_bind->length;
+ sgl.addr = mw_bind->bind_info.addr;
+ sgl.lkey = mw_bind->bind_info.mr->lkey;
+ sgl.length = mw_bind->bind_info.length;
wqe->bind.reserved = 0;
wqe->bind.type = TPT_VATO;
/* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags);
- wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
+ wqe->bind.perms = iwch_ib_to_tpt_bind_access(
+ mw_bind->bind_info.mw_access_flags);
+ wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
- wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
- wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
+ wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
+ wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
if (err) {
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -591,7 +601,8 @@ int iwch_bind_mw(struct ib_qp *qp,
++(qhp->wq.sq_wptr);
spin_unlock_irqrestore(&qhp->lock, flag);
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ if (cxio_wq_db_enabled(&qhp->wq))
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
return err;
}
@@ -728,7 +739,7 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
}
}
-int iwch_post_zb_read(struct iwch_qp *qhp)
+int iwch_post_zb_read(struct iwch_ep *ep)
{
union t3_wr *wqe;
struct sk_buff *skb;
@@ -745,17 +756,16 @@ int iwch_post_zb_read(struct iwch_qp *qhp)
wqe->read.rdmaop = T3_READ_REQ;
wqe->read.reserved[0] = 0;
wqe->read.reserved[1] = 0;
- wqe->read.reserved[2] = 0;
wqe->read.rem_stag = cpu_to_be32(1);
wqe->read.rem_to = cpu_to_be64(1);
wqe->read.local_stag = cpu_to_be32(1);
wqe->read.local_len = cpu_to_be32(0);
wqe->read.local_to = cpu_to_be64(1);
wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+ wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)|
V_FW_RIWR_LEN(flit_cnt));
skb->priority = CPL_PRIORITY_DATA;
- return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+ return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb);
}
/*
@@ -787,61 +797,82 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
skb->priority = CPL_PRIORITY_DATA;
- return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+ return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
}
/*
* Assumes qhp lock is held.
*/
-static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
+ struct iwch_cq *schp)
{
- struct iwch_cq *rchp, *schp;
int count;
int flushed;
- rchp = get_chp(qhp->rhp, qhp->attr.rcq);
- schp = get_chp(qhp->rhp, qhp->attr.scq);
PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
/* take a ref on the qhp since we must release the lock */
atomic_inc(&qhp->refcnt);
- spin_unlock_irqrestore(&qhp->lock, *flag);
+ spin_unlock(&qhp->lock);
- /* locking heirarchy: cq lock first, then qp lock. */
- spin_lock_irqsave(&rchp->lock, *flag);
+ /* locking hierarchy: cq lock first, then qp lock. */
+ spin_lock(&rchp->lock);
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&rchp->cq);
cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
spin_unlock(&qhp->lock);
- spin_unlock_irqrestore(&rchp->lock, *flag);
- if (flushed)
+ spin_unlock(&rchp->lock);
+ if (flushed) {
+ spin_lock(&rchp->comp_handler_lock);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ spin_unlock(&rchp->comp_handler_lock);
+ }
- /* locking heirarchy: cq lock first, then qp lock. */
- spin_lock_irqsave(&schp->lock, *flag);
+ /* locking hierarchy: cq lock first, then qp lock. */
+ spin_lock(&schp->lock);
spin_lock(&qhp->lock);
cxio_flush_hw_cq(&schp->cq);
cxio_count_scqes(&schp->cq, &qhp->wq, &count);
flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
spin_unlock(&qhp->lock);
- spin_unlock_irqrestore(&schp->lock, *flag);
- if (flushed)
+ spin_unlock(&schp->lock);
+ if (flushed) {
+ spin_lock(&schp->comp_handler_lock);
(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ spin_unlock(&schp->comp_handler_lock);
+ }
/* deref */
if (atomic_dec_and_test(&qhp->refcnt))
wake_up(&qhp->wait);
- spin_lock_irqsave(&qhp->lock, *flag);
+ spin_lock(&qhp->lock);
}
-static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+static void flush_qp(struct iwch_qp *qhp)
{
- if (qhp->ibqp.uobject)
+ struct iwch_cq *rchp, *schp;
+
+ rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+ schp = get_chp(qhp->rhp, qhp->attr.scq);
+
+ if (qhp->ibqp.uobject) {
cxio_set_wq_in_error(&qhp->wq);
- else
- __flush_qp(qhp, flag);
+ cxio_set_cq_in_error(&rchp->cq);
+ spin_lock(&rchp->comp_handler_lock);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ spin_unlock(&rchp->comp_handler_lock);
+ if (schp != rchp) {
+ cxio_set_cq_in_error(&schp->cq);
+ spin_lock(&schp->comp_handler_lock);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ spin_unlock(&schp->comp_handler_lock);
+ }
+ return;
+ }
+ __flush_qp(qhp, rchp, schp);
}
@@ -852,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
{
union t3_wr *wqe = qhp->wq.queue;
u16 count = 0;
- while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+
+ while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
count++;
wqe++;
}
@@ -879,20 +911,13 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
(qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
(qhp->attr.mpa_attr.crc_enabled << 2);
- /*
- * XXX - The IWCM doesn't quite handle getting these
- * attrs set before going into RTS. For now, just turn
- * them on always...
- */
-#if 0
- init_attr.qpcaps = qhp->attr.enableRdmaRead |
- (qhp->attr.enableRdmaWrite << 1) |
- (qhp->attr.enableBind << 2) |
- (qhp->attr.enable_stag0_fastreg << 3) |
- (qhp->attr.enable_stag0_fastreg << 4);
-#else
- init_attr.qpcaps = 0x1f;
-#endif
+ init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
+ uP_RI_QP_RDMA_WRITE_ENABLE |
+ uP_RI_QP_BIND_ENABLE;
+ if (!qhp->ibqp.uobject)
+ init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE |
+ uP_RI_QP_FAST_REGISTER_ENABLE;
+
init_attr.tcp_emss = qhp->ep->emss;
init_attr.ord = qhp->attr.max_ord;
init_attr.ird = qhp->attr.max_ird;
@@ -900,8 +925,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
init_attr.rqe_count = iwch_rqes_posted(qhp);
init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
- if (!qhp->ibqp.uobject)
- init_attr.flags |= PRIV_QP;
+ init_attr.chan = qhp->ep->l2t->smt_idx;
if (peer2peer) {
init_attr.rtr_type = RTR_READ;
if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
@@ -1008,7 +1032,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
break;
case IWCH_QP_STATE_ERROR:
qhp->attr.state = IWCH_QP_STATE_ERROR;
- flush_qp(qhp, &flag);
+ flush_qp(qhp);
break;
default:
ret = -EINVAL;
@@ -1056,7 +1080,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
}
switch (attrs->next_state) {
case IWCH_QP_STATE_IDLE:
- flush_qp(qhp, &flag);
+ flush_qp(qhp);
qhp->attr.state = IWCH_QP_STATE_IDLE;
qhp->attr.llp_stream_handle = NULL;
put_ep(&qhp->ep->com);
@@ -1082,7 +1106,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
goto out;
}
qhp->attr.state = IWCH_QP_STATE_IDLE;
- memset(&qhp->attr, 0, sizeof(qhp->attr));
break;
case IWCH_QP_STATE_TERMINATE:
if (!internal) {
@@ -1111,7 +1134,7 @@ err:
free=1;
wake_up(&qhp->wait);
BUG_ON(!ep);
- flush_qp(qhp, &flag);
+ flush_qp(qhp);
out:
spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1138,59 +1161,3 @@ out:
PDBG("%s exit state %d\n", __func__, qhp->attr.state);
return ret;
}
-
-static int quiesce_qp(struct iwch_qp *qhp)
-{
- spin_lock_irq(&qhp->lock);
- iwch_quiesce_tid(qhp->ep);
- qhp->flags |= QP_QUIESCED;
- spin_unlock_irq(&qhp->lock);
- return 0;
-}
-
-static int resume_qp(struct iwch_qp *qhp)
-{
- spin_lock_irq(&qhp->lock);
- iwch_resume_tid(qhp->ep);
- qhp->flags &= ~QP_QUIESCED;
- spin_unlock_irq(&qhp->lock);
- return 0;
-}
-
-int iwch_quiesce_qps(struct iwch_cq *chp)
-{
- int i;
- struct iwch_qp *qhp;
-
- for (i=0; i < T3_MAX_NUM_QP; i++) {
- qhp = get_qhp(chp->rhp, i);
- if (!qhp)
- continue;
- if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
- quiesce_qp(qhp);
- continue;
- }
- if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
- quiesce_qp(qhp);
- }
- return 0;
-}
-
-int iwch_resume_qps(struct iwch_cq *chp)
-{
- int i;
- struct iwch_qp *qhp;
-
- for (i=0; i < T3_MAX_NUM_QP; i++) {
- qhp = get_qhp(chp->rhp, i);
- if (!qhp)
- continue;
- if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
- resume_qp(qhp);
- continue;
- }
- if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
- resume_qp(qhp);
- }
- return 0;
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
index cb7086f558c..a277c31fcaf 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -45,10 +45,18 @@ struct iwch_create_cq_req {
__u64 user_rptr_addr;
};
+struct iwch_create_cq_resp_v0 {
+ __u64 key;
+ __u32 cqid;
+ __u32 size_log2;
+};
+
struct iwch_create_cq_resp {
__u64 key;
__u32 cqid;
__u32 size_log2;
+ __u32 memsize;
+ __u32 reserved;
};
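The split between these two layouts is what the udata->outlen check in iwch_create_cq() keys off; a short summary of that selection:

	/*
	 * A downlevel libcxgb3 passes a response buffer sized for
	 * iwch_create_cq_resp_v0, so the kernel sees udata->outlen <
	 * sizeof(struct iwch_create_cq_resp) and copies back only the v0
	 * fields (key, cqid, size_log2).
	 */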
struct iwch_create_qp_resp {
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
new file mode 100644
index 00000000000..23f38cf2c5c
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -0,0 +1,18 @@
+config INFINIBAND_CXGB4
+ tristate "Chelsio T4/T5 RDMA Driver"
+ depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
+ select GENERIC_ALLOCATOR
+ ---help---
+ This is an iWARP/RDMA driver for Chelsio T4 and T5 1GbE and
+ 10GbE adapters, and the T5 40GbE adapter.
+
+ For general information about Chelsio and our products, visit
+ our website at <http://www.chelsio.com>.
+
+ For customer support, please visit our customer support page at
+ <http://www.chelsio.com/support.html>.
+
+ Please send feedback to <linux-bugs@chelsio.com>.
+
+ To compile this driver as a module, choose M here: the module
+ will be called iw_cxgb4.
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
new file mode 100644
index 00000000000..e11cf729994
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -0,0 +1,5 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+
+obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
+
+iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
new file mode 100644
index 00000000000..768a0fb67dd
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -0,0 +1,3933 @@
+/*
+ * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+
+#include <net/neighbour.h>
+#include <net/netevent.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+
+#include <rdma/ib_addr.h>
+
+#include "iw_cxgb4.h"
+
+static char *states[] = {
+ "idle",
+ "listen",
+ "connecting",
+ "mpa_wait_req",
+ "mpa_req_sent",
+ "mpa_req_rcvd",
+ "mpa_rep_sent",
+ "fpdu_mode",
+ "aborting",
+ "closing",
+ "moribund",
+ "dead",
+ NULL,
+};
+
+static int nocong;
+module_param(nocong, int, 0644);
+MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
+
+static int enable_ecn;
+module_param(enable_ecn, int, 0644);
+MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
+
+static int dack_mode = 1;
+module_param(dack_mode, int, 0644);
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
+
+int c4iw_max_read_depth = 8;
+module_param(c4iw_max_read_depth, int, 0644);
+MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)");
+
+static int enable_tcp_timestamps;
+module_param(enable_tcp_timestamps, int, 0644);
+MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
+
+static int enable_tcp_sack;
+module_param(enable_tcp_sack, int, 0644);
+MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
+
+static int enable_tcp_window_scaling = 1;
+module_param(enable_tcp_window_scaling, int, 0644);
+MODULE_PARM_DESC(enable_tcp_window_scaling,
+ "Enable tcp window scaling (default=1)");
+
+int c4iw_debug;
+module_param(c4iw_debug, int, 0644);
+MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
+
+static int peer2peer = 1;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
+
+static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
+module_param(p2p_type, int, 0644);
+MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
+ "1=RDMA_READ 0=RDMA_WRITE (default 1)");
+
+static int ep_timeout_secs = 60;
+module_param(ep_timeout_secs, int, 0644);
+MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
+ "in seconds (default=60)");
+
+static int mpa_rev = 1;
+module_param(mpa_rev, int, 0644);
+MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
+ "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
+ " compliant (default=1)");
+
+static int markers_enabled;
+module_param(markers_enabled, int, 0644);
+MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
+
+static int crc_enabled = 1;
+module_param(crc_enabled, int, 0644);
+MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
+
+static int rcv_win = 256 * 1024;
+module_param(rcv_win, int, 0644);
+MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
+
+static int snd_win = 128 * 1024;
+module_param(snd_win, int, 0644);
+MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
+
+static struct workqueue_struct *workq;
+
+static struct sk_buff_head rxq;
+
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
+static void ep_timeout(unsigned long arg);
+static void connect_reply_upcall(struct c4iw_ep *ep, int status);
+
+static LIST_HEAD(timeout_list);
+static spinlock_t timeout_lock;
+
+static void deref_qp(struct c4iw_ep *ep)
+{
+ c4iw_qp_rem_ref(&ep->com.qp->ibqp);
+ clear_bit(QP_REFERENCED, &ep->com.flags);
+}
+
+static void ref_qp(struct c4iw_ep *ep)
+{
+ set_bit(QP_REFERENCED, &ep->com.flags);
+ c4iw_qp_add_ref(&ep->com.qp->ibqp);
+}
+
+static void start_ep_timer(struct c4iw_ep *ep)
+{
+ PDBG("%s ep %p\n", __func__, ep);
+ if (timer_pending(&ep->timer)) {
+ pr_err("%s timer already started! ep %p\n",
+ __func__, ep);
+ return;
+ }
+ clear_bit(TIMEOUT, &ep->com.flags);
+ c4iw_get_ep(&ep->com);
+ ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+ ep->timer.data = (unsigned long)ep;
+ ep->timer.function = ep_timeout;
+ add_timer(&ep->timer);
+}
+
+static int stop_ep_timer(struct c4iw_ep *ep)
+{
+ PDBG("%s ep %p stopping\n", __func__, ep);
+ del_timer_sync(&ep->timer);
+ if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
+ c4iw_put_ep(&ep->com);
+ return 0;
+ }
+ return 1;
+}
+
+static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
+ struct l2t_entry *l2e)
+{
+ int error = 0;
+
+ if (c4iw_fatal_error(rdev)) {
+ kfree_skb(skb);
+ PDBG("%s - device in error state - dropping\n", __func__);
+ return -EIO;
+ }
+ error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
+ if (error < 0)
+ kfree_skb(skb);
+ return error < 0 ? error : 0;
+}
+
+int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
+{
+ int error = 0;
+
+ if (c4iw_fatal_error(rdev)) {
+ kfree_skb(skb);
+ PDBG("%s - device in error state - dropping\n", __func__);
+ return -EIO;
+ }
+ error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
+ if (error < 0)
+ kfree_skb(skb);
+ return error < 0 ? error : 0;
+}
+
+static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
+{
+ struct cpl_tid_release *req;
+
+ skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ if (!skb)
+ return;
+ req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
+ INIT_TP_WR(req, hwtid);
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
+ set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+ c4iw_ofld_send(rdev, skb);
+ return;
+}
+
+static void set_emss(struct c4iw_ep *ep, u16 opt)
+{
+ ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
+ sizeof(struct iphdr) - sizeof(struct tcphdr);
+ ep->mss = ep->emss;
+ if (GET_TCPOPT_TSTAMP(opt))
+ ep->emss -= 12;
+ if (ep->emss < 128)
+ ep->emss = 128;
+ if (ep->emss & 7)
+ PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ GET_TCPOPT_MSS(opt), ep->mss, ep->emss);
+ PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
+ ep->mss, ep->emss);
+}
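For illustration, with a 1500-byte MTU table entry the computation above works out to:

	/*
	 *   emss = 1500 - 20 (iphdr) - 20 (tcphdr) = 1460
	 *   with TCP timestamps:  1460 - 12 = 1448
	 *   anything below 128 is clamped up to 128
	 */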
+
+static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
+{
+ enum c4iw_ep_state state;
+
+ mutex_lock(&epc->mutex);
+ state = epc->state;
+ mutex_unlock(&epc->mutex);
+ return state;
+}
+
+static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+ epc->state = new;
+}
+
+static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+ mutex_lock(&epc->mutex);
+ PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ __state_set(epc, new);
+ mutex_unlock(&epc->mutex);
+ return;
+}
+
+static void *alloc_ep(int size, gfp_t gfp)
+{
+ struct c4iw_ep_common *epc;
+
+ epc = kzalloc(size, gfp);
+ if (epc) {
+ kref_init(&epc->kref);
+ mutex_init(&epc->mutex);
+ c4iw_init_wr_wait(&epc->wr_wait);
+ }
+ PDBG("%s alloc ep %p\n", __func__, epc);
+ return epc;
+}
+
+void _c4iw_free_ep(struct kref *kref)
+{
+ struct c4iw_ep *ep;
+
+ ep = container_of(kref, struct c4iw_ep, com.kref);
+ PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+ if (test_bit(QP_REFERENCED, &ep->com.flags))
+ deref_qp(ep);
+ if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
+ remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ }
+ if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
+ print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
+ iwpm_remove_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr);
+ iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+ }
+ kfree(ep);
+}
+
+static void release_ep_resources(struct c4iw_ep *ep)
+{
+ set_bit(RELEASE_RESOURCES, &ep->com.flags);
+ c4iw_put_ep(&ep->com);
+}
+
+static int status2errno(int status)
+{
+ switch (status) {
+ case CPL_ERR_NONE:
+ return 0;
+ case CPL_ERR_CONN_RESET:
+ return -ECONNRESET;
+ case CPL_ERR_ARP_MISS:
+ return -EHOSTUNREACH;
+ case CPL_ERR_CONN_TIMEDOUT:
+ return -ETIMEDOUT;
+ case CPL_ERR_TCAM_FULL:
+ return -ENOMEM;
+ case CPL_ERR_CONN_EXIST:
+ return -EADDRINUSE;
+ default:
+ return -EIO;
+ }
+}
+
+/*
+ * Try and reuse skbs already allocated...
+ */
+static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
+{
+ if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
+ skb_trim(skb, 0);
+ skb_get(skb);
+ skb_reset_transport_header(skb);
+ } else {
+ skb = alloc_skb(len, gfp);
+ }
+ t4_set_arp_err_handler(skb, NULL, NULL);
+ return skb;
+}
+
+static struct net_device *get_real_dev(struct net_device *egress_dev)
+{
+ return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
+}
+
+static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
+{
+ int i;
+
+ egress_dev = get_real_dev(egress_dev);
+ for (i = 0; i < dev->rdev.lldi.nports; i++)
+ if (dev->rdev.lldi.ports[i] == egress_dev)
+ return 1;
+ return 0;
+}
+
+static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
+ __u8 *peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos,
+ __u32 sin6_scope_id)
+{
+ struct dst_entry *dst = NULL;
+
+ if (IS_ENABLED(CONFIG_IPV6)) {
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ memcpy(&fl6.daddr, peer_ip, 16);
+ memcpy(&fl6.saddr, local_ip, 16);
+ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = sin6_scope_id;
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ if (!dst)
+ goto out;
+ if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
+ !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+ dst_release(dst);
+ dst = NULL;
+ }
+ }
+
+out:
+ return dst;
+}
+
+static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
+ __be32 peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos)
+{
+ struct rtable *rt;
+ struct flowi4 fl4;
+ struct neighbour *n;
+
+ rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
+ peer_port, local_port, IPPROTO_TCP,
+ tos, 0);
+ if (IS_ERR(rt))
+ return NULL;
+ n = dst_neigh_lookup(&rt->dst, &peer_ip);
+ if (!n)
+ return NULL;
+ if (!our_interface(dev, n->dev) &&
+ !(n->dev->flags & IFF_LOOPBACK)) {
+ dst_release(&rt->dst);
+ return NULL;
+ }
+ neigh_release(n);
+ return &rt->dst;
+}
+
+static void arp_failure_discard(void *handle, struct sk_buff *skb)
+{
+ PDBG("%s c4iw_dev %p\n", __func__, handle);
+ kfree_skb(skb);
+}
+
+/*
+ * Handle an ARP failure for an active open.
+ */
+static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep = handle;
+
+ printk(KERN_ERR MOD "ARP failure duing connect\n");
+ kfree_skb(skb);
+ connect_reply_upcall(ep, -EHOSTUNREACH);
+ state_set(&ep->com, DEAD);
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_put_ep(&ep->com);
+}
+
+/*
+ * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
+ * and send it along.
+ */
+static void abort_arp_failure(void *handle, struct sk_buff *skb)
+{
+ struct c4iw_rdev *rdev = handle;
+ struct cpl_abort_req *req = cplhdr(skb);
+
+ PDBG("%s rdev %p\n", __func__, rdev);
+ req->cmd = CPL_ABORT_NO_RST;
+ c4iw_ofld_send(rdev, skb);
+}
+
+static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
+{
+ unsigned int flowclen = 80;
+ struct fw_flowc_wr *flowc;
+ int i;
+
+ skb = get_skb(skb, flowclen, GFP_KERNEL);
+ flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
+
+ flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) |
+ FW_FLOWC_WR_NPARAMS(8));
+ flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen,
+ 16)) | FW_WR_FLOWID(ep->hwtid));
+
+ flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
+ flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8);
+ flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
+ flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
+ flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
+ flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
+ flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
+ flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
+ flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
+ flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
+ flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
+ flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
+ flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
+ flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
+ flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
+ flowc->mnemval[7].val = cpu_to_be32(ep->emss);
+ /* Pad WR to 16 byte boundary */
+ flowc->mnemval[8].mnemonic = 0;
+ flowc->mnemval[8].val = 0;
+ for (i = 0; i < 9; i++) {
+ flowc->mnemval[i].r4[0] = 0;
+ flowc->mnemval[i].r4[1] = 0;
+ flowc->mnemval[i].r4[2] = 0;
+ }
+
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ c4iw_ofld_send(&ep->com.dev->rdev, skb);
+}
+
+static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
+{
+ struct cpl_close_con_req *req;
+ struct sk_buff *skb;
+ int wrlen = roundup(sizeof *req, 16);
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ skb = get_skb(NULL, wrlen, gfp);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
+ return -ENOMEM;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ INIT_TP_WR(req, ep->hwtid);
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
+ ep->hwtid));
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+}
+
+static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+ struct cpl_abort_req *req;
+ int wrlen = roundup(sizeof *req, 16);
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ skb = get_skb(skb, wrlen, gfp);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+ __func__);
+ return -ENOMEM;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
+ req = (struct cpl_abort_req *) skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ INIT_TP_WR(req, ep->hwtid);
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
+ req->cmd = CPL_ABORT_SEND_RST;
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+}
+
+/*
+ * c4iw_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void c4iw_form_pm_msg(struct c4iw_ep *ep,
+ struct iwpm_sa_data *pm_msg)
+{
+ memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
+}
+
+/*
+ * c4iw_form_reg_msg - Form a port mapper message with dev info
+ */
+static void c4iw_form_reg_msg(struct c4iw_dev *dev,
+ struct iwpm_dev_data *pm_msg)
+{
+ memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
+ memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
+ IWPM_IFNAME_SIZE);
+}
+
+static void c4iw_record_pm_msg(struct c4iw_ep *ep,
+ struct iwpm_sa_data *pm_msg)
+{
+ memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
+ sizeof(ep->com.mapped_local_addr));
+ memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
+ sizeof(ep->com.mapped_remote_addr));
+}
+
+static void best_mtu(const unsigned short *mtus, unsigned short mtu,
+ unsigned int *idx, int use_ts)
+{
+ unsigned short hdr_size = sizeof(struct iphdr) +
+ sizeof(struct tcphdr) +
+ (use_ts ? 12 : 0);
+ unsigned short data_size = mtu - hdr_size;
+
+ cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+}
+
+static int send_connect(struct c4iw_ep *ep)
+{
+ struct cpl_act_open_req *req;
+ struct cpl_t5_act_open_req *t5_req;
+ struct cpl_act_open_req6 *req6;
+ struct cpl_t5_act_open_req6 *t5_req6;
+ struct sk_buff *skb;
+ u64 opt0;
+ u32 opt2;
+ unsigned int mtu_idx;
+ int wscale;
+ int wrlen;
+ int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
+ sizeof(struct cpl_act_open_req) :
+ sizeof(struct cpl_t5_act_open_req);
+ int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
+ sizeof(struct cpl_act_open_req6) :
+ sizeof(struct cpl_t5_act_open_req6);
+ struct sockaddr_in *la = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in *ra = (struct sockaddr_in *)
+ &ep->com.mapped_remote_addr;
+ struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_remote_addr;
+ int win;
+
+ wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
+ roundup(sizev4, 16) :
+ roundup(sizev6, 16);
+
+ PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
+
+ skb = get_skb(NULL, wrlen, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
+ __func__);
+ return -ENOMEM;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
+
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
+ wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+
+ opt0 = (nocong ? NO_CONG(1) : 0) |
+ KEEP_ALIVE(1) |
+ DELACK(1) |
+ WND_SCALE(wscale) |
+ MSS_IDX(mtu_idx) |
+ L2T_IDX(ep->l2t->idx) |
+ TX_CHAN(ep->tx_chan) |
+ SMAC_SEL(ep->smac_idx) |
+ DSCP(ep->tos) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
+ RCV_BUFSIZ(win);
+ opt2 = RX_CHANNEL(0) |
+ CCTRL_ECN(enable_ecn) |
+ RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
+ if (enable_tcp_timestamps)
+ opt2 |= TSTAMPS_EN(1);
+ if (enable_tcp_sack)
+ opt2 |= SACK_EN(1);
+ if (wscale && enable_tcp_window_scaling)
+ opt2 |= WND_SCALE_EN(1);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ }
+ t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
+
+ if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
+ if (ep->com.remote_addr.ss_family == AF_INET) {
+ req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ ((ep->rss_qid << 14) | ep->atid)));
+ req->local_port = la->sin_port;
+ req->peer_port = ra->sin_port;
+ req->local_ip = la->sin_addr.s_addr;
+ req->peer_ip = ra->sin_addr.s_addr;
+ req->opt0 = cpu_to_be64(opt0);
+ req->params = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
+ req->opt2 = cpu_to_be32(opt2);
+ } else {
+ req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
+
+ INIT_TP_WR(req6, 0);
+ OPCODE_TID(req6) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ ((ep->rss_qid<<14)|ep->atid)));
+ req6->local_port = la6->sin6_port;
+ req6->peer_port = ra6->sin6_port;
+ req6->local_ip_hi = *((__be64 *)
+ (la6->sin6_addr.s6_addr));
+ req6->local_ip_lo = *((__be64 *)
+ (la6->sin6_addr.s6_addr + 8));
+ req6->peer_ip_hi = *((__be64 *)
+ (ra6->sin6_addr.s6_addr));
+ req6->peer_ip_lo = *((__be64 *)
+ (ra6->sin6_addr.s6_addr + 8));
+ req6->opt0 = cpu_to_be64(opt0);
+ req6->params = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
+ req6->opt2 = cpu_to_be32(opt2);
+ }
+ } else {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ if (peer2peer)
+ isn += 4;
+
+ if (ep->com.remote_addr.ss_family == AF_INET) {
+ t5_req = (struct cpl_t5_act_open_req *)
+ skb_put(skb, wrlen);
+ INIT_TP_WR(t5_req, 0);
+ OPCODE_TID(t5_req) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ ((ep->rss_qid << 14) | ep->atid)));
+ t5_req->local_port = la->sin_port;
+ t5_req->peer_port = ra->sin_port;
+ t5_req->local_ip = la->sin_addr.s_addr;
+ t5_req->peer_ip = ra->sin_addr.s_addr;
+ t5_req->opt0 = cpu_to_be64(opt0);
+ t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t)));
+ t5_req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req->rsvd));
+ t5_req->opt2 = cpu_to_be32(opt2);
+ } else {
+ t5_req6 = (struct cpl_t5_act_open_req6 *)
+ skb_put(skb, wrlen);
+ INIT_TP_WR(t5_req6, 0);
+ OPCODE_TID(t5_req6) = cpu_to_be32(
+ MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ ((ep->rss_qid<<14)|ep->atid)));
+ t5_req6->local_port = la6->sin6_port;
+ t5_req6->peer_port = ra6->sin6_port;
+ t5_req6->local_ip_hi = *((__be64 *)
+ (la6->sin6_addr.s6_addr));
+ t5_req6->local_ip_lo = *((__be64 *)
+ (la6->sin6_addr.s6_addr + 8));
+ t5_req6->peer_ip_hi = *((__be64 *)
+ (ra6->sin6_addr.s6_addr));
+ t5_req6->peer_ip_lo = *((__be64 *)
+ (ra6->sin6_addr.s6_addr + 8));
+ t5_req6->opt0 = cpu_to_be64(opt0);
+ t5_req6->params = (__force __be64)cpu_to_be32(
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
+ t5_req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req6->rsvd));
+ t5_req6->opt2 = cpu_to_be32(opt2);
+ }
+ }
+
+ set_bit(ACT_OPEN_REQ, &ep->com.history);
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+}
+
+static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+ u8 mpa_rev_to_use)
+{
+ int mpalen, wrlen;
+ struct fw_ofld_tx_data_wr *req;
+ struct mpa_message *mpa;
+ struct mpa_v2_conn_params mpa_v2_params;
+
+ PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+
+ BUG_ON(skb_cloned(skb));
+
+ mpalen = sizeof(*mpa) + ep->plen;
+ if (mpa_rev_to_use == 2)
+ mpalen += sizeof(struct mpa_v2_conn_params);
+ wrlen = roundup(mpalen + sizeof *req, 16);
+ skb = get_skb(skb, wrlen, GFP_KERNEL);
+ if (!skb) {
+ connect_reply_upcall(ep, -ENOMEM);
+ return;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+
+ req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ req->op_to_immdlen = cpu_to_be32(
+ FW_WR_OP(FW_OFLD_TX_DATA_WR) |
+ FW_WR_COMPL(1) |
+ FW_WR_IMMDLEN(mpalen));
+ req->flowid_len16 = cpu_to_be32(
+ FW_WR_FLOWID(ep->hwtid) |
+ FW_WR_LEN16(wrlen >> 4));
+ req->plen = cpu_to_be32(mpalen);
+ req->tunnel_to_proxy = cpu_to_be32(
+ FW_OFLD_TX_DATA_WR_FLUSH(1) |
+ FW_OFLD_TX_DATA_WR_SHOVE(1));
+
+ mpa = (struct mpa_message *)(req + 1);
+ memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+ mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0) |
+ (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
+ mpa->private_data_size = htons(ep->plen);
+ mpa->revision = mpa_rev_to_use;
+ if (mpa_rev_to_use == 1) {
+ ep->tried_with_mpa_v1 = 1;
+ ep->retry_with_mpa_v1 = 0;
+ }
+
+ if (mpa_rev_to_use == 2) {
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof (struct mpa_v2_conn_params));
+ mpa_v2_params.ird = htons((u16)ep->ird);
+ mpa_v2_params.ord = htons((u16)ep->ord);
+
+ if (peer2peer) {
+ mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+ if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+ mpa_v2_params.ord |=
+ htons(MPA_V2_RDMA_WRITE_RTR);
+ else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+ mpa_v2_params.ord |=
+ htons(MPA_V2_RDMA_READ_RTR);
+ }
+ memcpy(mpa->private_data, &mpa_v2_params,
+ sizeof(struct mpa_v2_conn_params));
+
+ if (ep->plen)
+ memcpy(mpa->private_data +
+ sizeof(struct mpa_v2_conn_params),
+ ep->mpa_pkt + sizeof(*mpa), ep->plen);
+ } else
+ if (ep->plen)
+ memcpy(mpa->private_data,
+ ep->mpa_pkt + sizeof(*mpa), ep->plen);
+
+ /*
+ * Reference the mpa skb. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function fw4_ack() will deref it.
+ */
+ skb_get(skb);
+ t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ BUG_ON(ep->mpa_skb);
+ ep->mpa_skb = skb;
+ c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+ start_ep_timer(ep);
+ __state_set(&ep->com, MPA_REQ_SENT);
+ ep->mpa_attr.initiator = 1;
+ ep->snd_seq += mpalen;
+ return;
+}
+
+static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen, wrlen;
+ struct fw_ofld_tx_data_wr *req;
+ struct mpa_message *mpa;
+ struct sk_buff *skb;
+ struct mpa_v2_conn_params mpa_v2_params;
+
+ PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+
+ mpalen = sizeof(*mpa) + plen;
+ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+ mpalen += sizeof(struct mpa_v2_conn_params);
+ wrlen = roundup(mpalen + sizeof *req, 16);
+
+ skb = get_skb(NULL, wrlen, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ return -ENOMEM;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+
+ req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ req->op_to_immdlen = cpu_to_be32(
+ FW_WR_OP(FW_OFLD_TX_DATA_WR) |
+ FW_WR_COMPL(1) |
+ FW_WR_IMMDLEN(mpalen));
+ req->flowid_len16 = cpu_to_be32(
+ FW_WR_FLOWID(ep->hwtid) |
+ FW_WR_LEN16(wrlen >> 4));
+ req->plen = cpu_to_be32(mpalen);
+ req->tunnel_to_proxy = cpu_to_be32(
+ FW_OFLD_TX_DATA_WR_FLUSH(1) |
+ FW_OFLD_TX_DATA_WR_SHOVE(1));
+
+ mpa = (struct mpa_message *)(req + 1);
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = MPA_REJECT;
+ mpa->revision = ep->mpa_attr.version;
+ mpa->private_data_size = htons(plen);
+
+ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+ mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
+ mpa_v2_params.ird = htons(((u16)ep->ird) |
+ (peer2peer ? MPA_V2_PEER2PEER_MODEL :
+ 0));
+ mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
+ (p2p_type ==
+ FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
+ MPA_V2_RDMA_WRITE_RTR : p2p_type ==
+ FW_RI_INIT_P2PTYPE_READ_REQ ?
+ MPA_V2_RDMA_READ_RTR : 0) : 0));
+ memcpy(mpa->private_data, &mpa_v2_params,
+ sizeof(struct mpa_v2_conn_params));
+
+ if (ep->plen)
+ memcpy(mpa->private_data +
+ sizeof(struct mpa_v2_conn_params), pdata, plen);
+ } else
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ /*
+ * Reference the mpa skb again. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function fw4_ack() will deref it.
+ */
+ skb_get(skb);
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ BUG_ON(ep->mpa_skb);
+ ep->mpa_skb = skb;
+ ep->snd_seq += mpalen;
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+}
+
+static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
+{
+ int mpalen, wrlen;
+ struct fw_ofld_tx_data_wr *req;
+ struct mpa_message *mpa;
+ struct sk_buff *skb;
+ struct mpa_v2_conn_params mpa_v2_params;
+
+ PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+
+ mpalen = sizeof(*mpa) + plen;
+ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+ mpalen += sizeof(struct mpa_v2_conn_params);
+ wrlen = roundup(mpalen + sizeof *req, 16);
+
+ skb = get_skb(NULL, wrlen, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ return -ENOMEM;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+
+ req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ req->op_to_immdlen = cpu_to_be32(
+ FW_WR_OP(FW_OFLD_TX_DATA_WR) |
+ FW_WR_COMPL(1) |
+ FW_WR_IMMDLEN(mpalen));
+ req->flowid_len16 = cpu_to_be32(
+ FW_WR_FLOWID(ep->hwtid) |
+ FW_WR_LEN16(wrlen >> 4));
+ req->plen = cpu_to_be32(mpalen);
+ req->tunnel_to_proxy = cpu_to_be32(
+ FW_OFLD_TX_DATA_WR_FLUSH(1) |
+ FW_OFLD_TX_DATA_WR_SHOVE(1));
+
+ mpa = (struct mpa_message *)(req + 1);
+ memset(mpa, 0, sizeof(*mpa));
+ memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+ mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
+ (markers_enabled ? MPA_MARKERS : 0);
+ mpa->revision = ep->mpa_attr.version;
+ mpa->private_data_size = htons(plen);
+
+ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+ mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+ mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
+ mpa_v2_params.ird = htons((u16)ep->ird);
+ mpa_v2_params.ord = htons((u16)ep->ord);
+ if (peer2peer && (ep->mpa_attr.p2p_type !=
+ FW_RI_INIT_P2PTYPE_DISABLED)) {
+ mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+
+ if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+ mpa_v2_params.ord |=
+ htons(MPA_V2_RDMA_WRITE_RTR);
+ else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+ mpa_v2_params.ord |=
+ htons(MPA_V2_RDMA_READ_RTR);
+ }
+
+ memcpy(mpa->private_data, &mpa_v2_params,
+ sizeof(struct mpa_v2_conn_params));
+
+ if (ep->plen)
+ memcpy(mpa->private_data +
+ sizeof(struct mpa_v2_conn_params), pdata, plen);
+ } else
+ if (plen)
+ memcpy(mpa->private_data, pdata, plen);
+
+ /*
+ * Reference the mpa skb. This ensures the data area
+ * will remain in memory until the hw acks the tx.
+ * Function fw4_ack() will deref it.
+ */
+ skb_get(skb);
+ t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ ep->mpa_skb = skb;
+ __state_set(&ep->com, MPA_REP_SENT);
+ ep->snd_seq += mpalen;
+ return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+}
+
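+/*
+ * Handle CPL_ACT_ESTABLISH: the active open completed, so move from the
+ * atid to the hardware tid, record the initial sequence numbers and kick
+ * off MPA negotiation by sending the MPA start request.
+ */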
+static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct cpl_act_establish *req = cplhdr(skb);
+ unsigned int tid = GET_TID(req);
+ unsigned int atid = GET_TID_TID(ntohl(req->tos_atid));
+ struct tid_info *t = dev->rdev.lldi.tids;
+
+ ep = lookup_atid(t, atid);
+
+ PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
+ be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+
+ mutex_lock(&ep->com.mutex);
+ dst_confirm(ep->dst);
+
+ /* setup the hwtid for this connection */
+ ep->hwtid = tid;
+ cxgb4_insert_tid(t, ep, tid);
+ insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
+
+ ep->snd_seq = be32_to_cpu(req->snd_isn);
+ ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+
+ set_emss(ep, ntohs(req->tcp_opt));
+
+ /* dealloc the atid */
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
+ cxgb4_free_atid(t, atid);
+ set_bit(ACT_ESTAB, &ep->com.history);
+
+ /* start MPA negotiation */
+ send_flowc(ep, NULL);
+ if (ep->retry_with_mpa_v1)
+ send_mpa_req(ep, skb, 1);
+ else
+ send_mpa_req(ep, skb, mpa_rev);
+ mutex_unlock(&ep->com.mutex);
+ return 0;
+}
+
+static void close_complete_upcall(struct c4iw_ep *ep, int status)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ event.status = status;
+ if (ep->com.cm_id) {
+ PDBG("close complete delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ set_bit(CLOSE_UPCALL, &ep->com.history);
+ }
+}
+
+static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
+{
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ __state_set(&ep->com, ABORTING);
+ set_bit(ABORT_CONN, &ep->com.history);
+ return send_abort(ep, skb, gfp);
+}
+
+static void peer_close_upcall(struct c4iw_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_DISCONNECT;
+ if (ep->com.cm_id) {
+ PDBG("peer close delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ set_bit(DISCONN_UPCALL, &ep->com.history);
+ }
+}
+
+static void peer_abort_upcall(struct c4iw_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ event.status = -ECONNRESET;
+ if (ep->com.cm_id) {
+ PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
+ ep->com.cm_id, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ set_bit(ABORT_UPCALL, &ep->com.history);
+ }
+}
+
+static void connect_reply_upcall(struct c4iw_ep *ep, int status)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REPLY;
+ event.status = status;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
+
+ if ((status == 0) || (status == -ECONNREFUSED)) {
+ if (!ep->tried_with_mpa_v1) {
+ /* this means MPA_v2 is used */
+ event.private_data_len = ep->plen -
+ sizeof(struct mpa_v2_conn_params);
+ event.private_data = ep->mpa_pkt +
+ sizeof(struct mpa_message) +
+ sizeof(struct mpa_v2_conn_params);
+ } else {
+ /* this means MPA_v1 is used */
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt +
+ sizeof(struct mpa_message);
+ }
+ }
+
+ PDBG("%s ep %p tid %u status %d\n", __func__, ep,
+ ep->hwtid, status);
+ set_bit(CONN_RPL_UPCALL, &ep->com.history);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+
+ if (status < 0) {
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ }
+}
+
+static int connect_request_upcall(struct c4iw_ep *ep)
+{
+ struct iw_cm_event event;
+ int ret;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ memcpy(&event.local_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&event.remote_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
+ event.provider_data = ep;
+ if (!ep->tried_with_mpa_v1) {
+ /* this means MPA_v2 is used */
+ event.ord = ep->ord;
+ event.ird = ep->ird;
+ event.private_data_len = ep->plen -
+ sizeof(struct mpa_v2_conn_params);
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
+ sizeof(struct mpa_v2_conn_params);
+ } else {
+ /* this means MPA_v1 is used. Send max supported */
+ event.ord = c4iw_max_read_depth;
+ event.ird = c4iw_max_read_depth;
+ event.private_data_len = ep->plen;
+ event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+ }
+ c4iw_get_ep(&ep->com);
+ ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
+ &event);
+ if (ret)
+ c4iw_put_ep(&ep->com);
+ set_bit(CONNREQ_UPCALL, &ep->com.history);
+ c4iw_put_ep(&ep->parent_ep->com);
+ return ret;
+}
+
+static void established_upcall(struct c4iw_ep *ep)
+{
+ struct iw_cm_event event;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_ESTABLISHED;
+ event.ird = ep->ird;
+ event.ord = ep->ord;
+ if (ep->com.cm_id) {
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+ set_bit(ESTAB_UPCALL, &ep->com.history);
+ }
+}
+
+static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
+{
+ struct cpl_rx_data_ack *req;
+ struct sk_buff *skb;
+ int wrlen = roundup(sizeof *req, 16);
+
+ PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ skb = get_skb(NULL, wrlen, GFP_KERNEL);
+ if (!skb) {
+ printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ return 0;
+ }
+
+ /*
+ * If we couldn't specify the entire rcv window at connection setup
+ * due to the limit in the number of bits in the RCV_BUFSIZ field,
+ * then add the overage in to the credits returned.
+ */
+ if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024)
+ credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024;
+
+ req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ INIT_TP_WR(req, ep->hwtid);
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
+ ep->hwtid));
+ req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) |
+ F_RX_DACK_CHANGE |
+ V_RX_DACK_MODE(dack_mode));
+ set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+ c4iw_ofld_send(&ep->com.dev->rdev, skb);
+ return credits;
+}
+
+static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+{
+ struct mpa_message *mpa;
+ struct mpa_v2_conn_params *mpa_v2_params;
+ u16 plen;
+ u16 resp_ird, resp_ord;
+ u8 rtr_mismatch = 0, insuff_ird = 0;
+ struct c4iw_qp_attributes attrs;
+ enum c4iw_qp_attr_mask mask;
+ int err;
+ int disconnect = 0;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+
+ /*
+ * Stop mpa timer. If it expired, then
+ * we ignore the MPA reply. process_timeout()
+ * will abort the connection.
+ */
+ if (stop_ep_timer(ep))
+ return 0;
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * copy the new data into our accumulation buffer.
+ */
+ skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+ skb->len);
+ ep->mpa_pkt_len += skb->len;
+
+ /*
+ * if we don't even have the mpa message, then bail.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return 0;
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /* Validate MPA header. */
+ if (mpa->revision > mpa_rev) {
+ printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+ " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ err = -EPROTO;
+ goto err;
+ }
+ if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ /*
+ * Fail if we have received more data than plen accounts for.
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ err = -EPROTO;
+ goto err;
+ }
+
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ * We'll continue processing when more data arrives.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return 0;
+
+ if (mpa->flags & MPA_REJECT) {
+ err = -ECONNREFUSED;
+ goto err;
+ }
+
+ /*
+ * If we get here we have accumulated the entire MPA
+ * start reply message, including private data, and
+ * the MPA header is valid.
+ */
+ __state_set(&ep->com, FPDU_MODE);
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa->revision;
+ ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+ if (mpa->revision == 2) {
+ ep->mpa_attr.enhanced_rdma_conn =
+ mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+ if (ep->mpa_attr.enhanced_rdma_conn) {
+ mpa_v2_params = (struct mpa_v2_conn_params *)
+ (ep->mpa_pkt + sizeof(*mpa));
+ resp_ird = ntohs(mpa_v2_params->ird) &
+ MPA_V2_IRD_ORD_MASK;
+ resp_ord = ntohs(mpa_v2_params->ord) &
+ MPA_V2_IRD_ORD_MASK;
+
+ /*
+ * This is a double-check. Ideally, the checks below are not
+ * required since the IRD/ORD negotiation has already been
+ * handled in c4iw_accept_cr().
+ */
+ if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+ err = -ENOMEM;
+ ep->ird = resp_ord;
+ ep->ord = resp_ird;
+ insuff_ird = 1;
+ }
+
+ if (ntohs(mpa_v2_params->ird) &
+ MPA_V2_PEER2PEER_MODEL) {
+ if (ntohs(mpa_v2_params->ord) &
+ MPA_V2_RDMA_WRITE_RTR)
+ ep->mpa_attr.p2p_type =
+ FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+ else if (ntohs(mpa_v2_params->ord) &
+ MPA_V2_RDMA_READ_RTR)
+ ep->mpa_attr.p2p_type =
+ FW_RI_INIT_P2PTYPE_READ_REQ;
+ }
+ }
+ } else if (mpa->revision == 1)
+ if (peer2peer)
+ ep->mpa_attr.p2p_type = p2p_type;
+
+ PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
+ "%d\n", __func__, ep->mpa_attr.crc_enabled,
+ ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type, p2p_type);
+
+ /*
+ * If the responder's RTR does not match the initiator's, set
+ * FW_RI_INIT_P2PTYPE_DISABLED in the MPA attributes so that no RTR is
+ * generated when the QP moves to RTS.
+ * A TERM message will be sent after the QP has moved to RTS.
+ */
+ if ((ep->mpa_attr.version == 2) && peer2peer &&
+ (ep->mpa_attr.p2p_type != p2p_type)) {
+ ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+ rtr_mismatch = 1;
+ }
+
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = C4IW_QP_STATE_RTS;
+
+ mask = C4IW_QP_ATTR_NEXT_STATE |
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
+ C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
+
+ /* bind QP and TID with INIT_WR */
+ err = c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (err)
+ goto err;
+
+ /*
+ * If the responder's RTR requirement did not match what the
+ * initiator supports, generate a TERM message.
+ */
+ if (rtr_mismatch) {
+ printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+ attrs.layer_etype = LAYER_MPA | DDP_LLP;
+ attrs.ecode = MPA_NOMATCH_RTR;
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ attrs.send_term = 1;
+ err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ err = -ENOMEM;
+ disconnect = 1;
+ goto out;
+ }
+
+ /*
+ * Generate a TERM if the initiator's IRD is not sufficient for the
+ * responder's ORD. Currently we behave the same way even when the
+ * responder's IRD is insufficient for the initiator's ORD.
+ */
+ if (insuff_ird) {
+ printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
+ __func__);
+ attrs.layer_etype = LAYER_MPA | DDP_LLP;
+ attrs.ecode = MPA_INSUFF_IRD;
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ attrs.send_term = 1;
+ err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ err = -ENOMEM;
+ disconnect = 1;
+ goto out;
+ }
+ goto out;
+err:
+ __state_set(&ep->com, ABORTING);
+ send_abort(ep, skb, GFP_KERNEL);
+out:
+ connect_reply_upcall(ep, err);
+ return disconnect;
+}
+
+static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+{
+ struct mpa_message *mpa;
+ struct mpa_v2_conn_params *mpa_v2_params;
+ u16 plen;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+
+ /*
+ * If we get more than the supported amount of private data
+ * then we must fail this connection.
+ */
+ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+ (void)stop_ep_timer(ep);
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+
+ /*
+ * Copy the new data into our accumulation buffer.
+ */
+ skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+ skb->len);
+ ep->mpa_pkt_len += skb->len;
+
+ /*
+ * If we don't even have the mpa message, then bail.
+ * We'll continue processing when more data arrives.
+ */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+
+ PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /*
+ * Validate MPA Header.
+ */
+ if (mpa->revision > mpa_rev) {
+ printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+ " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ (void)stop_ep_timer(ep);
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
+ (void)stop_ep_timer(ep);
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ plen = ntohs(mpa->private_data_size);
+
+ /*
+ * Fail if there's too much private data.
+ */
+ if (plen > MPA_MAX_PRIVATE_DATA) {
+ (void)stop_ep_timer(ep);
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+
+ /*
+ * Fail if we have received more data than plen accounts for.
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+ (void)stop_ep_timer(ep);
+ abort_connection(ep, skb, GFP_KERNEL);
+ return;
+ }
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ /*
+ * If we get here we have accumulated the entire MPA
+ * start request message, including private data.
+ */
+ ep->mpa_attr.initiator = 0;
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa->revision;
+ if (mpa->revision == 1)
+ ep->tried_with_mpa_v1 = 1;
+ ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+ if (mpa->revision == 2) {
+ ep->mpa_attr.enhanced_rdma_conn =
+ mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+ if (ep->mpa_attr.enhanced_rdma_conn) {
+ mpa_v2_params = (struct mpa_v2_conn_params *)
+ (ep->mpa_pkt + sizeof(*mpa));
+ ep->ird = ntohs(mpa_v2_params->ird) &
+ MPA_V2_IRD_ORD_MASK;
+ ep->ord = ntohs(mpa_v2_params->ord) &
+ MPA_V2_IRD_ORD_MASK;
+ if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
+ if (peer2peer) {
+ if (ntohs(mpa_v2_params->ord) &
+ MPA_V2_RDMA_WRITE_RTR)
+ ep->mpa_attr.p2p_type =
+ FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+ else if (ntohs(mpa_v2_params->ord) &
+ MPA_V2_RDMA_READ_RTR)
+ ep->mpa_attr.p2p_type =
+ FW_RI_INIT_P2PTYPE_READ_REQ;
+ }
+ }
+ } else if (mpa->revision == 1)
+ if (peer2peer)
+ ep->mpa_attr.p2p_type = p2p_type;
+
+ PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
+ "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type);
+
+ /*
+ * If the endpoint timer already expired, then we ignore
+ * the start request. process_timeout() will abort
+ * the connection.
+ */
+ if (!stop_ep_timer(ep)) {
+ __state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ mutex_lock(&ep->parent_ep->com.mutex);
+ if (ep->parent_ep->com.state != DEAD) {
+ if (connect_request_upcall(ep))
+ abort_connection(ep, skb, GFP_KERNEL);
+ } else {
+ abort_connection(ep, skb, GFP_KERNEL);
+ }
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ }
+ return;
+}
+
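+/*
+ * Handle CPL_RX_DATA: streaming-mode data is only expected while MPA
+ * negotiation is in progress; data arriving in FPDU_MODE moves the QP
+ * to TERMINATE and tears the connection down.
+ */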
+static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct cpl_rx_data *hdr = cplhdr(skb);
+ unsigned int dlen = ntohs(hdr->len);
+ unsigned int tid = GET_TID(hdr);
+ struct tid_info *t = dev->rdev.lldi.tids;
+ __u8 status = hdr->status;
+ int disconnect = 0;
+
+ ep = lookup_tid(t, tid);
+ if (!ep)
+ return 0;
+ PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
+ skb_pull(skb, sizeof(*hdr));
+ skb_trim(skb, dlen);
+ mutex_lock(&ep->com.mutex);
+
+ /* update RX credits */
+ update_rx_credits(ep, dlen);
+
+ switch (ep->com.state) {
+ case MPA_REQ_SENT:
+ ep->rcv_seq += dlen;
+ disconnect = process_mpa_reply(ep, skb);
+ break;
+ case MPA_REQ_WAIT:
+ ep->rcv_seq += dlen;
+ process_mpa_request(ep, skb);
+ break;
+ case FPDU_MODE: {
+ struct c4iw_qp_attributes attrs;
+ BUG_ON(!ep->com.qp);
+ if (status)
+ pr_err("%s Unexpected streaming data." \
+ " qpid %u ep %p state %d tid %u status %d\n",
+ __func__, ep->com.qp->wq.sq.qid, ep,
+ ep->com.state, ep->hwtid, status);
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ disconnect = 1;
+ break;
+ }
+ default:
+ break;
+ }
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ return 0;
+}
+
+static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+ int release = 0;
+ unsigned int tid = GET_TID(rpl);
+ struct tid_info *t = dev->rdev.lldi.tids;
+
+ ep = lookup_tid(t, tid);
+ if (!ep) {
+ printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
+ return 0;
+ }
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ mutex_lock(&ep->com.mutex);
+ switch (ep->com.state) {
+ case ABORTING:
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ default:
+ printk(KERN_ERR "%s ep %p state %d\n",
+ __func__, ep, ep->com.state);
+ break;
+ }
+ mutex_unlock(&ep->com.mutex);
+
+ if (release)
+ release_ep_resources(ep);
+ return 0;
+}
+
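+/*
+ * Retry a failed active open by handing the connection to the firmware
+ * with a FW_OFLD_CONNECTION_WR. Used when the active open failed with
+ * CPL_ERR_TCAM_FULL and the LLD has fw_ofld_conn enabled.
+ */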
+static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
+{
+ struct sk_buff *skb;
+ struct fw_ofld_connection_wr *req;
+ unsigned int mtu_idx;
+ int wscale;
+ struct sockaddr_in *sin;
+ int win;
+
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
+ req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR));
+ req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
+ req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
+ sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ req->le.lport = sin->sin_port;
+ req->le.u.ipv4.lip = sin->sin_addr.s_addr;
+ sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ req->le.pport = sin->sin_port;
+ req->le.u.ipv4.pip = sin->sin_addr.s_addr;
+ req->tcb.t_state_to_astid =
+ htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) |
+ V_FW_OFLD_CONNECTION_WR_ASTID(atid));
+ req->tcb.cplrxdataack_cplpassacceptrpl =
+ htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
+ req->tcb.tx_max = (__force __be32) jiffies;
+ req->tcb.rcv_adv = htons(1);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
+ wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+
+ req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
+ (nocong ? NO_CONG(1) : 0) |
+ KEEP_ALIVE(1) |
+ DELACK(1) |
+ WND_SCALE(wscale) |
+ MSS_IDX(mtu_idx) |
+ L2T_IDX(ep->l2t->idx) |
+ TX_CHAN(ep->tx_chan) |
+ SMAC_SEL(ep->smac_idx) |
+ DSCP(ep->tos) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
+ RCV_BUFSIZ(win));
+ req->tcb.opt2 = (__force __be32) (PACE(1) |
+ TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
+ RX_CHANNEL(0) |
+ CCTRL_ECN(enable_ecn) |
+ RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid));
+ if (enable_tcp_timestamps)
+ req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1);
+ if (enable_tcp_sack)
+ req->tcb.opt2 |= (__force __be32) SACK_EN(1);
+ if (wscale && enable_tcp_window_scaling)
+ req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1);
+ req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0);
+ req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2);
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
+ set_bit(ACT_OFLD_CONN, &ep->com.history);
+ c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+}
+
+/*
+ * Return whether a failed active open has allocated a TID
+ */
+static inline int act_open_has_tid(int status)
+{
+ return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
+ status != CPL_ERR_ARP_MISS;
+}
+
+/*
+ * Returns whether a CPL status conveys negative advice.
+ */
+static int is_neg_adv(unsigned int status)
+{
+ return status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE ||
+ status == CPL_ERR_KEEPALV_NEG_ADVICE;
+}
+
+static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
+{
+ ep->snd_win = snd_win;
+ ep->rcv_win = rcv_win;
+ PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+}
+
+#define ACT_OPEN_RETRY_COUNT 2
+
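+/*
+ * Resolve the neighbour for the destination and fill in the endpoint's
+ * L2T entry, MTU, TX channel, SMAC index and queue indices. Loopback
+ * destinations are resolved against the local net_device that owns the
+ * peer address.
+ */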
+static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
+ struct dst_entry *dst, struct c4iw_dev *cdev,
+ bool clear_mpa_v1)
+{
+ struct neighbour *n;
+ int err, step;
+ struct net_device *pdev;
+
+ n = dst_neigh_lookup(dst, peer_ip);
+ if (!n)
+ return -ENODEV;
+
+ rcu_read_lock();
+ err = -ENOMEM;
+ if (n->dev->flags & IFF_LOOPBACK) {
+ if (iptype == 4)
+ pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
+ else if (IS_ENABLED(CONFIG_IPV6))
+ for_each_netdev(&init_net, pdev) {
+ if (ipv6_chk_addr(&init_net,
+ (struct in6_addr *)peer_ip,
+ pdev, 1))
+ break;
+ }
+ else
+ pdev = NULL;
+
+ if (!pdev) {
+ err = -ENODEV;
+ goto out;
+ }
+ ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
+ n, pdev, 0);
+ if (!ep->l2t)
+ goto out;
+ ep->mtu = pdev->mtu;
+ ep->tx_chan = cxgb4_port_chan(pdev);
+ ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ step = cdev->rdev.lldi.ntxq /
+ cdev->rdev.lldi.nchan;
+ ep->txq_idx = cxgb4_port_idx(pdev) * step;
+ step = cdev->rdev.lldi.nrxq /
+ cdev->rdev.lldi.nchan;
+ ep->ctrlq_idx = cxgb4_port_idx(pdev);
+ ep->rss_qid = cdev->rdev.lldi.rxq_ids[
+ cxgb4_port_idx(pdev) * step];
+ set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
+ dev_put(pdev);
+ } else {
+ pdev = get_real_dev(n->dev);
+ ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
+ n, pdev, 0);
+ if (!ep->l2t)
+ goto out;
+ ep->mtu = dst_mtu(dst);
+ ep->tx_chan = cxgb4_port_chan(pdev);
+ ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+ step = cdev->rdev.lldi.ntxq /
+ cdev->rdev.lldi.nchan;
+ ep->txq_idx = cxgb4_port_idx(pdev) * step;
+ ep->ctrlq_idx = cxgb4_port_idx(pdev);
+ step = cdev->rdev.lldi.nrxq /
+ cdev->rdev.lldi.nchan;
+ ep->rss_qid = cdev->rdev.lldi.rxq_ids[
+ cxgb4_port_idx(pdev) * step];
+ set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
+
+ if (clear_mpa_v1) {
+ ep->retry_with_mpa_v1 = 0;
+ ep->tried_with_mpa_v1 = 0;
+ }
+ }
+ err = 0;
+out:
+ rcu_read_unlock();
+
+ neigh_release(n);
+
+ return err;
+}
+
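+/*
+ * Re-initiate an active open on an existing endpoint, e.g. when falling
+ * back to MPA v1 after a peer abort or when the active open failed with
+ * CPL_ERR_CONN_EXIST.
+ */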
+static int c4iw_reconnect(struct c4iw_ep *ep)
+{
+ int err = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)
+ &ep->com.cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)
+ &ep->com.cm_id->remote_addr;
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
+ &ep->com.cm_id->local_addr;
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
+ &ep->com.cm_id->remote_addr;
+ int iptype;
+ __u8 *ra;
+
+ PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+ init_timer(&ep->timer);
+
+ /*
+ * Allocate an active TID to initiate a TCP connection.
+ */
+ ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
+ if (ep->atid == -1) {
+ pr_err("%s - cannot alloc atid.\n", __func__);
+ err = -ENOMEM;
+ goto fail2;
+ }
+ insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
+
+ /* find a route */
+ if (ep->com.cm_id->local_addr.ss_family == AF_INET) {
+ ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr, laddr->sin_port,
+ raddr->sin_port, 0);
+ iptype = 4;
+ ra = (__u8 *)&raddr->sin_addr;
+ } else {
+ ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port, raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
+ iptype = 6;
+ ra = (__u8 *)&raddr6->sin6_addr;
+ }
+ if (!ep->dst) {
+ pr_err("%s - cannot find route.\n", __func__);
+ err = -EHOSTUNREACH;
+ goto fail3;
+ }
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
+ if (err) {
+ pr_err("%s - cannot alloc l2e.\n", __func__);
+ goto fail4;
+ }
+
+ PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
+
+ state_set(&ep->com, CONNECTING);
+ ep->tos = 0;
+
+ /* send connect request to rnic */
+ err = send_connect(ep);
+ if (!err)
+ goto out;
+
+ cxgb4_l2t_release(ep->l2t);
+fail4:
+ dst_release(ep->dst);
+fail3:
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+fail2:
+ /*
+ * Remember to send a notification to the upper layer. The upper
+ * layer is not aware that this is a reconnect attempt, so it is
+ * still waiting for the response to its first connect request.
+ */
+ connect_reply_upcall(ep, -ECONNRESET);
+ c4iw_put_ep(&ep->com);
+out:
+ return err;
+}
+
+static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct cpl_act_open_rpl *rpl = cplhdr(skb);
+ unsigned int atid = GET_TID_TID(GET_AOPEN_ATID(
+ ntohl(rpl->atid_status)));
+ struct tid_info *t = dev->rdev.lldi.tids;
+ int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status));
+ struct sockaddr_in *la;
+ struct sockaddr_in *ra;
+ struct sockaddr_in6 *la6;
+ struct sockaddr_in6 *ra6;
+
+ ep = lookup_atid(t, atid);
+ la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+ ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
+
+ PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
+ status, status2errno(status));
+
+ if (is_neg_adv(status)) {
+ printk(KERN_WARNING MOD "Connection problems for atid %u\n",
+ atid);
+ return 0;
+ }
+
+ set_bit(ACT_OPEN_RPL, &ep->com.history);
+
+ /*
+ * Log interesting failures.
+ */
+ switch (status) {
+ case CPL_ERR_CONN_RESET:
+ case CPL_ERR_CONN_TIMEDOUT:
+ break;
+ case CPL_ERR_TCAM_FULL:
+ mutex_lock(&dev->rdev.stats.lock);
+ dev->rdev.stats.tcam_full++;
+ mutex_unlock(&dev->rdev.stats.lock);
+ if (ep->com.local_addr.ss_family == AF_INET &&
+ dev->rdev.lldi.enable_fw_ofld_conn) {
+ send_fw_act_open_req(ep,
+ GET_TID_TID(GET_AOPEN_ATID(
+ ntohl(rpl->atid_status))));
+ return 0;
+ }
+ break;
+ case CPL_ERR_CONN_EXIST:
+ if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
+ set_bit(ACT_RETRY_INUSE, &ep->com.history);
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
+ atid);
+ cxgb4_free_atid(t, atid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_reconnect(ep);
+ return 0;
+ }
+ break;
+ default:
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
+ atid, status, status2errno(status),
+ &la->sin_addr.s_addr, ntohs(la->sin_port),
+ &ra->sin_addr.s_addr, ntohs(ra->sin_port));
+ } else {
+ pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
+ atid, status, status2errno(status),
+ la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
+ ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
+ }
+ break;
+ }
+
+ connect_reply_upcall(ep, status2errno(status));
+ state_set(&ep->com, DEAD);
+
+ if (status && act_open_has_tid(status))
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
+
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
+ cxgb4_free_atid(t, atid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_put_ep(&ep->com);
+
+ return 0;
+}
+
+static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_pass_open_rpl *rpl = cplhdr(skb);
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int stid = GET_TID(rpl);
+ struct c4iw_listen_ep *ep = lookup_stid(t, stid);
+
+ if (!ep) {
+ PDBG("%s stid %d lookup failure!\n", __func__, stid);
+ goto out;
+ }
+ PDBG("%s ep %p status %d error %d\n", __func__, ep,
+ rpl->status, status2errno(rpl->status));
+ c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
+
+out:
+ return 0;
+}
+
+static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int stid = GET_TID(rpl);
+ struct c4iw_listen_ep *ep = lookup_stid(t, stid);
+
+ PDBG("%s ep %p\n", __func__, ep);
+ c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
+ return 0;
+}
+
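+/*
+ * Build and send the CPL_PASS_ACCEPT_RPL that accepts an incoming
+ * connection request, encoding the TCP options and the receive window
+ * in opt0/opt2.
+ */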
+static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
+ struct cpl_pass_accept_req *req)
+{
+ struct cpl_pass_accept_rpl *rpl;
+ unsigned int mtu_idx;
+ u64 opt0;
+ u32 opt2;
+ int wscale;
+ struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
+ int win;
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ BUG_ON(skb_cloned(skb));
+
+ skb_get(skb);
+ rpl = cplhdr(skb);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp);
+ wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+ opt0 = (nocong ? NO_CONG(1) : 0) |
+ KEEP_ALIVE(1) |
+ DELACK(1) |
+ WND_SCALE(wscale) |
+ MSS_IDX(mtu_idx) |
+ L2T_IDX(ep->l2t->idx) |
+ TX_CHAN(ep->tx_chan) |
+ SMAC_SEL(ep->smac_idx) |
+ DSCP(ep->tos >> 2) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
+ RCV_BUFSIZ(win);
+ opt2 = RX_CHANNEL(0) |
+ RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
+
+ if (enable_tcp_timestamps && req->tcpopt.tstamp)
+ opt2 |= TSTAMPS_EN(1);
+ if (enable_tcp_sack && req->tcpopt.sack)
+ opt2 |= SACK_EN(1);
+ if (wscale && enable_tcp_window_scaling)
+ opt2 |= WND_SCALE_EN(1);
+ if (enable_ecn) {
+ const struct tcphdr *tcph;
+ u32 hlen = ntohl(req->hdr_len);
+
+ tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) +
+ G_IP_HDR_LEN(hlen);
+ if (tcph->ece && tcph->cwr)
+ opt2 |= CCTRL_ECN(1);
+ }
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ rpl5 = (void *)rpl;
+ memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
+ if (peer2peer)
+ isn += 4;
+ rpl5->iss = cpu_to_be32(isn);
+ PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ }
+
+ rpl->opt0 = cpu_to_be64(opt0);
+ rpl->opt2 = cpu_to_be32(opt2);
+ set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
+ t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
+ c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+
+ return;
+}
+
+static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
+{
+ PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
+ BUG_ON(skb_cloned(skb));
+ skb_trim(skb, sizeof(struct cpl_tid_release));
+ release_tid(&dev->rdev, hwtid, skb);
+ return;
+}
+
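+/*
+ * Extract the IP version, addresses and TCP ports from the packet
+ * headers that follow the CPL_PASS_ACCEPT_REQ message.
+ */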
+static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
+ __u8 *local_ip, __u8 *peer_ip,
+ __be16 *local_port, __be16 *peer_port)
+{
+ int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len));
+ int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len));
+ struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
+ struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
+ struct tcphdr *tcp = (struct tcphdr *)
+ ((u8 *)(req + 1) + eth_len + ip_len);
+
+ if (ip->version == 4) {
+ PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
+ ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
+ ntohs(tcp->dest));
+ *iptype = 4;
+ memcpy(peer_ip, &ip->saddr, 4);
+ memcpy(local_ip, &ip->daddr, 4);
+ } else {
+ PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
+ ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
+ ntohs(tcp->dest));
+ *iptype = 6;
+ memcpy(peer_ip, ip6->saddr.s6_addr, 16);
+ memcpy(local_ip, ip6->daddr.s6_addr, 16);
+ }
+ *peer_port = tcp->source;
+ *local_port = tcp->dest;
+
+ return;
+}
+
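+/*
+ * Handle an incoming CPL_PASS_ACCEPT_REQ: look up the listening endpoint,
+ * find a route to the peer, allocate and import a child endpoint and send
+ * the accept reply; on any failure the hardware tid is released.
+ */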
+static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *child_ep = NULL, *parent_ep;
+ struct cpl_pass_accept_req *req = cplhdr(skb);
+ unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid));
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int hwtid = GET_TID(req);
+ struct dst_entry *dst;
+ __u8 local_ip[16], peer_ip[16];
+ __be16 local_port, peer_port;
+ int err;
+ u16 peer_mss = ntohs(req->tcpopt.mss);
+ int iptype;
+ unsigned short hdrs;
+
+ parent_ep = lookup_stid(t, stid);
+ if (!parent_ep) {
+ PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ goto reject;
+ }
+
+ if (state_read(&parent_ep->com) != LISTEN) {
+ printk(KERN_ERR "%s - listening ep not in LISTEN\n",
+ __func__);
+ goto reject;
+ }
+
+ get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
+
+ /* Find output route */
+ if (iptype == 4) {
+ PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
+ dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
+ local_port, peer_port,
+ GET_POPEN_TOS(ntohl(req->tos_stid)));
+ } else {
+ PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+ , __func__, parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
+ dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
+ PASS_OPEN_TOS(ntohl(req->tos_stid)),
+ ((struct sockaddr_in6 *)
+ &parent_ep->com.local_addr)->sin6_scope_id);
+ }
+ if (!dst) {
+ printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
+ __func__);
+ goto reject;
+ }
+
+ child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
+ if (!child_ep) {
+ printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
+ __func__);
+ dst_release(dst);
+ goto reject;
+ }
+
+ err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
+ if (err) {
+ printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
+ __func__);
+ dst_release(dst);
+ kfree(child_ep);
+ goto reject;
+ }
+
+ hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
+ if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
+ child_ep->mtu = peer_mss + hdrs;
+
+ state_set(&child_ep->com, CONNECTING);
+ child_ep->com.dev = dev;
+ child_ep->com.cm_id = NULL;
+ if (iptype == 4) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)
+ &child_ep->com.local_addr;
+ sin->sin_family = PF_INET;
+ sin->sin_port = local_port;
+ sin->sin_addr.s_addr = *(__be32 *)local_ip;
+ sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
+ sin->sin_family = PF_INET;
+ sin->sin_port = peer_port;
+ sin->sin_addr.s_addr = *(__be32 *)peer_ip;
+ } else {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+ &child_ep->com.local_addr;
+ sin6->sin6_family = PF_INET6;
+ sin6->sin6_port = local_port;
+ memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
+ sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
+ sin6->sin6_family = PF_INET6;
+ sin6->sin6_port = peer_port;
+ memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
+ }
+ c4iw_get_ep(&parent_ep->com);
+ child_ep->parent_ep = parent_ep;
+ child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
+ child_ep->dst = dst;
+ child_ep->hwtid = hwtid;
+
+ PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
+ child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
+
+ init_timer(&child_ep->timer);
+ cxgb4_insert_tid(t, child_ep, hwtid);
+ insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid);
+ accept_cr(child_ep, skb, req);
+ set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
+ goto out;
+reject:
+ reject_cr(dev, hwtid, skb);
+out:
+ return 0;
+}
+
+static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct cpl_pass_establish *req = cplhdr(skb);
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int tid = GET_TID(req);
+
+ ep = lookup_tid(t, tid);
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ ep->snd_seq = be32_to_cpu(req->snd_isn);
+ ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+
+ PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
+ ntohs(req->tcp_opt));
+
+ set_emss(ep, ntohs(req->tcp_opt));
+
+ dst_confirm(ep->dst);
+ state_set(&ep->com, MPA_REQ_WAIT);
+ start_ep_timer(ep);
+ send_flowc(ep, skb);
+ set_bit(PASS_ESTAB, &ep->com.history);
+
+ return 0;
+}
+
+static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_peer_close *hdr = cplhdr(skb);
+ struct c4iw_ep *ep;
+ struct c4iw_qp_attributes attrs;
+ int disconnect = 1;
+ int release = 0;
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int tid = GET_TID(hdr);
+ int ret;
+
+ ep = lookup_tid(t, tid);
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ dst_confirm(ep->dst);
+
+ set_bit(PEER_CLOSE, &ep->com.history);
+ mutex_lock(&ep->com.mutex);
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ __state_set(&ep->com, CLOSING);
+ break;
+ case MPA_REQ_SENT:
+ __state_set(&ep->com, CLOSING);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR. Also wake up anyone waiting
+ * in rdma connection migration (see c4iw_accept_cr()).
+ */
+ __state_set(&ep->com, CLOSING);
+ PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ break;
+ case MPA_REP_SENT:
+ __state_set(&ep->com, CLOSING);
+ PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ break;
+ case FPDU_MODE:
+ start_ep_timer(ep);
+ __state_set(&ep->com, CLOSING);
+ attrs.next_state = C4IW_QP_STATE_CLOSING;
+ ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ if (ret != -ECONNRESET) {
+ peer_close_upcall(ep);
+ disconnect = 1;
+ }
+ break;
+ case ABORTING:
+ disconnect = 0;
+ break;
+ case CLOSING:
+ __state_set(&ep->com, MORIBUND);
+ disconnect = 0;
+ break;
+ case MORIBUND:
+ (void)stop_ep_timer(ep);
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = C4IW_QP_STATE_IDLE;
+ c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+ close_complete_upcall(ep, 0);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ disconnect = 0;
+ break;
+ case DEAD:
+ disconnect = 0;
+ break;
+ default:
+ BUG_ON(1);
+ }
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ if (release)
+ release_ep_resources(ep);
+ return 0;
+}
+
+static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_abort_req_rss *req = cplhdr(skb);
+ struct c4iw_ep *ep;
+ struct cpl_abort_rpl *rpl;
+ struct sk_buff *rpl_skb;
+ struct c4iw_qp_attributes attrs;
+ int ret;
+ int release = 0;
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int tid = GET_TID(req);
+
+ ep = lookup_tid(t, tid);
+ if (is_neg_adv(req->status)) {
+ PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
+ ep->hwtid);
+ return 0;
+ }
+ PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
+ set_bit(PEER_ABORT, &ep->com.history);
+
+ /*
+ * Wake up any threads in rdma_init() or rdma_fini().
+ * However, this is not needed if the com state is just
+ * MPA_REQ_SENT.
+ */
+ if (ep->com.state != MPA_REQ_SENT)
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+
+ mutex_lock(&ep->com.mutex);
+ switch (ep->com.state) {
+ case CONNECTING:
+ break;
+ case MPA_REQ_WAIT:
+ (void)stop_ep_timer(ep);
+ break;
+ case MPA_REQ_SENT:
+ (void)stop_ep_timer(ep);
+ if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+ connect_reply_upcall(ep, -ECONNRESET);
+ else {
+ /*
+ * We don't send a notification upwards because we want to
+ * retry with MPA v1 without the upper layers even knowing it.
+ *
+ * Do some housekeeping so as to re-initiate the
+ * connection.
+ */
+ PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
+ mpa_rev);
+ ep->retry_with_mpa_v1 = 1;
+ }
+ break;
+ case MPA_REP_SENT:
+ break;
+ case MPA_REQ_RCVD:
+ break;
+ case MORIBUND:
+ case CLOSING:
+ stop_ep_timer(ep);
+ /*FALLTHROUGH*/
+ case FPDU_MODE:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ ret = c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ printk(KERN_ERR MOD
+ "%s - qp <- error failed!\n",
+ __func__);
+ }
+ peer_abort_upcall(ep);
+ break;
+ case ABORTING:
+ break;
+ case DEAD:
+ PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ mutex_unlock(&ep->com.mutex);
+ return 0;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ dst_confirm(ep->dst);
+ if (ep->com.state != ABORTING) {
+ __state_set(&ep->com, DEAD);
+ /* we don't release if we want to retry with mpa_v1 */
+ if (!ep->retry_with_mpa_v1)
+ release = 1;
+ }
+ mutex_unlock(&ep->com.mutex);
+
+ rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+ if (!rpl_skb) {
+ printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
+ __func__);
+ release = 1;
+ goto out;
+ }
+ set_wr_txq(rpl_skb, CPL_PRIORITY_DATA, ep->txq_idx);
+ rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
+ rpl->cmd = CPL_ABORT_NO_RST;
+ c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
+out:
+ if (release)
+ release_ep_resources(ep);
+ else if (ep->retry_with_mpa_v1) {
+ remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_reconnect(ep);
+ }
+
+ return 0;
+}
+
+static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct c4iw_qp_attributes attrs;
+ struct cpl_close_con_rpl *rpl = cplhdr(skb);
+ int release = 0;
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int tid = GET_TID(rpl);
+
+ ep = lookup_tid(t, tid);
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ BUG_ON(!ep);
+
+ /* The cm_id may be null if we failed to connect */
+ mutex_lock(&ep->com.mutex);
+ switch (ep->com.state) {
+ case CLOSING:
+ __state_set(&ep->com, MORIBUND);
+ break;
+ case MORIBUND:
+ (void)stop_ep_timer(ep);
+ if ((ep->com.cm_id) && (ep->com.qp)) {
+ attrs.next_state = C4IW_QP_STATE_IDLE;
+ c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ close_complete_upcall(ep, 0);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+ case ABORTING:
+ case DEAD:
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ mutex_unlock(&ep->com.mutex);
+ if (release)
+ release_ep_resources(ep);
+ return 0;
+}
+
+static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_rdma_terminate *rpl = cplhdr(skb);
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int tid = GET_TID(rpl);
+ struct c4iw_ep *ep;
+ struct c4iw_qp_attributes attrs;
+
+ ep = lookup_tid(t, tid);
+ BUG_ON(!ep);
+
+ if (ep && ep->com.qp) {
+ printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
+ ep->com.qp->wq.sq.qid);
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ } else
+ printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+
+ return 0;
+}
+
+/*
+ * Upcall from the adapter indicating data has been transmitted.
+ * For us it's just the single MPA request or reply. We can now free
+ * the skb holding the mpa message.
+ */
+static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct c4iw_ep *ep;
+ struct cpl_fw4_ack *hdr = cplhdr(skb);
+ u8 credits = hdr->credits;
+ unsigned int tid = GET_TID(hdr);
+ struct tid_info *t = dev->rdev.lldi.tids;
+
+
+ ep = lookup_tid(t, tid);
+ PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ if (credits == 0) {
+ PDBG("%s 0 credit ack ep %p tid %u state %u\n",
+ __func__, ep, ep->hwtid, state_read(&ep->com));
+ return 0;
+ }
+
+ dst_confirm(ep->dst);
+ if (ep->mpa_skb) {
+ PDBG("%s last streaming msg ack ep %p tid %u state %u "
+ "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
+ state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ kfree_skb(ep->mpa_skb);
+ ep->mpa_skb = NULL;
+ }
+ return 0;
+}
+
+int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ int err = 0;
+ int disconnect = 0;
+ struct c4iw_ep *ep = to_ep(cm_id);
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+
+ mutex_lock(&ep->com.mutex);
+ if (ep->com.state == DEAD) {
+ mutex_unlock(&ep->com.mutex);
+ c4iw_put_ep(&ep->com);
+ return -ECONNRESET;
+ }
+ set_bit(ULP_REJECT, &ep->com.history);
+ BUG_ON(ep->com.state != MPA_REQ_RCVD);
+ if (mpa_rev == 0)
+ abort_connection(ep, NULL, GFP_KERNEL);
+ else {
+ err = send_mpa_reject(ep, pdata, pdata_len);
+ disconnect = 1;
+ }
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
+ return 0;
+}
+
+int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err;
+ struct c4iw_qp_attributes attrs;
+ enum c4iw_qp_attr_mask mask;
+ struct c4iw_ep *ep = to_ep(cm_id);
+ struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
+ struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
+
+ PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+
+ mutex_lock(&ep->com.mutex);
+ if (ep->com.state == DEAD) {
+ err = -ECONNRESET;
+ goto err;
+ }
+
+ BUG_ON(ep->com.state != MPA_REQ_RCVD);
+ BUG_ON(!qp);
+
+ set_bit(ULP_ACCEPT, &ep->com.history);
+ if ((conn_param->ord > c4iw_max_read_depth) ||
+ (conn_param->ird > c4iw_max_read_depth)) {
+ abort_connection(ep, NULL, GFP_KERNEL);
+ err = -EINVAL;
+ goto err;
+ }
+
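+ /*
+ * For MPA v2 enhanced connections, validate the ULP-supplied ORD/IRD
+ * against what the peer advertised in the MPA request; an
+ * unsatisfiable request is rejected and the connection aborted.
+ */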
+ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+ if (conn_param->ord > ep->ird) {
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ send_mpa_reject(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ abort_connection(ep, NULL, GFP_KERNEL);
+ err = -ENOMEM;
+ goto err;
+ }
+ if (conn_param->ird > ep->ord) {
+ if (!ep->ord)
+ conn_param->ird = 1;
+ else {
+ abort_connection(ep, NULL, GFP_KERNEL);
+ err = -ENOMEM;
+ goto err;
+ }
+ }
+
+ }
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+
+ if (ep->mpa_attr.version != 2)
+ if (peer2peer && ep->ird == 0)
+ ep->ird = 1;
+
+ PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = qp;
+ ref_qp(ep);
+
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = C4IW_QP_STATE_RTS;
+
+ /* bind QP and TID with INIT_WR */
+ mask = C4IW_QP_ATTR_NEXT_STATE |
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE |
+ C4IW_QP_ATTR_MPA_ATTR |
+ C4IW_QP_ATTR_MAX_IRD |
+ C4IW_QP_ATTR_MAX_ORD;
+
+ err = c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, mask, &attrs, 1);
+ if (err)
+ goto err1;
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ if (err)
+ goto err1;
+
+ __state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ mutex_unlock(&ep->com.mutex);
+ c4iw_put_ep(&ep->com);
+ return 0;
+err1:
+ ep->com.cm_id = NULL;
+ cm_id->rem_ref(cm_id);
+err:
+ mutex_unlock(&ep->com.mutex);
+ c4iw_put_ep(&ep->com);
+ return err;
+}
+
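+/*
+ * For loopback connects to INADDR_ANY, use the primary IPv4 address of
+ * port 0 as both the local and the remote address.
+ */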
+static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
+{
+ struct in_device *ind;
+ int found = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+ ind = in_dev_get(dev->rdev.lldi.ports[0]);
+ if (!ind)
+ return -EADDRNOTAVAIL;
+ for_primary_ifa(ind) {
+ laddr->sin_addr.s_addr = ifa->ifa_address;
+ raddr->sin_addr.s_addr = ifa->ifa_address;
+ found = 1;
+ break;
+ }
+ endfor_ifa(ind);
+ in_dev_put(ind);
+ return found ? 0 : -EADDRNOTAVAIL;
+}
+
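+/*
+ * Return the first link-local IPv6 address on the device whose flags do
+ * not include any of banned_flags.
+ */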
+static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
+ unsigned char banned_flags)
+{
+ struct inet6_dev *idev;
+ int err = -EADDRNOTAVAIL;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev != NULL) {
+ struct inet6_ifaddr *ifp;
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ memcpy(addr, &ifp->addr, 16);
+ err = 0;
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+ return err;
+}
+
+static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
+{
+ struct in6_addr uninitialized_var(addr);
+ struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr;
+ struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+
+ if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
+ memcpy(la6->sin6_addr.s6_addr, &addr, 16);
+ memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
+ return 0;
+ }
+ return -EADDRNOTAVAIL;
+}
+
+int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
+ struct c4iw_ep *ep;
+ int err = 0;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ __u8 *ra;
+ int iptype;
+ int iwpm_err = 0;
+
+ if ((conn_param->ord > c4iw_max_read_depth) ||
+ (conn_param->ird > c4iw_max_read_depth)) {
+ err = -EINVAL;
+ goto out;
+ }
+ ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+ if (!ep) {
+ printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ err = -ENOMEM;
+ goto out;
+ }
+ init_timer(&ep->timer);
+ ep->plen = conn_param->private_data_len;
+ if (ep->plen)
+ memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
+ conn_param->private_data, ep->plen);
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+
+ if (peer2peer && ep->ord == 0)
+ ep->ord = 1;
+
+ cm_id->add_ref(cm_id);
+ ep->com.dev = dev;
+ ep->com.cm_id = cm_id;
+ ep->com.qp = get_qhp(dev, conn_param->qpn);
+ if (!ep->com.qp) {
+ PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+ err = -EINVAL;
+ goto fail1;
+ }
+ ref_qp(ep);
+ PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
+ ep->com.qp, cm_id);
+
+ /*
+ * Allocate an active TID to initiate a TCP connection.
+ */
+ ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
+ if (ep->atid == -1) {
+ printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ insert_handle(dev, &dev->atid_idr, ep, ep->atid);
+
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.remote_addr));
+
+ /* No port mapper available, go with the specified peer information */
+ memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+ sizeof(ep->com.mapped_local_addr));
+ memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.mapped_remote_addr));
+
+ c4iw_form_reg_msg(dev, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+ if (iwpm_err) {
+ PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+ __func__, iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ c4iw_form_pm_msg(ep, &pm_msg);
+ iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
+ if (iwpm_err)
+ PDBG("%s: Port Mapper query fail (err = %d).\n",
+ __func__, iwpm_err);
+ else
+ c4iw_record_pm_msg(ep, &pm_msg);
+ }
+ if (iwpm_create_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+ iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ print_addr(&ep->com, __func__, "add_query/create_mapinfo");
+ set_bit(RELEASE_MAPINFO, &ep->com.flags);
+
+ laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+ raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
+
+ if (cm_id->remote_addr.ss_family == AF_INET) {
+ iptype = 4;
+ ra = (__u8 *)&raddr->sin_addr;
+
+ /*
+ * Handle loopback requests to INADDR_ANY.
+ */
+ if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
+ err = pick_local_ipaddrs(dev, cm_id);
+ if (err)
+ goto fail1;
+ }
+
+ /* find a route */
+ PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+ __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
+ ra, ntohs(raddr->sin_port));
+ ep->dst = find_route(dev, laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr, laddr->sin_port,
+ raddr->sin_port, 0);
+ } else {
+ iptype = 6;
+ ra = (__u8 *)&raddr6->sin6_addr;
+
+ /*
+ * Handle loopback requests to the IPv6 wildcard address.
+ */
+ if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
+ err = pick_local_ip6addrs(dev, cm_id);
+ if (err)
+ goto fail1;
+ }
+
+ /* find a route */
+ PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+ __func__, laddr6->sin6_addr.s6_addr,
+ ntohs(laddr6->sin6_port),
+ raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+ ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port, raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
+ }
+ if (!ep->dst) {
+ printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ err = -EHOSTUNREACH;
+ goto fail2;
+ }
+
+ err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
+ if (err) {
+ printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ goto fail3;
+ }
+
+ PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
+
+ state_set(&ep->com, CONNECTING);
+ ep->tos = 0;
+
+ /* send connect request to rnic */
+ err = send_connect(ep);
+ if (!err)
+ goto out;
+
+ cxgb4_l2t_release(ep->l2t);
+fail3:
+ dst_release(ep->dst);
+fail2:
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+fail1:
+ cm_id->rem_ref(cm_id);
+ c4iw_put_ep(&ep->com);
+out:
+ return err;
+}
+
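+/*
+ * Program a listening server TID for the (mapped) local IPv6 address and,
+ * on success, wait for the CPL reply before reporting the listen as
+ * established.
+ */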
+static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
+{
+ int err;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+
+ c4iw_init_wr_wait(&ep->com.wr_wait);
+ err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
+ ep->stid, &sin6->sin6_addr,
+ sin6->sin6_port,
+ ep->com.dev->rdev.lldi.rxq_ids[0]);
+ if (!err)
+ err = c4iw_wait_for_reply(&ep->com.dev->rdev,
+ &ep->com.wr_wait,
+ 0, 0, __func__);
+ if (err)
+ pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
+ err, ep->stid,
+ sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
+ return err;
+}
+
+static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
+{
+ int err;
+ struct sockaddr_in *sin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+
+ if (dev->rdev.lldi.enable_fw_ofld_conn) {
+ do {
+ err = cxgb4_create_server_filter(
+ ep->com.dev->rdev.lldi.ports[0], ep->stid,
+ sin->sin_addr.s_addr, sin->sin_port, 0,
+ ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
+ if (err == -EBUSY) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(usecs_to_jiffies(100));
+ }
+ } while (err == -EBUSY);
+ } else {
+ c4iw_init_wr_wait(&ep->com.wr_wait);
+ err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
+ ep->stid, sin->sin_addr.s_addr, sin->sin_port,
+ 0, ep->com.dev->rdev.lldi.rxq_ids[0]);
+ if (!err)
+ err = c4iw_wait_for_reply(&ep->com.dev->rdev,
+ &ep->com.wr_wait,
+ 0, 0, __func__);
+ }
+ if (err)
+ pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n",
+ err, ep->stid,
+ &sin->sin_addr, ntohs(sin->sin_port));
+ return err;
+}
+
+int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ int err = 0;
+ struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
+ struct c4iw_listen_ep *ep;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ int iwpm_err = 0;
+
+ might_sleep();
+
+ ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
+ if (!ep) {
+ printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ PDBG("%s ep %p\n", __func__, ep);
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.dev = dev;
+ ep->backlog = backlog;
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+
+ /*
+ * Allocate a server TID.
+ */
+ if (dev->rdev.lldi.enable_fw_ofld_conn &&
+ ep->com.local_addr.ss_family == AF_INET)
+ ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
+ cm_id->local_addr.ss_family, ep);
+ else
+ ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
+ cm_id->local_addr.ss_family, ep);
+
+ if (ep->stid == -1) {
+ printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
+ err = -ENOMEM;
+ goto fail2;
+ }
+ insert_handle(dev, &dev->stid_idr, ep, ep->stid);
+
+ /* No port mapper available, go with the specified info */
+ memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+ sizeof(ep->com.mapped_local_addr));
+
+ c4iw_form_reg_msg(dev, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+ if (iwpm_err) {
+ PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+ __func__, iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
+ if (iwpm_err)
+ PDBG("%s: Port Mapper query fail (err = %d).\n",
+ __func__, iwpm_err);
+ else
+ memcpy(&ep->com.mapped_local_addr,
+ &pm_msg.mapped_loc_addr,
+ sizeof(ep->com.mapped_local_addr));
+ }
+ if (iwpm_create_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+ err = -ENOMEM;
+ goto fail3;
+ }
+ print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
+
+ set_bit(RELEASE_MAPINFO, &ep->com.flags);
+ state_set(&ep->com, LISTEN);
+ if (ep->com.local_addr.ss_family == AF_INET)
+ err = create_server4(dev, ep);
+ else
+ err = create_server6(dev, ep);
+ if (!err) {
+ cm_id->provider_data = ep;
+ goto out;
+ }
+
+fail3:
+ cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+ ep->com.local_addr.ss_family);
+fail2:
+ cm_id->rem_ref(cm_id);
+ c4iw_put_ep(&ep->com);
+fail1:
+out:
+ return err;
+}
+
+int c4iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+ int err;
+ struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
+
+ PDBG("%s ep %p\n", __func__, ep);
+
+ might_sleep();
+ state_set(&ep->com, DEAD);
+ if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
+ ep->com.local_addr.ss_family == AF_INET) {
+ err = cxgb4_remove_server_filter(
+ ep->com.dev->rdev.lldi.ports[0], ep->stid,
+ ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ } else {
+ c4iw_init_wr_wait(&ep->com.wr_wait);
+ err = cxgb4_remove_server(
+ ep->com.dev->rdev.lldi.ports[0], ep->stid,
+ ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ if (err)
+ goto done;
+ err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
+ 0, 0, __func__);
+ }
+ remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
+ cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
+ ep->com.local_addr.ss_family);
+done:
+ cm_id->rem_ref(cm_id);
+ c4iw_put_ep(&ep->com);
+ return err;
+}
+
+int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
+{
+ int ret = 0;
+ int close = 0;
+ int fatal = 0;
+ struct c4iw_rdev *rdev;
+
+ mutex_lock(&ep->com.mutex);
+
+ PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
+ states[ep->com.state], abrupt);
+
+ rdev = &ep->com.dev->rdev;
+ if (c4iw_fatal_error(rdev)) {
+ fatal = 1;
+ close_complete_upcall(ep, -EIO);
+ ep->com.state = DEAD;
+ }
+ switch (ep->com.state) {
+ case MPA_REQ_WAIT:
+ case MPA_REQ_SENT:
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
+ close = 1;
+ if (abrupt)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
+ start_ep_timer(ep);
+ }
+ set_bit(CLOSE_SENT, &ep->com.flags);
+ break;
+ case CLOSING:
+ if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+ close = 1;
+ if (abrupt) {
+ (void)stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
+ }
+ break;
+ case MORIBUND:
+ case ABORTING:
+ case DEAD:
+ PDBG("%s ignoring disconnect ep %p state %u\n",
+ __func__, ep, ep->com.state);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ if (close) {
+ if (abrupt) {
+ set_bit(EP_DISC_ABORT, &ep->com.history);
+ close_complete_upcall(ep, -ECONNRESET);
+ ret = send_abort(ep, NULL, gfp);
+ } else {
+ set_bit(EP_DISC_CLOSE, &ep->com.history);
+ ret = send_halfclose(ep, gfp);
+ }
+ if (ret)
+ fatal = 1;
+ }
+ mutex_unlock(&ep->com.mutex);
+ if (fatal)
+ release_ep_resources(ep);
+ return ret;
+}
+
+static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
+ struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
+{
+ struct c4iw_ep *ep;
+ int atid = be32_to_cpu(req->tid);
+
+ ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
+ (__force u32) req->tid);
+ if (!ep)
+ return;
+
+ switch (req->retval) {
+ case FW_ENOMEM:
+ set_bit(ACT_RETRY_NOMEM, &ep->com.history);
+ if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
+ send_fw_act_open_req(ep, atid);
+ return;
+ }
+ case FW_EADDRINUSE:
+ set_bit(ACT_RETRY_INUSE, &ep->com.history);
+ if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
+ send_fw_act_open_req(ep, atid);
+ return;
+ }
+ break;
+ default:
+ pr_info("%s unexpected ofld conn wr retval %d\n",
+ __func__, req->retval);
+ break;
+ }
+ pr_err("active ofld_connect_wr failure %d atid %d\n",
+ req->retval, atid);
+ mutex_lock(&dev->rdev.stats.lock);
+ dev->rdev.stats.act_ofld_conn_fails++;
+ mutex_unlock(&dev->rdev.stats.lock);
+ connect_reply_upcall(ep, status2errno(req->retval));
+ state_set(&ep->com, DEAD);
+ remove_handle(dev, &dev->atid_idr, atid);
+ cxgb4_free_atid(dev->rdev.lldi.tids, atid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_put_ep(&ep->com);
+}
+
+static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
+ struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
+{
+ struct sk_buff *rpl_skb;
+ struct cpl_pass_accept_req *cpl;
+ int ret;
+
+ rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
+ BUG_ON(!rpl_skb);
+ if (req->retval) {
+ PDBG("%s passive open failure %d\n", __func__, req->retval);
+ mutex_lock(&dev->rdev.stats.lock);
+ dev->rdev.stats.pas_ofld_conn_fails++;
+ mutex_unlock(&dev->rdev.stats.lock);
+ kfree_skb(rpl_skb);
+ } else {
+ cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
+ OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
+ (__force u32) htonl(
+ (__force u32) req->tid)));
+ ret = pass_accept_req(dev, rpl_skb);
+ if (!ret)
+ kfree_skb(rpl_skb);
+ }
+ return;
+}
+
+static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_fw6_msg *rpl = cplhdr(skb);
+ struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
+
+ switch (rpl->type) {
+ case FW6_TYPE_CQE:
+ c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
+ break;
+ case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
+ req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
+ switch (req->t_state) {
+ case TCP_SYN_SENT:
+ active_ofld_conn_reply(dev, skb, req);
+ break;
+ case TCP_SYN_RECV:
+ passive_ofld_conn_reply(dev, skb, req);
+ break;
+ default:
+ pr_err("%s unexpected ofld conn wr state %d\n",
+ __func__, req->t_state);
+ break;
+ }
+ break;
+ }
+ return 0;
+}
+
+static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
+{
+ u32 l2info;
+ u16 vlantag, len, hdr_len, eth_hdr_len;
+ u8 intf;
+ struct cpl_rx_pkt *cpl = cplhdr(skb);
+ struct cpl_pass_accept_req *req;
+ struct tcp_options_received tmp_opt;
+ struct c4iw_dev *dev;
+
+ dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
+ /* Store values from cpl_rx_pkt in temporary location. */
+ vlantag = (__force u16) cpl->vlan;
+ len = (__force u16) cpl->len;
+ l2info = (__force u32) cpl->l2info;
+ hdr_len = (__force u16) cpl->hdr_len;
+ intf = cpl->iff;
+
+ __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
+
+ /*
+ * We need to parse the TCP options from the SYN packet
+ * to generate the cpl_pass_accept_req.
+ */
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ tcp_clear_options(&tmp_opt);
+ tcp_parse_options(skb, &tmp_opt, 0, NULL);
+
+ req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
+ req->l2info = cpu_to_be16(V_SYN_INTF(intf) |
+ V_SYN_MAC_IDX(G_RX_MACIDX(
+ (__force int) htonl(l2info))) |
+ F_SYN_XACT_MATCH);
+ eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
+ G_RX_ETHHDR_LEN((__force int) htonl(l2info)) :
+ G_RX_T5_ETHHDR_LEN((__force int) htonl(l2info));
+ req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(
+ (__force int) htonl(l2info))) |
+ V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(
+ (__force int) htons(hdr_len))) |
+ V_IP_HDR_LEN(G_RX_IPHDR_LEN(
+ (__force int) htons(hdr_len))) |
+ V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(eth_hdr_len)));
+ req->vlan = (__force __be16) vlantag;
+ req->len = (__force __be16) len;
+ req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) |
+ PASS_OPEN_TOS(tos));
+ req->tcpopt.mss = htons(tmp_opt.mss_clamp);
+ if (tmp_opt.wscale_ok)
+ req->tcpopt.wsf = tmp_opt.snd_wscale;
+ req->tcpopt.tstamp = tmp_opt.saw_tstamp;
+ if (tmp_opt.sack_ok)
+ req->tcpopt.sack = 1;
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
+ return;
+}
+
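+/*
+ * Build and send a FW_OFLD_CONNECTION_WR so the firmware allocates a TID for
+ * this filter-redirected SYN.  The original skb is stashed in the WR cookie
+ * and is replayed through pass_accept_req() by passive_ofld_conn_reply()
+ * once the reply arrives.
+ */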
+static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
+ __be32 laddr, __be16 lport,
+ __be32 raddr, __be16 rport,
+ u32 rcv_isn, u32 filter, u16 window,
+ u32 rss_qid, u8 port_id)
+{
+ struct sk_buff *req_skb;
+ struct fw_ofld_connection_wr *req;
+ struct cpl_pass_accept_req *cpl = cplhdr(skb);
+ int ret;
+
+ req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
+ if (!req_skb) {
+ /* drop the stashed SYN if the WR skb cannot be allocated */
+ kfree_skb(skb);
+ return;
+ }
+ req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
+ req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1));
+ req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
+ req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL);
+ req->le.filter = (__force __be32) filter;
+ req->le.lport = lport;
+ req->le.pport = rport;
+ req->le.u.ipv4.lip = laddr;
+ req->le.u.ipv4.pip = raddr;
+ req->tcb.rcv_nxt = htonl(rcv_isn + 1);
+ req->tcb.rcv_adv = htons(window);
+ req->tcb.t_state_to_astid =
+ htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) |
+ V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) |
+ V_FW_OFLD_CONNECTION_WR_ASTID(
+ GET_PASS_OPEN_TID(ntohl(cpl->tos_stid))));
+
+ /*
+ * We store the qid in opt2 which will be used by the firmware
+ * to send us the wr response.
+ */
+ req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid));
+
+ /*
+ * We initialize the MSS index in TCB to 0xF.
+ * So that when driver sends cpl_pass_accept_rpl
+ * TCB picks up the correct value. If this was 0
+ * TP will ignore any value > 0 for MSS index.
+ */
+ req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF));
+ req->cookie = (unsigned long)skb;
+
+ set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
+ ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+ if (ret < 0) {
+ pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
+ ret);
+ kfree_skb(skb);
+ kfree_skb(req_skb);
+ }
+}
+
+/*
+ * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt
+ * messages when a filter is being used instead of a server to
+ * redirect a SYN packet. When packets hit the filter they are redirected
+ * to the offload queue and the driver tries to establish the connection
+ * using a firmware work request.
+ */
+static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ int stid;
+ unsigned int filter;
+ struct ethhdr *eh = NULL;
+ struct vlan_ethhdr *vlan_eh = NULL;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct rss_header *rss = (void *)skb->data;
+ struct cpl_rx_pkt *cpl = (void *)skb->data;
+ struct cpl_pass_accept_req *req = (void *)(rss + 1);
+ struct l2t_entry *e;
+ struct dst_entry *dst;
+ struct c4iw_ep *lep;
+ u16 window;
+ struct port_info *pi;
+ struct net_device *pdev;
+ u16 rss_qid, eth_hdr_len;
+ int step;
+ u32 tx_chan;
+ struct neighbour *neigh;
+
+ /* Drop all non-SYN packets */
+ if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN)))
+ goto reject;
+
+ /*
+ * Drop all packets which did not hit the filter.
+ * Unlikely to happen.
+ */
+ if (!(rss->filter_hit && rss->filter_tid))
+ goto reject;
+
+ /*
+ * Calculate the server TID from the filter hit index in the cpl_rx_pkt.
+ */
+ stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
+
+ lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
+ if (!lep) {
+ PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ goto reject;
+ }
+
+ eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
+ G_RX_ETHHDR_LEN(htonl(cpl->l2info)) :
+ G_RX_T5_ETHHDR_LEN(htonl(cpl->l2info));
+ if (eth_hdr_len == ETH_HLEN) {
+ eh = (struct ethhdr *)(req + 1);
+ iph = (struct iphdr *)(eh + 1);
+ } else {
+ vlan_eh = (struct vlan_ethhdr *)(req + 1);
+ iph = (struct iphdr *)(vlan_eh + 1);
+ skb->vlan_tci = ntohs(cpl->vlan);
+ }
+
+ if (iph->version != 0x4)
+ goto reject;
+
+ tcph = (struct tcphdr *)(iph + 1);
+ skb_set_network_header(skb, (void *)iph - (void *)rss);
+ skb_set_transport_header(skb, (void *)tcph - (void *)rss);
+ skb_get(skb);
+
+ PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
+ ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
+ ntohs(tcph->source), iph->tos);
+
+ dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
+ iph->tos);
+ if (!dst) {
+ pr_err("%s - failed to find dst entry!\n",
+ __func__);
+ goto reject;
+ }
+ neigh = dst_neigh_lookup_skb(dst, skb);
+
+ if (!neigh) {
+ pr_err("%s - failed to allocate neigh!\n",
+ __func__);
+ goto free_dst;
+ }
+
+ if (neigh->dev->flags & IFF_LOOPBACK) {
+ pdev = ip_dev_find(&init_net, iph->daddr);
+ e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
+ pdev, 0);
+ pi = (struct port_info *)netdev_priv(pdev);
+ tx_chan = cxgb4_port_chan(pdev);
+ dev_put(pdev);
+ } else {
+ pdev = get_real_dev(neigh->dev);
+ e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
+ pdev, 0);
+ pi = (struct port_info *)netdev_priv(pdev);
+ tx_chan = cxgb4_port_chan(pdev);
+ }
+ neigh_release(neigh);
+ if (!e) {
+ pr_err("%s - failed to allocate l2t entry!\n",
+ __func__);
+ goto free_dst;
+ }
+
+ step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
+ rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
+ window = (__force u16) htons((__force u16)tcph->window);
+
+ /* Calculate the filter portion for the LE region. */
+ filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
+ dev->rdev.lldi.ports[0],
+ e));
+
+ /*
+ * Synthesize the cpl_pass_accept_req. We have everything except the
+ * TID. Once firmware sends a reply with TID we update the TID field
+ * in cpl and pass it through the regular cpl_pass_accept_req path.
+ */
+ build_cpl_pass_accept_req(skb, stid, iph->tos);
+ send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
+ tcph->source, ntohl(tcph->seq), filter, window,
+ rss_qid, pi->port_id);
+ cxgb4_l2t_release(e);
+free_dst:
+ dst_release(dst);
+reject:
+ return 0;
+}
+
+/*
+ * These are the real handlers that are called from a
+ * work queue.
+ */
+static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
+ [CPL_ACT_ESTABLISH] = act_establish,
+ [CPL_ACT_OPEN_RPL] = act_open_rpl,
+ [CPL_RX_DATA] = rx_data,
+ [CPL_ABORT_RPL_RSS] = abort_rpl,
+ [CPL_ABORT_RPL] = abort_rpl,
+ [CPL_PASS_OPEN_RPL] = pass_open_rpl,
+ [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
+ [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
+ [CPL_PASS_ESTABLISH] = pass_establish,
+ [CPL_PEER_CLOSE] = peer_close,
+ [CPL_ABORT_REQ_RSS] = peer_abort,
+ [CPL_CLOSE_CON_RPL] = close_con_rpl,
+ [CPL_RDMA_TERMINATE] = terminate,
+ [CPL_FW4_ACK] = fw4_ack,
+ [CPL_FW6_MSG] = deferred_fw6_msg,
+ [CPL_RX_PKT] = rx_pkt
+};
+
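+/*
+ * Handle an endpoint whose timer has expired: issue the upcall appropriate
+ * for the current state and, unless the ep is already aborting or dead,
+ * abort the connection.
+ */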
+static void process_timeout(struct c4iw_ep *ep)
+{
+ struct c4iw_qp_attributes attrs;
+ int abort = 1;
+
+ mutex_lock(&ep->com.mutex);
+ PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
+ ep->com.state);
+ set_bit(TIMEDOUT, &ep->com.history);
+ switch (ep->com.state) {
+ case MPA_REQ_SENT:
+ __state_set(&ep->com, ABORTING);
+ connect_reply_upcall(ep, -ETIMEDOUT);
+ break;
+ case MPA_REQ_WAIT:
+ __state_set(&ep->com, ABORTING);
+ break;
+ case CLOSING:
+ case MORIBUND:
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+ __state_set(&ep->com, ABORTING);
+ close_complete_upcall(ep, -ETIMEDOUT);
+ break;
+ case ABORTING:
+ case DEAD:
+
+ /*
+ * These states are expected if the ep timed out at the same
+ * time as another thread was calling stop_ep_timer().
+ * So we silently do nothing for these states.
+ */
+ abort = 0;
+ break;
+ default:
+ WARN(1, "%s unexpected state ep %p tid %u state %u\n",
+ __func__, ep, ep->hwtid, ep->com.state);
+ abort = 0;
+ }
+ if (abort)
+ abort_connection(ep, NULL, GFP_KERNEL);
+ mutex_unlock(&ep->com.mutex);
+ c4iw_put_ep(&ep->com);
+}
+
+static void process_timedout_eps(void)
+{
+ struct c4iw_ep *ep;
+
+ spin_lock_irq(&timeout_lock);
+ while (!list_empty(&timeout_list)) {
+ struct list_head *tmp;
+
+ tmp = timeout_list.next;
+ list_del(tmp);
+ tmp->next = NULL;
+ tmp->prev = NULL;
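+ /*
+ * Drop timeout_lock before calling process_timeout(), which takes
+ * the ep mutex and may sleep.
+ */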
+ spin_unlock_irq(&timeout_lock);
+ ep = list_entry(tmp, struct c4iw_ep, entry);
+ process_timeout(ep);
+ spin_lock_irq(&timeout_lock);
+ }
+ spin_unlock_irq(&timeout_lock);
+}
+
+static void process_work(struct work_struct *work)
+{
+ struct sk_buff *skb = NULL;
+ struct c4iw_dev *dev;
+ struct cpl_act_establish *rpl;
+ unsigned int opcode;
+ int ret;
+
+ process_timedout_eps();
+ while ((skb = skb_dequeue(&rxq))) {
+ rpl = cplhdr(skb);
+ dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
+ opcode = rpl->ot.opcode;
+
+ BUG_ON(!work_handlers[opcode]);
+ ret = work_handlers[opcode](dev, skb);
+ if (!ret)
+ kfree_skb(skb);
+ process_timedout_eps();
+ }
+}
+
+static DECLARE_WORK(skb_work, process_work);
+
+static void ep_timeout(unsigned long arg)
+{
+ struct c4iw_ep *ep = (struct c4iw_ep *)arg;
+ int kickit = 0;
+
+ spin_lock(&timeout_lock);
+ if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
+ /*
+ * Only insert if it is not already on the list.
+ */
+ if (!ep->entry.next) {
+ list_add_tail(&ep->entry, &timeout_list);
+ kickit = 1;
+ }
+ }
+ spin_unlock(&timeout_lock);
+ if (kickit)
+ queue_work(workq, &skb_work);
+}
+
+/*
+ * All the CM events are handled on a work queue to have a safe context.
+ */
+static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+
+ /*
+ * Save dev in the skb->cb area.
+ */
+ *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
+
+ /*
+ * Queue the skb and schedule the worker thread.
+ */
+ skb_queue_tail(&rxq, skb);
+ queue_work(workq, &skb_work);
+ return 0;
+}
+
+static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
+
+ if (rpl->status != CPL_ERR_NONE) {
+ printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
+ "for tid %u\n", rpl->status, GET_TID(rpl));
+ }
+ kfree_skb(skb);
+ return 0;
+}
+
+static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_fw6_msg *rpl = cplhdr(skb);
+ struct c4iw_wr_wait *wr_waitp;
+ int ret;
+
+ PDBG("%s type %u\n", __func__, rpl->type);
+
+ switch (rpl->type) {
+ case FW6_TYPE_WR_RPL:
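+ /*
+ * The reply carries the completion status in data[0] and the
+ * wr_wait cookie that was posted with the WR in data[1].
+ */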
+ ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
+ wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
+ PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
+ if (wr_waitp)
+ c4iw_wake_up(wr_waitp, ret ? -ret : 0);
+ kfree_skb(skb);
+ break;
+ case FW6_TYPE_CQE:
+ case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
+ sched(dev, skb);
+ break;
+ default:
+ printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
+ rpl->type);
+ kfree_skb(skb);
+ break;
+ }
+ return 0;
+}
+
+static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_abort_req_rss *req = cplhdr(skb);
+ struct c4iw_ep *ep;
+ struct tid_info *t = dev->rdev.lldi.tids;
+ unsigned int tid = GET_TID(req);
+
+ ep = lookup_tid(t, tid);
+ if (!ep) {
+ printk(KERN_WARNING MOD
+ "Abort on non-existent endpoint, tid %d\n", tid);
+ kfree_skb(skb);
+ return 0;
+ }
+ if (is_neg_adv(req->status)) {
+ PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
+ ep->hwtid);
+ kfree_skb(skb);
+ return 0;
+ }
+ PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+ ep->com.state);
+
+ /*
+ * Wake up any threads in rdma_init() or rdma_fini().
+ * However, if we are on MPAv2 and want to retry with MPAv1
+ * then, don't wake up yet.
+ */
+ if (mpa_rev == 2 && !ep->tried_with_mpa_v1) {
+ if (ep->com.state != MPA_REQ_SENT)
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ } else
+ c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ sched(dev, skb);
+ return 0;
+}
+
+/*
+ * Most upcalls from the T4 Core go to sched() to
+ * schedule the processing on a work queue.
+ */
+c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
+ [CPL_ACT_ESTABLISH] = sched,
+ [CPL_ACT_OPEN_RPL] = sched,
+ [CPL_RX_DATA] = sched,
+ [CPL_ABORT_RPL_RSS] = sched,
+ [CPL_ABORT_RPL] = sched,
+ [CPL_PASS_OPEN_RPL] = sched,
+ [CPL_CLOSE_LISTSRV_RPL] = sched,
+ [CPL_PASS_ACCEPT_REQ] = sched,
+ [CPL_PASS_ESTABLISH] = sched,
+ [CPL_PEER_CLOSE] = sched,
+ [CPL_CLOSE_CON_RPL] = sched,
+ [CPL_ABORT_REQ_RSS] = peer_abort_intr,
+ [CPL_RDMA_TERMINATE] = sched,
+ [CPL_FW4_ACK] = sched,
+ [CPL_SET_TCB_RPL] = set_tcb_rpl,
+ [CPL_FW6_MSG] = fw6_msg,
+ [CPL_RX_PKT] = sched
+};
+
+int __init c4iw_cm_init(void)
+{
+ spin_lock_init(&timeout_lock);
+ skb_queue_head_init(&rxq);
+
+ workq = create_singlethread_workqueue("iw_cxgb4");
+ if (!workq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void c4iw_cm_term(void)
+{
+ WARN_ON(!list_empty(&timeout_list));
+ flush_workqueue(workq);
+ destroy_workqueue(workq);
+}
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
new file mode 100644
index 00000000000..c04292c950f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -0,0 +1,1011 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iw_cxgb4.h"
+
+static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+ struct c4iw_dev_ucontext *uctx)
+{
+ struct fw_ri_res_wr *res_wr;
+ struct fw_ri_res *res;
+ int wr_len;
+ struct c4iw_wr_wait wr_wait;
+ struct sk_buff *skb;
+ int ret;
+
+ wr_len = sizeof *res_wr + sizeof *res;
+ skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+ res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+ memset(res_wr, 0, wr_len);
+ res_wr->op_nres = cpu_to_be32(
+ FW_WR_OP(FW_RI_RES_WR) |
+ V_FW_RI_RES_WR_NRES(1) |
+ FW_WR_COMPL(1));
+ res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+ res_wr->cookie = (unsigned long) &wr_wait;
+ res = res_wr->res;
+ res->u.cq.restype = FW_RI_RES_TYPE_CQ;
+ res->u.cq.op = FW_RI_RES_OP_RESET;
+ res->u.cq.iqid = cpu_to_be32(cq->cqid);
+
+ c4iw_init_wr_wait(&wr_wait);
+ ret = c4iw_ofld_send(rdev, skb);
+ if (!ret) {
+ ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+ }
+
+ kfree(cq->sw_queue);
+ dma_free_coherent(&(rdev->lldi.pdev->dev),
+ cq->memsize, cq->queue,
+ dma_unmap_addr(cq, mapping));
+ c4iw_put_cqid(rdev, cq->cqid, uctx);
+ return ret;
+}
+
+static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+ struct c4iw_dev_ucontext *uctx)
+{
+ struct fw_ri_res_wr *res_wr;
+ struct fw_ri_res *res;
+ int wr_len;
+ int user = (uctx != &rdev->uctx);
+ struct c4iw_wr_wait wr_wait;
+ int ret;
+ struct sk_buff *skb;
+
+ cq->cqid = c4iw_get_cqid(rdev, uctx);
+ if (!cq->cqid) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ if (!user) {
+ cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
+ if (!cq->sw_queue) {
+ ret = -ENOMEM;
+ goto err2;
+ }
+ }
+ cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize,
+ &cq->dma_addr, GFP_KERNEL);
+ if (!cq->queue) {
+ ret = -ENOMEM;
+ goto err3;
+ }
+ dma_unmap_addr_set(cq, mapping, cq->dma_addr);
+ memset(cq->queue, 0, cq->memsize);
+
+ /* build fw_ri_res_wr */
+ wr_len = sizeof *res_wr + sizeof *res;
+
+ skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto err4;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+ res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+ memset(res_wr, 0, wr_len);
+ res_wr->op_nres = cpu_to_be32(
+ FW_WR_OP(FW_RI_RES_WR) |
+ V_FW_RI_RES_WR_NRES(1) |
+ FW_WR_COMPL(1));
+ res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+ res_wr->cookie = (unsigned long) &wr_wait;
+ res = res_wr->res;
+ res->u.cq.restype = FW_RI_RES_TYPE_CQ;
+ res->u.cq.op = FW_RI_RES_OP_WRITE;
+ res->u.cq.iqid = cpu_to_be32(cq->cqid);
+ res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
+ V_FW_RI_RES_WR_IQANUS(0) |
+ V_FW_RI_RES_WR_IQANUD(1) |
+ F_FW_RI_RES_WR_IQANDST |
+ V_FW_RI_RES_WR_IQANDSTINDEX(
+ rdev->lldi.ciq_ids[cq->vector]));
+ res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
+ F_FW_RI_RES_WR_IQDROPRSS |
+ V_FW_RI_RES_WR_IQPCIECH(2) |
+ V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
+ F_FW_RI_RES_WR_IQO |
+ V_FW_RI_RES_WR_IQESIZE(1));
+ res->u.cq.iqsize = cpu_to_be16(cq->size);
+ res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
+
+ c4iw_init_wr_wait(&wr_wait);
+
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret)
+ goto err4;
+ PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait);
+ ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+ if (ret)
+ goto err4;
+
+ cq->gen = 1;
+ cq->gts = rdev->lldi.gts_reg;
+ cq->rdev = rdev;
+ if (user) {
+ cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
+ (cq->cqid << rdev->cqshift);
+ cq->ugts &= PAGE_MASK;
+ }
+ return 0;
+err4:
+ dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue,
+ dma_unmap_addr(cq, mapping));
+err3:
+ kfree(cq->sw_queue);
+err2:
+ c4iw_put_cqid(rdev, cq->cqid, uctx);
+err1:
+ return ret;
+}
+
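+/*
+ * Synthesize a software FLUSH CQE for one RQ entry and add it to the SW CQ.
+ */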
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+{
+ struct t4_cqe cqe;
+
+ PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
+ V_CQE_OPCODE(FW_RI_SEND) |
+ V_CQE_TYPE(0) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->sq.qid));
+ cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
+ cq->sw_queue[cq->sw_pidx] = cqe;
+ t4_swcq_produce(cq);
+}
+
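+/*
+ * Flush the RQ: insert one software flush CQE for each RQ WR still in use,
+ * less the count entries the caller has already accounted for.
+ */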
+int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
+{
+ int flushed = 0;
+ int in_use = wq->rq.in_use - count;
+
+ BUG_ON(in_use < 0);
+ PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
+ wq, cq, wq->rq.in_use, count);
+ while (in_use--) {
+ insert_recv_cqe(wq, cq);
+ flushed++;
+ }
+ return flushed;
+}
+
+static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
+ struct t4_swsqe *swcqe)
+{
+ struct t4_cqe cqe;
+
+ PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
+ memset(&cqe, 0, sizeof(cqe));
+ cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
+ V_CQE_OPCODE(swcqe->opcode) |
+ V_CQE_TYPE(1) |
+ V_CQE_SWCQE(1) |
+ V_CQE_QPID(wq->sq.qid));
+ CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
+ cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
+ cq->sw_queue[cq->sw_pidx] = cqe;
+ t4_swcq_produce(cq);
+}
+
+static void advance_oldest_read(struct t4_wq *wq);
+
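+/*
+ * Flush the SQ: insert a software flush CQE into the send CQ for every WR
+ * still pending between flush_cidx and pidx, advancing the oldest-read
+ * pointer past any flushed READ requests.
+ */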
+int c4iw_flush_sq(struct c4iw_qp *qhp)
+{
+ int flushed = 0;
+ struct t4_wq *wq = &qhp->wq;
+ struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
+ struct t4_cq *cq = &chp->cq;
+ int idx;
+ struct t4_swsqe *swsqe;
+
+ if (wq->sq.flush_cidx == -1)
+ wq->sq.flush_cidx = wq->sq.cidx;
+ idx = wq->sq.flush_cidx;
+ BUG_ON(idx >= wq->sq.size);
+ while (idx != wq->sq.pidx) {
+ swsqe = &wq->sq.sw_sq[idx];
+ BUG_ON(swsqe->flushed);
+ swsqe->flushed = 1;
+ insert_sq_cqe(wq, cq, swsqe);
+ if (wq->sq.oldest_read == swsqe) {
+ BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+ advance_oldest_read(wq);
+ }
+ flushed++;
+ if (++idx == wq->sq.size)
+ idx = 0;
+ }
+ wq->sq.flush_cidx += flushed;
+ if (wq->sq.flush_cidx >= wq->sq.size)
+ wq->sq.flush_cidx -= wq->sq.size;
+ return flushed;
+}
+
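+/*
+ * Walk the SW SQ from flush_cidx and move completed CQEs into the SW CQ in
+ * order, skipping unsignaled WRs and stopping at the first signaled WR that
+ * has not yet completed.
+ */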
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+ struct t4_swsqe *swsqe;
+ int cidx;
+
+ if (wq->sq.flush_cidx == -1)
+ wq->sq.flush_cidx = wq->sq.cidx;
+ cidx = wq->sq.flush_cidx;
+ BUG_ON(cidx > wq->sq.size);
+
+ while (cidx != wq->sq.pidx) {
+ swsqe = &wq->sq.sw_sq[cidx];
+ if (!swsqe->signaled) {
+ if (++cidx == wq->sq.size)
+ cidx = 0;
+ } else if (swsqe->complete) {
+
+ BUG_ON(swsqe->flushed);
+
+ /*
+ * Insert this completed cqe into the swcq.
+ */
+ PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
+ __func__, cidx, cq->sw_pidx);
+ swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+ cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+ t4_swcq_produce(cq);
+ swsqe->flushed = 1;
+ if (++cidx == wq->sq.size)
+ cidx = 0;
+ wq->sq.flush_cidx = cidx;
+ } else
+ break;
+ }
+}
+
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+ struct t4_cqe *read_cqe)
+{
+ read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
+ read_cqe->len = htonl(wq->sq.oldest_read->read_len);
+ read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
+ V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+ V_CQE_OPCODE(FW_RI_READ_REQ) |
+ V_CQE_TYPE(1));
+ read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
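+/*
+ * Advance the oldest-read pointer to the next outstanding READ request in
+ * the SW SQ, or set it to NULL if none remains.
+ */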
+static void advance_oldest_read(struct t4_wq *wq)
+{
+
+ u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+ if (rptr == wq->sq.size)
+ rptr = 0;
+ while (rptr != wq->sq.pidx) {
+ wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
+
+ if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+ return;
+ if (++rptr == wq->sq.size)
+ rptr = 0;
+ }
+ wq->sq.oldest_read = NULL;
+}
+
+/*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order and/or completions that complete
+ * prior unsignalled WRs.
+ */
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
+{
+ struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+ struct c4iw_qp *qhp;
+ struct t4_swsqe *swsqe;
+ int ret;
+
+ PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+ ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+ /*
+ * This logic is similar to poll_cq(), but not quite the same
+ * unfortunately. Need to move pertinent HW CQEs to the SW CQ but
+ * also do any translation magic that poll_cq() normally does.
+ */
+ while (!ret) {
+ qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+ /*
+ * drop CQEs with no associated QP
+ */
+ if (qhp == NULL)
+ goto next_cqe;
+
+ if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+ goto next_cqe;
+
+ if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+ /* If we have reached here because of an async
+ * event or other error, and have an egress error,
+ * then drop it.
+ */
+ if (CQE_TYPE(hw_cqe) == 1)
+ goto next_cqe;
+
+ /* drop peer2peer RTR reads.
+ */
+ if (CQE_WRID_STAG(hw_cqe) == 1)
+ goto next_cqe;
+
+ /*
+ * Eat completions for unsignaled read WRs.
+ */
+ if (!qhp->wq.sq.oldest_read->signaled) {
+ advance_oldest_read(&qhp->wq);
+ goto next_cqe;
+ }
+
+ /*
+ * Don't write to the HWCQ, create a new read req CQE
+ * in local memory and move it into the swcq.
+ */
+ create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(&qhp->wq);
+ }
+
+ /* if it's an SQ completion, then do the magic to move all the
+ * unsignaled and now in-order completions into the swcq.
+ */
+ if (SQ_TYPE(hw_cqe)) {
+ swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+ swsqe->cqe = *hw_cqe;
+ swsqe->complete = 1;
+ flush_completed_wrs(&qhp->wq, &chp->cq);
+ } else {
+ swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+ *swcqe = *hw_cqe;
+ swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+ t4_swcq_produce(&chp->cq);
+ }
+next_cqe:
+ t4_hwcq_consume(&chp->cq);
+ ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+ }
+}
+
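+/*
+ * Return 1 if this CQE completes a WR, 0 for CQEs that do not consume a WR
+ * (TERMINATE, RQ-side write completions, SQ-side read responses, or sends
+ * seen while the RQ is empty).
+ */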
+static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
+{
+ if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
+ return 0;
+
+ if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
+ return 0;
+
+ if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
+ return 0;
+
+ if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
+ return 0;
+ return 1;
+}
+
+void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
+{
+ struct t4_cqe *cqe;
+ u32 ptr;
+
+ *count = 0;
+ PDBG("%s count zero %d\n", __func__, *count);
+ ptr = cq->sw_cidx;
+ while (ptr != cq->sw_pidx) {
+ cqe = &cq->sw_queue[ptr];
+ if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
+ (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
+ (*count)++;
+ if (++ptr == cq->size)
+ ptr = 0;
+ }
+ PDBG("%s cq %p count %d\n", __func__, cq, *count);
+}
+
+/*
+ * poll_cq
+ *
+ * Caller must:
+ * check the validity of the first CQE,
+ * supply the wq associated with the qpid.
+ *
+ * credit: cq credit to return to sge.
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ *
+ * return value:
+ * 0 CQE returned ok.
+ * -EAGAIN CQE skipped, try again.
+ * -EOVERFLOW CQ overflow detected.
+ */
+static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+ int ret = 0;
+ struct t4_cqe *hw_cqe, read_cqe;
+
+ *cqe_flushed = 0;
+ *credit = 0;
+ ret = t4_next_cqe(cq, &hw_cqe);
+ if (ret)
+ return ret;
+
+ PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x"
+ " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
+ CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
+ CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+ CQE_WRID_LOW(hw_cqe));
+
+ /*
+ * skip cqe's not affiliated with a QP.
+ */
+ if (wq == NULL) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * skip hw cqe's if the wq is flushed.
+ */
+ if (wq->flushed && !SW_CQE(hw_cqe)) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * skip TERMINATE cqes...
+ */
+ if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * Gotta tweak READ completions:
+ * 1) the cqe doesn't contain the sq_wptr from the wr.
+ * 2) opcode not reflected from the wr.
+ * 3) read_len not reflected from the wr.
+ * 4) cq_type is RQ_TYPE not SQ_TYPE.
+ */
+ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
+
+ /* If we have reached here because of an async
+ * event or other error, and have an egress error,
+ * then drop it.
+ */
+ if (CQE_TYPE(hw_cqe) == 1) {
+ if (CQE_STATUS(hw_cqe))
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /* If this is an unsolicited read response, then the read
+ * was generated by the kernel driver as part of peer-2-peer
+ * connection setup. So ignore the completion.
+ */
+ if (CQE_WRID_STAG(hw_cqe) == 1) {
+ if (CQE_STATUS(hw_cqe))
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * Eat completions for unsignaled read WRs.
+ */
+ if (!wq->sq.oldest_read->signaled) {
+ advance_oldest_read(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * Don't write to the HWCQ, so create a new read req CQE
+ * in local memory.
+ */
+ create_read_req_cqe(wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(wq);
+ }
+
+ if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
+ *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
+ t4_set_wq_in_error(wq);
+ }
+
+ /*
+ * RECV completion.
+ */
+ if (RQ_TYPE(hw_cqe)) {
+
+ /*
+ * HW only validates 4 bits of MSN. So we must validate that
+ * the MSN in the SEND is the next expected MSN. If its not,
+ * then we complete this with T4_ERR_MSN and mark the wq in
+ * error.
+ */
+
+ if (t4_rq_empty(wq)) {
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+ if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+ t4_set_wq_in_error(wq);
+ hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
+ goto proc_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * If we get here it's a send completion.
+ *
+ * Handle out of order completion. These get stuffed
+ * in the SW SQ. Then the SW SQ is walked to move any
+ * now in-order completions into the SW CQ. This handles
+ * 2 cases:
+ * 1) reaping unsignaled WRs when the first subsequent
+ * signaled WR is completed.
+ * 2) out of order read completions.
+ */
+ if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
+ struct t4_swsqe *swsqe;
+
+ PDBG("%s out of order completion going in sw_sq at idx %u\n",
+ __func__, CQE_WRID_SQ_IDX(hw_cqe));
+ swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+ swsqe->cqe = *hw_cqe;
+ swsqe->complete = 1;
+ ret = -EAGAIN;
+ goto flush_wq;
+ }
+
+proc_cqe:
+ *cqe = *hw_cqe;
+
+ /*
+ * Reap the associated WR(s) that are freed up with this
+ * completion.
+ */
+ if (SQ_TYPE(hw_cqe)) {
+ int idx = CQE_WRID_SQ_IDX(hw_cqe);
+ BUG_ON(idx >= wq->sq.size);
+
+ /*
+ * Account for any unsignaled completions completed by
+ * this signaled completion. In this case, cidx points
+ * to the first unsignaled one, and idx points to the
+ * signaled one. So adjust in_use based on this delta.
+ * If this is not completing any unsignaled WRs, then the
+ * delta will be 0. Handle wrapping also!
+ */
+ if (idx < wq->sq.cidx)
+ wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
+ else
+ wq->sq.in_use -= idx - wq->sq.cidx;
+ BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
+
+ wq->sq.cidx = (uint16_t)idx;
+ PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
+ *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
+ t4_sq_consume(wq);
+ } else {
+ PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx);
+ *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+ BUG_ON(t4_rq_empty(wq));
+ t4_rq_consume(wq);
+ goto skip_cqe;
+ }
+
+flush_wq:
+ /*
+ * Flush any completed cqes that are now in-order.
+ */
+ flush_completed_wrs(wq, cq);
+
+skip_cqe:
+ if (SW_CQE(hw_cqe)) {
+ PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->sw_cidx);
+ t4_swcq_consume(cq);
+ } else {
+ PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
+ __func__, cq, cq->cqid, cq->cidx);
+ t4_hwcq_consume(cq);
+ }
+ return ret;
+}
+
+/*
+ * Get one cq entry from c4iw and map it to openib.
+ *
+ * Returns:
+ * 0 CQE returned.
+ * -ENODATA CQ is empty.
+ * -EAGAIN caller must try again.
+ * any other negative errno: fatal error.
+ */
+static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+{
+ struct c4iw_qp *qhp = NULL;
+ struct t4_cqe uninitialized_var(cqe), *rd_cqe;
+ struct t4_wq *wq;
+ u32 credit = 0;
+ u8 cqe_flushed;
+ u64 cookie = 0;
+ int ret;
+
+ ret = t4_next_cqe(&chp->cq, &rd_cqe);
+
+ if (ret)
+ return ret;
+
+ qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
+ if (!qhp)
+ wq = NULL;
+ else {
+ spin_lock(&qhp->lock);
+ wq = &(qhp->wq);
+ }
+ ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+ if (ret)
+ goto out;
+
+ wc->wr_id = cookie;
+ wc->qp = &qhp->ibqp;
+ wc->vendor_err = CQE_STATUS(&cqe);
+ wc->wc_flags = 0;
+
+ PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x "
+ "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe),
+ CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe),
+ CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie);
+
+ if (CQE_TYPE(&cqe) == 0) {
+ if (!CQE_STATUS(&cqe))
+ wc->byte_len = CQE_LEN(&cqe);
+ else
+ wc->byte_len = 0;
+ wc->opcode = IB_WC_RECV;
+ if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
+ CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
+ wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+ } else {
+ switch (CQE_OPCODE(&cqe)) {
+ case FW_RI_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case FW_RI_READ_REQ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = CQE_LEN(&cqe);
+ break;
+ case FW_RI_SEND_WITH_INV:
+ case FW_RI_SEND_WITH_SE_INV:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ break;
+ case FW_RI_SEND:
+ case FW_RI_SEND_WITH_SE:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case FW_RI_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+
+ case FW_RI_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case FW_RI_FAST_REGISTER:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ default:
+ printk(KERN_ERR MOD "Unexpected opcode %d "
+ "in the CQE received for QPID=0x%0x\n",
+ CQE_OPCODE(&cqe), CQE_QPID(&cqe));
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (cqe_flushed)
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ else {
+
+ switch (CQE_STATUS(&cqe)) {
+ case T4_ERR_SUCCESS:
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case T4_ERR_STAG:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case T4_ERR_PDID:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case T4_ERR_QPID:
+ case T4_ERR_ACCESS:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case T4_ERR_WRAP:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ case T4_ERR_BOUND:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case T4_ERR_INVALIDATE_SHARED_MR:
+ case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case T4_ERR_CRC:
+ case T4_ERR_MARKER:
+ case T4_ERR_PDU_LEN_ERR:
+ case T4_ERR_OUT_OF_RQE:
+ case T4_ERR_DDP_VERSION:
+ case T4_ERR_RDMA_VERSION:
+ case T4_ERR_DDP_QUEUE_NUM:
+ case T4_ERR_MSN:
+ case T4_ERR_TBIT:
+ case T4_ERR_MO:
+ case T4_ERR_MSN_RANGE:
+ case T4_ERR_IRD_OVERFLOW:
+ case T4_ERR_OPCODE:
+ case T4_ERR_INTERNAL_ERR:
+ wc->status = IB_WC_FATAL_ERR;
+ break;
+ case T4_ERR_SWFLUSH:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ default:
+ printk(KERN_ERR MOD
+ "Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ CQE_STATUS(&cqe), CQE_QPID(&cqe));
+ ret = -EINVAL;
+ }
+ }
+out:
+ if (wq)
+ spin_unlock(&qhp->lock);
+ return ret;
+}
+
+int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct c4iw_cq *chp;
+ unsigned long flags;
+ int npolled;
+ int err = 0;
+
+ chp = to_c4iw_cq(ibcq);
+
+ spin_lock_irqsave(&chp->lock, flags);
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ do {
+ err = c4iw_poll_cq_one(chp, wc + npolled);
+ } while (err == -EAGAIN);
+ if (err)
+ break;
+ }
+ spin_unlock_irqrestore(&chp->lock, flags);
+ return !err || err == -ENODATA ? npolled : err;
+}
+
+int c4iw_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct c4iw_cq *chp;
+ struct c4iw_ucontext *ucontext;
+
+ PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ chp = to_c4iw_cq(ib_cq);
+
+ remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+ atomic_dec(&chp->refcnt);
+ wait_event(chp->wait, !atomic_read(&chp->refcnt));
+
+ ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
+ : NULL;
+ destroy_cq(&chp->rhp->rdev, &chp->cq,
+ ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
+ kfree(chp);
+ return 0;
+}
+
+struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *ib_context,
+ struct ib_udata *udata)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_cq *chp;
+ struct c4iw_create_cq_resp uresp;
+ struct c4iw_ucontext *ucontext = NULL;
+ int ret;
+ size_t memsize, hwentries;
+ struct c4iw_mm_entry *mm, *mm2;
+
+ PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+
+ rhp = to_c4iw_dev(ibdev);
+
+ if (vector >= rhp->rdev.lldi.nciq)
+ return ERR_PTR(-EINVAL);
+
+ chp = kzalloc(sizeof(*chp), GFP_KERNEL);
+ if (!chp)
+ return ERR_PTR(-ENOMEM);
+
+ if (ib_context)
+ ucontext = to_c4iw_ucontext(ib_context);
+
+ /* account for the status page. */
+ entries++;
+
+ /* IQ needs one extra entry to differentiate full vs empty. */
+ entries++;
+
+ /*
+ * entries must be multiple of 16 for HW.
+ */
+ entries = roundup(entries, 16);
+
+ /*
+ * Make the actual HW queue 2x the requested size to avoid cidx_inc overflows.
+ */
+ hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
+
+ /*
+ * Make HW queue at least 64 entries so GTS updates aren't too
+ * frequent.
+ */
+ if (hwentries < 64)
+ hwentries = 64;
+
+ memsize = hwentries * sizeof *chp->cq.queue;
+
+ /*
+ * memsize must be a multiple of the page size if it's a user cq.
+ */
+ if (ucontext) {
+ memsize = roundup(memsize, PAGE_SIZE);
+ hwentries = memsize / sizeof *chp->cq.queue;
+ while (hwentries > T4_MAX_IQ_SIZE) {
+ memsize -= PAGE_SIZE;
+ hwentries = memsize / sizeof *chp->cq.queue;
+ }
+ }
+ chp->cq.size = hwentries;
+ chp->cq.memsize = memsize;
+ chp->cq.vector = vector;
+
+ ret = create_cq(&rhp->rdev, &chp->cq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ if (ret)
+ goto err1;
+
+ chp->rhp = rhp;
+ chp->cq.size--; /* status page */
+ chp->ibcq.cqe = entries - 2;
+ spin_lock_init(&chp->lock);
+ spin_lock_init(&chp->comp_handler_lock);
+ atomic_set(&chp->refcnt, 1);
+ init_waitqueue_head(&chp->wait);
+ ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+ if (ret)
+ goto err2;
+
+ if (ucontext) {
+ mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ if (!mm) {
+ ret = -ENOMEM;
+ goto err3;
+ }
+ mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ if (!mm2) {
+ ret = -ENOMEM;
+ goto err4;
+ }
+
+ uresp.qid_mask = rhp->rdev.cqmask;
+ uresp.cqid = chp->cq.cqid;
+ uresp.size = chp->cq.size;
+ uresp.memsize = chp->cq.memsize;
+ spin_lock(&ucontext->mmap_lock);
+ uresp.key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.gts_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
+ if (ret)
+ goto err5;
+
+ mm->key = uresp.key;
+ mm->addr = virt_to_phys(chp->cq.queue);
+ mm->len = chp->cq.memsize;
+ insert_mmap(ucontext, mm);
+
+ mm2->key = uresp.gts_key;
+ mm2->addr = chp->cq.ugts;
+ mm2->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm2);
+ }
+ PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
+ __func__, chp->cq.cqid, chp, chp->cq.size,
+ chp->cq.memsize,
+ (unsigned long long) chp->cq.dma_addr);
+ return &chp->ibcq;
+err5:
+ kfree(mm2);
+err4:
+ kfree(mm);
+err3:
+ remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
+err2:
+ destroy_cq(&chp->rhp->rdev, &chp->cq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+err1:
+ kfree(chp);
+ return ERR_PTR(ret);
+}
+
+int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+ return -ENOSYS;
+}
+
+int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct c4iw_cq *chp;
+ int ret;
+ unsigned long flag;
+
+ chp = to_c4iw_cq(ibcq);
+ spin_lock_irqsave(&chp->lock, flag);
+ ret = t4_arm_cq(&chp->cq,
+ (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+ spin_unlock_irqrestore(&chp->lock, flag);
+ if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
+ ret = 0;
+ return ret;
+}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
new file mode 100644
index 00000000000..7db82b24302
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "iw_cxgb4.h"
+
+#define DRV_VERSION "0.1"
+
+MODULE_AUTHOR("Steve Wise");
+MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static int allow_db_fc_on_t5;
+module_param(allow_db_fc_on_t5, int, 0644);
+MODULE_PARM_DESC(allow_db_fc_on_t5,
+ "Allow DB Flow Control on T5 (default = 0)");
+
+static int allow_db_coalescing_on_t5;
+module_param(allow_db_coalescing_on_t5, int, 0644);
+MODULE_PARM_DESC(allow_db_coalescing_on_t5,
+ "Allow DB Coalescing on T5 (default = 0)");
+
+struct uld_ctx {
+ struct list_head entry;
+ struct cxgb4_lld_info lldi;
+ struct c4iw_dev *dev;
+};
+
+static LIST_HEAD(uld_ctx_list);
+static DEFINE_MUTEX(dev_mutex);
+
+#define DB_FC_RESUME_SIZE 64
+#define DB_FC_RESUME_DELAY 1
+#define DB_FC_DRAIN_THRESH 0
+
+static struct dentry *c4iw_debugfs_root;
+
+struct c4iw_debugfs_data {
+ struct c4iw_dev *devp;
+ char *buf;
+ int bufsize;
+ int pos;
+};
+
+/* registered cxgb4 netlink callbacks */
+static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
+ [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+ [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+ [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+ [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+ [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
+static int count_idrs(int id, void *p, void *data)
+{
+ int *countp = data;
+
+ *countp = *countp + 1;
+ return 0;
+}
+
+static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct c4iw_debugfs_data *d = file->private_data;
+
+ return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
+}
+
+static int dump_qp(int id, void *p, void *data)
+{
+ struct c4iw_qp *qp = p;
+ struct c4iw_debugfs_data *qpd = data;
+ int space;
+ int cc;
+
+ if (id != qp->wq.sq.qid)
+ return 0;
+
+ space = qpd->bufsize - qpd->pos - 1;
+ if (space == 0)
+ return 1;
+
+ if (qp->ep) {
+ if (qp->ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin = (struct sockaddr_in *)
+ &qp->ep->com.local_addr;
+ struct sockaddr_in *rsin = (struct sockaddr_in *)
+ &qp->ep->com.remote_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &qp->ep->com.mapped_local_addr;
+ struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+ &qp->ep->com.mapped_remote_addr;
+
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "rc qp sq id %u rq id %u state %u "
+ "onchip %u ep tid %u state %u "
+ "%pI4:%u/%u->%pI4:%u/%u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP,
+ qp->ep->hwtid, (int)qp->ep->com.state,
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ ntohs(mapped_lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port),
+ ntohs(mapped_rsin->sin_port));
+ } else {
+ struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+ &qp->ep->com.local_addr;
+ struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+ &qp->ep->com.remote_addr;
+ struct sockaddr_in6 *mapped_lsin6 =
+ (struct sockaddr_in6 *)
+ &qp->ep->com.mapped_local_addr;
+ struct sockaddr_in6 *mapped_rsin6 =
+ (struct sockaddr_in6 *)
+ &qp->ep->com.mapped_remote_addr;
+
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "rc qp sq id %u rq id %u state %u "
+ "onchip %u ep tid %u state %u "
+ "%pI6:%u/%u->%pI6:%u/%u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP,
+ qp->ep->hwtid, (int)qp->ep->com.state,
+ &lsin6->sin6_addr,
+ ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port),
+ &rsin6->sin6_addr,
+ ntohs(rsin6->sin6_port),
+ ntohs(mapped_rsin6->sin6_port));
+ }
+ } else
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "qp sq id %u rq id %u state %u onchip %u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP);
+ if (cc < space)
+ qpd->pos += cc;
+ return 0;
+}
+
+static int qp_release(struct inode *inode, struct file *file)
+{
+ struct c4iw_debugfs_data *qpd = file->private_data;
+ if (!qpd) {
+ printk(KERN_INFO "%s null qpd?\n", __func__);
+ return 0;
+ }
+ vfree(qpd->buf);
+ kfree(qpd);
+ return 0;
+}
+
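+/*
+ * Size the dump buffer by counting the idr entries under the lock, then
+ * fill it on a second locked pass.  Entries that no longer fit are
+ * dropped, so the dump may be truncated.
+ */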
+static int qp_open(struct inode *inode, struct file *file)
+{
+ struct c4iw_debugfs_data *qpd;
+ int ret = 0;
+ int count = 1;
+
+ qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
+ if (!qpd) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ qpd->devp = inode->i_private;
+ qpd->pos = 0;
+
+ spin_lock_irq(&qpd->devp->lock);
+ idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
+ spin_unlock_irq(&qpd->devp->lock);
+
+ qpd->bufsize = count * 128;
+ qpd->buf = vmalloc(qpd->bufsize);
+ if (!qpd->buf) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ spin_lock_irq(&qpd->devp->lock);
+ idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
+ spin_unlock_irq(&qpd->devp->lock);
+
+ qpd->buf[qpd->pos++] = 0;
+ file->private_data = qpd;
+ goto out;
+err1:
+ kfree(qpd);
+out:
+ return ret;
+}
+
+static const struct file_operations qp_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = qp_open,
+ .release = qp_release,
+ .read = debugfs_read,
+ .llseek = default_llseek,
+};
+
+static int dump_stag(int id, void *p, void *data)
+{
+ struct c4iw_debugfs_data *stagd = data;
+ int space;
+ int cc;
+
+ space = stagd->bufsize - stagd->pos - 1;
+ if (space == 0)
+ return 1;
+
+ cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8);
+ if (cc < space)
+ stagd->pos += cc;
+ return 0;
+}
+
+static int stag_release(struct inode *inode, struct file *file)
+{
+ struct c4iw_debugfs_data *stagd = file->private_data;
+ if (!stagd) {
+ printk(KERN_INFO "%s null stagd?\n", __func__);
+ return 0;
+ }
+ kfree(stagd->buf);
+ kfree(stagd);
+ return 0;
+}
+
+static int stag_open(struct inode *inode, struct file *file)
+{
+ struct c4iw_debugfs_data *stagd;
+ int ret = 0;
+ int count = 1;
+
+ stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
+ if (!stagd) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ stagd->devp = inode->i_private;
+ stagd->pos = 0;
+
+ spin_lock_irq(&stagd->devp->lock);
+ idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
+ spin_unlock_irq(&stagd->devp->lock);
+
+ stagd->bufsize = count * sizeof("0x12345678\n");
+ stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL);
+ if (!stagd->buf) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ spin_lock_irq(&stagd->devp->lock);
+ idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
+ spin_unlock_irq(&stagd->devp->lock);
+
+ stagd->buf[stagd->pos++] = 0;
+ file->private_data = stagd;
+ goto out;
+err1:
+ kfree(stagd);
+out:
+ return ret;
+}
+
+static const struct file_operations stag_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = stag_open,
+ .release = stag_release,
+ .read = debugfs_read,
+ .llseek = default_llseek,
+};
+
+static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+ struct c4iw_dev *dev = seq->private;
+
+ seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current",
+ "Max", "Fail");
+ seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
+ dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
+ seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
+ dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+ seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
+ dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
+ seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
+ dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
+ seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
+ dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
+ seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
+ dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
+ seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
+ seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
+ seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
+ seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
+ db_state_str[dev->db_state],
+ dev->rdev.stats.db_state_transitions,
+ dev->rdev.stats.db_fc_interruptions);
+ seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
+ seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
+ dev->rdev.stats.act_ofld_conn_fails);
+ seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
+ dev->rdev.stats.pas_ofld_conn_fails);
+ return 0;
+}
+
+static int stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, stats_show, inode->i_private);
+}
+
+static ssize_t stats_clear(struct file *file, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
+
+ mutex_lock(&dev->rdev.stats.lock);
+ dev->rdev.stats.pd.max = 0;
+ dev->rdev.stats.pd.fail = 0;
+ dev->rdev.stats.qid.max = 0;
+ dev->rdev.stats.qid.fail = 0;
+ dev->rdev.stats.stag.max = 0;
+ dev->rdev.stats.stag.fail = 0;
+ dev->rdev.stats.pbl.max = 0;
+ dev->rdev.stats.pbl.fail = 0;
+ dev->rdev.stats.rqt.max = 0;
+ dev->rdev.stats.rqt.fail = 0;
+ dev->rdev.stats.ocqp.max = 0;
+ dev->rdev.stats.ocqp.fail = 0;
+ dev->rdev.stats.db_full = 0;
+ dev->rdev.stats.db_empty = 0;
+ dev->rdev.stats.db_drop = 0;
+ dev->rdev.stats.db_state_transitions = 0;
+ dev->rdev.stats.tcam_full = 0;
+ dev->rdev.stats.act_ofld_conn_fails = 0;
+ dev->rdev.stats.pas_ofld_conn_fails = 0;
+ mutex_unlock(&dev->rdev.stats.lock);
+ return count;
+}
+
+static const struct file_operations stats_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = stats_open,
+ .release = single_release,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = stats_clear,
+};
+
+static int dump_ep(int id, void *p, void *data)
+{
+ struct c4iw_ep *ep = p;
+ struct c4iw_debugfs_data *epd = data;
+ int space;
+ int cc;
+
+ space = epd->bufsize - epd->pos - 1;
+ if (space == 0)
+ return 1;
+
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin = (struct sockaddr_in *)
+ &ep->com.local_addr;
+ struct sockaddr_in *rsin = (struct sockaddr_in *)
+ &ep->com.remote_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+ &ep->com.mapped_remote_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p qp %p state %d flags 0x%lx "
+ "history 0x%lx hwtid %d atid %d "
+ "%pI4:%d/%d <-> %pI4:%d/%d\n",
+ ep, ep->com.cm_id, ep->com.qp,
+ (int)ep->com.state, ep->com.flags,
+ ep->com.history, ep->hwtid, ep->atid,
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ ntohs(mapped_lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port),
+ ntohs(mapped_rsin->sin_port));
+ } else {
+ struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+ &ep->com.local_addr;
+ struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+ &ep->com.remote_addr;
+ struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_remote_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p qp %p state %d flags 0x%lx "
+ "history 0x%lx hwtid %d atid %d "
+ "%pI6:%d/%d <-> %pI6:%d/%d\n",
+ ep, ep->com.cm_id, ep->com.qp,
+ (int)ep->com.state, ep->com.flags,
+ ep->com.history, ep->hwtid, ep->atid,
+ &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port),
+ &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
+ ntohs(mapped_rsin6->sin6_port));
+ }
+ if (cc < space)
+ epd->pos += cc;
+ return 0;
+}
+
+static int dump_listen_ep(int id, void *p, void *data)
+{
+ struct c4iw_listen_ep *ep = p;
+ struct c4iw_debugfs_data *epd = data;
+ int space;
+ int cc;
+
+ space = epd->bufsize - epd->pos - 1;
+ if (space == 0)
+ return 1;
+
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin = (struct sockaddr_in *)
+ &ep->com.local_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p state %d flags 0x%lx stid %d "
+ "backlog %d %pI4:%d/%d\n",
+ ep, ep->com.cm_id, (int)ep->com.state,
+ ep->com.flags, ep->stid, ep->backlog,
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ ntohs(mapped_lsin->sin_port));
+ } else {
+ struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+ &ep->com.local_addr;
+ struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+
+ cc = snprintf(epd->buf + epd->pos, space,
+ "ep %p cm_id %p state %d flags 0x%lx stid %d "
+ "backlog %d %pI6:%d/%d\n",
+ ep, ep->com.cm_id, (int)ep->com.state,
+ ep->com.flags, ep->stid, ep->backlog,
+ &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port));
+ }
+ if (cc < space)
+ epd->pos += cc;
+ return 0;
+}
+
+static int ep_release(struct inode *inode, struct file *file)
+{
+ struct c4iw_debugfs_data *epd = file->private_data;
+ if (!epd) {
+ pr_info("%s null qpd?\n", __func__);
+ return 0;
+ }
+ vfree(epd->buf);
+ kfree(epd);
+ return 0;
+}
+
+static int ep_open(struct inode *inode, struct file *file)
+{
+ struct c4iw_debugfs_data *epd;
+ int ret = 0;
+ int count = 1;
+
+ epd = kmalloc(sizeof(*epd), GFP_KERNEL);
+ if (!epd) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ epd->devp = inode->i_private;
+ epd->pos = 0;
+
+ spin_lock_irq(&epd->devp->lock);
+ idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
+ idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
+ idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
+ spin_unlock_irq(&epd->devp->lock);
+
+ epd->bufsize = count * 160;
+ epd->buf = vmalloc(epd->bufsize);
+ if (!epd->buf) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+
+ spin_lock_irq(&epd->devp->lock);
+ idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
+ idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
+ idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
+ spin_unlock_irq(&epd->devp->lock);
+
+ file->private_data = epd;
+ goto out;
+err1:
+ kfree(epd);
+out:
+ return ret;
+}
+
+static const struct file_operations ep_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = ep_open,
+ .release = ep_release,
+ .read = debugfs_read,
+};
+
+static int setup_debugfs(struct c4iw_dev *devp)
+{
+ struct dentry *de;
+
+ if (!devp->debugfs_root)
+ return -1;
+
+ de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root,
+ (void *)devp, &qp_debugfs_fops);
+ if (de && de->d_inode)
+ de->d_inode->i_size = 4096;
+
+ de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root,
+ (void *)devp, &stag_debugfs_fops);
+ if (de && de->d_inode)
+ de->d_inode->i_size = 4096;
+
+ de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
+ (void *)devp, &stats_debugfs_fops);
+ if (de && de->d_inode)
+ de->d_inode->i_size = 4096;
+
+ de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root,
+ (void *)devp, &ep_debugfs_fops);
+ if (de && de->d_inode)
+ de->d_inode->i_size = 4096;
+
+ return 0;
+}
+
+void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
+ struct c4iw_dev_ucontext *uctx)
+{
+ struct list_head *pos, *nxt;
+ struct c4iw_qid_list *entry;
+
+ mutex_lock(&uctx->lock);
+ list_for_each_safe(pos, nxt, &uctx->qpids) {
+ entry = list_entry(pos, struct c4iw_qid_list, entry);
+ list_del_init(&entry->entry);
+ if (!(entry->qid & rdev->qpmask)) {
+ c4iw_put_resource(&rdev->resource.qid_table,
+ entry->qid);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.qid.cur -= rdev->qpmask + 1;
+ mutex_unlock(&rdev->stats.lock);
+ }
+ kfree(entry);
+ }
+
+ list_for_each_safe(pos, nxt, &uctx->cqids) {
+ entry = list_entry(pos, struct c4iw_qid_list, entry);
+ list_del_init(&entry->entry);
+ kfree(entry);
+ }
+ mutex_unlock(&uctx->lock);
+}
+
+void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
+ struct c4iw_dev_ucontext *uctx)
+{
+ INIT_LIST_HEAD(&uctx->qpids);
+ INIT_LIST_HEAD(&uctx->cqids);
+ mutex_init(&uctx->lock);
+}
+
+/* Caller takes care of locking if needed */
+static int c4iw_rdev_open(struct c4iw_rdev *rdev)
+{
+ int err;
+
+ c4iw_init_dev_ucontext(rdev, &rdev->uctx);
+
+ /*
+ * qpshift is the number of bits to shift the qpid left in order
+ * to get the correct address of the doorbell for that qp.
+ */
+ rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
+ rdev->qpmask = rdev->lldi.udb_density - 1;
+ rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
+ rdev->cqmask = rdev->lldi.ucq_density - 1;
+ PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
+ "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
+ "qp qid start %u size %u cq qid start %u size %u\n",
+ __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
+ rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
+ rdev->lldi.vr->pbl.start,
+ rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
+ rdev->lldi.vr->rq.size,
+ rdev->lldi.vr->qp.start,
+ rdev->lldi.vr->qp.size,
+ rdev->lldi.vr->cq.start,
+ rdev->lldi.vr->cq.size);
+ PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
+ "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
+ (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
+ (u64)pci_resource_start(rdev->lldi.pdev, 2),
+ rdev->lldi.db_reg,
+ rdev->lldi.gts_reg,
+ rdev->qpshift, rdev->qpmask,
+ rdev->cqshift, rdev->cqmask);
+
+ if (c4iw_num_stags(rdev) == 0) {
+ err = -EINVAL;
+ goto err1;
+ }
+
+ rdev->stats.pd.total = T4_MAX_NUM_PD;
+ rdev->stats.stag.total = rdev->lldi.vr->stag.size;
+ rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
+ rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+ rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
+ rdev->stats.qid.total = rdev->lldi.vr->qp.size;
+
+ err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+ if (err) {
+ printk(KERN_ERR MOD "error %d initializing resources\n", err);
+ goto err1;
+ }
+ err = c4iw_pblpool_create(rdev);
+ if (err) {
+ printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
+ goto err2;
+ }
+ err = c4iw_rqtpool_create(rdev);
+ if (err) {
+ printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
+ goto err3;
+ }
+ err = c4iw_ocqp_pool_create(rdev);
+ if (err) {
+ printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
+ goto err4;
+ }
+ rdev->status_page = (struct t4_dev_status_page *)
+ __get_free_page(GFP_KERNEL);
+ if (!rdev->status_page) {
+ pr_err(MOD "error allocating status page\n");
+ err = -ENOMEM;
+ goto err4;
+ }
+ rdev->status_page->db_off = 0;
+ return 0;
+err4:
+ c4iw_rqtpool_destroy(rdev);
+err3:
+ c4iw_pblpool_destroy(rdev);
+err2:
+ c4iw_destroy_resource(&rdev->resource);
+err1:
+ return err;
+}
+
+static void c4iw_rdev_close(struct c4iw_rdev *rdev)
+{
+ free_page((unsigned long)rdev->status_page);
+ c4iw_pblpool_destroy(rdev);
+ c4iw_rqtpool_destroy(rdev);
+ c4iw_destroy_resource(&rdev->resource);
+}
+
+static void c4iw_dealloc(struct uld_ctx *ctx)
+{
+ c4iw_rdev_close(&ctx->dev->rdev);
+ idr_destroy(&ctx->dev->cqidr);
+ idr_destroy(&ctx->dev->qpidr);
+ idr_destroy(&ctx->dev->mmidr);
+ idr_destroy(&ctx->dev->hwtid_idr);
+ idr_destroy(&ctx->dev->stid_idr);
+ idr_destroy(&ctx->dev->atid_idr);
+ if (ctx->dev->rdev.bar2_kva)
+ iounmap(ctx->dev->rdev.bar2_kva);
+ if (ctx->dev->rdev.oc_mw_kva)
+ iounmap(ctx->dev->rdev.oc_mw_kva);
+ ib_dealloc_device(&ctx->dev->ibdev);
+ ctx->dev = NULL;
+}
+
+static void c4iw_remove(struct uld_ctx *ctx)
+{
+ PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
+ c4iw_unregister_device(ctx->dev);
+ c4iw_dealloc(ctx);
+}
+
+static int rdma_supported(const struct cxgb4_lld_info *infop)
+{
+ return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
+ infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
+ infop->vr->cq.size > 0;
+}
+
+static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
+{
+ struct c4iw_dev *devp;
+ int ret;
+
+ if (!rdma_supported(infop)) {
+ printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
+ pci_name(infop->pdev));
+ return ERR_PTR(-ENOSYS);
+ }
+ if (!ocqp_supported(infop))
+ pr_info("%s: On-Chip Queues not supported on this device.\n",
+ pci_name(infop->pdev));
+
+ devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
+ if (!devp) {
+ printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ devp->rdev.lldi = *infop;
+
+ /*
+ * For T5 devices, we map all of BAR2 with WC.
+ * For T4 devices with onchip qp mem, we map only that part
+ * of BAR2 with WC.
+ */
+ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
+ if (is_t5(devp->rdev.lldi.adapter_type)) {
+ devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
+ pci_resource_len(devp->rdev.lldi.pdev, 2));
+ if (!devp->rdev.bar2_kva) {
+ pr_err(MOD "Unable to ioremap BAR2\n");
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(-EINVAL);
+ }
+ } else if (ocqp_supported(infop)) {
+ devp->rdev.oc_mw_pa =
+ pci_resource_start(devp->rdev.lldi.pdev, 2) +
+ pci_resource_len(devp->rdev.lldi.pdev, 2) -
+ roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
+ devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+ devp->rdev.lldi.vr->ocq.size);
+ if (!devp->rdev.oc_mw_kva) {
+ pr_err(MOD "Unable to ioremap onchip mem\n");
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ PDBG(KERN_INFO MOD "ocq memory: "
+ "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
+ devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
+ devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
+
+ ret = c4iw_rdev_open(&devp->rdev);
+ if (ret) {
+ printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(ret);
+ }
+
+ idr_init(&devp->cqidr);
+ idr_init(&devp->qpidr);
+ idr_init(&devp->mmidr);
+ idr_init(&devp->hwtid_idr);
+ idr_init(&devp->stid_idr);
+ idr_init(&devp->atid_idr);
+ spin_lock_init(&devp->lock);
+ mutex_init(&devp->rdev.stats.lock);
+ mutex_init(&devp->db_mutex);
+ INIT_LIST_HEAD(&devp->db_fc_list);
+
+ if (c4iw_debugfs_root) {
+ devp->debugfs_root = debugfs_create_dir(
+ pci_name(devp->rdev.lldi.pdev),
+ c4iw_debugfs_root);
+ setup_debugfs(devp);
+ }
+
+
+ return devp;
+}
+
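+/*
+ * ULD "add" callback, invoked by the cxgb4 LLD for each adapter.  Only
+ * the uld_ctx is allocated here; the ib_device itself is created later,
+ * when the CXGB4_STATE_UP state change callback fires.
+ */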
+static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
+{
+ struct uld_ctx *ctx;
+ static int vers_printed;
+ int i;
+
+ if (!vers_printed++)
+ pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
+ DRV_VERSION);
+
+ ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ if (!ctx) {
+ ctx = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ ctx->lldi = *infop;
+
+ PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
+ __func__, pci_name(ctx->lldi.pdev),
+ ctx->lldi.nchan, ctx->lldi.nrxq,
+ ctx->lldi.ntxq, ctx->lldi.nports);
+
+ mutex_lock(&dev_mutex);
+ list_add_tail(&ctx->entry, &uld_ctx_list);
+ mutex_unlock(&dev_mutex);
+
+ for (i = 0; i < ctx->lldi.nrxq; i++)
+ PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
+out:
+ return ctx;
+}
+
+static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
+ const __be64 *rsp,
+ u32 pktshift)
+{
+ struct sk_buff *skb;
+
+ /*
+ * Allocate space for cpl_pass_accept_req which will be synthesized by
+ * driver. Once the driver synthesizes the request the skb will go
+ * through the regular cpl_pass_accept_req processing.
+ * The math here assumes sizeof cpl_pass_accept_req >= sizeof
+ * cpl_rx_pkt.
+ */
+ skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
+ sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return NULL;
+
+ __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
+ sizeof(struct rss_header) - pktshift);
+
+ /*
+ * This skb will contain:
+ * rss_header from the rspq descriptor (1 flit)
+ * cpl_rx_pkt struct from the rspq descriptor (2 flits)
+ * space for the difference between the size of an
+ * rx_pkt and pass_accept_req cpl (1 flit)
+ * the packet data from the gl
+ */
+ skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
+ sizeof(struct rss_header));
+ skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
+ sizeof(struct cpl_pass_accept_req),
+ gl->va + pktshift,
+ gl->tot_len - pktshift);
+ return skb;
+}
+
+static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
+ const __be64 *rsp)
+{
+ unsigned int opcode = *(u8 *)rsp;
+ struct sk_buff *skb;
+
+ if (opcode != CPL_RX_PKT)
+ goto out;
+
+ skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
+ if (skb == NULL)
+ goto out;
+
+ if (c4iw_handlers[opcode] == NULL) {
+ pr_info("%s no handler opcode 0x%x...\n", __func__,
+ opcode);
+ kfree_skb(skb);
+ goto out;
+ }
+ c4iw_handlers[opcode](dev, skb);
+ return 1;
+out:
+ return 0;
+}
+
+static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
+ const struct pkt_gl *gl)
+{
+ struct uld_ctx *ctx = handle;
+ struct c4iw_dev *dev = ctx->dev;
+ struct sk_buff *skb;
+ u8 opcode;
+
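+ /*
+ * Ingress messages arrive in one of three forms: CPLs small enough
+ * to fit entirely in the response descriptor (gl == NULL), async
+ * notifications carrying a CQ qid (CXGB4_MSG_AN), or CPLs with
+ * payload in free-list pages (a real gather list).
+ */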
+ if (gl == NULL) {
+ /* omit RSS and rsp_ctrl at end of descriptor */
+ unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
+
+ skb = alloc_skb(256, GFP_ATOMIC);
+ if (!skb)
+ goto nomem;
+ __skb_put(skb, len);
+ skb_copy_to_linear_data(skb, &rsp[1], len);
+ } else if (gl == CXGB4_MSG_AN) {
+ const struct rsp_ctrl *rc = (void *)rsp;
+
+ u32 qid = be32_to_cpu(rc->pldbuflen_qid);
+ c4iw_ev_handler(dev, qid);
+ return 0;
+ } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
+ if (recv_rx_pkt(dev, gl, rsp))
+ return 0;
+
+ pr_info("%s: unexpected FL contents at %p, " \
+ "RSS %#llx, FL %#llx, len %u\n",
+ pci_name(ctx->lldi.pdev), gl->va,
+ (unsigned long long)be64_to_cpu(*rsp),
+ (unsigned long long)be64_to_cpu(
+ *(__force __be64 *)gl->va),
+ gl->tot_len);
+
+ return 0;
+ } else {
+ skb = cxgb4_pktgl_to_skb(gl, 128, 128);
+ if (unlikely(!skb))
+ goto nomem;
+ }
+
+ opcode = *(u8 *)rsp;
+ if (c4iw_handlers[opcode]) {
+ c4iw_handlers[opcode](dev, skb);
+ } else {
+ pr_info("%s no handler opcode 0x%x...\n", __func__,
+ opcode);
+ kfree_skb(skb);
+ }
+
+ return 0;
+nomem:
+ return -1;
+}
+
+static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
+{
+ struct uld_ctx *ctx = handle;
+
+ PDBG("%s new_state %u\n", __func__, new_state);
+ switch (new_state) {
+ case CXGB4_STATE_UP:
+ printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+ if (!ctx->dev) {
+ int ret;
+
+ ctx->dev = c4iw_alloc(&ctx->lldi);
+ if (IS_ERR(ctx->dev)) {
+ printk(KERN_ERR MOD
+ "%s: initialization failed: %ld\n",
+ pci_name(ctx->lldi.pdev),
+ PTR_ERR(ctx->dev));
+ ctx->dev = NULL;
+ break;
+ }
+ ret = c4iw_register_device(ctx->dev);
+ if (ret) {
+ printk(KERN_ERR MOD
+ "%s: RDMA registration failed: %d\n",
+ pci_name(ctx->lldi.pdev), ret);
+ c4iw_dealloc(ctx);
+ }
+ }
+ break;
+ case CXGB4_STATE_DOWN:
+ printk(KERN_INFO MOD "%s: Down\n",
+ pci_name(ctx->lldi.pdev));
+ if (ctx->dev)
+ c4iw_remove(ctx);
+ break;
+ case CXGB4_STATE_START_RECOVERY:
+ printk(KERN_INFO MOD "%s: Fatal Error\n",
+ pci_name(ctx->lldi.pdev));
+ if (ctx->dev) {
+ struct ib_event event;
+
+ ctx->dev->rdev.flags |= T4_FATAL_ERROR;
+ memset(&event, 0, sizeof event);
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.device = &ctx->dev->ibdev;
+ ib_dispatch_event(&event);
+ c4iw_remove(ctx);
+ }
+ break;
+ case CXGB4_STATE_DETACH:
+ printk(KERN_INFO MOD "%s: Detach\n",
+ pci_name(ctx->lldi.pdev));
+ if (ctx->dev)
+ c4iw_remove(ctx);
+ break;
+ }
+ return 0;
+}
+
+static int disable_qp_db(int id, void *p, void *data)
+{
+ struct c4iw_qp *qp = p;
+
+ t4_disable_wq_db(&qp->wq);
+ return 0;
+}
+
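+/*
+ * The LLD has signalled that the doorbell FIFO is full: mark the device
+ * STOPPED and turn doorbells off, either per-QP or via the shared
+ * status page so user-mode libraries stop ringing them as well.
+ */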
+static void stop_queues(struct uld_ctx *ctx)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->dev->lock, flags);
+ ctx->dev->rdev.stats.db_state_transitions++;
+ ctx->dev->db_state = STOPPED;
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
+ idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
+ else
+ ctx->dev->rdev.status_page->db_off = 1;
+ spin_unlock_irqrestore(&ctx->dev->lock, flags);
+}
+
+static int enable_qp_db(int id, void *p, void *data)
+{
+ struct c4iw_qp *qp = p;
+
+ t4_enable_wq_db(&qp->wq);
+ return 0;
+}
+
+static void resume_rc_qp(struct c4iw_qp *qp)
+{
+ spin_lock(&qp->lock);
+ t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
+ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ qp->wq.sq.wq_pidx_inc = 0;
+ t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
+ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ qp->wq.rq.wq_pidx_inc = 0;
+ spin_unlock(&qp->lock);
+}
+
+static void resume_a_chunk(struct uld_ctx *ctx)
+{
+ int i;
+ struct c4iw_qp *qp;
+
+ for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
+ qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
+ db_fc_entry);
+ list_del_init(&qp->db_fc_entry);
+ resume_rc_qp(qp);
+ if (list_empty(&ctx->dev->db_fc_list))
+ break;
+ }
+}
+
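+/*
+ * Resume QPs queued on db_fc_list a chunk at a time, pacing against the
+ * hardware doorbell FIFO depth, and re-enable doorbells once the list
+ * drains.
+ */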
+static void resume_queues(struct uld_ctx *ctx)
+{
+ spin_lock_irq(&ctx->dev->lock);
+ if (ctx->dev->db_state != STOPPED)
+ goto out;
+ ctx->dev->db_state = FLOW_CONTROL;
+ while (1) {
+ if (list_empty(&ctx->dev->db_fc_list)) {
+ WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
+ ctx->dev->db_state = NORMAL;
+ ctx->dev->rdev.stats.db_state_transitions++;
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
+ idr_for_each(&ctx->dev->qpidr, enable_qp_db,
+ NULL);
+ } else {
+ ctx->dev->rdev.status_page->db_off = 0;
+ }
+ break;
+ } else {
+ if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
+ < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
+ DB_FC_DRAIN_THRESH)) {
+ resume_a_chunk(ctx);
+ }
+ if (!list_empty(&ctx->dev->db_fc_list)) {
+ spin_unlock_irq(&ctx->dev->lock);
+ if (DB_FC_RESUME_DELAY) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(DB_FC_RESUME_DELAY);
+ }
+ spin_lock_irq(&ctx->dev->lock);
+ if (ctx->dev->db_state != FLOW_CONTROL)
+ break;
+ }
+ }
+ }
+out:
+ if (ctx->dev->db_state != NORMAL)
+ ctx->dev->rdev.stats.db_fc_interruptions++;
+ spin_unlock_irq(&ctx->dev->lock);
+}
+
+struct qp_list {
+ unsigned idx;
+ struct c4iw_qp **qps;
+};
+
+static int add_and_ref_qp(int id, void *p, void *data)
+{
+ struct qp_list *qp_listp = data;
+ struct c4iw_qp *qp = p;
+
+ c4iw_qp_add_ref(&qp->ibqp);
+ qp_listp->qps[qp_listp->idx++] = qp;
+ return 0;
+}
+
+static int count_qps(int id, void *p, void *data)
+{
+ unsigned *countp = data;
+ (*countp)++;
+ return 0;
+}
+
+static void deref_qps(struct qp_list *qp_list)
+{
+ int idx;
+
+ for (idx = 0; idx < qp_list->idx; idx++)
+ c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
+}
+
+static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
+{
+ int idx;
+ int ret;
+
+ for (idx = 0; idx < qp_list->idx; idx++) {
+ struct c4iw_qp *qp = qp_list->qps[idx];
+
+ spin_lock_irq(&qp->rhp->lock);
+ spin_lock(&qp->lock);
+ ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
+ qp->wq.sq.qid,
+ t4_sq_host_wq_pidx(&qp->wq),
+ t4_sq_wq_size(&qp->wq));
+ if (ret) {
+ pr_err(KERN_ERR MOD "%s: Fatal error - "
+ "DB overflow recovery failed - "
+ "error syncing SQ qid %u\n",
+ pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
+ return;
+ }
+ qp->wq.sq.wq_pidx_inc = 0;
+
+ ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
+ qp->wq.rq.qid,
+ t4_rq_host_wq_pidx(&qp->wq),
+ t4_rq_wq_size(&qp->wq));
+
+ if (ret) {
+ pr_err(KERN_ERR MOD "%s: Fatal error - "
+ "DB overflow recovery failed - "
+ "error syncing RQ qid %u\n",
+ pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
+ return;
+ }
+ qp->wq.rq.wq_pidx_inc = 0;
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
+
+ /* Wait for the dbfifo to drain */
+ while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(usecs_to_jiffies(10));
+ }
+ }
+}
+
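+/*
+ * A doorbell was dropped by the hardware.  Flush the SGE EQ cache and
+ * then re-sync every QP's SQ/RQ producer index with the hardware while
+ * holding a reference on each QP.
+ */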
+static void recover_queues(struct uld_ctx *ctx)
+{
+ int count = 0;
+ struct qp_list qp_list;
+ int ret;
+
+ /* slow everybody down */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(usecs_to_jiffies(1000));
+
+ /* flush the SGE contexts */
+ ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
+ if (ret) {
+ printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+ pci_name(ctx->lldi.pdev));
+ return;
+ }
+
+ /* Count active queues so we can build a list of queues to recover */
+ spin_lock_irq(&ctx->dev->lock);
+ WARN_ON(ctx->dev->db_state != STOPPED);
+ ctx->dev->db_state = RECOVERY;
+ idr_for_each(&ctx->dev->qpidr, count_qps, &count);
+
+ qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
+ if (!qp_list.qps) {
+ printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+ pci_name(ctx->lldi.pdev));
+ spin_unlock_irq(&ctx->dev->lock);
+ return;
+ }
+ qp_list.idx = 0;
+
+ /* add and ref each qp so it doesn't get freed */
+ idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
+
+ spin_unlock_irq(&ctx->dev->lock);
+
+ /* now traverse the list in a safe context to recover the db state */
+ recover_lost_dbs(ctx, &qp_list);
+
+ /* we're almost done! deref the qps and clean up */
+ deref_qps(&qp_list);
+ kfree(qp_list.qps);
+
+ spin_lock_irq(&ctx->dev->lock);
+ WARN_ON(ctx->dev->db_state != RECOVERY);
+ ctx->dev->db_state = STOPPED;
+ spin_unlock_irq(&ctx->dev->lock);
+}
+
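+/*
+ * Doorbell flow-control notifications from the LLD: FULL stops all
+ * queues, EMPTY resumes them, and DROP triggers full doorbell recovery.
+ */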
+static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
+{
+ struct uld_ctx *ctx = handle;
+
+ switch (control) {
+ case CXGB4_CONTROL_DB_FULL:
+ stop_queues(ctx);
+ ctx->dev->rdev.stats.db_full++;
+ break;
+ case CXGB4_CONTROL_DB_EMPTY:
+ resume_queues(ctx);
+ mutex_lock(&ctx->dev->rdev.stats.lock);
+ ctx->dev->rdev.stats.db_empty++;
+ mutex_unlock(&ctx->dev->rdev.stats.lock);
+ break;
+ case CXGB4_CONTROL_DB_DROP:
+ recover_queues(ctx);
+ mutex_lock(&ctx->dev->rdev.stats.lock);
+ ctx->dev->rdev.stats.db_drop++;
+ mutex_unlock(&ctx->dev->rdev.stats.lock);
+ break;
+ default:
+ printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
+ pci_name(ctx->lldi.pdev), control);
+ break;
+ }
+ return 0;
+}
+
+static struct cxgb4_uld_info c4iw_uld_info = {
+ .name = DRV_NAME,
+ .add = c4iw_uld_add,
+ .rx_handler = c4iw_uld_rx_handler,
+ .state_change = c4iw_uld_state_change,
+ .control = c4iw_uld_control,
+};
+
+static int __init c4iw_init_module(void)
+{
+ int err;
+
+ err = c4iw_cm_init();
+ if (err)
+ return err;
+
+ c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
+ if (!c4iw_debugfs_root)
+ printk(KERN_WARNING MOD
+ "could not create debugfs entry, continuing\n");
+
+ if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
+ c4iw_nl_cb_table))
+ pr_err("%s[%u]: Failed to add netlink callback\n"
+ , __func__, __LINE__);
+
+ err = iwpm_init(RDMA_NL_C4IW);
+ if (err) {
+ pr_err("port mapper initialization failed with %d\n", err);
+ ibnl_remove_client(RDMA_NL_C4IW);
+ c4iw_cm_term();
+ debugfs_remove_recursive(c4iw_debugfs_root);
+ return err;
+ }
+
+ cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
+
+ return 0;
+}
+
+static void __exit c4iw_exit_module(void)
+{
+ struct uld_ctx *ctx, *tmp;
+
+ mutex_lock(&dev_mutex);
+ list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
+ if (ctx->dev)
+ c4iw_remove(ctx);
+ kfree(ctx);
+ }
+ mutex_unlock(&dev_mutex);
+ cxgb4_unregister_uld(CXGB4_ULD_RDMA);
+ iwpm_exit(RDMA_NL_C4IW);
+ ibnl_remove_client(RDMA_NL_C4IW);
+ c4iw_cm_term();
+ debugfs_remove_recursive(c4iw_debugfs_root);
+}
+
+module_init(c4iw_init_module);
+module_exit(c4iw_exit_module);
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
new file mode 100644
index 00000000000..d61d0a18f78
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <net/sock.h>
+
+#include "iw_cxgb4.h"
+
+static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
+ struct c4iw_qp *qhp,
+ struct t4_cqe *err_cqe,
+ enum ib_event_type ib_event)
+{
+ struct ib_event event;
+ struct c4iw_qp_attributes attrs;
+ unsigned long flag;
+
+ printk(KERN_ERR MOD "AE qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ CQE_QPID(err_cqe), CQE_OPCODE(err_cqe),
+ CQE_STATUS(err_cqe), CQE_TYPE(err_cqe),
+ CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
+
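+ /*
+ * An AE on an RTS QP moves it to TERMINATE before the affiliated
+ * event and the CQ completion handler are delivered below.
+ */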
+ if (qhp->attr.state == C4IW_QP_STATE_RTS) {
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 0);
+ }
+
+ event.event = ib_event;
+ event.device = chp->ibcq.device;
+ if (ib_event == IB_EVENT_CQ_ERR)
+ event.element.cq = &chp->ibcq;
+ else
+ event.element.qp = &qhp->ibqp;
+ if (qhp->ibqp.event_handler)
+ (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
+
+ spin_lock_irqsave(&chp->comp_handler_lock, flag);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+}
+
+void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
+{
+ struct c4iw_cq *chp;
+ struct c4iw_qp *qhp;
+ u32 cqid;
+
+ spin_lock_irq(&dev->lock);
+ qhp = get_qhp(dev, CQE_QPID(err_cqe));
+ if (!qhp) {
+ printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ CQE_QPID(err_cqe),
+ CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
+ CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
+ CQE_WRID_LOW(err_cqe));
+ spin_unlock_irq(&dev->lock);
+ goto out;
+ }
+
+ if (SQ_TYPE(err_cqe))
+ cqid = qhp->attr.scq;
+ else
+ cqid = qhp->attr.rcq;
+ chp = get_chp(dev, cqid);
+ if (!chp) {
+ printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ cqid, CQE_QPID(err_cqe),
+ CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
+ CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
+ CQE_WRID_LOW(err_cqe));
+ spin_unlock_irq(&dev->lock);
+ goto out;
+ }
+
+ c4iw_qp_add_ref(&qhp->ibqp);
+ atomic_inc(&chp->refcnt);
+ spin_unlock_irq(&dev->lock);
+
+ /* Bad incoming write */
+ if (RQ_TYPE(err_cqe) &&
+ (CQE_OPCODE(err_cqe) == FW_RI_RDMA_WRITE)) {
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_REQ_ERR);
+ goto done;
+ }
+
+ switch (CQE_STATUS(err_cqe)) {
+
+ /* Completion Events */
+ case T4_ERR_SUCCESS:
+ printk(KERN_ERR MOD "AE with status 0!\n");
+ break;
+
+ case T4_ERR_STAG:
+ case T4_ERR_PDID:
+ case T4_ERR_QPID:
+ case T4_ERR_ACCESS:
+ case T4_ERR_WRAP:
+ case T4_ERR_BOUND:
+ case T4_ERR_INVALIDATE_SHARED_MR:
+ case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_ACCESS_ERR);
+ break;
+
+ /* Device Fatal Errors */
+ case T4_ERR_ECC:
+ case T4_ERR_ECC_PSTAG:
+ case T4_ERR_INTERNAL_ERR:
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_DEVICE_FATAL);
+ break;
+
+ /* QP Fatal Errors */
+ case T4_ERR_OUT_OF_RQE:
+ case T4_ERR_PBL_ADDR_BOUND:
+ case T4_ERR_CRC:
+ case T4_ERR_MARKER:
+ case T4_ERR_PDU_LEN_ERR:
+ case T4_ERR_DDP_VERSION:
+ case T4_ERR_RDMA_VERSION:
+ case T4_ERR_OPCODE:
+ case T4_ERR_DDP_QUEUE_NUM:
+ case T4_ERR_MSN:
+ case T4_ERR_TBIT:
+ case T4_ERR_MO:
+ case T4_ERR_MSN_GAP:
+ case T4_ERR_MSN_RANGE:
+ case T4_ERR_RQE_ADDR_BOUND:
+ case T4_ERR_IRD_OVERFLOW:
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
+ break;
+
+ default:
+ printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n",
+ CQE_STATUS(err_cqe), qhp->wq.sq.qid);
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
+ break;
+ }
+done:
+ if (atomic_dec_and_test(&chp->refcnt))
+ wake_up(&chp->wait);
+ c4iw_qp_rem_ref(&qhp->ibqp);
+out:
+ return;
+}
+
+int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
+{
+ struct c4iw_cq *chp;
+ unsigned long flag;
+
+ chp = get_chp(dev, qid);
+ if (chp) {
+ spin_lock_irqsave(&chp->comp_handler_lock, flag);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+ } else
+ PDBG("%s unknown cqid 0x%x\n", __func__, qid);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
new file mode 100644
index 00000000000..0161ae6ad62
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/id_table.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2011 Chelsio Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include "iw_cxgb4.h"
+
+#define RANDOM_SKIP 16
+
+/*
+ * Trivial bitmap-based allocator. If the random flag is set, the
+ * allocator is designed to:
+ * - pseudo-randomize the id returned such that it is not trivially predictable.
+ * - avoid reuse of recently used id (at the expense of predictability)
+ */
+u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
+{
+ unsigned long flags;
+ u32 obj;
+
+ spin_lock_irqsave(&alloc->lock, flags);
+
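+ /* Search circularly: from the last hint to the end, then wrap. */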
+ obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
+ if (obj >= alloc->max)
+ obj = find_first_zero_bit(alloc->table, alloc->max);
+
+ if (obj < alloc->max) {
+ if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
+ alloc->last += prandom_u32() % RANDOM_SKIP;
+ else
+ alloc->last = obj + 1;
+ if (alloc->last >= alloc->max)
+ alloc->last = 0;
+ set_bit(obj, alloc->table);
+ obj += alloc->start;
+ } else
+ obj = -1;
+
+ spin_unlock_irqrestore(&alloc->lock, flags);
+ return obj;
+}
+
+void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
+{
+ unsigned long flags;
+
+ obj -= alloc->start;
+ BUG_ON((int)obj < 0);
+
+ spin_lock_irqsave(&alloc->lock, flags);
+ clear_bit(obj, alloc->table);
+ spin_unlock_irqrestore(&alloc->lock, flags);
+}
+
+int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
+ u32 reserved, u32 flags)
+{
+ int i;
+
+ alloc->start = start;
+ alloc->flags = flags;
+ if (flags & C4IW_ID_TABLE_F_RANDOM)
+ alloc->last = prandom_u32() % RANDOM_SKIP;
+ else
+ alloc->last = 0;
+ alloc->max = num;
+ spin_lock_init(&alloc->lock);
+ alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long),
+ GFP_KERNEL);
+ if (!alloc->table)
+ return -ENOMEM;
+
+ bitmap_zero(alloc->table, num);
+ if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
+ for (i = 0; i < reserved; ++i)
+ set_bit(i, alloc->table);
+
+ return 0;
+}
+
+void c4iw_id_table_free(struct c4iw_id_table *alloc)
+{
+ kfree(alloc->table);
+}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
new file mode 100644
index 00000000000..361fff7a074
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IW_CXGB4_H__
+#define __IW_CXGB4_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/completion.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/inet.h>
+#include <linux/wait.h>
+#include <linux/kref.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+
+#include <asm/byteorder.h>
+
+#include <net/net_namespace.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
+
+#include "cxgb4.h"
+#include "cxgb4_uld.h"
+#include "l2t.h"
+#include "user.h"
+
+#define DRV_NAME "iw_cxgb4"
+#define MOD DRV_NAME ":"
+
+extern int c4iw_debug;
+#define PDBG(fmt, args...) \
+do { \
+ if (c4iw_debug) \
+ printk(MOD fmt, ## args); \
+} while (0)
+
+#include "t4.h"
+
+#define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->pbl.start)
+#define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->rq.start)
+
+static inline void *cplhdr(struct sk_buff *skb)
+{
+ return skb->data;
+}
+
+#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */
+#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */
+
+struct c4iw_id_table {
+ u32 flags;
+ u32 start; /* logical minimal id */
+ u32 last; /* hint for find */
+ u32 max;
+ spinlock_t lock;
+ unsigned long *table;
+};
+
+struct c4iw_resource {
+ struct c4iw_id_table tpt_table;
+ struct c4iw_id_table qid_table;
+ struct c4iw_id_table pdid_table;
+};
+
+struct c4iw_qid_list {
+ struct list_head entry;
+ u32 qid;
+};
+
+struct c4iw_dev_ucontext {
+ struct list_head qpids;
+ struct list_head cqids;
+ struct mutex lock;
+};
+
+enum c4iw_rdev_flags {
+ T4_FATAL_ERROR = (1<<0),
+ T4_STATUS_PAGE_DISABLED = (1<<1),
+};
+
+struct c4iw_stat {
+ u64 total;
+ u64 cur;
+ u64 max;
+ u64 fail;
+};
+
+struct c4iw_stats {
+ struct mutex lock;
+ struct c4iw_stat qid;
+ struct c4iw_stat pd;
+ struct c4iw_stat stag;
+ struct c4iw_stat pbl;
+ struct c4iw_stat rqt;
+ struct c4iw_stat ocqp;
+ u64 db_full;
+ u64 db_empty;
+ u64 db_drop;
+ u64 db_state_transitions;
+ u64 db_fc_interruptions;
+ u64 tcam_full;
+ u64 act_ofld_conn_fails;
+ u64 pas_ofld_conn_fails;
+};
+
+struct c4iw_rdev {
+ struct c4iw_resource resource;
+ unsigned long qpshift;
+ u32 qpmask;
+ unsigned long cqshift;
+ u32 cqmask;
+ struct c4iw_dev_ucontext uctx;
+ struct gen_pool *pbl_pool;
+ struct gen_pool *rqt_pool;
+ struct gen_pool *ocqp_pool;
+ u32 flags;
+ struct cxgb4_lld_info lldi;
+ unsigned long bar2_pa;
+ void __iomem *bar2_kva;
+ unsigned long oc_mw_pa;
+ void __iomem *oc_mw_kva;
+ struct c4iw_stats stats;
+ struct t4_dev_status_page *status_page;
+};
+
+static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
+{
+ return rdev->flags & T4_FATAL_ERROR;
+}
+
+static inline int c4iw_num_stags(struct c4iw_rdev *rdev)
+{
+ return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5));
+}
+
+#define C4IW_WR_TO (30*HZ)
+
+struct c4iw_wr_wait {
+ struct completion completion;
+ int ret;
+};
+
+static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
+{
+ wr_waitp->ret = 0;
+ init_completion(&wr_waitp->completion);
+}
+
+static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
+{
+ wr_waitp->ret = ret;
+ complete(&wr_waitp->completion);
+}
+
+static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
+ struct c4iw_wr_wait *wr_waitp,
+ u32 hwtid, u32 qpid,
+ const char *func)
+{
+ unsigned to = C4IW_WR_TO;
+ int ret;
+
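+ /*
+ * Wait indefinitely for the firmware reply, quadrupling the timeout
+ * after each expiry; only bail out (-EIO) if the adapter has hit a
+ * fatal error.
+ */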
+ do {
+ ret = wait_for_completion_timeout(&wr_waitp->completion, to);
+ if (!ret) {
+ printk(KERN_ERR MOD "%s - Device %s not responding - "
+ "tid %u qpid %u\n", func,
+ pci_name(rdev->lldi.pdev), hwtid, qpid);
+ if (c4iw_fatal_error(rdev)) {
+ wr_waitp->ret = -EIO;
+ break;
+ }
+ to = to << 2;
+ }
+ } while (!ret);
+ if (wr_waitp->ret)
+ PDBG("%s: FW reply %d tid %u qpid %u\n",
+ pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+ return wr_waitp->ret;
+}
+
+enum db_state {
+ NORMAL = 0,
+ FLOW_CONTROL = 1,
+ RECOVERY = 2,
+ STOPPED = 3
+};
+
+struct c4iw_dev {
+ struct ib_device ibdev;
+ struct c4iw_rdev rdev;
+ u32 device_cap_flags;
+ struct idr cqidr;
+ struct idr qpidr;
+ struct idr mmidr;
+ spinlock_t lock;
+ struct mutex db_mutex;
+ struct dentry *debugfs_root;
+ enum db_state db_state;
+ struct idr hwtid_idr;
+ struct idr atid_idr;
+ struct idr stid_idr;
+ struct list_head db_fc_list;
+};
+
+static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct c4iw_dev, ibdev);
+}
+
+static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev)
+{
+ return container_of(rdev, struct c4iw_dev, rdev);
+}
+
+static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid)
+{
+ return idr_find(&rhp->cqidr, cqid);
+}
+
+static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid)
+{
+ return idr_find(&rhp->qpidr, qpid);
+}
+
+static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
+{
+ return idr_find(&rhp->mmidr, mmid);
+}
+
+static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id, int lock)
+{
+ int ret;
+
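+ /*
+ * Preload the idr outside the spinlock so the GFP_ATOMIC allocation
+ * under the lock can draw from the preallocated layer.
+ */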
+ if (lock) {
+ idr_preload(GFP_KERNEL);
+ spin_lock_irq(&rhp->lock);
+ }
+
+ ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC);
+
+ if (lock) {
+ spin_unlock_irq(&rhp->lock);
+ idr_preload_end();
+ }
+
+ BUG_ON(ret == -ENOSPC);
+ return ret < 0 ? ret : 0;
+}
+
+static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ return _insert_handle(rhp, idr, handle, id, 1);
+}
+
+static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ return _insert_handle(rhp, idr, handle, id, 0);
+}
+
+static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
+ u32 id, int lock)
+{
+ if (lock)
+ spin_lock_irq(&rhp->lock);
+ idr_remove(idr, id);
+ if (lock)
+ spin_unlock_irq(&rhp->lock);
+}
+
+static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
+{
+ _remove_handle(rhp, idr, id, 1);
+}
+
+static inline void remove_handle_nolock(struct c4iw_dev *rhp,
+ struct idr *idr, u32 id)
+{
+ _remove_handle(rhp, idr, id, 0);
+}
+
+struct c4iw_pd {
+ struct ib_pd ibpd;
+ u32 pdid;
+ struct c4iw_dev *rhp;
+};
+
+static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct c4iw_pd, ibpd);
+}
+
+struct tpt_attributes {
+ u64 len;
+ u64 va_fbo;
+ enum fw_ri_mem_perms perms;
+ u32 stag;
+ u32 pdid;
+ u32 qpid;
+ u32 pbl_addr;
+ u32 pbl_size;
+ u32 state:1;
+ u32 type:2;
+ u32 rsvd:1;
+ u32 remote_invaliate_disable:1;
+ u32 zbva:1;
+ u32 mw_bind_enable:1;
+ u32 page_size:5;
+};
+
+struct c4iw_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct c4iw_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct c4iw_mr, ibmr);
+}
+
+struct c4iw_mw {
+ struct ib_mw ibmw;
+ struct c4iw_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct c4iw_mw, ibmw);
+}
+
+struct c4iw_fr_page_list {
+ struct ib_fast_reg_page_list ibpl;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ dma_addr_t dma_addr;
+ struct c4iw_dev *dev;
+ int pll_len;
+};
+
+static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
+ struct ib_fast_reg_page_list *ibpl)
+{
+ return container_of(ibpl, struct c4iw_fr_page_list, ibpl);
+}
+
+struct c4iw_cq {
+ struct ib_cq ibcq;
+ struct c4iw_dev *rhp;
+ struct t4_cq cq;
+ spinlock_t lock;
+ spinlock_t comp_handler_lock;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+};
+
+static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct c4iw_cq, ibcq);
+}
+
+struct c4iw_mpa_attributes {
+ u8 initiator;
+ u8 recv_marker_enabled;
+ u8 xmit_marker_enabled;
+ u8 crc_enabled;
+ u8 enhanced_rdma_conn;
+ u8 version;
+ u8 p2p_type;
+};
+
+struct c4iw_qp_attributes {
+ u32 scq;
+ u32 rcq;
+ u32 sq_num_entries;
+ u32 rq_num_entries;
+ u32 sq_max_sges;
+ u32 sq_max_sges_rdma_write;
+ u32 rq_max_sges;
+ u32 state;
+ u8 enable_rdma_read;
+ u8 enable_rdma_write;
+ u8 enable_bind;
+ u8 enable_mmid0_fastreg;
+ u32 max_ord;
+ u32 max_ird;
+ u32 pd;
+ u32 next_state;
+ char terminate_buffer[52];
+ u32 terminate_msg_len;
+ u8 is_terminate_local;
+ struct c4iw_mpa_attributes mpa_attr;
+ struct c4iw_ep *llp_stream_handle;
+ u8 layer_etype;
+ u8 ecode;
+ u16 sq_db_inc;
+ u16 rq_db_inc;
+ u8 send_term;
+};
+
+struct c4iw_qp {
+ struct ib_qp ibqp;
+ struct list_head db_fc_entry;
+ struct c4iw_dev *rhp;
+ struct c4iw_ep *ep;
+ struct c4iw_qp_attributes attr;
+ struct t4_wq wq;
+ spinlock_t lock;
+ struct mutex mutex;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+ struct timer_list timer;
+ int sq_sig_all;
+};
+
+static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct c4iw_qp, ibqp);
+}
+
+struct c4iw_ucontext {
+ struct ib_ucontext ibucontext;
+ struct c4iw_dev_ucontext uctx;
+ u32 key;
+ spinlock_t mmap_lock;
+ struct list_head mmaps;
+};
+
+static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
+{
+ return container_of(c, struct c4iw_ucontext, ibucontext);
+}
+
+struct c4iw_mm_entry {
+ struct list_head entry;
+ u64 addr;
+ u32 key;
+ unsigned len;
+};
+
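+/*
+ * Pending user-space mmaps of queue and doorbell memory are tracked as
+ * (key, addr, len) entries on the ucontext's mmaps list; an entry is
+ * removed here when the corresponding mmap is performed.
+ */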
+static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
+ u32 key, unsigned len)
+{
+ struct list_head *pos, *nxt;
+ struct c4iw_mm_entry *mm;
+
+ spin_lock(&ucontext->mmap_lock);
+ list_for_each_safe(pos, nxt, &ucontext->mmaps) {
+
+ mm = list_entry(pos, struct c4iw_mm_entry, entry);
+ if (mm->key == key && mm->len == len) {
+ list_del_init(&mm->entry);
+ spin_unlock(&ucontext->mmap_lock);
+ PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
+ key, (unsigned long long) mm->addr, mm->len);
+ return mm;
+ }
+ }
+ spin_unlock(&ucontext->mmap_lock);
+ return NULL;
+}
+
+static inline void insert_mmap(struct c4iw_ucontext *ucontext,
+ struct c4iw_mm_entry *mm)
+{
+ spin_lock(&ucontext->mmap_lock);
+ PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
+ mm->key, (unsigned long long) mm->addr, mm->len);
+ list_add_tail(&mm->entry, &ucontext->mmaps);
+ spin_unlock(&ucontext->mmap_lock);
+}
+
+enum c4iw_qp_attr_mask {
+ C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
+ C4IW_QP_ATTR_SQ_DB = 1<<1,
+ C4IW_QP_ATTR_RQ_DB = 1<<2,
+ C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
+ C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
+ C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
+ C4IW_QP_ATTR_MAX_ORD = 1 << 11,
+ C4IW_QP_ATTR_MAX_IRD = 1 << 12,
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
+ C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
+ C4IW_QP_ATTR_MPA_ATTR = 1 << 24,
+ C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
+ C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ |
+ C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
+ C4IW_QP_ATTR_MAX_ORD |
+ C4IW_QP_ATTR_MAX_IRD |
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE |
+ C4IW_QP_ATTR_STREAM_MSG_BUFFER |
+ C4IW_QP_ATTR_MPA_ATTR |
+ C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE)
+};
+
+int c4iw_modify_qp(struct c4iw_dev *rhp,
+ struct c4iw_qp *qhp,
+ enum c4iw_qp_attr_mask mask,
+ struct c4iw_qp_attributes *attrs,
+ int internal);
+
+enum c4iw_qp_state {
+ C4IW_QP_STATE_IDLE,
+ C4IW_QP_STATE_RTS,
+ C4IW_QP_STATE_ERROR,
+ C4IW_QP_STATE_TERMINATE,
+ C4IW_QP_STATE_CLOSING,
+ C4IW_QP_STATE_TOT
+};
+
+static inline int c4iw_convert_state(enum ib_qp_state ib_state)
+{
+ switch (ib_state) {
+ case IB_QPS_RESET:
+ case IB_QPS_INIT:
+ return C4IW_QP_STATE_IDLE;
+ case IB_QPS_RTS:
+ return C4IW_QP_STATE_RTS;
+ case IB_QPS_SQD:
+ return C4IW_QP_STATE_CLOSING;
+ case IB_QPS_SQE:
+ return C4IW_QP_STATE_TERMINATE;
+ case IB_QPS_ERR:
+ return C4IW_QP_STATE_ERROR;
+ default:
+ return -1;
+ }
+}
+
+static inline int to_ib_qp_state(int c4iw_qp_state)
+{
+ switch (c4iw_qp_state) {
+ case C4IW_QP_STATE_IDLE:
+ return IB_QPS_INIT;
+ case C4IW_QP_STATE_RTS:
+ return IB_QPS_RTS;
+ case C4IW_QP_STATE_CLOSING:
+ return IB_QPS_SQD;
+ case C4IW_QP_STATE_TERMINATE:
+ return IB_QPS_SQE;
+ case C4IW_QP_STATE_ERROR:
+ return IB_QPS_ERR;
+ }
+ return IB_QPS_ERR;
+}
+
+static inline u32 c4iw_ib_to_tpt_access(int a)
+{
+ return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
+ (a & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0) |
+ (a & IB_ACCESS_LOCAL_WRITE ? FW_RI_MEM_ACCESS_LOCAL_WRITE : 0) |
+ FW_RI_MEM_ACCESS_LOCAL_READ;
+}
+
+static inline u32 c4iw_ib_to_tpt_bind_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0);
+}
+
+enum c4iw_mmid_state {
+ C4IW_STAG_STATE_VALID,
+ C4IW_STAG_STATE_INVALID
+};
+
+#define C4IW_NODE_DESC "cxgb4 Chelsio Communications"
+
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+
+#define MPA_MAX_PRIVATE_DATA 256
+#define MPA_ENHANCED_RDMA_CONN 0x10
+#define MPA_REJECT 0x20
+#define MPA_CRC 0x40
+#define MPA_MARKERS 0x80
+#define MPA_FLAGS_MASK 0xE0
+
+#define MPA_V2_PEER2PEER_MODEL 0x8000
+#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000
+#define MPA_V2_RDMA_WRITE_RTR 0x8000
+#define MPA_V2_RDMA_READ_RTR 0x4000
+#define MPA_V2_IRD_ORD_MASK 0x3FFF
+
+#define c4iw_put_ep(ep) { \
+ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
+ ep, atomic_read(&((ep)->kref.refcount))); \
+ WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
+ kref_put(&((ep)->kref), _c4iw_free_ep); \
+}
+
+#define c4iw_get_ep(ep) { \
+ PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
+ ep, atomic_read(&((ep)->kref.refcount))); \
+ kref_get(&((ep)->kref)); \
+}
+void _c4iw_free_ep(struct kref *kref);
+
+struct mpa_message {
+ u8 key[16];
+ u8 flags;
+ u8 revision;
+ __be16 private_data_size;
+ u8 private_data[0];
+};
+
+struct mpa_v2_conn_params {
+ __be16 ird;
+ __be16 ord;
+};
+
+struct terminate_message {
+ u8 layer_etype;
+ u8 ecode;
+ __be16 hdrct_rsvd;
+ u8 len_hdrs[0];
+};
+
+#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
+
+enum c4iw_layers_types {
+ LAYER_RDMAP = 0x00,
+ LAYER_DDP = 0x10,
+ LAYER_MPA = 0x20,
+ RDMAP_LOCAL_CATA = 0x00,
+ RDMAP_REMOTE_PROT = 0x01,
+ RDMAP_REMOTE_OP = 0x02,
+ DDP_LOCAL_CATA = 0x00,
+ DDP_TAGGED_ERR = 0x01,
+ DDP_UNTAGGED_ERR = 0x02,
+ DDP_LLP = 0x03
+};
+
+enum c4iw_rdma_ecodes {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_BASE_BOUNDS = 0x01,
+ RDMAP_ACC_VIOL = 0x02,
+ RDMAP_STAG_NOT_ASSOC = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_VERS = 0x05,
+ RDMAP_INV_OPCODE = 0x06,
+ RDMAP_STREAM_CATA = 0x07,
+ RDMAP_GLOBAL_CATA = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum c4iw_ddp_ecodes {
+ DDPT_INV_STAG = 0x00,
+ DDPT_BASE_BOUNDS = 0x01,
+ DDPT_STAG_NOT_ASSOC = 0x02,
+ DDPT_TO_WRAP = 0x03,
+ DDPT_INV_VERS = 0x04,
+ DDPU_INV_QN = 0x01,
+ DDPU_INV_MSN_NOBUF = 0x02,
+ DDPU_INV_MSN_RANGE = 0x03,
+ DDPU_INV_MO = 0x04,
+ DDPU_MSG_TOOBIG = 0x05,
+ DDPU_INV_VERS = 0x06
+};
+
+enum c4iw_mpa_ecodes {
+ MPA_CRC_ERR = 0x02,
+ MPA_MARKER_ERR = 0x03,
+ MPA_LOCAL_CATA = 0x05,
+ MPA_INSUFF_IRD = 0x06,
+ MPA_NOMATCH_RTR = 0x07,
+};
+
+enum c4iw_ep_state {
+ IDLE = 0,
+ LISTEN,
+ CONNECTING,
+ MPA_REQ_WAIT,
+ MPA_REQ_SENT,
+ MPA_REQ_RCVD,
+ MPA_REP_SENT,
+ FPDU_MODE,
+ ABORTING,
+ CLOSING,
+ MORIBUND,
+ DEAD,
+};
+
+enum c4iw_ep_flags {
+ PEER_ABORT_IN_PROGRESS = 0,
+ ABORT_REQ_IN_PROGRESS = 1,
+ RELEASE_RESOURCES = 2,
+ CLOSE_SENT = 3,
+ TIMEOUT = 4,
+ QP_REFERENCED = 5,
+ RELEASE_MAPINFO = 6,
+};
+
+enum c4iw_ep_history {
+ ACT_OPEN_REQ = 0,
+ ACT_OFLD_CONN = 1,
+ ACT_OPEN_RPL = 2,
+ ACT_ESTAB = 3,
+ PASS_ACCEPT_REQ = 4,
+ PASS_ESTAB = 5,
+ ABORT_UPCALL = 6,
+ ESTAB_UPCALL = 7,
+ CLOSE_UPCALL = 8,
+ ULP_ACCEPT = 9,
+ ULP_REJECT = 10,
+ TIMEDOUT = 11,
+ PEER_ABORT = 12,
+ PEER_CLOSE = 13,
+ CONNREQ_UPCALL = 14,
+ ABORT_CONN = 15,
+ DISCONN_UPCALL = 16,
+ EP_DISC_CLOSE = 17,
+ EP_DISC_ABORT = 18,
+ CONN_RPL_UPCALL = 19,
+ ACT_RETRY_NOMEM = 20,
+ ACT_RETRY_INUSE = 21
+};
+
+struct c4iw_ep_common {
+ struct iw_cm_id *cm_id;
+ struct c4iw_qp *qp;
+ struct c4iw_dev *dev;
+ enum c4iw_ep_state state;
+ struct kref kref;
+ struct mutex mutex;
+ struct sockaddr_storage local_addr;
+ struct sockaddr_storage remote_addr;
+ struct sockaddr_storage mapped_local_addr;
+ struct sockaddr_storage mapped_remote_addr;
+ struct c4iw_wr_wait wr_wait;
+ unsigned long flags;
+ unsigned long history;
+};
+
+struct c4iw_listen_ep {
+ struct c4iw_ep_common com;
+ unsigned int stid;
+ int backlog;
+};
+
+struct c4iw_ep {
+ struct c4iw_ep_common com;
+ struct c4iw_ep *parent_ep;
+ struct timer_list timer;
+ struct list_head entry;
+ unsigned int atid;
+ u32 hwtid;
+ u32 snd_seq;
+ u32 rcv_seq;
+ struct l2t_entry *l2t;
+ struct dst_entry *dst;
+ struct sk_buff *mpa_skb;
+ struct c4iw_mpa_attributes mpa_attr;
+ u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
+ unsigned int mpa_pkt_len;
+ u32 ird;
+ u32 ord;
+ u32 smac_idx;
+ u32 tx_chan;
+ u32 mtu;
+ u16 mss;
+ u16 emss;
+ u16 plen;
+ u16 rss_qid;
+ u16 txq_idx;
+ u16 ctrlq_idx;
+ u8 tos;
+ u8 retry_with_mpa_v1;
+ u8 tried_with_mpa_v1;
+ unsigned int retry_count;
+ int snd_win;
+ int rcv_win;
+};
+
+static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
+ const char *msg)
+{
+
+#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr))
+#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port)
+#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr))
+#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port)
+
+ if (c4iw_debug) {
+ switch (epc->local_addr.ss_family) {
+ case AF_INET:
+ PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
+ func, msg, SINA(&epc->local_addr),
+ SINP(&epc->local_addr),
+ SINP(&epc->mapped_local_addr),
+ SINA(&epc->remote_addr),
+ SINP(&epc->remote_addr),
+ SINP(&epc->mapped_remote_addr));
+ break;
+ case AF_INET6:
+ PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
+ func, msg, SIN6A(&epc->local_addr),
+ SIN6P(&epc->local_addr),
+ SIN6P(&epc->mapped_local_addr),
+ SIN6A(&epc->remote_addr),
+ SIN6P(&epc->remote_addr),
+ SIN6P(&epc->mapped_remote_addr));
+ break;
+ default:
+ break;
+ }
+ }
+#undef SINA
+#undef SINP
+#undef SIN6A
+#undef SIN6P
+}
+
+static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline int compute_wscale(int win)
+{
+ int wscale = 0;
+
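+ /* Smallest TCP window scale (capped at 14) such that 65535 << wscale covers the requested window. */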
+ while (wscale < 14 && (65535<<wscale) < win)
+ wscale++;
+ return wscale;
+}
+
+static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
+{
+#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
+ return infop->vr->ocq.size > 0;
+#else
+ return 0;
+#endif
+}
+
+u32 c4iw_id_alloc(struct c4iw_id_table *alloc);
+void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj);
+int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
+ u32 reserved, u32 flags);
+void c4iw_id_table_free(struct c4iw_id_table *alloc);
+
+typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
+
+int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
+ struct l2t_entry *l2t);
+void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
+ struct c4iw_dev_ucontext *uctx);
+u32 c4iw_get_resource(struct c4iw_id_table *id_table);
+void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
+int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
+int c4iw_pblpool_create(struct c4iw_rdev *rdev);
+int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
+int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev);
+void c4iw_pblpool_destroy(struct c4iw_rdev *rdev);
+void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev);
+void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev);
+void c4iw_destroy_resource(struct c4iw_resource *rscp);
+int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
+int c4iw_register_device(struct c4iw_dev *dev);
+void c4iw_unregister_device(struct c4iw_dev *dev);
+int __init c4iw_cm_init(void);
+void c4iw_cm_term(void);
+void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
+ struct c4iw_dev_ucontext *uctx);
+void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
+ struct c4iw_dev_ucontext *uctx);
+int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
+int c4iw_destroy_listen(struct iw_cm_id *cm_id);
+int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+void c4iw_qp_add_ref(struct ib_qp *qp);
+void c4iw_qp_rem_ref(struct ib_qp *qp);
+void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
+struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
+ struct ib_device *device,
+ int page_list_len);
+struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
+int c4iw_dealloc_mw(struct ib_mw *mw);
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
+ u64 length, u64 virt, int acc,
+ struct ib_udata *udata);
+struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start);
+int c4iw_reregister_phys_mem(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc, u64 *iova_start);
+int c4iw_dereg_mr(struct ib_mr *ib_mr);
+int c4iw_destroy_cq(struct ib_cq *ib_cq);
+struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
+ int vector,
+ struct ib_ucontext *ib_context,
+ struct ib_udata *udata);
+int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int c4iw_destroy_qp(struct ib_qp *ib_qp);
+struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata);
+int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn);
+u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size);
+void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
+u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size);
+void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
+u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
+void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
+int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp);
+void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
+int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
+int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_flush_sq(struct c4iw_qp *qhp);
+int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
+u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
+int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
+u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
+void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
+ struct c4iw_dev_ucontext *uctx);
+u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
+void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
+ struct c4iw_dev_ucontext *uctx);
+void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
+
+extern struct cxgb4_client t4c_client;
+extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
+extern int c4iw_max_read_depth;
+extern int db_fc_threshold;
+extern int db_coalescing_threshold;
+extern int use_dsgl;
+
+
+#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
new file mode 100644
index 00000000000..ec7a2988a70
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -0,0 +1,955 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <rdma/ib_umem.h>
+#include <linux/atomic.h>
+
+#include "iw_cxgb4.h"
+
+int use_dsgl = 0;
+module_param(use_dsgl, int, 0644);
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
+
+#define T4_ULPTX_MIN_IO 32
+#define C4IW_MAX_INLINE_SIZE 96
+#define T4_ULPTX_MAX_DMA 1024
+#define C4IW_INLINE_THRESHOLD 128
+
+static int inline_threshold = C4IW_INLINE_THRESHOLD;
+module_param(inline_threshold, int, 0644);
+MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)");
+
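+/*
+ * Write one chunk of adapter memory with a single ULP_TX_MEM_WRITE work
+ * request whose DSGL points at already DMA-mapped memory.  addr is an
+ * offset in 32-byte units; when wait is set the WR requests a completion
+ * and we block until the firmware replies.
+ */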
+static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
+ u32 len, dma_addr_t data, int wait)
+{
+ struct sk_buff *skb;
+ struct ulp_mem_io *req;
+ struct ulptx_sgl *sgl;
+ u8 wr_len;
+ int ret = 0;
+ struct c4iw_wr_wait wr_wait;
+
+ addr &= 0x7FFFFFF;
+
+ if (wait)
+ c4iw_init_wr_wait(&wr_wait);
+ wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
+
+ skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+ if (!skb)
+ return -ENOMEM;
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+ req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
+ memset(req, 0, wr_len);
+ INIT_ULPTX_WR(req, wr_len, 0, 0);
+ req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
+ (wait ? FW_WR_COMPL(1) : 0));
+ req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
+ req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
+ req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
+ req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));
+ req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5));
+ req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16));
+ req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr));
+
+ sgl = (struct ulptx_sgl *)(req + 1);
+ sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) |
+ ULPTX_NSGE(1));
+ sgl->len0 = cpu_to_be32(len);
+ sgl->addr0 = cpu_to_be64(data);
+
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret)
+ return ret;
+ if (wait)
+ ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+ return ret;
+}
+
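+/*
+ * Write adapter memory using immediate data: the buffer is split into
+ * C4IW_MAX_INLINE_SIZE (96 byte) pieces, each carried inline in its own
+ * ULP_TX_MEM_WRITE work request (96 bytes = 3 x 32-byte units, hence the
+ * "i * 3" address step); only the last WR requests a completion.
+ */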
+static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
+ void *data)
+{
+ struct sk_buff *skb;
+ struct ulp_mem_io *req;
+ struct ulptx_idata *sc;
+ u8 wr_len, *to_dp, *from_dp;
+ int copy_len, num_wqe, i, ret = 0;
+ struct c4iw_wr_wait wr_wait;
+ __be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
+
+ if (is_t4(rdev->lldi.adapter_type))
+ cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1));
+ else
+ cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1));
+
+ addr &= 0x7FFFFFF;
+ PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
+ num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
+ c4iw_init_wr_wait(&wr_wait);
+ for (i = 0; i < num_wqe; i++) {
+
+ copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE :
+ len;
+ wr_len = roundup(sizeof *req + sizeof *sc +
+ roundup(copy_len, T4_ULPTX_MIN_IO), 16);
+
+ skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+ req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
+ memset(req, 0, wr_len);
+ INIT_ULPTX_WR(req, wr_len, 0, 0);
+
+ if (i == (num_wqe-1)) {
+ req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
+ FW_WR_COMPL(1));
+ req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait;
+ } else
+ req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR));
+ req->wr.wr_mid = cpu_to_be32(
+ FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
+
+ req->cmd = cmd;
+ req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(
+ DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO)));
+ req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr),
+ 16));
+ req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr + i * 3));
+
+ sc = (struct ulptx_idata *)(req + 1);
+ sc->cmd_more = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_IMM));
+ sc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO));
+
+ to_dp = (u8 *)(sc + 1);
+ from_dp = (u8 *)data + i * C4IW_MAX_INLINE_SIZE;
+ if (data)
+ memcpy(to_dp, from_dp, copy_len);
+ else
+ memset(to_dp, 0, copy_len);
+ if (copy_len % T4_ULPTX_MIN_IO)
+ memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
+ (copy_len % T4_ULPTX_MIN_IO));
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret)
+ return ret;
+ len -= C4IW_MAX_INLINE_SIZE;
+ }
+
+ ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+ return ret;
+}
+
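+/*
+ * DMA-map the caller's buffer once, then push it to the adapter in
+ * 32-byte-aligned chunks of up to T4_ULPTX_MAX_DMA bytes via the DSGL
+ * path; any remainder at or below the inline threshold is written with
+ * the immediate-data path instead.
+ */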
+static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+{
+ u32 remain = len;
+ u32 dmalen;
+ int ret = 0;
+ dma_addr_t daddr;
+ dma_addr_t save;
+
+ daddr = dma_map_single(&rdev->lldi.pdev->dev, data, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&rdev->lldi.pdev->dev, daddr))
+ return -1;
+ save = daddr;
+
+ while (remain > inline_threshold) {
+ if (remain < T4_ULPTX_MAX_DMA) {
+ if (remain & ~T4_ULPTX_MIN_IO)
+ dmalen = remain & ~(T4_ULPTX_MIN_IO-1);
+ else
+ dmalen = remain;
+ } else
+ dmalen = T4_ULPTX_MAX_DMA;
+ remain -= dmalen;
+ ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr,
+ !remain);
+ if (ret)
+ goto out;
+ addr += dmalen >> 5;
+ data += dmalen;
+ daddr += dmalen;
+ }
+ if (remain)
+ ret = _c4iw_write_mem_inline(rdev, addr, remain, data);
+out:
+ dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE);
+ return ret;
+}
+
+/*
+ * write len bytes of data into addr (32B aligned address)
+ * If data is NULL, clear len bytes of memory to zero.
+ */
+static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
+ void *data)
+{
+ if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
+ if (len > inline_threshold) {
+ if (_c4iw_write_mem_dma(rdev, addr, len, data)) {
+ printk_ratelimited(KERN_WARNING
+ "%s: dma map"
+ " failure (non fatal)\n",
+ pci_name(rdev->lldi.pdev));
+ return _c4iw_write_mem_inline(rdev, addr, len,
+ data);
+ } else
+ return 0;
+ } else
+ return _c4iw_write_mem_inline(rdev, addr, len, data);
+ } else
+ return _c4iw_write_mem_inline(rdev, addr, len, data);
+}
+
+/*
+ * Build and write a TPT entry.
+ * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size,
+ * pbl_size and pbl_addr
+ * OUT: stag index
+ */
+static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
+ u32 *stag, u8 stag_state, u32 pdid,
+ enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
+ int bind_enabled, u32 zbva, u64 to,
+ u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr)
+{
+ int err;
+ struct fw_ri_tpte tpt;
+ u32 stag_idx;
+ static atomic_t key;
+
+ if (c4iw_fatal_error(rdev))
+ return -EIO;
+
+ stag_state = stag_state > 0;
+ stag_idx = (*stag) >> 8;
+
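+ /* A stag is the TPT index in its upper 24 bits plus a rolling 8-bit key in the low byte; allocate a fresh index when the caller passes T4_STAG_UNSET. */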
+ if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
+ stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
+ if (!stag_idx) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.fail++;
+ mutex_unlock(&rdev->stats.lock);
+ return -ENOMEM;
+ }
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.cur += 32;
+ if (rdev->stats.stag.cur > rdev->stats.stag.max)
+ rdev->stats.stag.max = rdev->stats.stag.cur;
+ mutex_unlock(&rdev->stats.lock);
+ *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
+ }
+ PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ __func__, stag_state, type, pdid, stag_idx);
+
+ /* write TPT entry */
+ if (reset_tpt_entry)
+ memset(&tpt, 0, sizeof(tpt));
+ else {
+ tpt.valid_to_pdid = cpu_to_be32(F_FW_RI_TPTE_VALID |
+ V_FW_RI_TPTE_STAGKEY((*stag & M_FW_RI_TPTE_STAGKEY)) |
+ V_FW_RI_TPTE_STAGSTATE(stag_state) |
+ V_FW_RI_TPTE_STAGTYPE(type) | V_FW_RI_TPTE_PDID(pdid));
+ tpt.locread_to_qpid = cpu_to_be32(V_FW_RI_TPTE_PERM(perm) |
+ (bind_enabled ? F_FW_RI_TPTE_MWBINDEN : 0) |
+ V_FW_RI_TPTE_ADDRTYPE((zbva ? FW_RI_ZERO_BASED_TO :
+ FW_RI_VA_BASED_TO))|
+ V_FW_RI_TPTE_PS(page_size));
+ tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ V_FW_RI_TPTE_PBLADDR(PBL_OFF(rdev, pbl_addr)>>3));
+ tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt.va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ }
+ err = write_adapter_mem(rdev, stag_idx +
+ (rdev->lldi.vr->stag.start >> 5),
+ sizeof(tpt), &tpt);
+
+ if (reset_tpt_entry) {
+ c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.cur -= 32;
+ mutex_unlock(&rdev->stats.lock);
+ }
+ return err;
+}
+
+static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
+ u32 pbl_addr, u32 pbl_size)
+{
+ int err;
+
+ PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ __func__, pbl_addr, rdev->lldi.vr->pbl.start,
+ pbl_size);
+
+ err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl);
+ return err;
+}
+
+static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
+ u32 pbl_addr)
+{
+ return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
+ pbl_size, pbl_addr);
+}
+
+static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
+{
+ *stag = T4_STAG_UNSET;
+ return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
+ 0UL, 0, 0, 0, 0);
+}
+
+static int deallocate_window(struct c4iw_rdev *rdev, u32 stag)
+{
+ return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
+ 0);
+}
+
+static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
+ u32 pbl_size, u32 pbl_addr)
+{
+ *stag = T4_STAG_UNSET;
+ return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
+ 0UL, 0, 0, pbl_size, pbl_addr);
+}
+
+static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
+{
+ u32 mmid;
+
+ mhp->attr.state = 1;
+ mhp->attr.stag = stag;
+ mmid = stag >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+}
+
+static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
+ struct c4iw_mr *mhp, int shift)
+{
+ u32 stag = T4_STAG_UNSET;
+ int ret;
+
+ ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
+ FW_RI_STAG_NSMR, mhp->attr.perms,
+ mhp->attr.mw_bind_enable, mhp->attr.zbva,
+ mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ if (ret)
+ return ret;
+
+ ret = finish_mem_reg(mhp, stag);
+ if (ret)
+ dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ return ret;
+}
+
+static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
+ struct c4iw_mr *mhp, int shift, int npages)
+{
+ u32 stag;
+ int ret;
+
+ if (npages > mhp->attr.pbl_size)
+ return -ENOMEM;
+
+ stag = mhp->attr.stag;
+ ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
+ FW_RI_STAG_NSMR, mhp->attr.perms,
+ mhp->attr.mw_bind_enable, mhp->attr.zbva,
+ mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ if (ret)
+ return ret;
+
+ ret = finish_mem_reg(mhp, stag);
+ if (ret)
+ dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+
+ return ret;
+}
+
+static int alloc_pbl(struct c4iw_mr *mhp, int npages)
+{
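+ /* Each PBL entry is a 64-bit page address, so reserve npages * 8 bytes from the PBL pool. */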
+ mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
+ npages << 3);
+
+ if (!mhp->attr.pbl_addr)
+ return -ENOMEM;
+
+ mhp->attr.pbl_size = npages;
+
+ return 0;
+}
+
+static int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf, u64 *iova_start,
+ u64 *total_size, int *npages,
+ int *shift, __be64 **page_list)
+{
+ u64 mask;
+ int i, j, n;
+
+ mask = 0;
+ *total_size = 0;
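+ /* Collect every buffer start/end boundary into mask; its lowest set bit (at or above PAGE_SHIFT) is the largest page size that divides all of them. */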
+ for (i = 0; i < num_phys_buf; ++i) {
+ if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
+ return -EINVAL;
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & ~PAGE_MASK))
+ return -EINVAL;
+ *total_size += buffer_list[i].size;
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ else
+ mask |= buffer_list[i].addr & PAGE_MASK;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
+ else
+ mask |= (buffer_list[i].addr + buffer_list[i].size +
+ PAGE_SIZE - 1) & PAGE_MASK;
+ }
+
+ if (*total_size > 0xFFFFFFFFULL)
+ return -ENOMEM;
+
+ /* Find largest page shift we can use to cover buffers */
+ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
+ if ((1ULL << *shift) & mask)
+ break;
+
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
+ buffer_list[0].addr &= ~0ull << *shift;
+
+ *npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ *npages += (buffer_list[i].size +
+ (1ULL << *shift) - 1) >> *shift;
+
+ if (!*npages)
+ return -EINVAL;
+
+ *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
+ if (!*page_list)
+ return -ENOMEM;
+
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
+ ++j)
+ (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
+ ((u64) j << *shift));
+
+ PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, (unsigned long long)*iova_start,
+ (unsigned long long)mask, *shift, (unsigned long long)*total_size,
+ *npages);
+
+ return 0;
+
+}
+
+int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
+ struct ib_pd *pd, struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start)
+{
+
+ struct c4iw_mr mh, *mhp;
+ struct c4iw_pd *php;
+ struct c4iw_dev *rhp;
+ __be64 *page_list = NULL;
+ int shift = 0;
+ u64 total_size;
+ int npages;
+ int ret;
+
+ PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
+
+ /* There can be no memory windows */
+ if (atomic_read(&mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_c4iw_mr(mr);
+ rhp = mhp->rhp;
+ php = to_c4iw_pd(mr->pd);
+
+ /* make sure we are on the same adapter */
+ if (rhp != php->rhp)
+ return -EINVAL;
+
+ memcpy(&mh, mhp, sizeof *mhp);
+
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ php = to_c4iw_pd(pd);
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
+ mh.attr.perms = c4iw_ib_to_tpt_access(acc);
+ mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
+ IB_ACCESS_MW_BIND;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ ret = build_phys_page_list(buffer_list, num_phys_buf,
+ iova_start,
+ &total_size, &npages,
+ &shift, &page_list);
+ if (ret)
+ return ret;
+ }
+
+ ret = reregister_mem(rhp, php, &mh, shift, npages);
+ kfree(page_list);
+ if (ret)
+ return ret;
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ mhp->attr.pdid = php->pdid;
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ }
+
+ return 0;
+}
+
+struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start)
+{
+ __be64 *page_list;
+ int shift;
+ u64 total_size;
+ int npages;
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+ int ret;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+
+ /* First check that we have enough alignment */
+ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (num_phys_buf > 1 &&
+ ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+ &total_size, &npages, &shift,
+ &page_list);
+ if (ret)
+ goto err;
+
+ ret = alloc_pbl(mhp, npages);
+ if (ret) {
+ kfree(page_list);
+ goto err;
+ }
+
+ ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
+ npages);
+ kfree(page_list);
+ if (ret)
+ goto err_pbl;
+
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ ret = register_mem(rhp, php, mhp, shift);
+ if (ret)
+ goto err_pbl;
+
+ return &mhp->ibmr;
+
+err_pbl:
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+
+err:
+ kfree(mhp);
+ return ERR_PTR(ret);
+
+}
+
+struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+ int ret;
+ u32 stag = T4_STAG_UNSET;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ mhp->attr.mw_bind_enable = (acc&IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND;
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = 0;
+ mhp->attr.page_size = 0;
+ mhp->attr.len = ~0UL;
+ mhp->attr.pbl_size = 0;
+
+ ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
+ FW_RI_STAG_NSMR, mhp->attr.perms,
+ mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0);
+ if (ret)
+ goto err1;
+
+ ret = finish_mem_reg(mhp, stag);
+ if (ret)
+ goto err2;
+ return &mhp->ibmr;
+err2:
+ dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+err1:
+ kfree(mhp);
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata)
+{
+ __be64 *pages;
+ int shift, n, len;
+ int i, k, entry;
+ int err = 0;
+ struct scatterlist *sg;
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+
+ if (length == ~0ULL)
+ return ERR_PTR(-EINVAL);
+
+ if ((length + start) < start)
+ return ERR_PTR(-EINVAL);
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ if (IS_ERR(mhp->umem)) {
+ err = PTR_ERR(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+ }
+
+ shift = ffs(mhp->umem->page_size) - 1;
+
+ n = mhp->umem->nmap;
+ err = alloc_pbl(mhp, n);
+ if (err)
+ goto err;
+
+ pages = (__be64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_pbl;
+ }
+
+ i = n = 0;
+
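+ /* Walk the pinned umem SG list, packing DMA page addresses into the scratch page and flushing it to the adapter PBL each time it fills. */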
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+ mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = write_pbl(&mhp->rhp->rdev,
+ pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ if (i)
+ err = write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+
+pbl_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_pbl;
+
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = virt;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = length;
+
+ err = register_mem(rhp, php, mhp, shift);
+ if (err)
+ goto err_pbl;
+
+ return &mhp->ibmr;
+
+err_pbl:
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+
+err:
+ ib_umem_release(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+}
+
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mw *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret;
+
+ if (type != IB_MW_TYPE_1)
+ return ERR_PTR(-EINVAL);
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+ ret = allocate_window(&rhp->rdev, &stag, php->pdid);
+ if (ret) {
+ kfree(mhp);
+ return ERR_PTR(ret);
+ }
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = FW_RI_STAG_MW;
+ mhp->attr.stag = stag;
+ mmid = (stag) >> 8;
+ mhp->ibmw.rkey = stag;
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ deallocate_window(&rhp->rdev, mhp->attr.stag);
+ kfree(mhp);
+ return ERR_PTR(-ENOMEM);
+ }
+ PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ return &(mhp->ibmw);
+}
+
+int c4iw_dealloc_mw(struct ib_mw *mw)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_mw *mhp;
+ u32 mmid;
+
+ mhp = to_c4iw_mw(mw);
+ rhp = mhp->rhp;
+ mmid = (mw->rkey) >> 8;
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ deallocate_window(&rhp->rdev, mhp->attr.stag);
+ kfree(mhp);
+ PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
+ return 0;
+}
+
+struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret = 0;
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ mhp->rhp = rhp;
+ ret = alloc_pbl(mhp, pbl_depth);
+ if (ret)
+ goto err1;
+ mhp->attr.pbl_size = pbl_depth;
+ ret = allocate_stag(&rhp->rdev, &stag, php->pdid,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ if (ret)
+ goto err2;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = FW_RI_STAG_NSMR;
+ mhp->attr.stag = stag;
+ mhp->attr.state = 1;
+ mmid = (stag) >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ ret = -ENOMEM;
+ goto err3;
+ }
+
+ PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ return &(mhp->ibmr);
+err3:
+ dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+err2:
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+err1:
+ kfree(mhp);
+err:
+ return ERR_PTR(ret);
+}
+
+struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
+ int page_list_len)
+{
+ struct c4iw_fr_page_list *c4pl;
+ struct c4iw_dev *dev = to_c4iw_dev(device);
+ dma_addr_t dma_addr;
+ int pll_len = roundup(page_list_len * sizeof(u64), 32);
+
+ c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL);
+ if (!c4pl)
+ return ERR_PTR(-ENOMEM);
+
+ c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev,
+ pll_len, &dma_addr,
+ GFP_KERNEL);
+ if (!c4pl->ibpl.page_list) {
+ kfree(c4pl);
+ return ERR_PTR(-ENOMEM);
+ }
+ dma_unmap_addr_set(c4pl, mapping, dma_addr);
+ c4pl->dma_addr = dma_addr;
+ c4pl->dev = dev;
+ c4pl->pll_len = pll_len;
+
+ PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+ __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+ &c4pl->dma_addr);
+
+ return &c4pl->ibpl;
+}
+
+void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
+{
+ struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+
+ PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+ __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+ &c4pl->dma_addr);
+
+ dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
+ c4pl->pll_len,
+ c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
+ kfree(c4pl);
+}
+
+int c4iw_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_mr *mhp;
+ u32 mmid;
+
+ PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ /* There can be no memory windows */
+ if (atomic_read(&ib_mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_c4iw_mr(ib_mr);
+ rhp = mhp->rhp;
+ mmid = mhp->attr.stag >> 8;
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ if (mhp->attr.pbl_size)
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+ if (mhp->kva)
+ kfree((void *) (unsigned long) mhp->kva);
+ if (mhp->umem)
+ ib_umem_release(mhp->umem);
+ PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ kfree(mhp);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
new file mode 100644
index 00000000000..b1d305338de
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+#include <linux/io.h>
+
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "iw_cxgb4.h"
+
+static int fastreg_support = 1;
+module_param(fastreg_support, int, 0644);
+MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
+
+static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ return ERR_PTR(-ENOSYS);
+}
+
+static int c4iw_ah_destroy(struct ib_ah *ah)
+{
+ return -ENOSYS;
+}
+
+static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return -ENOSYS;
+}
+
+static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ return -ENOSYS;
+}
+
+static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags,
+ u8 port_num, struct ib_wc *in_wc,
+ struct ib_grh *in_grh, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ return -ENOSYS;
+}
+
+static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct c4iw_dev *rhp = to_c4iw_dev(context->device);
+ struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
+ struct c4iw_mm_entry *mm, *tmp;
+
+ PDBG("%s context %p\n", __func__, context);
+ list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
+ kfree(mm);
+ c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
+ kfree(ucontext);
+ return 0;
+}
+
+static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct c4iw_ucontext *context;
+ struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
+ static int warned;
+ struct c4iw_alloc_ucontext_resp uresp;
+ int ret = 0;
+ struct c4iw_mm_entry *mm = NULL;
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
+ INIT_LIST_HEAD(&context->mmaps);
+ spin_lock_init(&context->mmap_lock);
+
+ if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
+ if (!warned++)
+ pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
+ } else {
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+ if (!mm) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ uresp.status_page_size = PAGE_SIZE;
+
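+ /* Hand userspace an opaque key it can pass back through mmap() to map the device status page. */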
+ spin_lock(&context->mmap_lock);
+ uresp.status_page_key = context->key;
+ context->key += PAGE_SIZE;
+ spin_unlock(&context->mmap_lock);
+
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
+ if (ret)
+ goto err_mm;
+
+ mm->key = uresp.status_page_key;
+ mm->addr = virt_to_phys(rhp->rdev.status_page);
+ mm->len = PAGE_SIZE;
+ insert_mmap(context, mm);
+ }
+ return &context->ibucontext;
+err_mm:
+ kfree(mm);
+err_free:
+ kfree(context);
+err:
+ return ERR_PTR(ret);
+}
+
+static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ int len = vma->vm_end - vma->vm_start;
+ u32 key = vma->vm_pgoff << PAGE_SHIFT;
+ struct c4iw_rdev *rdev;
+ int ret = 0;
+ struct c4iw_mm_entry *mm;
+ struct c4iw_ucontext *ucontext;
+ u64 addr;
+
+ PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+ key, len);
+
+ if (vma->vm_start & (PAGE_SIZE-1))
+ return -EINVAL;
+
+ rdev = &(to_c4iw_dev(context->device)->rdev);
+ ucontext = to_c4iw_ucontext(context);
+
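+ /* The pgoff encodes a key previously given to userspace and recorded with insert_mmap(); look it up (and remove it) to recover the address to map. */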
+ mm = remove_mmap(ucontext, key, len);
+ if (!mm)
+ return -EINVAL;
+ addr = mm->addr;
+ kfree(mm);
+
+ if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
+ pci_resource_len(rdev->lldi.pdev, 0)))) {
+
+ /*
+ * MA_SYNC register...
+ */
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
+ pci_resource_len(rdev->lldi.pdev, 2)))) {
+
+ /*
+ * Map user DB or OCQP memory...
+ */
+ if (addr >= rdev->oc_mw_pa)
+ vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
+ else {
+ if (is_t5(rdev->lldi.adapter_type))
+ vma->vm_page_prot =
+ t4_pgprot_wc(vma->vm_page_prot);
+ else
+ vma->vm_page_prot =
+ pgprot_noncached(vma->vm_page_prot);
+ }
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else {
+
+ /*
+ * Map WQ or CQ contig dma memory...
+ */
+ ret = remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+
+ return ret;
+}
+
+static int c4iw_deallocate_pd(struct ib_pd *pd)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
+ mutex_lock(&rhp->rdev.stats.lock);
+ rhp->rdev.stats.pd.cur--;
+ mutex_unlock(&rhp->rdev.stats.lock);
+ kfree(php);
+ return 0;
+}
+
+static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct c4iw_pd *php;
+ u32 pdid;
+ struct c4iw_dev *rhp;
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ rhp = (struct c4iw_dev *) ibdev;
+ pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
+ if (!pdid)
+ return ERR_PTR(-EINVAL);
+ php = kzalloc(sizeof(*php), GFP_KERNEL);
+ if (!php) {
+ c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
+ return ERR_PTR(-ENOMEM);
+ }
+ php->pdid = pdid;
+ php->rhp = rhp;
+ if (context) {
+ if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) {
+ c4iw_deallocate_pd(&php->ibpd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+ mutex_lock(&rhp->rdev.stats.lock);
+ rhp->rdev.stats.pd.cur++;
+ if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
+ rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
+ mutex_unlock(&rhp->rdev.stats.lock);
+ PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
+ return &php->ibpd;
+}
+
+static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+ *pkey = 0;
+ return 0;
+}
+
+static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct c4iw_dev *dev;
+
+ PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
+ __func__, ibdev, port, index, gid);
+ dev = to_c4iw_dev(ibdev);
+ BUG_ON(port == 0);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ memcpy(&(gid->raw[0]), dev->rdev.lldi.ports[port-1]->dev_addr, 6);
+ return 0;
+}
+
+static int c4iw_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+
+ struct c4iw_dev *dev;
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+
+ dev = to_c4iw_dev(ibdev);
+ memset(props, 0, sizeof *props);
+ memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
+ props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
+ props->fw_ver = dev->rdev.lldi.fw_vers;
+ props->device_cap_flags = dev->device_cap_flags;
+ props->page_size_cap = T4_PAGESIZE_MASK;
+ props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor;
+ props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
+ props->max_mr_size = T4_MAX_MR_SIZE;
+ props->max_qp = T4_MAX_NUM_QP;
+ props->max_qp_wr = T4_MAX_QP_DEPTH;
+ props->max_sge = T4_MAX_RECV_SGE;
+ props->max_sge_rd = 1;
+ props->max_qp_rd_atom = c4iw_max_read_depth;
+ props->max_qp_init_rd_atom = c4iw_max_read_depth;
+ props->max_cq = T4_MAX_NUM_CQ;
+ props->max_cqe = T4_MAX_CQ_DEPTH;
+ props->max_mr = c4iw_num_stags(&dev->rdev);
+ props->max_pd = T4_MAX_NUM_PD;
+ props->local_ca_ack_delay = 0;
+ props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl);
+
+ return 0;
+}
+
+static int c4iw_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct c4iw_dev *dev;
+ struct net_device *netdev;
+ struct in_device *inetdev;
+
+ PDBG("%s ibdev %p\n", __func__, ibdev);
+
+ dev = to_c4iw_dev(ibdev);
+ netdev = dev->rdev.lldi.ports[port-1];
+
+ memset(props, 0, sizeof(struct ib_port_attr));
+ props->max_mtu = IB_MTU_4096;
+ if (netdev->mtu >= 4096)
+ props->active_mtu = IB_MTU_4096;
+ else if (netdev->mtu >= 2048)
+ props->active_mtu = IB_MTU_2048;
+ else if (netdev->mtu >= 1024)
+ props->active_mtu = IB_MTU_1024;
+ else if (netdev->mtu >= 512)
+ props->active_mtu = IB_MTU_512;
+ else
+ props->active_mtu = IB_MTU_256;
+
+ if (!netif_carrier_ok(netdev))
+ props->state = IB_PORT_DOWN;
+ else {
+ inetdev = in_dev_get(netdev);
+ if (inetdev) {
+ if (inetdev->ifa_list)
+ props->state = IB_PORT_ACTIVE;
+ else
+ props->state = IB_PORT_INIT;
+ in_dev_put(inetdev);
+ } else
+ props->state = IB_PORT_INIT;
+ }
+
+ props->port_cap_flags =
+ IB_PORT_CM_SUP |
+ IB_PORT_SNMP_TUNNEL_SUP |
+ IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->active_width = 2;
+ props->active_speed = IB_SPEED_DDR;
+ props->max_msg_sz = -1;
+
+ return 0;
+}
+
+static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+ ibdev.dev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ return sprintf(buf, "%d\n",
+ CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
+}
+
+static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+ ibdev.dev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+
+ return sprintf(buf, "%u.%u.%u.%u\n",
+ FW_HDR_FW_VER_MAJOR_GET(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MINOR_GET(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_MICRO_GET(c4iw_dev->rdev.lldi.fw_vers),
+ FW_HDR_FW_VER_BUILD_GET(c4iw_dev->rdev.lldi.fw_vers));
+}
+
+static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+ ibdev.dev);
+ struct ethtool_drvinfo info;
+ struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
+
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ lldev->ethtool_ops->get_drvinfo(lldev, &info);
+ return sprintf(buf, "%s\n", info.driver);
+}
+
+static ssize_t show_board(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+ ibdev.dev);
+ PDBG("%s dev 0x%p\n", __func__, dev);
+ return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
+ c4iw_dev->rdev.lldi.pdev->device);
+}
+
+static int c4iw_get_mib(struct ib_device *ibdev,
+ union rdma_protocol_stats *stats)
+{
+ struct tp_tcp_stats v4, v6;
+ struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
+
+ cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6);
+ memset(stats, 0, sizeof *stats);
+ stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs;
+ stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs;
+ stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs;
+ stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutSegs;
+
+ return 0;
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *c4iw_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+};
+
+int c4iw_register_device(struct c4iw_dev *dev)
+{
+ int ret;
+ int i;
+
+ PDBG("%s c4iw_dev %p\n", __func__, dev);
+ BUG_ON(!dev->rdev.lldi.ports[0]);
+ strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
+ memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
+ memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
+ dev->ibdev.owner = THIS_MODULE;
+ dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+ if (fastreg_support)
+ dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ dev->ibdev.local_dma_lkey = 0;
+ dev->ibdev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV);
+ dev->ibdev.node_type = RDMA_NODE_RNIC;
+ memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
+ dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
+ dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
+ dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
+ dev->ibdev.query_device = c4iw_query_device;
+ dev->ibdev.query_port = c4iw_query_port;
+ dev->ibdev.query_pkey = c4iw_query_pkey;
+ dev->ibdev.query_gid = c4iw_query_gid;
+ dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext;
+ dev->ibdev.mmap = c4iw_mmap;
+ dev->ibdev.alloc_pd = c4iw_allocate_pd;
+ dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
+ dev->ibdev.create_ah = c4iw_ah_create;
+ dev->ibdev.destroy_ah = c4iw_ah_destroy;
+ dev->ibdev.create_qp = c4iw_create_qp;
+ dev->ibdev.modify_qp = c4iw_ib_modify_qp;
+ dev->ibdev.query_qp = c4iw_ib_query_qp;
+ dev->ibdev.destroy_qp = c4iw_destroy_qp;
+ dev->ibdev.create_cq = c4iw_create_cq;
+ dev->ibdev.destroy_cq = c4iw_destroy_cq;
+ dev->ibdev.resize_cq = c4iw_resize_cq;
+ dev->ibdev.poll_cq = c4iw_poll_cq;
+ dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
+ dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
+ dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
+ dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
+ dev->ibdev.dereg_mr = c4iw_dereg_mr;
+ dev->ibdev.alloc_mw = c4iw_alloc_mw;
+ dev->ibdev.bind_mw = c4iw_bind_mw;
+ dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
+ dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr;
+ dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
+ dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
+ dev->ibdev.attach_mcast = c4iw_multicast_attach;
+ dev->ibdev.detach_mcast = c4iw_multicast_detach;
+ dev->ibdev.process_mad = c4iw_process_mad;
+ dev->ibdev.req_notify_cq = c4iw_arm_cq;
+ dev->ibdev.post_send = c4iw_post_send;
+ dev->ibdev.post_recv = c4iw_post_receive;
+ dev->ibdev.get_protocol_stats = c4iw_get_mib;
+ dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
+
+ dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+ if (!dev->ibdev.iwcm)
+ return -ENOMEM;
+
+ dev->ibdev.iwcm->connect = c4iw_connect;
+ dev->ibdev.iwcm->accept = c4iw_accept_cr;
+ dev->ibdev.iwcm->reject = c4iw_reject_cr;
+ dev->ibdev.iwcm->create_listen = c4iw_create_listen;
+ dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen;
+ dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
+ dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
+ dev->ibdev.iwcm->get_qp = c4iw_get_qp;
+
+ ret = ib_register_device(&dev->ibdev, NULL);
+ if (ret)
+ goto bail1;
+
+ for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
+ ret = device_create_file(&dev->ibdev.dev,
+ c4iw_class_attributes[i]);
+ if (ret)
+ goto bail2;
+ }
+ return 0;
+bail2:
+ ib_unregister_device(&dev->ibdev);
+bail1:
+ kfree(dev->ibdev.iwcm);
+ return ret;
+}
+
+void c4iw_unregister_device(struct c4iw_dev *dev)
+{
+ int i;
+
+ PDBG("%s c4iw_dev %p\n", __func__, dev);
+ for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
+ device_remove_file(&dev->ibdev.dev,
+ c4iw_class_attributes[i]);
+ ib_unregister_device(&dev->ibdev);
+ kfree(dev->ibdev.iwcm);
+ return;
+}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
new file mode 100644
index 00000000000..086f62f5dc9
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -0,0 +1,1808 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+
+#include "iw_cxgb4.h"
+
+static int db_delay_usecs = 1;
+module_param(db_delay_usecs, int, 0644);
+MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
+
+static int ocqp_support = 1;
+module_param(ocqp_support, int, 0644);
+MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
+
+int db_fc_threshold = 1000;
+module_param(db_fc_threshold, int, 0644);
+MODULE_PARM_DESC(db_fc_threshold,
+ "QP count/threshold that triggers"
+ " automatic db flow control mode (default = 1000)");
+
+int db_coalescing_threshold;
+module_param(db_coalescing_threshold, int, 0644);
+MODULE_PARM_DESC(db_coalescing_threshold,
+ "QP count/threshold that triggers"
+ " disabling db coalescing (default = 0)");
+
+static int max_fr_immd = T4_MAX_FR_IMMD;
+module_param(max_fr_immd, int, 0644);
+MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
+
+static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
+{
+ unsigned long flag;
+ spin_lock_irqsave(&qhp->lock, flag);
+ qhp->attr.state = state;
+ spin_unlock_irqrestore(&qhp->lock, flag);
+}
+
+static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+ c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize);
+}
+
+static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+ dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
+ pci_unmap_addr(sq, mapping));
+}
+
+static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+ if (t4_sq_onchip(sq))
+ dealloc_oc_sq(rdev, sq);
+ else
+ dealloc_host_sq(rdev, sq);
+}
+
+static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+ if (!ocqp_support || !ocqp_supported(&rdev->lldi))
+ return -ENOSYS;
+ sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
+ if (!sq->dma_addr)
+ return -ENOMEM;
+ sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr -
+ rdev->lldi.vr->ocq.start;
+ sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr -
+ rdev->lldi.vr->ocq.start);
+ sq->flags |= T4_SQ_ONCHIP;
+ return 0;
+}
+
+static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+ sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize,
+ &(sq->dma_addr), GFP_KERNEL);
+ if (!sq->queue)
+ return -ENOMEM;
+ sq->phys_addr = virt_to_phys(sq->queue);
+ pci_unmap_addr_set(sq, mapping, sq->dma_addr);
+ return 0;
+}
+
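+/*
+ * For user QPs, first try to place the SQ in on-chip memory; fall back to
+ * host DMA memory if on-chip allocation is unsupported or fails.
+ */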
+static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
+{
+ int ret = -ENOSYS;
+ if (user)
+ ret = alloc_oc_sq(rdev, sq);
+ if (ret)
+ ret = alloc_host_sq(rdev, sq);
+ return ret;
+}
+
+static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+ struct c4iw_dev_ucontext *uctx)
+{
+ /*
+ * uP clears EQ contexts when the connection exits rdma mode,
+ * so no need to post a RESET WR for these EQs.
+ */
+ dma_free_coherent(&(rdev->lldi.pdev->dev),
+ wq->rq.memsize, wq->rq.queue,
+ dma_unmap_addr(&wq->rq, mapping));
+ dealloc_sq(rdev, &wq->sq);
+ c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+ kfree(wq->rq.sw_rq);
+ kfree(wq->sq.sw_sq);
+ c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+ c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+ return 0;
+}
+
+static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+ struct t4_cq *rcq, struct t4_cq *scq,
+ struct c4iw_dev_ucontext *uctx)
+{
+ int user = (uctx != &rdev->uctx);
+ struct fw_ri_res_wr *res_wr;
+ struct fw_ri_res *res;
+ int wr_len;
+ struct c4iw_wr_wait wr_wait;
+ struct sk_buff *skb;
+ int ret = 0;
+ int eqsize;
+
+ wq->sq.qid = c4iw_get_qpid(rdev, uctx);
+ if (!wq->sq.qid)
+ return -ENOMEM;
+
+ wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+ if (!wq->rq.qid) {
+ ret = -ENOMEM;
+ goto free_sq_qid;
+ }
+
+ if (!user) {
+ wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
+ GFP_KERNEL);
+ if (!wq->sq.sw_sq) {
+ ret = -ENOMEM;
+ goto free_rq_qid;
+ }
+
+ wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
+ GFP_KERNEL);
+ if (!wq->rq.sw_rq) {
+ ret = -ENOMEM;
+ goto free_sw_sq;
+ }
+ }
+
+ /*
+ * RQT must be a power of 2.
+ */
+ wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
+ wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+ if (!wq->rq.rqt_hwaddr) {
+ ret = -ENOMEM;
+ goto free_sw_rq;
+ }
+
+ ret = alloc_sq(rdev, &wq->sq, user);
+ if (ret)
+ goto free_hwaddr;
+ memset(wq->sq.queue, 0, wq->sq.memsize);
+ dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
+
+ wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
+ wq->rq.memsize, &(wq->rq.dma_addr),
+ GFP_KERNEL);
+ if (!wq->rq.queue) {
+ ret = -ENOMEM;
+ goto free_sq;
+ }
+ PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+ __func__, wq->sq.queue,
+ (unsigned long long)virt_to_phys(wq->sq.queue),
+ wq->rq.queue,
+ (unsigned long long)virt_to_phys(wq->rq.queue));
+ memset(wq->rq.queue, 0, wq->rq.memsize);
+ dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
+
+ wq->db = rdev->lldi.db_reg;
+ wq->gts = rdev->lldi.gts_reg;
+ if (user || is_t5(rdev->lldi.adapter_type)) {
+ u32 off;
+
+ off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
+ if (user) {
+ wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+ } else {
+ off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
+ wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+ }
+ off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
+ if (user) {
+ wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+ } else {
+ off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
+ wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+ }
+ }
+ wq->rdev = rdev;
+ wq->rq.msn = 1;
+
+ /* build fw_ri_res_wr */
+ wr_len = sizeof *res_wr + 2 * sizeof *res;
+
+ skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto free_dma;
+ }
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+ res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+ memset(res_wr, 0, wr_len);
+ res_wr->op_nres = cpu_to_be32(
+ FW_WR_OP(FW_RI_RES_WR) |
+ V_FW_RI_RES_WR_NRES(2) |
+ FW_WR_COMPL(1));
+ res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+ res_wr->cookie = (unsigned long) &wr_wait;
+ res = res_wr->res;
+ res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
+ res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+ /*
+ * eqsize is the number of 64B entries plus the status page size.
+ */
+ eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES;
+
+ res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
+ V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */
+ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */
+ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */
+ (t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) |
+ V_FW_RI_RES_WR_IQID(scq->cqid));
+ res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
+ V_FW_RI_RES_WR_DCAEN(0) |
+ V_FW_RI_RES_WR_DCACPU(0) |
+ V_FW_RI_RES_WR_FBMIN(2) |
+ V_FW_RI_RES_WR_FBMAX(2) |
+ V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
+ V_FW_RI_RES_WR_CIDXFTHRESH(0) |
+ V_FW_RI_RES_WR_EQSIZE(eqsize));
+ res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
+ res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
+ res++;
+ res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+ res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+ /*
+ * eqsize is the number of 64B entries plus the status page size.
+ */
+ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES;
+ res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
+ V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */
+ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */
+ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */
+ V_FW_RI_RES_WR_IQID(rcq->cqid));
+ res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
+ V_FW_RI_RES_WR_DCAEN(0) |
+ V_FW_RI_RES_WR_DCACPU(0) |
+ V_FW_RI_RES_WR_FBMIN(2) |
+ V_FW_RI_RES_WR_FBMAX(2) |
+ V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
+ V_FW_RI_RES_WR_CIDXFTHRESH(0) |
+ V_FW_RI_RES_WR_EQSIZE(eqsize));
+ res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+ res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+
+ c4iw_init_wr_wait(&wr_wait);
+
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret)
+ goto free_dma;
+ ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
+ if (ret)
+ goto free_dma;
+
+ PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n",
+ __func__, wq->sq.qid, wq->rq.qid, wq->db,
+ (__force unsigned long) wq->sq.udb,
+ (__force unsigned long) wq->rq.udb);
+
+ return 0;
+free_dma:
+ dma_free_coherent(&(rdev->lldi.pdev->dev),
+ wq->rq.memsize, wq->rq.queue,
+ dma_unmap_addr(&wq->rq, mapping));
+free_sq:
+ dealloc_sq(rdev, &wq->sq);
+free_hwaddr:
+ c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+free_sw_rq:
+ kfree(wq->rq.sw_rq);
+free_sw_sq:
+ kfree(wq->sq.sw_sq);
+free_rq_qid:
+ c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+free_sq_qid:
+ c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+ return ret;
+}
+
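+/*
+ * Copy the WR's SGE payload into the WQE as immediate data, wrapping at
+ * the end of the SQ ring as needed and zero-padding up to a 16-byte
+ * boundary.
+ */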
+static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
+ struct ib_send_wr *wr, int max, u32 *plenp)
+{
+ u8 *dstp, *srcp;
+ u32 plen = 0;
+ int i;
+ int rem, len;
+
+ dstp = (u8 *)immdp->data;
+ for (i = 0; i < wr->num_sge; i++) {
+ if ((plen + wr->sg_list[i].length) > max)
+ return -EMSGSIZE;
+ srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
+ plen += wr->sg_list[i].length;
+ rem = wr->sg_list[i].length;
+ while (rem) {
+ if (dstp == (u8 *)&sq->queue[sq->size])
+ dstp = (u8 *)sq->queue;
+ if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
+ len = rem;
+ else
+ len = (u8 *)&sq->queue[sq->size] - dstp;
+ memcpy(dstp, srcp, len);
+ dstp += len;
+ srcp += len;
+ rem -= len;
+ }
+ }
+ len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
+ if (len)
+ memset(dstp, 0, len);
+ immdp->op = FW_RI_DATA_IMMD;
+ immdp->r1 = 0;
+ immdp->r2 = 0;
+ immdp->immdlen = cpu_to_be32(plen);
+ *plenp = plen;
+ return 0;
+}
+
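+/*
+ * Build an immediate SGL from the ib_sge list directly in the WQE,
+ * wrapping at the end of the queue; optionally returns the total payload
+ * length via plenp.
+ */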
+static int build_isgl(__be64 *queue_start, __be64 *queue_end,
+ struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
+ int num_sge, u32 *plenp)
+
+{
+ int i;
+ u32 plen = 0;
+ __be64 *flitp = (__be64 *)isglp->sge;
+
+ for (i = 0; i < num_sge; i++) {
+ if ((plen + sg_list[i].length) < plen)
+ return -EMSGSIZE;
+ plen += sg_list[i].length;
+ *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
+ sg_list[i].length);
+ if (++flitp == queue_end)
+ flitp = queue_start;
+ *flitp = cpu_to_be64(sg_list[i].addr);
+ if (++flitp == queue_end)
+ flitp = queue_start;
+ }
+ *flitp = (__force __be64)0;
+ isglp->op = FW_RI_DATA_ISGL;
+ isglp->r1 = 0;
+ isglp->nsge = cpu_to_be16(num_sge);
+ isglp->r2 = 0;
+ if (plenp)
+ *plenp = plen;
+ return 0;
+}
+
+static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
+ struct ib_send_wr *wr, u8 *len16)
+{
+ u32 plen;
+ int size;
+ int ret;
+
+ if (wr->num_sge > T4_MAX_SEND_SGE)
+ return -EINVAL;
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.sendop_pkd = cpu_to_be32(
+ V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE));
+ else
+ wqe->send.sendop_pkd = cpu_to_be32(
+ V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND));
+ wqe->send.stag_inv = 0;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->send.sendop_pkd = cpu_to_be32(
+ V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE_INV));
+ else
+ wqe->send.sendop_pkd = cpu_to_be32(
+ V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_INV));
+ wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ wqe->send.r3 = 0;
+ wqe->send.r4 = 0;
+
+ plen = 0;
+ if (wr->num_sge) {
+ if (wr->send_flags & IB_SEND_INLINE) {
+ ret = build_immd(sq, wqe->send.u.immd_src, wr,
+ T4_MAX_SEND_INLINE, &plen);
+ if (ret)
+ return ret;
+ size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
+ plen;
+ } else {
+ ret = build_isgl((__be64 *)sq->queue,
+ (__be64 *)&sq->queue[sq->size],
+ wqe->send.u.isgl_src,
+ wr->sg_list, wr->num_sge, &plen);
+ if (ret)
+ return ret;
+ size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
+ wr->num_sge * sizeof(struct fw_ri_sge);
+ }
+ } else {
+ wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
+ wqe->send.u.immd_src[0].r1 = 0;
+ wqe->send.u.immd_src[0].r2 = 0;
+ wqe->send.u.immd_src[0].immdlen = 0;
+ size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+ plen = 0;
+ }
+ *len16 = DIV_ROUND_UP(size, 16);
+ wqe->send.plen = cpu_to_be32(plen);
+ return 0;
+}
+
+static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
+ struct ib_send_wr *wr, u8 *len16)
+{
+ u32 plen;
+ int size;
+ int ret;
+
+ if (wr->num_sge > T4_MAX_SEND_SGE)
+ return -EINVAL;
+ wqe->write.r2 = 0;
+ wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
+ wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+ if (wr->num_sge) {
+ if (wr->send_flags & IB_SEND_INLINE) {
+ ret = build_immd(sq, wqe->write.u.immd_src, wr,
+ T4_MAX_WRITE_INLINE, &plen);
+ if (ret)
+ return ret;
+ size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
+ plen;
+ } else {
+ ret = build_isgl((__be64 *)sq->queue,
+ (__be64 *)&sq->queue[sq->size],
+ wqe->write.u.isgl_src,
+ wr->sg_list, wr->num_sge, &plen);
+ if (ret)
+ return ret;
+ size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
+ wr->num_sge * sizeof(struct fw_ri_sge);
+ }
+ } else {
+ wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
+ wqe->write.u.immd_src[0].r1 = 0;
+ wqe->write.u.immd_src[0].r2 = 0;
+ wqe->write.u.immd_src[0].immdlen = 0;
+ size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+ plen = 0;
+ }
+ *len16 = DIV_ROUND_UP(size, 16);
+ wqe->write.plen = cpu_to_be32(plen);
+ return 0;
+}
+
+static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+{
+ if (wr->num_sge > 1)
+ return -EINVAL;
+ if (wr->num_sge) {
+ wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey);
+ wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr
+ >> 32));
+ wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr);
+ wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
+ wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
+ wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
+ >> 32));
+ wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr));
+ } else {
+ wqe->read.stag_src = cpu_to_be32(2);
+ wqe->read.to_src_hi = 0;
+ wqe->read.to_src_lo = 0;
+ wqe->read.stag_sink = cpu_to_be32(2);
+ wqe->read.plen = 0;
+ wqe->read.to_sink_hi = 0;
+ wqe->read.to_sink_lo = 0;
+ }
+ wqe->read.r2 = 0;
+ wqe->read.r5 = 0;
+ *len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
+ return 0;
+}
+
+static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
+ struct ib_recv_wr *wr, u8 *len16)
+{
+ int ret;
+
+ ret = build_isgl((__be64 *)qhp->wq.rq.queue,
+ (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
+ &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+ if (ret)
+ return ret;
+ *len16 = DIV_ROUND_UP(sizeof wqe->recv +
+ wr->num_sge * sizeof(struct fw_ri_sge), 16);
+ return 0;
+}
+
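+/*
+ * Build a fast-register WR. On T5 with DSGL enabled and a PBL larger than
+ * max_fr_immd, the page list is referenced via a DMA SGL; otherwise it is
+ * copied into the WQE as immediate data.
+ */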
+static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
+ struct ib_send_wr *wr, u8 *len16, u8 t5dev)
+{
+
+ struct fw_ri_immd *imdp;
+ __be64 *p;
+ int i;
+ int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+ int rem;
+
+ if (wr->wr.fast_reg.page_list_len >
+ t4_max_fr_depth(use_dsgl))
+ return -EINVAL;
+
+ wqe->fr.qpbinde_to_dcacpu = 0;
+ wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12;
+ wqe->fr.addr_type = FW_RI_VA_BASED_TO;
+ wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags);
+ wqe->fr.len_hi = 0;
+ wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length);
+ wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
+ wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+ wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
+ 0xffffffff);
+
+ if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
+ struct c4iw_fr_page_list *c4pl =
+ to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
+ struct fw_ri_dsgl *sglp;
+
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+ wr->wr.fast_reg.page_list->page_list[i] = (__force u64)
+ cpu_to_be64((u64)
+ wr->wr.fast_reg.page_list->page_list[i]);
+ }
+
+ sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
+ sglp->op = FW_RI_DATA_DSGL;
+ sglp->r1 = 0;
+ sglp->nsge = cpu_to_be16(1);
+ sglp->addr0 = cpu_to_be64(c4pl->dma_addr);
+ sglp->len0 = cpu_to_be32(pbllen);
+
+ *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
+ } else {
+ imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
+ imdp->op = FW_RI_DATA_IMMD;
+ imdp->r1 = 0;
+ imdp->r2 = 0;
+ imdp->immdlen = cpu_to_be32(pbllen);
+ p = (__be64 *)(imdp + 1);
+ rem = pbllen;
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+ *p = cpu_to_be64(
+ (u64)wr->wr.fast_reg.page_list->page_list[i]);
+ rem -= sizeof(*p);
+ if (++p == (__be64 *)&sq->queue[sq->size])
+ p = (__be64 *)sq->queue;
+ }
+ BUG_ON(rem < 0);
+ while (rem) {
+ *p = 0;
+ rem -= sizeof(*p);
+ if (++p == (__be64 *)&sq->queue[sq->size])
+ p = (__be64 *)sq->queue;
+ }
+ *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp)
+ + pbllen, 16);
+ }
+ return 0;
+}
+
+static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
+ u8 *len16)
+{
+ wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
+ wqe->inv.r2 = 0;
+ *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
+ return 0;
+}
+
+void c4iw_qp_add_ref(struct ib_qp *qp)
+{
+ PDBG("%s ib_qp %p\n", __func__, qp);
+ atomic_inc(&(to_c4iw_qp(qp)->refcnt));
+}
+
+void c4iw_qp_rem_ref(struct ib_qp *qp)
+{
+ PDBG("%s ib_qp %p\n", __func__, qp);
+ if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt)))
+ wake_up(&(to_c4iw_qp(qp)->wait));
+}
+
+static void add_to_fc_list(struct list_head *head, struct list_head *entry)
+{
+ if (list_empty(entry))
+ list_add_tail(entry, head);
+}
+
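+/*
+ * Ring the kernel SQ doorbell if doorbells are in the NORMAL state;
+ * otherwise queue the QP on the db flow-control list and accumulate the
+ * pidx increment for later.
+ */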
+static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qhp->rhp->lock, flags);
+ spin_lock(&qhp->lock);
+ if (qhp->rhp->db_state == NORMAL)
+ t4_ring_sq_db(&qhp->wq, inc,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ else {
+ add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+ qhp->wq.sq.wq_pidx_inc += inc;
+ }
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ return 0;
+}
+
+static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qhp->rhp->lock, flags);
+ spin_lock(&qhp->lock);
+ if (qhp->rhp->db_state == NORMAL)
+ t4_ring_rq_db(&qhp->wq, inc,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ else {
+ add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+ qhp->wq.rq.wq_pidx_inc += inc;
+ }
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ return 0;
+}
+
+int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ u8 len16 = 0;
+ enum fw_wr_opcodes fw_opcode = 0;
+ enum fw_ri_wr_flags fw_flags;
+ struct c4iw_qp *qhp;
+ union t4_wr *wqe = NULL;
+ u32 num_wrs;
+ struct t4_swsqe *swsqe;
+ unsigned long flag;
+ u16 idx = 0;
+
+ qhp = to_c4iw_qp(ibqp);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (t4_wq_in_error(&qhp->wq)) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = t4_sq_avail(&qhp->wq);
+ if (num_wrs == 0) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -ENOMEM;
+ }
+ while (wr) {
+ if (num_wrs == 0) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+ wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
+ qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+
+ fw_flags = 0;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
+ if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
+ fw_flags |= FW_RI_COMPLETION_FLAG;
+ swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_INV:
+ case IB_WR_SEND:
+ if (wr->send_flags & IB_SEND_FENCE)
+ fw_flags |= FW_RI_READ_FENCE_FLAG;
+ fw_opcode = FW_RI_SEND_WR;
+ if (wr->opcode == IB_WR_SEND)
+ swsqe->opcode = FW_RI_SEND;
+ else
+ swsqe->opcode = FW_RI_SEND_WITH_INV;
+ err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
+ break;
+ case IB_WR_RDMA_WRITE:
+ fw_opcode = FW_RI_RDMA_WRITE_WR;
+ swsqe->opcode = FW_RI_RDMA_WRITE;
+ err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ fw_opcode = FW_RI_RDMA_READ_WR;
+ swsqe->opcode = FW_RI_READ_REQ;
+ if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+ fw_flags = FW_RI_RDMA_READ_INVALIDATE;
+ else
+ fw_flags = 0;
+ err = build_rdma_read(wqe, wr, &len16);
+ if (err)
+ break;
+ swsqe->read_len = wr->sg_list[0].length;
+ if (!qhp->wq.sq.oldest_read)
+ qhp->wq.sq.oldest_read = swsqe;
+ break;
+ case IB_WR_FAST_REG_MR:
+ fw_opcode = FW_RI_FR_NSMR_WR;
+ swsqe->opcode = FW_RI_FAST_REGISTER;
+ err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16,
+ is_t5(
+ qhp->rhp->rdev.lldi.adapter_type) ?
+ 1 : 0);
+ break;
+ case IB_WR_LOCAL_INV:
+ if (wr->send_flags & IB_SEND_FENCE)
+ fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
+ fw_opcode = FW_RI_INV_LSTAG_WR;
+ swsqe->opcode = FW_RI_LOCAL_INV;
+ err = build_inv_stag(wqe, wr, &len16);
+ break;
+ default:
+ PDBG("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
+ err = -EINVAL;
+ }
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+ swsqe->idx = qhp->wq.sq.pidx;
+ swsqe->complete = 0;
+ swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+ qhp->sq_sig_all;
+ swsqe->flushed = 0;
+ swsqe->wr_id = wr->wr_id;
+
+ init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
+
+ PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
+ __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
+ swsqe->opcode, swsqe->read_len);
+ wr = wr->next;
+ num_wrs--;
+ t4_sq_produce(&qhp->wq, len16);
+ idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+ }
+ if (!qhp->rhp->rdev.status_page->db_off) {
+ t4_ring_sq_db(&qhp->wq, idx,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ } else {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_kernel_sq_db(qhp, idx);
+ }
+ return err;
+}
+
+int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int err = 0;
+ struct c4iw_qp *qhp;
+ union t4_recv_wr *wqe = NULL;
+ u32 num_wrs;
+ u8 len16 = 0;
+ unsigned long flag;
+ u16 idx = 0;
+
+ qhp = to_c4iw_qp(ibqp);
+ spin_lock_irqsave(&qhp->lock, flag);
+ if (t4_wq_in_error(&qhp->wq)) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -EINVAL;
+ }
+ num_wrs = t4_rq_avail(&qhp->wq);
+ if (num_wrs == 0) {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return -ENOMEM;
+ }
+ while (wr) {
+ if (wr->num_sge > T4_MAX_RECV_SGE) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+ wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
+ qhp->wq.rq.wq_pidx *
+ T4_EQ_ENTRY_SIZE);
+ if (num_wrs)
+ err = build_rdma_recv(qhp, wqe, wr, &len16);
+ else
+ err = -ENOMEM;
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+
+ qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
+
+ wqe->recv.opcode = FW_RI_RECV_WR;
+ wqe->recv.r1 = 0;
+ wqe->recv.wrid = qhp->wq.rq.pidx;
+ wqe->recv.r2[0] = 0;
+ wqe->recv.r2[1] = 0;
+ wqe->recv.r2[2] = 0;
+ wqe->recv.len16 = len16;
+ PDBG("%s cookie 0x%llx pidx %u\n", __func__,
+ (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
+ t4_rq_produce(&qhp->wq, len16);
+ idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+ wr = wr->next;
+ num_wrs--;
+ }
+ if (!qhp->rhp->rdev.status_page->db_off) {
+ t4_ring_rq_db(&qhp->wq, idx,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ } else {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_kernel_rq_db(qhp, idx);
+ }
+ return err;
+}
+
+int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
+{
+ return -ENOSYS;
+}
+
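+/*
+ * Map a hardware error CQE to the iWARP TERMINATE layer/etype and error
+ * code fields carried in the terminate message.
+ */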
+static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
+ u8 *ecode)
+{
+ int status;
+ int tagged;
+ int opcode;
+ int rqtype;
+ int send_inv;
+
+ if (!err_cqe) {
+ *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ return;
+ }
+
+ status = CQE_STATUS(err_cqe);
+ opcode = CQE_OPCODE(err_cqe);
+ rqtype = RQ_TYPE(err_cqe);
+ send_inv = (opcode == FW_RI_SEND_WITH_INV) ||
+ (opcode == FW_RI_SEND_WITH_SE_INV);
+ tagged = (opcode == FW_RI_RDMA_WRITE) ||
+ (rqtype && (opcode == FW_RI_READ_RESP));
+
+ switch (status) {
+ case T4_ERR_STAG:
+ if (send_inv) {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ } else {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_INV_STAG;
+ }
+ break;
+ case T4_ERR_PDID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ if ((opcode == FW_RI_SEND_WITH_INV) ||
+ (opcode == FW_RI_SEND_WITH_SE_INV))
+ *ecode = RDMAP_CANT_INV_STAG;
+ else
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
+ case T4_ERR_QPID:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_STAG_NOT_ASSOC;
+ break;
+ case T4_ERR_ACCESS:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_ACC_VIOL;
+ break;
+ case T4_ERR_WRAP:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_TO_WRAP;
+ break;
+ case T4_ERR_BOUND:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ } else {
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+ *ecode = RDMAP_BASE_BOUNDS;
+ }
+ break;
+ case T4_ERR_INVALIDATE_SHARED_MR:
+ case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_CANT_INV_STAG;
+ break;
+ case T4_ERR_ECC:
+ case T4_ERR_ECC_PSTAG:
+ case T4_ERR_INTERNAL_ERR:
+ *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case T4_ERR_OUT_OF_RQE:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_NOBUF;
+ break;
+ case T4_ERR_PBL_ADDR_BOUND:
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_BASE_BOUNDS;
+ break;
+ case T4_ERR_CRC:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_CRC_ERR;
+ break;
+ case T4_ERR_MARKER:
+ *layer_type = LAYER_MPA|DDP_LLP;
+ *ecode = MPA_MARKER_ERR;
+ break;
+ case T4_ERR_PDU_LEN_ERR:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_MSG_TOOBIG;
+ break;
+ case T4_ERR_DDP_VERSION:
+ if (tagged) {
+ *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
+ *ecode = DDPT_INV_VERS;
+ } else {
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_VERS;
+ }
+ break;
+ case T4_ERR_RDMA_VERSION:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_VERS;
+ break;
+ case T4_ERR_OPCODE:
+ *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
+ *ecode = RDMAP_INV_OPCODE;
+ break;
+ case T4_ERR_DDP_QUEUE_NUM:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_QN;
+ break;
+ case T4_ERR_MSN:
+ case T4_ERR_MSN_GAP:
+ case T4_ERR_MSN_RANGE:
+ case T4_ERR_IRD_OVERFLOW:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MSN_RANGE;
+ break;
+ case T4_ERR_TBIT:
+ *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ case T4_ERR_MO:
+ *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
+ *ecode = DDPU_INV_MO;
+ break;
+ default:
+ *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+ *ecode = 0;
+ break;
+ }
+}
+
+static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
+ gfp_t gfp)
+{
+ struct fw_ri_wr *wqe;
+ struct sk_buff *skb;
+ struct terminate_message *term;
+
+ PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ qhp->ep->hwtid);
+
+ skb = alloc_skb(sizeof *wqe, gfp);
+ if (!skb)
+ return;
+ set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
+
+ wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof *wqe);
+ wqe->op_compl = cpu_to_be32(FW_WR_OP(FW_RI_INIT_WR));
+ wqe->flowid_len16 = cpu_to_be32(
+ FW_WR_FLOWID(qhp->ep->hwtid) |
+ FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+
+ wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
+ wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
+ term = (struct terminate_message *)wqe->u.terminate.termmsg;
+ if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
+ term->layer_etype = qhp->attr.layer_etype;
+ term->ecode = qhp->attr.ecode;
+ } else
+ build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
+ c4iw_ofld_send(&qhp->rhp->rdev, skb);
+}
+
+/*
+ * Called with the qhp mutex held; the CQ and QP spinlocks are taken here,
+ * CQ lock first.
+ */
+static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
+ struct c4iw_cq *schp)
+{
+ int count;
+ int flushed;
+ unsigned long flag;
+
+ PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+
+ /* locking hierarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&rchp->lock, flag);
+ spin_lock(&qhp->lock);
+
+ if (qhp->wq.flushed) {
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&rchp->lock, flag);
+ return;
+ }
+ qhp->wq.flushed = 1;
+
+ c4iw_flush_hw_cq(rchp);
+ c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
+ flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&rchp->lock, flag);
+ if (flushed) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+
+ /* locking hierarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&schp->lock, flag);
+ spin_lock(&qhp->lock);
+ if (schp != rchp)
+ c4iw_flush_hw_cq(schp);
+ flushed = c4iw_flush_sq(qhp);
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&schp->lock, flag);
+ if (flushed) {
+ spin_lock_irqsave(&schp->comp_handler_lock, flag);
+ (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
+}
+
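+/*
+ * Mark the WQ in error and flush. For user QPs only the CQs are marked in
+ * error and their completion handlers notified; kernel QPs are flushed via
+ * __flush_qp().
+ */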
+static void flush_qp(struct c4iw_qp *qhp)
+{
+ struct c4iw_cq *rchp, *schp;
+ unsigned long flag;
+
+ rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+ schp = to_c4iw_cq(qhp->ibqp.send_cq);
+
+ t4_set_wq_in_error(&qhp->wq);
+ if (qhp->ibqp.uobject) {
+ t4_set_cq_in_error(&rchp->cq);
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ if (schp != rchp) {
+ t4_set_cq_in_error(&schp->cq);
+ spin_lock_irqsave(&schp->comp_handler_lock, flag);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
+ return;
+ }
+ __flush_qp(qhp, rchp, schp);
+}
+
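+/*
+ * Post an RI FINI work request to take the connection out of RDMA mode and
+ * wait for the firmware to acknowledge it.
+ */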
+static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+ struct c4iw_ep *ep)
+{
+ struct fw_ri_wr *wqe;
+ int ret;
+ struct sk_buff *skb;
+
+ PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ ep->hwtid);
+
+ skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
+
+ wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof *wqe);
+ wqe->op_compl = cpu_to_be32(
+ FW_WR_OP(FW_RI_INIT_WR) |
+ FW_WR_COMPL(1));
+ wqe->flowid_len16 = cpu_to_be32(
+ FW_WR_FLOWID(ep->hwtid) |
+ FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+ wqe->cookie = (unsigned long) &ep->com.wr_wait;
+
+ wqe->u.fini.type = FW_RI_TYPE_FINI;
+ ret = c4iw_ofld_send(&rhp->rdev, skb);
+ if (ret)
+ goto out;
+
+ ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid,
+ qhp->wq.sq.qid, __func__);
+out:
+ PDBG("%s ret %d\n", __func__, ret);
+ return ret;
+}
+
+static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
+{
+ PDBG("%s p2p_type = %d\n", __func__, p2p_type);
+ memset(&init->u, 0, sizeof init->u);
+ switch (p2p_type) {
+ case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
+ init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
+ init->u.write.stag_sink = cpu_to_be32(1);
+ init->u.write.to_sink = cpu_to_be64(1);
+ init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
+ init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
+ sizeof(struct fw_ri_immd),
+ 16);
+ break;
+ case FW_RI_INIT_P2PTYPE_READ_REQ:
+ init->u.write.opcode = FW_RI_RDMA_READ_WR;
+ init->u.read.stag_src = cpu_to_be32(1);
+ init->u.read.to_src_lo = cpu_to_be32(1);
+ init->u.read.stag_sink = cpu_to_be32(1);
+ init->u.read.to_sink_lo = cpu_to_be32(1);
+ init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
+ break;
+ }
+}
+
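+/*
+ * Post an RI INIT work request carrying the negotiated MPA attributes and
+ * queue parameters, then wait for the firmware reply.
+ */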
+static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
+{
+ struct fw_ri_wr *wqe;
+ int ret;
+ struct sk_buff *skb;
+
+ PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
+ qhp->ep->hwtid);
+
+ skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
+
+ wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
+ memset(wqe, 0, sizeof *wqe);
+ wqe->op_compl = cpu_to_be32(
+ FW_WR_OP(FW_RI_INIT_WR) |
+ FW_WR_COMPL(1));
+ wqe->flowid_len16 = cpu_to_be32(
+ FW_WR_FLOWID(qhp->ep->hwtid) |
+ FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+
+ wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait;
+
+ wqe->u.init.type = FW_RI_TYPE_INIT;
+ wqe->u.init.mpareqbit_p2ptype =
+ V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) |
+ V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type);
+ wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
+ if (qhp->attr.mpa_attr.recv_marker_enabled)
+ wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
+ if (qhp->attr.mpa_attr.xmit_marker_enabled)
+ wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
+ if (qhp->attr.mpa_attr.crc_enabled)
+ wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;
+
+ wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
+ FW_RI_QP_RDMA_WRITE_ENABLE |
+ FW_RI_QP_BIND_ENABLE;
+ if (!qhp->ibqp.uobject)
+ wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
+ FW_RI_QP_STAG0_ENABLE;
+ wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
+ wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
+ wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
+ wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
+ wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+ wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
+ wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
+ wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
+ wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
+ wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
+ wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
+ wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+ wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+ rhp->rdev.lldi.vr->rq.start);
+ if (qhp->attr.mpa_attr.initiator)
+ build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
+
+ ret = c4iw_ofld_send(&rhp->rdev, skb);
+ if (ret)
+ goto out;
+
+ ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait,
+ qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
+out:
+ PDBG("%s ret %d\n", __func__, ret);
+ return ret;
+}
+
+int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+ enum c4iw_qp_attr_mask mask,
+ struct c4iw_qp_attributes *attrs,
+ int internal)
+{
+ int ret = 0;
+ struct c4iw_qp_attributes newattr = qhp->attr;
+ int disconnect = 0;
+ int terminate = 0;
+ int abort = 0;
+ int free = 0;
+ struct c4iw_ep *ep = NULL;
+
+ PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__,
+ qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
+ (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+ mutex_lock(&qhp->mutex);
+
+ /* Process attr changes if in IDLE */
+ if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
+ if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
+ ret = -EIO;
+ goto out;
+ }
+ if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
+ newattr.enable_rdma_read = attrs->enable_rdma_read;
+ if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
+ newattr.enable_rdma_write = attrs->enable_rdma_write;
+ if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
+ newattr.enable_bind = attrs->enable_bind;
+ if (mask & C4IW_QP_ATTR_MAX_ORD) {
+ if (attrs->max_ord > c4iw_max_read_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ord = attrs->max_ord;
+ }
+ if (mask & C4IW_QP_ATTR_MAX_IRD) {
+ if (attrs->max_ird > c4iw_max_read_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ird = attrs->max_ird;
+ }
+ qhp->attr = newattr;
+ }
+
+ if (mask & C4IW_QP_ATTR_SQ_DB) {
+ ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
+ goto out;
+ }
+ if (mask & C4IW_QP_ATTR_RQ_DB) {
+ ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
+ goto out;
+ }
+
+ if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
+ goto out;
+ if (qhp->attr.state == attrs->next_state)
+ goto out;
+
+ switch (qhp->attr.state) {
+ case C4IW_QP_STATE_IDLE:
+ switch (attrs->next_state) {
+ case C4IW_QP_STATE_RTS:
+ if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.mpa_attr = attrs->mpa_attr;
+ qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+ qhp->ep = qhp->attr.llp_stream_handle;
+ set_state(qhp, C4IW_QP_STATE_RTS);
+
+ /*
+ * Ref the endpoint here and deref when we
+ * disassociate the endpoint from the QP. This
+ * happens in CLOSING->IDLE transition or *->ERROR
+ * transition.
+ */
+ c4iw_get_ep(&qhp->ep->com);
+ ret = rdma_init(rhp, qhp);
+ if (ret)
+ goto err;
+ break;
+ case C4IW_QP_STATE_ERROR:
+ set_state(qhp, C4IW_QP_STATE_ERROR);
+ flush_qp(qhp);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case C4IW_QP_STATE_RTS:
+ switch (attrs->next_state) {
+ case C4IW_QP_STATE_CLOSING:
+ BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ t4_set_wq_in_error(&qhp->wq);
+ set_state(qhp, C4IW_QP_STATE_CLOSING);
+ ep = qhp->ep;
+ if (!internal) {
+ abort = 0;
+ disconnect = 1;
+ c4iw_get_ep(&qhp->ep->com);
+ }
+ ret = rdma_fini(rhp, qhp, ep);
+ if (ret)
+ goto err;
+ break;
+ case C4IW_QP_STATE_TERMINATE:
+ t4_set_wq_in_error(&qhp->wq);
+ set_state(qhp, C4IW_QP_STATE_TERMINATE);
+ qhp->attr.layer_etype = attrs->layer_etype;
+ qhp->attr.ecode = attrs->ecode;
+ ep = qhp->ep;
+ if (!internal) {
+ c4iw_get_ep(&qhp->ep->com);
+ terminate = 1;
+ disconnect = 1;
+ } else {
+ terminate = qhp->attr.send_term;
+ ret = rdma_fini(rhp, qhp, ep);
+ if (ret)
+ goto err;
+ }
+ break;
+ case C4IW_QP_STATE_ERROR:
+ t4_set_wq_in_error(&qhp->wq);
+ set_state(qhp, C4IW_QP_STATE_ERROR);
+ if (!internal) {
+ abort = 1;
+ disconnect = 1;
+ ep = qhp->ep;
+ c4iw_get_ep(&qhp->ep->com);
+ }
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case C4IW_QP_STATE_CLOSING:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (attrs->next_state) {
+ case C4IW_QP_STATE_IDLE:
+ flush_qp(qhp);
+ set_state(qhp, C4IW_QP_STATE_IDLE);
+ qhp->attr.llp_stream_handle = NULL;
+ c4iw_put_ep(&qhp->ep->com);
+ qhp->ep = NULL;
+ wake_up(&qhp->wait);
+ break;
+ case C4IW_QP_STATE_ERROR:
+ goto err;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+ break;
+ case C4IW_QP_STATE_ERROR:
+ if (attrs->next_state != C4IW_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ set_state(qhp, C4IW_QP_STATE_IDLE);
+ break;
+ case C4IW_QP_STATE_TERMINATE:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto err;
+ break;
+ default:
+ printk(KERN_ERR "%s in a bad state %d\n",
+ __func__, qhp->attr.state);
+ ret = -EINVAL;
+ goto err;
+ break;
+ }
+ goto out;
+err:
+ PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
+ qhp->wq.sq.qid);
+
+ /* disassociate the LLP connection */
+ qhp->attr.llp_stream_handle = NULL;
+ if (!ep)
+ ep = qhp->ep;
+ qhp->ep = NULL;
+ set_state(qhp, C4IW_QP_STATE_ERROR);
+ free = 1;
+ abort = 1;
+ wake_up(&qhp->wait);
+ BUG_ON(!ep);
+ flush_qp(qhp);
+out:
+ mutex_unlock(&qhp->mutex);
+
+ if (terminate)
+ post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);
+
+ /*
+ * If disconnect is 1, then we need to initiate a disconnect
+ * on the EP. This can be a normal close (RTS->CLOSING) or
+ * an abnormal close (RTS/CLOSING->ERROR).
+ */
+ if (disconnect) {
+ c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
+ GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
+ }
+
+ /*
+ * If free is 1, then we've disassociated the EP from the QP
+ * and we need to dereference the EP.
+ */
+ if (free)
+ c4iw_put_ep(&ep->com);
+ PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ return ret;
+}
+
+int c4iw_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_qp *qhp;
+ struct c4iw_qp_attributes attrs;
+ struct c4iw_ucontext *ucontext;
+
+ qhp = to_c4iw_qp(ib_qp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
+ c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ else
+ c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ wait_event(qhp->wait, !qhp->ep);
+
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+ atomic_dec(&qhp->refcnt);
+ wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+
+ spin_lock_irq(&rhp->lock);
+ if (!list_empty(&qhp->db_fc_entry))
+ list_del_init(&qhp->db_fc_entry);
+ spin_unlock_irq(&rhp->lock);
+
+ ucontext = ib_qp->uobject ?
+ to_c4iw_ucontext(ib_qp->uobject->context) : NULL;
+ destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+ PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
+ kfree(qhp);
+ return 0;
+}
+
+struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_qp *qhp;
+ struct c4iw_pd *php;
+ struct c4iw_cq *schp;
+ struct c4iw_cq *rchp;
+ struct c4iw_create_qp_resp uresp;
+ unsigned int sqsize, rqsize;
+ struct c4iw_ucontext *ucontext;
+ int ret;
+ struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;
+
+ PDBG("%s ib_pd %p\n", __func__, pd);
+
+ if (attrs->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
+ rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
+ if (!schp || !rchp)
+ return ERR_PTR(-EINVAL);
+
+ if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
+ return ERR_PTR(-EINVAL);
+
+ rqsize = roundup(attrs->cap.max_recv_wr + 1, 16);
+ if (rqsize > T4_MAX_RQ_SIZE)
+ return ERR_PTR(-E2BIG);
+
+ sqsize = roundup(attrs->cap.max_send_wr + 1, 16);
+ if (sqsize > T4_MAX_SQ_SIZE)
+ return ERR_PTR(-E2BIG);
+
+ ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+
+ qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
+ if (!qhp)
+ return ERR_PTR(-ENOMEM);
+ qhp->wq.sq.size = sqsize;
+ qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue;
+ qhp->wq.sq.flush_cidx = -1;
+ qhp->wq.rq.size = rqsize;
+ qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue;
+
+ if (ucontext) {
+ qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
+ qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+ }
+
+ PDBG("%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu\n",
+ __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize);
+
+ ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ if (ret)
+ goto err1;
+
+ attrs->cap.max_recv_wr = rqsize - 1;
+ attrs->cap.max_send_wr = sqsize - 1;
+ attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;
+
+ qhp->rhp = rhp;
+ qhp->attr.pd = php->pdid;
+ qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
+ qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
+ qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
+ qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+ qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
+ qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
+ qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+ qhp->attr.state = C4IW_QP_STATE_IDLE;
+ qhp->attr.next_state = C4IW_QP_STATE_IDLE;
+ qhp->attr.enable_rdma_read = 1;
+ qhp->attr.enable_rdma_write = 1;
+ qhp->attr.enable_bind = 1;
+ qhp->attr.max_ord = 1;
+ qhp->attr.max_ird = 1;
+ qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
+ spin_lock_init(&qhp->lock);
+ mutex_init(&qhp->mutex);
+ init_waitqueue_head(&qhp->wait);
+ atomic_set(&qhp->refcnt, 1);
+
+ ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+ if (ret)
+ goto err2;
+
+ if (udata) {
+ mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
+ if (!mm1) {
+ ret = -ENOMEM;
+ goto err3;
+ }
+ mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ if (!mm2) {
+ ret = -ENOMEM;
+ goto err4;
+ }
+ mm3 = kmalloc(sizeof *mm3, GFP_KERNEL);
+ if (!mm3) {
+ ret = -ENOMEM;
+ goto err5;
+ }
+ mm4 = kmalloc(sizeof *mm4, GFP_KERNEL);
+ if (!mm4) {
+ ret = -ENOMEM;
+ goto err6;
+ }
+ if (t4_sq_onchip(&qhp->wq.sq)) {
+ mm5 = kmalloc(sizeof *mm5, GFP_KERNEL);
+ if (!mm5) {
+ ret = -ENOMEM;
+ goto err7;
+ }
+ uresp.flags = C4IW_QPF_ONCHIP;
+ } else
+ uresp.flags = 0;
+ uresp.qid_mask = rhp->rdev.qpmask;
+ uresp.sqid = qhp->wq.sq.qid;
+ uresp.sq_size = qhp->wq.sq.size;
+ uresp.sq_memsize = qhp->wq.sq.memsize;
+ uresp.rqid = qhp->wq.rq.qid;
+ uresp.rq_size = qhp->wq.rq.size;
+ uresp.rq_memsize = qhp->wq.rq.memsize;
+ spin_lock(&ucontext->mmap_lock);
+ if (mm5) {
+ uresp.ma_sync_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ } else {
+ uresp.ma_sync_key = 0;
+ }
+ uresp.sq_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.rq_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.sq_db_gts_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.rq_db_gts_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ if (ret)
+ goto err8;
+ mm1->key = uresp.sq_key;
+ mm1->addr = qhp->wq.sq.phys_addr;
+ mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize);
+ insert_mmap(ucontext, mm1);
+ mm2->key = uresp.rq_key;
+ mm2->addr = virt_to_phys(qhp->wq.rq.queue);
+ mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+ insert_mmap(ucontext, mm2);
+ mm3->key = uresp.sq_db_gts_key;
+ mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
+ mm3->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm3);
+ mm4->key = uresp.rq_db_gts_key;
+ mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
+ mm4->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm4);
+ if (mm5) {
+ mm5->key = uresp.ma_sync_key;
+ mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0)
+ + A_PCIE_MA_SYNC) & PAGE_MASK;
+ mm5->len = PAGE_SIZE;
+ insert_mmap(ucontext, mm5);
+ }
+ }
+ qhp->ibqp.qp_num = qhp->wq.sq.qid;
+ init_timer(&(qhp->timer));
+ INIT_LIST_HEAD(&qhp->db_fc_entry);
+ PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n",
+ __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+ qhp->wq.sq.qid);
+ return &qhp->ibqp;
+err8:
+ kfree(mm5);
+err7:
+ kfree(mm4);
+err6:
+ kfree(mm3);
+err5:
+ kfree(mm2);
+err4:
+ kfree(mm1);
+err3:
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+err2:
+ destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+err1:
+ kfree(qhp);
+ return ERR_PTR(ret);
+}
+
+int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_qp *qhp;
+ enum c4iw_qp_attr_mask mask = 0;
+ struct c4iw_qp_attributes attrs;
+
+ PDBG("%s ib_qp %p\n", __func__, ibqp);
+
+ /* iwarp does not support the RTR state */
+ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
+ attr_mask &= ~IB_QP_STATE;
+
+ /* Make sure we still have something left to do */
+ if (!attr_mask)
+ return 0;
+
+ memset(&attrs, 0, sizeof attrs);
+ qhp = to_c4iw_qp(ibqp);
+ rhp = qhp->rhp;
+
+ attrs.next_state = c4iw_convert_state(attr->qp_state);
+ attrs.enable_rdma_read = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ attrs.enable_rdma_write = (attr->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+
+ mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0;
+ mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
+ (C4IW_QP_ATTR_ENABLE_RDMA_READ |
+ C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
+ C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
+
+ /*
+ * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+ * ringing the queue db when we're in DB_FULL mode.
+ * Only allow this on T4 devices.
+ */
+ attrs.sq_db_inc = attr->sq_psn;
+ attrs.rq_db_inc = attr->rq_psn;
+ mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+ mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+ if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
+ (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
+ return -EINVAL;
+
+ return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
+}
+
+struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
+{
+ PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
+}
+
+int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
+
+ memset(attr, 0, sizeof *attr);
+ memset(init_attr, 0, sizeof *init_attr);
+ attr->qp_state = to_ib_qp_state(qhp->attr.state);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
new file mode 100644
index 00000000000..67df71a7012
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* Crude resource management */
+#include <linux/spinlock.h>
+#include <linux/genalloc.h>
+#include <linux/ratelimit.h>
+#include "iw_cxgb4.h"
+
+static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
+{
+ u32 i;
+
+ if (c4iw_id_table_alloc(&rdev->resource.qid_table,
+ rdev->lldi.vr->qp.start,
+ rdev->lldi.vr->qp.size,
+ rdev->lldi.vr->qp.size, 0))
+ return -ENOMEM;
+
+ for (i = rdev->lldi.vr->qp.start;
+ i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++)
+ if (!(i & rdev->qpmask))
+ c4iw_id_free(&rdev->resource.qid_table, i);
+ return 0;
+}
+
+/* nr_* must be power of 2 */
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+{
+ int err = 0;
+ err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
+ C4IW_ID_TABLE_F_RANDOM);
+ if (err)
+ goto tpt_err;
+ err = c4iw_init_qid_table(rdev);
+ if (err)
+ goto qid_err;
+ err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0,
+ nr_pdid, 1, 0);
+ if (err)
+ goto pdid_err;
+ return 0;
+ pdid_err:
+ c4iw_id_table_free(&rdev->resource.qid_table);
+ qid_err:
+ c4iw_id_table_free(&rdev->resource.tpt_table);
+ tpt_err:
+ return -ENOMEM;
+}
+
+/*
+ * returns 0 if no resource available
+ */
+u32 c4iw_get_resource(struct c4iw_id_table *id_table)
+{
+ u32 entry;
+ entry = c4iw_id_alloc(id_table);
+ if (entry == (u32)(-1))
+ return 0;
+ return entry;
+}
+
+void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
+{
+ PDBG("%s entry 0x%x\n", __func__, entry);
+ c4iw_id_free(id_table, entry);
+}
+
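+/*
+ * Allocate a CQ qid. The hardware hands out qids in blocks of
+ * (qpmask + 1); unused ids from the block are cached on the per-context
+ * cqid and qpid free lists since they share the same db/gts page.
+ */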
+u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
+{
+ struct c4iw_qid_list *entry;
+ u32 qid;
+ int i;
+
+ mutex_lock(&uctx->lock);
+ if (!list_empty(&uctx->cqids)) {
+ entry = list_entry(uctx->cqids.next, struct c4iw_qid_list,
+ entry);
+ list_del(&entry->entry);
+ qid = entry->qid;
+ kfree(entry);
+ } else {
+ qid = c4iw_get_resource(&rdev->resource.qid_table);
+ if (!qid)
+ goto out;
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.qid.cur += rdev->qpmask + 1;
+ mutex_unlock(&rdev->stats.lock);
+ for (i = qid+1; i & rdev->qpmask; i++) {
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ goto out;
+ entry->qid = i;
+ list_add_tail(&entry->entry, &uctx->cqids);
+ }
+
+ /*
+ * now put the same ids on the qp list since they all
+ * map to the same db/gts page.
+ */
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ goto out;
+ entry->qid = qid;
+ list_add_tail(&entry->entry, &uctx->qpids);
+ for (i = qid+1; i & rdev->qpmask; i++) {
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ goto out;
+ entry->qid = i;
+ list_add_tail(&entry->entry, &uctx->qpids);
+ }
+ }
+out:
+ mutex_unlock(&uctx->lock);
+ PDBG("%s qid 0x%x\n", __func__, qid);
+ mutex_lock(&rdev->stats.lock);
+ if (rdev->stats.qid.cur > rdev->stats.qid.max)
+ rdev->stats.qid.max = rdev->stats.qid.cur;
+ mutex_unlock(&rdev->stats.lock);
+ return qid;
+}
+
+void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
+ struct c4iw_dev_ucontext *uctx)
+{
+ struct c4iw_qid_list *entry;
+
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ return;
+ PDBG("%s qid 0x%x\n", __func__, qid);
+ entry->qid = qid;
+ mutex_lock(&uctx->lock);
+ list_add_tail(&entry->entry, &uctx->cqids);
+ mutex_unlock(&uctx->lock);
+}
+
+u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
+{
+ struct c4iw_qid_list *entry;
+ u32 qid;
+ int i;
+
+ mutex_lock(&uctx->lock);
+ if (!list_empty(&uctx->qpids)) {
+ entry = list_entry(uctx->qpids.next, struct c4iw_qid_list,
+ entry);
+ list_del(&entry->entry);
+ qid = entry->qid;
+ kfree(entry);
+ } else {
+ qid = c4iw_get_resource(&rdev->resource.qid_table);
+ if (!qid) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.qid.fail++;
+ mutex_unlock(&rdev->stats.lock);
+ goto out;
+ }
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.qid.cur += rdev->qpmask + 1;
+ mutex_unlock(&rdev->stats.lock);
+ for (i = qid+1; i & rdev->qpmask; i++) {
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ goto out;
+ entry->qid = i;
+ list_add_tail(&entry->entry, &uctx->qpids);
+ }
+
+ /*
+ * now put the same ids on the cq list since they all
+ * map to the same db/gts page.
+ */
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ goto out;
+ entry->qid = qid;
+ list_add_tail(&entry->entry, &uctx->cqids);
+ for (i = qid; i & rdev->qpmask; i++) {
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ goto out;
+ entry->qid = i;
+ list_add_tail(&entry->entry, &uctx->cqids);
+ }
+ }
+out:
+ mutex_unlock(&uctx->lock);
+ PDBG("%s qid 0x%x\n", __func__, qid);
+ mutex_lock(&rdev->stats.lock);
+ if (rdev->stats.qid.cur > rdev->stats.qid.max)
+ rdev->stats.qid.max = rdev->stats.qid.cur;
+ mutex_unlock(&rdev->stats.lock);
+ return qid;
+}
+
+void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
+ struct c4iw_dev_ucontext *uctx)
+{
+ struct c4iw_qid_list *entry;
+
+ entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ if (!entry)
+ return;
+ PDBG("%s qid 0x%x\n", __func__, qid);
+ entry->qid = qid;
+ mutex_lock(&uctx->lock);
+ list_add_tail(&entry->entry, &uctx->qpids);
+ mutex_unlock(&uctx->lock);
+}
+
+void c4iw_destroy_resource(struct c4iw_resource *rscp)
+{
+ c4iw_id_table_free(&rscp->tpt_table);
+ c4iw_id_table_free(&rscp->qid_table);
+ c4iw_id_table_free(&rscp->pdid_table);
+}
+
+/*
+ * PBL Memory Manager. Uses Linux generic allocator.
+ */
+
+#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
+
+u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
+ PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ mutex_lock(&rdev->stats.lock);
+ if (addr) {
+ rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
+ if (rdev->stats.pbl.cur > rdev->stats.pbl.max)
+ rdev->stats.pbl.max = rdev->stats.pbl.cur;
+ } else
+ rdev->stats.pbl.fail++;
+ mutex_unlock(&rdev->stats.lock);
+ return (u32)addr;
+}
+
+void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
+ mutex_unlock(&rdev->stats.lock);
+ gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
+}
+
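+/*
+ * Seed the PBL gen_pool from the adapter's PBL memory range, halving the
+ * chunk size and retrying if a chunk cannot be added.
+ */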
+int c4iw_pblpool_create(struct c4iw_rdev *rdev)
+{
+ unsigned pbl_start, pbl_chunk, pbl_top;
+
+ rdev->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
+ if (!rdev->pbl_pool)
+ return -ENOMEM;
+
+ pbl_start = rdev->lldi.vr->pbl.start;
+ pbl_chunk = rdev->lldi.vr->pbl.size;
+ pbl_top = pbl_start + pbl_chunk;
+
+ while (pbl_start < pbl_top) {
+ pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk);
+ if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) {
+ PDBG("%s failed to add PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
+ printk(KERN_WARNING MOD
+ "Failed to add all PBL chunks (%x/%x)\n",
+ pbl_start,
+ pbl_top - pbl_start);
+ return 0;
+ }
+ pbl_chunk >>= 1;
+ } else {
+ PDBG("%s added PBL chunk (%x/%x)\n",
+ __func__, pbl_start, pbl_chunk);
+ pbl_start += pbl_chunk;
+ }
+ }
+
+ return 0;
+}
+
+void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
+{
+ gen_pool_destroy(rdev->pbl_pool);
+}
+
+/*
+ * RQT Memory Manager. Uses Linux generic allocator.
+ */
+
+#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */
+
+u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
+ PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ if (!addr)
+ pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pci_name(rdev->lldi.pdev));
+ mutex_lock(&rdev->stats.lock);
+ if (addr) {
+ rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
+ if (rdev->stats.rqt.cur > rdev->stats.rqt.max)
+ rdev->stats.rqt.max = rdev->stats.rqt.cur;
+ } else
+ rdev->stats.rqt.fail++;
+ mutex_unlock(&rdev->stats.lock);
+ return (u32)addr;
+}
+
+void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
+ mutex_unlock(&rdev->stats.lock);
+ gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
+}
+
+int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
+{
+ unsigned rqt_start, rqt_chunk, rqt_top;
+
+ rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
+ if (!rdev->rqt_pool)
+ return -ENOMEM;
+
+ rqt_start = rdev->lldi.vr->rq.start;
+ rqt_chunk = rdev->lldi.vr->rq.size;
+ rqt_top = rqt_start + rqt_chunk;
+
+ while (rqt_start < rqt_top) {
+ rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk);
+ if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) {
+ PDBG("%s failed to add RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
+ if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) {
+ printk(KERN_WARNING MOD
+ "Failed to add all RQT chunks (%x/%x)\n",
+ rqt_start, rqt_top - rqt_start);
+ return 0;
+ }
+ rqt_chunk >>= 1;
+ } else {
+ PDBG("%s added RQT chunk (%x/%x)\n",
+ __func__, rqt_start, rqt_chunk);
+ rqt_start += rqt_chunk;
+ }
+ }
+ return 0;
+}
+
+void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
+{
+ gen_pool_destroy(rdev->rqt_pool);
+}
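+
+/*
+ * Note on units: unlike the PBL helpers, the RQT helpers above take
+ * "size" in RQT entries rather than bytes; each entry is 64 bytes,
+ * hence the "size << 6" conversion.  For example, a request for 16
+ * entries allocates 16 << 6 = 1024 bytes, which is exactly the
+ * 1 << MIN_RQT_SHIFT allocation granularity.
+ */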
+
+/*
+ * On-Chip QP Memory.
+ */
+#define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */
+
+u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
+{
+ unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
+ PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ if (addr) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
+ if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max)
+ rdev->stats.ocqp.max = rdev->stats.ocqp.cur;
+ mutex_unlock(&rdev->stats.lock);
+ }
+ return (u32)addr;
+}
+
+void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
+{
+ PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
+ mutex_unlock(&rdev->stats.lock);
+ gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size);
+}
+
+int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
+{
+ unsigned start, chunk, top;
+
+ rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1);
+ if (!rdev->ocqp_pool)
+ return -ENOMEM;
+
+ start = rdev->lldi.vr->ocq.start;
+ chunk = rdev->lldi.vr->ocq.size;
+ top = start + chunk;
+
+ while (start < top) {
+ chunk = min(top - start + 1, chunk);
+ if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) {
+ PDBG("%s failed to add OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
+ if (chunk <= 1024 << MIN_OCQP_SHIFT) {
+ printk(KERN_WARNING MOD
+ "Failed to add all OCQP chunks (%x/%x)\n",
+ start, top - start);
+ return 0;
+ }
+ chunk >>= 1;
+ } else {
+ PDBG("%s added OCQP chunk (%x/%x)\n",
+ __func__, start, chunk);
+ start += chunk;
+ }
+ }
+ return 0;
+}
+
+void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev)
+{
+ gen_pool_destroy(rdev->ocqp_pool);
+}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
new file mode 100644
index 00000000000..68b0a6bf4eb
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -0,0 +1,680 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __T4_H__
+#define __T4_H__
+
+#include "t4_hw.h"
+#include "t4_regs.h"
+#include "t4_msg.h"
+#include "t4fw_ri_api.h"
+
+#define T4_MAX_NUM_QP 65536
+#define T4_MAX_NUM_CQ 65536
+#define T4_MAX_NUM_PD 65536
+#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
+#define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES)
+#define T4_MAX_IQ_SIZE (65520 - 1)
+#define T4_MAX_RQ_SIZE (8192 - T4_EQ_STATUS_ENTRIES)
+#define T4_MAX_SQ_SIZE (T4_MAX_EQ_SIZE - 1)
+#define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1)
+#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
+#define T4_MAX_NUM_STAG (1<<15)
+#define T4_MAX_MR_SIZE (~0ULL)
+#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
+#define T4_STAG_UNSET 0xffffffff
+#define T4_FW_MAJ 0
+#define A_PCIE_MA_SYNC 0x30b4
+
+struct t4_status_page {
+ __be32 rsvd1; /* flit 0 - hw owns */
+ __be16 rsvd2;
+ __be16 qid;
+ __be16 cidx;
+ __be16 pidx;
+ u8 qp_err; /* flit 1 - sw owns */
+ u8 db_off;
+ u8 pad;
+ u16 host_wq_pidx;
+ u16 host_cidx;
+ u16 host_pidx;
+};
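+
+/*
+ * The status page occupies the queue slot just past the last WR
+ * (queue[size]); the qp_err/db_off/host_wq_pidx accessors further down
+ * read and write it through that extra entry.
+ */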
+
+#define T4_EQ_ENTRY_SIZE 64
+
+#define T4_SQ_NUM_SLOTS 5
+#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
+#define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
+ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
+#define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
+ sizeof(struct fw_ri_immd)))
+#define T4_MAX_WRITE_INLINE ((T4_SQ_NUM_BYTES - \
+ sizeof(struct fw_ri_rdma_write_wr) - \
+ sizeof(struct fw_ri_immd)))
+#define T4_MAX_WRITE_SGE ((T4_SQ_NUM_BYTES - \
+ sizeof(struct fw_ri_rdma_write_wr) - \
+ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
+#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
+ sizeof(struct fw_ri_immd)) & ~31UL)
+#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
+#define T4_MAX_FR_DSGL 1024
+#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64))
+
+static inline int t4_max_fr_depth(int use_dsgl)
+{
+ return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH;
+}
+
+#define T4_RQ_NUM_SLOTS 2
+#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
+#define T4_MAX_RECV_SGE 4
+
+union t4_wr {
+ struct fw_ri_res_wr res;
+ struct fw_ri_wr ri;
+ struct fw_ri_rdma_write_wr write;
+ struct fw_ri_send_wr send;
+ struct fw_ri_rdma_read_wr read;
+ struct fw_ri_bind_mw_wr bind;
+ struct fw_ri_fr_nsmr_wr fr;
+ struct fw_ri_inv_lstag_wr inv;
+ struct t4_status_page status;
+ __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
+};
+
+union t4_recv_wr {
+ struct fw_ri_recv_wr recv;
+ struct t4_status_page status;
+ __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS];
+};
+
+static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
+ enum fw_wr_opcodes opcode, u8 flags, u8 len16)
+{
+ wqe->send.opcode = (u8)opcode;
+ wqe->send.flags = flags;
+ wqe->send.wrid = wrid;
+ wqe->send.r1[0] = 0;
+ wqe->send.r1[1] = 0;
+ wqe->send.r1[2] = 0;
+ wqe->send.len16 = len16;
+}
+
+/* CQE/AE status codes */
+#define T4_ERR_SUCCESS 0x0
+#define T4_ERR_STAG 0x1 /* STAG invalid: either the */
+ /* STAG is off-limits, being 0, */
+ /* or STAG_key mismatch */
+#define T4_ERR_PDID 0x2 /* PDID mismatch */
+#define T4_ERR_QPID 0x3 /* QPID mismatch */
+#define T4_ERR_ACCESS 0x4 /* Invalid access right */
+#define T4_ERR_WRAP 0x5 /* Wrap error */
+#define T4_ERR_BOUND 0x6 /* base and bounds violation */
+#define T4_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
+ /* shared memory region */
+#define T4_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
+ /* MR with an MW bound to it */
+#define T4_ERR_ECC 0x9 /* ECC error detected */
+#define T4_ERR_ECC_PSTAG 0xA /* ECC error detected when */
+ /* reading PSTAG for a MW */
+ /* Invalidate */
+#define T4_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
+ /* software error */
+#define T4_ERR_SWFLUSH 0xC /* SW FLUSHED */
+#define T4_ERR_CRC 0x10 /* CRC error */
+#define T4_ERR_MARKER 0x11 /* Marker error */
+#define T4_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
+#define T4_ERR_OUT_OF_RQE 0x13 /* out of RQE */
+#define T4_ERR_DDP_VERSION 0x14 /* wrong DDP version */
+#define T4_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
+#define T4_ERR_OPCODE 0x16 /* invalid rdma opcode */
+#define T4_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
+#define T4_ERR_MSN 0x18 /* MSN error */
+#define T4_ERR_TBIT 0x19 /* tag bit not set correctly */
+#define T4_ERR_MO 0x1A /* MO not 0 for TERMINATE */
+ /* or READ_REQ */
+#define T4_ERR_MSN_GAP 0x1B
+#define T4_ERR_MSN_RANGE 0x1C
+#define T4_ERR_IRD_OVERFLOW 0x1D
+#define T4_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
+ /* software error */
+#define T4_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
+ /* mismatch) */
+/*
+ * CQE defs
+ */
+struct t4_cqe {
+ __be32 header;
+ __be32 len;
+ union {
+ struct {
+ __be32 stag;
+ __be32 msn;
+ } rcqe;
+ struct {
+ u32 nada1;
+ u16 nada2;
+ u16 cidx;
+ } scqe;
+ struct {
+ __be32 wrid_hi;
+ __be32 wrid_low;
+ } gen;
+ } u;
+ __be64 reserved;
+ __be64 bits_type_ts;
+};
+
+/* macros for flit 0 of the cqe */
+
+#define S_CQE_QPID 12
+#define M_CQE_QPID 0xFFFFF
+#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE 11
+#define M_CQE_SWCQE 0x1
+#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_STATUS 5
+#define M_CQE_STATUS 0x1F
+#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE 4
+#define M_CQE_TYPE 0x1
+#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE 0
+#define M_CQE_OPCODE 0xF
+#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x)->header)))
+#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x)->header)))
+#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x)->header)))
+#define SQ_TYPE(x) (CQE_TYPE((x)))
+#define RQ_TYPE(x) (!CQE_TYPE((x)))
+#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x)->header)))
+#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x)->header)))
+
+#define CQE_SEND_OPCODE(x) ( \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND) || \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE) || \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_INV) || \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE_INV))
+
+#define CQE_LEN(x) (be32_to_cpu((x)->len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag))
+#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x) ((x)->u.gen.wrid_hi)
+#define CQE_WRID_LOW(x) ((x)->u.gen.wrid_low)
+
+/* macros for flit 3 of the cqe */
+#define S_CQE_GENBIT 63
+#define M_CQE_GENBIT 0x1
+#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_OVFBIT 62
+#define M_CQE_OVFBIT 0x1
+#define G_CQE_OVFBIT(x) ((((x) >> S_CQE_OVFBIT)) & M_CQE_OVFBIT)
+
+#define S_CQE_IQTYPE 60
+#define M_CQE_IQTYPE 0x3
+#define G_CQE_IQTYPE(x) ((((x) >> S_CQE_IQTYPE)) & M_CQE_IQTYPE)
+
+#define M_CQE_TS 0x0fffffffffffffffULL
+#define G_CQE_TS(x) ((x) & M_CQE_TS)
+
+#define CQE_OVFBIT(x) ((unsigned)G_CQE_OVFBIT(be64_to_cpu((x)->bits_type_ts)))
+#define CQE_GENBIT(x) ((unsigned)G_CQE_GENBIT(be64_to_cpu((x)->bits_type_ts)))
+#define CQE_TS(x) (G_CQE_TS(be64_to_cpu((x)->bits_type_ts)))
+
+struct t4_swsqe {
+ u64 wr_id;
+ struct t4_cqe cqe;
+ int read_len;
+ int opcode;
+ int complete;
+ int signaled;
+ u16 idx;
+ int flushed;
+};
+
+static inline pgprot_t t4_pgprot_wc(pgprot_t prot)
+{
+#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
+ return pgprot_writecombine(prot);
+#else
+ return pgprot_noncached(prot);
+#endif
+}
+
+enum {
+ T4_SQ_ONCHIP = (1<<0),
+};
+
+struct t4_sq {
+ union t4_wr *queue;
+ dma_addr_t dma_addr;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ unsigned long phys_addr;
+ struct t4_swsqe *sw_sq;
+ struct t4_swsqe *oldest_read;
+ u64 __iomem *udb;
+ size_t memsize;
+ u32 qid;
+ u16 in_use;
+ u16 size;
+ u16 cidx;
+ u16 pidx;
+ u16 wq_pidx;
+ u16 wq_pidx_inc;
+ u16 flags;
+ short flush_cidx;
+};
+
+struct t4_swrqe {
+ u64 wr_id;
+};
+
+struct t4_rq {
+ union t4_recv_wr *queue;
+ dma_addr_t dma_addr;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ struct t4_swrqe *sw_rq;
+ u64 __iomem *udb;
+ size_t memsize;
+ u32 qid;
+ u32 msn;
+ u32 rqt_hwaddr;
+ u16 rqt_size;
+ u16 in_use;
+ u16 size;
+ u16 cidx;
+ u16 pidx;
+ u16 wq_pidx;
+ u16 wq_pidx_inc;
+};
+
+struct t4_wq {
+ struct t4_sq sq;
+ struct t4_rq rq;
+ void __iomem *db;
+ void __iomem *gts;
+ struct c4iw_rdev *rdev;
+ int flushed;
+};
+
+static inline int t4_rqes_posted(struct t4_wq *wq)
+{
+ return wq->rq.in_use;
+}
+
+static inline int t4_rq_empty(struct t4_wq *wq)
+{
+ return wq->rq.in_use == 0;
+}
+
+static inline int t4_rq_full(struct t4_wq *wq)
+{
+ return wq->rq.in_use == (wq->rq.size - 1);
+}
+
+static inline u32 t4_rq_avail(struct t4_wq *wq)
+{
+ return wq->rq.size - 1 - wq->rq.in_use;
+}
+
+static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
+{
+ wq->rq.in_use++;
+ if (++wq->rq.pidx == wq->rq.size)
+ wq->rq.pidx = 0;
+ wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+ if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS)
+ wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS;
+}
+
+static inline void t4_rq_consume(struct t4_wq *wq)
+{
+ wq->rq.in_use--;
+ wq->rq.msn++;
+ if (++wq->rq.cidx == wq->rq.size)
+ wq->rq.cidx = 0;
+}
+
+static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq)
+{
+ return wq->rq.queue[wq->rq.size].status.host_wq_pidx;
+}
+
+static inline u16 t4_rq_wq_size(struct t4_wq *wq)
+{
+ return wq->rq.size * T4_RQ_NUM_SLOTS;
+}
+
+static inline int t4_sq_onchip(struct t4_sq *sq)
+{
+ return sq->flags & T4_SQ_ONCHIP;
+}
+
+static inline int t4_sq_empty(struct t4_wq *wq)
+{
+ return wq->sq.in_use == 0;
+}
+
+static inline int t4_sq_full(struct t4_wq *wq)
+{
+ return wq->sq.in_use == (wq->sq.size - 1);
+}
+
+static inline u32 t4_sq_avail(struct t4_wq *wq)
+{
+ return wq->sq.size - 1 - wq->sq.in_use;
+}
+
+static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
+{
+ wq->sq.in_use++;
+ if (++wq->sq.pidx == wq->sq.size)
+ wq->sq.pidx = 0;
+ wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+ if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS)
+ wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS;
+}
+
+static inline void t4_sq_consume(struct t4_wq *wq)
+{
+ BUG_ON(wq->sq.in_use < 1);
+ if (wq->sq.cidx == wq->sq.flush_cidx)
+ wq->sq.flush_cidx = -1;
+ wq->sq.in_use--;
+ if (++wq->sq.cidx == wq->sq.size)
+ wq->sq.cidx = 0;
+}
+
+static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq)
+{
+ return wq->sq.queue[wq->sq.size].status.host_wq_pidx;
+}
+
+static inline u16 t4_sq_wq_size(struct t4_wq *wq)
+{
+ return wq->sq.size * T4_SQ_NUM_SLOTS;
+}
+
+/* This function copies a 64-byte coalesced work request to the
+ * memory-mapped BAR2 space. For coalesced WRs, the SGE fetches the
+ * data from the FIFO instead of from host memory.
+ */
+static inline void pio_copy(u64 __iomem *dst, u64 *src)
+{
+ int count = 8;
+
+ while (count) {
+ writeq(*src, dst);
+ src++;
+ dst++;
+ count--;
+ }
+}
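+
+/*
+ * The eight 64-bit writes above move exactly 8 * sizeof(u64) =
+ * T4_EQ_ENTRY_SIZE (64) bytes, i.e. one WQE slot, into the
+ * write-combining doorbell window in a single burst.
+ */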
+
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
+ union t4_wr *wqe)
+{
+
+ /* Flush host queue memory writes. */
+ wmb();
+ if (t5) {
+ if (inc == 1 && wqe) {
+ PDBG("%s: WC wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
+ pio_copy(wq->sq.udb + 7, (void *)wqe);
+ } else {
+ PDBG("%s: DB wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
+ writel(PIDX_T5(inc), wq->sq.udb);
+ }
+
+ /* Flush user doorbell area writes. */
+ wmb();
+ return;
+ }
+ writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
+}
+
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+ union t4_recv_wr *wqe)
+{
+
+ /* Flush host queue memory writes. */
+ wmb();
+ if (t5) {
+ if (inc == 1 && wqe) {
+ PDBG("%s: WC wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
+ pio_copy(wq->rq.udb + 7, (void *)wqe);
+ } else {
+ PDBG("%s: DB wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
+ writel(PIDX_T5(inc), wq->rq.udb);
+ }
+
+ /* Flush user doorbell area writes. */
+ wmb();
+ return;
+ }
+ writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
+}
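+
+/*
+ * Illustrative posting sequence (a sketch, not taken from this file):
+ * how init_wr_hdr(), t4_sq_produce() and t4_ring_sq_db() are meant to
+ * be combined.  The opcode/flag values are assumed from the firmware
+ * API headers included above, and a real caller builds a complete WQE
+ * (SGL or immediate data) and holds the QP lock before posting.
+ */
+static inline void example_post_send(struct t4_wq *wq, union t4_wr *wqe,
+				     u8 len16, u8 t5)
+{
+	init_wr_hdr(wqe, wq->sq.pidx, FW_RI_SEND_WR, FW_RI_COMPLETION_FLAG,
+		    len16);
+	t4_sq_produce(wq, len16);
+	t4_ring_sq_db(wq, DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE), t5, wqe);
+}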
+
+static inline int t4_wq_in_error(struct t4_wq *wq)
+{
+ return wq->rq.queue[wq->rq.size].status.qp_err;
+}
+
+static inline void t4_set_wq_in_error(struct t4_wq *wq)
+{
+ wq->rq.queue[wq->rq.size].status.qp_err = 1;
+}
+
+static inline void t4_disable_wq_db(struct t4_wq *wq)
+{
+ wq->rq.queue[wq->rq.size].status.db_off = 1;
+}
+
+static inline void t4_enable_wq_db(struct t4_wq *wq)
+{
+ wq->rq.queue[wq->rq.size].status.db_off = 0;
+}
+
+static inline int t4_wq_db_enabled(struct t4_wq *wq)
+{
+ return !wq->rq.queue[wq->rq.size].status.db_off;
+}
+
+struct t4_cq {
+ struct t4_cqe *queue;
+ dma_addr_t dma_addr;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ struct t4_cqe *sw_queue;
+ void __iomem *gts;
+ struct c4iw_rdev *rdev;
+ u64 ugts;
+ size_t memsize;
+ __be64 bits_type_ts;
+ u32 cqid;
+ int vector;
+ u16 size; /* including status page */
+ u16 cidx;
+ u16 sw_pidx;
+ u16 sw_cidx;
+ u16 sw_in_use;
+ u16 cidx_inc;
+ u8 gen;
+ u8 error;
+};
+
+static inline int t4_arm_cq(struct t4_cq *cq, int se)
+{
+ u32 val;
+
+ while (cq->cidx_inc > CIDXINC_MASK) {
+ val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
+ INGRESSQID(cq->cqid);
+ writel(val, cq->gts);
+ cq->cidx_inc -= CIDXINC_MASK;
+ }
+ val = SEINTARM(se) | CIDXINC(cq->cidx_inc) | TIMERREG(6) |
+ INGRESSQID(cq->cqid);
+ writel(val, cq->gts);
+ cq->cidx_inc = 0;
+ return 0;
+}
+
+static inline void t4_swcq_produce(struct t4_cq *cq)
+{
+ cq->sw_in_use++;
+ if (cq->sw_in_use == cq->size) {
+ PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ cq->error = 1;
+ BUG_ON(1);
+ }
+ if (++cq->sw_pidx == cq->size)
+ cq->sw_pidx = 0;
+}
+
+static inline void t4_swcq_consume(struct t4_cq *cq)
+{
+ BUG_ON(cq->sw_in_use < 1);
+ cq->sw_in_use--;
+ if (++cq->sw_cidx == cq->size)
+ cq->sw_cidx = 0;
+}
+
+static inline void t4_hwcq_consume(struct t4_cq *cq)
+{
+ cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
+ if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_MASK) {
+ u32 val;
+
+ val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
+ INGRESSQID(cq->cqid);
+ writel(val, cq->gts);
+ cq->cidx_inc = 0;
+ }
+ if (++cq->cidx == cq->size) {
+ cq->cidx = 0;
+ cq->gen ^= 1;
+ }
+}
+
+static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe)
+{
+ return (CQE_GENBIT(cqe) == cq->gen);
+}
+
+static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
+{
+ int ret;
+ u16 prev_cidx;
+
+ if (cq->cidx == 0)
+ prev_cidx = cq->size - 1;
+ else
+ prev_cidx = cq->cidx - 1;
+
+ if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
+ ret = -EOVERFLOW;
+ cq->error = 1;
+ printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+ BUG_ON(1);
+ } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
+
+ /* Ensure CQE is flushed to memory */
+ rmb();
+ *cqe = &cq->queue[cq->cidx];
+ ret = 0;
+ } else
+ ret = -ENODATA;
+ return ret;
+}
+
+static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
+{
+ if (cq->sw_in_use == cq->size) {
+ PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ cq->error = 1;
+ BUG_ON(1);
+ return NULL;
+ }
+ if (cq->sw_in_use)
+ return &cq->sw_queue[cq->sw_cidx];
+ return NULL;
+}
+
+static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
+{
+ int ret = 0;
+
+ if (cq->error)
+ ret = -ENODATA;
+ else if (cq->sw_in_use)
+ *cqe = &cq->sw_queue[cq->sw_cidx];
+ else
+ ret = t4_next_hw_cqe(cq, cqe);
+ return ret;
+}
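+
+/*
+ * Illustrative poll loop (a sketch, not taken from this file): how the
+ * CQE iteration helpers above fit together.  A real consumer also
+ * translates each CQE into an ib_wc and deals with flushed SQ/RQ
+ * entries before consuming it.
+ */
+static inline int example_drain_cq(struct t4_cq *cq)
+{
+	struct t4_cqe *cqe;
+	int polled = 0;
+
+	while (t4_next_cqe(cq, &cqe) == 0) {
+		if (SW_CQE(cqe))
+			t4_swcq_consume(cq);	/* software-generated CQE */
+		else
+			t4_hwcq_consume(cq);	/* CQE from the hardware queue */
+		polled++;
+	}
+	t4_arm_cq(cq, 0);	/* request another notification */
+	return polled;
+}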
+
+static inline int t4_cq_in_error(struct t4_cq *cq)
+{
+ return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err;
+}
+
+static inline void t4_set_cq_in_error(struct t4_cq *cq)
+{
+ ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
+}
+
+struct t4_dev_status_page {
+	u8 db_off;
+};
+#endif
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
new file mode 100644
index 00000000000..91289a051af
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -0,0 +1,854 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _T4FW_RI_API_H_
+#define _T4FW_RI_API_H_
+
+#include "t4fw_api.h"
+
+enum fw_ri_wr_opcode {
+ FW_RI_RDMA_WRITE = 0x0, /* IETF RDMAP v1.0 ... */
+ FW_RI_READ_REQ = 0x1,
+ FW_RI_READ_RESP = 0x2,
+ FW_RI_SEND = 0x3,
+ FW_RI_SEND_WITH_INV = 0x4,
+ FW_RI_SEND_WITH_SE = 0x5,
+ FW_RI_SEND_WITH_SE_INV = 0x6,
+ FW_RI_TERMINATE = 0x7,
+ FW_RI_RDMA_INIT = 0x8, /* CHELSIO RI specific ... */
+ FW_RI_BIND_MW = 0x9,
+ FW_RI_FAST_REGISTER = 0xa,
+ FW_RI_LOCAL_INV = 0xb,
+ FW_RI_QP_MODIFY = 0xc,
+ FW_RI_BYPASS = 0xd,
+ FW_RI_RECEIVE = 0xe,
+
+ FW_RI_SGE_EC_CR_RETURN = 0xf
+};
+
+enum fw_ri_wr_flags {
+ FW_RI_COMPLETION_FLAG = 0x01,
+ FW_RI_NOTIFICATION_FLAG = 0x02,
+ FW_RI_SOLICITED_EVENT_FLAG = 0x04,
+ FW_RI_READ_FENCE_FLAG = 0x08,
+ FW_RI_LOCAL_FENCE_FLAG = 0x10,
+ FW_RI_RDMA_READ_INVALIDATE = 0x20
+};
+
+enum fw_ri_mpa_attrs {
+ FW_RI_MPA_RX_MARKER_ENABLE = 0x01,
+ FW_RI_MPA_TX_MARKER_ENABLE = 0x02,
+ FW_RI_MPA_CRC_ENABLE = 0x04,
+ FW_RI_MPA_IETF_ENABLE = 0x08
+};
+
+enum fw_ri_qp_caps {
+ FW_RI_QP_RDMA_READ_ENABLE = 0x01,
+ FW_RI_QP_RDMA_WRITE_ENABLE = 0x02,
+ FW_RI_QP_BIND_ENABLE = 0x04,
+ FW_RI_QP_FAST_REGISTER_ENABLE = 0x08,
+ FW_RI_QP_STAG0_ENABLE = 0x10
+};
+
+enum fw_ri_addr_type {
+ FW_RI_ZERO_BASED_TO = 0x00,
+ FW_RI_VA_BASED_TO = 0x01
+};
+
+enum fw_ri_mem_perms {
+ FW_RI_MEM_ACCESS_REM_WRITE = 0x01,
+ FW_RI_MEM_ACCESS_REM_READ = 0x02,
+ FW_RI_MEM_ACCESS_REM = 0x03,
+ FW_RI_MEM_ACCESS_LOCAL_WRITE = 0x04,
+ FW_RI_MEM_ACCESS_LOCAL_READ = 0x08,
+ FW_RI_MEM_ACCESS_LOCAL = 0x0C
+};
+
+enum fw_ri_stag_type {
+ FW_RI_STAG_NSMR = 0x00,
+ FW_RI_STAG_SMR = 0x01,
+ FW_RI_STAG_MW = 0x02,
+ FW_RI_STAG_MW_RELAXED = 0x03
+};
+
+enum fw_ri_data_op {
+ FW_RI_DATA_IMMD = 0x81,
+ FW_RI_DATA_DSGL = 0x82,
+ FW_RI_DATA_ISGL = 0x83
+};
+
+enum fw_ri_sgl_depth {
+ FW_RI_SGL_DEPTH_MAX_SQ = 16,
+ FW_RI_SGL_DEPTH_MAX_RQ = 4
+};
+
+struct fw_ri_dsge_pair {
+ __be32 len[2];
+ __be64 addr[2];
+};
+
+struct fw_ri_dsgl {
+ __u8 op;
+ __u8 r1;
+ __be16 nsge;
+ __be32 len0;
+ __be64 addr0;
+#ifndef C99_NOT_SUPPORTED
+ struct fw_ri_dsge_pair sge[0];
+#endif
+};
+
+struct fw_ri_sge {
+ __be32 stag;
+ __be32 len;
+ __be64 to;
+};
+
+struct fw_ri_isgl {
+ __u8 op;
+ __u8 r1;
+ __be16 nsge;
+ __be32 r2;
+#ifndef C99_NOT_SUPPORTED
+ struct fw_ri_sge sge[0];
+#endif
+};
+
+struct fw_ri_immd {
+ __u8 op;
+ __u8 r1;
+ __be16 r2;
+ __be32 immdlen;
+#ifndef C99_NOT_SUPPORTED
+ __u8 data[0];
+#endif
+};
+
+struct fw_ri_tpte {
+ __be32 valid_to_pdid;
+ __be32 locread_to_qpid;
+ __be32 nosnoop_pbladdr;
+ __be32 len_lo;
+ __be32 va_hi;
+ __be32 va_lo_fbo;
+ __be32 dca_mwbcnt_pstag;
+ __be32 len_hi;
+};
+
+#define S_FW_RI_TPTE_VALID 31
+#define M_FW_RI_TPTE_VALID 0x1
+#define V_FW_RI_TPTE_VALID(x) ((x) << S_FW_RI_TPTE_VALID)
+#define G_FW_RI_TPTE_VALID(x) \
+ (((x) >> S_FW_RI_TPTE_VALID) & M_FW_RI_TPTE_VALID)
+#define F_FW_RI_TPTE_VALID V_FW_RI_TPTE_VALID(1U)
+
+#define S_FW_RI_TPTE_STAGKEY 23
+#define M_FW_RI_TPTE_STAGKEY 0xff
+#define V_FW_RI_TPTE_STAGKEY(x) ((x) << S_FW_RI_TPTE_STAGKEY)
+#define G_FW_RI_TPTE_STAGKEY(x) \
+ (((x) >> S_FW_RI_TPTE_STAGKEY) & M_FW_RI_TPTE_STAGKEY)
+
+#define S_FW_RI_TPTE_STAGSTATE 22
+#define M_FW_RI_TPTE_STAGSTATE 0x1
+#define V_FW_RI_TPTE_STAGSTATE(x) ((x) << S_FW_RI_TPTE_STAGSTATE)
+#define G_FW_RI_TPTE_STAGSTATE(x) \
+ (((x) >> S_FW_RI_TPTE_STAGSTATE) & M_FW_RI_TPTE_STAGSTATE)
+#define F_FW_RI_TPTE_STAGSTATE V_FW_RI_TPTE_STAGSTATE(1U)
+
+#define S_FW_RI_TPTE_STAGTYPE 20
+#define M_FW_RI_TPTE_STAGTYPE 0x3
+#define V_FW_RI_TPTE_STAGTYPE(x) ((x) << S_FW_RI_TPTE_STAGTYPE)
+#define G_FW_RI_TPTE_STAGTYPE(x) \
+ (((x) >> S_FW_RI_TPTE_STAGTYPE) & M_FW_RI_TPTE_STAGTYPE)
+
+#define S_FW_RI_TPTE_PDID 0
+#define M_FW_RI_TPTE_PDID 0xfffff
+#define V_FW_RI_TPTE_PDID(x) ((x) << S_FW_RI_TPTE_PDID)
+#define G_FW_RI_TPTE_PDID(x) \
+ (((x) >> S_FW_RI_TPTE_PDID) & M_FW_RI_TPTE_PDID)
+
+#define S_FW_RI_TPTE_PERM 28
+#define M_FW_RI_TPTE_PERM 0xf
+#define V_FW_RI_TPTE_PERM(x) ((x) << S_FW_RI_TPTE_PERM)
+#define G_FW_RI_TPTE_PERM(x) \
+ (((x) >> S_FW_RI_TPTE_PERM) & M_FW_RI_TPTE_PERM)
+
+#define S_FW_RI_TPTE_REMINVDIS 27
+#define M_FW_RI_TPTE_REMINVDIS 0x1
+#define V_FW_RI_TPTE_REMINVDIS(x) ((x) << S_FW_RI_TPTE_REMINVDIS)
+#define G_FW_RI_TPTE_REMINVDIS(x) \
+ (((x) >> S_FW_RI_TPTE_REMINVDIS) & M_FW_RI_TPTE_REMINVDIS)
+#define F_FW_RI_TPTE_REMINVDIS V_FW_RI_TPTE_REMINVDIS(1U)
+
+#define S_FW_RI_TPTE_ADDRTYPE 26
+#define M_FW_RI_TPTE_ADDRTYPE 1
+#define V_FW_RI_TPTE_ADDRTYPE(x) ((x) << S_FW_RI_TPTE_ADDRTYPE)
+#define G_FW_RI_TPTE_ADDRTYPE(x) \
+ (((x) >> S_FW_RI_TPTE_ADDRTYPE) & M_FW_RI_TPTE_ADDRTYPE)
+#define F_FW_RI_TPTE_ADDRTYPE V_FW_RI_TPTE_ADDRTYPE(1U)
+
+#define S_FW_RI_TPTE_MWBINDEN 25
+#define M_FW_RI_TPTE_MWBINDEN 0x1
+#define V_FW_RI_TPTE_MWBINDEN(x) ((x) << S_FW_RI_TPTE_MWBINDEN)
+#define G_FW_RI_TPTE_MWBINDEN(x) \
+ (((x) >> S_FW_RI_TPTE_MWBINDEN) & M_FW_RI_TPTE_MWBINDEN)
+#define F_FW_RI_TPTE_MWBINDEN V_FW_RI_TPTE_MWBINDEN(1U)
+
+#define S_FW_RI_TPTE_PS 20
+#define M_FW_RI_TPTE_PS 0x1f
+#define V_FW_RI_TPTE_PS(x) ((x) << S_FW_RI_TPTE_PS)
+#define G_FW_RI_TPTE_PS(x) \
+ (((x) >> S_FW_RI_TPTE_PS) & M_FW_RI_TPTE_PS)
+
+#define S_FW_RI_TPTE_QPID 0
+#define M_FW_RI_TPTE_QPID 0xfffff
+#define V_FW_RI_TPTE_QPID(x) ((x) << S_FW_RI_TPTE_QPID)
+#define G_FW_RI_TPTE_QPID(x) \
+ (((x) >> S_FW_RI_TPTE_QPID) & M_FW_RI_TPTE_QPID)
+
+#define S_FW_RI_TPTE_NOSNOOP 30
+#define M_FW_RI_TPTE_NOSNOOP 0x1
+#define V_FW_RI_TPTE_NOSNOOP(x) ((x) << S_FW_RI_TPTE_NOSNOOP)
+#define G_FW_RI_TPTE_NOSNOOP(x) \
+ (((x) >> S_FW_RI_TPTE_NOSNOOP) & M_FW_RI_TPTE_NOSNOOP)
+#define F_FW_RI_TPTE_NOSNOOP V_FW_RI_TPTE_NOSNOOP(1U)
+
+#define S_FW_RI_TPTE_PBLADDR 0
+#define M_FW_RI_TPTE_PBLADDR 0x1fffffff
+#define V_FW_RI_TPTE_PBLADDR(x) ((x) << S_FW_RI_TPTE_PBLADDR)
+#define G_FW_RI_TPTE_PBLADDR(x) \
+ (((x) >> S_FW_RI_TPTE_PBLADDR) & M_FW_RI_TPTE_PBLADDR)
+
+#define S_FW_RI_TPTE_DCA 24
+#define M_FW_RI_TPTE_DCA 0x1f
+#define V_FW_RI_TPTE_DCA(x) ((x) << S_FW_RI_TPTE_DCA)
+#define G_FW_RI_TPTE_DCA(x) \
+ (((x) >> S_FW_RI_TPTE_DCA) & M_FW_RI_TPTE_DCA)
+
+#define S_FW_RI_TPTE_MWBCNT_PSTAG 0
+#define M_FW_RI_TPTE_MWBCNT_PSTAG 0xffffff
+#define V_FW_RI_TPTE_MWBCNT_PSTAT(x) \
+ ((x) << S_FW_RI_TPTE_MWBCNT_PSTAG)
+#define G_FW_RI_TPTE_MWBCNT_PSTAG(x) \
+ (((x) >> S_FW_RI_TPTE_MWBCNT_PSTAG) & M_FW_RI_TPTE_MWBCNT_PSTAG)
+
+enum fw_ri_res_type {
+ FW_RI_RES_TYPE_SQ,
+ FW_RI_RES_TYPE_RQ,
+ FW_RI_RES_TYPE_CQ,
+};
+
+enum fw_ri_res_op {
+ FW_RI_RES_OP_WRITE,
+ FW_RI_RES_OP_RESET,
+};
+
+struct fw_ri_res {
+ union fw_ri_restype {
+ struct fw_ri_res_sqrq {
+ __u8 restype;
+ __u8 op;
+ __be16 r3;
+ __be32 eqid;
+ __be32 r4[2];
+ __be32 fetchszm_to_iqid;
+ __be32 dcaen_to_eqsize;
+ __be64 eqaddr;
+ } sqrq;
+ struct fw_ri_res_cq {
+ __u8 restype;
+ __u8 op;
+ __be16 r3;
+ __be32 iqid;
+ __be32 r4[2];
+ __be32 iqandst_to_iqandstindex;
+ __be16 iqdroprss_to_iqesize;
+ __be16 iqsize;
+ __be64 iqaddr;
+ __be32 iqns_iqro;
+ __be32 r6_lo;
+ __be64 r7;
+ } cq;
+ } u;
+};
+
+struct fw_ri_res_wr {
+ __be32 op_nres;
+ __be32 len16_pkd;
+ __u64 cookie;
+#ifndef C99_NOT_SUPPORTED
+ struct fw_ri_res res[0];
+#endif
+};
+
+#define S_FW_RI_RES_WR_NRES 0
+#define M_FW_RI_RES_WR_NRES 0xff
+#define V_FW_RI_RES_WR_NRES(x) ((x) << S_FW_RI_RES_WR_NRES)
+#define G_FW_RI_RES_WR_NRES(x) \
+ (((x) >> S_FW_RI_RES_WR_NRES) & M_FW_RI_RES_WR_NRES)
+
+#define S_FW_RI_RES_WR_FETCHSZM 26
+#define M_FW_RI_RES_WR_FETCHSZM 0x1
+#define V_FW_RI_RES_WR_FETCHSZM(x) ((x) << S_FW_RI_RES_WR_FETCHSZM)
+#define G_FW_RI_RES_WR_FETCHSZM(x) \
+ (((x) >> S_FW_RI_RES_WR_FETCHSZM) & M_FW_RI_RES_WR_FETCHSZM)
+#define F_FW_RI_RES_WR_FETCHSZM V_FW_RI_RES_WR_FETCHSZM(1U)
+
+#define S_FW_RI_RES_WR_STATUSPGNS 25
+#define M_FW_RI_RES_WR_STATUSPGNS 0x1
+#define V_FW_RI_RES_WR_STATUSPGNS(x) ((x) << S_FW_RI_RES_WR_STATUSPGNS)
+#define G_FW_RI_RES_WR_STATUSPGNS(x) \
+ (((x) >> S_FW_RI_RES_WR_STATUSPGNS) & M_FW_RI_RES_WR_STATUSPGNS)
+#define F_FW_RI_RES_WR_STATUSPGNS V_FW_RI_RES_WR_STATUSPGNS(1U)
+
+#define S_FW_RI_RES_WR_STATUSPGRO 24
+#define M_FW_RI_RES_WR_STATUSPGRO 0x1
+#define V_FW_RI_RES_WR_STATUSPGRO(x) ((x) << S_FW_RI_RES_WR_STATUSPGRO)
+#define G_FW_RI_RES_WR_STATUSPGRO(x) \
+ (((x) >> S_FW_RI_RES_WR_STATUSPGRO) & M_FW_RI_RES_WR_STATUSPGRO)
+#define F_FW_RI_RES_WR_STATUSPGRO V_FW_RI_RES_WR_STATUSPGRO(1U)
+
+#define S_FW_RI_RES_WR_FETCHNS 23
+#define M_FW_RI_RES_WR_FETCHNS 0x1
+#define V_FW_RI_RES_WR_FETCHNS(x) ((x) << S_FW_RI_RES_WR_FETCHNS)
+#define G_FW_RI_RES_WR_FETCHNS(x) \
+ (((x) >> S_FW_RI_RES_WR_FETCHNS) & M_FW_RI_RES_WR_FETCHNS)
+#define F_FW_RI_RES_WR_FETCHNS V_FW_RI_RES_WR_FETCHNS(1U)
+
+#define S_FW_RI_RES_WR_FETCHRO 22
+#define M_FW_RI_RES_WR_FETCHRO 0x1
+#define V_FW_RI_RES_WR_FETCHRO(x) ((x) << S_FW_RI_RES_WR_FETCHRO)
+#define G_FW_RI_RES_WR_FETCHRO(x) \
+ (((x) >> S_FW_RI_RES_WR_FETCHRO) & M_FW_RI_RES_WR_FETCHRO)
+#define F_FW_RI_RES_WR_FETCHRO V_FW_RI_RES_WR_FETCHRO(1U)
+
+#define S_FW_RI_RES_WR_HOSTFCMODE 20
+#define M_FW_RI_RES_WR_HOSTFCMODE 0x3
+#define V_FW_RI_RES_WR_HOSTFCMODE(x) ((x) << S_FW_RI_RES_WR_HOSTFCMODE)
+#define G_FW_RI_RES_WR_HOSTFCMODE(x) \
+ (((x) >> S_FW_RI_RES_WR_HOSTFCMODE) & M_FW_RI_RES_WR_HOSTFCMODE)
+
+#define S_FW_RI_RES_WR_CPRIO 19
+#define M_FW_RI_RES_WR_CPRIO 0x1
+#define V_FW_RI_RES_WR_CPRIO(x) ((x) << S_FW_RI_RES_WR_CPRIO)
+#define G_FW_RI_RES_WR_CPRIO(x) \
+ (((x) >> S_FW_RI_RES_WR_CPRIO) & M_FW_RI_RES_WR_CPRIO)
+#define F_FW_RI_RES_WR_CPRIO V_FW_RI_RES_WR_CPRIO(1U)
+
+#define S_FW_RI_RES_WR_ONCHIP 18
+#define M_FW_RI_RES_WR_ONCHIP 0x1
+#define V_FW_RI_RES_WR_ONCHIP(x) ((x) << S_FW_RI_RES_WR_ONCHIP)
+#define G_FW_RI_RES_WR_ONCHIP(x) \
+ (((x) >> S_FW_RI_RES_WR_ONCHIP) & M_FW_RI_RES_WR_ONCHIP)
+#define F_FW_RI_RES_WR_ONCHIP V_FW_RI_RES_WR_ONCHIP(1U)
+
+#define S_FW_RI_RES_WR_PCIECHN 16
+#define M_FW_RI_RES_WR_PCIECHN 0x3
+#define V_FW_RI_RES_WR_PCIECHN(x) ((x) << S_FW_RI_RES_WR_PCIECHN)
+#define G_FW_RI_RES_WR_PCIECHN(x) \
+ (((x) >> S_FW_RI_RES_WR_PCIECHN) & M_FW_RI_RES_WR_PCIECHN)
+
+#define S_FW_RI_RES_WR_IQID 0
+#define M_FW_RI_RES_WR_IQID 0xffff
+#define V_FW_RI_RES_WR_IQID(x) ((x) << S_FW_RI_RES_WR_IQID)
+#define G_FW_RI_RES_WR_IQID(x) \
+ (((x) >> S_FW_RI_RES_WR_IQID) & M_FW_RI_RES_WR_IQID)
+
+#define S_FW_RI_RES_WR_DCAEN 31
+#define M_FW_RI_RES_WR_DCAEN 0x1
+#define V_FW_RI_RES_WR_DCAEN(x) ((x) << S_FW_RI_RES_WR_DCAEN)
+#define G_FW_RI_RES_WR_DCAEN(x) \
+ (((x) >> S_FW_RI_RES_WR_DCAEN) & M_FW_RI_RES_WR_DCAEN)
+#define F_FW_RI_RES_WR_DCAEN V_FW_RI_RES_WR_DCAEN(1U)
+
+#define S_FW_RI_RES_WR_DCACPU 26
+#define M_FW_RI_RES_WR_DCACPU 0x1f
+#define V_FW_RI_RES_WR_DCACPU(x) ((x) << S_FW_RI_RES_WR_DCACPU)
+#define G_FW_RI_RES_WR_DCACPU(x) \
+ (((x) >> S_FW_RI_RES_WR_DCACPU) & M_FW_RI_RES_WR_DCACPU)
+
+#define S_FW_RI_RES_WR_FBMIN 23
+#define M_FW_RI_RES_WR_FBMIN 0x7
+#define V_FW_RI_RES_WR_FBMIN(x) ((x) << S_FW_RI_RES_WR_FBMIN)
+#define G_FW_RI_RES_WR_FBMIN(x) \
+ (((x) >> S_FW_RI_RES_WR_FBMIN) & M_FW_RI_RES_WR_FBMIN)
+
+#define S_FW_RI_RES_WR_FBMAX 20
+#define M_FW_RI_RES_WR_FBMAX 0x7
+#define V_FW_RI_RES_WR_FBMAX(x) ((x) << S_FW_RI_RES_WR_FBMAX)
+#define G_FW_RI_RES_WR_FBMAX(x) \
+ (((x) >> S_FW_RI_RES_WR_FBMAX) & M_FW_RI_RES_WR_FBMAX)
+
+#define S_FW_RI_RES_WR_CIDXFTHRESHO 19
+#define M_FW_RI_RES_WR_CIDXFTHRESHO 0x1
+#define V_FW_RI_RES_WR_CIDXFTHRESHO(x) ((x) << S_FW_RI_RES_WR_CIDXFTHRESHO)
+#define G_FW_RI_RES_WR_CIDXFTHRESHO(x) \
+ (((x) >> S_FW_RI_RES_WR_CIDXFTHRESHO) & M_FW_RI_RES_WR_CIDXFTHRESHO)
+#define F_FW_RI_RES_WR_CIDXFTHRESHO V_FW_RI_RES_WR_CIDXFTHRESHO(1U)
+
+#define S_FW_RI_RES_WR_CIDXFTHRESH 16
+#define M_FW_RI_RES_WR_CIDXFTHRESH 0x7
+#define V_FW_RI_RES_WR_CIDXFTHRESH(x) ((x) << S_FW_RI_RES_WR_CIDXFTHRESH)
+#define G_FW_RI_RES_WR_CIDXFTHRESH(x) \
+ (((x) >> S_FW_RI_RES_WR_CIDXFTHRESH) & M_FW_RI_RES_WR_CIDXFTHRESH)
+
+#define S_FW_RI_RES_WR_EQSIZE 0
+#define M_FW_RI_RES_WR_EQSIZE 0xffff
+#define V_FW_RI_RES_WR_EQSIZE(x) ((x) << S_FW_RI_RES_WR_EQSIZE)
+#define G_FW_RI_RES_WR_EQSIZE(x) \
+ (((x) >> S_FW_RI_RES_WR_EQSIZE) & M_FW_RI_RES_WR_EQSIZE)
+
+#define S_FW_RI_RES_WR_IQANDST 15
+#define M_FW_RI_RES_WR_IQANDST 0x1
+#define V_FW_RI_RES_WR_IQANDST(x) ((x) << S_FW_RI_RES_WR_IQANDST)
+#define G_FW_RI_RES_WR_IQANDST(x) \
+ (((x) >> S_FW_RI_RES_WR_IQANDST) & M_FW_RI_RES_WR_IQANDST)
+#define F_FW_RI_RES_WR_IQANDST V_FW_RI_RES_WR_IQANDST(1U)
+
+#define S_FW_RI_RES_WR_IQANUS 14
+#define M_FW_RI_RES_WR_IQANUS 0x1
+#define V_FW_RI_RES_WR_IQANUS(x) ((x) << S_FW_RI_RES_WR_IQANUS)
+#define G_FW_RI_RES_WR_IQANUS(x) \
+ (((x) >> S_FW_RI_RES_WR_IQANUS) & M_FW_RI_RES_WR_IQANUS)
+#define F_FW_RI_RES_WR_IQANUS V_FW_RI_RES_WR_IQANUS(1U)
+
+#define S_FW_RI_RES_WR_IQANUD 12
+#define M_FW_RI_RES_WR_IQANUD 0x3
+#define V_FW_RI_RES_WR_IQANUD(x) ((x) << S_FW_RI_RES_WR_IQANUD)
+#define G_FW_RI_RES_WR_IQANUD(x) \
+ (((x) >> S_FW_RI_RES_WR_IQANUD) & M_FW_RI_RES_WR_IQANUD)
+
+#define S_FW_RI_RES_WR_IQANDSTINDEX 0
+#define M_FW_RI_RES_WR_IQANDSTINDEX 0xfff
+#define V_FW_RI_RES_WR_IQANDSTINDEX(x) ((x) << S_FW_RI_RES_WR_IQANDSTINDEX)
+#define G_FW_RI_RES_WR_IQANDSTINDEX(x) \
+ (((x) >> S_FW_RI_RES_WR_IQANDSTINDEX) & M_FW_RI_RES_WR_IQANDSTINDEX)
+
+#define S_FW_RI_RES_WR_IQDROPRSS 15
+#define M_FW_RI_RES_WR_IQDROPRSS 0x1
+#define V_FW_RI_RES_WR_IQDROPRSS(x) ((x) << S_FW_RI_RES_WR_IQDROPRSS)
+#define G_FW_RI_RES_WR_IQDROPRSS(x) \
+ (((x) >> S_FW_RI_RES_WR_IQDROPRSS) & M_FW_RI_RES_WR_IQDROPRSS)
+#define F_FW_RI_RES_WR_IQDROPRSS V_FW_RI_RES_WR_IQDROPRSS(1U)
+
+#define S_FW_RI_RES_WR_IQGTSMODE 14
+#define M_FW_RI_RES_WR_IQGTSMODE 0x1
+#define V_FW_RI_RES_WR_IQGTSMODE(x) ((x) << S_FW_RI_RES_WR_IQGTSMODE)
+#define G_FW_RI_RES_WR_IQGTSMODE(x) \
+ (((x) >> S_FW_RI_RES_WR_IQGTSMODE) & M_FW_RI_RES_WR_IQGTSMODE)
+#define F_FW_RI_RES_WR_IQGTSMODE V_FW_RI_RES_WR_IQGTSMODE(1U)
+
+#define S_FW_RI_RES_WR_IQPCIECH 12
+#define M_FW_RI_RES_WR_IQPCIECH 0x3
+#define V_FW_RI_RES_WR_IQPCIECH(x) ((x) << S_FW_RI_RES_WR_IQPCIECH)
+#define G_FW_RI_RES_WR_IQPCIECH(x) \
+ (((x) >> S_FW_RI_RES_WR_IQPCIECH) & M_FW_RI_RES_WR_IQPCIECH)
+
+#define S_FW_RI_RES_WR_IQDCAEN 11
+#define M_FW_RI_RES_WR_IQDCAEN 0x1
+#define V_FW_RI_RES_WR_IQDCAEN(x) ((x) << S_FW_RI_RES_WR_IQDCAEN)
+#define G_FW_RI_RES_WR_IQDCAEN(x) \
+ (((x) >> S_FW_RI_RES_WR_IQDCAEN) & M_FW_RI_RES_WR_IQDCAEN)
+#define F_FW_RI_RES_WR_IQDCAEN V_FW_RI_RES_WR_IQDCAEN(1U)
+
+#define S_FW_RI_RES_WR_IQDCACPU 6
+#define M_FW_RI_RES_WR_IQDCACPU 0x1f
+#define V_FW_RI_RES_WR_IQDCACPU(x) ((x) << S_FW_RI_RES_WR_IQDCACPU)
+#define G_FW_RI_RES_WR_IQDCACPU(x) \
+ (((x) >> S_FW_RI_RES_WR_IQDCACPU) & M_FW_RI_RES_WR_IQDCACPU)
+
+#define S_FW_RI_RES_WR_IQINTCNTTHRESH 4
+#define M_FW_RI_RES_WR_IQINTCNTTHRESH 0x3
+#define V_FW_RI_RES_WR_IQINTCNTTHRESH(x) \
+ ((x) << S_FW_RI_RES_WR_IQINTCNTTHRESH)
+#define G_FW_RI_RES_WR_IQINTCNTTHRESH(x) \
+ (((x) >> S_FW_RI_RES_WR_IQINTCNTTHRESH) & M_FW_RI_RES_WR_IQINTCNTTHRESH)
+
+#define S_FW_RI_RES_WR_IQO 3
+#define M_FW_RI_RES_WR_IQO 0x1
+#define V_FW_RI_RES_WR_IQO(x) ((x) << S_FW_RI_RES_WR_IQO)
+#define G_FW_RI_RES_WR_IQO(x) \
+ (((x) >> S_FW_RI_RES_WR_IQO) & M_FW_RI_RES_WR_IQO)
+#define F_FW_RI_RES_WR_IQO V_FW_RI_RES_WR_IQO(1U)
+
+#define S_FW_RI_RES_WR_IQCPRIO 2
+#define M_FW_RI_RES_WR_IQCPRIO 0x1
+#define V_FW_RI_RES_WR_IQCPRIO(x) ((x) << S_FW_RI_RES_WR_IQCPRIO)
+#define G_FW_RI_RES_WR_IQCPRIO(x) \
+ (((x) >> S_FW_RI_RES_WR_IQCPRIO) & M_FW_RI_RES_WR_IQCPRIO)
+#define F_FW_RI_RES_WR_IQCPRIO V_FW_RI_RES_WR_IQCPRIO(1U)
+
+#define S_FW_RI_RES_WR_IQESIZE 0
+#define M_FW_RI_RES_WR_IQESIZE 0x3
+#define V_FW_RI_RES_WR_IQESIZE(x) ((x) << S_FW_RI_RES_WR_IQESIZE)
+#define G_FW_RI_RES_WR_IQESIZE(x) \
+ (((x) >> S_FW_RI_RES_WR_IQESIZE) & M_FW_RI_RES_WR_IQESIZE)
+
+#define S_FW_RI_RES_WR_IQNS 31
+#define M_FW_RI_RES_WR_IQNS 0x1
+#define V_FW_RI_RES_WR_IQNS(x) ((x) << S_FW_RI_RES_WR_IQNS)
+#define G_FW_RI_RES_WR_IQNS(x) \
+ (((x) >> S_FW_RI_RES_WR_IQNS) & M_FW_RI_RES_WR_IQNS)
+#define F_FW_RI_RES_WR_IQNS V_FW_RI_RES_WR_IQNS(1U)
+
+#define S_FW_RI_RES_WR_IQRO 30
+#define M_FW_RI_RES_WR_IQRO 0x1
+#define V_FW_RI_RES_WR_IQRO(x) ((x) << S_FW_RI_RES_WR_IQRO)
+#define G_FW_RI_RES_WR_IQRO(x) \
+ (((x) >> S_FW_RI_RES_WR_IQRO) & M_FW_RI_RES_WR_IQRO)
+#define F_FW_RI_RES_WR_IQRO V_FW_RI_RES_WR_IQRO(1U)
+
+struct fw_ri_rdma_write_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __be64 r2;
+ __be32 plen;
+ __be32 stag_sink;
+ __be64 to_sink;
+#ifndef C99_NOT_SUPPORTED
+ union {
+ struct fw_ri_immd immd_src[0];
+ struct fw_ri_isgl isgl_src[0];
+ } u;
+#endif
+};
+
+struct fw_ri_send_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __be32 sendop_pkd;
+ __be32 stag_inv;
+ __be32 plen;
+ __be32 r3;
+ __be64 r4;
+#ifndef C99_NOT_SUPPORTED
+ union {
+ struct fw_ri_immd immd_src[0];
+ struct fw_ri_isgl isgl_src[0];
+ } u;
+#endif
+};
+
+#define S_FW_RI_SEND_WR_SENDOP 0
+#define M_FW_RI_SEND_WR_SENDOP 0xf
+#define V_FW_RI_SEND_WR_SENDOP(x) ((x) << S_FW_RI_SEND_WR_SENDOP)
+#define G_FW_RI_SEND_WR_SENDOP(x) \
+ (((x) >> S_FW_RI_SEND_WR_SENDOP) & M_FW_RI_SEND_WR_SENDOP)
+
+struct fw_ri_rdma_read_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __be64 r2;
+ __be32 stag_sink;
+ __be32 to_sink_hi;
+ __be32 to_sink_lo;
+ __be32 plen;
+ __be32 stag_src;
+ __be32 to_src_hi;
+ __be32 to_src_lo;
+ __be32 r5;
+};
+
+struct fw_ri_recv_wr {
+ __u8 opcode;
+ __u8 r1;
+ __u16 wrid;
+ __u8 r2[3];
+ __u8 len16;
+ struct fw_ri_isgl isgl;
+};
+
+struct fw_ri_bind_mw_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __u8 qpbinde_to_dcacpu;
+ __u8 pgsz_shift;
+ __u8 addr_type;
+ __u8 mem_perms;
+ __be32 stag_mr;
+ __be32 stag_mw;
+ __be32 r3;
+ __be64 len_mw;
+ __be64 va_fbo;
+ __be64 r4;
+};
+
+#define S_FW_RI_BIND_MW_WR_QPBINDE 6
+#define M_FW_RI_BIND_MW_WR_QPBINDE 0x1
+#define V_FW_RI_BIND_MW_WR_QPBINDE(x) ((x) << S_FW_RI_BIND_MW_WR_QPBINDE)
+#define G_FW_RI_BIND_MW_WR_QPBINDE(x) \
+ (((x) >> S_FW_RI_BIND_MW_WR_QPBINDE) & M_FW_RI_BIND_MW_WR_QPBINDE)
+#define F_FW_RI_BIND_MW_WR_QPBINDE V_FW_RI_BIND_MW_WR_QPBINDE(1U)
+
+#define S_FW_RI_BIND_MW_WR_NS 5
+#define M_FW_RI_BIND_MW_WR_NS 0x1
+#define V_FW_RI_BIND_MW_WR_NS(x) ((x) << S_FW_RI_BIND_MW_WR_NS)
+#define G_FW_RI_BIND_MW_WR_NS(x) \
+ (((x) >> S_FW_RI_BIND_MW_WR_NS) & M_FW_RI_BIND_MW_WR_NS)
+#define F_FW_RI_BIND_MW_WR_NS V_FW_RI_BIND_MW_WR_NS(1U)
+
+#define S_FW_RI_BIND_MW_WR_DCACPU 0
+#define M_FW_RI_BIND_MW_WR_DCACPU 0x1f
+#define V_FW_RI_BIND_MW_WR_DCACPU(x) ((x) << S_FW_RI_BIND_MW_WR_DCACPU)
+#define G_FW_RI_BIND_MW_WR_DCACPU(x) \
+ (((x) >> S_FW_RI_BIND_MW_WR_DCACPU) & M_FW_RI_BIND_MW_WR_DCACPU)
+
+struct fw_ri_fr_nsmr_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __u8 qpbinde_to_dcacpu;
+ __u8 pgsz_shift;
+ __u8 addr_type;
+ __u8 mem_perms;
+ __be32 stag;
+ __be32 len_hi;
+ __be32 len_lo;
+ __be32 va_hi;
+ __be32 va_lo_fbo;
+};
+
+#define S_FW_RI_FR_NSMR_WR_QPBINDE 6
+#define M_FW_RI_FR_NSMR_WR_QPBINDE 0x1
+#define V_FW_RI_FR_NSMR_WR_QPBINDE(x) ((x) << S_FW_RI_FR_NSMR_WR_QPBINDE)
+#define G_FW_RI_FR_NSMR_WR_QPBINDE(x) \
+ (((x) >> S_FW_RI_FR_NSMR_WR_QPBINDE) & M_FW_RI_FR_NSMR_WR_QPBINDE)
+#define F_FW_RI_FR_NSMR_WR_QPBINDE V_FW_RI_FR_NSMR_WR_QPBINDE(1U)
+
+#define S_FW_RI_FR_NSMR_WR_NS 5
+#define M_FW_RI_FR_NSMR_WR_NS 0x1
+#define V_FW_RI_FR_NSMR_WR_NS(x) ((x) << S_FW_RI_FR_NSMR_WR_NS)
+#define G_FW_RI_FR_NSMR_WR_NS(x) \
+ (((x) >> S_FW_RI_FR_NSMR_WR_NS) & M_FW_RI_FR_NSMR_WR_NS)
+#define F_FW_RI_FR_NSMR_WR_NS V_FW_RI_FR_NSMR_WR_NS(1U)
+
+#define S_FW_RI_FR_NSMR_WR_DCACPU 0
+#define M_FW_RI_FR_NSMR_WR_DCACPU 0x1f
+#define V_FW_RI_FR_NSMR_WR_DCACPU(x) ((x) << S_FW_RI_FR_NSMR_WR_DCACPU)
+#define G_FW_RI_FR_NSMR_WR_DCACPU(x) \
+ (((x) >> S_FW_RI_FR_NSMR_WR_DCACPU) & M_FW_RI_FR_NSMR_WR_DCACPU)
+
+struct fw_ri_inv_lstag_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __be32 r2;
+ __be32 stag_inv;
+};
+
+enum fw_ri_type {
+ FW_RI_TYPE_INIT,
+ FW_RI_TYPE_FINI,
+ FW_RI_TYPE_TERMINATE
+};
+
+enum fw_ri_init_p2ptype {
+ FW_RI_INIT_P2PTYPE_RDMA_WRITE = FW_RI_RDMA_WRITE,
+ FW_RI_INIT_P2PTYPE_READ_REQ = FW_RI_READ_REQ,
+ FW_RI_INIT_P2PTYPE_SEND = FW_RI_SEND,
+ FW_RI_INIT_P2PTYPE_SEND_WITH_INV = FW_RI_SEND_WITH_INV,
+ FW_RI_INIT_P2PTYPE_SEND_WITH_SE = FW_RI_SEND_WITH_SE,
+ FW_RI_INIT_P2PTYPE_SEND_WITH_SE_INV = FW_RI_SEND_WITH_SE_INV,
+ FW_RI_INIT_P2PTYPE_DISABLED = 0xf,
+};
+
+struct fw_ri_wr {
+ __be32 op_compl;
+ __be32 flowid_len16;
+ __u64 cookie;
+ union fw_ri {
+ struct fw_ri_init {
+ __u8 type;
+ __u8 mpareqbit_p2ptype;
+ __u8 r4[2];
+ __u8 mpa_attrs;
+ __u8 qp_caps;
+ __be16 nrqe;
+ __be32 pdid;
+ __be32 qpid;
+ __be32 sq_eqid;
+ __be32 rq_eqid;
+ __be32 scqid;
+ __be32 rcqid;
+ __be32 ord_max;
+ __be32 ird_max;
+ __be32 iss;
+ __be32 irs;
+ __be32 hwrqsize;
+ __be32 hwrqaddr;
+ __be64 r5;
+ union fw_ri_init_p2p {
+ struct fw_ri_rdma_write_wr write;
+ struct fw_ri_rdma_read_wr read;
+ struct fw_ri_send_wr send;
+ } u;
+ } init;
+ struct fw_ri_fini {
+ __u8 type;
+ __u8 r3[7];
+ __be64 r4;
+ } fini;
+ struct fw_ri_terminate {
+ __u8 type;
+ __u8 r3[3];
+ __be32 immdlen;
+ __u8 termmsg[40];
+ } terminate;
+ } u;
+};
+
+#define S_FW_RI_WR_MPAREQBIT 7
+#define M_FW_RI_WR_MPAREQBIT 0x1
+#define V_FW_RI_WR_MPAREQBIT(x) ((x) << S_FW_RI_WR_MPAREQBIT)
+#define G_FW_RI_WR_MPAREQBIT(x) \
+ (((x) >> S_FW_RI_WR_MPAREQBIT) & M_FW_RI_WR_MPAREQBIT)
+#define F_FW_RI_WR_MPAREQBIT V_FW_RI_WR_MPAREQBIT(1U)
+
+#define S_FW_RI_WR_P2PTYPE 0
+#define M_FW_RI_WR_P2PTYPE 0xf
+#define V_FW_RI_WR_P2PTYPE(x) ((x) << S_FW_RI_WR_P2PTYPE)
+#define G_FW_RI_WR_P2PTYPE(x) \
+ (((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE)
+
+struct tcp_options {
+ __be16 mss;
+ __u8 wsf;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8:4;
+ __u8 unknown:1;
+ __u8:1;
+ __u8 sack:1;
+ __u8 tstamp:1;
+#else
+ __u8 tstamp:1;
+ __u8 sack:1;
+ __u8:1;
+ __u8 unknown:1;
+ __u8:4;
+#endif
+};
+
+struct cpl_pass_accept_req {
+ union opcode_tid ot;
+ __be16 rsvd;
+ __be16 len;
+ __be32 hdr_len;
+ __be16 vlan;
+ __be16 l2info;
+ __be32 tos_stid;
+ struct tcp_options tcpopt;
+};
+
+/* cpl_pass_accept_req.hdr_len fields */
+#define S_SYN_RX_CHAN 0
+#define M_SYN_RX_CHAN 0xF
+#define V_SYN_RX_CHAN(x) ((x) << S_SYN_RX_CHAN)
+#define G_SYN_RX_CHAN(x) (((x) >> S_SYN_RX_CHAN) & M_SYN_RX_CHAN)
+
+#define S_TCP_HDR_LEN 10
+#define M_TCP_HDR_LEN 0x3F
+#define V_TCP_HDR_LEN(x) ((x) << S_TCP_HDR_LEN)
+#define G_TCP_HDR_LEN(x) (((x) >> S_TCP_HDR_LEN) & M_TCP_HDR_LEN)
+
+#define S_IP_HDR_LEN 16
+#define M_IP_HDR_LEN 0x3FF
+#define V_IP_HDR_LEN(x) ((x) << S_IP_HDR_LEN)
+#define G_IP_HDR_LEN(x) (((x) >> S_IP_HDR_LEN) & M_IP_HDR_LEN)
+
+#define S_ETH_HDR_LEN 26
+#define M_ETH_HDR_LEN 0x1F
+#define V_ETH_HDR_LEN(x) ((x) << S_ETH_HDR_LEN)
+#define G_ETH_HDR_LEN(x) (((x) >> S_ETH_HDR_LEN) & M_ETH_HDR_LEN)
+
+/* cpl_pass_accept_req.l2info fields */
+#define S_SYN_MAC_IDX 0
+#define M_SYN_MAC_IDX 0x1FF
+#define V_SYN_MAC_IDX(x) ((x) << S_SYN_MAC_IDX)
+#define G_SYN_MAC_IDX(x) (((x) >> S_SYN_MAC_IDX) & M_SYN_MAC_IDX)
+
+#define S_SYN_XACT_MATCH 9
+#define V_SYN_XACT_MATCH(x) ((x) << S_SYN_XACT_MATCH)
+#define F_SYN_XACT_MATCH V_SYN_XACT_MATCH(1U)
+
+#define S_SYN_INTF 12
+#define M_SYN_INTF 0xF
+#define V_SYN_INTF(x) ((x) << S_SYN_INTF)
+#define G_SYN_INTF(x) (((x) >> S_SYN_INTF) & M_SYN_INTF)
+
+struct ulptx_idata {
+ __be32 cmd_more;
+ __be32 len;
+};
+
+#define S_ULPTX_NSGE 0
+#define M_ULPTX_NSGE 0xFFFF
+#define V_ULPTX_NSGE(x) ((x) << S_ULPTX_NSGE)
+
+#define S_RX_DACK_MODE 29
+#define M_RX_DACK_MODE 0x3
+#define V_RX_DACK_MODE(x) ((x) << S_RX_DACK_MODE)
+#define G_RX_DACK_MODE(x) (((x) >> S_RX_DACK_MODE) & M_RX_DACK_MODE)
+
+#define S_RX_DACK_CHANGE 31
+#define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE)
+#define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U)
+
+enum { /* TCP congestion control algorithms */
+ CONG_ALG_RENO,
+ CONG_ALG_TAHOE,
+ CONG_ALG_NEWRENO,
+ CONG_ALG_HIGHSPEED
+};
+
+#define S_CONG_CNTRL 14
+#define M_CONG_CNTRL 0x3
+#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
+#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
+
+#define CONG_CNTRL_VALID (1 << 18)
+#define T5_OPT_2_VALID (1 << 31)
+
+#endif /* _T4FW_RI_API_H_ */
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
new file mode 100644
index 00000000000..cbd0ce17072
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __C4IW_USER_H__
+#define __C4IW_USER_H__
+
+#define C4IW_UVERBS_ABI_VERSION 2
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct c4iw_create_cq_resp {
+ __u64 key;
+ __u64 gts_key;
+ __u64 memsize;
+ __u32 cqid;
+ __u32 size;
+ __u32 qid_mask;
+ __u32 reserved; /* explicit padding (optional for i386) */
+};
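+
+/*
+ * Why the explicit "reserved" member above: the __u64 fields give the
+ * struct 8-byte alignment on 64-bit kernels, so the 3*8 + 3*4 = 36
+ * bytes of real fields would be tail-padded to 40 there, while a
+ * 32-bit compiler would leave the struct at 36 bytes.  The extra __u32
+ * makes sizeof() 40 on both, keeping the user/kernel ABI identical.
+ */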
+
+
+enum {
+ C4IW_QPF_ONCHIP = (1<<0)
+};
+
+struct c4iw_create_qp_resp {
+ __u64 ma_sync_key;
+ __u64 sq_key;
+ __u64 rq_key;
+ __u64 sq_db_gts_key;
+ __u64 rq_db_gts_key;
+ __u64 sq_memsize;
+ __u64 rq_memsize;
+ __u32 sqid;
+ __u32 rqid;
+ __u32 sq_size;
+ __u32 rq_size;
+ __u32 qid_mask;
+ __u32 flags;
+};
+
+struct c4iw_alloc_ucontext_resp {
+ __u64 status_page_key;
+ __u32 status_page_size;
+ __u32 reserved; /* explicit padding (optional for i386) */
+};
+#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
index 56735ea2fc5..465926319f3 100644
--- a/drivers/infiniband/hw/ehca/ehca_av.c
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -41,6 +41,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_tools.h"
#include "ehca_iverbs.h"
#include "hcp_if.h"
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 1e9e99a1393..bd45e0f3923 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -112,7 +112,7 @@ struct ehca_sport {
struct ehca_shca {
struct ib_device ib_device;
- struct of_device *ofdev;
+ struct platform_device *ofdev;
u8 num_ports;
int hw_level;
struct list_head shca_list;
@@ -128,6 +128,8 @@ struct ehca_shca {
/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
u32 hca_cap_mr_pgsize;
int max_mtu;
+ int max_num_qps;
+ int max_num_cqs;
atomic_t num_cqs;
atomic_t num_qps;
};
@@ -156,6 +158,30 @@ struct ehca_mod_qp_parm {
#define EHCA_MOD_QP_PARM_MAX 4
+#define QMAP_IDX_MASK 0xFFFFULL
+
+/* struct for tracking if cqes have been reported to the application */
+struct ehca_qmap_entry {
+ u16 app_wr_id;
+ u8 reported;
+ u8 cqe_req;
+};
+
+struct ehca_queue_map {
+ struct ehca_qmap_entry *map;
+ unsigned int entries;
+ unsigned int tail;
+ unsigned int left_to_poll;
+ unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */
+};
+
+/* function to calculate the next index for the qmap */
+static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
+{
+ unsigned int temp = cur_index + 1;
+ return (temp == limit) ? 0 : temp;
+}
+
struct ehca_qp {
union {
struct ib_qp ib_qp;
@@ -165,7 +191,9 @@ struct ehca_qp {
enum ehca_ext_qp_type ext_type;
enum ib_qp_state state;
struct ipz_queue ipz_squeue;
+ struct ehca_queue_map sq_map;
struct ipz_queue ipz_rqueue;
+ struct ehca_queue_map rq_map;
struct h_galpas galpas;
u32 qkey;
u32 real_qp_num;
@@ -194,6 +222,9 @@ struct ehca_qp {
u32 packet_count;
atomic_t nr_events; /* events seen */
wait_queue_head_t wait_completion;
+ int mig_armed;
+ struct list_head sq_err_node;
+ struct list_head rq_err_node;
};
#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
@@ -223,6 +254,8 @@ struct ehca_cq {
/* mmap counter for resources mapped into user space */
u32 mm_count_queue;
u32 mm_count_galpa;
+ struct list_head sqp_err_list;
+ struct list_head rqp_err_list;
};
enum ehca_mr_flag {
@@ -289,7 +322,7 @@ struct ehca_mr_pginfo {
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
- struct ib_umem_chunk *next_chunk;
+ struct scatterlist *next_sg;
u64 next_nmap;
} usr;
struct { /* type EHCA_MR_PGI_FMR section */
@@ -342,11 +375,12 @@ extern rwlock_t ehca_qp_idr_lock;
extern rwlock_t ehca_cq_idr_lock;
extern struct idr ehca_qp_idr;
extern struct idr ehca_cq_idr;
+extern spinlock_t shca_list_lock;
extern int ehca_static_rate;
extern int ehca_port_act_time;
-extern int ehca_use_hp_mr;
-extern int ehca_scaling_code;
+extern bool ehca_use_hp_mr;
+extern bool ehca_scaling_code;
extern int ehca_lock_hcalls;
extern int ehca_nr_ports;
extern int ehca_max_cq;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
index 1798e6466bd..689c35786dd 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block {
#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7)
#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8)
#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9)
-#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11)
#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12)
#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13)
@@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block {
#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18)
#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19)
#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20)
-#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21)
-#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22)
-#define MQPCB_DLID EHCA_BMASK_IBM(16, 31)
#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23)
-#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31)
#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24)
-#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31)
#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25)
-#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26)
-#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27)
-#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28)
-#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31)
#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30)
#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31)
-#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31)
#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32)
-#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31)
#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33)
-#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34)
-#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31)
#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35)
-#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36)
-#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31)
#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37)
-#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31)
#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38)
-#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31)
#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39)
-#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40)
-#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41)
-#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31)
#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42)
-#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31)
#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44)
#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45)
-#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46)
-#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31)
#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47)
-#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31)
-#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31)
#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48)
-#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31)
#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49)
-#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31)
#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50)
#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51)
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 5540b276a33..8cc83753776 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -43,6 +43,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_iverbs.h"
#include "ehca_classes.h"
#include "ehca_irq.h"
@@ -126,15 +128,15 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
void *vpage;
u32 counter;
u64 rpage, cqx_fec, h_ret;
- int ipz_rc, ret, i;
+ int ipz_rc, i;
unsigned long flags;
if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
return ERR_PTR(-EINVAL);
- if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
+ if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) {
ehca_err(device, "Unable to create CQ, max number of %i "
- "CQs reached.", ehca_max_cq);
+ "CQs reached.", shca->max_num_cqs);
ehca_err(device, "To increase the maximum number of CQs "
"use the number_of_cqs module parameter.\n");
return ERR_PTR(-ENOSPC);
@@ -161,32 +163,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
adapter_handle = shca->ipz_hca_handle;
param.eq_handle = shca->eq.ipz_eq_handle;
- do {
- if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
- cq = ERR_PTR(-ENOMEM);
- ehca_err(device, "Can't reserve idr nr. device=%p",
- device);
- goto create_cq_exit1;
- }
-
- write_lock_irqsave(&ehca_cq_idr_lock, flags);
- ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
- write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
- } while (ret == -EAGAIN);
+ idr_preload(GFP_KERNEL);
+ write_lock_irqsave(&ehca_cq_idr_lock, flags);
+ my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT);
+ write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+ idr_preload_end();
- if (ret) {
+ if (my_cq->token < 0) {
cq = ERR_PTR(-ENOMEM);
ehca_err(device, "Can't allocate new idr entry. device=%p",
device);
goto create_cq_exit1;
}
- if (my_cq->token > 0x1FFFFFF) {
- cq = ERR_PTR(-ENOMEM);
- ehca_err(device, "Invalid number of cq. device=%p", device);
- goto create_cq_exit2;
- }
-
/*
* CQs maximum depth is 4GB-64, but we need additional 20 as buffer
* for receiving error CQEs.
@@ -196,7 +185,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (h_ret != H_SUCCESS) {
ehca_err(device, "hipz_h_alloc_resource_cq() failed "
- "h_ret=%li device=%p", h_ret, device);
+ "h_ret=%lli device=%p", h_ret, device);
cq = ERR_PTR(ehca2ib_return_code(h_ret));
goto create_cq_exit2;
}
@@ -218,7 +207,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
cq = ERR_PTR(-EAGAIN);
goto create_cq_exit4;
}
- rpage = virt_to_abs(vpage);
+ rpage = __pa(vpage);
h_ret = hipz_h_register_rpage_cq(adapter_handle,
my_cq->ipz_cq_handle,
@@ -232,7 +221,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if (h_ret < H_SUCCESS) {
ehca_err(device, "hipz_h_register_rpage_cq() failed "
- "ehca_cq=%p cq_num=%x h_ret=%li counter=%i "
+ "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i "
"act_pages=%i", my_cq, my_cq->cq_number,
h_ret, counter, param.act_pages);
cq = ERR_PTR(-EINVAL);
@@ -244,7 +233,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
if ((h_ret != H_SUCCESS) || vpage) {
ehca_err(device, "Registration of pages not "
"complete ehca_cq=%p cq_num=%x "
- "h_ret=%li", my_cq, my_cq->cq_number,
+ "h_ret=%lli", my_cq, my_cq->cq_number,
h_ret);
cq = ERR_PTR(-EAGAIN);
goto create_cq_exit4;
@@ -252,7 +241,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
} else {
if (h_ret != H_PAGE_REGISTERED) {
ehca_err(device, "Registration of page failed "
- "ehca_cq=%p cq_num=%x h_ret=%li "
+ "ehca_cq=%p cq_num=%x h_ret=%lli "
"counter=%i act_pages=%i",
my_cq, my_cq->cq_number,
h_ret, counter, param.act_pages);
@@ -266,7 +255,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
gal = my_cq->galpas.kernel;
cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
- ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
+ ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx",
my_cq, my_cq->cq_number, cqx_fec);
my_cq->ib_cq.cqe = my_cq->nr_of_entries =
@@ -276,6 +265,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
for (i = 0; i < QP_HASHTAB_LEN; i++)
INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
+ INIT_LIST_HEAD(&my_cq->sqp_err_list);
+ INIT_LIST_HEAD(&my_cq->rqp_err_list);
+
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
@@ -291,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
(my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
+ cq = ERR_PTR(-EFAULT);
goto create_cq_exit4;
}
}
@@ -304,7 +297,7 @@ create_cq_exit3:
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
if (h_ret != H_SUCCESS)
ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
- "cq_num=%x h_ret=%li", my_cq, my_cq->cq_number, h_ret);
+ "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret);
create_cq_exit2:
write_lock_irqsave(&ehca_cq_idr_lock, flags);
@@ -352,7 +345,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
if (h_ret == H_R_STATE) {
/* cq in err: read err data and destroy it forcibly */
- ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err "
+ ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err "
"state. Try to delete it forcibly.",
my_cq, cq_num, my_cq->ipz_cq_handle.handle);
ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
@@ -362,7 +355,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
cq_num);
}
if (h_ret != H_SUCCESS) {
- ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%li "
+ ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli "
"ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
return ehca2ib_return_code(h_ret);
}
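
The idr conversion above follows the standard three-step preload idiom: preload outside the lock, allocate with GFP_NOWAIT under the lock, then end the preload. A minimal self-contained sketch with illustrative names (the 0x2000000 upper bound mirrors the old "token > 0x1FFFFFF" check, since idr_alloc()'s end argument is exclusive):

	#include <linux/idr.h>
	#include <linux/spinlock.h>

	static DEFINE_IDR(example_idr);
	static DEFINE_SPINLOCK(example_lock);

	/* returns the new id in [0, 0x2000000) or a negative errno */
	static int example_alloc_token(void *ptr)
	{
		int id;

		idr_preload(GFP_KERNEL);	/* may sleep, fills the per-CPU cache */
		spin_lock(&example_lock);
		id = idr_alloc(&example_idr, ptr, 0, 0x2000000, GFP_NOWAIT);
		spin_unlock(&example_lock);
		idr_preload_end();

		return id;
	}
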
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 49660dfa186..90da6747d39 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -101,7 +101,7 @@ int ehca_create_eq(struct ehca_shca *shca,
if (!vpage)
goto create_eq_exit2;
- rpage = virt_to_abs(vpage);
+ rpage = __pa(vpage);
h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
eq->ipz_eq_handle,
&eq->pf,
@@ -113,7 +113,7 @@ int ehca_create_eq(struct ehca_shca *shca,
if (h_ret != H_SUCCESS || vpage)
goto create_eq_exit2;
} else {
- if (h_ret != H_PAGE_REGISTERED || !vpage)
+ if (h_ret != H_PAGE_REGISTERED)
goto create_eq_exit2;
}
}
@@ -122,21 +122,21 @@ int ehca_create_eq(struct ehca_shca *shca,
/* register interrupt handlers and initialize work queues */
if (type == EHCA_EQ) {
+ tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
+
ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
- IRQF_DISABLED, "ehca_eq",
+ 0, "ehca_eq",
(void *)shca);
if (ret < 0)
ehca_err(ib_dev, "Can't map interrupt handler.");
-
- tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
} else if (type == EHCA_NEQ) {
+ tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
+
ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
- IRQF_DISABLED, "ehca_neq",
+ 0, "ehca_neq",
(void *)shca);
if (ret < 0)
ehca_err(ib_dev, "Can't map interrupt handler.");
-
- tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
}
eq->is_initialized = 1;
@@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
unsigned long flags;
u64 h_ret;
- spin_lock_irqsave(&eq->spinlock, flags);
ibmebus_free_irq(eq->ist, (void *)shca);
- h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+ spin_lock_irqsave(&shca_list_lock, flags);
+ eq->is_initialized = 0;
+ spin_unlock_irqrestore(&shca_list_lock, flags);
- spin_unlock_irqrestore(&eq->spinlock, flags);
+ tasklet_kill(&eq->interrupt_task);
+
+ h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "Can't free EQ resources.");
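
Two ordering fixes are visible in the ehca_eq.c hunks above: the tasklet is now initialized before the IRQ is requested, so an early interrupt cannot schedule an uninitialized tasklet, and on teardown the IRQ is freed and the tasklet killed before the hardware EQ is destroyed (passing 0 instead of IRQF_DISABLED reflects that the flag had become a no-op). A hedged sketch of that life cycle with placeholder names; the real driver goes through ibmebus_request_irq()/ibmebus_free_irq():

	#include <linux/interrupt.h>

	struct example_eq {
		struct tasklet_struct bh;	/* bottom half */
		int irq;
	};

	static irqreturn_t example_irq(int irq, void *data)
	{
		struct example_eq *eq = data;

		tasklet_schedule(&eq->bh);
		return IRQ_HANDLED;
	}

	static int example_eq_setup(struct example_eq *eq,
				    void (*bh_fn)(unsigned long))
	{
		/* the bottom half must exist before the first interrupt can fire */
		tasklet_init(&eq->bh, bh_fn, (unsigned long)eq);
		return request_irq(eq->irq, example_irq, 0, "example_eq", eq);
	}

	static void example_eq_teardown(struct example_eq *eq)
	{
		free_irq(eq->irq, eq);		/* no new top halves */
		tasklet_kill(&eq->bh);		/* wait out a running bottom half */
		/* only now release the underlying EQ resources */
	}
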
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index bc3b37d2070..9ed4d258830 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -39,6 +39,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/gfp.h>
+
#include "ehca_tools.h"
#include "ehca_iverbs.h"
#include "hcp_if.h"
@@ -114,7 +116,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
}
props->max_pkeys = 16;
- props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255);
+ /* Some FW versions say 0 here; insert sensible value in that case */
+ props->local_ca_ack_delay = rblock->local_ca_ack_delay ?
+ min_t(u8, rblock->local_ca_ack_delay, 255) : 12;
props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp);
props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp);
props->max_mcast_grp = limit_uint(rblock->max_mcast_grp);
@@ -229,7 +233,7 @@ int ehca_query_port(struct ib_device *ibdev,
props->phys_state = 5;
props->state = rblock->state;
props->active_width = IB_WIDTH_12X;
- props->active_speed = 0x1;
+ props->active_speed = IB_SPEED_SDR;
}
query_port1:
@@ -317,7 +321,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port,
ib_device);
struct hipz_query_port *rblock;
- if (index > 255) {
+ if (index < 0 || index > 255) {
ehca_err(&shca->ib_device, "Invalid index: %x.", index);
return -EINVAL;
}
@@ -391,7 +395,7 @@ int ehca_modify_port(struct ib_device *ibdev,
hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
cap, props->init_type, port_modify_mask);
if (hret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "Modify port failed h_ret=%li",
+ ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli",
hret);
ret = -EINVAL;
}
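
Note that the fallback of 12 for local_ca_ack_delay above is an exponent, not a time: InfiniBand encodes the local CA ACK delay as 4.096 µs × 2^value, so 12 corresponds to roughly 16.8 ms. A small illustrative helper, not part of the driver:

	#include <linux/types.h>

	/* IBTA encoding: delay = 4.096 us * 2^encoded; 4.096 us == 4096 ns */
	static inline u64 example_ack_delay_ns(u8 encoded)
	{
		return 4096ULL << encoded;	/* 12 -> ~16.8 ms */
	}
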
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 0792d930c48..8615d7cf7e0 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -41,6 +41,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
@@ -99,7 +102,7 @@ static void print_error_data(struct ehca_shca *shca, void *data,
return;
ehca_err(&shca->ib_device,
- "QP 0x%x (resource=%lx) has errors.",
+ "QP 0x%x (resource=%llx) has errors.",
qp->ib_qp.qp_num, resource);
break;
}
@@ -108,21 +111,21 @@ static void print_error_data(struct ehca_shca *shca, void *data,
struct ehca_cq *cq = (struct ehca_cq *)data;
ehca_err(&shca->ib_device,
- "CQ 0x%x (resource=%lx) has errors.",
+ "CQ 0x%x (resource=%llx) has errors.",
cq->cq_number, resource);
break;
}
default:
ehca_err(&shca->ib_device,
- "Unknown error type: %lx on %s.",
+ "Unknown error type: %llx on %s.",
type, shca->ib_device.name);
break;
}
- ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
+ ehca_err(&shca->ib_device, "Error data is available: %llx.", resource);
ehca_err(&shca->ib_device, "EHCA ----- error data begin "
"---------------------------------------------------");
- ehca_dmp(rblock, length, "resource=%lx", resource);
+ ehca_dmp(rblock, length, "resource=%llx", resource);
ehca_err(&shca->ib_device, "EHCA ----- error data end "
"----------------------------------------------------");
@@ -152,7 +155,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data,
if (ret == H_R_STATE)
ehca_err(&shca->ib_device,
- "No error data is available: %lx.", resource);
+ "No error data is available: %llx.", resource);
else if (ret == H_SUCCESS) {
int length;
@@ -164,7 +167,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data,
print_error_data(shca, data, rblock, length);
} else
ehca_err(&shca->ib_device,
- "Error data could not be fetched: %lx", resource);
+ "Error data could not be fetched: %llx", resource);
ehca_free_fw_ctrlblock(rblock);
@@ -178,6 +181,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp,
{
struct ib_event event;
+ /* PATH_MIG without the QP ever having been armed is a false alarm */
+ if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed)
+ return;
+
event.device = &shca->ib_device;
event.event = event_type;
@@ -355,36 +362,48 @@ static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
*old_attr = new_attr;
}
+/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */
+static int replay_modify_qp(struct ehca_sport *sport)
+{
+ int aqp1_destroyed;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
+
+ aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI];
+
+ if (sport->ibqp_sqp[IB_QPT_SMI])
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
+ if (!aqp1_destroyed)
+ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
+
+ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
+
+ return aqp1_destroyed;
+}
+
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
u8 spec_event;
struct ehca_sport *sport = &shca->sport[port - 1];
- unsigned long flags;
switch (ec) {
case 0x30: /* port availability change */
if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
- int suppress_event;
- /* replay modify_qp for sqps */
- spin_lock_irqsave(&sport->mod_sqp_lock, flags);
- suppress_event = !sport->ibqp_sqp[IB_QPT_GSI];
- if (sport->ibqp_sqp[IB_QPT_SMI])
- ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
- if (!suppress_event)
- ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
- spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
-
- /* AQP1 was destroyed, ignore this event */
- if (suppress_event)
- break;
+ /* only replay modify_qp calls in autodetect mode;
+ * if AQP1 was destroyed, the port is already down
+ * again and we can drop the event.
+ */
+ if (ehca_nr_ports < 0)
+ if (replay_modify_qp(sport))
+ break;
sport->port_state = IB_PORT_ACTIVE;
dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
"is active");
- ehca_query_sma_attr(shca, port,
- &sport->saved_attr);
+ ehca_query_sma_attr(shca, port, &sport->saved_attr);
} else {
sport->port_state = IB_PORT_DOWN;
dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
@@ -463,13 +482,13 @@ void ehca_tasklet_neq(unsigned long data)
struct ehca_eqe *eqe;
u64 ret;
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+ eqe = ehca_poll_eq(shca, &shca->neq);
while (eqe) {
if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
parse_ec(shca, eqe->entry);
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
+ eqe = ehca_poll_eq(shca, &shca->neq);
}
ret = hipz_h_reset_event(shca->ipz_hca_handle,
@@ -498,7 +517,7 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
struct ehca_cq *cq;
eqe_value = eqe->entry;
- ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+ ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value);
if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
ehca_dbg(&shca->ib_device, "Got completion event");
token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
@@ -532,11 +551,10 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
struct ehca_eq *eq = &shca->eq;
struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
u64 eqe_value, ret;
- unsigned long flags;
int eqe_cnt, i;
int eq_empty = 0;
- spin_lock_irqsave(&eq->irq_spinlock, flags);
+ spin_lock(&eq->irq_spinlock);
if (is_irq) {
const int max_query_cnt = 100;
int query_cnt = 0;
@@ -556,8 +574,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
eqe_cnt = 0;
do {
u32 token;
- eqe_cache[eqe_cnt].eqe =
- (struct ehca_eqe *)ehca_poll_eq(shca, eq);
+ eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq);
if (!eqe_cache[eqe_cnt].eqe)
break;
eqe_value = eqe_cache[eqe_cnt].eqe->entry;
@@ -587,7 +604,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
ret = hipz_h_eoi(eq->ist);
if (ret != H_SUCCESS)
ehca_err(&shca->ib_device,
- "bad return code EOI -rc = %ld\n", ret);
+ "bad return code EOI -rc = %lld\n", ret);
ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
}
if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
@@ -621,14 +638,14 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
goto unlock_irq_spinlock;
do {
struct ehca_eqe *eqe;
- eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+ eqe = ehca_poll_eq(shca, &shca->eq);
if (!eqe)
break;
process_eqe(shca, eqe);
} while (1);
unlock_irq_spinlock:
- spin_unlock_irqrestore(&eq->irq_spinlock, flags);
+ spin_unlock(&eq->irq_spinlock);
}
void ehca_tasklet_eq(unsigned long data)
@@ -636,27 +653,30 @@ void ehca_tasklet_eq(unsigned long data)
ehca_process_eq((struct ehca_shca*)data, 1);
}
-static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
+static int find_next_online_cpu(struct ehca_comp_pool *pool)
{
int cpu;
unsigned long flags;
WARN_ON_ONCE(!in_interrupt());
if (ehca_debug_level >= 3)
- ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
+ ehca_dmp(cpu_online_mask, cpumask_size(), "");
spin_lock_irqsave(&pool->last_cpu_lock, flags);
- cpu = next_cpu(pool->last_cpu, cpu_online_map);
- if (cpu == NR_CPUS)
- cpu = first_cpu(cpu_online_map);
- pool->last_cpu = cpu;
+ do {
+ cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = cpumask_first(cpu_online_mask);
+ pool->last_cpu = cpu;
+ } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
return cpu;
}
static void __queue_comp_task(struct ehca_cq *__cq,
- struct ehca_cpu_comp_task *cct)
+ struct ehca_cpu_comp_task *cct,
+ struct task_struct *thread)
{
unsigned long flags;
@@ -667,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq,
__cq->nr_callbacks++;
list_add_tail(&__cq->entry, &cct->cq_list);
cct->cq_jobs++;
- wake_up(&cct->wait_queue);
+ wake_up_process(thread);
} else
__cq->nr_callbacks++;
@@ -679,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
{
int cpu_id;
struct ehca_cpu_comp_task *cct;
+ struct task_struct *thread;
int cq_jobs;
unsigned long flags;
@@ -686,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq)
BUG_ON(!cpu_online(cpu_id));
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
- BUG_ON(!cct);
+ thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+ BUG_ON(!cct || !thread);
spin_lock_irqsave(&cct->task_lock, flags);
cq_jobs = cct->cq_jobs;
@@ -694,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq)
if (cq_jobs > 0) {
cpu_id = find_next_online_cpu(pool);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
- BUG_ON(!cct);
+ thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+ BUG_ON(!cct || !thread);
}
-
- __queue_comp_task(__cq, cct);
+ __queue_comp_task(__cq, cct, thread);
}
static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
struct ehca_cq *cq;
- unsigned long flags;
-
- spin_lock_irqsave(&cct->task_lock, flags);
while (!list_empty(&cct->cq_list)) {
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
- spin_unlock_irqrestore(&cct->task_lock, flags);
+ spin_unlock_irq(&cct->task_lock);
comp_event_callback(cq);
if (atomic_dec_and_test(&cq->nr_events))
wake_up(&cq->wait_completion);
- spin_lock_irqsave(&cct->task_lock, flags);
+ spin_lock_irq(&cct->task_lock);
spin_lock(&cq->task_lock);
cq->nr_callbacks--;
if (!cq->nr_callbacks) {
@@ -724,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct)
}
spin_unlock(&cq->task_lock);
}
-
- spin_unlock_irqrestore(&cct->task_lock, flags);
}
-static int comp_task(void *__cct)
+static void comp_task_park(unsigned int cpu)
{
- struct ehca_cpu_comp_task *cct = __cct;
- int cql_empty;
- DECLARE_WAITQUEUE(wait, current);
-
- set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop()) {
- add_wait_queue(&cct->wait_queue, &wait);
-
- spin_lock_irq(&cct->task_lock);
- cql_empty = list_empty(&cct->cq_list);
- spin_unlock_irq(&cct->task_lock);
- if (cql_empty)
- schedule();
- else
- __set_current_state(TASK_RUNNING);
-
- remove_wait_queue(&cct->wait_queue, &wait);
+ struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ struct ehca_cpu_comp_task *target;
+ struct task_struct *thread;
+ struct ehca_cq *cq, *tmp;
+ LIST_HEAD(list);
- spin_lock_irq(&cct->task_lock);
- cql_empty = list_empty(&cct->cq_list);
- spin_unlock_irq(&cct->task_lock);
- if (!cql_empty)
- run_comp_task(__cct);
+ spin_lock_irq(&cct->task_lock);
+ cct->cq_jobs = 0;
+ cct->active = 0;
+ list_splice_init(&cct->cq_list, &list);
+ spin_unlock_irq(&cct->task_lock);
- set_current_state(TASK_INTERRUPTIBLE);
+ cpu = find_next_online_cpu(pool);
+ target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
+ spin_lock_irq(&target->task_lock);
+ list_for_each_entry_safe(cq, tmp, &list, entry) {
+ list_del(&cq->entry);
+ __queue_comp_task(cq, target, thread);
}
- __set_current_state(TASK_RUNNING);
-
- return 0;
+ spin_unlock_irq(&target->task_lock);
}
-static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
- int cpu)
+static void comp_task_stop(unsigned int cpu, bool online)
{
- struct ehca_cpu_comp_task *cct;
-
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- spin_lock_init(&cct->task_lock);
- INIT_LIST_HEAD(&cct->cq_list);
- init_waitqueue_head(&cct->wait_queue);
- cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
-
- return cct->task;
-}
-
-static void destroy_comp_task(struct ehca_comp_pool *pool,
- int cpu)
-{
- struct ehca_cpu_comp_task *cct;
- struct task_struct *task;
- unsigned long flags_cct;
-
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
- spin_lock_irqsave(&cct->task_lock, flags_cct);
+ struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- task = cct->task;
- cct->task = NULL;
+ spin_lock_irq(&cct->task_lock);
cct->cq_jobs = 0;
-
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
- if (task)
- kthread_stop(task);
+ cct->active = 0;
+ WARN_ON(!list_empty(&cct->cq_list));
+ spin_unlock_irq(&cct->task_lock);
}
-static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
+static int comp_task_should_run(unsigned int cpu)
{
struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- LIST_HEAD(list);
- struct ehca_cq *cq;
- unsigned long flags_cct;
-
- spin_lock_irqsave(&cct->task_lock, flags_cct);
-
- list_splice_init(&cct->cq_list, &list);
-
- while (!list_empty(&list)) {
- cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-
- list_del(&cq->entry);
- __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
- smp_processor_id()));
- }
-
- spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+ return cct->cq_jobs;
}
-static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static void comp_task(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)hcpu;
- struct ehca_cpu_comp_task *cct;
+ struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
+ int cql_empty;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
- if (!create_comp_task(pool, cpu)) {
- ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
- return NOTIFY_BAD;
- }
- break;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- kthread_bind(cct->task, any_online_cpu(cpu_online_map));
- destroy_comp_task(pool, cpu);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
- cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
- kthread_bind(cct->task, cpu);
- wake_up_process(cct->task);
- break;
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
- break;
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
- destroy_comp_task(pool, cpu);
- take_over_work(pool, cpu);
- break;
+ spin_lock_irq(&cct->task_lock);
+ cql_empty = list_empty(&cct->cq_list);
+ if (!cql_empty) {
+ __set_current_state(TASK_RUNNING);
+ run_comp_task(cct);
}
-
- return NOTIFY_OK;
+ spin_unlock_irq(&cct->task_lock);
}
-static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
- .notifier_call = comp_pool_callback,
- .priority = 0,
+static struct smp_hotplug_thread comp_pool_threads = {
+ .thread_should_run = comp_task_should_run,
+ .thread_fn = comp_task,
+ .thread_comm = "ehca_comp/%u",
+ .cleanup = comp_task_stop,
+ .park = comp_task_park,
};
int ehca_create_comp_pool(void)
{
- int cpu;
- struct task_struct *task;
+ int cpu, ret = -ENOMEM;
if (!ehca_scaling_code)
return 0;
@@ -886,42 +822,49 @@ int ehca_create_comp_pool(void)
return -ENOMEM;
spin_lock_init(&pool->last_cpu_lock);
- pool->last_cpu = any_online_cpu(cpu_online_map);
+ pool->last_cpu = cpumask_any(cpu_online_mask);
pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
- if (pool->cpu_comp_tasks == NULL) {
- kfree(pool);
- return -EINVAL;
- }
+ if (!pool->cpu_comp_tasks)
+ goto out_pool;
- for_each_online_cpu(cpu) {
- task = create_comp_task(pool, cpu);
- if (task) {
- kthread_bind(task, cpu);
- wake_up_process(task);
- }
+ pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
+ if (!pool->cpu_comp_threads)
+ goto out_tasks;
+
+ for_each_present_cpu(cpu) {
+ struct ehca_cpu_comp_task *cct;
+
+ cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+ spin_lock_init(&cct->task_lock);
+ INIT_LIST_HEAD(&cct->cq_list);
}
- register_hotcpu_notifier(&comp_pool_callback_nb);
+ comp_pool_threads.store = pool->cpu_comp_threads;
+ ret = smpboot_register_percpu_thread(&comp_pool_threads);
+ if (ret)
+ goto out_threads;
- printk(KERN_INFO "eHCA scaling code enabled\n");
+ pr_info("eHCA scaling code enabled\n");
+ return ret;
- return 0;
+out_threads:
+ free_percpu(pool->cpu_comp_threads);
+out_tasks:
+ free_percpu(pool->cpu_comp_tasks);
+out_pool:
+ kfree(pool);
+ return ret;
}
void ehca_destroy_comp_pool(void)
{
- int i;
-
if (!ehca_scaling_code)
return;
- unregister_hotcpu_notifier(&comp_pool_callback_nb);
+ smpboot_unregister_percpu_thread(&comp_pool_threads);
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_online(i))
- destroy_comp_task(pool, i);
- }
+ free_percpu(pool->cpu_comp_threads);
free_percpu(pool->cpu_comp_tasks);
kfree(pool);
}
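
The largest change above replaces the driver's hand-rolled per-CPU kthreads and CPU-hotplug notifier with the smpboot helper, which creates one bound thread per present CPU and handles online/offline transitions itself; the park callback is where pending work gets handed off, as comp_task_park() does. A minimal hedged sketch of the registration pattern, with illustrative names:

	#include <linux/init.h>
	#include <linux/percpu.h>
	#include <linux/sched.h>
	#include <linux/smpboot.h>

	static DEFINE_PER_CPU(struct task_struct *, example_thread);
	static DEFINE_PER_CPU(int, example_pending);

	static int example_should_run(unsigned int cpu)
	{
		return this_cpu_read(example_pending);	/* non-zero => run thread_fn */
	}

	static void example_thread_fn(unsigned int cpu)
	{
		this_cpu_write(example_pending, 0);
		/* drain this CPU's work list here */
	}

	static void example_park(unsigned int cpu)
	{
		/* cpu is going down: move queued work to another online CPU */
	}

	static struct smp_hotplug_thread example_threads = {
		.store			= &example_thread,
		.thread_should_run	= example_should_run,
		.thread_fn		= example_thread_fn,
		.thread_comm		= "example/%u",
		.park			= example_park,
	};

	static int __init example_init(void)
	{
		return smpboot_register_percpu_thread(&example_threads);
	}
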
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 3346cb06cea..5370199f08c 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data);
void ehca_process_eq(struct ehca_shca *shca, int is_irq);
struct ehca_cpu_comp_task {
- wait_queue_head_t wait_queue;
struct list_head cq_list;
- struct task_struct *task;
spinlock_t task_lock;
int cq_jobs;
+ int active;
};
struct ehca_comp_pool {
- struct ehca_cpu_comp_task *cpu_comp_tasks;
+ struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
+ struct task_struct * __percpu *cpu_comp_threads;
int last_cpu;
spinlock_t last_cpu_lock;
};
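
The new pool layout uses dynamically allocated per-CPU data: cpu_comp_tasks is a per-CPU struct and cpu_comp_threads a per-CPU pointer to a task, hence the slightly unusual "struct task_struct * __percpu *" declaration that smpboot fills in via its .store field. A short sketch of allocating and touching such objects, with made-up names:

	#include <linux/percpu.h>
	#include <linux/sched.h>

	struct example_state {
		int jobs;
	};

	static struct example_state __percpu *states;
	static struct task_struct * __percpu *threads;

	static int example_pool_alloc(void)
	{
		states = alloc_percpu(struct example_state);
		threads = alloc_percpu(struct task_struct *);
		if (!states || !threads) {
			free_percpu(states);	/* free_percpu(NULL) is a no-op */
			free_percpu(threads);
			return -ENOMEM;
		}
		return 0;
	}

	static void example_touch(int cpu, struct task_struct *tsk)
	{
		per_cpu_ptr(states, cpu)->jobs = 0;	/* another CPU's copy */
		*per_cpu_ptr(threads, cpu) = tsk;	/* store the bound thread */
	}
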
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index a8a2ea585d2..22f79afa7fc 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
int ehca_dereg_mr(struct ib_mr *mr);
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
@@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data);
int ehca_calc_ipd(struct ehca_shca *shca, int port,
enum ib_rate path_rate, u32 *ipd);
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq);
+
#ifdef CONFIG_PPC_64K_PAGES
void *ehca_alloc_fw_ctrlblock(gfp_t flags);
void ehca_free_fw_ctrlblock(void *ptr);
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 598844d2edc..cd8d290a09f 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -44,29 +44,31 @@
#include <linux/slab.h>
#endif
+#include <linux/notifier.h>
+#include <linux/memory.h>
#include "ehca_classes.h"
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
#include "ehca_tools.h"
#include "hcp_if.h"
-#define HCAD_VERSION "0026"
+#define HCAD_VERSION "0029"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
MODULE_VERSION(HCAD_VERSION);
-static int ehca_open_aqp1 = 0;
+static bool ehca_open_aqp1 = 0;
static int ehca_hw_level = 0;
-static int ehca_poll_all_eqs = 1;
+static bool ehca_poll_all_eqs = 1;
int ehca_debug_level = 0;
-int ehca_nr_ports = 2;
-int ehca_use_hp_mr = 0;
+int ehca_nr_ports = -1;
+bool ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
int ehca_static_rate = -1;
-int ehca_scaling_code = 0;
+bool ehca_scaling_code = 0;
int ehca_lock_hcalls = -1;
int ehca_max_cq = -1;
int ehca_max_qp = -1;
@@ -80,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
-module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO);
+module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO);
module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
@@ -93,8 +95,8 @@ MODULE_PARM_DESC(hw_level,
"Hardware level (0: autosensing (default), "
"0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
MODULE_PARM_DESC(nr_ports,
- "number of connected ports (-1: autodetect, 1: port one only, "
- "2: two ports (default)");
+ "number of connected ports (-1: autodetect (default), "
+ "1: port one only, 2: two ports)");
MODULE_PARM_DESC(use_hp_mr,
"Use high performance MRs (default: no)");
MODULE_PARM_DESC(port_act_time,
@@ -121,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
DEFINE_IDR(ehca_cq_idr);
static LIST_HEAD(shca_list); /* list of all registered ehcas */
-static DEFINE_SPINLOCK(shca_list_lock);
+DEFINE_SPINLOCK(shca_list_lock);
static struct timer_list poll_eqs_timer;
@@ -209,6 +211,7 @@ static int ehca_create_slab_caches(void)
if (!ctblk_cache) {
ehca_gen_err("Cannot create ctblk SLAB cache.");
ehca_cleanup_small_qp_cache();
+ ret = -ENOMEM;
goto create_slab_caches6;
}
#endif
@@ -289,8 +292,9 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
};
ehca_gen_dbg("Probing adapter %s...",
- shca->ofdev->node->full_name);
- loc_code = of_get_property(shca->ofdev->node, "ibm,loc-code", NULL);
+ shca->ofdev->dev.of_node->full_name);
+ loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
+ NULL);
if (loc_code)
ehca_gen_dbg(" ... location lode=%s", loc_code);
@@ -302,7 +306,7 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
if (h_ret != H_SUCCESS) {
- ehca_gen_err("Cannot query device properties. h_ret=%li",
+ ehca_gen_err("Cannot query device properties. h_ret=%lli",
h_ret);
ret = -EPERM;
goto sense_attributes1;
@@ -357,7 +361,8 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
* a firmware property, so it's valid across all adapters
*/
if (ehca_lock_hcalls == -1)
- ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC);
+ ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
+ shca->hca_cap);
/* translate supported MR page sizes; always support 4K */
shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
@@ -366,29 +371,30 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
/* Set maximum number of CQs and QPs to calculate EQ size */
- if (ehca_max_qp == -1)
- ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
- else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
- ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
- "specified by HW", rblock->max_qp);
- ret = -EINVAL;
- goto sense_attributes1;
+ if (shca->max_num_qps == -1)
+ shca->max_num_qps = min_t(int, rblock->max_qp,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
+ ehca_gen_warn("The requested number of QPs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_qp, rblock->max_qp);
+ shca->max_num_qps = rblock->max_qp;
}
- if (ehca_max_cq == -1)
- ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
- else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
- ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
- "specified by HW", rblock->max_cq);
- ret = -EINVAL;
- goto sense_attributes1;
+ if (shca->max_num_cqs == -1)
+ shca->max_num_cqs = min_t(int, rblock->max_cq,
+ EHCA_MAX_NUM_QUEUES);
+ else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
+ ehca_gen_warn("The requested number of CQs is out of range "
+ "(1 - %i) specified by HW. Value is set to %i",
+ rblock->max_cq, rblock->max_cq);
}
/* query max MTU from first port -- it's the same for all ports */
port = (struct hipz_query_port *)rblock;
h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
if (h_ret != H_SUCCESS) {
- ehca_gen_err("Cannot query port properties. h_ret=%li",
+ ehca_gen_err("Cannot query port properties. h_ret=%lli",
h_ret);
ret = -EPERM;
goto sense_attributes1;
@@ -503,6 +509,7 @@ static int ehca_init_device(struct ehca_shca *shca)
shca->ib_device.detach_mcast = ehca_detach_mcast;
shca->ib_device.process_mad = ehca_process_mad;
shca->ib_device.mmap = ehca_mmap;
+ shca->ib_device.dma_ops = &ehca_dma_mapping_ops;
if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
shca->ib_device.uverbs_cmd_mask |=
@@ -619,7 +626,7 @@ static struct attribute_group ehca_drv_attr_grp = {
.attrs = ehca_drv_attrs
};
-static struct attribute_group *ehca_drv_attr_groups[] = {
+static const struct attribute_group *ehca_drv_attr_groups[] = {
&ehca_drv_attr_grp,
NULL,
};
@@ -633,7 +640,7 @@ static ssize_t ehca_show_##name(struct device *dev, \
struct hipz_query_hca *rblock; \
int data; \
\
- shca = dev->driver_data; \
+ shca = dev_get_drvdata(dev); \
\
rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \
if (!rblock) { \
@@ -677,9 +684,9 @@ static ssize_t ehca_show_adapter_handle(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct ehca_shca *shca = dev->driver_data;
+ struct ehca_shca *shca = dev_get_drvdata(dev);
- return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
+ return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle);
}
static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
@@ -707,24 +714,24 @@ static struct attribute_group ehca_dev_attr_grp = {
.attrs = ehca_dev_attrs
};
-static int __devinit ehca_probe(struct of_device *dev,
- const struct of_device_id *id)
+static int ehca_probe(struct platform_device *dev)
{
struct ehca_shca *shca;
const u64 *handle;
struct ib_pd *ibpd;
int ret, i, eq_size;
+ unsigned long flags;
- handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
+ handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL);
if (!handle) {
ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
- dev->node->full_name);
+ dev->dev.of_node->full_name);
return -ENODEV;
}
if (!(*handle)) {
ehca_gen_err("Wrong eHCA handle for adapter: %s.",
- dev->node->full_name);
+ dev->dev.of_node->full_name);
return -ENODEV;
}
@@ -733,15 +740,19 @@ static int __devinit ehca_probe(struct of_device *dev,
ehca_gen_err("Cannot allocate shca memory.");
return -ENOMEM;
}
+
mutex_init(&shca->modify_mutex);
atomic_set(&shca->num_cqs, 0);
atomic_set(&shca->num_qps, 0);
+ shca->max_num_qps = ehca_max_qp;
+ shca->max_num_cqs = ehca_max_cq;
+
for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
spin_lock_init(&shca->sport[i].mod_sqp_lock);
shca->ofdev = dev;
shca->ipz_hca_handle.handle = *handle;
- dev->dev.driver_data = shca;
+ dev_set_drvdata(&dev->dev, shca);
ret = ehca_sense_attributes(shca);
if (ret < 0) {
@@ -755,7 +766,7 @@ static int __devinit ehca_probe(struct of_device *dev,
goto probe1;
}
- eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
+ eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps;
/* create event queues */
ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
if (ret) {
@@ -789,7 +800,7 @@ static int __devinit ehca_probe(struct of_device *dev,
goto probe5;
}
- ret = ib_register_device(&shca->ib_device);
+ ret = ib_register_device(&shca->ib_device, NULL);
if (ret) {
ehca_err(&shca->ib_device,
"ib_register_device() failed ret=%i", ret);
@@ -823,9 +834,9 @@ static int __devinit ehca_probe(struct of_device *dev,
ehca_err(&shca->ib_device,
"Cannot create device attributes ret=%d", ret);
- spin_lock(&shca_list_lock);
+ spin_lock_irqsave(&shca_list_lock, flags);
list_add(&shca->shca_list, &shca_list);
- spin_unlock(&shca_list_lock);
+ spin_unlock_irqrestore(&shca_list_lock, flags);
return 0;
@@ -868,9 +879,10 @@ probe1:
return -EINVAL;
}
-static int __devexit ehca_remove(struct of_device *dev)
+static int ehca_remove(struct platform_device *dev)
{
- struct ehca_shca *shca = dev->dev.driver_data;
+ struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
+ unsigned long flags;
int ret;
sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
@@ -908,9 +920,9 @@ static int __devexit ehca_remove(struct of_device *dev)
ib_dealloc_device(&shca->ib_device);
- spin_lock(&shca_list_lock);
+ spin_lock_irqsave(&shca_list_lock, flags);
list_del(&shca->shca_list);
- spin_unlock(&shca_list_lock);
+ spin_unlock_irqrestore(&shca_list_lock, flags);
return ret;
}
@@ -925,13 +937,14 @@ static struct of_device_id ehca_device_table[] =
};
MODULE_DEVICE_TABLE(of, ehca_device_table);
-static struct of_platform_driver ehca_driver = {
- .name = "ehca",
- .match_table = ehca_device_table,
+static struct platform_driver ehca_driver = {
.probe = ehca_probe,
.remove = ehca_remove,
- .driver = {
+ .driver = {
+ .name = "ehca",
+ .owner = THIS_MODULE,
.groups = ehca_drv_attr_groups,
+ .of_match_table = ehca_device_table,
},
};
@@ -946,7 +959,7 @@ void ehca_poll_eqs(unsigned long data)
struct ehca_eq *eq = &shca->eq;
int max = 3;
volatile u64 q_ofs, q_ofs2;
- u64 flags;
+ unsigned long flags;
spin_lock_irqsave(&eq->spinlock, flags);
q_ofs = eq->ipz_queue.current_q_offset;
spin_unlock_irqrestore(&eq->spinlock, flags);
@@ -964,6 +977,41 @@ void ehca_poll_eqs(unsigned long data)
spin_unlock(&shca_list_lock);
}
+static int ehca_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ static unsigned long ehca_dmem_warn_time;
+ unsigned long flags;
+
+ switch (action) {
+ case MEM_CANCEL_OFFLINE:
+ case MEM_CANCEL_ONLINE:
+ case MEM_ONLINE:
+ case MEM_OFFLINE:
+ return NOTIFY_OK;
+ case MEM_GOING_ONLINE:
+ case MEM_GOING_OFFLINE:
+ /* only ok if no hca is attached to the lpar */
+ spin_lock_irqsave(&shca_list_lock, flags);
+ if (list_empty(&shca_list)) {
+ spin_unlock_irqrestore(&shca_list_lock, flags);
+ return NOTIFY_OK;
+ } else {
+ spin_unlock_irqrestore(&shca_list_lock, flags);
+ if (printk_timed_ratelimit(&ehca_dmem_warn_time,
+ 30 * 1000))
+ ehca_gen_err("DMEM operations are not allowed"
+ "in conjunction with eHCA");
+ return NOTIFY_BAD;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block ehca_mem_nb = {
+ .notifier_call = ehca_mem_notifier,
+};
+
static int __init ehca_module_init(void)
{
int ret;
@@ -984,11 +1032,23 @@ static int __init ehca_module_init(void)
goto module_init1;
}
+ ret = ehca_create_busmap();
+ if (ret) {
+ ehca_gen_err("Cannot create busmap.");
+ goto module_init2;
+ }
+
ret = ibmebus_register_driver(&ehca_driver);
if (ret) {
ehca_gen_err("Cannot register eHCA device driver");
ret = -EINVAL;
- goto module_init2;
+ goto module_init3;
+ }
+
+ ret = register_memory_notifier(&ehca_mem_nb);
+ if (ret) {
+ ehca_gen_err("Failed registering memory add/remove notifier");
+ goto module_init4;
}
if (ehca_poll_all_eqs != 1) {
@@ -1003,6 +1063,12 @@ static int __init ehca_module_init(void)
return 0;
+module_init4:
+ ibmebus_unregister_driver(&ehca_driver);
+
+module_init3:
+ ehca_destroy_busmap();
+
module_init2:
ehca_destroy_slab_caches();
@@ -1018,6 +1084,10 @@ static void __exit ehca_module_exit(void)
ibmebus_unregister_driver(&ehca_driver);
+ unregister_memory_notifier(&ehca_mem_nb);
+
+ ehca_destroy_busmap();
+
ehca_destroy_slab_caches();
ehca_destroy_comp_pool();
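
ehca_main.c also migrates from the old of_platform_driver layout (top-level .name/.match_table) to a plain struct platform_driver whose OF match table lives inside .driver, and swaps the raw driver_data accesses for dev_get_drvdata()/dev_set_drvdata(). A generic, hedged skeleton of that shape; the names and the compatible string are invented, and ehca itself still registers this structure through ibmebus_register_driver() rather than the platform bus:

	#include <linux/module.h>
	#include <linux/of.h>
	#include <linux/platform_device.h>

	static int example_probe(struct platform_device *pdev)
	{
		const u64 *handle = of_get_property(pdev->dev.of_node,
						    "ibm,hca-handle", NULL);

		if (!handle || !*handle)
			return -ENODEV;
		dev_set_drvdata(&pdev->dev, (void *)(unsigned long)*handle);
		return 0;
	}

	static int example_remove(struct platform_device *pdev)
	{
		dev_set_drvdata(&pdev->dev, NULL);
		return 0;
	}

	static const struct of_device_id example_match[] = {
		{ .compatible = "example,hca" },	/* placeholder compatible */
		{},
	};
	MODULE_DEVICE_TABLE(of, example_match);

	static struct platform_driver example_driver = {
		.probe	= example_probe,
		.remove	= example_remove,
		.driver	= {
			.name		= "example-hca",
			.owner		= THIS_MODULE,
			.of_match_table	= example_match,
		},
	};
	module_platform_driver(example_driver);
	MODULE_LICENSE("GPL");
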
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
index e3ef0264ccc..120aedf9f98 100644
--- a/drivers/infiniband/hw/ehca/ehca_mcast.c
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (h_ret != H_SUCCESS)
ehca_err(ibqp->device,
"ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
- "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
+ "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
@@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (h_ret != H_SUCCESS)
ehca_err(ibqp->device,
"ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
- "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
+ "h_ret=%lli", my_qp, ibqp->qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
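
The only change in this file is the h_ret format switch, part of the blanket %l to %ll conversion seen throughout these hunks: with u64/s64 defined as unsigned long long/long long on every architecture, hypervisor handles and return codes declared u64 need the ll length modifier. A tiny illustration (the function name is made up):

	#include <linux/kernel.h>
	#include <linux/types.h>

	static void example_show(u64 handle, unsigned long resource)
	{
		pr_debug("handle=%llx\n", handle);	/* u64 -> %llx / %llu / %lli */
		pr_debug("resource=%lx\n", resource);	/* plain long keeps %lx */
	}
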
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index f974367cad4..3488e8c9fcb 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -40,6 +40,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include "ehca_iverbs.h"
@@ -53,6 +54,38 @@
/* max number of rpages (per hcall register_rpages) */
#define MAX_RPAGES 512
+/* DMEM toleration management */
+#define EHCA_SECTSHIFT SECTION_SIZE_BITS
+#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT)
+#define EHCA_HUGEPAGESHIFT 34
+#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT)
+#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
+#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL
+#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */
+#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
+#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
+#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */
+#define EHCA_DIR_MAP_SIZE (0x10000)
+#define EHCA_ENT_MAP_SIZE (0x10000)
+#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
+
+static unsigned long ehca_mr_len;
+
+/*
+ * Memory map data structures
+ */
+struct ehca_dir_bmap {
+ u64 ent[EHCA_MAP_ENTRIES];
+};
+struct ehca_top_bmap {
+ struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
+};
+struct ehca_bmap {
+ struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
+};
+
+static struct ehca_bmap *ehca_bmap;
+
static struct kmem_cache *mr_cache;
static struct kmem_cache *mw_cache;
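
The constants above set up a three-level (top/dir/ent) map of memory sections used for the DMEM toleration handling: 13 index bits per level over section-sized (EHCA_SECTSHIFT) chunks. As a hedged illustration of how a physical address decomposes into those indices -- the inverse of ehca_calc_sectbase() further down; the names and the stand-in section shift are mine, not the driver's:

	#define EX_SECTSHIFT		24	/* stands in for SECTION_SIZE_BITS on ppc64 */
	#define EX_DIR_INDEX_SHIFT	13	/* 8k entries per level */
	#define EX_TOP_INDEX_SHIFT	(EX_DIR_INDEX_SHIFT * 2)
	#define EX_INDEX_MASK		((1 << EX_DIR_INDEX_SHIFT) - 1)

	static void example_split(unsigned long paddr, int *top, int *dir, int *idx)
	{
		unsigned long section = paddr >> EX_SECTSHIFT;

		*idx = section & EX_INDEX_MASK;
		*dir = (section >> EX_DIR_INDEX_SHIFT) & EX_INDEX_MASK;
		*top = (section >> EX_TOP_INDEX_SHIFT) & EX_INDEX_MASK;
	}
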
@@ -68,6 +101,8 @@ enum ehca_mr_pgsize {
#define EHCA_MR_PGSHIFT1M 20
#define EHCA_MR_PGSHIFT16M 24
+static u64 ehca_map_vaddr(void *caddr);
+
static u32 ehca_encode_hwpage_size(u32 pgsize)
{
int log = ilog2(pgsize);
@@ -77,7 +112,7 @@ static u32 ehca_encode_hwpage_size(u32 pgsize)
static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
{
- return 1UL << ilog2(shca->hca_cap_mr_pgsize);
+ return rounddown_pow_of_two(shca->hca_cap_mr_pgsize);
}
static struct ehca_mr *ehca_mr_new(void)
@@ -135,7 +170,8 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
goto get_dma_mr_exit0;
}
- ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE,
+ ret = ehca_reg_maxmr(shca, e_maxmr,
+ (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
mr_access_flags, e_pd,
&e_maxmr->ib.ib_mr.lkey,
&e_maxmr->ib.ib_mr.rkey);
@@ -204,7 +240,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
}
if ((size == 0) ||
(((u64)iova_start + size) < (u64)iova_start)) {
- ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
+ ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
size, iova_start);
ib_mr = ERR_PTR(-EINVAL);
goto reg_phys_mr_exit0;
@@ -251,7 +287,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
+ &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
if (ret) {
ib_mr = ERR_PTR(ret);
goto reg_phys_mr_exit1;
@@ -309,8 +345,8 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
if (length == 0 || virt + length < virt) {
- ehca_err(pd->device, "bad input values: length=%lx "
- "virt_base=%lx", length, virt);
+ ehca_err(pd->device, "bad input values: length=%llx "
+ "virt_base=%llx", length, virt);
ib_mr = ERR_PTR(-EINVAL);
goto reg_user_mr_exit0;
}
@@ -364,16 +400,13 @@ reg_user_mr_fallback:
pginfo.num_hwpages = num_hwpages;
pginfo.u.usr.region = e_mr->umem;
pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
- pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
- (&e_mr->umem->chunk_list),
- list);
-
+ pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
+ &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
ehca_warn(pd->device, "failed to register mr "
- "with hwpage_size=%lx", hwpage_size);
+ "with hwpage_size=%llx", hwpage_size);
ehca_info(pd->device, "try to register mr with "
"kpage_size=%lx", PAGE_SIZE);
/*
@@ -509,7 +542,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr,
goto rereg_phys_mr_exit1;
if ((new_size == 0) ||
(((u64)iova_start + new_size) < (u64)iova_start)) {
- ehca_err(mr->device, "bad input values: new_size=%lx "
+ ehca_err(mr->device, "bad input values: new_size=%llx "
"iova_start=%p", new_size, iova_start);
ret = -EINVAL;
goto rereg_phys_mr_exit1;
@@ -580,8 +613,8 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_mr_query failed, h_ret=%li mr=%p "
- "hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
+ "hca_hndl=%llx mr_hndl=%llx lkey=%x",
h_ret, mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
ret = ehca2ib_return_code(h_ret);
@@ -630,8 +663,8 @@ int ehca_dereg_mr(struct ib_mr *mr)
/* TODO: BUSY: MR still has bound window(s) */
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_free_mr failed, h_ret=%li shca=%p "
- "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
+ ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
+ "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle, mr->lkey);
ret = ehca2ib_return_code(h_ret);
@@ -652,7 +685,7 @@ dereg_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
+struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *ib_mw;
u64 h_ret;
@@ -662,6 +695,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
container_of(pd->device, struct ehca_shca, ib_device);
struct ehca_mw_hipzout_parms hipzout;
+ if (type != IB_MW_TYPE_1)
+ return ERR_PTR(-EINVAL);
+
e_mw = ehca_mw_new();
if (!e_mw) {
ib_mw = ERR_PTR(-ENOMEM);
@@ -671,8 +707,8 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
e_pd->fw_pd, &hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%li "
- "shca=%p hca_hndl=%lx mw=%p",
+ ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
+ "shca=%p hca_hndl=%llx mw=%p",
h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
goto alloc_mw_exit1;
@@ -713,8 +749,8 @@ int ehca_dealloc_mw(struct ib_mw *mw)
h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
if (h_ret != H_SUCCESS) {
- ehca_err(mw->device, "hipz_free_mw failed, h_ret=%li shca=%p "
- "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
+ ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
+ "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
e_mw->ipz_mw_handle.handle);
return ehca2ib_return_code(h_ret);
@@ -794,7 +830,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
ret = ehca_reg_mr(shca, e_fmr, NULL,
fmr_attr->max_pages * (1 << fmr_attr->page_shift),
mr_access_flags, e_pd, &pginfo,
- &tmp_lkey, &tmp_rkey);
+ &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
if (ret) {
ib_fmr = ERR_PTR(ret);
goto alloc_fmr_exit1;
@@ -840,7 +876,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
goto map_phys_fmr_exit0;
if (iova % e_fmr->fmr_page_size) {
/* only whole-numbered pages */
- ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
+ ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
iova, e_fmr->fmr_page_size);
ret = -EINVAL;
goto map_phys_fmr_exit0;
@@ -878,7 +914,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
map_phys_fmr_exit0:
if (ret)
ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
- "iova=%lx", ret, fmr, page_list, list_len, iova);
+ "iova=%llx", ret, fmr, page_list, list_len, iova);
return ret;
} /* end ehca_map_phys_fmr() */
@@ -897,11 +933,6 @@ int ehca_unmap_fmr(struct list_head *fmr_list)
/* check all FMR belong to same SHCA, and check internal flag */
list_for_each_entry(ib_fmr, fmr_list, list) {
prev_shca = shca;
- if (!ib_fmr) {
- ehca_gen_err("bad fmr=%p in list", ib_fmr);
- ret = -EINVAL;
- goto unmap_fmr_exit0;
- }
shca = container_of(ib_fmr->device, struct ehca_shca,
ib_device);
e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
@@ -964,8 +995,8 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
if (h_ret != H_SUCCESS) {
- ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%li e_fmr=%p "
- "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
+ ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
+ "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle, fmr->lkey);
ret = ehca2ib_return_code(h_ret);
@@ -983,6 +1014,10 @@ free_fmr_exit0:
/*----------------------------------------------------------------------*/
+static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ struct ehca_mr_pginfo *pginfo);
+
int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_mr *e_mr,
u64 *iova_start,
@@ -991,7 +1026,8 @@ int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_pd *e_pd,
struct ehca_mr_pginfo *pginfo,
u32 *lkey, /*OUT*/
- u32 *rkey) /*OUT*/
+ u32 *rkey, /*OUT*/
+ enum ehca_reg_type reg_type)
{
int ret;
u64 h_ret;
@@ -1007,15 +1043,21 @@ int ehca_reg_mr(struct ehca_shca *shca,
(u64)iova_start, size, hipz_acl,
e_pd->fw_pd, &hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%li "
- "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
+ ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
+ "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
ret = ehca2ib_return_code(h_ret);
goto ehca_reg_mr_exit0;
}
e_mr->ipz_mr_handle = hipzout.handle;
- ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+ if (reg_type == EHCA_REG_BUSMAP_MR)
+ ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
+ else if (reg_type == EHCA_REG_MR)
+ ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
+ else
+ ret = -EINVAL;
+
if (ret)
goto ehca_reg_mr_exit1;
@@ -1033,9 +1075,9 @@ int ehca_reg_mr(struct ehca_shca *shca,
ehca_reg_mr_exit1:
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "h_ret=%li shca=%p e_mr=%p "
- "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
- "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%i",
+ ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
+ "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
+ "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
h_ret, shca, e_mr, iova_start, size, acl, e_pd,
hipzout.lkey, pginfo, pginfo->num_kpages,
pginfo->num_hwpages, ret);
@@ -1045,8 +1087,8 @@ ehca_reg_mr_exit1:
ehca_reg_mr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
- "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
- "num_kpages=%lx num_hwpages=%lx",
+ "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
+ "num_kpages=%llx num_hwpages=%llx",
ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
pginfo->num_kpages, pginfo->num_hwpages);
return ret;
@@ -1094,7 +1136,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
}
if (rnum > 1) {
- rpage = virt_to_abs(kpage);
+ rpage = __pa(kpage);
if (!rpage) {
ehca_err(&shca->ib_device, "kpage=%p i=%x",
kpage, i);
@@ -1116,8 +1158,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
*/
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "last "
- "hipz_reg_rpage_mr failed, h_ret=%li "
- "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
+ "hipz_reg_rpage_mr failed, h_ret=%lli "
+ "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
" lkey=%x", h_ret, e_mr, i,
shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
@@ -1128,8 +1170,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
ret = 0;
} else if (h_ret != H_PAGE_REGISTERED) {
ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
- "h_ret=%li e_mr=%p i=%x lkey=%x hca_hndl=%lx "
- "mr_hndl=%lx", h_ret, e_mr, i,
+ "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
+ "mr_hndl=%llx", h_ret, e_mr, i,
e_mr->ib.ib_mr.lkey,
shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle);
@@ -1145,7 +1187,7 @@ ehca_reg_mr_rpages_exit1:
ehca_reg_mr_rpages_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
- "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
+ "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
pginfo, pginfo->num_kpages, pginfo->num_hwpages);
return ret;
} /* end ehca_reg_mr_rpages() */
@@ -1184,12 +1226,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
if (ret) {
ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
- "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx "
+ "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
"kpage=%p", e_mr, pginfo, pginfo->type,
pginfo->num_kpages, pginfo->num_hwpages, kpage);
goto ehca_rereg_mr_rereg1_exit1;
}
- rpage = virt_to_abs(kpage);
+ rpage = __pa(kpage);
if (!rpage) {
ehca_err(&shca->ib_device, "kpage=%p", kpage);
ret = -EFAULT;
@@ -1205,13 +1247,13 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
* (MW bound or MR is shared)
*/
ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
- "(Rereg1), h_ret=%li e_mr=%p", h_ret, e_mr);
+ "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
*pginfo = pginfo_save;
ret = -EAGAIN;
} else if ((u64 *)hipzout.vaddr != iova_start) {
ehca_err(&shca->ib_device, "PHYP changed iova_start in "
- "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
- "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
+ "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
+ "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
e_mr->ib.ib_mr.lkey, hipzout.lkey);
ret = -EFAULT;
@@ -1235,7 +1277,7 @@ ehca_rereg_mr_rereg1_exit1:
ehca_rereg_mr_rereg1_exit0:
if ( ret && (ret != -EAGAIN) )
ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
- "pginfo=%p num_kpages=%lx num_hwpages=%lx",
+ "pginfo=%p num_kpages=%llx num_hwpages=%llx",
ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
pginfo->num_hwpages);
return ret;
@@ -1263,7 +1305,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
(e_mr->num_hwpages > MAX_RPAGES) ||
(pginfo->num_hwpages > e_mr->num_hwpages)) {
ehca_dbg(&shca->ib_device, "Rereg3 case, "
- "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x",
+ "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
pginfo->num_hwpages, e_mr->num_hwpages);
rereg_1_hcall = 0;
rereg_3_hcall = 1;
@@ -1295,7 +1337,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_free_mr failed, "
- "h_ret=%li e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+ "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
"mr->lkey=%x",
h_ret, e_mr, shca->ipz_hca_handle.handle,
e_mr->ipz_mr_handle.handle,
@@ -1316,7 +1358,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
- e_pd, pginfo, lkey, rkey);
+ e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
if (ret) {
u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
memcpy(&e_mr->flags, &(save_mr.flags),
@@ -1328,8 +1370,8 @@ int ehca_rereg_mr(struct ehca_shca *shca,
ehca_rereg_mr_exit0:
if (ret)
ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
- "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
- "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
+ "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
+ "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
"rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
rereg_1_hcall, rereg_3_hcall);
@@ -1371,8 +1413,8 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
* FMRs are not shared and no MW bound to FMRs
*/
ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
- "(Rereg1), h_ret=%li e_fmr=%p hca_hndl=%lx "
- "mr_hndl=%lx lkey=%x lkey_out=%x",
+ "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
+ "mr_hndl=%llx lkey=%x lkey_out=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle,
e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
@@ -1383,7 +1425,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_free_mr failed, "
- "h_ret=%li e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+ "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
"lkey=%x",
h_ret, e_fmr, shca->ipz_hca_handle.handle,
e_fmr->ipz_mr_handle.handle,
@@ -1409,7 +1451,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
ret = ehca_reg_mr(shca, e_fmr, NULL,
(e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
- &tmp_rkey);
+ &tmp_rkey, EHCA_REG_MR);
if (ret) {
u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
memcpy(&e_fmr->flags, &(save_mr.flags),
@@ -1447,9 +1489,9 @@ int ehca_reg_smr(struct ehca_shca *shca,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
&hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
+ ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
"shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
- "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
@@ -1478,6 +1520,90 @@ ehca_reg_smr_exit0:
} /* end ehca_reg_smr() */
/*----------------------------------------------------------------------*/
+static inline void *ehca_calc_sectbase(int top, int dir, int idx)
+{
+ unsigned long ret = idx;
+ ret |= dir << EHCA_DIR_INDEX_SHIFT;
+ ret |= top << EHCA_TOP_INDEX_SHIFT;
+ return __va(ret << SECTION_SIZE_BITS);
+}
+
+#define ehca_bmap_valid(entry) \
+ ((u64)entry != (u64)EHCA_INVAL_ADDR)
+
+static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
+ struct ehca_shca *shca, struct ehca_mr *mr,
+ struct ehca_mr_pginfo *pginfo)
+{
+ u64 h_ret = 0;
+ unsigned long page = 0;
+ u64 rpage = __pa(kpage);
+ int page_count;
+
+ void *sectbase = ehca_calc_sectbase(top, dir, idx);
+ if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
+		ehca_err(&shca->ib_device, "reg_mr_section will probably fail: "
+			 "section start address is not aligned "
+			 "to hwpage_size");
+ }
+ page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
+
+ while (page < page_count) {
+ u64 rnum;
+ for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
+ rnum++) {
+ void *pg = sectbase + ((page++) * pginfo->hwpage_size);
+ kpage[rnum] = __pa(pg);
+ }
+
+ h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
+ ehca_encode_hwpage_size(pginfo->hwpage_size),
+ 0, rpage, rnum);
+
+ if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
+ ehca_err(&shca->ib_device, "register_rpage_mr failed");
+ return h_ret;
+ }
+ }
+ return h_ret;
+}
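
ehca_reg_mr_section() feeds the firmware at most MAX_RPAGES hardware-page addresses per hipz_h_register_rpage_mr() call. A minimal standalone sketch of that batching loop, with made-up page counts and a printf standing in for the hcall:

#include <stdio.h>

#define MAX_RPAGES 512	/* assumed batch size per hcall */

/* stand-in for hipz_h_register_rpage_mr(): just report the batch */
static int register_rpages(unsigned long first, unsigned long count)
{
	printf("register hwpages %lu..%lu (%lu entries)\n",
	       first, first + count - 1, count);
	return 0;
}

int main(void)
{
	unsigned long page = 0, page_count = 1200;	/* hwpages in one section, illustrative */

	while (page < page_count) {
		unsigned long rnum, start = page;

		for (rnum = 0; rnum < MAX_RPAGES && page < page_count; rnum++)
			page++;		/* the driver fills kpage[rnum] here */

		if (register_rpages(start, rnum))
			return 1;
	}
	return 0;
}
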
+
+static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
+ struct ehca_shca *shca, struct ehca_mr *mr,
+ struct ehca_mr_pginfo *pginfo)
+{
+ u64 hret = H_SUCCESS;
+ int idx;
+
+ for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
+ continue;
+
+ hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
+ pginfo);
+ if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
+ return hret;
+ }
+ return hret;
+}
+
+static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
+ struct ehca_mr *mr,
+ struct ehca_mr_pginfo *pginfo)
+{
+ u64 hret = H_SUCCESS;
+ int dir;
+
+ for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
+ continue;
+
+ hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
+ if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
+ return hret;
+ }
+ return hret;
+}
/* register internal max-MR to internal SHCA */
int ehca_reg_internal_maxmr(
@@ -1495,6 +1621,11 @@ int ehca_reg_internal_maxmr(
u32 num_hwpages;
u64 hw_pgsize;
+ if (!ehca_bmap) {
+ ret = -EFAULT;
+ goto ehca_reg_internal_maxmr_exit0;
+ }
+
e_mr = ehca_mr_new();
if (!e_mr) {
ehca_err(&shca->ib_device, "out of memory");
@@ -1504,8 +1635,8 @@ int ehca_reg_internal_maxmr(
e_mr->flags |= EHCA_MR_FLAG_MAXMR;
/* register internal max-MR on HCA */
- size_maxmr = (u64)high_memory - PAGE_OFFSET;
- iova_start = (u64 *)KERNELBASE;
+ size_maxmr = ehca_mr_len;
+ iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
ib_pbuf.addr = 0;
ib_pbuf.size = size_maxmr;
num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
@@ -1524,10 +1655,10 @@ int ehca_reg_internal_maxmr(
ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
+ &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
if (ret) {
ehca_err(&shca->ib_device, "reg of internal max MR failed, "
- "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x "
+ "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
"num_hwpages=%x", e_mr, iova_start, size_maxmr,
num_kpages, num_hwpages);
goto ehca_reg_internal_maxmr_exit1;
@@ -1573,8 +1704,8 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
(u64)iova_start, hipz_acl, e_pd->fw_pd,
&hipzout);
if (h_ret != H_SUCCESS) {
- ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
- "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
+ ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
+ "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
h_ret, e_origmr, shca->ipz_hca_handle.handle,
e_origmr->ipz_mr_handle.handle,
e_origmr->ib.ib_mr.lkey);
@@ -1651,28 +1782,28 @@ int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
/* check first buffer */
if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
- "pbuf->addr=%lx pbuf->size=%lx",
+ "pbuf->addr=%llx pbuf->size=%llx",
iova_start, pbuf->addr, pbuf->size);
return -EINVAL;
}
if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
(num_phys_buf > 1)) {
- ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
- "pbuf->size=%lx", pbuf->addr, pbuf->size);
+ ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
+ "pbuf->size=%llx", pbuf->addr, pbuf->size);
return -EINVAL;
}
for (i = 0; i < num_phys_buf; i++) {
if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
- ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
- "pbuf->size=%lx",
+ ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
+ "pbuf->size=%llx",
i, pbuf->addr, pbuf->size);
return -EINVAL;
}
if (((i > 0) && /* not 1st */
(i < (num_phys_buf - 1)) && /* not last */
(pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
- ehca_gen_err("bad size, i=%x pbuf->size=%lx",
+ ehca_gen_err("bad size, i=%x pbuf->size=%llx",
i, pbuf->size);
return -EINVAL;
}
@@ -1705,7 +1836,7 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
page = page_list;
for (i = 0; i < list_len; i++) {
if (*page % e_fmr->fmr_page_size) {
- ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
+ ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
"fmr_page_size=%x", i, *page, page, e_fmr,
e_fmr->fmr_page_size);
return -EINVAL;
@@ -1724,62 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
u64 pgaddr;
- u32 i = 0;
u32 j = 0;
int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-
- /* loop over desired chunk entries */
- chunk = pginfo->u.usr.next_chunk;
- prev_chunk = pginfo->u.usr.next_chunk;
- list_for_each_entry_continue(
- chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
- for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
- << PAGE_SHIFT ;
- *kpage = phys_to_abs(pgaddr +
- (pginfo->next_hwpage *
- pginfo->hwpage_size));
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%lx "
- "chunk->page_list[i]=%lx "
- "i=%x next_hwpage=%lx",
- pgaddr, (u64)sg_dma_address(
- &chunk->page_list[i]),
- i, pginfo->next_hwpage);
- return -EFAULT;
- }
- (pginfo->hwpage_cnt)++;
- (pginfo->next_hwpage)++;
- kpage++;
- if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
- (pginfo->kpage_cnt)++;
- (pginfo->u.usr.next_nmap)++;
- pginfo->next_hwpage = 0;
- i++;
- }
- j++;
- if (j >= number) break;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
+ pgaddr = page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT;
+ *kpage = pgaddr + (pginfo->next_hwpage *
+ pginfo->hwpage_size);
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx "
+ "sg_dma_address=%llx "
+ "entry=%llx next_hwpage=%llx",
+ pgaddr, (u64)sg_dma_address(*sg),
+ pginfo->u.usr.next_nmap,
+ pginfo->next_hwpage);
+ return -EFAULT;
}
- if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ kpage++;
+ if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.usr.next_nmap)++;
+ pginfo->next_hwpage = 0;
+ *sg = sg_next(*sg);
+ }
+ j++;
+ if (j >= number)
break;
- else
- prev_chunk = chunk;
}
- pginfo->u.usr.next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->u.usr.region->chunk_list)),
- list);
+
return ret;
}
@@ -1787,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
* check given pages for contiguous layout
* last page addr is returned in prev_pgaddr for further check
*/
-static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
- int start_idx, int end_idx,
+static int ehca_check_kpages_per_ate(struct scatterlist **sg,
+ int num_pages,
u64 *prev_pgaddr)
{
- int t;
- for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
+ for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
+ u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
if (ehca_debug_level >= 3)
- ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr,
- *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
+ ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
+ *(u64 *)__va(pgaddr));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
- ehca_gen_err("uncontiguous page found pgaddr=%lx "
- "prev_pgaddr=%lx page_list_i=%x",
- pgaddr, *prev_pgaddr, t);
+ ehca_gen_err("uncontiguous page found pgaddr=%llx "
+ "prev_pgaddr=%llx entries_left_in_hwpage=%x",
+ pgaddr, *prev_pgaddr, num_pages);
return -EINVAL;
}
*prev_pgaddr = pgaddr;
@@ -1814,113 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
u64 pgaddr, prev_pgaddr;
- u32 i = 0;
u32 j = 0;
int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
int nr_kpages = kpages_per_hwpage;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
- /* loop over desired chunk entries */
- chunk = pginfo->u.usr.next_chunk;
- prev_chunk = pginfo->u.usr.next_chunk;
- list_for_each_entry_continue(
- chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
- for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
- << PAGE_SHIFT );
- *kpage = phys_to_abs(pgaddr);
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%lx i=%x",
- pgaddr, i);
+ if (nr_kpages == kpages_per_hwpage) {
+ pgaddr = (page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT);
+ *kpage = pgaddr;
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx entry=%llx",
+ pgaddr, pginfo->u.usr.next_nmap);
+ ret = -EFAULT;
+ return ret;
+ }
+ /*
+ * The first page in a hwpage must be aligned;
+ * the first MR page is exempt from this rule.
+ */
+ if (pgaddr & (pginfo->hwpage_size - 1)) {
+ if (pginfo->hwpage_cnt) {
+ ehca_gen_err(
+ "invalid alignment "
+ "pgaddr=%llx entry=%llx "
+ "mr_pgsize=%llx",
+ pgaddr, pginfo->u.usr.next_nmap,
+ pginfo->hwpage_size);
ret = -EFAULT;
return ret;
}
- /*
- * The first page in a hwpage must be aligned;
- * the first MR page is exempt from this rule.
- */
- if (pgaddr & (pginfo->hwpage_size - 1)) {
- if (pginfo->hwpage_cnt) {
- ehca_gen_err(
- "invalid alignment "
- "pgaddr=%lx i=%x "
- "mr_pgsize=%lx",
- pgaddr, i,
- pginfo->hwpage_size);
- ret = -EFAULT;
- return ret;
- }
- /* first MR page */
- pginfo->kpage_cnt =
- (pgaddr &
- (pginfo->hwpage_size - 1)) >>
- PAGE_SHIFT;
- nr_kpages -= pginfo->kpage_cnt;
- *kpage = phys_to_abs(
- pgaddr &
- ~(pginfo->hwpage_size - 1));
- }
- if (ehca_debug_level >= 3) {
- u64 val = *(u64 *)abs_to_virt(
- phys_to_abs(pgaddr));
- ehca_gen_dbg("kpage=%lx chunk_page=%lx "
- "value=%016lx",
- *kpage, pgaddr, val);
- }
- prev_pgaddr = pgaddr;
- i++;
- pginfo->kpage_cnt++;
- pginfo->u.usr.next_nmap++;
- nr_kpages--;
- if (!nr_kpages)
- goto next_kpage;
- continue;
+ /* first MR page */
+ pginfo->kpage_cnt =
+ (pgaddr &
+ (pginfo->hwpage_size - 1)) >>
+ PAGE_SHIFT;
+ nr_kpages -= pginfo->kpage_cnt;
+ *kpage = pgaddr &
+ ~(pginfo->hwpage_size - 1);
}
- if (i + nr_kpages > chunk->nmap) {
- ret = ehca_check_kpages_per_ate(
- chunk->page_list, i,
- chunk->nmap - 1, &prev_pgaddr);
- if (ret) return ret;
- pginfo->kpage_cnt += chunk->nmap - i;
- pginfo->u.usr.next_nmap += chunk->nmap - i;
- nr_kpages -= chunk->nmap - i;
- break;
+ if (ehca_debug_level >= 3) {
+ u64 val = *(u64 *)__va(pgaddr);
+ ehca_gen_dbg("kpage=%llx page=%llx "
+ "value=%016llx",
+ *kpage, pgaddr, val);
}
+ prev_pgaddr = pgaddr;
+ *sg = sg_next(*sg);
+ pginfo->kpage_cnt++;
+ pginfo->u.usr.next_nmap++;
+ nr_kpages--;
+ if (!nr_kpages)
+ goto next_kpage;
+ continue;
+ }
+
+ ret = ehca_check_kpages_per_ate(sg, nr_kpages,
+ &prev_pgaddr);
+ if (ret)
+ return ret;
+ pginfo->kpage_cnt += nr_kpages;
+ pginfo->u.usr.next_nmap += nr_kpages;
- ret = ehca_check_kpages_per_ate(chunk->page_list, i,
- i + nr_kpages - 1,
- &prev_pgaddr);
- if (ret) return ret;
- i += nr_kpages;
- pginfo->kpage_cnt += nr_kpages;
- pginfo->u.usr.next_nmap += nr_kpages;
next_kpage:
- nr_kpages = kpages_per_hwpage;
- (pginfo->hwpage_cnt)++;
- kpage++;
- j++;
- if (j >= number) break;
- }
- if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
+ nr_kpages = kpages_per_hwpage;
+ (pginfo->hwpage_cnt)++;
+ kpage++;
+ j++;
+ if (j >= number)
break;
- else
- prev_chunk = chunk;
}
- pginfo->u.usr.next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->u.usr.region->chunk_list)),
- list);
+
return ret;
}
@@ -1944,21 +2018,20 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
(pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
ehca_gen_err("kpage_cnt >= num_kpages, "
- "kpage_cnt=%lx num_kpages=%lx "
- "hwpage_cnt=%lx "
- "num_hwpages=%lx i=%x",
+ "kpage_cnt=%llx num_kpages=%llx "
+ "hwpage_cnt=%llx "
+ "num_hwpages=%llx i=%x",
pginfo->kpage_cnt,
pginfo->num_kpages,
pginfo->hwpage_cnt,
pginfo->num_hwpages, i);
return -EFAULT;
}
- *kpage = phys_to_abs(
- (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
- (pginfo->next_hwpage * pginfo->hwpage_size));
+ *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size);
if ( !(*kpage) && pbuf->addr ) {
- ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx "
- "next_hwpage=%lx", pbuf->addr,
+ ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
+ "next_hwpage=%llx", pbuf->addr,
pbuf->size, pginfo->next_hwpage);
return -EFAULT;
}
@@ -1993,11 +2066,11 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
/* loop over desired page_list entries */
fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
for (i = 0; i < number; i++) {
- *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
- pginfo->next_hwpage * pginfo->hwpage_size);
+ *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) +
+ pginfo->next_hwpage * pginfo->hwpage_size;
if ( !(*kpage) ) {
- ehca_gen_err("*fmrlist=%lx fmrlist=%p "
- "next_listelem=%lx next_hwpage=%lx",
+ ehca_gen_err("*fmrlist=%llx fmrlist=%p "
+ "next_listelem=%llx next_hwpage=%llx",
*fmrlist, fmrlist,
pginfo->u.fmr.next_listelem,
pginfo->next_hwpage);
@@ -2021,11 +2094,10 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
u64 prev = *kpage;
/* check if adrs are contiguous */
for (j = 1; j < cnt_per_hwpage; j++) {
- u64 p = phys_to_abs(fmrlist[j] &
- ~(pginfo->hwpage_size - 1));
+ u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1);
if (prev + pginfo->u.fmr.fmr_pgsize != p) {
ehca_gen_err("uncontiguous fmr pages "
- "found prev=%lx p=%lx "
+ "found prev=%llx p=%llx "
"idx=%x", prev, p, i + j);
return -EINVAL;
}
@@ -2077,8 +2149,8 @@ int ehca_mr_is_maxmr(u64 size,
u64 *iova_start)
{
/* a MR is treated as max-MR only if it fits following: */
- if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
- (iova_start == (void *)KERNELBASE)) {
+ if ((size == ehca_mr_len) &&
+ (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
ehca_gen_dbg("this is a max-MR");
return 1;
} else
@@ -2184,3 +2256,338 @@ void ehca_cleanup_mrmw_cache(void)
if (mw_cache)
kmem_cache_destroy(mw_cache);
}
+
+static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
+ int dir)
+{
+ if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
+ ehca_top_bmap->dir[dir] =
+ kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
+ if (!ehca_top_bmap->dir[dir])
+ return -ENOMEM;
+ /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
+ memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
+ }
+ return 0;
+}
+
+static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
+{
+ if (!ehca_bmap_valid(ehca_bmap->top[top])) {
+ ehca_bmap->top[top] =
+ kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
+ if (!ehca_bmap->top[top])
+ return -ENOMEM;
+ /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
+ memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
+ }
+ return ehca_init_top_bmap(ehca_bmap->top[top], dir);
+}
+
+static inline int ehca_calc_index(unsigned long i, unsigned long s)
+{
+ return (i >> s) & EHCA_INDEX_MASK;
+}
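
ehca_calc_sectbase() and ehca_calc_index() are inverses: one rebuilds a section base address from top/dir/idx, the other extracts one of those indices from a section number. A self-contained sketch of the arithmetic; the 13-bits-per-level layout is an assumption for illustration, the real constants are defined in ehca_mrmw.h:

#include <stdio.h>

/* assumed layout: 13 index bits per level, illustration only */
#define DIR_INDEX_SHIFT  13
#define TOP_INDEX_SHIFT  (2 * DIR_INDEX_SHIFT)
#define INDEX_MASK       ((1UL << DIR_INDEX_SHIFT) - 1)

static unsigned long calc_index(unsigned long i, unsigned long s)
{
	return (i >> s) & INDEX_MASK;
}

int main(void)
{
	unsigned long section = 0x12345;
	unsigned long top = calc_index(section, TOP_INDEX_SHIFT);
	unsigned long dir = calc_index(section, DIR_INDEX_SHIFT);
	unsigned long idx = section & INDEX_MASK;

	/* recombine the three indices, as ehca_calc_sectbase() does */
	unsigned long rebuilt = (top << TOP_INDEX_SHIFT) |
				(dir << DIR_INDEX_SHIFT) | idx;

	printf("section=%#lx top=%lu dir=%lu idx=%lu rebuilt=%#lx\n",
	       section, top, dir, idx, rebuilt);
	return 0;
}
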
+
+void ehca_destroy_busmap(void)
+{
+ int top, dir;
+
+ if (!ehca_bmap)
+ return;
+
+ for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]))
+ continue;
+ for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
+ continue;
+
+ kfree(ehca_bmap->top[top]->dir[dir]);
+ }
+
+ kfree(ehca_bmap->top[top]);
+ }
+
+ kfree(ehca_bmap);
+ ehca_bmap = NULL;
+}
+
+static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
+{
+ unsigned long i, start_section, end_section;
+ int top, dir, idx;
+
+ if (!nr_pages)
+ return 0;
+
+ if (!ehca_bmap) {
+ ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
+ if (!ehca_bmap)
+ return -ENOMEM;
+ /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
+ memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
+ }
+
+ start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE;
+ end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
+ for (i = start_section; i < end_section; i++) {
+ int ret;
+ top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
+ dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
+ idx = i & EHCA_INDEX_MASK;
+
+ ret = ehca_init_bmap(ehca_bmap, top, dir);
+ if (ret) {
+ ehca_destroy_busmap();
+ return ret;
+ }
+ ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
+ ehca_mr_len += EHCA_SECTSIZE;
+ }
+ return 0;
+}
+
+static int ehca_is_hugepage(unsigned long pfn)
+{
+ int page_order;
+
+ if (pfn & EHCA_HUGEPAGE_PFN_MASK)
+ return 0;
+
+ page_order = compound_order(pfn_to_page(pfn));
+ if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
+ return 0;
+
+ return 1;
+}
+
+static int ehca_create_busmap_callback(unsigned long initial_pfn,
+ unsigned long total_nr_pages, void *arg)
+{
+ int ret;
+ unsigned long pfn, start_pfn, end_pfn, nr_pages;
+
+ if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
+ return ehca_update_busmap(initial_pfn, total_nr_pages);
+
+ /* Given chunk is >= 16GB -> check for hugepages */
+ start_pfn = initial_pfn;
+ end_pfn = initial_pfn + total_nr_pages;
+ pfn = start_pfn;
+
+ while (pfn < end_pfn) {
+ if (ehca_is_hugepage(pfn)) {
+ /* Add mem found in front of the hugepage */
+ nr_pages = pfn - start_pfn;
+ ret = ehca_update_busmap(start_pfn, nr_pages);
+ if (ret)
+ return ret;
+ /* Skip the hugepage */
+ pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
+ start_pfn = pfn;
+ } else
+ pfn += (EHCA_SECTSIZE / PAGE_SIZE);
+ }
+
+ /* Add mem found behind the hugepage(s) */
+ nr_pages = pfn - start_pfn;
+ return ehca_update_busmap(start_pfn, nr_pages);
+}
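
For RAM chunks large enough to contain 16GB hugepages, the callback registers the stretch in front of each hugepage, skips the hugepage itself, and continues behind it. A toy, runnable version of that walk (section/hugepage sizes and the hugepage test are made up, printf stands in for ehca_update_busmap()):

#include <stdio.h>

#define SECT_PAGES 16	/* pages per section, illustrative */
#define HUGE_PAGES 64	/* pages per hugepage, illustrative */

static int is_hugepage(unsigned long pfn)
{
	/* pretend pfn 128 starts a hugepage; alignment check as in the driver */
	return (pfn % HUGE_PAGES) == 0 && pfn == 128;
}

static void update_busmap(unsigned long pfn, unsigned long nr_pages)
{
	if (nr_pages)
		printf("map pfns %lu..%lu\n", pfn, pfn + nr_pages - 1);
}

int main(void)
{
	unsigned long start_pfn = 0, end_pfn = 256, pfn = start_pfn;

	while (pfn < end_pfn) {
		if (is_hugepage(pfn)) {
			update_busmap(start_pfn, pfn - start_pfn); /* mem in front */
			pfn += HUGE_PAGES;                         /* skip hugepage */
			start_pfn = pfn;
		} else {
			pfn += SECT_PAGES;
		}
	}
	update_busmap(start_pfn, pfn - start_pfn);                 /* mem behind */
	return 0;
}
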
+
+int ehca_create_busmap(void)
+{
+ int ret;
+
+ ehca_mr_len = 0;
+ ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
+ ehca_create_busmap_callback);
+ return ret;
+}
+
+static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
+ struct ehca_mr *e_mr,
+ struct ehca_mr_pginfo *pginfo)
+{
+ int top;
+	u64 hret = H_SUCCESS, *kpage;
+
+ kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+ if (!kpage) {
+ ehca_err(&shca->ib_device, "kpage alloc failed");
+ return -ENOMEM;
+ }
+ for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
+ if (!ehca_bmap_valid(ehca_bmap->top[top]))
+ continue;
+ hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
+ if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
+ break;
+ }
+
+ ehca_free_fw_ctrlblock(kpage);
+
+ if (hret == H_SUCCESS)
+ return 0; /* Everything is fine */
+ else {
+ ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
+ "h_ret=%lli e_mr=%p top=%x lkey=%x "
+ "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
+ e_mr->ib.ib_mr.lkey,
+ shca->ipz_hca_handle.handle,
+ e_mr->ipz_mr_handle.handle);
+ return ehca2ib_return_code(hret);
+ }
+}
+
+static u64 ehca_map_vaddr(void *caddr)
+{
+ int top, dir, idx;
+ unsigned long abs_addr, offset;
+ u64 entry;
+
+ if (!ehca_bmap)
+ return EHCA_INVAL_ADDR;
+
+ abs_addr = __pa(caddr);
+ top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
+ if (!ehca_bmap_valid(ehca_bmap->top[top]))
+ return EHCA_INVAL_ADDR;
+
+ dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
+ if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
+ return EHCA_INVAL_ADDR;
+
+ idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
+
+ entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
+ if (ehca_bmap_valid(entry)) {
+ offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
+ return entry | offset;
+ } else
+ return EHCA_INVAL_ADDR;
+}
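
ehca_map_vaddr() translates a kernel virtual address into the contiguous bus offset that ehca_update_busmap() assigned to its memory section, ORing in the offset within the section. A flattened, single-level sketch of the same lookup (the driver walks three levels; table contents and sizes here are invented):

#include <stdio.h>
#include <stdint.h>

#define SECTSIZE   (1UL << 24)	/* 16MB sections, illustrative */
#define INVAL_ADDR (~0UL)

/* toy map: physical section number -> assigned bus offset */
static uint64_t sect_map[8] = {
	0 * SECTSIZE, 1 * SECTSIZE, INVAL_ADDR, 2 * SECTSIZE,
	INVAL_ADDR, INVAL_ADDR, 3 * SECTSIZE, INVAL_ADDR,
};

static uint64_t map_addr(uint64_t phys)
{
	uint64_t sect = phys / SECTSIZE;
	uint64_t entry;

	if (sect >= 8)
		return INVAL_ADDR;
	entry = sect_map[sect];
	if (entry == INVAL_ADDR)
		return INVAL_ADDR;		/* section was never registered */
	return entry | (phys & (SECTSIZE - 1));	/* bus base + offset in section */
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)map_addr(3 * SECTSIZE + 0x1234));
	printf("%#llx\n", (unsigned long long)map_addr(2 * SECTSIZE));
	return 0;
}
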
+
+static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == EHCA_INVAL_ADDR;
+}
+
+static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
+ size_t size, enum dma_data_direction direction)
+{
+ if (cpu_addr)
+ return ehca_map_vaddr(cpu_addr);
+ else
+ return EHCA_INVAL_ADDR;
+}
+
+static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* This is only a stub; nothing to be done here */
+}
+
+static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ u64 addr;
+
+ if (offset + size > PAGE_SIZE)
+ return EHCA_INVAL_ADDR;
+
+ addr = ehca_map_vaddr(page_address(page));
+ if (!ehca_dma_mapping_error(dev, addr))
+ addr += offset;
+
+ return addr;
+}
+
+static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ /* This is only a stub; nothing to be done here */
+}
+
+static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) {
+ u64 addr;
+ addr = ehca_map_vaddr(sg_virt(sg));
+ if (ehca_dma_mapping_error(dev, addr))
+ return 0;
+
+ sg->dma_address = addr;
+ sg->dma_length = sg->length;
+ }
+ return nents;
+}
+
+static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+{
+ /* This is only a stub; nothing to be done here */
+}
+
+static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+ dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+}
+
+static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+ dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+}
+
+static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+ u64 dma_addr;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p) {
+ addr = page_address(p);
+ dma_addr = ehca_map_vaddr(addr);
+ if (ehca_dma_mapping_error(dev, dma_addr)) {
+ free_pages((unsigned long)addr, get_order(size));
+ return NULL;
+ }
+ if (dma_handle)
+ *dma_handle = dma_addr;
+ return addr;
+ }
+ return NULL;
+}
+
+static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
+{
+ if (cpu_addr && size)
+ free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+
+struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
+ .mapping_error = ehca_dma_mapping_error,
+ .map_single = ehca_dma_map_single,
+ .unmap_single = ehca_dma_unmap_single,
+ .map_page = ehca_dma_map_page,
+ .unmap_page = ehca_dma_unmap_page,
+ .map_sg = ehca_dma_map_sg,
+ .unmap_sg = ehca_dma_unmap_sg,
+ .sync_single_for_cpu = ehca_dma_sync_single_for_cpu,
+ .sync_single_for_device = ehca_dma_sync_single_for_device,
+ .alloc_coherent = ehca_dma_alloc_coherent,
+ .free_coherent = ehca_dma_free_coherent,
+};
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
index bc8f4e31c12..50d8b51306d 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.h
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -42,6 +42,11 @@
#ifndef _EHCA_MRMW_H_
#define _EHCA_MRMW_H_
+enum ehca_reg_type {
+ EHCA_REG_MR,
+ EHCA_REG_BUSMAP_MR
+};
+
int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_mr *e_mr,
u64 *iova_start,
@@ -50,7 +55,8 @@ int ehca_reg_mr(struct ehca_shca *shca,
struct ehca_pd *e_pd,
struct ehca_mr_pginfo *pginfo,
u32 *lkey,
- u32 *rkey);
+ u32 *rkey,
+ enum ehca_reg_type reg_type);
int ehca_reg_mr_rpages(struct ehca_shca *shca,
struct ehca_mr *e_mr,
@@ -118,4 +124,9 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
void ehca_mr_deletenew(struct ehca_mr *mr);
+int ehca_create_busmap(void);
+
+void ehca_destroy_busmap(void);
+
+extern struct ib_dma_mapping_ops ehca_dma_mapping_ops;
#endif /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
index 2fe554855fa..351577a6670 100644
--- a/drivers/infiniband/hw/ehca/ehca_pd.c
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -38,6 +38,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_tools.h"
#include "ehca_iverbs.h"
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
index 818803057eb..90c4efa6758 100644
--- a/drivers/infiniband/hw/ehca/ehca_qes.h
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -46,7 +46,7 @@
#include "ehca_tools.h"
-/* virtual scatter gather entry to specify remote adresses with length */
+/* virtual scatter gather entry to specify remote addresses with length */
struct ehca_vsgentry {
u64 vaddr;
u32 lkey;
@@ -148,7 +148,7 @@ struct ehca_wqe {
u32 immediate_data;
union {
struct {
- u64 remote_virtual_adress;
+ u64 remote_virtual_address;
u32 rkey;
u32 reserved;
u64 atomic_1st_op_dma_len;
@@ -213,6 +213,7 @@ struct ehca_wqe {
#define WC_STATUS_ERROR_BIT 0x80000000
#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
#define WC_STATUS_PURGE_BIT 0x10
+#define WC_SEND_RECEIVE_BIT 0x80
struct ehca_cqe {
u64 work_request_id;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 3f59587338e..2e89356c46f 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -43,6 +43,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_classes.h"
#include "ehca_tools.h"
#include "ehca_qes.h"
@@ -55,9 +57,7 @@ static struct kmem_cache *qp_cache;
/*
* attributes not supported by query qp
*/
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
- IB_QP_MAX_QP_RD_ATOMIC | \
- IB_QP_ACCESS_FLAGS | \
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \
IB_QP_EN_SQD_ASYNC_NOTIFY)
/*
@@ -251,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
return ST_UD;
case IB_QPT_RAW_IPV6:
return -EINVAL;
- case IB_QPT_RAW_ETY:
+ case IB_QPT_RAW_ETHERTYPE:
return -EINVAL;
default:
ehca_gen_err("Invalid ibqptype=%x", ibqptype);
@@ -321,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
ret = -EINVAL;
goto init_qp_queue1;
}
- rpage = virt_to_abs(vpage);
+ rpage = __pa(vpage);
h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
my_qp->ipz_qp_handle,
@@ -331,7 +331,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
if (cnt == (nr_q_pages - 1)) { /* last page! */
if (h_ret != expected_hret) {
ehca_err(ib_dev, "hipz_qp_register_rpage() "
- "h_ret=%li", h_ret);
+ "h_ret=%lli", h_ret);
ret = ehca2ib_return_code(h_ret);
goto init_qp_queue1;
}
@@ -345,7 +345,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
} else {
if (h_ret != H_PAGE_REGISTERED) {
ehca_err(ib_dev, "hipz_qp_register_rpage() "
- "h_ret=%li", h_ret);
+ "h_ret=%lli", h_ret);
ret = ehca2ib_return_code(h_ret);
goto init_qp_queue1;
}
@@ -396,6 +396,54 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
queue->is_small = (queue->page_size != 0);
}
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+ struct list_head *list, *node;
+
+ /* TODO: support low latency QPs */
+ if (qp->ext_type == EQPT_LLQP)
+ return;
+
+ if (on_sq) {
+ list = &qp->send_cq->sqp_err_list;
+ node = &qp->sq_err_node;
+ } else {
+ list = &qp->recv_cq->rqp_err_list;
+ node = &qp->rq_err_node;
+ }
+
+ if (list_empty(node))
+ list_add_tail(node, list);
+
+ return;
+}
+
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->spinlock, flags);
+
+ if (!list_empty(node))
+ list_del_init(node);
+
+ spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+ int i;
+
+ qmap->tail = qmap->entries - 1;
+ qmap->left_to_poll = 0;
+ qmap->next_wqe_idx = 0;
+ for (i = 0; i < qmap->entries; i++) {
+ qmap->map[i].reported = 1;
+ qmap->map[i].cqe_req = 0;
+ }
+}
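
reset_queue_map() marks every slot as already reported with no CQE requested, so flush CQEs are only ever generated for WQEs that were actually posted and signalled. A toy model of that bookkeeping:

#include <stdio.h>

struct qmap_entry { unsigned reported:1, cqe_req:1; };

#define ENTRIES 4
static struct qmap_entry map[ENTRIES];

static void reset_queue_map(void)
{
	/* mark every slot as already reported so no bogus flush CQE is built */
	for (int i = 0; i < ENTRIES; i++) {
		map[i].reported = 1;
		map[i].cqe_req = 0;
	}
}

static void post_wqe(int idx, int signaled)
{
	map[idx].reported = 0;		/* a completion is now outstanding */
	map[idx].cqe_req = signaled;	/* flush CQE wanted only if signalled */
}

int main(void)
{
	reset_queue_map();
	post_wqe(1, 1);
	for (int i = 0; i < ENTRIES; i++)
		printf("slot %d: flush CQE needed=%d\n",
		       i, !map[i].reported && map[i].cqe_req);
	return 0;
}
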
+
/*
* Create an ib_qp struct that is either a QP or an SRQ, depending on
* the value of the is_srq parameter. If init_attr and srq_init_attr share
@@ -407,13 +455,13 @@ static struct ehca_qp *internal_create_qp(
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata, int is_srq)
{
- struct ehca_qp *my_qp;
+ struct ehca_qp *my_qp, *my_srq = NULL;
struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
ib_device);
struct ib_ucontext *context = NULL;
u64 h_ret;
- int is_llqp = 0, has_srq = 0;
+ int is_llqp = 0, has_srq = 0, is_user = 0;
int qp_type, max_send_sge, max_recv_sge, ret;
/* h_call's out parameters */
@@ -421,9 +469,9 @@ static struct ehca_qp *internal_create_qp(
u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
unsigned long flags;
- if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
+ if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
ehca_err(pd->device, "Unable to create QP, max number of %i "
- "QPs reached.", ehca_max_qp);
+ "QPs reached.", shca->max_num_qps);
ehca_err(pd->device, "To increase the maximum number of QPs "
"use the number_of_qps module parameter.\n");
return ERR_PTR(-ENOSPC);
@@ -456,8 +504,13 @@ static struct ehca_qp *internal_create_qp(
/* handle SRQ base QPs */
if (init_attr->srq) {
- struct ehca_qp *my_srq =
- container_of(init_attr->srq, struct ehca_qp, ib_srq);
+ my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+ if (qp_type == IB_QPT_UC) {
+ ehca_err(pd->device, "UC with SRQ not supported");
+ atomic_dec(&shca->num_qps);
+ return ERR_PTR(-EINVAL);
+ }
has_srq = 1;
parms.ext_type = EQPT_SRQBASE;
@@ -556,9 +609,6 @@ static struct ehca_qp *internal_create_qp(
}
}
- if (pd->uobject && udata)
- context = pd->uobject->context;
-
my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
if (!my_qp) {
ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
@@ -566,6 +616,11 @@ static struct ehca_qp *internal_create_qp(
return ERR_PTR(-ENOMEM);
}
+ if (pd->uobject && udata) {
+ is_user = 1;
+ context = pd->uobject->context;
+ }
+
atomic_set(&my_qp->nr_events, 0);
init_waitqueue_head(&my_qp->wait_completion);
spin_lock_init(&my_qp->spinlock_s);
@@ -581,30 +636,26 @@ static struct ehca_qp *internal_create_qp(
my_qp->send_cq =
container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
- do {
- if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
- ret = -ENOMEM;
- ehca_err(pd->device, "Can't reserve idr resources.");
- goto create_qp_exit0;
- }
+ idr_preload(GFP_KERNEL);
+ write_lock_irqsave(&ehca_qp_idr_lock, flags);
- write_lock_irqsave(&ehca_qp_idr_lock, flags);
- ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
- write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
- } while (ret == -EAGAIN);
+ ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
+ if (ret >= 0)
+ my_qp->token = ret;
- if (ret) {
- ret = -ENOMEM;
- ehca_err(pd->device, "Can't allocate new idr entry.");
+ write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ idr_preload_end();
+ if (ret < 0) {
+ if (ret == -ENOSPC) {
+ ret = -EINVAL;
+ ehca_err(pd->device, "Invalid number of qp");
+ } else {
+ ret = -ENOMEM;
+ ehca_err(pd->device, "Can't allocate new idr entry.");
+ }
goto create_qp_exit0;
}
- if (my_qp->token > 0x1FFFFFF) {
- ret = -EINVAL;
- ehca_err(pd->device, "Invalid number of qp");
- goto create_qp_exit1;
- }
-
if (has_srq)
parms.srq_token = my_qp->token;
@@ -654,9 +705,9 @@ static struct ehca_qp *internal_create_qp(
(parms.squeue.is_small || parms.rqueue.is_small);
}
- h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
+ h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
if (h_ret != H_SUCCESS) {
- ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
+ ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
h_ret);
ret = ehca2ib_return_code(h_ret);
goto create_qp_exit1;
@@ -715,6 +766,21 @@ static struct ehca_qp *internal_create_qp(
"and pages ret=%i", ret);
goto create_qp_exit2;
}
+
+ if (!is_user) {
+ my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+ my_qp->ipz_squeue.qe_size;
+ my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+ sizeof(struct ehca_qmap_entry));
+ if (!my_qp->sq_map.map) {
+ ehca_err(pd->device, "Couldn't allocate squeue "
+ "map ret=%i", ret);
+ goto create_qp_exit3;
+ }
+ INIT_LIST_HEAD(&my_qp->sq_err_node);
+ /* to avoid the generation of bogus flush CQEs */
+ reset_queue_map(&my_qp->sq_map);
+ }
}
if (HAS_RQ(my_qp)) {
@@ -724,8 +790,28 @@ static struct ehca_qp *internal_create_qp(
if (ret) {
ehca_err(pd->device, "Couldn't initialize rqueue "
"and pages ret=%i", ret);
- goto create_qp_exit3;
+ goto create_qp_exit4;
+ }
+ if (!is_user) {
+ my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+ my_qp->ipz_rqueue.qe_size;
+ my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+ sizeof(struct ehca_qmap_entry));
+ if (!my_qp->rq_map.map) {
+				ehca_err(pd->device, "Couldn't allocate rqueue "
+ "map ret=%i", ret);
+ goto create_qp_exit5;
+ }
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+ /* to avoid the generation of bogus flush CQEs */
+ reset_queue_map(&my_qp->rq_map);
}
+ } else if (init_attr->srq && !is_user) {
+ /* this is a base QP, use the queue map of the SRQ */
+ my_qp->rq_map = my_srq->rq_map;
+ INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+ my_qp->ipz_rqueue = my_srq->ipz_rqueue;
}
if (is_srq) {
@@ -770,7 +856,7 @@ static struct ehca_qp *internal_create_qp(
if (!my_qp->mod_qp_parm) {
ehca_err(pd->device,
"Could not alloc mod_qp_parm");
- goto create_qp_exit4;
+ goto create_qp_exit5;
}
}
}
@@ -779,8 +865,13 @@ static struct ehca_qp *internal_create_qp(
if (qp_type == IB_QPT_GSI) {
h_ret = ehca_define_sqp(shca, my_qp, init_attr);
if (h_ret != H_SUCCESS) {
+ kfree(my_qp->mod_qp_parm);
+ my_qp->mod_qp_parm = NULL;
+ /* the QP pointer is no longer valid */
+ shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+ NULL;
ret = ehca2ib_return_code(h_ret);
- goto create_qp_exit5;
+ goto create_qp_exit6;
}
}
@@ -789,7 +880,7 @@ static struct ehca_qp *internal_create_qp(
if (ret) {
ehca_err(pd->device,
"Couldn't assign qp to send_cq ret=%i", ret);
- goto create_qp_exit5;
+ goto create_qp_exit7;
}
}
@@ -815,22 +906,30 @@ static struct ehca_qp *internal_create_qp(
if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
ehca_err(pd->device, "Copy to udata failed");
ret = -EINVAL;
- goto create_qp_exit6;
+ goto create_qp_exit8;
}
}
return my_qp;
-create_qp_exit6:
+create_qp_exit8:
ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
-create_qp_exit5:
+create_qp_exit7:
kfree(my_qp->mod_qp_parm);
-create_qp_exit4:
+create_qp_exit6:
+ if (HAS_RQ(my_qp) && !is_user)
+ vfree(my_qp->rq_map.map);
+
+create_qp_exit5:
if (HAS_RQ(my_qp))
ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+create_qp_exit4:
+ if (HAS_SQ(my_qp) && !is_user)
+ vfree(my_qp->sq_map.map);
+
create_qp_exit3:
if (HAS_SQ(my_qp))
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
@@ -874,6 +973,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
struct hcp_modify_qp_control_block *mqpcb;
u64 hret, update_mask;
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+ return ERR_PTR(-ENOSYS);
+
/* For common attributes, internal_create_qp() takes its info
* out of qp_init_attr, so copy all common attrs there.
*/
@@ -912,7 +1014,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
mqpcb, my_qp->galpas.kernel);
if (hret != H_SUCCESS) {
ehca_err(pd->device, "Could not modify SRQ to INIT "
- "ehca_qp=%p qp_num=%x h_ret=%li",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, my_qp->real_qp_num, hret);
goto create_srq2;
}
@@ -926,7 +1028,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
mqpcb, my_qp->galpas.kernel);
if (hret != H_SUCCESS) {
ehca_err(pd->device, "Could not enable SRQ "
- "ehca_qp=%p qp_num=%x h_ret=%li",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, my_qp->real_qp_num, hret);
goto create_srq2;
}
@@ -940,7 +1042,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
mqpcb, my_qp->galpas.kernel);
if (hret != H_SUCCESS) {
ehca_err(pd->device, "Could not modify SRQ to RTR "
- "ehca_qp=%p qp_num=%x h_ret=%li",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, my_qp->real_qp_num, hret);
goto create_srq2;
}
@@ -980,7 +1082,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
&bad_send_wqe_p, NULL, 2);
if (h_ret != H_SUCCESS) {
ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
- " ehca_qp=%p qp_num=%x h_ret=%li",
+ " ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, qp_num, h_ret);
return ehca2ib_return_code(h_ret);
}
@@ -988,7 +1090,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
qp_num, bad_send_wqe_p);
/* convert wqe pointer to vadr */
- bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
+ bad_send_wqe_v = __va((u64)bad_send_wqe_p);
if (ehca_debug_level >= 2)
ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
squeue = &my_qp->ipz_squeue;
@@ -1021,6 +1123,111 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
return 0;
}
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+ struct ehca_queue_map *qmap)
+{
+ void *wqe_v;
+ u64 q_ofs;
+ u32 wqe_idx;
+ unsigned int tail_idx;
+
+ /* convert real to abs address */
+ wqe_p = wqe_p & (~(1UL << 63));
+
+ wqe_v = __va(wqe_p);
+
+ if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+ ehca_gen_err("Invalid offset for calculating left cqes "
+ "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
+ return -EFAULT;
+ }
+
+ tail_idx = next_index(qmap->tail, qmap->entries);
+ wqe_idx = q_ofs / ipz_queue->qe_size;
+
+ /* check all processed wqes, whether a cqe is requested or not */
+ while (tail_idx != wqe_idx) {
+ if (qmap->map[tail_idx].cqe_req)
+ qmap->left_to_poll++;
+ tail_idx = next_index(tail_idx, qmap->entries);
+ }
+ /* save index in queue, where we have to start flushing */
+ qmap->next_wqe_idx = wqe_idx;
+ return 0;
+}
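
calc_left_cqes() walks the queue map from the slot after the last completed WQE up to the WQE the firmware stopped at, counting how many processed WQEs still owe a CQE, and records where flushing must start. A standalone sketch of that ring walk (next_index() is assumed to be a simple wrap-around increment):

#include <stdio.h>

#define ENTRIES 8

static unsigned next_index(unsigned idx)
{
	return (idx + 1) % ENTRIES;	/* assumed ring advance */
}

int main(void)
{
	int cqe_req[ENTRIES] = { 0, 1, 1, 0, 1, 0, 0, 0 };
	unsigned tail = 0;		/* last WQE already completed to the user */
	unsigned wqe_idx = 5;		/* where the firmware stopped (from the hcall) */
	unsigned left_to_poll = 0;

	for (unsigned i = next_index(tail); i != wqe_idx; i = next_index(i))
		if (cqe_req[i])
			left_to_poll++;	/* completions still owed before flushing */

	printf("left_to_poll=%u next_wqe_idx=%u\n", left_to_poll, wqe_idx);
	return 0;
}
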
+
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+ u64 h_ret;
+ void *send_wqe_p, *recv_wqe_p;
+ int ret;
+ unsigned long flags;
+ int qp_num = my_qp->ib_qp.qp_num;
+
+ /* this hcall is not supported on base QPs */
+ if (my_qp->ext_type != EQPT_SRQBASE) {
+ /* get send and receive wqe pointer */
+ h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+ my_qp->ipz_qp_handle, &my_qp->pf,
+ &send_wqe_p, &recv_wqe_p, 4);
+ if (h_ret != H_SUCCESS) {
+ ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+ "failed ehca_qp=%p qp_num=%x h_ret=%lli",
+ my_qp, qp_num, h_ret);
+ return ehca2ib_return_code(h_ret);
+ }
+
+		/*
+		 * acquire the lock so that nobody polls the cq while we
+		 * read the queue map; otherwise qmap->tail could be seen
+		 * in an inconsistent state.
+		 */
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+ &my_qp->sq_map);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+ if (ret)
+ return ret;
+
+
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+ &my_qp->rq_map);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+ if (ret)
+ return ret;
+ } else {
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ my_qp->sq_map.left_to_poll = 0;
+ my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+ my_qp->sq_map.entries);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ my_qp->rq_map.left_to_poll = 0;
+ my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+ my_qp->rq_map.entries);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+ }
+
+	/* this ensures that flush cqes are generated only for pending wqes */
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+ ehca_add_to_err_list(my_qp, 1);
+ spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+ if (HAS_RQ(my_qp)) {
+ spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+ ehca_add_to_err_list(my_qp, 0);
+ spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+ flags);
+ }
+ }
+
+ return 0;
+}
+
/*
* internal_modify_qp with circumvention to handle aqp0 properly
* smi_reset2init indicates if this is an internal reset-to-init-call for
@@ -1041,6 +1248,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
u64 update_mask;
u64 h_ret;
int bad_wqe_cnt = 0;
+ int is_user = 0;
int squeue_locked = 0;
unsigned long flags = 0;
@@ -1058,11 +1266,13 @@ static int internal_modify_qp(struct ib_qp *ibqp,
mqpcb, my_qp->galpas.kernel);
if (h_ret != H_SUCCESS) {
ehca_err(ibqp->device, "hipz_h_query_qp() failed "
- "ehca_qp=%p qp_num=%x h_ret=%li",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, ibqp->qp_num, h_ret);
ret = ehca2ib_return_code(h_ret);
goto modify_qp_exit1;
}
+ if (ibqp->uobject)
+ is_user = 1;
qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
@@ -1119,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
if (!smi_reset2init &&
!ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
- attr_mask)) {
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
ret = -EINVAL;
ehca_err(ibqp->device,
"Invalid qp transition new_state=%x cur_state=%x "
@@ -1460,6 +1670,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
goto modify_qp_exit2;
}
mqpcb->path_migration_state = attr->path_mig_state + 1;
+ if (attr->path_mig_state == IB_MIG_REARM)
+ my_qp->mig_armed = 1;
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
}
@@ -1485,7 +1697,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
- ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%li "
+ ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli "
"ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
@@ -1518,22 +1730,39 @@ static int internal_modify_qp(struct ib_qp *ibqp,
ret = ehca2ib_return_code(h_ret);
ehca_err(ibqp->device, "ENABLE in context of "
"RESET_2_INIT failed! Maybe you didn't get "
- "a LID h_ret=%li ehca_qp=%p qp_num=%x",
+ "a LID h_ret=%lli ehca_qp=%p qp_num=%x",
h_ret, my_qp, ibqp->qp_num);
goto modify_qp_exit2;
}
}
+ if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)
+ && !is_user) {
+ ret = check_for_left_cqes(my_qp, shca);
+ if (ret)
+ goto modify_qp_exit2;
+ }
if (statetrans == IB_QPST_ANY2RESET) {
ipz_qeit_reset(&my_qp->ipz_rqueue);
ipz_qeit_reset(&my_qp->ipz_squeue);
+
+ if (qp_cur_state == IB_QPS_ERR && !is_user) {
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
+ if (HAS_RQ(my_qp))
+ del_from_err_list(my_qp->recv_cq,
+ &my_qp->rq_err_node);
+ }
+ if (!is_user)
+ reset_queue_map(&my_qp->sq_map);
+
+ if (HAS_RQ(my_qp) && !is_user)
+ reset_queue_map(&my_qp->rq_map);
}
if (attr_mask & IB_QP_QKEY)
my_qp->qkey = attr->qkey;
- my_qp->state = qp_new_state;
-
modify_qp_exit2:
if (squeue_locked) { /* this means: sqe -> rts */
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
@@ -1549,6 +1778,8 @@ modify_qp_exit1:
int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata)
{
+ int ret = 0;
+
struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
ib_device);
struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
@@ -1595,12 +1826,18 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
attr->qp_state, my_qp->init_attr.port_num,
ibqp->qp_type);
spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
- return 0;
+ goto out;
}
spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
}
- return internal_modify_qp(ibqp, attr, attr_mask, 0);
+ ret = internal_modify_qp(ibqp, attr, attr_mask, 0);
+
+out:
+ if ((ret == 0) && (attr_mask & IB_QP_STATE))
+ my_qp->state = attr->qp_state;
+
+ return ret;
}
void ehca_recover_sqp(struct ib_qp *sqp)
@@ -1681,7 +1918,7 @@ int ehca_query_qp(struct ib_qp *qp,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(qp->device, "hipz_h_query_qp() failed "
- "ehca_qp=%p qp_num=%x h_ret=%li",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, qp->qp_num, h_ret);
goto query_qp_exit1;
}
@@ -1724,19 +1961,13 @@ int ehca_query_qp(struct ib_qp *qp,
qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
qp_attr->dest_qp_num = qpcb->dest_qp_nr;
- qp_attr->pkey_index =
- EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
-
- qp_attr->port_num =
- EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
-
+ qp_attr->pkey_index = qpcb->prim_p_key_idx;
+ qp_attr->port_num = qpcb->prim_phys_port;
qp_attr->timeout = qpcb->timeout;
qp_attr->retry_cnt = qpcb->retry_count;
qp_attr->rnr_retry = qpcb->rnr_retry_count;
- qp_attr->alt_pkey_index =
- EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
-
+ qp_attr->alt_pkey_index = qpcb->alt_p_key_idx;
qp_attr->alt_port_num = qpcb->alt_phys_port;
qp_attr->alt_timeout = qpcb->timeout_al;
@@ -1823,8 +2054,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
update_mask |=
EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
| EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
- mqpcb->curr_srq_limit =
- EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
+ mqpcb->curr_srq_limit = attr->srq_limit;
mqpcb->qp_aff_asyn_ev_log_reg =
EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
}
@@ -1846,7 +2076,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
- ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%li "
+ ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli "
"ehca_qp=%p qp_num=%x",
h_ret, my_qp, my_qp->real_qp_num);
}
@@ -1880,15 +2110,14 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
if (h_ret != H_SUCCESS) {
ret = ehca2ib_return_code(h_ret);
ehca_err(srq->device, "hipz_h_query_qp() failed "
- "ehca_qp=%p qp_num=%x h_ret=%li",
+ "ehca_qp=%p qp_num=%x h_ret=%lli",
my_qp, my_qp->real_qp_num, h_ret);
goto query_srq_exit1;
}
srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
srq_attr->max_sge = 3;
- srq_attr->srq_limit = EHCA_BMASK_GET(
- MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
+ srq_attr->srq_limit = qpcb->curr_srq_limit;
if (ehca_debug_level >= 2)
ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
@@ -1910,10 +2139,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
int ret;
u64 h_ret;
u8 port_num;
+ int is_user = 0;
enum ib_qp_type qp_type;
unsigned long flags;
if (uobject) {
+ is_user = 1;
if (my_qp->mm_count_galpa ||
my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
ehca_err(dev, "Resources still referenced in "
@@ -1936,12 +2167,22 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
idr_remove(&ehca_qp_idr, my_qp->token);
write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ /*
+ * SRQs will never get into an error list and do not have a recv_cq,
+ * so we need to skip them here.
+ */
+ if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user)
+ del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node);
+
+ if (HAS_SQ(my_qp) && !is_user)
+ del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node);
+
/* now wait until all pending events have completed */
wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
if (h_ret != H_SUCCESS) {
- ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
+ ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli "
"ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
return ehca2ib_return_code(h_ret);
}
@@ -1961,7 +2202,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
if (qp_type == IB_QPT_GSI) {
struct ib_event event;
ehca_info(dev, "device %s: port %x is inactive.",
- shca->ib_device.name, port_num);
+ shca->ib_device.name, port_num);
event.device = &shca->ib_device;
event.event = IB_EVENT_PORT_ERR;
event.element.port_num = port_num;
@@ -1969,10 +2210,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
ib_dispatch_event(&event);
}
- if (HAS_RQ(my_qp))
+ if (HAS_RQ(my_qp)) {
ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
- if (HAS_SQ(my_qp))
+ if (!is_user)
+ vfree(my_qp->rq_map.map);
+ }
+ if (HAS_SQ(my_qp)) {
ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
+ if (!is_user)
+ vfree(my_qp->sq_map.map);
+ }
kmem_cache_free(qp_cache, my_qp);
atomic_dec(&shca->num_qps);
return 0;
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index dd9bc68f1c7..47f94984353 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -42,7 +42,6 @@
*/
-#include <asm-powerpc/system.h>
#include "ehca_classes.h"
#include "ehca_tools.h"
#include "ehca_qes.h"
@@ -53,9 +52,25 @@
/* in RC traffic, insert an empty RDMA READ every this many packets */
#define ACK_CIRC_THRESHOLD 2000000
+static u64 replace_wr_id(u64 wr_id, u16 idx)
+{
+ u64 ret;
+
+ ret = wr_id & ~QMAP_IDX_MASK;
+ ret |= idx & QMAP_IDX_MASK;
+
+ return ret;
+}
+
+static u16 get_app_wr_id(u64 wr_id)
+{
+ return wr_id & QMAP_IDX_MASK;
+}
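
replace_wr_id() stuffs the queue-map index into the low bits of the work request id written to the hardware WQE, after get_app_wr_id() has saved the consumer's original low bits in the map entry; at completion time the driver does the reverse. A standalone sketch of that round trip (the 16-bit QMAP_IDX_MASK value is an assumption):

#include <stdio.h>
#include <stdint.h>

#define QMAP_IDX_MASK 0xFFFFULL	/* assumed: low 16 bits carry the map index */

static uint64_t replace_wr_id(uint64_t wr_id, uint16_t idx)
{
	return (wr_id & ~QMAP_IDX_MASK) | (idx & QMAP_IDX_MASK);
}

static uint16_t get_app_wr_id(uint64_t wr_id)
{
	return wr_id & QMAP_IDX_MASK;
}

int main(void)
{
	uint64_t app_wr_id = 0xABCD00000042ULL;	/* what the consumer posted */
	uint16_t sq_map_idx = 7;		/* slot in the send-queue map */

	/* low bits saved in the map entry, index written into the hardware WQE */
	uint16_t saved = get_app_wr_id(app_wr_id);
	uint64_t hw_wr_id = replace_wr_id(app_wr_id, sq_map_idx);

	/* at completion time: recover the slot, then restore the original id */
	uint16_t slot = hw_wr_id & QMAP_IDX_MASK;
	uint64_t restored = replace_wr_id(hw_wr_id, saved);

	printf("hw=%#llx slot=%u restored=%#llx\n",
	       (unsigned long long)hw_wr_id, slot, (unsigned long long)restored);
	return 0;
}
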
+
static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
struct ehca_wqe *wqe_p,
- struct ib_recv_wr *recv_wr)
+ struct ib_recv_wr *recv_wr,
+ u32 rq_map_idx)
{
u8 cnt_ds;
if (unlikely((recv_wr->num_sge < 0) ||
@@ -69,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = recv_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx);
wqe_p->nr_of_data_seg = recv_wr->num_sge;
for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
@@ -120,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
mad_hdr->attr_mod);
}
for (j = 0; j < send_wr->num_sge; j++) {
- u8 *data = (u8 *)abs_to_virt(sge->addr);
+ u8 *data = __va(sge->addr);
ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
"lkey=%x",
idx, j, data, sge->length, sge->lkey);
@@ -139,12 +154,14 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
static inline int ehca_write_swqe(struct ehca_qp *qp,
struct ehca_wqe *wqe_p,
const struct ib_send_wr *send_wr,
+ u32 sq_map_idx,
int hidden)
{
u32 idx;
u64 dma_length;
struct ehca_av *my_av;
u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+ struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
if (unlikely((send_wr->num_sge < 0) ||
(send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
@@ -157,7 +174,11 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* clear wqe header until sglist */
memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
- wqe_p->work_request_id = send_wr->wr_id;
+ wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx);
+
+ qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 0;
switch (send_wr->opcode) {
case IB_WR_SEND:
@@ -182,8 +203,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
if ((send_wr->send_flags & IB_SEND_SIGNALED ||
qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
- && !hidden)
+ && !hidden) {
wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+ qmap_entry->cqe_req = 1;
+ }
if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
@@ -245,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
/* no break is intentional here */
case IB_QPT_RC:
/* TODO: atomic not implemented */
- wqe_p->u.nud.remote_virtual_adress =
+ wqe_p->u.nud.remote_virtual_address =
send_wr->wr.rdma.remote_addr;
wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
@@ -376,33 +399,36 @@ static inline void map_ib_wc_status(u32 cqe_status,
static inline int post_one_send(struct ehca_qp *my_qp,
struct ib_send_wr *cur_send_wr,
- struct ib_send_wr **bad_send_wr,
int hidden)
{
struct ehca_wqe *wqe_p;
int ret;
+ u32 sq_map_idx;
u64 start_offset = my_qp->ipz_squeue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -ENOMEM;
}
+
+ /*
+ * Get the index of the WQE in the send queue. The same index is used
+ * for writing into the sq_map.
+ */
+ sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size;
+
/* write a SEND WQE into the QUEUE */
- ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden);
+ ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden);
/*
* if something failed,
* reset the free entry pointer to the start value
*/
if (unlikely(ret)) {
my_qp->ipz_squeue.current_q_offset = start_offset;
- if (bad_send_wr)
- *bad_send_wr = cur_send_wr;
ehca_err(my_qp->ib_qp.device, "Could not write WQE "
"qp_num=%x", my_qp->ib_qp.qp_num);
return -EINVAL;
@@ -416,7 +442,6 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr **bad_send_wr)
{
struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
- struct ib_send_wr *cur_send_wr;
int wqe_cnt = 0;
int ret = 0;
unsigned long flags;
@@ -425,7 +450,8 @@ int ehca_post_send(struct ib_qp *qp,
if (unlikely(my_qp->state < IB_QPS_RTS)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* LOCK the QUEUE */
@@ -444,24 +470,21 @@ int ehca_post_send(struct ib_qp *qp,
struct ib_send_wr circ_wr;
memset(&circ_wr, 0, sizeof(circ_wr));
circ_wr.opcode = IB_WR_RDMA_READ;
- post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+ post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
wqe_cnt++;
ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
my_qp->message_count = my_qp->packet_count = 0;
}
/* loop processes list of send reqs */
- for (cur_send_wr = send_wr; cur_send_wr != NULL;
- cur_send_wr = cur_send_wr->next) {
- ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ while (send_wr) {
+ ret = post_one_send(my_qp, send_wr, 0);
if (unlikely(ret)) {
- /* if one or more WQEs were successful, don't fail */
- if (wqe_cnt)
- ret = 0;
goto post_send_exit0;
}
wqe_cnt++;
- } /* eof for cur_send_wr */
+ send_wr = send_wr->next;
+ }
post_send_exit0:
iosync(); /* serialize GAL register access */
@@ -471,6 +494,10 @@ post_send_exit0:
my_qp, qp->qp_num, wqe_cnt, ret);
my_qp->message_count += wqe_cnt;
spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+ if (ret)
+ *bad_send_wr = send_wr;
return ret;
}
@@ -479,56 +506,64 @@ static int internal_post_recv(struct ehca_qp *my_qp,
struct ib_recv_wr *recv_wr,
struct ib_recv_wr **bad_recv_wr)
{
- struct ib_recv_wr *cur_recv_wr;
struct ehca_wqe *wqe_p;
int wqe_cnt = 0;
int ret = 0;
+ u32 rq_map_idx;
unsigned long flags;
+ struct ehca_qmap_entry *qmap_entry;
if (unlikely(!HAS_RQ(my_qp))) {
ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d",
my_qp, my_qp->real_qp_num, my_qp->ext_type);
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
/* LOCK the QUEUE */
spin_lock_irqsave(&my_qp->spinlock_r, flags);
- /* loop processes list of send reqs */
- for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
- cur_recv_wr = cur_recv_wr->next) {
+ /* loop processes list of recv reqs */
+ while (recv_wr) {
u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
/* get pointer next to free WQE */
wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
if (unlikely(!wqe_p)) {
/* too many posted work requests: queue overflow */
- if (bad_recv_wr)
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -ENOMEM;
- ehca_err(dev, "Too many posted WQEs "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -ENOMEM;
+ ehca_err(dev, "Too many posted WQEs "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
+ /*
+ * Get the index of the WQE in the recv queue. The same index
+ * is used for writing into the rq_map.
+ */
+ rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
+
/* write a RECV WQE into the QUEUE */
- ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
+ ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
+ rq_map_idx);
/*
* if something failed,
* reset the free entry pointer to the start value
*/
if (unlikely(ret)) {
my_qp->ipz_rqueue.current_q_offset = start_offset;
- *bad_recv_wr = cur_recv_wr;
- if (wqe_cnt == 0) {
- ret = -EINVAL;
- ehca_err(dev, "Could not write WQE "
- "qp_num=%x", my_qp->real_qp_num);
- }
+ ret = -EINVAL;
+ ehca_err(dev, "Could not write WQE "
+ "qp_num=%x", my_qp->real_qp_num);
goto post_recv_exit0;
}
+
+ qmap_entry = &my_qp->rq_map.map[rq_map_idx];
+ qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
+ qmap_entry->reported = 0;
+ qmap_entry->cqe_req = 1;
+
wqe_cnt++;
- } /* eof for cur_recv_wr */
+ recv_wr = recv_wr->next;
+ } /* eof for recv_wr */
post_recv_exit0:
iosync(); /* serialize GAL register access */
@@ -537,6 +572,11 @@ post_recv_exit0:
ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
my_qp, my_qp->real_qp_num, wqe_cnt, ret);
spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+ if (ret)
+ *bad_recv_wr = recv_wr;
+
return ret;
}
@@ -550,6 +590,7 @@ int ehca_post_recv(struct ib_qp *qp,
if (unlikely(my_qp->state == IB_QPS_RESET)) {
ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x",
my_qp->state, qp->qp_num);
+ *bad_recv_wr = recv_wr;
return -EINVAL;
}
@@ -583,13 +624,15 @@ static const u8 ib_wc_opcode[255] = {
/* internal function to poll one entry of cq */
static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
{
- int ret = 0;
+ int ret = 0, qmap_tail_idx;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
struct ehca_cqe *cqe;
struct ehca_qp *my_qp;
+ struct ehca_qmap_entry *qmap_entry;
+ struct ehca_queue_map *qmap;
int cqe_count = 0, is_error;
-poll_cq_one_read_cqe:
+repoll:
cqe = (struct ehca_cqe *)
ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
if (!cqe) {
@@ -617,7 +660,7 @@ poll_cq_one_read_cqe:
ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
my_cq->cq_number, cqe->local_qp_number);
/* ignore this purged cqe */
- goto poll_cq_one_read_cqe;
+ goto repoll;
}
spin_lock_irqsave(&qp->spinlock_s, flags);
purgeflag = qp->sqerr_purgeflag;
@@ -636,7 +679,7 @@ poll_cq_one_read_cqe:
* that caused sqe and turn off purge flag
*/
qp->sqerr_purgeflag = 0;
- goto poll_cq_one_read_cqe;
+ goto repoll;
}
}
@@ -654,8 +697,62 @@ poll_cq_one_read_cqe:
my_cq, my_cq->cq_number);
}
- /* we got a completion! */
- wc->wr_id = cqe->work_request_id;
+ read_lock(&ehca_qp_idr_lock);
+ my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+ read_unlock(&ehca_qp_idr_lock);
+ if (!my_qp)
+ goto repoll;
+ wc->qp = &my_qp->ib_qp;
+
+ qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+ if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+ /* We got a send completion. */
+ qmap = &my_qp->sq_map;
+ else
+ /* We got a receive completion. */
+ qmap = &my_qp->rq_map;
+
+ /* advance the tail pointer */
+ qmap->tail = qmap_tail_idx;
+
+ if (is_error) {
+ /*
+ * set left_to_poll to 0 because in error state, we will not
+ * get any additional CQEs
+ */
+ my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+ my_qp->sq_map.entries);
+ my_qp->sq_map.left_to_poll = 0;
+ ehca_add_to_err_list(my_qp, 1);
+
+ my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+ my_qp->rq_map.entries);
+ my_qp->rq_map.left_to_poll = 0;
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+
+ qmap_entry = &qmap->map[qmap_tail_idx];
+ if (qmap_entry->reported) {
+ ehca_warn(cq->device, "Double cqe on qp_num=%#x",
+ my_qp->real_qp_num);
+ /* found a double cqe, discard it and read next one */
+ goto repoll;
+ }
+
+ wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
+ qmap_entry->reported = 1;
+
+ /* if left_to_poll is decremented to 0, add the QP to the error list */
+ if (qmap->left_to_poll > 0) {
+ qmap->left_to_poll--;
+ if ((my_qp->sq_map.left_to_poll == 0) &&
+ (my_qp->rq_map.left_to_poll == 0)) {
+ ehca_add_to_err_list(my_qp, 1);
+ if (HAS_RQ(my_qp))
+ ehca_add_to_err_list(my_qp, 0);
+ }
+ }
/* eval ib_wc_opcode */
wc->opcode = ib_wc_opcode[cqe->optype]-1;
@@ -667,7 +764,7 @@ poll_cq_one_read_cqe:
ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
my_cq, my_cq->cq_number);
/* update also queue adder to throw away this entry!!! */
- goto poll_cq_one_exit0;
+ goto repoll;
}
/* eval ib_wc_status */
@@ -678,17 +775,16 @@ poll_cq_one_read_cqe:
} else
wc->status = IB_WC_SUCCESS;
- read_lock(&ehca_qp_idr_lock);
- my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
- wc->qp = &my_qp->ib_qp;
- read_unlock(&ehca_qp_idr_lock);
-
wc->byte_len = cqe->nr_bytes_transferred;
wc->pkey_index = cqe->pkey_index;
wc->slid = cqe->rlid;
wc->dlid_path_bits = cqe->dlid;
wc->src_qp = cqe->remote_qp_number;
- wc->wc_flags = cqe->w_completion_flags;
+ /*
+ * HW has "Immed data present" and "GRH present" in bits 6 and 5.
+ * SW defines those in bits 1 and 0, so we can just shift and mask.
+ */
+ wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
@@ -699,13 +795,89 @@ poll_cq_one_exit0:
return ret;
}
+static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq,
+ struct ib_wc *wc, int num_entries,
+ struct ipz_queue *ipz_queue, int on_sq)
+{
+ int nr = 0;
+ struct ehca_wqe *wqe;
+ u64 offset;
+ struct ehca_queue_map *qmap;
+ struct ehca_qmap_entry *qmap_entry;
+
+ if (on_sq)
+ qmap = &my_qp->sq_map;
+ else
+ qmap = &my_qp->rq_map;
+
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ while ((nr < num_entries) && (qmap_entry->reported == 0)) {
+ /* generate flush CQE */
+
+ memset(wc, 0, sizeof(*wc));
+
+ offset = qmap->next_wqe_idx * ipz_queue->qe_size;
+ wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
+ if (!wqe) {
+ ehca_err(cq->device, "Invalid wqe offset=%#llx on "
+ "qp_num=%#x", offset, my_qp->real_qp_num);
+ return nr;
+ }
+
+ wc->wr_id = replace_wr_id(wqe->work_request_id,
+ qmap_entry->app_wr_id);
+
+ if (on_sq) {
+ switch (wqe->optype) {
+ case WQE_OPTYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case WQE_OPTYPE_RDMAWRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case WQE_OPTYPE_RDMAREAD:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ default:
+ ehca_err(cq->device, "Invalid optype=%x",
+ wqe->optype);
+ return nr;
+ }
+ } else
+ wc->opcode = IB_WC_RECV;
+
+ if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) {
+ wc->ex.imm_data = wqe->immediate_data;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ }
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+
+ wc->qp = &my_qp->ib_qp;
+
+ /* mark as reported and advance next_wqe pointer */
+ qmap_entry->reported = 1;
+ qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
+ qmap->entries);
+ qmap_entry = &qmap->map[qmap->next_wqe_idx];
+
+ wc++; nr++;
+ }
+
+ return nr;
+}
+
int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
{
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int nr;
+ struct ehca_qp *err_qp;
struct ib_wc *current_wc = wc;
int ret = 0;
unsigned long flags;
+ int entries_left = num_entries;
if (num_entries < 1) {
ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -715,15 +887,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
}
spin_lock_irqsave(&my_cq->spinlock, flags);
- for (nr = 0; nr < num_entries; nr++) {
+
+ /* generate flush cqes for send queues */
+ list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_squeue, 1);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ /* generate flush cqes for receive queues */
+ list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) {
+ nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left,
+ &err_qp->ipz_rqueue, 0);
+ entries_left -= nr;
+ current_wc += nr;
+
+ if (entries_left == 0)
+ break;
+ }
+
+ for (nr = 0; nr < entries_left; nr++) {
ret = ehca_poll_cq_one(cq, current_wc);
if (ret)
break;
current_wc++;
} /* eof for nr */
+ entries_left -= nr;
+
spin_unlock_irqrestore(&my_cq->spinlock, flags);
if (ret == -EAGAIN || !ret)
- ret = nr;
+ ret = num_entries - entries_left;
poll_cq_exit0:
return ret;
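
Editor's note: the new generate_flush_cqes() walks the queue map from next_wqe_idx and fabricates IB_WC_WR_FLUSH_ERR completions for every WQE the hardware never reported, stopping at the first entry already marked reported. A small self-contained sketch of that walk, with a hypothetical 8-entry map and wrap-around next_index(), follows.

    #include <stdio.h>

    #define ENTRIES 8

    struct qmap_entry {
        unsigned int app_wr_id;
        unsigned int reported;
    };

    /* wrap-around successor, as next_index() does in the driver */
    static unsigned int next_index(unsigned int cur, unsigned int limit)
    {
        cur++;
        return (cur == limit) ? 0 : cur;
    }

    /* emit up to num_entries flush completions for not-yet-reported WQEs */
    static int generate_flush(struct qmap_entry *map, unsigned int *next_idx,
                              int num_entries)
    {
        int nr = 0;

        while (nr < num_entries && !map[*next_idx].reported) {
            printf("flush completion for wr_id %u\n", map[*next_idx].app_wr_id);
            map[*next_idx].reported = 1;          /* never report it twice */
            *next_idx = next_index(*next_idx, ENTRIES);
            nr++;
        }
        return nr;
    }

    int main(void)
    {
        struct qmap_entry map[ENTRIES] = { { 10, 1 }, { 11, 0 }, { 12, 0 },
                                           { 13, 0 }, { 14, 1 }, { 15, 1 },
                                           { 16, 1 }, { 17, 1 } };
        unsigned int next_idx = 1;   /* first WQE the hardware never completed */

        printf("flushed %d entries\n", generate_flush(map, &next_idx, 4));
        return 0;
    }
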
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 706d97ad555..dba8f9f8b99 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -46,11 +46,11 @@
#include "ehca_iverbs.h"
#include "hcp_if.h"
-#define IB_MAD_STATUS_REDIRECT __constant_htons(0x0002)
-#define IB_MAD_STATUS_UNSUP_VERSION __constant_htons(0x0004)
-#define IB_MAD_STATUS_UNSUP_METHOD __constant_htons(0x0008)
+#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002)
+#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004)
+#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008)
-#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
+#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
/**
* ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
@@ -85,7 +85,7 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
if (ret != H_SUCCESS) {
ehca_err(&shca->ib_device,
- "Can't define AQP1 for port %x. h_ret=%li",
+ "Can't define AQP1 for port %x. h_ret=%lli",
port, ret);
return ret;
}
@@ -125,14 +125,30 @@ struct ib_perf {
u8 data[192];
} __attribute__ ((packed));
+/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
+struct tcslfl {
+ u32 tc:8;
+ u32 sl:4;
+ u32 fl:20;
+} __attribute__ ((packed));
+
+/* IP Version/TC/FL packed into 32 bits, as in GRH */
+struct vertcfl {
+ u32 ver:4;
+ u32 tc:8;
+ u32 fl:20;
+} __attribute__ ((packed));
static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
struct ib_perf *in_perf = (struct ib_perf *)in_mad;
struct ib_perf *out_perf = (struct ib_perf *)out_mad;
struct ib_class_port_info *poi =
(struct ib_class_port_info *)out_perf->data;
+ struct tcslfl *tcslfl =
+ (struct tcslfl *)&poi->redirect_tcslfl;
struct ehca_shca *shca =
container_of(ibdev, struct ehca_shca, ib_device);
struct ehca_sport *sport = &shca->sport[port_num - 1];
@@ -158,10 +174,29 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
poi->base_version = 1;
poi->class_version = 1;
poi->resp_time_value = 18;
- poi->redirect_lid = sport->saved_attr.lid;
- poi->redirect_qp = sport->pma_qp_nr;
+
+ /* copy local routing information from WC where applicable */
+ tcslfl->sl = in_wc->sl;
+ poi->redirect_lid =
+ sport->saved_attr.lid | in_wc->dlid_path_bits;
+ poi->redirect_qp = sport->pma_qp_nr;
poi->redirect_qkey = IB_QP1_QKEY;
- poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
+
+ ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
+ &poi->redirect_pkey);
+
+ /* if request was globally routed, copy route info */
+ if (in_grh) {
+ struct vertcfl *vertcfl =
+ (struct vertcfl *)&in_grh->version_tclass_flow;
+ memcpy(poi->redirect_gid, in_grh->dgid.raw,
+ sizeof(poi->redirect_gid));
+ tcslfl->tc = vertcfl->tc;
+ tcslfl->fl = vertcfl->fl;
+ } else
+ /* else only fill in default GID */
+ ehca_query_gid(ibdev, port_num, 0,
+ (union ib_gid *)&poi->redirect_gid);
ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
sport->saved_attr.lid, sport->pma_qp_nr);
@@ -183,12 +218,11 @@ perf_reply:
int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad,
- struct ib_mad *out_mad)
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
{
int ret;
- if (!port_num || port_num > ibdev->phys_port_cnt)
+ if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
return IB_MAD_RESULT_FAILURE;
/* accept only pma request */
@@ -196,7 +230,8 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS;
ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
- ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad);
+ ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
+ in_mad, out_mad);
return ret;
}
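
Editor's note: ehca_process_perf() now fills the PMA redirect fields by repacking TC/SL/FL from the work completion and, when a GRH is present, from the GRH. The shift-based sketch below assumes the most-significant-bit-first layout that the bitfield declarations above produce on big-endian POWER (GRH word: 4-bit version, 8-bit traffic class, 20-bit flow label; ClassPortInfo word: 8-bit TC, 4-bit SL, 20-bit FL).

    #include <stdint.h>
    #include <stdio.h>

    /* pack RedirectTC(8)/RedirectSL(4)/RedirectFL(20) into one 32-bit word */
    static uint32_t pack_tcslfl(uint8_t tc, uint8_t sl, uint32_t fl)
    {
        return ((uint32_t)tc << 24) |
               (((uint32_t)sl & 0xf) << 20) |
               (fl & 0xfffff);
    }

    /* split the GRH Version(4)/TClass(8)/FlowLabel(20) word the same way */
    static void unpack_vertcfl(uint32_t w, uint8_t *ver, uint8_t *tc, uint32_t *fl)
    {
        *ver = w >> 28;
        *tc  = (w >> 20) & 0xff;
        *fl  = w & 0xfffff;
    }

    int main(void)
    {
        uint8_t ver, tc;
        uint32_t fl;

        unpack_vertcfl(0x60abcdefU, &ver, &tc, &fl);   /* version 6, as in a GRH */
        printf("ver=%u tc=%u fl=%#x -> tcslfl=%#x\n",
               ver, tc, fl, pack_tcslfl(tc, 3 /* SL from the WC */, fl));
        return 0;
    }
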
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index ec950bf8c47..d280b12aae6 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -54,13 +54,11 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
-#include <linux/version.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/device.h>
-#include <asm/atomic.h>
-#include <asm/abs_addr.h>
+#include <linux/atomic.h>
#include <asm/ibmebus.h>
#include <asm/io.h>
#include <asm/pgtable.h>
@@ -117,7 +115,7 @@ extern int ehca_debug_level;
unsigned char *deb = (unsigned char *)(adr); \
for (x = 0; x < l; x += 16) { \
printk(KERN_INFO "EHCA_DMP:%s " format \
- " adr=%p ofs=%04x %016lx %016lx\n", \
+ " adr=%p ofs=%04x %016llx %016llx\n", \
__func__, ##args, deb, x, \
*((u64 *)&deb[0]), *((u64 *)&deb[8])); \
deb += 16; \
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index e43ed8f8a0c..1a1d5d99fcf 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -40,6 +40,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_classes.h"
#include "ehca_iverbs.h"
#include "ehca_mrmw.h"
@@ -95,7 +97,7 @@ static void ehca_mm_close(struct vm_area_struct *vma)
vma->vm_start, vma->vm_end, *count);
}
-static struct vm_operations_struct vm_ops = {
+static const struct vm_operations_struct vm_ops = {
.open = ehca_mm_open,
.close = ehca_mm_close,
};
@@ -114,8 +116,8 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
physical = galpas->user.fw_handle;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
- /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+ ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
+ /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
vma->vm_page_prot);
if (unlikely(ret)) {
@@ -137,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
u64 start, ofs;
struct page *page;
- vma->vm_flags |= VM_RESERVED;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
start = vma->vm_start;
for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 415d3a465de..89517ffb438 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -90,26 +90,6 @@
static DEFINE_SPINLOCK(hcall_lock);
-static u32 get_longbusy_msecs(int longbusy_rc)
-{
- switch (longbusy_rc) {
- case H_LONG_BUSY_ORDER_1_MSEC:
- return 1;
- case H_LONG_BUSY_ORDER_10_MSEC:
- return 10;
- case H_LONG_BUSY_ORDER_100_MSEC:
- return 100;
- case H_LONG_BUSY_ORDER_1_SEC:
- return 1000;
- case H_LONG_BUSY_ORDER_10_SEC:
- return 10000;
- case H_LONG_BUSY_ORDER_100_SEC:
- return 100000;
- default:
- return 1;
- }
-}
-
static long ehca_plpar_hcall_norets(unsigned long opcode,
unsigned long arg1,
unsigned long arg2,
@@ -226,7 +206,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
u32 *eq_ist)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
u64 allocate_controls;
/* resource type */
@@ -249,7 +229,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
*eq_ist = (u32)outs[5];
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resource - ret=%li ", ret);
+ ehca_gen_err("Not enough resource - ret=%lli ", ret);
return ret;
}
@@ -269,8 +249,9 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
struct ehca_cq *cq,
struct ehca_alloc_cq_parms *param)
{
+ int rc;
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
@@ -283,21 +264,33 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
param->act_nr_of_entries = (u32)outs[3];
param->act_pages = (u32)outs[4];
- if (ret == H_SUCCESS)
- hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
+ if (ret == H_SUCCESS) {
+ rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
+ if (rc) {
+ ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
+ rc, outs[5]);
+
+ ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ cq->ipz_cq_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+ ret = H_NO_MEM;
+ }
+ }
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resources. ret=%li", ret);
+ ehca_gen_err("Not enough resources. ret=%lli", ret);
return ret;
}
u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
- struct ehca_alloc_qp_parms *parms)
+ struct ehca_alloc_qp_parms *parms, int is_user)
{
+ int rc;
u64 ret;
u64 allocate_controls, max_r10_reg, r11, r12;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
allocate_controls =
EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
@@ -358,11 +351,22 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
parms->rqueue.queue_size =
(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
- if (ret == H_SUCCESS)
- hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
+ if (ret == H_SUCCESS) {
+ rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
+ if (rc) {
+ ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
+ rc, outs[6]);
+
+ ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+ adapter_handle.handle, /* r4 */
+ parms->qp_handle.handle, /* r5 */
+ 0, 0, 0, 0, 0);
+ ret = H_NO_MEM;
+ }
+ }
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resources. ret=%li", ret);
+ ehca_gen_err("Not enough resources. ret=%lli", ret);
return ret;
}
@@ -372,7 +376,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_port *query_port_response_block)
{
u64 ret;
- u64 r_cb = virt_to_abs(query_port_response_block);
+ u64 r_cb = __pa(query_port_response_block);
if (r_cb & (EHCA_PAGESIZE-1)) {
ehca_gen_err("response block not page aligned");
@@ -414,7 +418,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
struct hipz_query_hca *query_hca_rblock)
{
- u64 r_cb = virt_to_abs(query_hca_rblock);
+ u64 r_cb = __pa(query_hca_rblock);
if (r_cb & (EHCA_PAGESIZE-1)) {
ehca_gen_err("response_block=%p not page aligned",
@@ -454,7 +458,7 @@ u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
const u64 count)
{
if (count != 1) {
- ehca_gen_err("Ppage counter=%lx", count);
+ ehca_gen_err("Ppage counter=%llx", count);
return H_PARAMETER;
}
return hipz_h_register_rpage(adapter_handle,
@@ -489,7 +493,7 @@ u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
const struct h_galpa gal)
{
if (count != 1) {
- ehca_gen_err("Page counter=%lx", count);
+ ehca_gen_err("Page counter=%llx", count);
return H_PARAMETER;
}
@@ -508,7 +512,7 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
const struct h_galpa galpa)
{
if (count > 1) {
- ehca_gen_err("Page counter=%lx", count);
+ ehca_gen_err("Page counter=%llx", count);
return H_PARAMETER;
}
@@ -525,7 +529,7 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
int dis_and_get_function_code)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
adapter_handle.handle, /* r4 */
@@ -548,16 +552,16 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
struct h_galpa gal)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
adapter_handle.handle, /* r4 */
qp_handle.handle, /* r5 */
update_mask, /* r6 */
- virt_to_abs(mqpcb), /* r7 */
+ __pa(mqpcb), /* r7 */
0, 0, 0, 0, 0);
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Insufficient resources ret=%li", ret);
+ ehca_gen_err("Insufficient resources ret=%lli", ret);
return ret;
}
@@ -571,7 +575,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
return ehca_plpar_hcall_norets(H_QUERY_QP,
adapter_handle.handle, /* r4 */
qp_handle.handle, /* r5 */
- virt_to_abs(qqpcb), /* r6 */
+ __pa(qqpcb), /* r6 */
0, 0, 0, 0);
}
@@ -579,7 +583,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
struct ehca_qp *qp)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = hcp_galpas_dtor(&qp->galpas);
if (ret) {
@@ -593,7 +597,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
qp->ipz_qp_handle.handle, /* r6 */
0, 0, 0, 0, 0, 0);
if (ret == H_HARDWARE)
- ehca_gen_err("HCA not operational. ret=%li", ret);
+ ehca_gen_err("HCA not operational. ret=%lli", ret);
ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
adapter_handle.handle, /* r4 */
@@ -601,7 +605,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_RESOURCE)
- ehca_gen_err("Resource still in use. ret=%li", ret);
+ ehca_gen_err("Resource still in use. ret=%lli", ret);
return ret;
}
@@ -625,7 +629,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
u32 * bma_qp_nr)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
adapter_handle.handle, /* r4 */
@@ -636,7 +640,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
*bma_qp_nr = (u32)outs[1];
if (ret == H_ALIAS_EXIST)
- ehca_gen_err("AQP1 already exists. ret=%li", ret);
+ ehca_gen_err("AQP1 already exists. ret=%lli", ret);
return ret;
}
@@ -658,7 +662,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
0, 0);
if (ret == H_NOT_ENOUGH_RESOURCES)
- ehca_gen_err("Not enough resources. ret=%li", ret);
+ ehca_gen_err("Not enough resources. ret=%lli", ret);
return ret;
}
@@ -697,7 +701,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0);
if (ret == H_RESOURCE)
- ehca_gen_err("H_FREE_RESOURCE failed ret=%li ", ret);
+ ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
return ret;
}
@@ -719,7 +723,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
0, 0, 0, 0, 0);
if (ret == H_RESOURCE)
- ehca_gen_err("Resource in use. ret=%li ", ret);
+ ehca_gen_err("Resource in use. ret=%lli ", ret);
return ret;
}
@@ -733,7 +737,7 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
@@ -763,7 +767,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
if (count > 1) {
u64 *kpage;
int i;
- kpage = (u64 *)abs_to_virt(logical_address_of_page);
+ kpage = __va(logical_address_of_page);
for (i = 0; i < count; i++)
ehca_gen_dbg("kpage[%d]=%p",
i, (void *)kpage[i]);
@@ -774,9 +778,9 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
ehca_gen_err("logical_address_of_page not on a 4k boundary "
- "adapter_handle=%lx mr=%p mr_handle=%lx "
+ "adapter_handle=%llx mr=%p mr_handle=%llx "
"pagesize=%x queue_type=%x "
- "logical_address_of_page=%lx count=%lx",
+ "logical_address_of_page=%llx count=%llx",
adapter_handle.handle, mr,
mr->ipz_mr_handle.handle, pagesize, queue_type,
logical_address_of_page, count);
@@ -794,7 +798,7 @@ u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
adapter_handle.handle, /* r4 */
@@ -828,7 +832,7 @@ u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
adapter_handle.handle, /* r4 */
@@ -855,7 +859,7 @@ u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
struct ehca_mr_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
adapter_handle.handle, /* r4 */
@@ -877,7 +881,7 @@ u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
struct ehca_mw_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
adapter_handle.handle, /* r4 */
@@ -895,7 +899,7 @@ u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
struct ehca_mw_hipzout_parms *outparms)
{
u64 ret;
- u64 outs[PLPAR_HCALL9_BUFSIZE];
+ unsigned long outs[PLPAR_HCALL9_BUFSIZE];
ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
adapter_handle.handle, /* r4 */
@@ -920,7 +924,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
void *rblock,
unsigned long *byte_count)
{
- u64 r_cb = virt_to_abs(rblock);
+ u64 r_cb = __pa(rblock);
if (r_cb & (EHCA_PAGESIZE-1)) {
ehca_gen_err("rblock not page aligned.");
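
Editor's note: several hcall wrappers above reject a response block unless its physical address is EHCA_PAGESIZE aligned; the test is the usual power-of-two mask idiom, as in this minimal sketch (the 4 KiB page size is assumed for illustration).

    #include <stdint.h>
    #include <stdio.h>

    #define EHCA_PAGESIZE 4096UL   /* firmware page size assumed here */

    /* true when addr sits on an EHCA_PAGESIZE boundary (power-of-two trick) */
    static int page_aligned(uint64_t addr)
    {
        return (addr & (EHCA_PAGESIZE - 1)) == 0;
    }

    int main(void)
    {
        printf("%d %d\n", page_aligned(0x10000), page_aligned(0x10010));
        return 0;
    }
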
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
index 2c3c6e0ea5c..a46e514c367 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.h
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -49,7 +49,7 @@
#include "hipz_hw.h"
/*
- * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize
+ * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize
* resources, create the empty EQPT (ring).
*/
u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
@@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
* initialize resources, create empty QPPTs (2 rings).
*/
u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
- struct ehca_alloc_qp_parms *parms);
+ struct ehca_alloc_qp_parms *parms, int is_user);
u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
const u8 port_id,
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
index 214821095cb..077376ff3d2 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.c
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -42,10 +42,9 @@
#include "ehca_classes.h"
#include "hipz_hw.h"
-int hcall_map_page(u64 physaddr, u64 *mapaddr)
+u64 hcall_map_page(u64 physaddr)
{
- *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
- return 0;
+ return (u64)ioremap(physaddr, EHCA_PAGESIZE);
}
int hcall_unmap_page(u64 mapaddr)
@@ -54,12 +53,15 @@ int hcall_unmap_page(u64 mapaddr)
return 0;
}
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
u64 paddr_kernel, u64 paddr_user)
{
- int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
- if (ret)
- return ret;
+ if (!is_user) {
+ galpas->kernel.fw_handle = hcall_map_page(paddr_kernel);
+ if (!galpas->kernel.fw_handle)
+ return -ENOMEM;
+ } else
+ galpas->kernel.fw_handle = 0;
galpas->user.fw_handle = paddr_user;
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
index 5305c2a3ed9..d1b02991024 100644
--- a/drivers/infiniband/hw/ehca/hcp_phyp.h
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -78,12 +78,12 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
*(volatile u64 __force *)addr = value;
}
-int hcp_galpas_ctor(struct h_galpas *galpas,
+int hcp_galpas_ctor(struct h_galpas *galpas, int is_user,
u64 paddr_kernel, u64 paddr_user);
int hcp_galpas_dtor(struct h_galpas *galpas);
-int hcall_map_page(u64 physaddr, u64 * mapaddr);
+u64 hcall_map_page(u64 physaddr);
int hcall_unmap_page(u64 mapaddr);
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 661f8db6270..8d594517cd2 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -38,6 +38,8 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/slab.h>
+
#include "ehca_tools.h"
#include "ipz_pt_fn.h"
#include "ehca_classes.h"
@@ -79,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
{
int i;
for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
- u64 page = (u64)virt_to_abs(queue->queue_pages[i]);
+ u64 page = __pa(queue->queue_pages[i]);
if (addr >= page && addr < page + queue->pagesize) {
*q_offset = addr - page + i * queue->pagesize;
return 0;
@@ -163,6 +165,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
out:
ehca_err(pd->ib_pd.device, "failed to allocate small queue page");
+ mutex_unlock(&pd->lock);
return 0;
}
@@ -219,12 +222,15 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
queue->small_page = NULL;
/* allocate queue page pointers */
- queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
+ queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *),
+ GFP_KERNEL | __GFP_NOWARN);
if (!queue->queue_pages) {
- ehca_gen_err("Couldn't allocate queue page list");
- return 0;
+ queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *));
+ if (!queue->queue_pages) {
+ ehca_gen_err("Couldn't allocate queue page list");
+ return 0;
+ }
}
- memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
/* allocate actual queue pages */
if (is_small) {
@@ -239,7 +245,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue,
ipz_queue_ctor_exit0:
ehca_gen_err("Couldn't alloc pages queue=%p "
"nr_of_pages=%x", queue, nr_of_pages);
- vfree(queue->queue_pages);
+ if (is_vmalloc_addr(queue->queue_pages))
+ vfree(queue->queue_pages);
+ else
+ kfree(queue->queue_pages);
return 0;
}
@@ -261,7 +270,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue)
free_page((unsigned long)queue->queue_pages[i]);
}
- vfree(queue->queue_pages);
+ if (is_vmalloc_addr(queue->queue_pages))
+ vfree(queue->queue_pages);
+ else
+ kfree(queue->queue_pages);
return 1;
}
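
Editor's note: the queue page list is now allocated with kzalloc() plus __GFP_NOWARN and quietly falls back to vzalloc(), with is_vmalloc_addr() choosing the matching free. The kernel-context sketch below restates that pattern under hypothetical helper names; it is not standalone userspace code. Later kernels fold the same fallback into kvzalloc()/kvfree().

    /* sketch only: kernel context, hypothetical helpers */
    #include <linux/slab.h>
    #include <linux/vmalloc.h>
    #include <linux/mm.h>

    static void *alloc_page_list(size_t nr_of_pages)
    {
        /* try physically contiguous memory first, without a warning splat */
        void *p = kzalloc(nr_of_pages * sizeof(void *),
                          GFP_KERNEL | __GFP_NOWARN);

        if (!p)
            p = vzalloc(nr_of_pages * sizeof(void *));
        return p;               /* may still be NULL */
    }

    static void free_page_list(void *p)
    {
        /* pick the free that matches whichever allocator succeeded */
        if (is_vmalloc_addr(p))
            vfree(p);
        else
            kfree(p);
    }
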
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 3c7968f25ec..1d9bb115cbf 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,9 +1,11 @@
config INFINIBAND_IPATH
- tristate "QLogic InfiniPath Driver"
- depends on 64BIT && NET
+ tristate "QLogic HTX HCA support"
+ depends on 64BIT && NET && HT_IRQ
---help---
- This is a driver for QLogic InfiniPath host channel adapters,
+ This is a driver for the obsolete QLogic Hyper-Transport
+ IB host channel adapter (model QHT7140),
including InfiniBand verbs support. This driver allows these
devices to be used with both kernel upper level protocols such
as IP-over-InfiniBand as well as with userspace applications
(in conjunction with InfiniBand userspace access).
+ For QLogic PCIe QLE based cards, use the QIB driver instead.
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index bf945006198..4496f2820c9 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,4 +1,4 @@
-EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
+ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \
-DIPATH_KERN_TYPE=0
obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
@@ -29,13 +29,9 @@ ib_ipath-y := \
ipath_user_pages.o \
ipath_user_sdma.o \
ipath_verbs_mcast.o \
- ipath_verbs.o \
- ipath_iba7220.o \
- ipath_sd7220.o \
- ipath_sd7220_img.o
+ ipath_verbs.o
ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
-ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o
ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index d385e4168c9..0416c6c0e12 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -32,6 +32,7 @@
*/
#include <linux/err.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index d4ce8b63e19..45802e97332 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -45,6 +45,7 @@
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <linux/export.h>
#include <asm/uaccess.h>
#include "ipath_kernel.h"
@@ -65,7 +66,8 @@ static const struct file_operations diag_file_ops = {
.write = ipath_diag_write,
.read = ipath_diag_read,
.open = ipath_diag_open,
- .release = ipath_diag_release
+ .release = ipath_diag_release,
+ .llseek = default_llseek,
};
static ssize_t ipath_diagpkt_write(struct file *fp,
@@ -75,6 +77,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
static const struct file_operations diagpkt_file_ops = {
.owner = THIS_MODULE,
.write = ipath_diagpkt_write,
+ .llseek = noop_llseek,
};
static atomic_t diagpkt_count = ATOMIC_INIT(0);
@@ -323,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct ipath_diag_pkt odp;
struct ipath_diag_xpkt dp;
u32 *tmpbuf = NULL;
@@ -332,42 +335,24 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
u64 val;
u32 l_state, lt_state; /* LinkState, LinkTrainingState */
- if (count < sizeof(odp)) {
- ret = -EINVAL;
- goto bail;
- }
if (count == sizeof(dp)) {
if (copy_from_user(&dp, data, sizeof(dp))) {
ret = -EFAULT;
goto bail;
}
- } else if (copy_from_user(&odp, data, sizeof(odp))) {
- ret = -EFAULT;
- goto bail;
- }
-
- /*
- * Due to padding/alignment issues (lessened with new struct)
- * the old and new structs are the same length. We need to
- * disambiguate them, which we can do because odp.len has never
- * been less than the total of LRH+BTH+DETH so far, while
- * dp.unit (same offset) unit is unlikely to get that high.
- * Similarly, dp.data, the pointer to user at the same offset
- * as odp.unit, is almost certainly at least one (512byte)page
- * "above" NULL. The if-block below can be omitted if compatibility
- * between a new driver and older diagnostic code is unimportant.
- * compatibility the other direction (new diags, old driver) is
- * handled in the diagnostic code, with a warning.
- */
- if (dp.unit >= 20 && dp.data < 512) {
- /* very probable version mismatch. Fix it up */
- memcpy(&odp, &dp, sizeof(odp));
- /* We got a legacy dp, copy elements to dp */
+ } else if (count == sizeof(odp)) {
+ if (copy_from_user(&odp, data, sizeof(odp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ dp.len = odp.len;
dp.unit = odp.unit;
dp.data = odp.data;
- dp.len = odp.len;
- dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+ dp.pbc_wd = 0;
+ } else {
+ ret = -EINVAL;
+ goto bail;
}
/* send count must be an exact number of dwords */
@@ -376,7 +361,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- clen = dp.len >> 2;
+ plen = dp.len >> 2;
dd = ipath_lookup(dp.unit);
if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
@@ -419,16 +404,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
-
- if ((plen + 4) > dd->ipath_ibmaxlen) {
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
- plen - 4, dd->ipath_ibmaxlen);
+ dp.len, dd->ipath_ibmaxlen);
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
@@ -470,11 +461,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
*/
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
ipath_flush_wc();
- __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
ipath_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen);
ipath_flush_wc();
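
Editor's note: ipath_diagpkt_write() now tells the old and new user structures apart purely by the byte count of the write, instead of guessing from field values. A userspace sketch of that dispatch, using hypothetical stand-in structs (the real ipath_diag_pkt/ipath_diag_xpkt layouts differ), is below.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* hypothetical stand-ins for the old and new user ABI structs */
    struct old_pkt { uint32_t len; uint32_t unit; uint64_t data; };
    struct new_pkt { uint32_t len; uint32_t unit; uint64_t data; uint32_t pbc_wd; };

    /* accept either layout, keyed purely on the number of bytes written */
    static int parse_pkt(const void *buf, size_t count, struct new_pkt *out)
    {
        if (count == sizeof(struct new_pkt)) {
            memcpy(out, buf, sizeof(*out));
        } else if (count == sizeof(struct old_pkt)) {
            struct old_pkt old;

            memcpy(&old, buf, sizeof(old));
            out->len    = old.len;
            out->unit   = old.unit;
            out->data   = old.data;
            out->pbc_wd = 0;        /* ask the driver to compute it */
        } else {
            return -1;              /* -EINVAL in the driver */
        }
        return 0;
    }

    int main(void)
    {
        struct old_pkt old = { 256, 0, 0x1000 };
        struct new_pkt pkt;
        int rc = parse_pkt(&old, sizeof(old), &pkt);

        printf("rc=%d len=%u pbc_wd=%u\n", rc, pkt.len, pkt.pbc_wd);
        return 0;
    }
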
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index e90a0ea538a..123a8c05353 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -31,6 +31,7 @@
*/
#include <linux/scatterlist.h>
+#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include "ipath_verbs.h"
@@ -114,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
@@ -125,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int ipath_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void ipath_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size,
@@ -175,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
}
struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
- ipath_mapping_error,
- ipath_dma_map_single,
- ipath_dma_unmap_single,
- ipath_dma_map_page,
- ipath_dma_unmap_page,
- ipath_map_sg,
- ipath_unmap_sg,
- ipath_sg_dma_address,
- ipath_sg_dma_len,
- ipath_sync_single_for_cpu,
- ipath_sync_single_for_device,
- ipath_dma_alloc_coherent,
- ipath_dma_free_coherent
+ .mapping_error = ipath_mapping_error,
+ .map_single = ipath_dma_map_single,
+ .unmap_single = ipath_dma_unmap_single,
+ .map_page = ipath_dma_map_page,
+ .unmap_page = ipath_dma_unmap_page,
+ .map_sg = ipath_map_sg,
+ .unmap_sg = ipath_unmap_sg,
+ .sync_single_for_cpu = ipath_sync_single_for_cpu,
+ .sync_single_for_device = ipath_sync_single_for_device,
+ .alloc_coherent = ipath_dma_alloc_coherent,
+ .free_coherent = ipath_dma_free_coherent
};
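
Editor's note: the DMA ops table switches from positional to designated initializers, so dropping sg_dma_address/sg_dma_len cannot silently shift the remaining callbacks into the wrong slots. A tiny illustration of why designated initializers are the safer form:

    #include <stdio.h>

    struct ops {
        int (*open)(void);
        int (*close)(void);
        int (*reset)(void);
    };

    static int my_open(void)  { return 0; }
    static int my_close(void) { return 0; }

    /*
     * Designated initializers bind each function to its slot by name;
     * fields left out (.reset here) are zeroed instead of inheriting
     * whatever happens to come next in positional order.
     */
    static const struct ops my_ops = {
        .open  = my_open,
        .close = my_close,
    };

    int main(void)
    {
        printf("reset is %s\n", my_ops.reset ? "set" : "NULL");
        return 0;
    }
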
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index daad09a4591..bd0caedafe9 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
@@ -38,6 +39,9 @@
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -123,24 +127,18 @@ const char *ipath_ibcstatus_str[] = {
"LTState1C", "LTState1D", "LTState1E", "LTState1F"
};
-static void __devexit ipath_remove_one(struct pci_dev *);
-static int __devinit ipath_init_one(struct pci_dev *,
- const struct pci_device_id *);
+static void ipath_remove_one(struct pci_dev *);
+static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
/* Only needed for registration, nothing else needs this info */
#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_VENDOR_ID_QLOGIC 0x1077
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
-#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
-#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220
/* Number of seconds before our card status check... */
#define STATUS_TIMEOUT 60
static const struct pci_device_id ipath_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
- { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
- { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) },
{ 0, }
};
@@ -149,7 +147,7 @@ MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
static struct pci_driver ipath_driver = {
.name = IPATH_DRV_NAME,
.probe = ipath_init_one,
- .remove = __devexit_p(ipath_remove_one),
+ .remove = ipath_remove_one,
.id_table = ipath_pci_tbl,
.driver = {
.groups = ipath_driver_attr_groups,
@@ -196,22 +194,17 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
struct ipath_devdata *dd;
int ret;
- if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- dd = vmalloc(sizeof(*dd));
+ dd = vzalloc(sizeof(*dd));
if (!dd) {
dd = ERR_PTR(-ENOMEM);
goto bail;
}
- memset(dd, 0, sizeof(*dd));
dd->ipath_unit = -1;
+ idr_preload(GFP_KERNEL);
spin_lock_irqsave(&ipath_devs_lock, flags);
- ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
+ ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT);
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate unit ID: error %d\n", -ret);
@@ -219,6 +212,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
dd = ERR_PTR(ret);
goto bail_unlock;
}
+ dd->ipath_unit = ret;
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
@@ -227,7 +221,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
bail_unlock:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
+ idr_preload_end();
bail:
return dd;
}
@@ -392,14 +386,14 @@ done:
ipath_enable_armlaunch(dd);
}
-static int __devinit ipath_init_one(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static void cleanup_device(struct ipath_devdata *dd);
+
+static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret, len, j;
struct ipath_devdata *dd;
unsigned long long addr;
u32 bar0 = 0, bar1 = 0;
- u8 rev;
dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) {
@@ -470,14 +464,14 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail_disable;
}
- ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret) {
/*
* if the 64 bit setup fails, try 32 bit. Some systems
* do not setup 64 bit maps on systems with 2GB or less
* memory installed.
*/
- ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) {
dev_info(&pdev->dev,
"Unable to set DMA mask for unit %u: %d\n",
@@ -486,7 +480,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
else {
ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
- ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret)
dev_info(&pdev->dev,
"Unable to set DMA consistent mask "
@@ -496,7 +490,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
}
}
else {
- ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (ret)
dev_info(&pdev->dev,
"Unable to set DMA consistent mask "
@@ -518,30 +512,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
/* setup the chip-specific functions, as early as possible. */
switch (ent->device) {
case PCI_DEVICE_ID_INFINIPATH_HT:
-#ifdef CONFIG_HT_IRQ
ipath_init_iba6110_funcs(dd);
break;
-#else
- ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
- "CONFIG_HT_IRQ is not enabled\n", ent->device);
- return -ENODEV;
-#endif
- case PCI_DEVICE_ID_INFINIPATH_PE800:
-#ifdef CONFIG_PCI_MSI
- ipath_init_iba6120_funcs(dd);
- break;
-#else
- ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
- "CONFIG_PCI_MSI is not enabled\n", ent->device);
- return -ENODEV;
-#endif
- case PCI_DEVICE_ID_INFINIPATH_7220:
-#ifndef CONFIG_PCI_MSI
- ipath_dbg("CONFIG_PCI_MSI is not enabled, "
- "using INTx for unit %u\n", dd->ipath_unit);
-#endif
- ipath_init_iba7220_funcs(dd);
- break;
+
default:
ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
"failing\n", ent->device);
@@ -551,9 +524,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
for (j = 0; j < 6; j++) {
if (!pdev->resource[j].start)
continue;
- ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
- j, (unsigned long long)pdev->resource[j].start,
- (unsigned long long)pdev->resource[j].end,
+ ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
+ j, &pdev->resource[j],
(unsigned long long)pci_resource_len(pdev, j));
}
@@ -563,13 +535,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail_regions;
}
- ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
- if (ret) {
- ipath_dev_err(dd, "Failed to read PCI revision ID unit "
- "%u: err %d\n", dd->ipath_unit, -ret);
- goto bail_regions; /* shouldn't ever happen */
- }
- dd->ipath_pcirev = rev;
+ dd->ipath_pcirev = pdev->revision;
#if defined(__powerpc__)
/* There isn't a generic way to specify writethrough mappings */
@@ -639,8 +605,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
goto bail;
bail_irqsetup:
- if (pdev->irq)
- free_irq(pdev->irq, dd);
+ cleanup_device(dd);
+
+ if (dd->ipath_irq)
+ dd->ipath_f_free_irq(dd);
+
+ if (dd->ipath_f_cleanup)
+ dd->ipath_f_cleanup(dd);
bail_iounmap:
iounmap((volatile void __iomem *) dd->ipath_kregbase);
@@ -658,9 +629,11 @@ bail:
return ret;
}
-static void __devexit cleanup_device(struct ipath_devdata *dd)
+static void cleanup_device(struct ipath_devdata *dd)
{
int port;
+ struct ipath_portdata **tmp;
+ unsigned long flags;
if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
/* can't do anything more with chip; needs re-init */
@@ -742,23 +715,24 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
/*
* free any resources still in use (usually just kernel ports)
- * at unload; we do for portcnt, not cfgports, because cfgports
- * could have changed while we were loaded.
+ * at unload; we do for portcnt, because that's what we allocate.
+ * We acquire lock to be really paranoid that ipath_pd isn't being
+ * accessed from some interrupt-related code (that should not happen,
+ * but best to be sure).
*/
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+ tmp = dd->ipath_pd;
+ dd->ipath_pd = NULL;
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
for (port = 0; port < dd->ipath_portcnt; port++) {
- struct ipath_portdata *pd = dd->ipath_pd[port];
- dd->ipath_pd[port] = NULL;
+ struct ipath_portdata *pd = tmp[port];
+ tmp[port] = NULL; /* debugging paranoia */
ipath_free_pddata(dd, pd);
}
- kfree(dd->ipath_pd);
- /*
- * debuggability, in case some cleanup path tries to use it
- * after this
- */
- dd->ipath_pd = NULL;
+ kfree(tmp);
}
-static void __devexit ipath_remove_one(struct pci_dev *pdev)
+static void ipath_remove_one(struct pci_dev *pdev)
{
struct ipath_devdata *dd = pci_get_drvdata(pdev);
@@ -770,7 +744,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
*/
ipath_shutdown_device(dd);
- flush_scheduled_work();
+ flush_workqueue(ib_wq);
if (dd->verbs_dev)
ipath_unregister_ib_device(dd->verbs_dev);
@@ -1259,7 +1233,7 @@ reloop:
*/
ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
" %x, len %x hdrq+%x rhf: %Lx\n",
- etail, tlen, l,
+ etail, tlen, l, (unsigned long long)
le64_to_cpu(*(__le64 *) rhf_addr));
if (ipath_debug & __IPATH_ERRPKTDBG) {
u32 j, *d, dw = rsize-2;
@@ -1457,7 +1431,8 @@ static void ipath_reset_availshadow(struct ipath_devdata *dd)
0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
if (oldval != dd->ipath_pioavailshadow[i])
ipath_dbg("shadow[%d] was %Lx, now %lx\n",
- i, oldval, dd->ipath_pioavailshadow[i]);
+ i, (unsigned long long) oldval,
+ dd->ipath_pioavailshadow[i]);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
@@ -1692,7 +1667,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail)
{
unsigned long flags;
- unsigned end, cnt = 0, next;
+ unsigned end, cnt = 0;
/* There are two bits per send buffer (busy and generation) */
start *= 2;
@@ -1743,12 +1718,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
if (dd->ipath_pioupd_thresh) {
end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
- next = find_first_bit(dd->ipath_pioavailkernel, end);
- while (next < end) {
- cnt++;
- next = find_next_bit(dd->ipath_pioavailkernel, end,
- next + 1);
- }
+ cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
@@ -2411,7 +2381,7 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
/*
* clear SerdesEnable and turn the leds off; do this here because
* we are unloading, so don't count on interrupts to move along
- * Turn the LEDs off explictly for the same reason.
+ * Turn the LEDs off explicitly for the same reason.
*/
dd->ipath_f_quiet_serdes(dd);
@@ -2530,11 +2500,6 @@ static int __init infinipath_init(void)
* the PCI subsystem.
*/
idr_init(&unit_table);
- if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
- printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n");
- ret = -ENOMEM;
- goto bail;
- }
ret = pci_register_driver(&ipath_driver);
if (ret < 0) {
@@ -2585,6 +2550,7 @@ int ipath_reset_device(int unit)
{
int ret, i;
struct ipath_devdata *dd = ipath_lookup(unit);
+ unsigned long flags;
if (!dd) {
ret = -ENODEV;
@@ -2610,18 +2576,21 @@ int ipath_reset_device(int unit)
goto bail;
}
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
if (dd->ipath_pd)
for (i = 1; i < dd->ipath_cfgports; i++) {
- if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
- ipath_dbg("unit %u port %d is in use "
- "(PID %u cmd %s), can't reset\n",
- unit, i,
- pid_nr(dd->ipath_pd[i]->port_pid),
- dd->ipath_pd[i]->port_comm);
- ret = -EBUSY;
- goto bail;
- }
+ if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+ continue;
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+ ipath_dbg("unit %u port %d is in use "
+ "(PID %u cmd %s), can't reset\n",
+ unit, i,
+ pid_nr(dd->ipath_pd[i]->port_pid),
+ dd->ipath_pd[i]->port_comm);
+ ret = -EBUSY;
+ goto bail;
}
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
teardown_sdma(dd);
@@ -2655,9 +2624,12 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{
int i, sub, any = 0;
struct pid *pid;
+ unsigned long flags;
if (!dd->ipath_pd)
return 0;
+
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
for (i = 1; i < dd->ipath_cfgports; i++) {
if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
continue;
@@ -2681,6 +2653,7 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
any++;
}
}
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
return any;
}
@@ -2703,7 +2676,7 @@ static void ipath_hol_signal_up(struct ipath_devdata *dd)
* to prevent HoL blocking, then start the HoL timer that
* periodically continues, then stop procs, so they can detect
* link down if they want, and do something about it.
- * Timer may already be running, so use __mod_timer, not add_timer.
+ * Timer may already be running, so use mod_timer, not add_timer.
*/
void ipath_hol_down(struct ipath_devdata *dd)
{
@@ -2712,7 +2685,7 @@ void ipath_hol_down(struct ipath_devdata *dd)
dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
dd->ipath_hol_timer.expires = jiffies +
msecs_to_jiffies(ipath_hol_timeout_ms);
- __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
+ mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
}
/*
@@ -2751,7 +2724,7 @@ void ipath_hol_event(unsigned long opaque)
else {
dd->ipath_hol_timer.expires = jiffies +
msecs_to_jiffies(ipath_hol_timeout_ms);
- __mod_timer(&dd->ipath_hol_timer,
+ mod_timer(&dd->ipath_hol_timer,
dd->ipath_hol_timer.expires);
}
}
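
Editor's note: in ipath_chg_pioavailkernel() the open-coded find_first_bit()/find_next_bit() loop becomes a single bitmap_weight() call. A portable sketch of what bitmap_weight() computes, counting the set bits in the first nbits of a word array, is shown here (illustration only, not the kernel implementation).

    #include <stdio.h>

    /* count the set bits in the first nbits of a bitmap, like bitmap_weight() */
    static unsigned int weight(const unsigned long *map, unsigned int nbits)
    {
        const unsigned int bpl = 8 * sizeof(unsigned long);
        unsigned int i, cnt = 0;

        for (i = 0; i < nbits / bpl; i++)
            cnt += (unsigned int)__builtin_popcountl(map[i]);
        if (nbits % bpl)
            cnt += (unsigned int)__builtin_popcountl(map[i] &
                        ((1UL << (nbits % bpl)) - 1));
        return cnt;
    }

    int main(void)
    {
        unsigned long map[4] = { 0xf0f0UL, 0x3UL, 0, 0 };

        printf("%u\n", weight(map, 70));
        return 0;
    }
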
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index dc37277f1c8..fc7181985e8 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -772,8 +772,8 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
"0x%x, not 0x%x\n", csum, ifp->if_csum);
goto done;
}
- if (*(__be64 *) ifp->if_guid == 0ULL ||
- *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) {
+ if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
+ *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
ipath_dev_err(dd, "Invalid GUID %llx from flash; "
"ignoring\n",
*(unsigned long long *) ifp->if_guid);
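
Editor's note: the EEPROM GUID check now compares against cpu_to_be64(0) and ~cpu_to_be64(0); both the all-zero and all-one patterns are byte-order independent, so the same test can be written over raw bytes, as in this small sketch (the example GUID value is arbitrary).

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* a GUID of all zeroes or all ones is not a usable EUI-64 */
    static int guid_valid(const uint8_t guid[8])
    {
        static const uint8_t zeroes[8];
        static const uint8_t ones[8] = { 0xff, 0xff, 0xff, 0xff,
                                         0xff, 0xff, 0xff, 0xff };

        return memcmp(guid, zeroes, 8) != 0 && memcmp(guid, ones, 8) != 0;
    }

    int main(void)
    {
        uint8_t g[8] = { 0x00, 0x11, 0x75, 0x00, 0x00, 0x12, 0x34, 0x56 };

        printf("%d\n", guid_valid(g));
        return 0;
    }
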
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 35f301c88b5..6d7f453b4d0 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -35,11 +35,14 @@
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/swap.h>
+#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/io.h>
+#include <linux/aio.h>
#include <linux/jiffies.h>
-#include <linux/smp_lock.h>
+#include <linux/cpu.h>
#include <asm/pgtable.h>
#include "ipath_kernel.h"
@@ -62,7 +65,8 @@ static const struct file_operations ipath_file_ops = {
.open = ipath_open,
.release = ipath_close,
.poll = ipath_poll,
- .mmap = ipath_mmap
+ .mmap = ipath_mmap,
+ .llseek = noop_llseek,
};
/*
@@ -223,8 +227,13 @@ static int ipath_get_base_info(struct file *fp,
(unsigned long long) kinfo->spi_subport_rcvhdr_base);
}
- kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
- dd->ipath_palign;
+ /*
+ * All user buffers are 2KB buffers. If we ever support
+ * giving 4KB buffers to user processes, this will need some
+ * work.
+ */
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase -
+ (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
kinfo->spi_pioalign = dd->ipath_palign;
kinfo->spi_qpair = IPATH_KD_QP;
@@ -1146,7 +1155,7 @@ static int ipath_file_vma_fault(struct vm_area_struct *vma,
return 0;
}
-static struct vm_operations_struct ipath_file_vm_ops = {
+static const struct vm_operations_struct ipath_file_vm_ops = {
.fault = ipath_file_vma_fault,
};
@@ -1217,7 +1226,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
vma->vm_ops = &ipath_file_vm_ops;
- vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
ret = 1;
bail:
@@ -1524,7 +1533,7 @@ static int init_subports(struct ipath_devdata *dd,
}
num_subports = uinfo->spu_subport_cnt;
- pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
+ pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
if (!pd->subport_uregbase) {
ret = -ENOMEM;
goto bail;
@@ -1532,13 +1541,13 @@ static int init_subports(struct ipath_devdata *dd,
/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
sizeof(u32), PAGE_SIZE) * num_subports;
- pd->subport_rcvhdr_base = vmalloc(size);
+ pd->subport_rcvhdr_base = vzalloc(size);
if (!pd->subport_rcvhdr_base) {
ret = -ENOMEM;
goto bail_ureg;
}
- pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+ pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
pd->port_rcvegrbuf_size *
num_subports);
if (!pd->subport_rcvegrbuf) {
@@ -1550,11 +1559,6 @@ static int init_subports(struct ipath_devdata *dd,
pd->port_subport_id = uinfo->spu_subport_id;
pd->active_slaves = 1;
set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
- memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
- memset(pd->subport_rcvhdr_base, 0, size);
- memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
- pd->port_rcvegrbuf_size *
- num_subports);
goto bail;
bail_rhdr:
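
Switching the three allocations to vzalloc() is what allows the later hunk to drop the explicit memset() calls: vzalloc() hands back memory that is already zero-filled. A sketch of the equivalence (not driver code):

/* The old pattern, which vzalloc() now collapses into one call. */
static void *zeroed_vmalloc_old(size_t size)
{
	void *buf = vmalloc(size);

	if (buf)
		memset(buf, 0, size);	/* what vzalloc() does for us */
	return buf;
}
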
@@ -1611,7 +1615,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
pd->port_cnt = 1;
port_fp(fp) = pd;
pd->port_pid = get_pid(task_pid(current));
- strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+ strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
} else
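
strncpy() does not NUL-terminate when the source fills the destination, so a 16-character task name could leave port_comm unterminated; strlcpy() always terminates, truncating if needed. Illustrative only (comm_copy_example is a hypothetical name; TASK_COMM_LEN is 16, matching port_comm):

static void comm_copy_example(void)
{
	char comm[TASK_COMM_LEN];

	strncpy(comm, "sixteen-chars-xx", sizeof(comm));	/* 16-char source: no NUL written */
	strlcpy(comm, "sixteen-chars-xx", sizeof(comm));	/* truncated to 15 chars + NUL */
}
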
@@ -1674,7 +1678,7 @@ static int find_best_unit(struct file *fp,
* InfiniPath chip to that processor (we assume reasonable connectivity,
* for now). This code assumes that if affinity has been set
* before this point, that at most one cpu is set; for now this
- * is reasonable. I check for both cpus_empty() and cpus_full(),
+ * is reasonable. I check for both cpumask_empty() and cpumask_full(),
* in case some kernel variant sets none of the bits when no
* affinity is set. 2.6.11 and 12 kernels have all present
* cpus set. Some day we'll have to fix it up further to handle
@@ -1683,17 +1687,19 @@ static int find_best_unit(struct file *fp,
* information. There may be some issues with dual core numbering
* as well. This needs more work prior to release.
*/
- if (!cpus_empty(current->cpus_allowed) &&
- !cpus_full(current->cpus_allowed)) {
+ if (!cpumask_empty(tsk_cpus_allowed(current)) &&
+ !cpumask_full(tsk_cpus_allowed(current))) {
int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
- for (i = 0; i < ncpus; i++)
- if (cpu_isset(i, current->cpus_allowed)) {
+ get_online_cpus();
+ for_each_online_cpu(i)
+ if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
ipath_cdbg(PROC, "%s[%u] affinity set for "
"cpu %d/%d\n", current->comm,
current->pid, i, ncpus);
curcpu = i;
nset++;
}
+ put_online_cpus();
if (curcpu != -1 && nset != ncpus) {
if (npresent) {
prefunit = curcpu / (ncpus / npresent);
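
Besides the cpumask API rename, the scan is now bracketed by get_online_cpus()/put_online_cpus() and walks for_each_online_cpu(), so a CPU coming or going mid-loop cannot skew the count. The pattern in isolation (count_allowed_cpus is a hypothetical helper, not driver code):

static int count_allowed_cpus(struct task_struct *tsk)
{
	int cpu, nset = 0;

	get_online_cpus();			/* block CPU hotplug while we look */
	for_each_online_cpu(cpu)
		if (cpumask_test_cpu(cpu, tsk_cpus_allowed(tsk)))
			nset++;
	put_online_cpus();
	return nset;
}
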
@@ -1816,7 +1822,6 @@ done:
static int ipath_open(struct inode *in, struct file *fp)
{
/* The real work is performed later in ipath_assign_port() */
- cycle_kernel_lock();
fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
return fp->private_data ? 0 : -ENOMEM;
}
@@ -1860,9 +1865,9 @@ static int ipath_assign_port(struct file *fp,
goto done_chk_sdma;
}
- i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
+ i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
- (long)fp->f_path.dentry->d_inode->i_rdev, i_minor);
+ (long)file_inode(fp)->i_rdev, i_minor);
if (i_minor)
ret = find_free_port(i_minor - 1, fp, uinfo);
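
file_inode() replaces the open-coded fp->f_path.dentry->d_inode chain; the real accessor simply returns the inode pointer cached in struct file, so it is both shorter and cheaper. Roughly (example_file_inode is a hypothetical stand-in):

static inline struct inode *example_file_inode(const struct file *f)
{
	return f->f_path.dentry->d_inode;	/* what file_inode(f) hides */
}
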
@@ -1972,7 +1977,7 @@ static int ipath_do_user_init(struct file *fp,
* 0 to 1. So for those chips, we turn it off and then back on.
* This will (very briefly) affect any other open ports, but the
* duration is very short, and therefore isn't an issue. We
- * explictly set the in-memory tail copy to 0 beforehand, so we
+ * explicitly set the in-memory tail copy to 0 beforehand, so we
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
@@ -2041,14 +2046,16 @@ static int ipath_close(struct inode *in, struct file *fp)
struct ipath_filedata *fd;
struct ipath_portdata *pd;
struct ipath_devdata *dd;
+ unsigned long flags;
unsigned port;
+ struct pid *pid;
ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
(long)in->i_rdev, fp->private_data);
mutex_lock(&ipath_mutex);
- fd = (struct ipath_filedata *) fp->private_data;
+ fd = fp->private_data;
fp->private_data = NULL;
pd = fd->pd;
if (!pd) {
@@ -2074,14 +2081,13 @@ static int ipath_close(struct inode *in, struct file *fp)
mutex_unlock(&ipath_mutex);
goto bail;
}
+ /* early; no interrupt users after this */
+ spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
port = pd->port_port;
-
- if (pd->port_hdrqfull) {
- ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
- "during run\n", pd->port_comm, pid_nr(pd->port_pid),
- pd->port_hdrqfull);
- pd->port_hdrqfull = 0;
- }
+ dd->ipath_pd[port] = NULL;
+ pid = pd->port_pid;
+ pd->port_pid = NULL;
+ spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
if (pd->port_rcvwait_to || pd->port_piowait_to
|| pd->port_rcvnowait || pd->port_pionowait) {
@@ -2138,13 +2144,11 @@ static int ipath_close(struct inode *in, struct file *fp)
unlock_expected_tids(pd);
ipath_stats.sps_ports--;
ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
- pd->port_comm, pid_nr(pd->port_pid),
+ pd->port_comm, pid_nr(pid),
dd->ipath_unit, port);
}
- put_pid(pd->port_pid);
- pd->port_pid = NULL;
- dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
+ put_pid(pid);
mutex_unlock(&ipath_mutex);
ipath_free_pddata(dd, pd); /* after releasing the mutex */
@@ -2455,7 +2459,7 @@ static int init_cdev(int minor, char *name, const struct file_operations *fops,
goto err_cdev;
}
- device = device_create(ipath_class, NULL, dev, name);
+ device = device_create(ipath_class, NULL, dev, NULL, name);
if (IS_ERR(device)) {
ret = PTR_ERR(device);
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 23faba9d21e..e0c404bdc4a 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -31,13 +31,13 @@
* SOFTWARE.
*/
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
+#include <linux/slab.h>
#include "ipath_kernel.h"
@@ -46,7 +46,7 @@
static struct super_block *ipath_super;
static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
- int mode, const struct file_operations *fops,
+ umode_t mode, const struct file_operations *fops,
void *data)
{
int error;
@@ -57,13 +57,11 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
goto bail;
}
+ inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_uid = 0;
- inode->i_gid = 0;
- inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_private = data;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
inc_nlink(dir);
@@ -78,7 +76,7 @@ bail:
return error;
}
-static int create_file(const char *name, mode_t mode,
+static int create_file(const char *name, umode_t mode,
struct dentry *parent, struct dentry **dentry,
const struct file_operations *fops, void *data)
{
@@ -87,11 +85,11 @@ static int create_file(const char *name, mode_t mode,
*dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
- if (!IS_ERR(dentry))
+ if (!IS_ERR(*dentry))
error = ipathfs_mknod(parent->d_inode, *dentry,
mode, fops, data);
else
- error = PTR_ERR(dentry);
+ error = PTR_ERR(*dentry);
mutex_unlock(&parent->d_inode->i_mutex);
return error;
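
This hunk is a real bug fix, not just cleanup: the old code tested IS_ERR() on the address of the output parameter, which is never an error pointer, so even a failed lookup_one_len() fell through into ipathfs_mknod() with an ERR_PTR value. The corrected pattern, in isolation:

/* lookup_one_len() returns a valid dentry or an ERR_PTR(), never NULL,
 * so the returned pointer itself must be checked -- sketch only. */
struct dentry *d = lookup_one_len(name, parent, strlen(name));

if (IS_ERR(d))
	return PTR_ERR(d);	/* propagate the real error */
/* ... use d ... */
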
@@ -106,6 +104,7 @@ static ssize_t atomic_stats_read(struct file *file, char __user *buf,
static const struct file_operations atomic_stats_ops = {
.read = atomic_stats_read,
+ .llseek = default_llseek,
};
static ssize_t atomic_counters_read(struct file *file, char __user *buf,
@@ -114,7 +113,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
struct infinipath_counters counters;
struct ipath_devdata *dd;
- dd = file->f_path.dentry->d_inode->i_private;
+ dd = file_inode(file)->i_private;
dd->ipath_f_read_counters(dd, &counters);
return simple_read_from_buffer(buf, count, ppos, &counters,
@@ -123,6 +122,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf,
static const struct file_operations atomic_counters_ops = {
.read = atomic_counters_read,
+ .llseek = default_llseek,
};
static ssize_t flash_read(struct file *file, char __user *buf,
@@ -154,7 +154,7 @@ static ssize_t flash_read(struct file *file, char __user *buf,
goto bail;
}
- dd = file->f_path.dentry->d_inode->i_private;
+ dd = file_inode(file)->i_private;
if (ipath_eeprom_read(dd, pos, tmp, count)) {
ipath_dev_err(dd, "failed to read from flash\n");
ret = -ENXIO;
@@ -207,7 +207,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
goto bail_tmp;
}
- dd = file->f_path.dentry->d_inode->i_private;
+ dd = file_inode(file)->i_private;
if (ipath_eeprom_write(dd, pos, tmp, count)) {
ret = -ENXIO;
ipath_dev_err(dd, "failed to write to flash\n");
@@ -227,6 +227,7 @@ bail:
static const struct file_operations flash_ops = {
.read = flash_read,
.write = flash_write,
+ .llseek = default_llseek,
};
static int create_device_files(struct super_block *sb,
@@ -276,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name)
goto bail;
}
- spin_lock(&dcache_lock);
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_locked(tmp);
+ dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, tmp);
- } else {
+ } else
spin_unlock(&tmp->d_lock);
- spin_unlock(&dcache_lock);
- }
ret = 0;
bail:
@@ -350,10 +347,8 @@ static int ipathfs_fill_super(struct super_block *sb, void *data,
list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
spin_unlock_irqrestore(&ipath_devs_lock, flags);
ret = create_device_files(sb, dd);
- if (ret) {
- deactivate_super(sb);
+ if (ret)
goto bail;
- }
spin_lock_irqsave(&ipath_devs_lock, flags);
}
@@ -363,13 +358,13 @@ bail:
return ret;
}
-static int ipathfs_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ipathfs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- int ret = get_sb_single(fs_type, flags, data,
- ipathfs_fill_super, mnt);
- if (ret >= 0)
- ipath_super = mnt->mnt_sb;
+ struct dentry *ret;
+ ret = mount_single(fs_type, flags, data, ipathfs_fill_super);
+ if (!IS_ERR(ret))
+ ipath_super = ret->d_sb;
return ret;
}
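
The VFS interface change behind this hunk: .get_sb, which returned an int and filled in a vfsmount, was replaced by .mount, which returns the root dentry or an ERR_PTR; mount_single() is the helper corresponding to the old get_sb_single(). The general shape of such a conversion, with a hypothetical examplefs (fill_super not shown):

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
				      int flags, const char *dev_name,
				      void *data)
{
	return mount_single(fs_type, flags, data, examplefs_fill_super);
}

static struct file_system_type examplefs_fs_type = {
	.owner	 = THIS_MODULE,
	.name	 = "examplefs",
	.mount	 = examplefs_mount,
	.kill_sb = kill_litter_super,
};
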
@@ -412,9 +407,10 @@ bail:
static struct file_system_type ipathfs_fs_type = {
.owner = THIS_MODULE,
.name = "ipathfs",
- .get_sb = ipathfs_get_sb,
+ .mount = ipathfs_mount,
.kill_sb = ipathfs_kill_super,
};
+MODULE_ALIAS_FS("ipathfs");
int __init ipath_init_ipathfs(void)
{
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 02831ad070b..7cc305488a3 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -381,7 +381,7 @@ static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr =
#define IPATH_GPIO_SCL \
(1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-/* keep the code below somewhat more readonable; not used elsewhere */
+/* keep the code below somewhat more readable; not used elsewhere */
#define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \
infinipath_hwe_htclnkabyte1crcerr)
#define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \
@@ -596,8 +596,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
ipath_format_hwerrors(hwerrs,
ipath_6110_hwerror_msgs,
- sizeof(ipath_6110_hwerror_msgs) /
- sizeof(ipath_6110_hwerror_msgs[0]),
+ ARRAY_SIZE(ipath_6110_hwerror_msgs),
msg, msgl);
if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
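
ARRAY_SIZE() is the kernel's spelling of the element-count idiom and, unlike the open-coded division, refuses to compile if it is accidentally handed a pointer instead of an array. Sketch only (tbl and tbl_entries are hypothetical):

static const int tbl[] = { 1, 2, 3 };

static size_t tbl_entries(void)
{
	return ARRAY_SIZE(tbl);		/* == sizeof(tbl) / sizeof(tbl[0]) == 3 */
}
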
@@ -809,7 +808,7 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd)
* errors. We only bother to do this at load time, because it's OK if
* it happened before we were loaded (first time after boot/reset),
* but any time after that, it's fatal anyway. Also need to not check
- * for for upper byte errors if we are in 8 bit mode, so figure out
+ * for upper byte errors if we are in 8 bit mode, so figure out
* our width. For now, at least, also complain if it's 8 bit.
*/
static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev,
@@ -1474,7 +1473,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
/**
* ipath_pe_put_tid - write a TID in chip
* @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to udpate
+ * @tidptr: pointer to the expected TID (in chip) to update
* @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
* @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
*
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
deleted file mode 100644
index 421cc2af891..00000000000
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ /dev/null
@@ -1,1801 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/*
- * This file contains all of the code that is specific to the
- * InfiniPath PCIe chip.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-
-static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64);
-
-/*
- * This file contains all the chip-specific register information and
- * access functions for the QLogic InfiniPath PCI-Express chip.
- *
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- */
-struct _infinipath_do_not_use_kernel_regs {
- unsigned long long Revision;
- unsigned long long Control;
- unsigned long long PageAlign;
- unsigned long long PortCnt;
- unsigned long long DebugPortSelect;
- unsigned long long Reserved0;
- unsigned long long SendRegBase;
- unsigned long long UserRegBase;
- unsigned long long CounterRegBase;
- unsigned long long Scratch;
- unsigned long long Reserved1;
- unsigned long long Reserved2;
- unsigned long long IntBlocked;
- unsigned long long IntMask;
- unsigned long long IntStatus;
- unsigned long long IntClear;
- unsigned long long ErrorMask;
- unsigned long long ErrorStatus;
- unsigned long long ErrorClear;
- unsigned long long HwErrMask;
- unsigned long long HwErrStatus;
- unsigned long long HwErrClear;
- unsigned long long HwDiagCtrl;
- unsigned long long MDIO;
- unsigned long long IBCStatus;
- unsigned long long IBCCtrl;
- unsigned long long ExtStatus;
- unsigned long long ExtCtrl;
- unsigned long long GPIOOut;
- unsigned long long GPIOMask;
- unsigned long long GPIOStatus;
- unsigned long long GPIOClear;
- unsigned long long RcvCtrl;
- unsigned long long RcvBTHQP;
- unsigned long long RcvHdrSize;
- unsigned long long RcvHdrCnt;
- unsigned long long RcvHdrEntSize;
- unsigned long long RcvTIDBase;
- unsigned long long RcvTIDCnt;
- unsigned long long RcvEgrBase;
- unsigned long long RcvEgrCnt;
- unsigned long long RcvBufBase;
- unsigned long long RcvBufSize;
- unsigned long long RxIntMemBase;
- unsigned long long RxIntMemSize;
- unsigned long long RcvPartitionKey;
- unsigned long long Reserved3;
- unsigned long long RcvPktLEDCnt;
- unsigned long long Reserved4[8];
- unsigned long long SendCtrl;
- unsigned long long SendPIOBufBase;
- unsigned long long SendPIOSize;
- unsigned long long SendPIOBufCnt;
- unsigned long long SendPIOAvailAddr;
- unsigned long long TxIntMemBase;
- unsigned long long TxIntMemSize;
- unsigned long long Reserved5;
- unsigned long long PCIeRBufTestReg0;
- unsigned long long PCIeRBufTestReg1;
- unsigned long long Reserved51[6];
- unsigned long long SendBufferError;
- unsigned long long SendBufferErrorCONT1;
- unsigned long long Reserved6SBE[6];
- unsigned long long RcvHdrAddr0;
- unsigned long long RcvHdrAddr1;
- unsigned long long RcvHdrAddr2;
- unsigned long long RcvHdrAddr3;
- unsigned long long RcvHdrAddr4;
- unsigned long long Reserved7RHA[11];
- unsigned long long RcvHdrTailAddr0;
- unsigned long long RcvHdrTailAddr1;
- unsigned long long RcvHdrTailAddr2;
- unsigned long long RcvHdrTailAddr3;
- unsigned long long RcvHdrTailAddr4;
- unsigned long long Reserved8RHTA[11];
- unsigned long long Reserved9SW[8];
- unsigned long long SerdesConfig0;
- unsigned long long SerdesConfig1;
- unsigned long long SerdesStatus;
- unsigned long long XGXSConfig;
- unsigned long long IBPLLCfg;
- unsigned long long Reserved10SW2[3];
- unsigned long long PCIEQ0SerdesConfig0;
- unsigned long long PCIEQ0SerdesConfig1;
- unsigned long long PCIEQ0SerdesStatus;
- unsigned long long Reserved11;
- unsigned long long PCIEQ1SerdesConfig0;
- unsigned long long PCIEQ1SerdesConfig1;
- unsigned long long PCIEQ1SerdesStatus;
- unsigned long long Reserved12;
-};
-
-struct _infinipath_do_not_use_counters {
- __u64 LBIntCnt;
- __u64 LBFlowStallCnt;
- __u64 Reserved1;
- __u64 TxUnsupVLErrCnt;
- __u64 TxDataPktCnt;
- __u64 TxFlowPktCnt;
- __u64 TxDwordCnt;
- __u64 TxLenErrCnt;
- __u64 TxMaxMinLenErrCnt;
- __u64 TxUnderrunCnt;
- __u64 TxFlowStallCnt;
- __u64 TxDroppedPktCnt;
- __u64 RxDroppedPktCnt;
- __u64 RxDataPktCnt;
- __u64 RxFlowPktCnt;
- __u64 RxDwordCnt;
- __u64 RxLenErrCnt;
- __u64 RxMaxMinLenErrCnt;
- __u64 RxICRCErrCnt;
- __u64 RxVCRCErrCnt;
- __u64 RxFlowCtrlErrCnt;
- __u64 RxBadFormatCnt;
- __u64 RxLinkProblemCnt;
- __u64 RxEBPCnt;
- __u64 RxLPCRCErrCnt;
- __u64 RxBufOvflCnt;
- __u64 RxTIDFullErrCnt;
- __u64 RxTIDValidErrCnt;
- __u64 RxPKeyMismatchCnt;
- __u64 RxP0HdrEgrOvflCnt;
- __u64 RxP1HdrEgrOvflCnt;
- __u64 RxP2HdrEgrOvflCnt;
- __u64 RxP3HdrEgrOvflCnt;
- __u64 RxP4HdrEgrOvflCnt;
- __u64 RxP5HdrEgrOvflCnt;
- __u64 RxP6HdrEgrOvflCnt;
- __u64 RxP7HdrEgrOvflCnt;
- __u64 RxP8HdrEgrOvflCnt;
- __u64 Reserved6;
- __u64 Reserved7;
- __u64 IBStatusChangeCnt;
- __u64 IBLinkErrRecoveryCnt;
- __u64 IBLinkDownedCnt;
- __u64 IBSymbolErrCnt;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
- struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
- struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_pe_kregs = {
- .kr_control = IPATH_KREG_OFFSET(Control),
- .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
- .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
- .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
- .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
- .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
- .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
- .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
- .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
- .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
- .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
- .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
- .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
- .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
- .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
- .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
- .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
- .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
- .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
- .kr_intclear = IPATH_KREG_OFFSET(IntClear),
- .kr_intmask = IPATH_KREG_OFFSET(IntMask),
- .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
- .kr_mdio = IPATH_KREG_OFFSET(MDIO),
- .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
- .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
- .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
- .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
- .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
- .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
- .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
- .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
- .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
- .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
- .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
- .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
- .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
- .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
- .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
- .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
- .kr_revision = IPATH_KREG_OFFSET(Revision),
- .kr_scratch = IPATH_KREG_OFFSET(Scratch),
- .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
- .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
- .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr),
- .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase),
- .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt),
- .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize),
- .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
- .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
- .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
- .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
- .kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0),
- .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1),
- .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus),
- .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
- .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg),
-
- /*
- * These should not be used directly via ipath_write_kreg64(),
- * use them with ipath_write_kreg64_port(),
- */
- .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
- .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
-
- /* The rcvpktled register controls one of the debug port signals, so
- * a packet activity LED can be connected to it. */
- .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
- .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
- .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
- .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0),
- .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1),
- .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus),
- .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0),
- .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1),
- .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus)
-};
-
-static const struct ipath_cregs ipath_pe_cregs = {
- .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
- .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
- .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
- .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
- .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
- .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
- .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
- .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
- .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
- .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
- .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
- .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
- .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
- .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
- .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
- .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
- .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
- .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
- .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
- .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
- .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
- .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
- .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
- .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
- .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
- .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
- .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
- .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
- .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
- .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
- .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
- .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
- .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt)
-};
-
-/* kr_control bits */
-#define INFINIPATH_C_RESET 1U
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 0
-#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 12
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
-#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
-#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
-#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL
-
-#define IBA6120_IBCS_LINKTRAININGSTATE_MASK 0xf
-#define IBA6120_IBCS_LINKSTATE_SHIFT 4
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_FOUND 0x0000000000008000
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET 0x5ULL
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA (1ULL << \
- (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL (1ULL << \
- (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-#define INFINIPATH_RT_BUFSIZE_MASK 0xe0000000ULL
-#define INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid) \
- ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) >> 29) + 11 - 1)
-#define INFINIPATH_RT_BUFSIZE(tid) (1 << INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid))
-#define INFINIPATH_RT_IS_VALID(tid) \
- (((tid) & INFINIPATH_RT_BUFSIZE_MASK) && \
- ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) != INFINIPATH_RT_BUFSIZE_MASK)))
-#define INFINIPATH_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */
-#define INFINIPATH_RT_ADDR_SHIFT 10
-
-#define INFINIPATH_R_INTRAVAIL_SHIFT 16
-#define INFINIPATH_R_TAILUPD_SHIFT 31
-
-/* 6120 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
- INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
- INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
- /*
- * In practice, it's unlikely that we'll see PCIe PLL, or bus
- * parity or memory parity error failures, because most likely we
- * won't be able to talk to the core of the chip. Nonetheless, we
- * might see them, if they are in parts of the PCIe core that aren't
- * essential.
- */
- INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
- INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
- INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
- INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
- INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
- INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
- INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
-};
-
-#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
-#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
- << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
-
-static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *,
- u32, unsigned long);
-
-/*
- * On platforms using this chip, and not having ordered WC stores, we
- * can get TXE parity errors due to speculative reads to the PIO buffers,
- * and this, due to a chip bug can result in (many) false parity error
- * reports. So it's a debug print on those, and an info print on systems
- * where the speculative reads don't occur.
- */
-static void ipath_pe_txe_recover(struct ipath_devdata *dd)
-{
- if (ipath_unordered_wc())
- ipath_dbg("Recovering from TXE PIO parity error\n");
- else {
- ++ipath_stats.sps_txeparity;
- dev_info(&dd->pcidev->dev,
- "Recovering from TXE PIO parity error\n");
- }
-}
-
-
-/**
- * ipath_pe_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use. Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue. We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
- size_t msgl)
-{
- ipath_err_t hwerrs;
- u32 bits, ctrl;
- int isfatal = 0;
- char bitsmsg[64];
- int log_idx;
-
- hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
- if (!hwerrs) {
- /*
- * better than printing confusing messages
- * This seems to be related to clearing the crc error, or
- * the pll error during init.
- */
- ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
- return;
- } else if (hwerrs == ~0ULL) {
- ipath_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
- return;
- }
- ipath_stats.sps_hwerrs++;
-
- /* Always clear the error status register, except MEMBISTFAIL,
- * regardless of whether we continue or stop using the chip.
- * We want that set so we know it failed, even across driver reload.
- * We'll still ignore it in the hwerrmask. We do this partly for
- * diagnostics, but also for support */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
- hwerrs &= dd->ipath_hwerrmask;
-
- /* We log some errors to EEPROM, check if we have any of those. */
- for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
- if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
- ipath_inc_eeprom_err(dd, log_idx, 1);
-
- /*
- * make sure we get this much out, unless told to be quiet,
- * or it's occurred within the last 5 seconds
- */
- if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
- RXE_EAGER_PARITY)) ||
- (ipath_debug & __IPATH_VERBDBG))
- dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
- dd->ipath_lasthwerror |= hwerrs;
-
- if (hwerrs & ~dd->ipath_hwe_bitsextant)
- ipath_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~dd->ipath_hwe_bitsextant));
-
- ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
- /*
- * parity errors in send memory are recoverable,
- * just cancel the send (if indicated in * sendbuffererror),
- * count the occurrence, unfreeze (if no other handled
- * hardware error bits are set), and continue. They can
- * occur if a processor speculative read is done to the PIO
- * buffer while we are sending a packet, for example.
- */
- if (hwerrs & TXE_PIO_PARITY) {
- ipath_pe_txe_recover(dd);
- hwerrs &= ~TXE_PIO_PARITY;
- }
- if (!hwerrs) {
- static u32 freeze_cnt;
-
- freeze_cnt++;
- ipath_dbg("Clearing freezemode on ignored or recovered "
- "hardware error (%u)\n", freeze_cnt);
- ipath_clear_freeze(dd);
- }
- }
-
- *msg = '\0';
-
- if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
- msgl);
- /* ignore from now on, so disable until driver reloaded */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- ipath_format_hwerrors(hwerrs,
- ipath_6120_hwerror_msgs,
- sizeof(ipath_6120_hwerror_msgs)/
- sizeof(ipath_6120_hwerror_msgs[0]),
- msg, msgl);
-
- if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
- << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg,
- "[PCIe Mem Parity Errs %x] ", bits);
- strlcat(msg, bitsmsg, msgl);
- }
-
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
- INFINIPATH_HWE_COREPLL_RFSLIP )
-
- if (hwerrs & _IPATH_PLL_FAIL) {
- snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), InfiniPath hardware unusable]",
- (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
- strlcat(msg, bitsmsg, msgl);
- /* ignore from now on, so disable until driver reloaded */
- dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
- /*
- * If it occurs, it is left masked since the external
- * interface is unused
- */
- dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- if (hwerrs) {
- /*
- * if any set that we aren't ignoring; only
- * make the complaint once, in case it's stuck
- * or recurring, and we get here multiple
- * times.
- */
- ipath_dev_err(dd, "%s hardware error\n", msg);
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
- ipath_setup_pe_setextled(dd,
- INFINIPATH_IBCS_L_STATE_DOWN,
- INFINIPATH_IBCS_LT_STATE_DISABLED);
- ipath_dev_err(dd, "Fatal Hardware Error (freeze "
- "mode), no longer usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- /* mark as having had error */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- /*
- * mark as not usable, at a minimum until driver
- * is reloaded, probably until reboot, since no
- * other reset is possible.
- */
- dd->ipath_flags &= ~IPATH_INITTED;
- } else
- *msg = 0; /* recovered from all of them */
-
- if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg && msg) {
- /*
- * for /sys status file ; if no trailing brace is copied,
- * we'll know it was truncated.
- */
- snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
- "{%s}", msg);
- }
-}
-
-/**
- * ipath_pe_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * info is based on the board revision register
- */
-static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen)
-{
- char *n = NULL;
- u8 boardrev = dd->ipath_boardrev;
- int ret;
-
- switch (boardrev) {
- case 0:
- n = "InfiniPath_Emulation";
- break;
- case 1:
- n = "InfiniPath_QLE7140-Bringup";
- break;
- case 2:
- n = "InfiniPath_QLE7140";
- break;
- case 3:
- n = "InfiniPath_QMI7140";
- break;
- case 4:
- n = "InfiniPath_QEM7140";
- break;
- case 5:
- n = "InfiniPath_QMH7140";
- break;
- case 6:
- n = "InfiniPath_QLE7142";
- break;
- default:
- ipath_dev_err(dd,
- "Don't yet know about board with ID %u\n",
- boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
- boardrev);
- break;
- }
- if (n)
- snprintf(name, namelen, "%s", n);
-
- if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
- ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
- dd->ipath_majrev, dd->ipath_minrev);
- ret = 1;
- } else {
- ret = 0;
- if (dd->ipath_minrev >= 2)
- dd->ipath_f_put_tid = ipath_pe_put_tid_2;
- }
-
- /*
- * set here, not in ipath_init_*_funcs because we have to do
- * it after we can read chip registers.
- */
- dd->ipath_ureg_align =
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
- return ret;
-}
-
-/**
- * ipath_pe_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those errors bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
-{
- ipath_err_t val;
- u64 extsval;
-
- extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
- if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
- ipath_dev_err(dd, "MemBIST did not complete!\n");
- if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
- ipath_dbg("MemBIST corrected\n");
-
- val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
-
- if (!dd->ipath_boardrev) // no PLL for Emulator
- val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-
- if (dd->ipath_minrev < 2) {
- /* workaround bug 9460 in internal interface bus parity
- * checking. Fixed (HW bug 9490) in Rev2.
- */
- val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
- }
- dd->ipath_hwerrmask = val;
-}
-
-/**
- * ipath_pe_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
-{
- u64 val, config1, prev_val;
- int ret = 0;
-
- ipath_dbg("Trying to bringup serdes\n");
-
- if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
- INFINIPATH_HWE_SERDESPLLFAILED) {
- ipath_dbg("At start, serdes PLL failed bit set "
- "in hwerrstatus, clearing and continuing\n");
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- INFINIPATH_HWE_SERDESPLLFAILED);
- }
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
- config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
-
- ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, "
- "xgxsconfig %llx\n", (unsigned long long) val,
- (unsigned long long) config1, (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
- /*
- * Force reset on, also set rxdetect enable. Must do before reading
- * serdesstatus at least for simulation, or some of the bits in
- * serdes status will come back as undefined and cause simulation
- * failures
- */
- val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN
- | INFINIPATH_SERDC0_L1PWR_DN;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
- /* be sure chip saw it */
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- udelay(5); /* need pll reset set at least for a bit */
- /*
- * after PLL is reset, set the per-lane Resets and TxIdle and
- * clear the PLL reset and rxdetect (to get falling edge).
- * Leave L1PWR bits set (permanently)
- */
- val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL
- | INFINIPATH_SERDC0_L1PWR_DN);
- val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE;
- ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets "
- "and txidle (%llx)\n", (unsigned long long) val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
- /* be sure chip saw it */
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- /* need PLL reset clear for at least 11 usec before lane
- * resets cleared; give it a few more to be sure */
- udelay(15);
- val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE);
-
- ipath_cdbg(VERBOSE, "Clearing lane resets and txidle "
- "(writing %llx)\n", (unsigned long long) val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
- /* be sure chip saw it */
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- prev_val = val;
- if (val & INFINIPATH_XGXS_RESET)
- val &= ~INFINIPATH_XGXS_RESET;
- if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
- INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
- /* need to compensate for Tx inversion in partner */
- val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
- INFINIPATH_XGXS_RX_POL_SHIFT);
- val |= dd->ipath_rx_pol_inv <<
- INFINIPATH_XGXS_RX_POL_SHIFT;
- }
- if (val != prev_val)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
- /* clear current and de-emphasis bits */
- config1 &= ~0x0ffffffff00ULL;
- /* set current to 20ma */
- config1 |= 0x00000000000ULL;
- /* set de-emphasis to -5.68dB */
- config1 |= 0x0cccc000000ULL;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1);
-
- ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx "
- "config1=%llx, sstatus=%llx xgxs=%llx\n",
- (unsigned long long) val, (unsigned long long) config1,
- (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
- (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
- return ret;
-}
-
-/**
- * ipath_pe_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * Called when driver is being unloaded
- */
-static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
-{
- u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
- val |= INFINIPATH_SERDC0_TXIDLE;
- ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
- (unsigned long long) val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-static int ipath_pe_intconfig(struct ipath_devdata *dd)
-{
- u32 chiprev;
-
- /*
- * If the chip supports added error indication via GPIO pins,
- * enable interrupts on those bits so the interrupt routine
- * can count the events. Also set flag so interrupt routine
- * can know they are expected.
- */
- chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
- if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
- /* Rev2+ reports extra errors via internal GPIO pins */
- dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
- dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
- dd->ipath_gpio_mask);
- }
- return 0;
-}
-
-/**
- * ipath_setup_pe_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
-
- * These LEDs indicate the physical and logical state of IB link.
- * For this chip (at least with recommended board pinouts), LED1
- * is Yellow (logical state) and LED2 is Green (physical state),
- *
- * Note: We try to match the Mellanox HCA LED behavior as best
- * we can. Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate. That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
- u64 ltst)
-{
- u64 extctl;
- unsigned long flags = 0;
-
- /* the diags use the LED to indicate diag info, so we leave
- * the external LED alone when the diags are running */
- if (ipath_diag_inuse)
- return;
-
- /* Allow override of LED display for, e.g. Locating system in rack */
- if (dd->ipath_led_override) {
- ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
- ? INFINIPATH_IBCS_LT_STATE_LINKUP
- : INFINIPATH_IBCS_LT_STATE_DISABLED;
- lst = (dd->ipath_led_override & IPATH_LED_LOG)
- ? INFINIPATH_IBCS_L_STATE_ACTIVE
- : INFINIPATH_IBCS_L_STATE_DOWN;
- }
-
- spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
- extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
- INFINIPATH_EXTC_LED2PRIPORT_ON);
-
- if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
- extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
- if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
- extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
- dd->ipath_extctrl = extctl;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
- spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-/**
- * ipath_setup_pe_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * This is called during driver unload.
- * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT chips. If we do end up needing pci_enable_msi
- * at some point in the future for HT, we'll move the call back
- * into the main init_one code.
- */
-static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
-{
- dd->ipath_msi_lo = 0; /* just in case unload fails */
- pci_disable_msi(dd->pcidev);
-}
-
-static void ipath_6120_pcie_params(struct ipath_devdata *dd)
-{
- u16 linkstat, speed;
- int pos;
-
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
- if (!pos) {
- ipath_dev_err(dd, "Can't find PCI Express capability!\n");
- goto bail;
- }
-
- pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
- &linkstat);
- /*
- * speed is bits 0-4, linkwidth is bits 4-8
- * no defines for them in headers
- */
- speed = linkstat & 0xf;
- linkstat >>= 4;
- linkstat &= 0x1f;
- dd->ipath_lbus_width = linkstat;
-
- switch (speed) {
- case 1:
- dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */
- break;
- case 2:
- dd->ipath_lbus_speed = 5000; /* Gen1, 5GHz */
- break;
- default: /* not defined, assume gen1 */
- dd->ipath_lbus_speed = 2500;
- break;
- }
-
- if (linkstat < 8)
- ipath_dev_err(dd,
- "PCIe width %u (x8 HCA), performance reduced\n",
- linkstat);
- else
- ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x8 HCA)\n",
- dd->ipath_lbus_speed, linkstat);
-
- if (speed != 1)
- ipath_dev_err(dd,
- "PCIe linkspeed %u is incorrect; "
- "should be 1 (2500)!\n", speed);
-bail:
- /* fill in string, even on errors */
- snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
- "PCIe,%uMHz,x%u\n",
- dd->ipath_lbus_speed,
- dd->ipath_lbus_width);
-
- return;
-}
-
-/**
- * ipath_setup_pe_config - setup PCIe config related stuff
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * The pci_enable_msi() call will fail on systems with MSI quirks
- * such as those with AMD8131, even if the device of interest is not
- * attached to that device, (in the 2.6.13 - 2.6.15 kernels, at least, fixed
- * late in 2.6.16).
- * All that can be done is to edit the kernel source to remove the quirk
- * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip,
- * even though it uses MSI, and we want to avoid the quirk warning,
- * so we call enable_msi only for PCIe. If we do end up needing
- * pci_enable_msi at some point in the future for HT, we'll move the
- * call back into the main init_one code.
- * We save the msi lo and hi values, so we can restore them after
- * chip reset (the kernel PCI infrastructure doesn't yet handle that
- * correctly).
- */
-static int ipath_setup_pe_config(struct ipath_devdata *dd,
- struct pci_dev *pdev)
-{
- int pos, ret;
-
- dd->ipath_msi_lo = 0; /* used as a flag during reset processing */
- ret = pci_enable_msi(dd->pcidev);
- if (ret)
- ipath_dev_err(dd, "pci_enable_msi failed: %d, "
- "interrupts may not work\n", ret);
- /* continue even if it fails, we may still be OK... */
- dd->ipath_irq = pdev->irq;
-
- if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
- u16 control;
- pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
- &dd->ipath_msi_lo);
- pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
- &dd->ipath_msi_hi);
- pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
- &control);
- /* now save the data (vector) info */
- pci_read_config_word(dd->pcidev,
- pos + ((control & PCI_MSI_FLAGS_64BIT)
- ? 12 : 8),
- &dd->ipath_msi_data);
- ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset "
- "0x%x, control=0x%x\n", dd->ipath_msi_data,
- pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
- control);
- /* we save the cachelinesize also, although it doesn't
- * really matter */
- pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
- &dd->ipath_pci_cacheline);
- } else
- ipath_dev_err(dd, "Can't find MSI capability, "
- "can't save MSI settings for reset\n");
-
- ipath_6120_pcie_params(dd);
-
- dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
- dd->ipath_link_speed_supported = IPATH_IB_SDR;
- dd->ipath_link_width_enabled = IB_WIDTH_4X;
- dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
- /* these can't change for this chip, so set once */
- dd->ipath_link_width_active = dd->ipath_link_width_enabled;
- dd->ipath_link_speed_active = dd->ipath_link_speed_enabled;
- return 0;
-}
-
-static void ipath_init_pe_variables(struct ipath_devdata *dd)
-{
- /*
- * setup the register offsets, since they are different for each
- * chip
- */
- dd->ipath_kregs = &ipath_pe_kregs;
- dd->ipath_cregs = &ipath_pe_cregs;
-
- /*
- * bits for selecting i2c direction and values,
- * used for I2C serial flash
- */
- dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- dd->ipath_gpio_sda = IPATH_GPIO_SDA;
- dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
- /*
- * Fill in data for field-values that change in newer chips.
- * We dynamically specify only the mask for LINKTRAININGSTATE
- * and only the shift for LINKSTATE, as they are the only ones
- * that change. Also precalculate the 3 link states of interest
- * and the combined mask.
- */
- dd->ibcs_ls_shift = IBA6120_IBCS_LINKSTATE_SHIFT;
- dd->ibcs_lts_mask = IBA6120_IBCS_LINKTRAININGSTATE_MASK;
- dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
- dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
- dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
- (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
- dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
- (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
- dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
- (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
- /*
- * Fill in data for ibcc field-values that change in newer chips.
- * We dynamically specify only the mask for LINKINITCMD
- * and only the shift for LINKCMD and MAXPKTLEN, as they are
- * the only ones that change.
- */
- dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
- dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
- dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
- /* Fill in shifts for RcvCtrl. */
- dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
- dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
- dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
- dd->ipath_r_portcfg_shift = 0; /* Not on IBA6120 */
-
- /* variables for sanity checking interrupt and errors */
- dd->ipath_hwe_bitsextant =
- (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
- (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
- (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
- INFINIPATH_HWE_PCIE1PLLFAILED |
- INFINIPATH_HWE_PCIE0PLLFAILED |
- INFINIPATH_HWE_PCIEPOISONEDTLP |
- INFINIPATH_HWE_PCIECPLTIMEOUT |
- INFINIPATH_HWE_PCIEBUSPARITYXTLH |
- INFINIPATH_HWE_PCIEBUSPARITYXADM |
- INFINIPATH_HWE_PCIEBUSPARITYRADM |
- INFINIPATH_HWE_MEMBISTFAILED |
- INFINIPATH_HWE_COREPLL_FBSLIP |
- INFINIPATH_HWE_COREPLL_RFSLIP |
- INFINIPATH_HWE_SERDESPLLFAILED |
- INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
- INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
- dd->ipath_i_bitsextant =
- (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
- (INFINIPATH_I_RCVAVAIL_MASK <<
- INFINIPATH_I_RCVAVAIL_SHIFT) |
- INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
- INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
- dd->ipath_e_bitsextant =
- INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
- INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
- INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
- INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
- INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
- INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
- INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
- INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
- INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
- INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
- INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
- INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
- INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
- INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
- INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
- INFINIPATH_E_HARDWARE;
-
- dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
- dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
- dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
- /*
- * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
- * 2 is Some Misc, 3 is reserved for future.
- */
- dd->ipath_eep_st_masks[0].hwerrs_to_log =
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
- /* Ignore errors in PIO/PBC on systems with unordered write-combining */
- if (ipath_unordered_wc())
- dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
-
- dd->ipath_eep_st_masks[1].hwerrs_to_log =
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
- dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
- dd->delay_mult = 2; /* SDR, 4X, can't change */
-}
-
-/* setup the MSI stuff again after a reset. I'd like to just call
- * pci_enable_msi() and request_irq() again, but when I do that,
- * the MSI enable bit doesn't get set in the command word, and
- * we switch to a different interrupt vector, which is confusing,
- * so I instead just do it all inline. Perhaps somehow can tie this
- * into the PCIe hotplug support at some point
- * Note, because I'm doing it all here, I don't call pci_disable_msi()
- * or free_irq() at the start of ipath_setup_pe_reset().
- */
-static int ipath_reinit_msi(struct ipath_devdata *dd)
-{
- int pos;
- u16 control;
- int ret;
-
- if (!dd->ipath_msi_lo) {
- dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
- "initial setup failed?\n");
- ret = 0;
- goto bail;
- }
-
- if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
- ipath_dev_err(dd, "Can't find MSI capability, "
- "can't restore MSI settings\n");
- ret = 0;
- goto bail;
- }
- ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
- dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
- pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
- dd->ipath_msi_lo);
- ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
- dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
- pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
- dd->ipath_msi_hi);
- pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
- if (!(control & PCI_MSI_FLAGS_ENABLE)) {
- ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
- "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
- control, control | PCI_MSI_FLAGS_ENABLE);
- control |= PCI_MSI_FLAGS_ENABLE;
- pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
- control);
- }
- /* now rewrite the data (vector) info */
- pci_write_config_word(dd->pcidev, pos +
- ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
- dd->ipath_msi_data);
- /* we restore the cachelinesize also, although it doesn't really
- * matter */
- pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
- dd->ipath_pci_cacheline);
- /* and now set the pci master bit again */
- pci_set_master(dd->pcidev);
- ret = 1;
-
-bail:
- return ret;
-}
-
-/* This routine sleeps, so it can only be called from user context, not
- * from interrupt context. If we need interrupt context, we can split
- * it into two routines.
-*/
-static int ipath_setup_pe_reset(struct ipath_devdata *dd)
-{
- u64 val;
- int i;
- int ret;
- u16 cmdval;
-
- pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
-
- /* Use ERROR so it shows up in logs, etc. */
- ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
- /* keep chip from being accessed in a few places */
- dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
- val = dd->ipath_control | INFINIPATH_C_RESET;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
- mb();
-
- for (i = 1; i <= 5; i++) {
- int r;
- /* allow MBIST, etc. to complete; longer on each retry.
- * We sometimes get machine checks from bus timeout if no
- * response, so for now, make it *really* long.
- */
- msleep(1000 + (1 + i) * 2000);
- if ((r =
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
- dd->ipath_pcibar0)))
- ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
- r);
- if ((r =
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
- dd->ipath_pcibar1)))
- ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
- r);
- /* now re-enable memory access */
- pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
- if ((r = pci_enable_device(dd->pcidev)))
- ipath_dev_err(dd, "pci_enable_device failed after "
- "reset: %d\n", r);
- /*
- * whether it was fully enabled or not, mark it as present
- * again (but not INITTED)
- */
- dd->ipath_flags |= IPATH_PRESENT;
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
- if (val == dd->ipath_revision) {
- ipath_cdbg(VERBOSE, "Got matching revision "
- "register %llx on try %d\n",
- (unsigned long long) val, i);
- ret = ipath_reinit_msi(dd);
- goto bail;
- }
- /* Probably getting -1 back */
- ipath_dbg("Didn't get expected revision register, "
- "got %llx, try %d\n", (unsigned long long) val,
- i + 1);
- }
- ret = 0; /* failed */
-
-bail:
- if (ret)
- ipath_6120_pcie_params(dd);
- return ret;
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
- u32 type, unsigned long pa)
-{
- u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
- unsigned long flags = 0; /* keep gcc quiet */
- int tidx;
- spinlock_t *tidlockp;
-
- if (!dd->ipath_kregbase)
- return;
-
- if (pa != dd->ipath_tidinvalid) {
- if (pa & ((1U << 11) - 1)) {
- dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
- "not 2KB aligned!\n", pa);
- return;
- }
- pa >>= 11;
- /* paranoia check */
- if (pa & ~INFINIPATH_RT_ADDR_MASK)
- ipath_dev_err(dd,
- "BUG: Physical page address 0x%lx "
- "has bits set in 31-29\n", pa);
-
- if (type == RCVHQ_RCV_TYPE_EAGER)
- pa |= dd->ipath_tidtemplate;
- else /* for now, always full 4KB page */
- pa |= 2 << 29;
- }
-
- /*
- * Workaround chip bug 9437 by writing the scratch register
- * before and after the TID, and with an io write barrier.
- * We use a spinlock around the writes, so they can't intermix
- * with other TID (eager or expected) writes (the chip bug
- * is triggered by back to back TID writes). Unfortunately, this
- * call can be done from interrupt level for the port 0 eager TIDs,
- * so we have to use irqsave locks.
- */
- /*
- * Assumes tidptr always > ipath_egrtidbase
- * if type == RCVHQ_RCV_TYPE_EAGER.
- */
- tidx = tidptr - dd->ipath_egrtidbase;
-
- tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->ipath_rcvegrcnt)
- ? &dd->ipath_kernel_tid_lock : &dd->ipath_user_tid_lock;
- spin_lock_irqsave(tidlockp, flags);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
- writel(pa, tidp32);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
- mmiowb();
- spin_unlock_irqrestore(tidlockp, flags);
-}
-
-/**
- * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for selection of the
- * appropriate "flavor". The static calls in cleanup just use the
- * revision-agnostic form, as they are not performance critical.
- */
-static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
- u32 type, unsigned long pa)
-{
- u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
- u32 tidx;
-
- if (!dd->ipath_kregbase)
- return;
-
- if (pa != dd->ipath_tidinvalid) {
- if (pa & ((1U << 11) - 1)) {
- dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
- "not 2KB aligned!\n", pa);
- return;
- }
- pa >>= 11;
- /* paranoia check */
- if (pa & ~INFINIPATH_RT_ADDR_MASK)
- ipath_dev_err(dd,
- "BUG: Physical page address 0x%lx "
- "has bits set in 31-29\n", pa);
-
- if (type == RCVHQ_RCV_TYPE_EAGER)
- pa |= dd->ipath_tidtemplate;
- else /* for now, always full 4KB page */
- pa |= 2 << 29;
- }
- tidx = tidptr - dd->ipath_egrtidbase;
- writel(pa, tidp32);
- mmiowb();
-}
-
-
-/**
- * ipath_pe_clear_tids - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * clear all TID entries for a port, expected and eager.
- * Used from ipath_close(). On this chip, TIDs are only 32 bits,
- * not 64, but they are still on 64 bit boundaries, so tidbase
- * is declared as u64 * for the pointer math, even though we write 32 bits
- */
-static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
- u64 __iomem *tidbase;
- unsigned long tidinv;
- int i;
-
- if (!dd->ipath_kregbase)
- return;
-
- ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
- tidinv = dd->ipath_tidinvalid;
- tidbase = (u64 __iomem *)
- ((char __iomem *)(dd->ipath_kregbase) +
- dd->ipath_rcvtidbase +
- port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
-
- for (i = 0; i < dd->ipath_rcvtidcnt; i++)
- dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
- tidinv);
-
- tidbase = (u64 __iomem *)
- ((char __iomem *)(dd->ipath_kregbase) +
- dd->ipath_rcvegrbase +
- port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
-
- for (i = 0; i < dd->ipath_rcvegrcnt; i++)
- dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
- tidinv);
-}
-
-/**
- * ipath_pe_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We set up values that we use a lot, to avoid calculating them each time
- */
-static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
-{
- u32 egrsize = dd->ipath_rcvegrbufsize;
-
- /* For now, we always allocate 4KB buffers (at init) so we can
- * receive max size packets. We may want a module parameter to
- * specify 2KB or 4KB and/or make it per port instead of per device
- * for those who want to reduce memory footprint. Note that the
- * ipath_rcvhdrentsize size must be large enough to hold the largest
- * IB header (currently 96 bytes) that we expect to handle (plus of
- * course the 2 dwords of RHF).
- */
- if (egrsize == 2048)
- dd->ipath_tidtemplate = 1U << 29;
- else if (egrsize == 4096)
- dd->ipath_tidtemplate = 2U << 29;
- else {
- egrsize = 4096;
- dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
- "%u, using %u\n", dd->ipath_rcvegrbufsize,
- egrsize);
- dd->ipath_tidtemplate = 2U << 29;
- }
- dd->ipath_tidinvalid = 0;
-}
-
-static int ipath_pe_early_init(struct ipath_devdata *dd)
-{
- dd->ipath_flags |= IPATH_4BYTE_TID;
- if (ipath_unordered_wc())
- dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
-
- /*
- * For openfabrics, we need to be able to handle an IB header of
- * 24 dwords. HT chip has arbitrary sized receive buffers, so we
- * made them the same size as the PIO buffers. This chip does not
- * handle arbitrary size buffers, so we need the header large enough
- * to handle largest IB header, but still have room for a 2KB MTU
- * standard IB packet.
- */
- dd->ipath_rcvhdrentsize = 24;
- dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
- dd->ipath_rhf_offset = 0;
- dd->ipath_egrtidbase = (u64 __iomem *)
- ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase);
-
- dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
- /*
- * the min() check here is currently a nop, but it may not always
- * be, depending on just how we do ipath_rcvegrbufsize
- */
- dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
- dd->ipath_piosize2k,
- dd->ipath_rcvegrbufsize +
- (dd->ipath_rcvhdrentsize << 2));
- dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-
- /*
- * We can request a receive interrupt for 1 or
- * more packets from current offset. For now, we set this
- * up for a single packet.
- */
- dd->ipath_rhdrhead_intr_off = 1ULL<<32;
-
- ipath_get_eeprom_info(dd);
-
- return 0;
-}
-
-int __attribute__((weak)) ipath_unordered_wc(void)
-{
- return 0;
-}
-
-/**
- * ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @pd: the infinipath port
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
- struct ipath_base_info *kinfo = kbase;
- struct ipath_devdata *dd;
-
- if (ipath_unordered_wc()) {
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
- ipath_cdbg(PROC, "Intel processor, forcing WC order\n");
- }
- else
- ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
-
- if (pd == NULL)
- goto done;
-
- dd = pd->port_dd;
-
-done:
- kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE |
- IPATH_RUNTIME_FORCE_PIOAVAIL | IPATH_RUNTIME_PIO_REGSWAPPED;
- return 0;
-}
-
-static void ipath_pe_free_irq(struct ipath_devdata *dd)
-{
- free_irq(dd->ipath_irq, dd);
- dd->ipath_irq = 0;
-}
-
-
-static struct ipath_message_header *
-ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
- return (struct ipath_message_header *)
- &rhf_addr[sizeof(u64) / sizeof(u32)];
-}
-
-static void ipath_pe_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
- dd->ipath_portcnt =
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
- dd->ipath_p0_rcvegrcnt =
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt);
-}
-
-static void ipath_pe_read_counters(struct ipath_devdata *dd,
- struct infinipath_counters *cntrs)
-{
- cntrs->LBIntCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt));
- cntrs->LBFlowStallCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt));
- cntrs->TxSDmaDescCnt = 0;
- cntrs->TxUnsupVLErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt));
- cntrs->TxDataPktCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt));
- cntrs->TxFlowPktCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt));
- cntrs->TxDwordCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt));
- cntrs->TxLenErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt));
- cntrs->TxMaxMinLenErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt));
- cntrs->TxUnderrunCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt));
- cntrs->TxFlowStallCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt));
- cntrs->TxDroppedPktCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt));
- cntrs->RxDroppedPktCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt));
- cntrs->RxDataPktCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt));
- cntrs->RxFlowPktCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt));
- cntrs->RxDwordCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt));
- cntrs->RxLenErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt));
- cntrs->RxMaxMinLenErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt));
- cntrs->RxICRCErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt));
- cntrs->RxVCRCErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt));
- cntrs->RxFlowCtrlErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt));
- cntrs->RxBadFormatCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt));
- cntrs->RxLinkProblemCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt));
- cntrs->RxEBPCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt));
- cntrs->RxLPCRCErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt));
- cntrs->RxBufOvflCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt));
- cntrs->RxTIDFullErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt));
- cntrs->RxTIDValidErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt));
- cntrs->RxPKeyMismatchCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxPKeyMismatchCnt));
- cntrs->RxP0HdrEgrOvflCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt));
- cntrs->RxP1HdrEgrOvflCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt));
- cntrs->RxP2HdrEgrOvflCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt));
- cntrs->RxP3HdrEgrOvflCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt));
- cntrs->RxP4HdrEgrOvflCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt));
- cntrs->RxP5HdrEgrOvflCnt = 0;
- cntrs->RxP6HdrEgrOvflCnt = 0;
- cntrs->RxP7HdrEgrOvflCnt = 0;
- cntrs->RxP8HdrEgrOvflCnt = 0;
- cntrs->RxP9HdrEgrOvflCnt = 0;
- cntrs->RxP10HdrEgrOvflCnt = 0;
- cntrs->RxP11HdrEgrOvflCnt = 0;
- cntrs->RxP12HdrEgrOvflCnt = 0;
- cntrs->RxP13HdrEgrOvflCnt = 0;
- cntrs->RxP14HdrEgrOvflCnt = 0;
- cntrs->RxP15HdrEgrOvflCnt = 0;
- cntrs->RxP16HdrEgrOvflCnt = 0;
- cntrs->IBStatusChangeCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt));
- cntrs->IBLinkErrRecoveryCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt));
- cntrs->IBLinkDownedCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt));
- cntrs->IBSymbolErrCnt =
- ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt));
- cntrs->RxVL15DroppedPktCnt = 0;
- cntrs->RxOtherLocalPhyErrCnt = 0;
- cntrs->PcieRetryBufDiagQwordCnt = 0;
- cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs;
- cntrs->LocalLinkIntegrityErrCnt = dd->ipath_lli_errs;
- cntrs->RxVlErrCnt = 0;
- cntrs->RxDlidFltrCnt = 0;
-}
-
-
-/* no interrupt fallback for these chips */
-static int ipath_pe_nointr_fallback(struct ipath_devdata *dd)
-{
- return 0;
-}
-
-
-/*
- * reset the XGXS (between serdes and IBC). Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining. To do this right, we reset IBC
- * as well.
- */
-static void ipath_pe_xgxs_reset(struct ipath_devdata *dd)
-{
- u64 val, prev_val;
-
- prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- val = prev_val | INFINIPATH_XGXS_RESET;
- prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- dd->ipath_control);
-}
-
-
-static int ipath_pe_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
- int ret;
-
- switch (which) {
- case IPATH_IB_CFG_LWID:
- ret = dd->ipath_link_width_active;
- break;
- case IPATH_IB_CFG_SPD:
- ret = dd->ipath_link_speed_active;
- break;
- case IPATH_IB_CFG_LWID_ENB:
- ret = dd->ipath_link_width_enabled;
- break;
- case IPATH_IB_CFG_SPD_ENB:
- ret = dd->ipath_link_speed_enabled;
- break;
- default:
- ret = -ENOTSUPP;
- break;
- }
- return ret;
-}
-
-
-/* we assume range checking is already done, if needed */
-static int ipath_pe_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
- int ret = 0;
-
- if (which == IPATH_IB_CFG_LWID_ENB)
- dd->ipath_link_width_enabled = val;
- else if (which == IPATH_IB_CFG_SPD_ENB)
- dd->ipath_link_speed_enabled = val;
- else
- ret = -ENOTSUPP;
- return ret;
-}
-
-static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b)
-{
-}
-
-
-static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
- ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs),
- ipath_ib_linktrstate(dd, ibcs));
- return 0;
-}
-
-
-/**
- * ipath_init_iba6120_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
-{
- dd->ipath_f_intrsetup = ipath_pe_intconfig;
- dd->ipath_f_bus = ipath_setup_pe_config;
- dd->ipath_f_reset = ipath_setup_pe_reset;
- dd->ipath_f_get_boardname = ipath_pe_boardname;
- dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors;
- dd->ipath_f_early_init = ipath_pe_early_init;
- dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors;
- dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
- dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
- dd->ipath_f_clear_tids = ipath_pe_clear_tids;
- /*
- * _f_put_tid may get changed after we read the chip revision,
- * but we start with the safe version for all revs
- */
- dd->ipath_f_put_tid = ipath_pe_put_tid;
- dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
- dd->ipath_f_setextled = ipath_setup_pe_setextled;
- dd->ipath_f_get_base_info = ipath_pe_get_base_info;
- dd->ipath_f_free_irq = ipath_pe_free_irq;
- dd->ipath_f_tidtemplate = ipath_pe_tidtemplate;
- dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback;
- dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset;
- dd->ipath_f_get_msgheader = ipath_pe_get_msgheader;
- dd->ipath_f_config_ports = ipath_pe_config_ports;
- dd->ipath_f_read_counters = ipath_pe_read_counters;
- dd->ipath_f_get_ib_cfg = ipath_pe_get_ib_cfg;
- dd->ipath_f_set_ib_cfg = ipath_pe_set_ib_cfg;
- dd->ipath_f_config_jint = ipath_pe_config_jint;
- dd->ipath_f_ib_updown = ipath_pe_ib_updown;
-
-
- /* initialize chip-specific variables */
- ipath_init_pe_variables(dd);
-}
-
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
deleted file mode 100644
index fb70712ac85..00000000000
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ /dev/null
@@ -1,2558 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/*
- * This file contains all of the code that is specific to the
- * InfiniPath 7220 chip (except that specific to the SerDes)
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-#include "ipath_7220.h"
-
-static void ipath_setup_7220_setextled(struct ipath_devdata *, u64, u64);
-
-static unsigned ipath_compat_ddr_negotiate = 1;
-
-module_param_named(compat_ddr_negotiate, ipath_compat_ddr_negotiate, uint,
- S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(compat_ddr_negotiate,
- "Attempt pre-IBTA 1.2 DDR speed negotiation");
-
-static unsigned ipath_sdma_fetch_arb = 1;
-module_param_named(fetch_arb, ipath_sdma_fetch_arb, uint, S_IRUGO);
-MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
-
-/*
- * This file contains almost all the chip-specific register information and
- * access functions for the QLogic InfiniPath 7220 PCI-Express chip, with the
- * exception of SerDes support, which is in ipath_sd7220.c.
- *
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- */
-struct _infinipath_do_not_use_kernel_regs {
- unsigned long long Revision;
- unsigned long long Control;
- unsigned long long PageAlign;
- unsigned long long PortCnt;
- unsigned long long DebugPortSelect;
- unsigned long long DebugSigsIntSel; /* was Reserved0;*/
- unsigned long long SendRegBase;
- unsigned long long UserRegBase;
- unsigned long long CounterRegBase;
- unsigned long long Scratch;
- unsigned long long EEPROMAddrCmd; /* was Reserved1; */
- unsigned long long EEPROMData; /* was Reserved2; */
- unsigned long long IntBlocked;
- unsigned long long IntMask;
- unsigned long long IntStatus;
- unsigned long long IntClear;
- unsigned long long ErrorMask;
- unsigned long long ErrorStatus;
- unsigned long long ErrorClear;
- unsigned long long HwErrMask;
- unsigned long long HwErrStatus;
- unsigned long long HwErrClear;
- unsigned long long HwDiagCtrl;
- unsigned long long MDIO;
- unsigned long long IBCStatus;
- unsigned long long IBCCtrl;
- unsigned long long ExtStatus;
- unsigned long long ExtCtrl;
- unsigned long long GPIOOut;
- unsigned long long GPIOMask;
- unsigned long long GPIOStatus;
- unsigned long long GPIOClear;
- unsigned long long RcvCtrl;
- unsigned long long RcvBTHQP;
- unsigned long long RcvHdrSize;
- unsigned long long RcvHdrCnt;
- unsigned long long RcvHdrEntSize;
- unsigned long long RcvTIDBase;
- unsigned long long RcvTIDCnt;
- unsigned long long RcvEgrBase;
- unsigned long long RcvEgrCnt;
- unsigned long long RcvBufBase;
- unsigned long long RcvBufSize;
- unsigned long long RxIntMemBase;
- unsigned long long RxIntMemSize;
- unsigned long long RcvPartitionKey;
- unsigned long long RcvQPMulticastPort;
- unsigned long long RcvPktLEDCnt;
- unsigned long long IBCDDRCtrl;
- unsigned long long HRTBT_GUID;
- unsigned long long IB_SDTEST_IF_TX;
- unsigned long long IB_SDTEST_IF_RX;
- unsigned long long IBCDDRCtrl2;
- unsigned long long IBCDDRStatus;
- unsigned long long JIntReload;
- unsigned long long IBNCModeCtrl;
- unsigned long long SendCtrl;
- unsigned long long SendBufBase;
- unsigned long long SendBufSize;
- unsigned long long SendBufCnt;
- unsigned long long SendAvailAddr;
- unsigned long long TxIntMemBase;
- unsigned long long TxIntMemSize;
- unsigned long long SendDmaBase;
- unsigned long long SendDmaLenGen;
- unsigned long long SendDmaTail;
- unsigned long long SendDmaHead;
- unsigned long long SendDmaHeadAddr;
- unsigned long long SendDmaBufMask0;
- unsigned long long SendDmaBufMask1;
- unsigned long long SendDmaBufMask2;
- unsigned long long SendDmaStatus;
- unsigned long long SendBufferError;
- unsigned long long SendBufferErrorCONT1;
- unsigned long long SendBufErr2; /* was Reserved6SBE[0/6] */
- unsigned long long Reserved6L[2];
- unsigned long long AvailUpdCount;
- unsigned long long RcvHdrAddr0;
- unsigned long long RcvHdrAddrs[16]; /* Why enumerate? */
- unsigned long long Reserved7hdtl; /* Align next to 300 */
- unsigned long long RcvHdrTailAddr0; /* 300, like others */
- unsigned long long RcvHdrTailAddrs[16];
- unsigned long long Reserved9SW[7]; /* was [8]; we have 17 ports */
- unsigned long long IbsdEpbAccCtl; /* IB Serdes EPB access control */
- unsigned long long IbsdEpbTransReg; /* IB Serdes EPB Transaction */
- unsigned long long Reserved10sds; /* was SerdesStatus on */
- unsigned long long XGXSConfig;
- unsigned long long IBSerDesCtrl; /* Was IBPLLCfg on Monty */
- unsigned long long EEPCtlStat; /* for "boot" EEPROM/FLASH */
- unsigned long long EEPAddrCmd;
- unsigned long long EEPData;
- unsigned long long PcieEpbAccCtl;
- unsigned long long PcieEpbTransCtl;
- unsigned long long EfuseCtl; /* E-Fuse control */
- unsigned long long EfuseData[4];
- unsigned long long ProcMon;
- /* this chip moves following two from previous 200, 208 */
- unsigned long long PCIeRBufTestReg0;
- unsigned long long PCIeRBufTestReg1;
- /* added for this chip */
- unsigned long long PCIeRBufTestReg2;
- unsigned long long PCIeRBufTestReg3;
- /* added for this chip, debug only */
- unsigned long long SPC_JTAG_ACCESS_REG;
- unsigned long long LAControlReg;
- unsigned long long GPIODebugSelReg;
- unsigned long long DebugPortValueReg;
- /* added for this chip, DMA */
- unsigned long long SendDmaBufUsed[3];
- unsigned long long SendDmaReqTagUsed;
- /*
- * added for this chip, EFUSE: note that these program 64-bit
- * words 2 and 3 */
- unsigned long long efuse_pgm_data[2];
- unsigned long long Reserved11LAalign[10]; /* Skip 4B0..4F8 */
- /* we have 30 regs for DDS and RXEQ in IB SERDES */
- unsigned long long SerDesDDSRXEQ[30];
- unsigned long long Reserved12LAalign[2]; /* Skip 5F0, 5F8 */
- /* added for LA debug support */
- unsigned long long LAMemory[32];
-};
-
-struct _infinipath_do_not_use_counters {
- __u64 LBIntCnt;
- __u64 LBFlowStallCnt;
- __u64 TxSDmaDescCnt; /* was Reserved1 */
- __u64 TxUnsupVLErrCnt;
- __u64 TxDataPktCnt;
- __u64 TxFlowPktCnt;
- __u64 TxDwordCnt;
- __u64 TxLenErrCnt;
- __u64 TxMaxMinLenErrCnt;
- __u64 TxUnderrunCnt;
- __u64 TxFlowStallCnt;
- __u64 TxDroppedPktCnt;
- __u64 RxDroppedPktCnt;
- __u64 RxDataPktCnt;
- __u64 RxFlowPktCnt;
- __u64 RxDwordCnt;
- __u64 RxLenErrCnt;
- __u64 RxMaxMinLenErrCnt;
- __u64 RxICRCErrCnt;
- __u64 RxVCRCErrCnt;
- __u64 RxFlowCtrlErrCnt;
- __u64 RxBadFormatCnt;
- __u64 RxLinkProblemCnt;
- __u64 RxEBPCnt;
- __u64 RxLPCRCErrCnt;
- __u64 RxBufOvflCnt;
- __u64 RxTIDFullErrCnt;
- __u64 RxTIDValidErrCnt;
- __u64 RxPKeyMismatchCnt;
- __u64 RxP0HdrEgrOvflCnt;
- __u64 RxP1HdrEgrOvflCnt;
- __u64 RxP2HdrEgrOvflCnt;
- __u64 RxP3HdrEgrOvflCnt;
- __u64 RxP4HdrEgrOvflCnt;
- __u64 RxP5HdrEgrOvflCnt;
- __u64 RxP6HdrEgrOvflCnt;
- __u64 RxP7HdrEgrOvflCnt;
- __u64 RxP8HdrEgrOvflCnt;
- __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */
- __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */
- __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */
- __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */
- __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 */
- __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */
- __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */
- __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */
- __u64 IBStatusChangeCnt;
- __u64 IBLinkErrRecoveryCnt;
- __u64 IBLinkDownedCnt;
- __u64 IBSymbolErrCnt;
- /* The following are new for IBA7220 */
- __u64 RxVL15DroppedPktCnt;
- __u64 RxOtherLocalPhyErrCnt;
- __u64 PcieRetryBufDiagQwordCnt;
- __u64 ExcessBufferOvflCnt;
- __u64 LocalLinkIntegrityErrCnt;
- __u64 RxVlErrCnt;
- __u64 RxDlidFltrCnt;
- __u64 Reserved8[7];
- __u64 PSStat;
- __u64 PSStart;
- __u64 PSInterval;
- __u64 PSRcvDataCount;
- __u64 PSRcvPktsCount;
- __u64 PSXmitDataCount;
- __u64 PSXmitPktsCount;
- __u64 PSXmitWaitCount;
-};
-
-#define IPATH_KREG_OFFSET(field) (offsetof( \
- struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64))
-#define IPATH_CREG_OFFSET(field) (offsetof( \
- struct _infinipath_do_not_use_counters, field) / sizeof(u64))
-
-static const struct ipath_kregs ipath_7220_kregs = {
- .kr_control = IPATH_KREG_OFFSET(Control),
- .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase),
- .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect),
- .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear),
- .kr_errormask = IPATH_KREG_OFFSET(ErrorMask),
- .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus),
- .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl),
- .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus),
- .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear),
- .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask),
- .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut),
- .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus),
- .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl),
- .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear),
- .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask),
- .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus),
- .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl),
- .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus),
- .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked),
- .kr_intclear = IPATH_KREG_OFFSET(IntClear),
- .kr_intmask = IPATH_KREG_OFFSET(IntMask),
- .kr_intstatus = IPATH_KREG_OFFSET(IntStatus),
- .kr_mdio = IPATH_KREG_OFFSET(MDIO),
- .kr_pagealign = IPATH_KREG_OFFSET(PageAlign),
- .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey),
- .kr_portcnt = IPATH_KREG_OFFSET(PortCnt),
- .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP),
- .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase),
- .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize),
- .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl),
- .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase),
- .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt),
- .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt),
- .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize),
- .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize),
- .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase),
- .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize),
- .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase),
- .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt),
- .kr_revision = IPATH_KREG_OFFSET(Revision),
- .kr_scratch = IPATH_KREG_OFFSET(Scratch),
- .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError),
- .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl),
- .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendAvailAddr),
- .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendBufBase),
- .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendBufCnt),
- .kr_sendpiosize = IPATH_KREG_OFFSET(SendBufSize),
- .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase),
- .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase),
- .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize),
- .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase),
-
- .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig),
-
- /* send dma related regs */
- .kr_senddmabase = IPATH_KREG_OFFSET(SendDmaBase),
- .kr_senddmalengen = IPATH_KREG_OFFSET(SendDmaLenGen),
- .kr_senddmatail = IPATH_KREG_OFFSET(SendDmaTail),
- .kr_senddmahead = IPATH_KREG_OFFSET(SendDmaHead),
- .kr_senddmaheadaddr = IPATH_KREG_OFFSET(SendDmaHeadAddr),
- .kr_senddmabufmask0 = IPATH_KREG_OFFSET(SendDmaBufMask0),
- .kr_senddmabufmask1 = IPATH_KREG_OFFSET(SendDmaBufMask1),
- .kr_senddmabufmask2 = IPATH_KREG_OFFSET(SendDmaBufMask2),
- .kr_senddmastatus = IPATH_KREG_OFFSET(SendDmaStatus),
-
- /* SerDes related regs */
- .kr_ibserdesctrl = IPATH_KREG_OFFSET(IBSerDesCtrl),
- .kr_ib_epbacc = IPATH_KREG_OFFSET(IbsdEpbAccCtl),
- .kr_ib_epbtrans = IPATH_KREG_OFFSET(IbsdEpbTransReg),
- .kr_pcie_epbacc = IPATH_KREG_OFFSET(PcieEpbAccCtl),
- .kr_pcie_epbtrans = IPATH_KREG_OFFSET(PcieEpbTransCtl),
- .kr_ib_ddsrxeq = IPATH_KREG_OFFSET(SerDesDDSRXEQ),
-
- /*
- * These should not be used directly via ipath_read_kreg64(),
- * use them with ipath_read_kreg64_port()
- */
- .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
- .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
-
- /*
- * The rcvpktled register controls one of the debug port signals, so
- * a packet activity LED can be connected to it.
- */
- .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
- .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0),
- .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1),
-
- .kr_hrtbt_guid = IPATH_KREG_OFFSET(HRTBT_GUID),
- .kr_ibcddrctrl = IPATH_KREG_OFFSET(IBCDDRCtrl),
- .kr_ibcddrstatus = IPATH_KREG_OFFSET(IBCDDRStatus),
- .kr_jintreload = IPATH_KREG_OFFSET(JIntReload)
-};
-
-static const struct ipath_cregs ipath_7220_cregs = {
- .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt),
- .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt),
- .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt),
- .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt),
- .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt),
- .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt),
- .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt),
- .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt),
- .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt),
- .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt),
- .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt),
- .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt),
- .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt),
- .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt),
- .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt),
- .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt),
- .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt),
- .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt),
- .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt),
- .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt),
- .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt),
- .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt),
- .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt),
- .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt),
- .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt),
- .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt),
- .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt),
- .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt),
- .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt),
- .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt),
- .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt),
- .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt),
- .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt),
- .cr_vl15droppedpktcnt = IPATH_CREG_OFFSET(RxVL15DroppedPktCnt),
- .cr_rxotherlocalphyerrcnt =
- IPATH_CREG_OFFSET(RxOtherLocalPhyErrCnt),
- .cr_excessbufferovflcnt = IPATH_CREG_OFFSET(ExcessBufferOvflCnt),
- .cr_locallinkintegrityerrcnt =
- IPATH_CREG_OFFSET(LocalLinkIntegrityErrCnt),
- .cr_rxvlerrcnt = IPATH_CREG_OFFSET(RxVlErrCnt),
- .cr_rxdlidfltrcnt = IPATH_CREG_OFFSET(RxDlidFltrCnt),
- .cr_psstat = IPATH_CREG_OFFSET(PSStat),
- .cr_psstart = IPATH_CREG_OFFSET(PSStart),
- .cr_psinterval = IPATH_CREG_OFFSET(PSInterval),
- .cr_psrcvdatacount = IPATH_CREG_OFFSET(PSRcvDataCount),
- .cr_psrcvpktscount = IPATH_CREG_OFFSET(PSRcvPktsCount),
- .cr_psxmitdatacount = IPATH_CREG_OFFSET(PSXmitDataCount),
- .cr_psxmitpktscount = IPATH_CREG_OFFSET(PSXmitPktsCount),
- .cr_psxmitwaitcount = IPATH_CREG_OFFSET(PSXmitWaitCount),
-};
-
-/* kr_control bits */
-#define INFINIPATH_C_RESET (1U<<7)
-
-/* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK ((1ULL<<17)-1)
-#define INFINIPATH_I_RCVURG_SHIFT 32
-#define INFINIPATH_I_RCVAVAIL_MASK ((1ULL<<17)-1)
-#define INFINIPATH_I_RCVAVAIL_SHIFT 0
-#define INFINIPATH_I_SERDESTRIMDONE (1ULL<<27)
-
-/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x00000000000000ffULL
-#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0
-#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
-#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
-#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
-#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
-#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
-#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
-#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
-#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL
-/* specific to this chip */
-#define INFINIPATH_HWE_PCIECPLDATAQUEUEERR 0x0000000000000040ULL
-#define INFINIPATH_HWE_PCIECPLHDRQUEUEERR 0x0000000000000080ULL
-#define INFINIPATH_HWE_SDMAMEMREADERR 0x0000000010000000ULL
-#define INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED 0x2000000000000000ULL
-#define INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT 0x0100000000000000ULL
-#define INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT 0x0200000000000000ULL
-#define INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT 0x0400000000000000ULL
-#define INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT 0x0800000000000000ULL
-#define INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR 0x0000008000000000ULL
-#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
-#define INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL
-#define INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL
-
-#define IBA7220_IBCS_LINKTRAININGSTATE_MASK 0x1F
-#define IBA7220_IBCS_LINKSTATE_SHIFT 5
-#define IBA7220_IBCS_LINKSPEED_SHIFT 8
-#define IBA7220_IBCS_LINKWIDTH_SHIFT 9
-
-#define IBA7220_IBCC_LINKINITCMD_MASK 0x7ULL
-#define IBA7220_IBCC_LINKCMD_SHIFT 19
-#define IBA7220_IBCC_MAXPKTLEN_SHIFT 21
-
-/* kr_ibcddrctrl bits */
-#define IBA7220_IBC_DLIDLMC_MASK 0xFFFFFFFFUL
-#define IBA7220_IBC_DLIDLMC_SHIFT 32
-#define IBA7220_IBC_HRTBT_MASK 3
-#define IBA7220_IBC_HRTBT_SHIFT 16
-#define IBA7220_IBC_HRTBT_ENB 0x10000UL
-#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8)
-#define IBA7220_IBC_LREV_MASK 1
-#define IBA7220_IBC_LREV_SHIFT 8
-#define IBA7220_IBC_RXPOL_MASK 1
-#define IBA7220_IBC_RXPOL_SHIFT 7
-#define IBA7220_IBC_WIDTH_SHIFT 5
-#define IBA7220_IBC_WIDTH_MASK 0x3
-#define IBA7220_IBC_WIDTH_1X_ONLY (0<<IBA7220_IBC_WIDTH_SHIFT)
-#define IBA7220_IBC_WIDTH_4X_ONLY (1<<IBA7220_IBC_WIDTH_SHIFT)
-#define IBA7220_IBC_WIDTH_AUTONEG (2<<IBA7220_IBC_WIDTH_SHIFT)
-#define IBA7220_IBC_SPEED_AUTONEG (1<<1)
-#define IBA7220_IBC_SPEED_SDR (1<<2)
-#define IBA7220_IBC_SPEED_DDR (1<<3)
-#define IBA7220_IBC_SPEED_AUTONEG_MASK (0x7<<1)
-#define IBA7220_IBC_IBTA_1_2_MASK (1)
-
-/* kr_ibcddrstatus */
-/* link latency shift is 0, don't bother defining */
-#define IBA7220_DDRSTAT_LINKLAT_MASK 0x3ffffff
-
-/* kr_extstatus bits */
-#define INFINIPATH_EXTS_FREQSEL 0x2
-#define INFINIPATH_EXTS_SERDESSEL 0x4
-#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
-#define INFINIPATH_EXTS_MEMBIST_DISABLED 0x0000000000008000
-
-/* kr_xgxsconfig bits */
-#define INFINIPATH_XGXS_RESET 0x5ULL
-#define INFINIPATH_XGXS_FC_SAFE (1ULL<<63)
-
-/* kr_rcvpktledcnt */
-#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */
-#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */
-
-#define _IPATH_GPIO_SDA_NUM 1
-#define _IPATH_GPIO_SCL_NUM 0
-
-#define IPATH_GPIO_SDA (1ULL << \
- (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-#define IPATH_GPIO_SCL (1ULL << \
- (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
-
-#define IBA7220_R_INTRAVAIL_SHIFT 17
-#define IBA7220_R_TAILUPD_SHIFT 35
-#define IBA7220_R_PORTCFG_SHIFT 36
-
-#define INFINIPATH_JINT_PACKETSHIFT 16
-#define INFINIPATH_JINT_DEFAULT_IDLE_TICKS 0
-#define INFINIPATH_JINT_DEFAULT_MAX_PACKETS 0
-
-#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
-
-/*
- * the size bits give us 2^N, in KB units. 0 marks as invalid,
- * and 7 is reserved. We currently use only 2KB and 4KB
- */
-#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */
-#define IBA7220_TID_SZ_2K (1UL<<IBA7220_TID_SZ_SHIFT) /* 2KB */
-#define IBA7220_TID_SZ_4K (2UL<<IBA7220_TID_SZ_SHIFT) /* 4KB */
-#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */
-
-#define IPATH_AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */
-
-static char int_type[16] = "auto";
-module_param_string(interrupt_type, int_type, sizeof(int_type), 0444);
-MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx\n");
-
-/* packet rate matching delay; chip has support */
-static u8 rate_to_delay[2][2] = {
- /* 1x, 4x */
- { 8, 2 }, /* SDR */
- { 4, 1 } /* DDR */
-};
-
-/* 7220 specific hardware errors... */
-static const struct ipath_hwerror_msgs ipath_7220_hwerror_msgs[] = {
- INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
- INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
- /*
- * In practice, it's unlikely that we'll see PCIe PLL, or bus
- * parity or memory parity error failures, because most likely we
- * won't be able to talk to the core of the chip. Nonetheless, we
- * might see them, if they are in parts of the PCIe core that aren't
- * essential.
- */
- INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
- INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
- INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
- INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
- INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
- INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
- INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
- INFINIPATH_HWE_MSG(PCIECPLDATAQUEUEERR, "PCIe cpl header queue"),
- INFINIPATH_HWE_MSG(PCIECPLHDRQUEUEERR, "PCIe cpl data queue"),
- INFINIPATH_HWE_MSG(SDMAMEMREADERR, "Send DMA memory read"),
- INFINIPATH_HWE_MSG(CLK_UC_PLLNOTLOCKED, "uC PLL clock not locked"),
- INFINIPATH_HWE_MSG(PCIESERDESQ0PCLKNOTDETECT,
- "PCIe serdes Q0 no clock"),
- INFINIPATH_HWE_MSG(PCIESERDESQ1PCLKNOTDETECT,
- "PCIe serdes Q1 no clock"),
- INFINIPATH_HWE_MSG(PCIESERDESQ2PCLKNOTDETECT,
- "PCIe serdes Q2 no clock"),
- INFINIPATH_HWE_MSG(PCIESERDESQ3PCLKNOTDETECT,
- "PCIe serdes Q3 no clock"),
- INFINIPATH_HWE_MSG(DDSRXEQMEMORYPARITYERR,
- "DDS RXEQ memory parity"),
- INFINIPATH_HWE_MSG(IB_UC_MEMORYPARITYERR, "IB uC memory parity"),
- INFINIPATH_HWE_MSG(PCIE_UC_OCT0MEMORYPARITYERR,
- "PCIe uC oct0 memory parity"),
- INFINIPATH_HWE_MSG(PCIE_UC_OCT1MEMORYPARITYERR,
- "PCIe uC oct1 memory parity"),
-};
-
-static void autoneg_work(struct work_struct *);
-
-/*
- * the offset is different for different configured port numbers, since
- * port0 is fixed in size, but others can vary. Make it a function to
- * make the issue more obvious.
-*/
-static inline u32 port_egrtid_idx(struct ipath_devdata *dd, unsigned port)
-{
- return port ? dd->ipath_p0_rcvegrcnt +
- (port-1) * dd->ipath_rcvegrcnt : 0;
-}
-
-static void ipath_7220_txe_recover(struct ipath_devdata *dd)
-{
- ++ipath_stats.sps_txeparity;
-
- dev_info(&dd->pcidev->dev,
- "Recovering from TXE PIO parity error\n");
- ipath_disarm_senderrbufs(dd);
-}
-
-
-/**
- * ipath_7220_handle_hwerrors - display hardware errors.
- * @dd: the infinipath device
- * @msg: the output buffer
- * @msgl: the size of the output buffer
- *
- * Use same msg buffer as regular errors to avoid excessive stack
- * use. Most hardware errors are catastrophic, but for right now,
- * we'll print them and continue. We reuse the same message buffer as
- * ipath_handle_errors() to avoid excessive stack usage.
- */
-static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
- size_t msgl)
-{
- ipath_err_t hwerrs;
- u32 bits, ctrl;
- int isfatal = 0;
- char bitsmsg[64];
- int log_idx;
-
- hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
- if (!hwerrs) {
- /*
- * better than printing confusing messages
- * This seems to be related to clearing the crc error, or
- * the pll error during init.
- */
- ipath_cdbg(VERBOSE, "Called but no hardware errors set\n");
- goto bail;
- } else if (hwerrs == ~0ULL) {
- ipath_dev_err(dd, "Read of hardware error status failed "
- "(all bits set); ignoring\n");
- goto bail;
- }
- ipath_stats.sps_hwerrs++;
-
- /*
- * Always clear the error status register, except MEMBISTFAIL,
- * regardless of whether we continue or stop using the chip.
- * We want that set so we know it failed, even across driver reload.
- * We'll still ignore it in the hwerrmask. We do this partly for
- * diagnostics, but also for support.
- */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- hwerrs&~INFINIPATH_HWE_MEMBISTFAILED);
-
- hwerrs &= dd->ipath_hwerrmask;
-
- /* We log some errors to EEPROM, check if we have any of those. */
- for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
- if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
- ipath_inc_eeprom_err(dd, log_idx, 1);
- /*
- * Make sure we get this much out, unless told to be quiet,
- * or it's occurred within the last 5 seconds.
- */
- if ((hwerrs & ~(dd->ipath_lasthwerror |
- ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
- (ipath_debug & __IPATH_VERBDBG))
- dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
- "(cleared)\n", (unsigned long long) hwerrs);
- dd->ipath_lasthwerror |= hwerrs;
-
- if (hwerrs & ~dd->ipath_hwe_bitsextant)
- ipath_dev_err(dd, "hwerror interrupt with unknown errors "
- "%llx set\n", (unsigned long long)
- (hwerrs & ~dd->ipath_hwe_bitsextant));
-
- if (hwerrs & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR)
- ipath_sd7220_clr_ibpar(dd);
-
- ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
- /*
- * Parity errors in send memory are recoverable by h/w
- * just do housekeeping, exit freeze mode and continue.
- */
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_7220_txe_recover(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- }
- if (hwerrs) {
- /*
- * If any bits we aren't ignoring are set, only make the
- * complaint once, in case it's stuck or recurring,
- * and we get here multiple times.
- * Force the link down, so the switch knows, and the
- * LEDs are turned off.
- */
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
- ipath_setup_7220_setextled(dd,
- INFINIPATH_IBCS_L_STATE_DOWN,
- INFINIPATH_IBCS_LT_STATE_DISABLED);
- ipath_dev_err(dd, "Fatal Hardware Error "
- "(freeze mode), no longer"
- " usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- /*
- * Mark as having had an error for driver, and also
- * for /sys and status word mapped to user programs.
- * This marks unit as not usable, until reset.
- */
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- dd->ipath_flags &= ~IPATH_INITTED;
- } else {
- ipath_dbg("Clearing freezemode on ignored or "
- "recovered hardware error\n");
- ipath_clear_freeze(dd);
- }
- }
-
- *msg = '\0';
-
- if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
- strlcat(msg, "[Memory BIST test failed, "
- "InfiniPath hardware unusable]", msgl);
- /* ignore from now on, so disable until driver reloaded */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- ipath_format_hwerrors(hwerrs,
- ipath_7220_hwerror_msgs,
- ARRAY_SIZE(ipath_7220_hwerror_msgs),
- msg, msgl);
-
- if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
- << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
- bits = (u32) ((hwerrs >>
- INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) &
- INFINIPATH_HWE_PCIEMEMPARITYERR_MASK);
- snprintf(bitsmsg, sizeof bitsmsg,
- "[PCIe Mem Parity Errs %x] ", bits);
- strlcat(msg, bitsmsg, msgl);
- }
-
-#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \
- INFINIPATH_HWE_COREPLL_RFSLIP)
-
- if (hwerrs & _IPATH_PLL_FAIL) {
- snprintf(bitsmsg, sizeof bitsmsg,
- "[PLL failed (%llx), InfiniPath hardware unusable]",
- (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
- strlcat(msg, bitsmsg, msgl);
- /* ignore from now on, so disable until driver reloaded */
- dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) {
- /*
- * If it occurs, it is left masked since the external
- * interface is unused.
- */
- dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
- }
-
- ipath_dev_err(dd, "%s hardware error\n", msg);
- /*
- * For the /sys status file. If no trailing } is copied, we'll
- * know it was truncated.
- */
- if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
- snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
- "{%s}", msg);
-bail:;
-}
-
-/**
- * ipath_7220_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * info is based on the board revision register
- */
-static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
- size_t namelen)
-{
- char *n = NULL;
- u8 boardrev = dd->ipath_boardrev;
- int ret;
-
- if (boardrev == 15) {
- /*
- * Emulator sometimes comes up all-ones, rather than zero.
- */
- boardrev = 0;
- dd->ipath_boardrev = boardrev;
- }
- switch (boardrev) {
- case 0:
- n = "InfiniPath_7220_Emulation";
- break;
- case 1:
- n = "InfiniPath_QLE7240";
- break;
- case 2:
- n = "InfiniPath_QLE7280";
- break;
- case 3:
- n = "InfiniPath_QLE7242";
- break;
- case 4:
- n = "InfiniPath_QEM7240";
- break;
- case 5:
- n = "InfiniPath_QMI7240";
- break;
- case 6:
- n = "InfiniPath_QMI7264";
- break;
- case 7:
- n = "InfiniPath_QMH7240";
- break;
- case 8:
- n = "InfiniPath_QME7240";
- break;
- case 9:
- n = "InfiniPath_QLE7250";
- break;
- case 10:
- n = "InfiniPath_QLE7290";
- break;
- case 11:
- n = "InfiniPath_QEM7250";
- break;
- case 12:
- n = "InfiniPath_QLE-Bringup";
- break;
- default:
- ipath_dev_err(dd,
- "Don't yet know about board with ID %u\n",
- boardrev);
- snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
- boardrev);
- break;
- }
- if (n)
- snprintf(name, namelen, "%s", n);
-
- if (dd->ipath_majrev != 5 || !dd->ipath_minrev ||
- dd->ipath_minrev > 2) {
- ipath_dev_err(dd, "Unsupported InfiniPath hardware "
- "revision %u.%u!\n",
- dd->ipath_majrev, dd->ipath_minrev);
- ret = 1;
- } else if (dd->ipath_minrev == 1 &&
- !(dd->ipath_flags & IPATH_INITTED)) {
- /* Rev1 chips are prototype. Complain at init, but allow use */
- ipath_dev_err(dd, "Unsupported hardware "
- "revision %u.%u, Contact support@qlogic.com\n",
- dd->ipath_majrev, dd->ipath_minrev);
- ret = 0;
- } else
- ret = 0;
-
- /*
- * Set here not in ipath_init_*_funcs because we have to do
- * it after we can read chip registers.
- */
- dd->ipath_ureg_align = 0x10000; /* 64KB alignment */
-
- return ret;
-}
-
-/**
- * ipath_7220_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those error bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask
- */
-static void ipath_7220_init_hwerrors(struct ipath_devdata *dd)
-{
- ipath_err_t val;
- u64 extsval;
-
- extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
- if (!(extsval & (INFINIPATH_EXTS_MEMBIST_ENDTEST |
- INFINIPATH_EXTS_MEMBIST_DISABLED)))
- ipath_dev_err(dd, "MemBIST did not complete!\n");
- if (extsval & INFINIPATH_EXTS_MEMBIST_DISABLED)
- dev_info(&dd->pcidev->dev, "MemBIST is disabled.\n");
-
- val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
-
- if (!dd->ipath_boardrev) /* no PLL for Emulator */
- val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-
- if (dd->ipath_minrev == 1)
- val &= ~(1ULL << 42); /* TXE LaunchFIFO Parity rev1 issue */
-
- val &= ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR;
- dd->ipath_hwerrmask = val;
-
- /*
- * special trigger "error" is for debugging purposes. It
- * works around a processor/chipset problem. The error
- * interrupt allows us to count occurrences, but we don't
- * want to pay the overhead for normal use. Emulation only
- */
- if (!dd->ipath_boardrev)
- dd->ipath_maskederrs = INFINIPATH_E_SENDSPECIALTRIGGER;
-}
-
-/*
- * All detailed interaction with the SerDes has been moved to ipath_sd7220.c
- *
- * The portion of IBA7220-specific bringup_serdes() that actually deals with
- * registers and memory within the SerDes itself is ipath_sd7220_init().
- */
-
-/**
- * ipath_7220_bringup_serdes - bring up the serdes
- * @dd: the infinipath device
- */
-static int ipath_7220_bringup_serdes(struct ipath_devdata *dd)
-{
- int ret = 0;
- u64 val, prev_val, guid;
- int was_reset; /* Note whether uC was reset */
-
- ipath_dbg("Trying to bringup serdes\n");
-
- if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) &
- INFINIPATH_HWE_SERDESPLLFAILED) {
- ipath_dbg("At start, serdes PLL failed bit set "
- "in hwerrstatus, clearing and continuing\n");
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- INFINIPATH_HWE_SERDESPLLFAILED);
- }
-
- if (!dd->ipath_ibcddrctrl) {
- /* not on re-init after reset */
- dd->ipath_ibcddrctrl =
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrctrl);
-
- if (dd->ipath_link_speed_enabled ==
- (IPATH_IB_SDR | IPATH_IB_DDR))
- dd->ipath_ibcddrctrl |=
- IBA7220_IBC_SPEED_AUTONEG_MASK |
- IBA7220_IBC_IBTA_1_2_MASK;
- else
- dd->ipath_ibcddrctrl |=
- dd->ipath_link_speed_enabled == IPATH_IB_DDR
- ? IBA7220_IBC_SPEED_DDR :
- IBA7220_IBC_SPEED_SDR;
- if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
- IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X))
- dd->ipath_ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG;
- else
- dd->ipath_ibcddrctrl |=
- dd->ipath_link_width_enabled == IB_WIDTH_4X
- ? IBA7220_IBC_WIDTH_4X_ONLY :
- IBA7220_IBC_WIDTH_1X_ONLY;
-
- /* always enable these on driver reload, not sticky */
- dd->ipath_ibcddrctrl |=
- IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT;
- dd->ipath_ibcddrctrl |=
- IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
- /*
- * automatic lane reversal detection for receive
- * doesn't work correctly in rev 1, so disable it
- * on that rev, otherwise enable (disabling not
- * sticky across reload for >rev1)
- */
- if (dd->ipath_minrev == 1)
- dd->ipath_ibcddrctrl &=
- ~IBA7220_IBC_LANE_REV_SUPPORTED;
- else
- dd->ipath_ibcddrctrl |=
- IBA7220_IBC_LANE_REV_SUPPORTED;
- }
-
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
- dd->ipath_ibcddrctrl);
-
- ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0ULL);
-
- /* IBA7220 has SERDES MPU reset in D0 of what _was_ IBPLLCfg */
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
- /* remember if uC was in Reset or not, for dactrim */
- was_reset = (val & 1);
- ipath_cdbg(VERBOSE, "IBReset %s xgxsconfig %llx\n",
- was_reset ? "Asserted" : "Negated", (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
- if (dd->ipath_boardrev) {
- /*
- * Hardware is not emulator, and may have been reset. Init it.
- * Below will release reset, but needs to know if chip was
- * originally in reset, to only trim DACs on first time
- * after chip reset or powercycle (not driver reload)
- */
- ret = ipath_sd7220_init(dd, was_reset);
- }
-
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- prev_val = val;
- val |= INFINIPATH_XGXS_FC_SAFE;
- if (val != prev_val) {
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- }
- if (val & INFINIPATH_XGXS_RESET)
- val &= ~INFINIPATH_XGXS_RESET;
- if (val != prev_val)
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-
- ipath_cdbg(VERBOSE, "done: xgxs=%llx from %llx\n",
- (unsigned long long)
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig),
- prev_val);
-
- guid = be64_to_cpu(dd->ipath_guid);
-
- if (!guid) {
- /* have to have something, so use likely unique tsc */
- guid = get_cycles();
- ipath_dbg("No GUID for heartbeat, faking %llx\n",
- (unsigned long long)guid);
- } else
- ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", guid);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hrtbt_guid, guid);
- return ret;
-}
-
-static void ipath_7220_config_jint(struct ipath_devdata *dd,
- u16 idle_ticks, u16 max_packets)
-{
-
- /*
- * We can request a receive interrupt for 1 or more packets
- * from current offset.
- */
- if (idle_ticks == 0 || max_packets == 0)
- /* interrupt after one packet if no mitigation */
- dd->ipath_rhdrhead_intr_off =
- 1ULL << IBA7220_HDRHEAD_PKTINT_SHIFT;
- else
- /* Turn off RcvHdrHead interrupts if using mitigation */
- dd->ipath_rhdrhead_intr_off = 0ULL;
-
- /* refresh kernel RcvHdrHead registers... */
- ipath_write_ureg(dd, ur_rcvhdrhead,
- dd->ipath_rhdrhead_intr_off |
- dd->ipath_pd[0]->port_head, 0);
-
- dd->ipath_jint_max_packets = max_packets;
- dd->ipath_jint_idle_ticks = idle_ticks;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_jintreload,
- ((u64) max_packets << INFINIPATH_JINT_PACKETSHIFT) |
- idle_ticks);
-}
-
-/**
- * ipath_7220_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * Called when driver is being unloaded
- */
-static void ipath_7220_quiet_serdes(struct ipath_devdata *dd)
-{
- u64 val;
- dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
- wake_up(&dd->ipath_autoneg_wait);
- cancel_delayed_work(&dd->ipath_autoneg_work);
- flush_scheduled_work();
- ipath_shutdown_relock_poll(dd);
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- val |= INFINIPATH_XGXS_RESET;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
-}
-
-static int ipath_7220_intconfig(struct ipath_devdata *dd)
-{
- ipath_7220_config_jint(dd, dd->ipath_jint_idle_ticks,
- dd->ipath_jint_max_packets);
- return 0;
-}
-
-/**
- * ipath_setup_7220_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * These LEDs indicate the physical and logical state of IB link.
- * For this chip (at least with recommended board pinouts), LED1
- * is Yellow (logical state) and LED2 is Green (physical state),
- *
- * Note: We try to match the Mellanox HCA LED behavior as best
- * we can. Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate. That's
- * visible overhead, so not something we will do.
- *
- */
-static void ipath_setup_7220_setextled(struct ipath_devdata *dd, u64 lst,
- u64 ltst)
-{
- u64 extctl, ledblink = 0;
- unsigned long flags = 0;
-
- /* the diags use the LED to indicate diag info, so we leave
- * the external LED alone when the diags are running */
- if (ipath_diag_inuse)
- return;
-
- /* Allow override of LED display for, e.g. Locating system in rack */
- if (dd->ipath_led_override) {
- ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
- ? INFINIPATH_IBCS_LT_STATE_LINKUP
- : INFINIPATH_IBCS_LT_STATE_DISABLED;
- lst = (dd->ipath_led_override & IPATH_LED_LOG)
- ? INFINIPATH_IBCS_L_STATE_ACTIVE
- : INFINIPATH_IBCS_L_STATE_DOWN;
- }
-
- spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
- extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
- INFINIPATH_EXTC_LED2PRIPORT_ON);
- if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) {
- extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
- /*
- * counts are in chip clock (4ns) periods.
- * This is about 1/15 sec (66.6 ms) on,
- * 3/16 sec (187.5 ms) off, with packets rcvd
- */
- ledblink = ((66600*1000UL/4) << IBA7220_LEDBLINK_ON_SHIFT)
- | ((187500*1000UL/4) << IBA7220_LEDBLINK_OFF_SHIFT);
- }
- if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
- extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
- dd->ipath_extctrl = extctl;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
- spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-
- if (ledblink) /* blink the LED on packet receive */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvpktledcnt,
- ledblink);
-}
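For reference, a small standalone sketch of the blink-count arithmetic used above, assuming the 4 ns chip clock the comment describes; the durations are the same 66.6 ms / 187.5 ms values and everything else here is illustrative only:

/* Hedged sketch: convert LED on/off durations to chip-clock counts,
 * assuming one count per 4 ns chip clock as the comment above states. */
#include <stdio.h>

int main(void)
{
	unsigned long on_ns  = 66600UL * 1000UL;   /* ~66.6 ms on  */
	unsigned long off_ns = 187500UL * 1000UL;  /* 187.5 ms off */
	unsigned long on_cnt  = on_ns / 4;         /* 4 ns per count */
	unsigned long off_cnt = off_ns / 4;

	printf("on = %lu counts, off = %lu counts\n", on_cnt, off_cnt);
	return 0;
}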
-
-/*
- * Similar to pci_intx(pdev, 1), except that we make sure
- * msi is off...
- */
-static void ipath_enable_intx(struct pci_dev *pdev)
-{
- u16 cw, new;
- int pos;
-
- /* first, turn on INTx */
- pci_read_config_word(pdev, PCI_COMMAND, &cw);
- new = cw & ~PCI_COMMAND_INTX_DISABLE;
- if (new != cw)
- pci_write_config_word(pdev, PCI_COMMAND, new);
-
- /* then turn off MSI */
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
- if (pos) {
- pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
- new = cw & ~PCI_MSI_FLAGS_ENABLE;
- if (new != cw)
- pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
- }
-}
-
-static int ipath_msi_enabled(struct pci_dev *pdev)
-{
- int pos, ret = 0;
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
- if (pos) {
- u16 cw;
-
- pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
- ret = !!(cw & PCI_MSI_FLAGS_ENABLE);
- }
- return ret;
-}
-
-/*
- * disable msi interrupt if enabled, and clear the flag.
- * flag is used primarily for the fallback to INTx, but
- * is also used in reinit after reset as a flag.
- */
-static void ipath_7220_nomsi(struct ipath_devdata *dd)
-{
- dd->ipath_msi_lo = 0;
-
- if (ipath_msi_enabled(dd->pcidev)) {
- /*
- * free, but don't zero; later kernels require
- * it be freed before disable_msi, so the intx
- * setup has to request it again.
- */
- if (dd->ipath_irq)
- free_irq(dd->ipath_irq, dd);
- pci_disable_msi(dd->pcidev);
- }
-}
-
-/*
- * ipath_setup_7220_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * Nothing but msi interrupt cleanup for now.
- *
- * This is called during driver unload.
- */
-static void ipath_setup_7220_cleanup(struct ipath_devdata *dd)
-{
- ipath_7220_nomsi(dd);
-}
-
-
-static void ipath_7220_pcie_params(struct ipath_devdata *dd, u32 boardrev)
-{
- u16 linkstat, minwidth, speed;
- int pos;
-
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
- if (!pos) {
- ipath_dev_err(dd, "Can't find PCI Express capability!\n");
- goto bail;
- }
-
- pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
- &linkstat);
- /*
- * link speed is in bits 0-3, link width in bits 4-8
- * no defines for them in headers
- */
- speed = linkstat & 0xf;
- linkstat >>= 4;
- linkstat &= 0x1f;
- dd->ipath_lbus_width = linkstat;
- switch (boardrev) {
- case 0:
- case 2:
- case 10:
- case 12:
- minwidth = 16; /* x16 capable boards */
- break;
- default:
- minwidth = 8; /* x8 capable boards */
- break;
- }
-
- switch (speed) {
- case 1:
- dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */
- break;
- case 2:
- dd->ipath_lbus_speed = 5000; /* Gen2, 5GHz */
- break;
- default: /* not defined, assume gen1 */
- dd->ipath_lbus_speed = 2500;
- break;
- }
-
- if (linkstat < minwidth)
- ipath_dev_err(dd,
- "PCIe width %u (x%u HCA), performance "
- "reduced\n", linkstat, minwidth);
- else
- ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x%u HCA)\n",
- dd->ipath_lbus_speed, linkstat, minwidth);
-
- if (speed != 1)
- ipath_dev_err(dd,
- "PCIe linkspeed %u is incorrect; "
- "should be 1 (2500)!\n", speed);
-
-bail:
- /* fill in string, even on errors */
- snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
- "PCIe,%uMHz,x%u\n",
- dd->ipath_lbus_speed,
- dd->ipath_lbus_width);
- return;
-}
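The Link Status fields above are masked out by hand because the driver predates the symbolic names; below is a hedged sketch of the same decode using the PCI_EXP_LNKSTA_CLS and PCI_EXP_LNKSTA_NLW defines that later pci_regs.h headers provide (assumed available in the tree being built against):

/* Sketch only: decode PCI_EXP_LNKSTA with the symbolic masks from
 * later <uapi/linux/pci_regs.h> (assumption: those defines exist). */
#include <linux/pci_regs.h>
#include <linux/types.h>

static void decode_lnksta(u16 linkstat, u16 *speed, u16 *width)
{
	*speed = linkstat & PCI_EXP_LNKSTA_CLS;		/* 1 = 2.5 GT/s, 2 = 5 GT/s */
	*width = (linkstat & PCI_EXP_LNKSTA_NLW) >> 4;	/* negotiated lane count */
}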
-
-
-/**
- * ipath_setup_7220_config - setup PCIe config related stuff
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * The pci_enable_msi() call will fail on systems with MSI quirks
- * such as those with AMD8131, even if the device of interest is not
- * attached to that device, (in the 2.6.13 - 2.6.15 kernels, at least, fixed
- * late in 2.6.16).
- * All that can be done is to edit the kernel source to remove the quirk
- * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip,
- * even though it uses MSI, and we want to avoid the quirk warning, so
- * we call enable_msi only for PCIe. If we do end up needing
- * pci_enable_msi at some point in the future for HT, we'll move the
- * call back into the main init_one code.
- * We save the msi lo and hi values, so we can restore them after
- * chip reset (the kernel PCI infrastructure doesn't yet handle that
- * correctly).
- */
-static int ipath_setup_7220_config(struct ipath_devdata *dd,
- struct pci_dev *pdev)
-{
- int pos, ret = -1;
- u32 boardrev;
-
- dd->ipath_msi_lo = 0; /* used as a flag during reset processing */
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
- if (!strcmp(int_type, "force_msi") || !strcmp(int_type, "auto"))
- ret = pci_enable_msi(pdev);
- if (ret) {
- if (!strcmp(int_type, "force_msi")) {
- ipath_dev_err(dd, "pci_enable_msi failed: %d, "
- "force_msi is on, so not continuing.\n",
- ret);
- return ret;
- }
-
- ipath_enable_intx(pdev);
- if (!strcmp(int_type, "auto"))
- ipath_dev_err(dd, "pci_enable_msi failed: %d, "
- "falling back to INTx\n", ret);
- } else if (pos) {
- u16 control;
- pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
- &dd->ipath_msi_lo);
- pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
- &dd->ipath_msi_hi);
- pci_read_config_word(pdev, pos + PCI_MSI_FLAGS,
- &control);
- /* now save the data (vector) info */
- pci_read_config_word(pdev,
- pos + ((control & PCI_MSI_FLAGS_64BIT)
- ? PCI_MSI_DATA_64 :
- PCI_MSI_DATA_32),
- &dd->ipath_msi_data);
- } else
- ipath_dev_err(dd, "Can't find MSI capability, "
- "can't save MSI settings for reset\n");
-
- dd->ipath_irq = pdev->irq;
-
- /*
- * We save the cachelinesize also, although it doesn't
- * really matter.
- */
- pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE,
- &dd->ipath_pci_cacheline);
-
- /*
- * this function is called early, ipath_boardrev not set yet. Can't
- * use ipath_read_kreg64() yet, too early in init, so use readq()
- */
- boardrev = (readq(&dd->ipath_kregbase[dd->ipath_kregs->kr_revision])
- >> INFINIPATH_R_BOARDID_SHIFT) & INFINIPATH_R_BOARDID_MASK;
-
- ipath_7220_pcie_params(dd, boardrev);
-
- dd->ipath_flags |= IPATH_NODMA_RTAIL | IPATH_HAS_SEND_DMA |
- IPATH_HAS_PBC_CNT | IPATH_HAS_THRESH_UPDATE;
- dd->ipath_pioupd_thresh = 4U; /* set default update threshold */
- return 0;
-}
-
-static void ipath_init_7220_variables(struct ipath_devdata *dd)
-{
- /*
- * setup the register offsets, since they are different for each
- * chip
- */
- dd->ipath_kregs = &ipath_7220_kregs;
- dd->ipath_cregs = &ipath_7220_cregs;
-
- /*
- * bits for selecting i2c direction and values,
- * used for I2C serial flash
- */
- dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
- dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
- dd->ipath_gpio_sda = IPATH_GPIO_SDA;
- dd->ipath_gpio_scl = IPATH_GPIO_SCL;
-
- /*
- * Fill in data for field-values that change in IBA7220.
- * We dynamically specify only the mask for LINKTRAININGSTATE
- * and only the shift for LINKSTATE, as they are the only ones
- * that change. Also precalculate the 3 link states of interest
- * and the combined mask.
- */
- dd->ibcs_ls_shift = IBA7220_IBCS_LINKSTATE_SHIFT;
- dd->ibcs_lts_mask = IBA7220_IBCS_LINKTRAININGSTATE_MASK;
- dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK <<
- dd->ibcs_ls_shift) | dd->ibcs_lts_mask;
- dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
- (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift);
- dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
- (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift);
- dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP <<
- INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) |
- (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift);
-
- /*
- * Fill in data for ibcc field-values that change in IBA7220.
- * We dynamically specify only the mask for LINKINITCMD
- * and only the shift for LINKCMD and MAXPKTLEN, as they are
- * the only ones that change.
- */
- dd->ibcc_lic_mask = IBA7220_IBCC_LINKINITCMD_MASK;
- dd->ibcc_lc_shift = IBA7220_IBCC_LINKCMD_SHIFT;
- dd->ibcc_mpl_shift = IBA7220_IBCC_MAXPKTLEN_SHIFT;
-
- /* Fill in shifts for RcvCtrl. */
- dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
- dd->ipath_r_intravail_shift = IBA7220_R_INTRAVAIL_SHIFT;
- dd->ipath_r_tailupd_shift = IBA7220_R_TAILUPD_SHIFT;
- dd->ipath_r_portcfg_shift = IBA7220_R_PORTCFG_SHIFT;
-
- /* variables for sanity checking interrupt and errors */
- dd->ipath_hwe_bitsextant =
- (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
- (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
- (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
- INFINIPATH_HWE_PCIE1PLLFAILED |
- INFINIPATH_HWE_PCIE0PLLFAILED |
- INFINIPATH_HWE_PCIEPOISONEDTLP |
- INFINIPATH_HWE_PCIECPLTIMEOUT |
- INFINIPATH_HWE_PCIEBUSPARITYXTLH |
- INFINIPATH_HWE_PCIEBUSPARITYXADM |
- INFINIPATH_HWE_PCIEBUSPARITYRADM |
- INFINIPATH_HWE_MEMBISTFAILED |
- INFINIPATH_HWE_COREPLL_FBSLIP |
- INFINIPATH_HWE_COREPLL_RFSLIP |
- INFINIPATH_HWE_SERDESPLLFAILED |
- INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
- INFINIPATH_HWE_IBCBUSFRSPCPARITYERR |
- INFINIPATH_HWE_PCIECPLDATAQUEUEERR |
- INFINIPATH_HWE_PCIECPLHDRQUEUEERR |
- INFINIPATH_HWE_SDMAMEMREADERR |
- INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED |
- INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT |
- INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT |
- INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT |
- INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT |
- INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR |
- INFINIPATH_HWE_IB_UC_MEMORYPARITYERR |
- INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR |
- INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR;
- dd->ipath_i_bitsextant =
- INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED |
- (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
- (INFINIPATH_I_RCVAVAIL_MASK <<
- INFINIPATH_I_RCVAVAIL_SHIFT) |
- INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
- INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO |
- INFINIPATH_I_JINT | INFINIPATH_I_SERDESTRIMDONE;
- dd->ipath_e_bitsextant =
- INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
- INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
- INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
- INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
- INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
- INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
- INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
- INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
- INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
- INFINIPATH_E_SENDSPECIALTRIGGER |
- INFINIPATH_E_SDMADISABLED | INFINIPATH_E_SMINPKTLEN |
- INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNDERRUN |
- INFINIPATH_E_SPKTLEN | INFINIPATH_E_SDROPPEDSMPPKT |
- INFINIPATH_E_SDROPPEDDATAPKT |
- INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
- INFINIPATH_E_SUNSUPVL | INFINIPATH_E_SENDBUFMISUSE |
- INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND |
- INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE |
- INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG |
- INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW |
- INFINIPATH_E_SDMAUNEXPDATA |
- INFINIPATH_E_IBSTATUSCHANGED | INFINIPATH_E_INVALIDADDR |
- INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE |
- INFINIPATH_E_SDMADESCADDRMISALIGN |
- INFINIPATH_E_INVALIDEEPCMD;
-
- dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
- dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
- dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
- dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
- dd->ipath_flags |= IPATH_INTREG_64 | IPATH_HAS_MULT_IB_SPEED
- | IPATH_HAS_LINK_LATENCY;
-
- /*
- * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
- * 2 is Some Misc, 3 is reserved for future.
- */
- dd->ipath_eep_st_masks[0].hwerrs_to_log =
- INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
- dd->ipath_eep_st_masks[1].hwerrs_to_log =
- INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
- INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
- dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-
- ipath_linkrecovery = 0;
-
- init_waitqueue_head(&dd->ipath_autoneg_wait);
- INIT_DELAYED_WORK(&dd->ipath_autoneg_work, autoneg_work);
-
- dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
- dd->ipath_link_speed_supported = IPATH_IB_SDR | IPATH_IB_DDR;
-
- dd->ipath_link_width_enabled = dd->ipath_link_width_supported;
- dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported;
- /*
- * set the initial values to reasonable default, will be set
- * for real when link is up.
- */
- dd->ipath_link_width_active = IB_WIDTH_4X;
- dd->ipath_link_speed_active = IPATH_IB_SDR;
- dd->delay_mult = rate_to_delay[0][1];
-}
-
-
-/*
- * Setup the MSI stuff again after a reset. I'd like to just call
- * pci_enable_msi() and request_irq() again, but when I do that,
- * the MSI enable bit doesn't get set in the command word, and
- * we switch to a different interrupt vector, which is confusing,
- * so I instead just do it all inline. Perhaps somehow can tie this
- * into the PCIe hotplug support at some point
- * Note, because I'm doing it all here, I don't call pci_disable_msi()
- * or free_irq() at the start of ipath_setup_7220_reset().
- */
-static int ipath_reinit_msi(struct ipath_devdata *dd)
-{
- int ret = 0;
-
- int pos;
- u16 control;
- if (!dd->ipath_msi_lo) /* Using intX, or init problem */
- goto bail;
-
- pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
- if (!pos) {
- ipath_dev_err(dd, "Can't find MSI capability, "
- "can't restore MSI settings\n");
- goto bail;
- }
- ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
- dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
- pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
- dd->ipath_msi_lo);
- ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
- dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
- pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
- dd->ipath_msi_hi);
- pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
- if (!(control & PCI_MSI_FLAGS_ENABLE)) {
- ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
- "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
- control, control | PCI_MSI_FLAGS_ENABLE);
- control |= PCI_MSI_FLAGS_ENABLE;
- pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
- control);
- }
- /* now rewrite the data (vector) info */
- pci_write_config_word(dd->pcidev, pos +
- ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
- dd->ipath_msi_data);
- ret = 1;
-
-bail:
- if (!ret) {
- ipath_dbg("Using INTx, MSI disabled or not configured\n");
- ipath_enable_intx(dd->pcidev);
- ret = 1;
- }
- /*
- * We restore the cachelinesize also, although it doesn't really
- * matter.
- */
- pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
- dd->ipath_pci_cacheline);
- /* and now set the pci master bit again */
- pci_set_master(dd->pcidev);
-
- return ret;
-}
-
-/*
- * This routine sleeps, so it can only be called from user context, not
- * from interrupt context. If we need interrupt context, we can split
- * it into two routines.
- */
-static int ipath_setup_7220_reset(struct ipath_devdata *dd)
-{
- u64 val;
- int i;
- int ret;
- u16 cmdval;
-
- pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
-
- /* Use dev_err so it shows up in logs, etc. */
- ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
-
- /* keep chip from being accessed in a few places */
- dd->ipath_flags &= ~(IPATH_INITTED | IPATH_PRESENT);
- val = dd->ipath_control | INFINIPATH_C_RESET;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
- mb();
-
- for (i = 1; i <= 5; i++) {
- int r;
-
- /*
- * Allow MBIST, etc. to complete; longer on each retry.
- * We sometimes get machine checks from bus timeout if no
- * response, so for now, make it *really* long.
- */
- msleep(1000 + (1 + i) * 2000);
- r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
- dd->ipath_pcibar0);
- if (r)
- ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n", r);
- r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
- dd->ipath_pcibar1);
- if (r)
- ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n", r);
- /* now re-enable memory access */
- pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
- r = pci_enable_device(dd->pcidev);
- if (r)
- ipath_dev_err(dd, "pci_enable_device failed after "
- "reset: %d\n", r);
- /*
- * whether it fully enabled or not, mark as present,
- * again (but not INITTED)
- */
- dd->ipath_flags |= IPATH_PRESENT;
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
- if (val == dd->ipath_revision) {
- ipath_cdbg(VERBOSE, "Got matching revision "
- "register %llx on try %d\n",
- (unsigned long long) val, i);
- ret = ipath_reinit_msi(dd);
- goto bail;
- }
- /* Probably getting -1 back */
- ipath_dbg("Didn't get expected revision register, "
- "got %llx, try %d\n", (unsigned long long) val,
- i + 1);
- }
- ret = 0; /* failed */
-
-bail:
- if (ret)
- ipath_7220_pcie_params(dd, dd->ipath_boardrev);
-
- return ret;
-}
-
-/**
- * ipath_7220_put_tid - write a TID to the chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @type: 0 for eager, 1 for expected
- * @pa: physical address of in-memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for selection of the
- * appropriate "flavor". The static calls in cleanup just use the
- * revision-agnostic form, as they are not performance critical.
- */
-static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
- u32 type, unsigned long pa)
-{
- if (pa != dd->ipath_tidinvalid) {
- u64 chippa = pa >> IBA7220_TID_PA_SHIFT;
-
- /* paranoia checks */
- if (pa != (chippa << IBA7220_TID_PA_SHIFT)) {
- dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
- "not 2KB aligned!\n", pa);
- return;
- }
- if (pa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
- ipath_dev_err(dd,
- "BUG: Physical page address 0x%lx "
- "larger than supported\n", pa);
- return;
- }
-
- if (type == RCVHQ_RCV_TYPE_EAGER)
- chippa |= dd->ipath_tidtemplate;
- else /* for now, always full 4KB page */
- chippa |= IBA7220_TID_SZ_4K;
- writeq(chippa, tidptr);
- } else
- writeq(pa, tidptr);
- mmiowb();
-}
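A worked example of the alignment check in ipath_7220_put_tid(), assuming IBA7220_TID_PA_SHIFT is 11 (the 2 KB alignment the error message implies); the address below is made up purely for illustration:

/* Hypothetical example of the 2 KB-aligned TID address encoding. */
#include <stdio.h>

#define TID_PA_SHIFT 11	/* assumption: 2 KB alignment per the error text */

int main(void)
{
	unsigned long pa = 0x1234800UL;	/* made-up, 2 KB aligned */
	unsigned long chippa = pa >> TID_PA_SHIFT;

	/* round-trips only when pa is 2 KB aligned, which is what the
	 * driver's paranoia check verifies before writing the TID */
	printf("pa=%#lx chippa=%#lx aligned=%d\n",
	       pa, chippa, pa == (chippa << TID_PA_SHIFT));
	return 0;
}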
-
-/**
- * ipath_7220_clear_tids - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * clear all TID entries for a port, expected and eager.
- * Used from ipath_close(). On this chip, TIDs are only 32 bits,
- * not 64, but they are still on 64 bit boundaries, so tidbase
- * is declared as u64 * for the pointer math, even though we write 32 bits
- */
-static void ipath_7220_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
- u64 __iomem *tidbase;
- unsigned long tidinv;
- int i;
-
- if (!dd->ipath_kregbase)
- return;
-
- ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
- tidinv = dd->ipath_tidinvalid;
- tidbase = (u64 __iomem *)
- ((char __iomem *)(dd->ipath_kregbase) +
- dd->ipath_rcvtidbase +
- port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
-
- for (i = 0; i < dd->ipath_rcvtidcnt; i++)
- ipath_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
- tidinv);
-
- tidbase = (u64 __iomem *)
- ((char __iomem *)(dd->ipath_kregbase) +
- dd->ipath_rcvegrbase + port_egrtid_idx(dd, port)
- * sizeof(*tidbase));
-
- for (i = port ? dd->ipath_rcvegrcnt : dd->ipath_p0_rcvegrcnt; i; i--)
- ipath_7220_put_tid(dd, &tidbase[i-1], RCVHQ_RCV_TYPE_EAGER,
- tidinv);
-}
-
-/**
- * ipath_7220_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_7220_tidtemplate(struct ipath_devdata *dd)
-{
- /* For now, we always allocate 4KB buffers (at init) so we can
- * receive max size packets. We may want a module parameter to
- * specify 2KB or 4KB and/or make it per port instead of per device
- * for those who want to reduce memory footprint. Note that the
- * ipath_rcvhdrentsize size must be large enough to hold the largest
- * IB header (currently 96 bytes) that we expect to handle (plus of
- * course the 2 dwords of RHF).
- */
- if (dd->ipath_rcvegrbufsize == 2048)
- dd->ipath_tidtemplate = IBA7220_TID_SZ_2K;
- else if (dd->ipath_rcvegrbufsize == 4096)
- dd->ipath_tidtemplate = IBA7220_TID_SZ_4K;
- else {
- dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize "
- "%u, using %u\n", dd->ipath_rcvegrbufsize,
- 4096);
- dd->ipath_tidtemplate = IBA7220_TID_SZ_4K;
- }
- dd->ipath_tidinvalid = 0;
-}
-
-static int ipath_7220_early_init(struct ipath_devdata *dd)
-{
- u32 i, s;
-
- if (strcmp(int_type, "auto") &&
- strcmp(int_type, "force_msi") &&
- strcmp(int_type, "force_intx")) {
- ipath_dev_err(dd, "Invalid interrupt_type: '%s', expecting "
- "auto, force_msi or force_intx\n", int_type);
- return -EINVAL;
- }
-
- /*
- * Control[4] has been added to change the arbitration within
- * the SDMA engine between data fetches and descriptor fetches.
- * ipath_sdma_fetch_arb==0 gives data fetches priority.
- */
- if (ipath_sdma_fetch_arb && (dd->ipath_minrev > 1))
- dd->ipath_control |= 1<<4;
-
- dd->ipath_flags |= IPATH_4BYTE_TID;
-
- /*
- * For openfabrics, we need to be able to handle an IB header of
- * 24 dwords. HT chip has arbitrary sized receive buffers, so we
- * made them the same size as the PIO buffers. This chip does not
- * handle arbitrary size buffers, so we need the header large enough
- * to handle largest IB header, but still have room for a 2KB MTU
- * standard IB packet.
- */
- dd->ipath_rcvhdrentsize = 24;
- dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
- dd->ipath_rhf_offset =
- dd->ipath_rcvhdrentsize - sizeof(u64) / sizeof(u32);
-
- dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048;
- /*
- * the min() check here is currently a nop, but it may not always
- * be, depending on just how we do ipath_rcvegrbufsize
- */
- dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k :
- dd->ipath_piosize2k,
- dd->ipath_rcvegrbufsize +
- (dd->ipath_rcvhdrentsize << 2));
- dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
-
- ipath_7220_config_jint(dd, INFINIPATH_JINT_DEFAULT_IDLE_TICKS,
- INFINIPATH_JINT_DEFAULT_MAX_PACKETS);
-
- if (dd->ipath_boardrev) /* no eeprom on emulator */
- ipath_get_eeprom_info(dd);
-
- /* start of code to check and print procmon */
- s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
- s &= ~(1U<<31); /* clear done bit */
- s |= 1U<<14; /* clear counter (write 1 to clear) */
- ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
- /* make sure clear_counter is low long enough before start */
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
-
- s &= ~(1U<<14); /* allow counter to count (before starting) */
- ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
-
- s |= 1U<<15; /* start the counter */
- s &= ~(1U<<31); /* clear done bit */
- s &= ~0x7ffU; /* clear frequency bits */
- s |= 0xe29; /* set frequency bits, in case cleared */
- ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s);
-
- s = 0;
- for (i = 500; i > 0 && !(s&(1ULL<<31)); i--) {
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon));
- }
- if (!(s&(1U<<31)))
- ipath_dev_err(dd, "ProcMon register not valid: 0x%x\n", s);
- else
- ipath_dbg("ProcMon=0x%x, count=0x%x\n", s, (s>>16)&0x1ff);
-
- return 0;
-}
-
-/**
- * ipath_7220_get_base_info - set chip-specific flags for user code
- * @pd: the infinipath port
- * @kbase: ipath_base_info pointer
- *
- * We set the PCIE flag because the lower bandwidth on PCIe vs
- * HyperTransport can affect some user packet algorithms.
- */
-static int ipath_7220_get_base_info(struct ipath_portdata *pd, void *kbase)
-{
- struct ipath_base_info *kinfo = kbase;
-
- kinfo->spi_runtime_flags |=
- IPATH_RUNTIME_PCIE | IPATH_RUNTIME_NODMA_RTAIL |
- IPATH_RUNTIME_SDMA;
-
- return 0;
-}
-
-static void ipath_7220_free_irq(struct ipath_devdata *dd)
-{
- free_irq(dd->ipath_irq, dd);
- dd->ipath_irq = 0;
-}
-
-static struct ipath_message_header *
-ipath_7220_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr)
-{
- u32 offset = ipath_hdrget_offset(rhf_addr);
-
- return (struct ipath_message_header *)
- (rhf_addr - dd->ipath_rhf_offset + offset);
-}
-
-static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
-{
- u32 nchipports;
-
- nchipports = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt);
- if (!cfgports) {
- int ncpus = num_online_cpus();
-
- if (ncpus <= 4)
- dd->ipath_portcnt = 5;
- else if (ncpus <= 8)
- dd->ipath_portcnt = 9;
- if (dd->ipath_portcnt)
- ipath_dbg("Auto-configured for %u ports, %d cpus "
- "online\n", dd->ipath_portcnt, ncpus);
- } else if (cfgports <= nchipports)
- dd->ipath_portcnt = cfgports;
- if (!dd->ipath_portcnt) /* none of the above, set to max */
- dd->ipath_portcnt = nchipports;
- /*
- * chip can be configured for 5, 9, or 17 ports, and choice
- * affects number of eager TIDs per port (1K, 2K, 4K).
- */
- if (dd->ipath_portcnt > 9)
- dd->ipath_rcvctrl |= 2ULL << IBA7220_R_PORTCFG_SHIFT;
- else if (dd->ipath_portcnt > 5)
- dd->ipath_rcvctrl |= 1ULL << IBA7220_R_PORTCFG_SHIFT;
- /* else configure for default 5 receive ports */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
- dd->ipath_rcvctrl);
- dd->ipath_p0_rcvegrcnt = 2048; /* always */
- if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
- dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
-}
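For clarity, the PortCfg selection above reduces to a small mapping; a sketch under the assumption (from the comment) that the chip supports 5-, 9-, or 17-port configurations, with more ports presumably meaning fewer eager TIDs per port:

/* Sketch: PortCfg field value written into RcvCtrl for a given
 * configured port count (0 is the default 5-port configuration). */
static inline unsigned int portcfg_field(unsigned int portcnt)
{
	if (portcnt > 9)
		return 2;	/* 17-port configuration */
	if (portcnt > 5)
		return 1;	/* 9-port configuration */
	return 0;		/* 5-port configuration */
}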
-
-
-static int ipath_7220_get_ib_cfg(struct ipath_devdata *dd, int which)
-{
- int lsb, ret = 0;
- u64 maskr; /* right-justified mask */
-
- switch (which) {
- case IPATH_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */
- lsb = IBA7220_IBC_HRTBT_SHIFT;
- maskr = IBA7220_IBC_HRTBT_MASK;
- break;
-
- case IPATH_IB_CFG_LWID_ENB: /* Get allowed Link-width */
- ret = dd->ipath_link_width_enabled;
- goto done;
-
- case IPATH_IB_CFG_LWID: /* Get currently active Link-width */
- ret = dd->ipath_link_width_active;
- goto done;
-
- case IPATH_IB_CFG_SPD_ENB: /* Get allowed Link speeds */
- ret = dd->ipath_link_speed_enabled;
- goto done;
-
- case IPATH_IB_CFG_SPD: /* Get current Link spd */
- ret = dd->ipath_link_speed_active;
- goto done;
-
- case IPATH_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */
- lsb = IBA7220_IBC_RXPOL_SHIFT;
- maskr = IBA7220_IBC_RXPOL_MASK;
- break;
-
- case IPATH_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */
- lsb = IBA7220_IBC_LREV_SHIFT;
- maskr = IBA7220_IBC_LREV_MASK;
- break;
-
- case IPATH_IB_CFG_LINKLATENCY:
- ret = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrstatus)
- & IBA7220_DDRSTAT_LINKLAT_MASK;
- goto done;
-
- default:
- ret = -ENOTSUPP;
- goto done;
- }
- ret = (int)((dd->ipath_ibcddrctrl >> lsb) & maskr);
-done:
- return ret;
-}
-
-static int ipath_7220_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val)
-{
- int lsb, ret = 0, setforce = 0;
- u64 maskr; /* right-justified mask */
-
- switch (which) {
- case IPATH_IB_CFG_LIDLMC:
- /*
- * Set LID and LMC. Combined to avoid possible hazard
- * caller puts LMC in 16MSbits, DLID in 16LSbits of val
- */
- lsb = IBA7220_IBC_DLIDLMC_SHIFT;
- maskr = IBA7220_IBC_DLIDLMC_MASK;
- break;
-
- case IPATH_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
- if (val & IPATH_IB_HRTBT_ON &&
- (dd->ipath_flags & IPATH_NO_HRTBT))
- goto bail;
- lsb = IBA7220_IBC_HRTBT_SHIFT;
- maskr = IBA7220_IBC_HRTBT_MASK;
- break;
-
- case IPATH_IB_CFG_LWID_ENB: /* set allowed Link-width */
- /*
- * As with speed, only write the actual register if
- * the link is currently down, otherwise takes effect
- * on next link change.
- */
- dd->ipath_link_width_enabled = val;
- if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) !=
- IPATH_LINKDOWN)
- goto bail;
- /*
- * We set the IPATH_IB_FORCE_NOTIFY bit so updown
- * will get called because we want to update
- * link_width_active, and the change may not take
- * effect for some time (if we are in POLL), so this
- * flag will force the updown routine to be called
- * on the next ibstatuschange down interrupt, even
- * if it's not a down->up transition.
- */
- val--; /* convert from IB to chip */
- maskr = IBA7220_IBC_WIDTH_MASK;
- lsb = IBA7220_IBC_WIDTH_SHIFT;
- setforce = 1;
- dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY;
- break;
-
- case IPATH_IB_CFG_SPD_ENB: /* set allowed Link speeds */
- /*
- * If we turn off IB1.2, need to preset SerDes defaults,
- * but not right now. Set a flag for the next time
- * we command the link down. As with width, only write the
- * actual register if the link is currently down, otherwise
- * takes effect on next link change. Since setting is being
- * explicitly requested (via MAD or sysfs), clear autoneg
- * failure status if speed autoneg is enabled.
- */
- dd->ipath_link_speed_enabled = val;
- if (dd->ipath_ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK &&
- !(val & (val - 1)))
- dd->ipath_presets_needed = 1;
- if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) !=
- IPATH_LINKDOWN)
- goto bail;
- /*
- * We set the IPATH_IB_FORCE_NOTIFY bit so updown
- * will get called because we want to update
- * link_speed_active, and the change may not take
- * effect for some time (if we are in POLL), so this
- * flag will force the updown routine to be called
- * on the next ibstatuschange down interrupt, even
- * if it's not a down->up transition. When setting
- * speed autoneg, clear AUTONEG_FAILED.
- */
- if (val == (IPATH_IB_SDR | IPATH_IB_DDR)) {
- val = IBA7220_IBC_SPEED_AUTONEG_MASK |
- IBA7220_IBC_IBTA_1_2_MASK;
- dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED;
- } else
- val = val == IPATH_IB_DDR ? IBA7220_IBC_SPEED_DDR
- : IBA7220_IBC_SPEED_SDR;
- maskr = IBA7220_IBC_SPEED_AUTONEG_MASK |
- IBA7220_IBC_IBTA_1_2_MASK;
- lsb = 0; /* speed bits are low bits */
- setforce = 1;
- break;
-
- case IPATH_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */
- lsb = IBA7220_IBC_RXPOL_SHIFT;
- maskr = IBA7220_IBC_RXPOL_MASK;
- break;
-
- case IPATH_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */
- lsb = IBA7220_IBC_LREV_SHIFT;
- maskr = IBA7220_IBC_LREV_MASK;
- break;
-
- default:
- ret = -ENOTSUPP;
- goto bail;
- }
- dd->ipath_ibcddrctrl &= ~(maskr << lsb);
- dd->ipath_ibcddrctrl |= (((u64) val & maskr) << lsb);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
- dd->ipath_ibcddrctrl);
- if (setforce)
- dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY;
-bail:
- return ret;
-}
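A minimal caller-side sketch of the LID/LMC packing that the IPATH_IB_CFG_LIDLMC case above expects (the helper name is illustrative; the bit split is from the comment):

/* Illustrative only: LMC in the 16 most-significant bits,
 * DLID in the 16 least-significant bits of val. */
#include <linux/types.h>

static inline u32 pack_lidlmc(u16 lid, u8 lmc)
{
	return ((u32) lmc << 16) | lid;
}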
-
-static void ipath_7220_read_counters(struct ipath_devdata *dd,
- struct infinipath_counters *cntrs)
-{
- u64 *counters = (u64 *) cntrs;
- int i;
-
- for (i = 0; i < sizeof(*cntrs) / sizeof(u64); i++)
- counters[i] = ipath_snap_cntr(dd, i);
-}
-
-/* if we are using MSI, try to fallback to INTx */
-static int ipath_7220_intr_fallback(struct ipath_devdata *dd)
-{
- if (dd->ipath_msi_lo) {
- dev_info(&dd->pcidev->dev, "MSI interrupt not detected,"
- " trying INTx interrupts\n");
- ipath_7220_nomsi(dd);
- ipath_enable_intx(dd->pcidev);
- /*
- * some newer kernels require free_irq before disable_msi,
- * and irq can be changed during disable and intx enable
- * and we therefore need to use the pcidev->irq value,
- * not our saved MSI value.
- */
- dd->ipath_irq = dd->pcidev->irq;
- if (request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
- IPATH_DRV_NAME, dd))
- ipath_dev_err(dd,
- "Could not re-request_irq for INTx\n");
- return 1;
- }
- return 0;
-}
-
-/*
- * reset the XGXS (between serdes and IBC). Slightly less intrusive
- * than resetting the IBC or external link state, and useful in some
- * cases to cause some retraining. To do this right, we reset IBC
- * as well.
- */
-static void ipath_7220_xgxs_reset(struct ipath_devdata *dd)
-{
- u64 val, prev_val;
-
- prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
- val = prev_val | INFINIPATH_XGXS_RESET;
- prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- dd->ipath_control & ~INFINIPATH_C_LINKENABLE);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
- dd->ipath_control);
-}
-
-
-/* Still needs cleanup, too much hardwired stuff */
-static void autoneg_send(struct ipath_devdata *dd,
- u32 *hdr, u32 dcnt, u32 *data)
-{
- int i;
- u64 cnt;
- u32 __iomem *piobuf;
- u32 pnum;
-
- i = 0;
- cnt = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */
- while (!(piobuf = ipath_getpiobuf(dd, cnt, &pnum))) {
- if (i++ > 15) {
- ipath_dbg("Couldn't get pio buffer for send\n");
- return;
- }
- udelay(2);
- }
- if (dd->ipath_flags&IPATH_HAS_PBC_CNT)
- cnt |= 0x80000000ULL<<32; /* mark as VL15 */
- writeq(cnt, piobuf);
- ipath_flush_wc();
- __iowrite32_copy(piobuf + 2, hdr, 7);
- __iowrite32_copy(piobuf + 9, data, dcnt);
- ipath_flush_wc();
-}
-
-/*
- * _start packet gets sent twice at start, _done gets sent twice at end
- */
-static void ipath_autoneg_send(struct ipath_devdata *dd, int which)
-{
- static u32 swapped;
- u32 dw, i, hcnt, dcnt, *data;
- static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba };
- static u32 madpayload_start[0x40] = {
- 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
- 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */
- };
- static u32 madpayload_done[0x40] = {
- 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
- 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x40000001, 0x1388, 0x15e, /* rest 0's */
- };
- dcnt = ARRAY_SIZE(madpayload_start);
- hcnt = ARRAY_SIZE(hdr);
- if (!swapped) {
- /* for maintainability, do it at runtime */
- for (i = 0; i < hcnt; i++) {
- dw = (__force u32) cpu_to_be32(hdr[i]);
- hdr[i] = dw;
- }
- for (i = 0; i < dcnt; i++) {
- dw = (__force u32) cpu_to_be32(madpayload_start[i]);
- madpayload_start[i] = dw;
- dw = (__force u32) cpu_to_be32(madpayload_done[i]);
- madpayload_done[i] = dw;
- }
- swapped = 1;
- }
-
- data = which ? madpayload_done : madpayload_start;
- ipath_cdbg(PKT, "Sending %s special MADs\n", which?"done":"start");
-
- autoneg_send(dd, hdr, dcnt, data);
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- udelay(2);
- autoneg_send(dd, hdr, dcnt, data);
- ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
- udelay(2);
-}
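The one-time in-place swap above lets the MAD tables stay readable in host order; a tiny standalone illustration of the same host-to-wire conversion on the first header word (htonl() gives the same result here as cpu_to_be32 would):

/* Sketch: host-order constant to big-endian wire order, as done once
 * at runtime for the hdr[] and madpayload_*[] tables above. */
#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned int hdr0 = 0xf002ffff;	/* first header word above */

	printf("host 0x%08x -> wire 0x%08x\n", hdr0, htonl(hdr0));
	return 0;
}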
-
-
-
-/*
- * Do the absolute minimum to cause an IB speed change, and make it
- * ready, but don't actually trigger the change. The caller will
- * do that when ready (if link is in Polling training state, it will
- * happen immediately, otherwise when link next goes down)
- *
- * This routine should only be used as part of the DDR autonegotiation
- * code for devices that are not compliant with IB 1.2 (or code that
- * fixes things up for same).
- *
- * When the link has gone down with autoneg enabled, or autoneg has
- * failed and we give up until next time, we set both speeds, and
- * then we want IBTA enabled as well as "use max enabled speed".
- */
-static void set_speed_fast(struct ipath_devdata *dd, u32 speed)
-{
- dd->ipath_ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK |
- IBA7220_IBC_IBTA_1_2_MASK |
- (IBA7220_IBC_WIDTH_MASK << IBA7220_IBC_WIDTH_SHIFT));
-
- if (speed == (IPATH_IB_SDR | IPATH_IB_DDR))
- dd->ipath_ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK |
- IBA7220_IBC_IBTA_1_2_MASK;
- else
- dd->ipath_ibcddrctrl |= speed == IPATH_IB_DDR ?
- IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR;
-
- /*
- * Convert from IB-style 1 = 1x, 2 = 4x, 3 = auto
- * to chip-centric 0 = 1x, 1 = 4x, 2 = auto
- */
- dd->ipath_ibcddrctrl |= (u64)(dd->ipath_link_width_enabled - 1) <<
- IBA7220_IBC_WIDTH_SHIFT;
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl,
- dd->ipath_ibcddrctrl);
- ipath_cdbg(VERBOSE, "setup for IB speed (%x) done\n", speed);
-}
-
-
-/*
- * this routine is only used when we are not talking to another
- * IB 1.2-compliant device that we think can do DDR.
- * (This includes all existing switch chips as of Oct 2007.)
- * 1.2-compliant devices go directly to DDR prior to reaching INIT
- */
-static void try_auto_neg(struct ipath_devdata *dd)
-{
- /*
- * required for older non-IB1.2 DDR switches. Newer
- * non-IB-compliant switches don't need it, but so far,
- * aren't bothered by it either. "Magic constant"
- */
- ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl),
- 0x3b9dc07);
- dd->ipath_flags |= IPATH_IB_AUTONEG_INPROG;
- ipath_autoneg_send(dd, 0);
- set_speed_fast(dd, IPATH_IB_DDR);
- ipath_toggle_rclkrls(dd);
- /* 2 msec is minimum length of a poll cycle */
- schedule_delayed_work(&dd->ipath_autoneg_work,
- msecs_to_jiffies(2));
-}
-
-
-static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
-{
- int ret = 0;
- u32 ltstate = ipath_ib_linkstate(dd, ibcs);
-
- dd->ipath_link_width_active =
- ((ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1) ?
- IB_WIDTH_4X : IB_WIDTH_1X;
- dd->ipath_link_speed_active =
- ((ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1) ?
- IPATH_IB_DDR : IPATH_IB_SDR;
-
- if (!ibup) {
- /*
- * when link goes down we don't want aeq running, so it
- * won't interfere with IBC training, etc., and we need
- * to go back to the static SerDes preset values
- */
- if (dd->ipath_x1_fix_tries &&
- ltstate <= INFINIPATH_IBCS_LT_STATE_SLEEPQUIET &&
- ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP)
- dd->ipath_x1_fix_tries = 0;
- if (!(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED |
- IPATH_IB_AUTONEG_INPROG)))
- set_speed_fast(dd, dd->ipath_link_speed_enabled);
- if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) {
- ipath_cdbg(VERBOSE, "Setting RXEQ defaults\n");
- ipath_sd7220_presets(dd);
- }
- /* this might be better in ipath_sd7220_presets() */
- ipath_set_relock_poll(dd, ibup);
- } else {
- if (ipath_compat_ddr_negotiate &&
- !(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED |
- IPATH_IB_AUTONEG_INPROG)) &&
- dd->ipath_link_speed_active == IPATH_IB_SDR &&
- (dd->ipath_link_speed_enabled &
- (IPATH_IB_DDR | IPATH_IB_SDR)) ==
- (IPATH_IB_DDR | IPATH_IB_SDR) &&
- dd->ipath_autoneg_tries < IPATH_AUTONEG_TRIES) {
- /* we are SDR, and DDR auto-negotiation enabled */
- ++dd->ipath_autoneg_tries;
- ipath_dbg("DDR negotiation try, %u/%u\n",
- dd->ipath_autoneg_tries,
- IPATH_AUTONEG_TRIES);
- try_auto_neg(dd);
- ret = 1; /* no other IB status change processing */
- } else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
- && dd->ipath_link_speed_active == IPATH_IB_SDR) {
- ipath_autoneg_send(dd, 1);
- set_speed_fast(dd, IPATH_IB_DDR);
- udelay(2);
- ipath_toggle_rclkrls(dd);
- ret = 1; /* no other IB status change processing */
- } else {
- if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
- (dd->ipath_link_speed_active & IPATH_IB_DDR)) {
- ipath_dbg("Got to INIT with DDR autoneg\n");
- dd->ipath_flags &= ~(IPATH_IB_AUTONEG_INPROG
- | IPATH_IB_AUTONEG_FAILED);
- dd->ipath_autoneg_tries = 0;
- /* re-enable SDR, for next link down */
- set_speed_fast(dd,
- dd->ipath_link_speed_enabled);
- wake_up(&dd->ipath_autoneg_wait);
- } else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) {
- /*
- * clear autoneg failure flag, and do setup
- * so we'll try next time link goes down and
- * back to INIT (possibly connected to different
- * device).
- */
- ipath_dbg("INIT %sDR after autoneg failure\n",
- (dd->ipath_link_speed_active &
- IPATH_IB_DDR) ? "D" : "S");
- dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED;
- dd->ipath_ibcddrctrl |=
- IBA7220_IBC_IBTA_1_2_MASK;
- ipath_write_kreg(dd,
- IPATH_KREG_OFFSET(IBNCModeCtrl), 0);
- }
- }
- /*
- * if we are in 1X, and are in autoneg width, it
- * could be due to an xgxs problem, so if we haven't
- * already tried, try twice to get to 4X; if we
- * tried, and couldn't, report it, since it will
- * probably not be what is desired.
- */
- if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
- IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)
- && dd->ipath_link_width_active == IB_WIDTH_1X
- && dd->ipath_x1_fix_tries < 3) {
- if (++dd->ipath_x1_fix_tries == 3)
- dev_info(&dd->pcidev->dev,
- "IB link is in 1X mode\n");
- else {
- ipath_cdbg(VERBOSE, "IB 1X in "
- "auto-width, try %u to be "
- "sure it's really 1X; "
- "ltstate %u\n",
- dd->ipath_x1_fix_tries,
- ltstate);
- dd->ipath_f_xgxs_reset(dd);
- ret = 1; /* skip other processing */
- }
- }
-
- if (!ret) {
- dd->delay_mult = rate_to_delay
- [(ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1]
- [(ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1];
-
- ipath_set_relock_poll(dd, ibup);
- }
- }
-
- if (!ret)
- ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs),
- ltstate);
- return ret;
-}
-
-
-/*
- * Handle the empirically determined mechanism for auto-negotiation
- * of DDR speed with switches.
- */
-static void autoneg_work(struct work_struct *work)
-{
- struct ipath_devdata *dd;
- u64 startms;
- u32 lastlts, i;
-
- dd = container_of(work, struct ipath_devdata,
- ipath_autoneg_work.work);
-
- startms = jiffies_to_msecs(jiffies);
-
- /*
- * busy wait for this first part, it should be at most a
- * few hundred usec, since we scheduled ourselves for 2msec.
- */
- for (i = 0; i < 25; i++) {
- lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
- if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
- ipath_set_linkstate(dd, IPATH_IB_LINKDOWN_DISABLE);
- break;
- }
- udelay(100);
- }
-
- if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG))
- goto done; /* we got there early or told to stop */
-
- /* we expect this to timeout */
- if (wait_event_timeout(dd->ipath_autoneg_wait,
- !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
- msecs_to_jiffies(90)))
- goto done;
-
- ipath_toggle_rclkrls(dd);
-
- /* we expect this to timeout */
- if (wait_event_timeout(dd->ipath_autoneg_wait,
- !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
- msecs_to_jiffies(1700)))
- goto done;
-
- set_speed_fast(dd, IPATH_IB_SDR);
- ipath_toggle_rclkrls(dd);
-
- /*
- * wait up to 250 msec for link to train and get to INIT at DDR;
- * this should terminate early
- */
- wait_event_timeout(dd->ipath_autoneg_wait,
- !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG),
- msecs_to_jiffies(250));
-done:
- if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
- ipath_dbg("Did not get to DDR INIT (%x) after %Lu msecs\n",
- ipath_ib_state(dd, dd->ipath_lastibcstat),
- jiffies_to_msecs(jiffies)-startms);
- dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
- if (dd->ipath_autoneg_tries == IPATH_AUTONEG_TRIES) {
- dd->ipath_flags |= IPATH_IB_AUTONEG_FAILED;
- ipath_dbg("Giving up on DDR until next IB "
- "link Down\n");
- dd->ipath_autoneg_tries = 0;
- }
- set_speed_fast(dd, dd->ipath_link_speed_enabled);
- }
-}
-
-
-/**
- * ipath_init_iba7220_funcs - set up the chip-specific function pointers
- * @dd: the infinipath device
- *
- * This is global, and is called directly at init to set up the
- * chip-specific function pointers for later use.
- */
-void ipath_init_iba7220_funcs(struct ipath_devdata *dd)
-{
- dd->ipath_f_intrsetup = ipath_7220_intconfig;
- dd->ipath_f_bus = ipath_setup_7220_config;
- dd->ipath_f_reset = ipath_setup_7220_reset;
- dd->ipath_f_get_boardname = ipath_7220_boardname;
- dd->ipath_f_init_hwerrors = ipath_7220_init_hwerrors;
- dd->ipath_f_early_init = ipath_7220_early_init;
- dd->ipath_f_handle_hwerrors = ipath_7220_handle_hwerrors;
- dd->ipath_f_quiet_serdes = ipath_7220_quiet_serdes;
- dd->ipath_f_bringup_serdes = ipath_7220_bringup_serdes;
- dd->ipath_f_clear_tids = ipath_7220_clear_tids;
- dd->ipath_f_put_tid = ipath_7220_put_tid;
- dd->ipath_f_cleanup = ipath_setup_7220_cleanup;
- dd->ipath_f_setextled = ipath_setup_7220_setextled;
- dd->ipath_f_get_base_info = ipath_7220_get_base_info;
- dd->ipath_f_free_irq = ipath_7220_free_irq;
- dd->ipath_f_tidtemplate = ipath_7220_tidtemplate;
- dd->ipath_f_intr_fallback = ipath_7220_intr_fallback;
- dd->ipath_f_xgxs_reset = ipath_7220_xgxs_reset;
- dd->ipath_f_get_ib_cfg = ipath_7220_get_ib_cfg;
- dd->ipath_f_set_ib_cfg = ipath_7220_set_ib_cfg;
- dd->ipath_f_config_jint = ipath_7220_config_jint;
- dd->ipath_f_config_ports = ipath_7220_config_ports;
- dd->ipath_f_read_counters = ipath_7220_read_counters;
- dd->ipath_f_get_msgheader = ipath_7220_get_msgheader;
- dd->ipath_f_ib_updown = ipath_7220_ib_updown;
-
- /* initialize chip-specific variables */
- ipath_init_7220_variables(dd);
-}
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 3e5baa43fc8..be2a60e142b 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -33,6 +33,9 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
@@ -229,6 +232,7 @@ static int init_chip_first(struct ipath_devdata *dd)
spin_lock_init(&dd->ipath_kernel_tid_lock);
spin_lock_init(&dd->ipath_user_tid_lock);
spin_lock_init(&dd->ipath_sendctrl_lock);
+ spin_lock_init(&dd->ipath_uctxt_lock);
spin_lock_init(&dd->ipath_sdma_lock);
spin_lock_init(&dd->ipath_gpio_lock);
spin_lock_init(&dd->ipath_eep_st_lock);
@@ -333,7 +337,7 @@ done:
* @dd: the infinipath device
*
* sanity check at least some of the values after reset, and
- * ensure no receive or transmit (explictly, in case reset
+ * ensure no receive or transmit (explicitly, in case reset
* failed
*/
static int init_chip_reset(struct ipath_devdata *dd)
@@ -440,7 +444,7 @@ static void init_shadow_tids(struct ipath_devdata *dd)
struct page **pages;
dma_addr_t *addrs;
- pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+ pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
sizeof(struct page *));
if (!pages) {
ipath_dev_err(dd, "failed to allocate shadow page * "
@@ -454,14 +458,11 @@ static void init_shadow_tids(struct ipath_devdata *dd)
if (!addrs) {
ipath_dev_err(dd, "failed to allocate shadow dma handle "
"array, no expected sends!\n");
- vfree(dd->ipath_pageshadow);
+ vfree(pages);
dd->ipath_pageshadow = NULL;
return;
}
- memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
- sizeof(struct page *));
-
dd->ipath_pageshadow = pages;
dd->ipath_physshadow = addrs;
}
@@ -718,16 +719,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
goto done;
/*
- * we ignore most issues after reporting them, but have to specially
- * handle hardware-disabled chips.
- */
- if (ret == 2) {
- /* unique error, known to ipath_init_one */
- ret = -EPERM;
- goto done;
- }
-
- /*
* We could bump this to allow for full rcvegrcnt + rcvtidcnt,
* but then it no longer nicely fits power of two, and since
* we now use routines that backend onto __get_free_pages, the
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 26900b3b7a4..01ba792791a 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -33,6 +33,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/sched.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -69,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
"SendbufErrs %lx %lx", sbuf[0],
sbuf[1]);
@@ -208,8 +209,7 @@ void ipath_format_hwerrors(u64 hwerrs,
{
int i;
const int glen =
- sizeof(ipath_generic_hwerror_msgs) /
- sizeof(ipath_generic_hwerror_msgs[0]);
+ ARRAY_SIZE(ipath_generic_hwerror_msgs);
for (i=0; i<glen; i++) {
if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
@@ -356,9 +356,10 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
dd->ipath_cregs->cr_iblinkerrrecovcnt);
if (linkrecov != dd->ipath_lastlinkrecov) {
ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
- ibcs, ib_linkstate(dd, ibcs),
+ (unsigned long long) ibcs,
+ ib_linkstate(dd, ibcs),
ipath_ibcstatus_str[ltstate],
- linkrecov);
+ (unsigned long long) linkrecov);
/* and no more until active again */
dd->ipath_lastlinkrecov = 0;
ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
@@ -754,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
/* likely due to cancel; so suppress message unless verbose */
if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
/* armlaunch takes precedence; it often causes both. */
ipath_cdbg(VERBOSE,
"Suppressed %s error (%llx) after sendbuf cancel\n",
@@ -1118,9 +1119,11 @@ irqreturn_t ipath_intr(int irq, void *data)
if (unlikely(istat & ~dd->ipath_i_bitsextant))
ipath_dev_err(dd,
"interrupt with unknown interrupts %Lx set\n",
+ (unsigned long long)
istat & ~dd->ipath_i_bitsextant);
else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
- ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat);
+ ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
+ (unsigned long long) istat);
if (istat & INFINIPATH_I_ERROR) {
ipath_stats.sps_errints++;
@@ -1128,7 +1131,8 @@ irqreturn_t ipath_intr(int irq, void *data)
dd->ipath_kregs->kr_errorstatus);
if (!estat)
dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
- "but no error bits set!\n", istat);
+ "but no error bits set!\n",
+ (unsigned long long) istat);
else if (estat == -1LL)
/*
* should we try clearing all, or hope next read
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 0bd8bcb184a..6559af60bff 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -355,6 +355,19 @@ struct ipath_devdata {
/* errors masked because they occur too fast */
ipath_err_t ipath_maskederrs;
u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
+ /* these 5 fields are used to establish deltas for IB Symbol
+ * errors and linkrecovery errors. They can be reported on
+ * some chips during link negotiation prior to INIT, and with
+ * DDR when faking DDR negotiations with non-IBTA switches.
+ * The chip counters are adjusted at driver unload if there is
+ * a non-zero delta.
+ */
+ u64 ibdeltainprog;
+ u64 ibsymdelta;
+ u64 ibsymsnap;
+ u64 iblnkerrdelta;
+ u64 iblnkerrsnap;
+
/* time in jiffies at which to re-enable maskederrs */
unsigned long ipath_unmasktime;
/* count of egrfull errors, combined for all ports */
@@ -464,6 +477,8 @@ struct ipath_devdata {
spinlock_t ipath_kernel_tid_lock;
spinlock_t ipath_user_tid_lock;
spinlock_t ipath_sendctrl_lock;
+ /* around ipath_pd and (user ports) port_cnt use (intr vs free) */
+ spinlock_t ipath_uctxt_lock;
/*
* IPATH_STATUS_*,
@@ -1015,8 +1030,6 @@ void ipath_free_data(struct ipath_portdata *dd);
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
unsigned len, int avail);
-void ipath_init_iba7220_funcs(struct ipath_devdata *);
-void ipath_init_iba6120_funcs(struct ipath_devdata *);
void ipath_init_iba6110_funcs(struct ipath_devdata *);
void ipath_get_eeprom_info(struct ipath_devdata *);
int ipath_update_eeprom_log(struct ipath_devdata *dd);
@@ -1271,7 +1284,7 @@ struct device_driver;
extern const char ib_ipath_version[];
-extern struct attribute_group *ipath_driver_attr_groups[];
+extern const struct attribute_group *ipath_driver_attr_groups[];
int ipath_device_create_group(struct device *, struct ipath_devdata *);
void ipath_device_remove_group(struct device *, struct ipath_devdata *);
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 8f32b17a5ee..c0e933fec21 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -132,6 +132,7 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
* (see ipath_get_dma_mr and ipath_dma.c).
*/
if (sge->lkey == 0) {
+ /* always a kernel port, no locking needed */
struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
if (pd->user) {
@@ -211,6 +212,7 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
* (see ipath_get_dma_mr and ipath_dma.c).
*/
if (rkey == 0) {
+ /* always a kernel port, no locking needed */
struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
if (pd->user) {
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index be4fc9ada8e..43f2d0424d4 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -32,15 +32,16 @@
*/
#include <rdma/ib_smi.h>
+#include <rdma/ib_pma.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
-#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004)
-#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008)
-#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C)
-#define IB_SMP_INVALID_FIELD __constant_htons(0x001C)
+#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004)
+#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008)
+#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
+#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C)
static int reply(struct ib_smp *smp)
{
@@ -60,7 +61,7 @@ static int recv_subn_get_nodedescription(struct ib_smp *smp,
if (smp->attr_mod)
smp->status |= IB_SMP_INVALID_FIELD;
- strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+ memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
return reply(smp);
}
@@ -348,6 +349,7 @@ bail:
*/
static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
{
+ /* always a kernel port, no locking needed */
struct ipath_portdata *pd = dd->ipath_pd[0];
memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
@@ -730,6 +732,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
int i;
int changed = 0;
+ /* always a kernel port, no locking needed */
pd = dd->ipath_pd[0];
for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
@@ -787,151 +790,18 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
return recv_subn_get_pkeytable(smp, ibdev);
}
-#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
-#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010)
-#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011)
-#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012)
-#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D)
-#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E)
-
-struct ib_perf {
- u8 base_version;
- u8 mgmt_class;
- u8 class_version;
- u8 method;
- __be16 status;
- __be16 unused;
- __be64 tid;
- __be16 attr_id;
- __be16 resv;
- __be32 attr_mod;
- u8 reserved[40];
- u8 data[192];
-} __attribute__ ((packed));
-
-struct ib_pma_classportinfo {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- u8 reserved[3];
- u8 resp_time_value; /* only lower 5 bits */
- union ib_gid redirect_gid;
- __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
- __be16 redirect_lid;
- __be16 redirect_pkey;
- __be32 redirect_qp; /* only lower 24 bits */
- __be32 redirect_qkey;
- union ib_gid trap_gid;
- __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
- __be16 trap_lid;
- __be16 trap_pkey;
- __be32 trap_hl_qp; /* 8, 24 bits respectively */
- __be32 trap_qkey;
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplescontrol {
- u8 opcode;
- u8 port_select;
- u8 tick;
- u8 counter_width; /* only lower 3 bits */
- __be32 counter_mask0_9; /* 2, 10 * 3, bits */
- __be16 counter_mask10_14; /* 1, 5 * 3, bits */
- u8 sample_mechanisms;
- u8 sample_status; /* only lower 2 bits */
- __be64 option_mask;
- __be64 vendor_mask;
- __be32 sample_start;
- __be32 sample_interval;
- __be16 tag;
- __be16 counter_select[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplesresult {
- __be16 tag;
- __be16 sample_status; /* only lower 2 bits */
- __be32 counter[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portsamplesresult_ext {
- __be16 tag;
- __be16 sample_status; /* only lower 2 bits */
- __be32 extended_width; /* only upper 2 bits */
- __be64 counter[15];
-} __attribute__ ((packed));
-
-struct ib_pma_portcounters {
- u8 reserved;
- u8 port_select;
- __be16 counter_select;
- __be16 symbol_error_counter;
- u8 link_error_recovery_counter;
- u8 link_downed_counter;
- __be16 port_rcv_errors;
- __be16 port_rcv_remphys_errors;
- __be16 port_rcv_switch_relay_errors;
- __be16 port_xmit_discards;
- u8 port_xmit_constraint_errors;
- u8 port_rcv_constraint_errors;
- u8 reserved1;
- u8 lli_ebor_errors; /* 4, 4, bits */
- __be16 reserved2;
- __be16 vl15_dropped;
- __be32 port_xmit_data;
- __be32 port_rcv_data;
- __be32 port_xmit_packets;
- __be32 port_rcv_packets;
-} __attribute__ ((packed));
-
-#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001)
-#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002)
-#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004)
-#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008)
-#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010)
-#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040)
-#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200)
-#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400)
-#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800)
-#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000)
-#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000)
-#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000)
-#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000)
-
-struct ib_pma_portcounters_ext {
- u8 reserved;
- u8 port_select;
- __be16 counter_select;
- __be32 reserved1;
- __be64 port_xmit_data;
- __be64 port_rcv_data;
- __be64 port_xmit_packets;
- __be64 port_rcv_packets;
- __be64 port_unicast_xmit_packets;
- __be64 port_unicast_rcv_packets;
- __be64 port_multicast_xmit_packets;
- __be64 port_multicast_rcv_packets;
-} __attribute__ ((packed));
-
-#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001)
-#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002)
-#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004)
-#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008)
-#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010)
-#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020)
-#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040)
-#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080)
-
-static int recv_pma_get_classportinfo(struct ib_perf *pmp)
+static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp)
{
- struct ib_pma_classportinfo *p =
- (struct ib_pma_classportinfo *)pmp->data;
+ struct ib_class_port_info *p =
+ (struct ib_class_port_info *)pmp->data;
memset(pmp->data, 0, sizeof(pmp->data));
- if (pmp->attr_mod != 0)
- pmp->status |= IB_SMP_INVALID_FIELD;
+ if (pmp->mad_hdr.attr_mod != 0)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
/* Indicate AllPortSelect is valid (only one port anyway) */
- p->cap_mask = __constant_cpu_to_be16(1 << 8);
+ p->capability_mask = cpu_to_be16(1 << 8);
p->base_version = 1;
p->class_version = 1;
/*
@@ -949,14 +819,13 @@ static int recv_pma_get_classportinfo(struct ib_perf *pmp)
* We support 5 counters which only count the mandatory quantities.
*/
#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
-#define COUNTER_MASK0_9 \
- __constant_cpu_to_be32(COUNTER_MASK(1, 0) | \
- COUNTER_MASK(1, 1) | \
- COUNTER_MASK(1, 2) | \
- COUNTER_MASK(1, 3) | \
- COUNTER_MASK(1, 4))
-
-static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
+#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \
+ COUNTER_MASK(1, 1) | \
+ COUNTER_MASK(1, 2) | \
+ COUNTER_MASK(1, 3) | \
+ COUNTER_MASK(1, 4))
+
+static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplescontrol *p =
@@ -969,9 +838,9 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(port_select != port && port_select != 0xFF))
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
/*
* Ticks are 10x the link transfer period which for 2.5Gbs is 4
* nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample
@@ -1005,7 +874,7 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
+static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portsamplescontrol *p =
@@ -1016,9 +885,9 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp,
u8 status;
int ret;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(p->port_select != port && p->port_select != 0xFF)) {
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
@@ -1092,7 +961,7 @@ static u64 get_counter(struct ipath_ibdev *dev,
return ret;
}
-static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
+static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp,
struct ib_device *ibdev)
{
struct ib_pma_portsamplesresult *p =
@@ -1117,7 +986,7 @@ static int recv_pma_get_portsamplesresult(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
+static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev)
{
struct ib_pma_portsamplesresult_ext *p =
@@ -1135,7 +1004,7 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
status = dev->pma_sample_status;
p->sample_status = cpu_to_be16(status);
/* 64 bits */
- p->extended_width = __constant_cpu_to_be32(0x80000000);
+ p->extended_width = cpu_to_be32(0x80000000);
for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++)
p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 0 :
cpu_to_be64(
@@ -1144,7 +1013,7 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_get_portcounters(struct ib_perf *pmp,
+static int recv_pma_get_portcounters(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1178,12 +1047,12 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(port_select != port && port_select != 0xFF))
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
if (cntrs.symbol_error_counter > 0xFFFFUL)
- p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF);
+ p->symbol_error_counter = cpu_to_be16(0xFFFF);
else
p->symbol_error_counter =
cpu_to_be16((u16)cntrs.symbol_error_counter);
@@ -1197,17 +1066,17 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
else
p->link_downed_counter = (u8)cntrs.link_downed_counter;
if (cntrs.port_rcv_errors > 0xFFFFUL)
- p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF);
+ p->port_rcv_errors = cpu_to_be16(0xFFFF);
else
p->port_rcv_errors =
cpu_to_be16((u16) cntrs.port_rcv_errors);
if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
- p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF);
+ p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
else
p->port_rcv_remphys_errors =
cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
if (cntrs.port_xmit_discards > 0xFFFFUL)
- p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF);
+ p->port_xmit_discards = cpu_to_be16(0xFFFF);
else
p->port_xmit_discards =
cpu_to_be16((u16)cntrs.port_xmit_discards);
@@ -1215,27 +1084,27 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
cntrs.local_link_integrity_errors = 0xFUL;
if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
cntrs.excessive_buffer_overrun_errors = 0xFUL;
- p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) |
+ p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
cntrs.excessive_buffer_overrun_errors;
if (cntrs.vl15_dropped > 0xFFFFUL)
- p->vl15_dropped = __constant_cpu_to_be16(0xFFFF);
+ p->vl15_dropped = cpu_to_be16(0xFFFF);
else
p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
- p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
else
p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
- p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
else
p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
- p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
else
p->port_xmit_packets =
cpu_to_be32((u32)cntrs.port_xmit_packets);
if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
- p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF);
+ p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
else
p->port_rcv_packets =
cpu_to_be32((u32) cntrs.port_rcv_packets);
@@ -1243,7 +1112,7 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
+static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters_ext *p =
@@ -1264,9 +1133,9 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
memset(pmp->data, 0, sizeof(pmp->data));
p->port_select = port_select;
- if (pmp->attr_mod != 0 ||
+ if (pmp->mad_hdr.attr_mod != 0 ||
(port_select != port && port_select != 0xFF))
- pmp->status |= IB_SMP_INVALID_FIELD;
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
p->port_xmit_data = cpu_to_be64(swords);
p->port_rcv_data = cpu_to_be64(rwords);
@@ -1280,7 +1149,7 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
return reply((struct ib_smp *) pmp);
}
-static int recv_pma_set_portcounters(struct ib_perf *pmp,
+static int recv_pma_set_portcounters(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1343,7 +1212,7 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
return recv_pma_get_portcounters(pmp, ibdev, port);
}
-static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
+static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
@@ -1517,19 +1386,19 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
- struct ib_perf *pmp = (struct ib_perf *)out_mad;
+ struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
int ret;
*out_mad = *in_mad;
- if (pmp->class_version != 1) {
- pmp->status |= IB_SMP_UNSUP_VERSION;
+ if (pmp->mad_hdr.class_version != 1) {
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
- switch (pmp->method) {
+ switch (pmp->mad_hdr.method) {
case IB_MGMT_METHOD_GET:
- switch (pmp->attr_id) {
+ switch (pmp->mad_hdr.attr_id) {
case IB_PMA_CLASS_PORT_INFO:
ret = recv_pma_get_classportinfo(pmp);
goto bail;
@@ -1553,13 +1422,13 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
port_num);
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
case IB_MGMT_METHOD_SET:
- switch (pmp->attr_id) {
+ switch (pmp->mad_hdr.attr_id) {
case IB_PMA_PORT_SAMPLES_CONTROL:
ret = recv_pma_set_portsamplescontrol(pmp, ibdev,
port_num);
@@ -1573,7 +1442,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
port_num);
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METH_ATTR;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_smp *) pmp);
goto bail;
}
@@ -1587,7 +1456,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num,
ret = IB_MAD_RESULT_SUCCESS;
goto bail;
default:
- pmp->status |= IB_SMP_UNSUP_METHOD;
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_smp *) pmp);
}
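
The ipath_mad.c hunks above retire the driver-private ib_perf layout and the __constant_htons() attribute IDs in favour of the shared definitions in <rdma/ib_pma.h>, so every header field is now reached through pmp->mad_hdr. A minimal sketch of that dispatch shape, using only the constants and the reply() helper that appear in the hunks; pma_dispatch() and handle_port_counters() are hypothetical names, not part of the driver:

/* Illustrative sketch, not driver code: route a PMA MAD through the
 * shared struct ib_pma_mad header fields. */
static int pma_dispatch(struct ib_pma_mad *pmp)
{
	if (pmp->mad_hdr.class_version != 1) {
		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
		return reply((struct ib_smp *) pmp);
	}
	if (pmp->mad_hdr.method != IB_MGMT_METHOD_GET) {
		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
		return reply((struct ib_smp *) pmp);
	}
	switch (pmp->mad_hdr.attr_id) {
	case IB_PMA_PORT_COUNTERS:
		return handle_port_counters(pmp);	/* hypothetical */
	default:
		pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
		return reply((struct ib_smp *) pmp);
	}
}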
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
index fa830e22002..e7327422940 100644
--- a/drivers/infiniband/hw/ipath/ipath_mmap.c
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <asm/pgtable.h>
@@ -74,7 +75,7 @@ static void ipath_vma_close(struct vm_area_struct *vma)
kref_put(&ip->ref, ipath_release_mmap_info);
}
-static struct vm_operations_struct ipath_vm_ops = {
+static const struct vm_operations_struct ipath_vm_ops = {
.open = ipath_vma_open,
.close = ipath_vma_close,
};
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 9d343b7c2f3..5e61e9bff69 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,8 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+
#include <rdma/ib_umem.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_smi.h>
@@ -186,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct ipath_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ int n, m, entry;
+ struct scatterlist *sg;
struct ib_mr *ret;
if (length == 0) {
@@ -200,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
-
+ n = umem->nmap;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
@@ -222,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
- void *vaddr;
-
- vaddr = page_address(sg_page(&chunk->page_list[i]));
- if (!vaddr) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
}
}
ret = &mr->ibmr;
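
The ipath_mr.c hunk drops the old per-chunk page walk and visits the pinned region once with for_each_sg() over umem->sg_head.sgl, bounded by umem->nmap. A minimal sketch of that iteration pattern, assuming the umem has already been pinned with ib_umem_get() as in the hunk; map_umem_pages() and record_segment() are hypothetical names:

/* Illustrative sketch, not driver code: visit every mapped page of an
 * ib_umem through its scatterlist. */
static int map_umem_pages(struct ib_umem *umem)
{
	struct scatterlist *sg;
	int entry;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		void *vaddr = page_address(sg_page(sg));

		if (!vaddr)
			return -EINVAL;	/* highmem page with no kernel mapping */
		record_segment(vaddr, umem->page_size);	/* hypothetical */
	}
	return 0;
}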
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 4715911101e..face87602dc 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -32,6 +32,8 @@
*/
#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
@@ -461,7 +463,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
@@ -745,6 +747,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
struct ipath_swqe *swq = NULL;
struct ipath_ibdev *dev;
size_t sz;
+ size_t sg_list_sz;
struct ib_qp *ret;
if (init_attr->create_flags) {
@@ -789,19 +792,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
goto bail;
}
sz = sizeof(*qp);
+ sg_list_sz = 0;
if (init_attr->srq) {
struct ipath_srq *srq = to_isrq(init_attr->srq);
- sz += sizeof(*qp->r_sg_list) *
- srq->rq.max_sge;
- } else
- sz += sizeof(*qp->r_sg_list) *
- init_attr->cap.max_recv_sge;
- qp = kmalloc(sz, GFP_KERNEL);
+ if (srq->rq.max_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (srq->rq.max_sge - 1);
+ } else if (init_attr->cap.max_recv_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (init_attr->cap.max_recv_sge - 1);
+ qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
+ if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
+ init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI)) {
+ qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
+ if (!qp->r_ud_sg_list) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ } else
+ qp->r_ud_sg_list = NULL;
if (init_attr->srq) {
sz = 0;
qp->r_rq.size = 0;
@@ -818,7 +833,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
qp->r_rq.size * sz);
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
- goto bail_qp;
+ goto bail_sg_list;
}
}
@@ -848,7 +863,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
if (err) {
ret = ERR_PTR(err);
vfree(qp->r_rq.wq);
- goto bail_qp;
+ goto bail_sg_list;
}
qp->ip = NULL;
qp->s_tx = NULL;
@@ -925,6 +940,8 @@ bail_ip:
vfree(qp->r_rq.wq);
ipath_free_qp(&dev->qp_table, qp);
free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_sg_list:
+ kfree(qp->r_ud_sg_list);
bail_qp:
kfree(qp);
bail_swq:
@@ -989,6 +1006,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
kref_put(&qp->ip->ref, ipath_release_mmap_info);
else
vfree(qp->r_rq.wq);
+ kfree(qp->r_ud_sg_list);
vfree(qp->s_wq);
kfree(qp);
return 0;
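
The ipath_create_qp() hunks above only append the receive SG list to the main kmalloc() when more than one SGE is possible, allocate a separate r_ud_sg_list for UD/SMI/GSI QPs, and unwind through the new bail_sg_list label on failure. A condensed sketch of that allocate-then-unwind shape; alloc_qp_sketch(), needs_ud_list and setup_receive_queue() are hypothetical stand-ins for the driver logic:

/* Illustrative sketch, not driver code: base object plus optional trailing
 * array in one allocation, with a second buffer freed on the error path. */
static struct ib_qp *alloc_qp_sketch(size_t sg_list_sz, int needs_ud_list)
{
	struct ipath_qp *qp;

	qp = kmalloc(sizeof(*qp) + sg_list_sz, GFP_KERNEL);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	qp->r_ud_sg_list = NULL;
	if (sg_list_sz && needs_ud_list) {
		qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
		if (!qp->r_ud_sg_list)
			goto bail_qp;
	}

	if (setup_receive_queue(qp))	/* hypothetical step */
		goto bail_sg_list;

	return &qp->ibqp;

bail_sg_list:
	kfree(qp->r_ud_sg_list);
bail_qp:
	kfree(qp);
	return ERR_PTR(-ENOMEM);
}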
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 97710522624..79b3dbc9717 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -573,9 +573,8 @@ int ipath_make_rc_req(struct ipath_qp *qp)
ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
qp->s_state = OP(RDMA_READ_REQUEST);
hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
- bth2 = qp->s_psn++ & IPATH_PSN_MASK;
- if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
- qp->s_next_psn = qp->s_psn;
+ bth2 = qp->s_psn & IPATH_PSN_MASK;
+ qp->s_psn = wqe->lpsn + 1;
ss = NULL;
len = 0;
qp->s_cur++;
@@ -675,7 +674,8 @@ static void send_rc_ack(struct ipath_qp *qp)
hdr.lrh[0] = cpu_to_be16(lrh0);
hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(dd->ipath_lid);
+ hdr.lrh[3] = cpu_to_be16(dd->ipath_lid |
+ qp->remote_ah_attr.src_path_bits);
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -1744,7 +1744,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
- __constant_cpu_to_be32(1 << 23)) != 0);
+ cpu_to_be32(1 << 23)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index af051f75766..1f95bbaf760 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include "ipath_verbs.h"
@@ -156,7 +157,7 @@ bail:
/**
* ipath_get_rwqe - copy the next RWQE into the QP's RWQE
* @qp: the QP
- * @wr_id_only: update wr_id only, not SGEs
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
*
* Return 0 if no RWQE is available, otherwise return 1.
*
@@ -173,8 +174,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
u32 tail;
int ret;
- qp->r_sge.sg_list = qp->r_sg_list;
-
if (qp->ibqp.srq) {
srq = to_isrq(qp->ibqp.srq);
handler = srq->ibsrq.event_handler;
@@ -206,8 +205,10 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
wqe = get_rwqe_ptr(rq, tail);
if (++tail >= rq->size)
tail = 0;
- } while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len,
- &qp->r_sge));
+ if (wr_id_only)
+ break;
+ qp->r_sge.sg_list = qp->r_sg_list;
+ } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge));
qp->r_wr_id = wqe->wr_id;
wq->tail = tail;
@@ -618,7 +619,8 @@ void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+ qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid |
+ qp->remote_ah_attr.src_path_bits);
bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
bth0 |= extra_bytes << 20;
ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220_img.c b/drivers/infiniband/hw/ipath/ipath_sd7220_img.c
deleted file mode 100644
index 5ef59da9270..00000000000
--- a/drivers/infiniband/hw/ipath/ipath_sd7220_img.c
+++ /dev/null
@@ -1,1082 +0,0 @@
-/*
- * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This file contains the memory image from the vendor, to be copied into
- * the IB SERDES of the IBA7220 during initialization.
- * The file also includes the two functions which use this image.
- */
-#include <linux/pci.h>
-#include <linux/delay.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-#include "ipath_7220.h"
-
-static unsigned char ipath_sd7220_ib_img[] = {
-/*0000*/0x02, 0x0A, 0x29, 0x02, 0x0A, 0x87, 0xE5, 0xE6,
- 0x30, 0xE6, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
-/*0010*/0x00, 0xE5, 0xE2, 0x30, 0xE4, 0x04, 0x7E, 0x01,
- 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x08,
-/*0020*/0x53, 0xF9, 0xF7, 0xE4, 0xF5, 0xFE, 0x80, 0x08,
- 0x7F, 0x0A, 0x12, 0x17, 0x31, 0x12, 0x0E, 0xA2,
-/*0030*/0x75, 0xFC, 0x08, 0xE4, 0xF5, 0xFD, 0xE5, 0xE7,
- 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0x22, 0x00,
-/*0040*/0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x75,
- 0x51, 0x01, 0xE4, 0xF5, 0x52, 0xF5, 0x53, 0xF5,
-/*0050*/0x52, 0xF5, 0x7E, 0x7F, 0x04, 0x02, 0x04, 0x38,
- 0xC2, 0x36, 0x05, 0x52, 0xE5, 0x52, 0xD3, 0x94,
-/*0060*/0x0C, 0x40, 0x05, 0x75, 0x52, 0x01, 0xD2, 0x36,
- 0x90, 0x07, 0x0C, 0x74, 0x07, 0xF0, 0xA3, 0x74,
-/*0070*/0xFF, 0xF0, 0xE4, 0xF5, 0x0C, 0xA3, 0xF0, 0x90,
- 0x07, 0x14, 0xF0, 0xA3, 0xF0, 0x75, 0x0B, 0x20,
-/*0080*/0xF5, 0x09, 0xE4, 0xF5, 0x08, 0xE5, 0x08, 0xD3,
- 0x94, 0x30, 0x40, 0x03, 0x02, 0x04, 0x04, 0x12,
-/*0090*/0x00, 0x06, 0x15, 0x0B, 0xE5, 0x08, 0x70, 0x04,
- 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x09,
-/*00A0*/0x70, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00,
- 0xEE, 0x5F, 0x60, 0x05, 0x12, 0x18, 0x71, 0xD2,
-/*00B0*/0x35, 0x53, 0xE1, 0xF7, 0xE5, 0x08, 0x45, 0x09,
- 0xFF, 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24,
-/*00C0*/0x83, 0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83,
- 0xEF, 0xF0, 0x85, 0xE2, 0x20, 0xE5, 0x52, 0xD3,
-/*00D0*/0x94, 0x01, 0x40, 0x0D, 0x12, 0x19, 0xF3, 0xE0,
- 0x54, 0xA0, 0x64, 0x40, 0x70, 0x03, 0x02, 0x03,
-/*00E0*/0xFB, 0x53, 0xF9, 0xF8, 0x90, 0x94, 0x70, 0xE4,
- 0xF0, 0xE0, 0xF5, 0x10, 0xAF, 0x09, 0x12, 0x1E,
-/*00F0*/0xB3, 0xAF, 0x08, 0xEF, 0x44, 0x08, 0xF5, 0x82,
- 0x75, 0x83, 0x80, 0xE0, 0xF5, 0x29, 0xEF, 0x44,
-/*0100*/0x07, 0x12, 0x1A, 0x3C, 0xF5, 0x22, 0x54, 0x40,
- 0xD3, 0x94, 0x00, 0x40, 0x1E, 0xE5, 0x29, 0x54,
-/*0110*/0xF0, 0x70, 0x21, 0x12, 0x19, 0xF3, 0xE0, 0x44,
- 0x80, 0xF0, 0xE5, 0x22, 0x54, 0x30, 0x65, 0x08,
-/*0120*/0x70, 0x09, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xBF,
- 0xF0, 0x80, 0x09, 0x12, 0x19, 0xF3, 0x74, 0x40,
-/*0130*/0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, 0x75,
- 0x83, 0xAE, 0x74, 0xFF, 0xF0, 0xAF, 0x08, 0x7E,
-/*0140*/0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0xE0, 0xFD,
- 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, 0x81,
-/*0150*/0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, 0xED,
- 0xF0, 0x90, 0x07, 0x0E, 0xE0, 0x04, 0xF0, 0xEF,
-/*0160*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0x98, 0xE0,
- 0xF5, 0x28, 0x12, 0x1A, 0x23, 0x40, 0x0C, 0x12,
-/*0170*/0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, 0x1A, 0x32,
- 0x02, 0x03, 0xF6, 0xAF, 0x08, 0x7E, 0x00, 0x74,
-/*0180*/0x80, 0xCD, 0xEF, 0xCD, 0x8D, 0x82, 0xF5, 0x83,
- 0xE0, 0x30, 0xE0, 0x0A, 0x12, 0x19, 0xF3, 0xE0,
-/*0190*/0x44, 0x20, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x19,
- 0xF3, 0xE0, 0x54, 0xDF, 0xF0, 0xEE, 0x44, 0xAE,
-/*01A0*/0x12, 0x1A, 0x43, 0x30, 0xE4, 0x03, 0x02, 0x03,
- 0xFB, 0x74, 0x9E, 0x12, 0x1A, 0x05, 0x20, 0xE0,
-/*01B0*/0x03, 0x02, 0x03, 0xFB, 0x8F, 0x82, 0x8E, 0x83,
- 0xE0, 0x20, 0xE0, 0x03, 0x02, 0x03, 0xFB, 0x12,
-/*01C0*/0x19, 0xF3, 0xE0, 0x44, 0x10, 0xF0, 0xE5, 0xE3,
- 0x20, 0xE7, 0x08, 0xE5, 0x08, 0x12, 0x1A, 0x3A,
-/*01D0*/0x44, 0x04, 0xF0, 0xAF, 0x08, 0x7E, 0x00, 0xEF,
- 0x12, 0x1A, 0x3A, 0x20, 0xE2, 0x34, 0x12, 0x19,
-/*01E0*/0xF3, 0xE0, 0x44, 0x08, 0xF0, 0xE5, 0xE4, 0x30,
- 0xE6, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
-/*01F0*/0xE5, 0x7E, 0xC3, 0x94, 0x04, 0x50, 0x04, 0x7C,
- 0x01, 0x80, 0x02, 0x7C, 0x00, 0xEC, 0x4D, 0x60,
-/*0200*/0x05, 0xC2, 0x35, 0x02, 0x03, 0xFB, 0xEE, 0x44,
- 0xD2, 0x12, 0x1A, 0x43, 0x44, 0x40, 0xF0, 0x02,
-/*0210*/0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xF7,
- 0xF0, 0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0,
-/*0220*/0x54, 0xBF, 0xF0, 0x90, 0x07, 0x14, 0xE0, 0x04,
- 0xF0, 0xE5, 0x7E, 0x70, 0x03, 0x75, 0x7E, 0x01,
-/*0230*/0xAF, 0x08, 0x7E, 0x00, 0x12, 0x1A, 0x23, 0x40,
- 0x12, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12,
-/*0240*/0x19, 0xF2, 0xE0, 0x54, 0x02, 0x12, 0x1A, 0x32,
- 0x02, 0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x44,
-/*0250*/0x02, 0x12, 0x19, 0xF2, 0xE0, 0x54, 0xFE, 0xF0,
- 0xC2, 0x35, 0xEE, 0x44, 0x8A, 0x8F, 0x82, 0xF5,
-/*0260*/0x83, 0xE0, 0xF5, 0x17, 0x54, 0x8F, 0x44, 0x40,
- 0xF0, 0x74, 0x90, 0xFC, 0xE5, 0x08, 0x44, 0x07,
-/*0270*/0xFD, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0x54, 0x3F,
- 0x90, 0x07, 0x02, 0xF0, 0xE0, 0x54, 0xC0, 0x8D,
-/*0280*/0x82, 0x8C, 0x83, 0xF0, 0x74, 0x92, 0x12, 0x1A,
- 0x05, 0x90, 0x07, 0x03, 0x12, 0x1A, 0x19, 0x74,
-/*0290*/0x82, 0x12, 0x1A, 0x05, 0x90, 0x07, 0x04, 0x12,
- 0x1A, 0x19, 0x74, 0xB4, 0x12, 0x1A, 0x05, 0x90,
-/*02A0*/0x07, 0x05, 0x12, 0x1A, 0x19, 0x74, 0x94, 0xFE,
- 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, 0xF5,
-/*02B0*/0x10, 0x30, 0xE0, 0x04, 0xD2, 0x37, 0x80, 0x02,
- 0xC2, 0x37, 0xE5, 0x10, 0x54, 0x7F, 0x8F, 0x82,
-/*02C0*/0x8E, 0x83, 0xF0, 0x30, 0x44, 0x30, 0x12, 0x1A,
- 0x03, 0x54, 0x80, 0xD3, 0x94, 0x00, 0x40, 0x04,
-/*02D0*/0xD2, 0x39, 0x80, 0x02, 0xC2, 0x39, 0x8F, 0x82,
- 0x8E, 0x83, 0xE0, 0x44, 0x80, 0xF0, 0x12, 0x1A,
-/*02E0*/0x03, 0x54, 0x40, 0xD3, 0x94, 0x00, 0x40, 0x04,
- 0xD2, 0x3A, 0x80, 0x02, 0xC2, 0x3A, 0x8F, 0x82,
-/*02F0*/0x8E, 0x83, 0xE0, 0x44, 0x40, 0xF0, 0x74, 0x92,
- 0xFE, 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A,
-/*0300*/0x30, 0xE7, 0x04, 0xD2, 0x38, 0x80, 0x02, 0xC2,
- 0x38, 0x8F, 0x82, 0x8E, 0x83, 0xE0, 0x54, 0x7F,
-/*0310*/0xF0, 0x12, 0x1E, 0x46, 0xE4, 0xF5, 0x0A, 0x20,
- 0x03, 0x02, 0x80, 0x03, 0x30, 0x43, 0x03, 0x12,
-/*0320*/0x19, 0x95, 0x20, 0x02, 0x02, 0x80, 0x03, 0x30,
- 0x42, 0x03, 0x12, 0x0C, 0x8F, 0x30, 0x30, 0x06,
-/*0330*/0x12, 0x19, 0x95, 0x12, 0x0C, 0x8F, 0x12, 0x0D,
- 0x47, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xFB, 0xF0,
-/*0340*/0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, 0x46, 0x43,
- 0xE1, 0x08, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x04,
-/*0350*/0xF0, 0xE5, 0xE4, 0x20, 0xE7, 0x2A, 0x12, 0x1A,
- 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
-/*0360*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
- 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
-/*0370*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
- 0x5E, 0x60, 0x05, 0x12, 0x1D, 0xD7, 0x80, 0x17,
-/*0380*/0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x44,
- 0x08, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12,
-/*0390*/0x75, 0x83, 0xD2, 0xE0, 0x54, 0xF7, 0xF0, 0x12,
- 0x1E, 0x46, 0x7F, 0x08, 0x12, 0x17, 0x31, 0x74,
-/*03A0*/0x8E, 0xFE, 0x12, 0x1A, 0x12, 0x8E, 0x83, 0xE0,
- 0xF5, 0x10, 0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44,
-/*03B0*/0x01, 0xFF, 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07,
- 0xF5, 0x82, 0xEF, 0xF0, 0xE5, 0x10, 0x54, 0xFE,
-/*03C0*/0xFF, 0xED, 0x44, 0x07, 0xF5, 0x82, 0xEF, 0x12,
- 0x1A, 0x11, 0x75, 0x83, 0x86, 0xE0, 0x44, 0x10,
-/*03D0*/0x12, 0x1A, 0x11, 0xE0, 0x44, 0x10, 0xF0, 0x12,
- 0x19, 0xF3, 0xE0, 0x54, 0xFD, 0x44, 0x01, 0xFF,
-/*03E0*/0x12, 0x19, 0xF3, 0xEF, 0x12, 0x1A, 0x32, 0x30,
- 0x32, 0x0C, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
-/*03F0*/0x75, 0x83, 0x82, 0x74, 0x05, 0xF0, 0xAF, 0x0B,
- 0x12, 0x18, 0xD7, 0x74, 0x10, 0x25, 0x08, 0xF5,
-/*0400*/0x08, 0x02, 0x00, 0x85, 0x05, 0x09, 0xE5, 0x09,
- 0xD3, 0x94, 0x07, 0x50, 0x03, 0x02, 0x00, 0x82,
-/*0410*/0xE5, 0x7E, 0xD3, 0x94, 0x00, 0x40, 0x04, 0x7F,
- 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x7E, 0xC3,
-/*0420*/0x94, 0xFA, 0x50, 0x04, 0x7E, 0x01, 0x80, 0x02,
- 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x02, 0x05, 0x7E,
-/*0430*/0x30, 0x35, 0x0B, 0x43, 0xE1, 0x01, 0x7F, 0x09,
- 0x12, 0x17, 0x31, 0x02, 0x00, 0x58, 0x53, 0xE1,
-/*0440*/0xFE, 0x02, 0x00, 0x58, 0x8E, 0x6A, 0x8F, 0x6B,
- 0x8C, 0x6C, 0x8D, 0x6D, 0x75, 0x6E, 0x01, 0x75,
-/*0450*/0x6F, 0x01, 0x75, 0x70, 0x01, 0xE4, 0xF5, 0x73,
- 0xF5, 0x74, 0xF5, 0x75, 0x90, 0x07, 0x2F, 0xF0,
-/*0460*/0xF5, 0x3C, 0xF5, 0x3E, 0xF5, 0x46, 0xF5, 0x47,
- 0xF5, 0x3D, 0xF5, 0x3F, 0xF5, 0x6F, 0xE5, 0x6F,
-/*0470*/0x70, 0x0F, 0xE5, 0x6B, 0x45, 0x6A, 0x12, 0x07,
- 0x2A, 0x75, 0x83, 0x80, 0x74, 0x3A, 0xF0, 0x80,
-/*0480*/0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74,
- 0x1A, 0xF0, 0xE4, 0xF5, 0x6E, 0xC3, 0x74, 0x3F,
-/*0490*/0x95, 0x6E, 0xFF, 0x12, 0x08, 0x65, 0x75, 0x83,
- 0x82, 0xEF, 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x08,
-/*04A0*/0xC6, 0xE5, 0x33, 0xF0, 0x12, 0x08, 0xFA, 0x12,
- 0x08, 0xB1, 0x40, 0xE1, 0xE5, 0x6F, 0x70, 0x0B,
-/*04B0*/0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, 0x36,
- 0xF0, 0x80, 0x09, 0x12, 0x07, 0x2A, 0x75, 0x83,
-/*04C0*/0x80, 0x74, 0x16, 0xF0, 0x75, 0x6E, 0x01, 0x12,
- 0x07, 0x2A, 0x75, 0x83, 0xB4, 0xE5, 0x6E, 0xF0,
-/*04D0*/0x12, 0x1A, 0x4D, 0x74, 0x3F, 0x25, 0x6E, 0xF5,
- 0x82, 0xE4, 0x34, 0x00, 0xF5, 0x83, 0xE5, 0x33,
-/*04E0*/0xF0, 0x74, 0xBF, 0x25, 0x6E, 0xF5, 0x82, 0xE4,
- 0x34, 0x00, 0x12, 0x08, 0xB1, 0x40, 0xD8, 0xE4,
-/*04F0*/0xF5, 0x70, 0xF5, 0x46, 0xF5, 0x47, 0xF5, 0x6E,
- 0x12, 0x08, 0xFA, 0xF5, 0x83, 0xE0, 0xFE, 0x12,
-/*0500*/0x08, 0xC6, 0xE0, 0x7C, 0x00, 0x24, 0x00, 0xFF,
- 0xEC, 0x3E, 0xFE, 0xAD, 0x3B, 0xD3, 0xEF, 0x9D,
-/*0510*/0xEE, 0x9C, 0x50, 0x04, 0x7B, 0x01, 0x80, 0x02,
- 0x7B, 0x00, 0xE5, 0x70, 0x70, 0x04, 0x7A, 0x01,
-/*0520*/0x80, 0x02, 0x7A, 0x00, 0xEB, 0x5A, 0x60, 0x06,
- 0x85, 0x6E, 0x46, 0x75, 0x70, 0x01, 0xD3, 0xEF,
-/*0530*/0x9D, 0xEE, 0x9C, 0x50, 0x04, 0x7F, 0x01, 0x80,
- 0x02, 0x7F, 0x00, 0xE5, 0x70, 0xB4, 0x01, 0x04,
-/*0540*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, 0x5E,
- 0x60, 0x03, 0x85, 0x6E, 0x47, 0x05, 0x6E, 0xE5,
-/*0550*/0x6E, 0x64, 0x7F, 0x70, 0xA3, 0xE5, 0x46, 0x60,
- 0x05, 0xE5, 0x47, 0xB4, 0x7E, 0x03, 0x85, 0x46,
-/*0560*/0x47, 0xE5, 0x6F, 0x70, 0x08, 0x85, 0x46, 0x76,
- 0x85, 0x47, 0x77, 0x80, 0x0E, 0xC3, 0x74, 0x7F,
-/*0570*/0x95, 0x46, 0xF5, 0x78, 0xC3, 0x74, 0x7F, 0x95,
- 0x47, 0xF5, 0x79, 0xE5, 0x6F, 0x70, 0x37, 0xE5,
-/*0580*/0x46, 0x65, 0x47, 0x70, 0x0C, 0x75, 0x73, 0x01,
- 0x75, 0x74, 0x01, 0xF5, 0x3C, 0xF5, 0x3D, 0x80,
-/*0590*/0x35, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, 0x47, 0x95,
- 0x46, 0xF5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
-/*05A0*/0x46, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
- 0xE4, 0xF5, 0x3D, 0x80, 0x40, 0xC3, 0x74, 0x3F,
-/*05B0*/0x95, 0x72, 0xF5, 0x3D, 0x80, 0x37, 0xE5, 0x46,
- 0x65, 0x47, 0x70, 0x0F, 0x75, 0x73, 0x01, 0x75,
-/*05C0*/0x75, 0x01, 0xF5, 0x3E, 0xF5, 0x3F, 0x75, 0x4E,
- 0x01, 0x80, 0x22, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5,
-/*05D0*/0x47, 0x95, 0x46, 0xF5, 0x3E, 0xC3, 0x13, 0xF5,
- 0x71, 0x25, 0x46, 0xF5, 0x72, 0xD3, 0x94, 0x3F,
-/*05E0*/0x50, 0x05, 0xE4, 0xF5, 0x3F, 0x80, 0x06, 0xE5,
- 0x72, 0x24, 0xC1, 0xF5, 0x3F, 0x05, 0x6F, 0xE5,
-/*05F0*/0x6F, 0xC3, 0x94, 0x02, 0x50, 0x03, 0x02, 0x04,
- 0x6E, 0xE5, 0x6D, 0x45, 0x6C, 0x70, 0x02, 0x80,
-/*0600*/0x04, 0xE5, 0x74, 0x45, 0x75, 0x90, 0x07, 0x2F,
- 0xF0, 0x7F, 0x01, 0xE5, 0x3E, 0x60, 0x04, 0xE5,
-/*0610*/0x3C, 0x70, 0x14, 0xE4, 0xF5, 0x3C, 0xF5, 0x3D,
- 0xF5, 0x3E, 0xF5, 0x3F, 0x12, 0x08, 0xD2, 0x70,
-/*0620*/0x04, 0xF0, 0x02, 0x06, 0xA4, 0x80, 0x7A, 0xE5,
- 0x3C, 0xC3, 0x95, 0x3E, 0x40, 0x07, 0xE5, 0x3C,
-/*0630*/0x95, 0x3E, 0xFF, 0x80, 0x06, 0xC3, 0xE5, 0x3E,
- 0x95, 0x3C, 0xFF, 0xE5, 0x76, 0xD3, 0x95, 0x79,
-/*0640*/0x40, 0x05, 0x85, 0x76, 0x7A, 0x80, 0x03, 0x85,
- 0x79, 0x7A, 0xE5, 0x77, 0xC3, 0x95, 0x78, 0x50,
-/*0650*/0x05, 0x85, 0x77, 0x7B, 0x80, 0x03, 0x85, 0x78,
- 0x7B, 0xE5, 0x7B, 0xD3, 0x95, 0x7A, 0x40, 0x30,
-/*0660*/0xE5, 0x7B, 0x95, 0x7A, 0xF5, 0x3C, 0xF5, 0x3E,
- 0xC3, 0xE5, 0x7B, 0x95, 0x7A, 0x90, 0x07, 0x19,
-/*0670*/0xF0, 0xE5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
- 0x7A, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
-/*0680*/0xE4, 0xF5, 0x3D, 0x80, 0x1F, 0xC3, 0x74, 0x3F,
- 0x95, 0x72, 0xF5, 0x3D, 0xF5, 0x3F, 0x80, 0x14,
-/*0690*/0xE4, 0xF5, 0x3C, 0xF5, 0x3E, 0x90, 0x07, 0x19,
- 0xF0, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
-/*06A0*/0x03, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x65, 0x75,
- 0x83, 0xD0, 0xE0, 0x54, 0x0F, 0xFE, 0xAD, 0x3C,
-/*06B0*/0x70, 0x02, 0x7E, 0x07, 0xBE, 0x0F, 0x02, 0x7E,
- 0x80, 0xEE, 0xFB, 0xEF, 0xD3, 0x9B, 0x74, 0x80,
-/*06C0*/0xF8, 0x98, 0x40, 0x1F, 0xE4, 0xF5, 0x3C, 0xF5,
- 0x3E, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
-/*06D0*/0x12, 0x74, 0x01, 0xF0, 0xE5, 0x08, 0xFB, 0xEB,
- 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xD2, 0xE0,
-/*06E0*/0x44, 0x10, 0xF0, 0xE5, 0x08, 0xFB, 0xEB, 0x44,
- 0x09, 0xF5, 0x82, 0x75, 0x83, 0x9E, 0xED, 0xF0,
-/*06F0*/0xEB, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xCA,
- 0xED, 0xF0, 0x12, 0x08, 0x65, 0x75, 0x83, 0xCC,
-/*0700*/0xEF, 0xF0, 0x22, 0xE5, 0x08, 0x44, 0x07, 0xF5,
- 0x82, 0x75, 0x83, 0xBC, 0xE0, 0x54, 0xF0, 0xF0,
-/*0710*/0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83,
- 0xBE, 0xE0, 0x54, 0xF0, 0xF0, 0xE5, 0x08, 0x44,
-/*0720*/0x07, 0xF5, 0x82, 0x75, 0x83, 0xC0, 0xE0, 0x54,
- 0xF0, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
-/*0730*/0x22, 0xF0, 0x90, 0x07, 0x28, 0xE0, 0xFE, 0xA3,
- 0xE0, 0xF5, 0x82, 0x8E, 0x83, 0x22, 0x85, 0x42,
-/*0740*/0x42, 0x85, 0x41, 0x41, 0x85, 0x40, 0x40, 0x74,
- 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E, 0xF5,
-/*0750*/0x83, 0xE5, 0x42, 0xF0, 0x74, 0xE0, 0x2F, 0xF5,
- 0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xE5,
-/*0760*/0x42, 0x29, 0xFD, 0xE4, 0x33, 0xFC, 0xE5, 0x3C,
- 0xC3, 0x9D, 0xEC, 0x64, 0x80, 0xF8, 0x74, 0x80,
-/*0770*/0x98, 0x22, 0xF5, 0x83, 0xE0, 0x90, 0x07, 0x22,
- 0x54, 0x1F, 0xFD, 0xE0, 0xFA, 0xA3, 0xE0, 0xF5,
-/*0780*/0x82, 0x8A, 0x83, 0xED, 0xF0, 0x22, 0x90, 0x07,
- 0x22, 0xE0, 0xFC, 0xA3, 0xE0, 0xF5, 0x82, 0x8C,
-/*0790*/0x83, 0x22, 0x90, 0x07, 0x24, 0xFF, 0xED, 0x44,
- 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22, 0x85,
-/*07A0*/0x38, 0x38, 0x85, 0x39, 0x39, 0x85, 0x3A, 0x3A,
- 0x74, 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E,
-/*07B0*/0xF5, 0x83, 0x22, 0x90, 0x07, 0x26, 0xFF, 0xED,
- 0x44, 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22,
-/*07C0*/0xF0, 0x74, 0xA0, 0x2F, 0xF5, 0x82, 0x74, 0x02,
- 0x3E, 0xF5, 0x83, 0x22, 0x74, 0xC0, 0x25, 0x11,
-/*07D0*/0xF5, 0x82, 0xE4, 0x34, 0x01, 0xF5, 0x83, 0x22,
- 0x74, 0x00, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
-/*07E0*/0x02, 0xF5, 0x83, 0x22, 0x74, 0x60, 0x25, 0x11,
- 0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
-/*07F0*/0x74, 0x80, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
- 0x03, 0xF5, 0x83, 0x22, 0x74, 0xE0, 0x25, 0x11,
-/*0800*/0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
- 0x74, 0x40, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
-/*0810*/0x06, 0xF5, 0x83, 0x22, 0x74, 0x80, 0x2F, 0xF5,
- 0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xAF,
-/*0820*/0x08, 0x7E, 0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82,
- 0x22, 0xF5, 0x83, 0xE5, 0x82, 0x44, 0x07, 0xF5,
-/*0830*/0x82, 0xE5, 0x40, 0xF0, 0x22, 0x74, 0x40, 0x25,
- 0x11, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
-/*0840*/0x22, 0x74, 0xC0, 0x25, 0x11, 0xF5, 0x82, 0xE4,
- 0x34, 0x03, 0xF5, 0x83, 0x22, 0x74, 0x00, 0x25,
-/*0850*/0x11, 0xF5, 0x82, 0xE4, 0x34, 0x06, 0xF5, 0x83,
- 0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
-/*0860*/0x34, 0x06, 0xF5, 0x83, 0x22, 0xE5, 0x08, 0xFD,
- 0xED, 0x44, 0x07, 0xF5, 0x82, 0x22, 0xE5, 0x41,
-/*0870*/0xF0, 0xE5, 0x65, 0x64, 0x01, 0x45, 0x64, 0x22,
- 0x7E, 0x00, 0xFB, 0x7A, 0x00, 0xFD, 0x7C, 0x00,
-/*0880*/0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
- 0x34, 0x02, 0x22, 0x74, 0xA0, 0x25, 0x11, 0xF5,
-/*0890*/0x82, 0xE4, 0x34, 0x03, 0x22, 0x85, 0x3E, 0x42,
- 0x85, 0x3F, 0x41, 0x8F, 0x40, 0x22, 0x85, 0x3C,
-/*08A0*/0x42, 0x85, 0x3D, 0x41, 0x8F, 0x40, 0x22, 0x75,
- 0x45, 0x3F, 0x90, 0x07, 0x20, 0xE4, 0xF0, 0xA3,
-/*08B0*/0x22, 0xF5, 0x83, 0xE5, 0x32, 0xF0, 0x05, 0x6E,
- 0xE5, 0x6E, 0xC3, 0x94, 0x40, 0x22, 0xF0, 0xE5,
-/*08C0*/0x08, 0x44, 0x06, 0xF5, 0x82, 0x22, 0x74, 0x00,
- 0x25, 0x6E, 0xF5, 0x82, 0xE4, 0x34, 0x00, 0xF5,
-/*08D0*/0x83, 0x22, 0xE5, 0x6D, 0x45, 0x6C, 0x90, 0x07,
- 0x2F, 0x22, 0xE4, 0xF9, 0xE5, 0x3C, 0xD3, 0x95,
-/*08E0*/0x3E, 0x22, 0x74, 0x80, 0x2E, 0xF5, 0x82, 0xE4,
- 0x34, 0x02, 0xF5, 0x83, 0xE0, 0x22, 0x74, 0xA0,
-/*08F0*/0x2E, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
- 0xE0, 0x22, 0x74, 0x80, 0x25, 0x6E, 0xF5, 0x82,
-/*0900*/0xE4, 0x34, 0x00, 0x22, 0x25, 0x42, 0xFD, 0xE4,
- 0x33, 0xFC, 0x22, 0x85, 0x42, 0x42, 0x85, 0x41,
-/*0910*/0x41, 0x85, 0x40, 0x40, 0x22, 0xED, 0x4C, 0x60,
- 0x03, 0x02, 0x09, 0xE5, 0xEF, 0x4E, 0x70, 0x37,
-/*0920*/0x90, 0x07, 0x26, 0x12, 0x07, 0x89, 0xE0, 0xFD,
- 0x12, 0x07, 0xCC, 0xED, 0xF0, 0x90, 0x07, 0x28,
-/*0930*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xD8,
- 0xED, 0xF0, 0x12, 0x07, 0x86, 0xE0, 0x54, 0x1F,
-/*0940*/0xFD, 0x12, 0x08, 0x81, 0xF5, 0x83, 0xED, 0xF0,
- 0x90, 0x07, 0x24, 0x12, 0x07, 0x89, 0xE0, 0x54,
-/*0950*/0x1F, 0xFD, 0x12, 0x08, 0x35, 0xED, 0xF0, 0xEF,
- 0x64, 0x04, 0x4E, 0x70, 0x37, 0x90, 0x07, 0x26,
-/*0960*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xE4,
- 0xED, 0xF0, 0x90, 0x07, 0x28, 0x12, 0x07, 0x89,
-/*0970*/0xE0, 0xFD, 0x12, 0x07, 0xF0, 0xED, 0xF0, 0x12,
- 0x07, 0x86, 0xE0, 0x54, 0x1F, 0xFD, 0x12, 0x08,
-/*0980*/0x8B, 0xF5, 0x83, 0xED, 0xF0, 0x90, 0x07, 0x24,
- 0x12, 0x07, 0x89, 0xE0, 0x54, 0x1F, 0xFD, 0x12,
-/*0990*/0x08, 0x41, 0xED, 0xF0, 0xEF, 0x64, 0x01, 0x4E,
- 0x70, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
-/*09A0*/0xEF, 0x64, 0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01,
- 0x80, 0x02, 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x78,
-/*09B0*/0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE0, 0xFF,
- 0x12, 0x07, 0xFC, 0xEF, 0x12, 0x07, 0x31, 0xE0,
-/*09C0*/0xFF, 0x12, 0x08, 0x08, 0xEF, 0xF0, 0x90, 0x07,
- 0x22, 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF,
-/*09D0*/0x12, 0x08, 0x4D, 0xEF, 0xF0, 0x90, 0x07, 0x24,
- 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
-/*09E0*/0x08, 0x59, 0xEF, 0xF0, 0x22, 0x12, 0x07, 0xCC,
- 0xE4, 0xF0, 0x12, 0x07, 0xD8, 0xE4, 0xF0, 0x12,
-/*09F0*/0x08, 0x81, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08,
- 0x35, 0x74, 0x14, 0xF0, 0x12, 0x07, 0xE4, 0xE4,
-/*0A00*/0xF0, 0x12, 0x07, 0xF0, 0xE4, 0xF0, 0x12, 0x08,
- 0x8B, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08, 0x41,
-/*0A10*/0x74, 0x14, 0xF0, 0x12, 0x07, 0xFC, 0xE4, 0xF0,
- 0x12, 0x08, 0x08, 0xE4, 0xF0, 0x12, 0x08, 0x4D,
-/*0A20*/0xE4, 0xF0, 0x12, 0x08, 0x59, 0x74, 0x14, 0xF0,
- 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFC, 0x10, 0xE4,
-/*0A30*/0xF5, 0xFD, 0x75, 0xFE, 0x30, 0xF5, 0xFF, 0xE5,
- 0xE7, 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0xE5,
-/*0A40*/0xE6, 0x20, 0xE7, 0x0B, 0x78, 0xFF, 0xE4, 0xF6,
- 0xD8, 0xFD, 0x53, 0xE6, 0xFE, 0x80, 0x09, 0x78,
-/*0A50*/0x08, 0xE4, 0xF6, 0xD8, 0xFD, 0x53, 0xE6, 0xFE,
- 0x75, 0x81, 0x80, 0xE4, 0xF5, 0xA8, 0xD2, 0xA8,
-/*0A60*/0xC2, 0xA9, 0xD2, 0xAF, 0xE5, 0xE2, 0x20, 0xE5,
- 0x05, 0x20, 0xE6, 0x02, 0x80, 0x03, 0x43, 0xE1,
-/*0A70*/0x02, 0xE5, 0xE2, 0x20, 0xE0, 0x0E, 0x90, 0x00,
- 0x00, 0x7F, 0x00, 0x7E, 0x08, 0xE4, 0xF0, 0xA3,
-/*0A80*/0xDF, 0xFC, 0xDE, 0xFA, 0x02, 0x0A, 0xDB, 0x43,
- 0xFA, 0x01, 0xC0, 0xE0, 0xC0, 0xF0, 0xC0, 0x83,
-/*0A90*/0xC0, 0x82, 0xC0, 0xD0, 0x12, 0x1C, 0xE7, 0xD0,
- 0xD0, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0xF0, 0xD0,
-/*0AA0*/0xE0, 0x53, 0xFA, 0xFE, 0x32, 0x02, 0x1B, 0x55,
- 0xE4, 0x93, 0xA3, 0xF8, 0xE4, 0x93, 0xA3, 0xF6,
-/*0AB0*/0x08, 0xDF, 0xF9, 0x80, 0x29, 0xE4, 0x93, 0xA3,
- 0xF8, 0x54, 0x07, 0x24, 0x0C, 0xC8, 0xC3, 0x33,
-/*0AC0*/0xC4, 0x54, 0x0F, 0x44, 0x20, 0xC8, 0x83, 0x40,
- 0x04, 0xF4, 0x56, 0x80, 0x01, 0x46, 0xF6, 0xDF,
-/*0AD0*/0xE4, 0x80, 0x0B, 0x01, 0x02, 0x04, 0x08, 0x10,
- 0x20, 0x40, 0x80, 0x90, 0x00, 0x3F, 0xE4, 0x7E,
-/*0AE0*/0x01, 0x93, 0x60, 0xC1, 0xA3, 0xFF, 0x54, 0x3F,
- 0x30, 0xE5, 0x09, 0x54, 0x1F, 0xFE, 0xE4, 0x93,
-/*0AF0*/0xA3, 0x60, 0x01, 0x0E, 0xCF, 0x54, 0xC0, 0x25,
- 0xE0, 0x60, 0xAD, 0x40, 0xB8, 0x80, 0xFE, 0x8C,
-/*0B00*/0x64, 0x8D, 0x65, 0x8A, 0x66, 0x8B, 0x67, 0xE4,
- 0xF5, 0x69, 0xEF, 0x4E, 0x70, 0x03, 0x02, 0x1D,
-/*0B10*/0x55, 0xE4, 0xF5, 0x68, 0xE5, 0x67, 0x45, 0x66,
- 0x70, 0x32, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90,
-/*0B20*/0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE4,
- 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4, 0x12,
-/*0B30*/0x08, 0x70, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
- 0x83, 0x92, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83,
-/*0B40*/0xC6, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8,
- 0xE4, 0xF0, 0x80, 0x11, 0x90, 0x07, 0x26, 0x12,
-/*0B50*/0x07, 0x35, 0xE4, 0x12, 0x08, 0x70, 0x70, 0x05,
- 0x12, 0x07, 0x32, 0xE4, 0xF0, 0x12, 0x1D, 0x55,
-/*0B60*/0x12, 0x1E, 0xBF, 0xE5, 0x67, 0x45, 0x66, 0x70,
- 0x33, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90, 0xE5,
-/*0B70*/0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
- 0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x12,
-/*0B80*/0x08, 0x6E, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
- 0x83, 0x92, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
-/*0B90*/0x83, 0xC6, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
- 0x83, 0xC8, 0x80, 0x0E, 0x90, 0x07, 0x26, 0x12,
-/*0BA0*/0x07, 0x35, 0x12, 0x08, 0x6E, 0x70, 0x06, 0x12,
- 0x07, 0x32, 0xE5, 0x40, 0xF0, 0xAF, 0x69, 0x7E,
-/*0BB0*/0x00, 0xAD, 0x67, 0xAC, 0x66, 0x12, 0x04, 0x44,
- 0x12, 0x07, 0x2A, 0x75, 0x83, 0xCA, 0xE0, 0xD3,
-/*0BC0*/0x94, 0x00, 0x50, 0x0C, 0x05, 0x68, 0xE5, 0x68,
- 0xC3, 0x94, 0x05, 0x50, 0x03, 0x02, 0x0B, 0x14,
-/*0BD0*/0x22, 0x8C, 0x60, 0x8D, 0x61, 0x12, 0x08, 0xDA,
- 0x74, 0x20, 0x40, 0x0D, 0x2F, 0xF5, 0x82, 0x74,
-/*0BE0*/0x03, 0x3E, 0xF5, 0x83, 0xE5, 0x3E, 0xF0, 0x80,
- 0x0B, 0x2F, 0xF5, 0x82, 0x74, 0x03, 0x3E, 0xF5,
-/*0BF0*/0x83, 0xE5, 0x3C, 0xF0, 0xE5, 0x3C, 0xD3, 0x95,
- 0x3E, 0x40, 0x3C, 0xE5, 0x61, 0x45, 0x60, 0x70,
-/*0C00*/0x10, 0xE9, 0x12, 0x09, 0x04, 0xE5, 0x3E, 0x12,
- 0x07, 0x68, 0x40, 0x3B, 0x12, 0x08, 0x95, 0x80,
-/*0C10*/0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40, 0x1D,
- 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05, 0x85,
-/*0C20*/0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F,
- 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x3E, 0x12, 0x07,
-/*0C30*/0xC0, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x43, 0xE5,
- 0x61, 0x45, 0x60, 0x70, 0x19, 0x12, 0x07, 0x5F,
-/*0C40*/0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x27, 0x12,
- 0x09, 0x0B, 0x12, 0x08, 0x14, 0xE5, 0x42, 0x12,
-/*0C50*/0x07, 0xC0, 0xE5, 0x41, 0xF0, 0x22, 0xE5, 0x3C,
- 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C, 0x38,
-/*0C60*/0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39, 0x80,
- 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x08,
-/*0C70*/0x14, 0xE5, 0x3C, 0x12, 0x07, 0xC0, 0xE5, 0x3D,
- 0xF0, 0x22, 0x85, 0x38, 0x38, 0x85, 0x39, 0x39,
-/*0C80*/0x85, 0x3A, 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x38,
- 0x12, 0x07, 0xC0, 0xE5, 0x39, 0xF0, 0x22, 0x7F,
-/*0C90*/0x06, 0x12, 0x17, 0x31, 0x12, 0x1D, 0x23, 0x12,
- 0x0E, 0x04, 0x12, 0x0E, 0x33, 0xE0, 0x44, 0x0A,
-/*0CA0*/0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E, 0x04, 0x12,
- 0x0E, 0x0B, 0xEF, 0xF0, 0xE5, 0x28, 0x30, 0xE5,
-/*0CB0*/0x03, 0xD3, 0x80, 0x01, 0xC3, 0x40, 0x05, 0x75,
- 0x14, 0x20, 0x80, 0x03, 0x75, 0x14, 0x08, 0x12,
-/*0CC0*/0x0E, 0x04, 0x75, 0x83, 0x8A, 0xE5, 0x14, 0xF0,
- 0xB4, 0xFF, 0x05, 0x75, 0x12, 0x80, 0x80, 0x06,
-/*0CD0*/0xE5, 0x14, 0xC3, 0x13, 0xF5, 0x12, 0xE4, 0xF5,
- 0x16, 0xF5, 0x7F, 0x12, 0x19, 0x36, 0x12, 0x13,
-/*0CE0*/0xA3, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x50, 0x09,
- 0x05, 0x16, 0xE5, 0x16, 0xC3, 0x94, 0x14, 0x40,
-/*0CF0*/0xEA, 0xE5, 0xE4, 0x20, 0xE7, 0x28, 0x12, 0x0E,
- 0x04, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
-/*0D00*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
- 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
-/*0D10*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
- 0x5E, 0x60, 0x03, 0x12, 0x1D, 0xD7, 0xE5, 0x7F,
-/*0D20*/0xC3, 0x94, 0x11, 0x40, 0x14, 0x12, 0x0E, 0x04,
- 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x80, 0xF0, 0xE5,
-/*0D30*/0xE4, 0x20, 0xE7, 0x0F, 0x12, 0x1D, 0xD7, 0x80,
- 0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0xD2, 0xE0,
-/*0D40*/0x54, 0x7F, 0xF0, 0x12, 0x1D, 0x23, 0x22, 0x74,
- 0x8A, 0x85, 0x08, 0x82, 0xF5, 0x83, 0xE5, 0x17,
-/*0D50*/0xF0, 0x12, 0x0E, 0x3A, 0xE4, 0xF0, 0x90, 0x07,
- 0x02, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x90,
-/*0D60*/0xEF, 0xF0, 0x74, 0x92, 0xFE, 0xE5, 0x08, 0x44,
- 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0, 0x54,
-/*0D70*/0xC0, 0xFD, 0x90, 0x07, 0x03, 0xE0, 0x54, 0x3F,
- 0x4D, 0x8F, 0x82, 0x8E, 0x83, 0xF0, 0x90, 0x07,
-/*0D80*/0x04, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x82,
- 0xEF, 0xF0, 0x90, 0x07, 0x05, 0xE0, 0xFF, 0xED,
-/*0D90*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xB4, 0xEF,
- 0x12, 0x0E, 0x03, 0x75, 0x83, 0x80, 0xE0, 0x54,
-/*0DA0*/0xBF, 0xF0, 0x30, 0x37, 0x0A, 0x12, 0x0E, 0x91,
- 0x75, 0x83, 0x94, 0xE0, 0x44, 0x80, 0xF0, 0x30,
-/*0DB0*/0x38, 0x0A, 0x12, 0x0E, 0x91, 0x75, 0x83, 0x92,
- 0xE0, 0x44, 0x80, 0xF0, 0xE5, 0x28, 0x30, 0xE4,
-/*0DC0*/0x1A, 0x20, 0x39, 0x0A, 0x12, 0x0E, 0x04, 0x75,
- 0x83, 0x88, 0xE0, 0x54, 0x7F, 0xF0, 0x20, 0x3A,
-/*0DD0*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x88, 0xE0,
- 0x54, 0xBF, 0xF0, 0x74, 0x8C, 0xFE, 0x12, 0x0E,
-/*0DE0*/0x04, 0x8E, 0x83, 0xE0, 0x54, 0x0F, 0x12, 0x0E,
- 0x03, 0x75, 0x83, 0x86, 0xE0, 0x54, 0xBF, 0xF0,
-/*0DF0*/0xE5, 0x08, 0x44, 0x06, 0x12, 0x0D, 0xFD, 0x75,
- 0x83, 0x8A, 0xE4, 0xF0, 0x22, 0xF5, 0x82, 0x75,
-/*0E00*/0x83, 0x82, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07,
- 0xF5, 0x82, 0x22, 0x8E, 0x83, 0xE0, 0xF5, 0x10,
-/*0E10*/0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44, 0x01, 0xFF,
- 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07, 0xF5, 0x82,
-/*0E20*/0x22, 0xE5, 0x15, 0xC4, 0x54, 0x07, 0xFF, 0xE5,
- 0x08, 0xFD, 0xED, 0x44, 0x08, 0xF5, 0x82, 0x75,
-/*0E30*/0x83, 0x82, 0x22, 0x75, 0x83, 0x80, 0xE0, 0x44,
- 0x40, 0xF0, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
-/*0E40*/0x75, 0x83, 0x8A, 0x22, 0xE5, 0x16, 0x25, 0xE0,
- 0x25, 0xE0, 0x24, 0xAF, 0xF5, 0x82, 0xE4, 0x34,
-/*0E50*/0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0D, 0x22,
- 0x43, 0xE1, 0x10, 0x43, 0xE1, 0x80, 0x53, 0xE1,
-/*0E60*/0xFD, 0x85, 0xE1, 0x10, 0x22, 0xE5, 0x16, 0x25,
- 0xE0, 0x25, 0xE0, 0x24, 0xB2, 0xF5, 0x82, 0xE4,
-/*0E70*/0x34, 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0x22, 0x85,
- 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15, 0xF0,
-/*0E80*/0x22, 0xE5, 0xE2, 0x54, 0x20, 0xD3, 0x94, 0x00,
- 0x22, 0xE5, 0xE2, 0x54, 0x40, 0xD3, 0x94, 0x00,
-/*0E90*/0x22, 0xE5, 0x08, 0x44, 0x06, 0xF5, 0x82, 0x22,
- 0xFD, 0xE5, 0x08, 0xFB, 0xEB, 0x44, 0x07, 0xF5,
-/*0EA0*/0x82, 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFE, 0x30,
- 0x22, 0xEF, 0x4E, 0x70, 0x26, 0x12, 0x07, 0xCC,
-/*0EB0*/0xE0, 0xFD, 0x90, 0x07, 0x26, 0x12, 0x07, 0x7B,
- 0x12, 0x07, 0xD8, 0xE0, 0xFD, 0x90, 0x07, 0x28,
-/*0EC0*/0x12, 0x07, 0x7B, 0x12, 0x08, 0x81, 0x12, 0x07,
- 0x72, 0x12, 0x08, 0x35, 0xE0, 0x90, 0x07, 0x24,
-/*0ED0*/0x12, 0x07, 0x78, 0xEF, 0x64, 0x04, 0x4E, 0x70,
- 0x29, 0x12, 0x07, 0xE4, 0xE0, 0xFD, 0x90, 0x07,
-/*0EE0*/0x26, 0x12, 0x07, 0x7B, 0x12, 0x07, 0xF0, 0xE0,
- 0xFD, 0x90, 0x07, 0x28, 0x12, 0x07, 0x7B, 0x12,
-/*0EF0*/0x08, 0x8B, 0x12, 0x07, 0x72, 0x12, 0x08, 0x41,
- 0xE0, 0x54, 0x1F, 0xFD, 0x90, 0x07, 0x24, 0x12,
-/*0F00*/0x07, 0x7B, 0xEF, 0x64, 0x01, 0x4E, 0x70, 0x04,
- 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0xEF, 0x64,
-/*0F10*/0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02,
- 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x35, 0x12, 0x07,
-/*0F20*/0xFC, 0xE0, 0xFF, 0x90, 0x07, 0x26, 0x12, 0x07,
- 0x89, 0xEF, 0xF0, 0x12, 0x08, 0x08, 0xE0, 0xFF,
-/*0F30*/0x90, 0x07, 0x28, 0x12, 0x07, 0x89, 0xEF, 0xF0,
- 0x12, 0x08, 0x4D, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
-/*0F40*/0x07, 0x86, 0xEF, 0xF0, 0x12, 0x08, 0x59, 0xE0,
- 0x54, 0x1F, 0xFF, 0x90, 0x07, 0x24, 0x12, 0x07,
-/*0F50*/0x89, 0xEF, 0xF0, 0x22, 0xE4, 0xF5, 0x53, 0x12,
- 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
-/*0F60*/0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40, 0x04, 0x7E,
- 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x70,
-/*0F70*/0x03, 0x02, 0x0F, 0xF6, 0x85, 0xE1, 0x10, 0x43,
- 0xE1, 0x02, 0x53, 0xE1, 0x0F, 0x85, 0xE1, 0x10,
-/*0F80*/0xE4, 0xF5, 0x51, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
- 0x52, 0x12, 0x0E, 0x89, 0x40, 0x1D, 0xAD, 0x52,
-/*0F90*/0xAF, 0x51, 0x12, 0x11, 0x18, 0xEF, 0x60, 0x08,
- 0x85, 0xE1, 0x10, 0x43, 0xE1, 0x40, 0x80, 0x0B,
-/*0FA0*/0x53, 0xE1, 0xBF, 0x12, 0x0E, 0x58, 0x12, 0x00,
- 0x06, 0x80, 0xFB, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
-/*0FB0*/0x51, 0xE5, 0xE4, 0x54, 0x3F, 0xF5, 0x52, 0x12,
- 0x0E, 0x81, 0x40, 0x1D, 0xAD, 0x52, 0xAF, 0x51,
-/*0FC0*/0x12, 0x11, 0x18, 0xEF, 0x60, 0x08, 0x85, 0xE1,
- 0x10, 0x43, 0xE1, 0x20, 0x80, 0x0B, 0x53, 0xE1,
-/*0FD0*/0xDF, 0x12, 0x0E, 0x58, 0x12, 0x00, 0x06, 0x80,
- 0xFB, 0x12, 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01,
-/*0FE0*/0x80, 0x02, 0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40,
- 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
-/*0FF0*/0x4F, 0x60, 0x03, 0x12, 0x0E, 0x5B, 0x22, 0x12,
- 0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x22,
-/*1000*/0x02, 0x11, 0x00, 0x02, 0x10, 0x40, 0x02, 0x10,
- 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1010*/0x01, 0x20, 0x01, 0x20, 0xE4, 0xF5, 0x57, 0x12,
- 0x16, 0xBD, 0x12, 0x16, 0x44, 0xE4, 0x12, 0x10,
-/*1020*/0x56, 0x12, 0x14, 0xB7, 0x90, 0x07, 0x26, 0x12,
- 0x07, 0x35, 0xE4, 0x12, 0x07, 0x31, 0xE4, 0xF0,
-/*1030*/0x12, 0x10, 0x56, 0x12, 0x14, 0xB7, 0x90, 0x07,
- 0x26, 0x12, 0x07, 0x35, 0xE5, 0x41, 0x12, 0x07,
-/*1040*/0x31, 0xE5, 0x40, 0xF0, 0xAF, 0x57, 0x7E, 0x00,
- 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
-/*1050*/0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xFF, 0x90,
- 0x07, 0x20, 0xA3, 0xE0, 0xFD, 0xE4, 0xF5, 0x56,
-/*1060*/0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x12,
- 0x11, 0x51, 0x7F, 0x0F, 0x7D, 0x18, 0xE4, 0xF5,
-/*1070*/0x56, 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA,
- 0x12, 0x15, 0x41, 0xAF, 0x56, 0x7E, 0x00, 0x12,
-/*1080*/0x1A, 0xFF, 0xE4, 0xFF, 0xF5, 0x56, 0x7D, 0x1F,
- 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x22,
-/*1090*/0x22, 0xE4, 0xF5, 0x55, 0xE5, 0x08, 0xFD, 0x74,
- 0xA0, 0xF5, 0x56, 0xED, 0x44, 0x07, 0xF5, 0x57,
-/*10A0*/0xE5, 0x28, 0x30, 0xE5, 0x03, 0xD3, 0x80, 0x01,
- 0xC3, 0x40, 0x05, 0x7F, 0x28, 0xEF, 0x80, 0x04,
-/*10B0*/0x7F, 0x14, 0xEF, 0xC3, 0x13, 0xF5, 0x54, 0xE4,
- 0xF9, 0x12, 0x0E, 0x18, 0x75, 0x83, 0x8E, 0xE0,
-/*10C0*/0xF5, 0x10, 0xCE, 0xEF, 0xCE, 0xEE, 0xD3, 0x94,
- 0x00, 0x40, 0x26, 0xE5, 0x10, 0x54, 0xFE, 0x12,
-/*10D0*/0x0E, 0x98, 0x75, 0x83, 0x8E, 0xED, 0xF0, 0xE5,
- 0x10, 0x44, 0x01, 0xFD, 0xEB, 0x44, 0x07, 0xF5,
-/*10E0*/0x82, 0xED, 0xF0, 0x85, 0x57, 0x82, 0x85, 0x56,
- 0x83, 0xE0, 0x30, 0xE3, 0x01, 0x09, 0x1E, 0x80,
-/*10F0*/0xD4, 0xC2, 0x34, 0xE9, 0xC3, 0x95, 0x54, 0x40,
- 0x02, 0xD2, 0x34, 0x22, 0x02, 0x00, 0x06, 0x22,
-/*1100*/0x30, 0x30, 0x11, 0x90, 0x10, 0x00, 0xE4, 0x93,
- 0xF5, 0x10, 0x90, 0x10, 0x10, 0xE4, 0x93, 0xF5,
-/*1110*/0x10, 0x12, 0x10, 0x90, 0x12, 0x11, 0x50, 0x22,
- 0xE4, 0xFC, 0xC3, 0xED, 0x9F, 0xFA, 0xEF, 0xF5,
-/*1120*/0x83, 0x75, 0x82, 0x00, 0x79, 0xFF, 0xE4, 0x93,
- 0xCC, 0x6C, 0xCC, 0xA3, 0xD9, 0xF8, 0xDA, 0xF6,
-/*1130*/0xE5, 0xE2, 0x30, 0xE4, 0x02, 0x8C, 0xE5, 0xED,
- 0x24, 0xFF, 0xFF, 0xEF, 0x75, 0x82, 0xFF, 0xF5,
-/*1140*/0x83, 0xE4, 0x93, 0x6C, 0x70, 0x03, 0x7F, 0x01,
- 0x22, 0x7F, 0x00, 0x22, 0x22, 0x11, 0x00, 0x00,
-/*1150*/0x22, 0x8E, 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D,
- 0x5B, 0x8A, 0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01,
-/*1160*/0xE4, 0xF5, 0x5F, 0xF5, 0x60, 0xF5, 0x62, 0x12,
- 0x07, 0x2A, 0x75, 0x83, 0xD0, 0xE0, 0xFF, 0xC4,
-/*1170*/0x54, 0x0F, 0xF5, 0x61, 0x12, 0x1E, 0xA5, 0x85,
- 0x59, 0x5E, 0xD3, 0xE5, 0x5E, 0x95, 0x5B, 0xE5,
-/*1180*/0x5A, 0x12, 0x07, 0x6B, 0x50, 0x4B, 0x12, 0x07,
- 0x03, 0x75, 0x83, 0xBC, 0xE0, 0x45, 0x5E, 0x12,
-/*1190*/0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x45, 0x5E,
- 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0, 0x45,
-/*11A0*/0x5E, 0xF0, 0xAF, 0x5F, 0xE5, 0x60, 0x12, 0x08,
- 0x78, 0x12, 0x0A, 0xFF, 0xAF, 0x62, 0x7E, 0x00,
-/*11B0*/0xAD, 0x5D, 0xAC, 0x5C, 0x12, 0x04, 0x44, 0xE5,
- 0x61, 0xAF, 0x5E, 0x7E, 0x00, 0xB4, 0x03, 0x05,
-/*11C0*/0x12, 0x1E, 0x21, 0x80, 0x07, 0xAD, 0x5D, 0xAC,
- 0x5C, 0x12, 0x13, 0x17, 0x05, 0x5E, 0x02, 0x11,
-/*11D0*/0x7A, 0x12, 0x07, 0x03, 0x75, 0x83, 0xBC, 0xE0,
- 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBE,
-/*11E0*/0xE0, 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83,
- 0xC0, 0xE0, 0x45, 0x40, 0xF0, 0x22, 0x8E, 0x58,
-/*11F0*/0x8F, 0x59, 0x75, 0x5A, 0x01, 0x79, 0x01, 0x75,
- 0x5B, 0x01, 0xE4, 0xFB, 0x12, 0x07, 0x2A, 0x75,
-/*1200*/0x83, 0xAE, 0xE0, 0x54, 0x1A, 0xFF, 0x12, 0x08,
- 0x65, 0xE0, 0xC4, 0x13, 0x54, 0x07, 0xFE, 0xEF,
-/*1210*/0x70, 0x0C, 0xEE, 0x65, 0x35, 0x70, 0x07, 0x90,
- 0x07, 0x2F, 0xE0, 0xB4, 0x01, 0x0D, 0xAF, 0x35,
-/*1220*/0x7E, 0x00, 0x12, 0x0E, 0xA9, 0xCF, 0xEB, 0xCF,
- 0x02, 0x1E, 0x60, 0xE5, 0x59, 0x64, 0x02, 0x45,
-/*1230*/0x58, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
- 0x00, 0xE5, 0x59, 0x45, 0x58, 0x70, 0x04, 0x7E,
-/*1240*/0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60,
- 0x23, 0x85, 0x41, 0x49, 0x85, 0x40, 0x4B, 0xE5,
-/*1250*/0x59, 0x45, 0x58, 0x70, 0x2C, 0xAF, 0x5A, 0xFE,
- 0xCD, 0xE9, 0xCD, 0xFC, 0xAB, 0x59, 0xAA, 0x58,
-/*1260*/0x12, 0x0A, 0xFF, 0xAF, 0x5B, 0x7E, 0x00, 0x12,
- 0x1E, 0x60, 0x80, 0x15, 0xAF, 0x5B, 0x7E, 0x00,
-/*1270*/0x12, 0x1E, 0x60, 0x90, 0x07, 0x26, 0x12, 0x07,
- 0x35, 0xE5, 0x49, 0x12, 0x07, 0x31, 0xE5, 0x4B,
-/*1280*/0xF0, 0xE4, 0xFD, 0xAF, 0x35, 0xFE, 0xFC, 0x12,
- 0x09, 0x15, 0x22, 0x8C, 0x64, 0x8D, 0x65, 0x12,
-/*1290*/0x08, 0xDA, 0x40, 0x3C, 0xE5, 0x65, 0x45, 0x64,
- 0x70, 0x10, 0x12, 0x09, 0x04, 0xC3, 0xE5, 0x3E,
-/*12A0*/0x12, 0x07, 0x69, 0x40, 0x3B, 0x12, 0x08, 0x95,
- 0x80, 0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40,
-/*12B0*/0x1D, 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05,
- 0x85, 0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39,
-/*12C0*/0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3E, 0x12,
- 0x07, 0x53, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x3B,
-/*12D0*/0xE5, 0x65, 0x45, 0x64, 0x70, 0x11, 0x12, 0x07,
- 0x5F, 0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x1F,
-/*12E0*/0x12, 0x07, 0x3E, 0xE5, 0x41, 0xF0, 0x22, 0xE5,
- 0x3C, 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C,
-/*12F0*/0x38, 0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39,
- 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12,
-/*1300*/0x07, 0xA8, 0xE5, 0x3C, 0x12, 0x07, 0x53, 0xE5,
- 0x3D, 0xF0, 0x22, 0x12, 0x07, 0x9F, 0xE5, 0x38,
-/*1310*/0x12, 0x07, 0x53, 0xE5, 0x39, 0xF0, 0x22, 0x8C,
- 0x63, 0x8D, 0x64, 0x12, 0x08, 0xDA, 0x40, 0x3C,
-/*1320*/0xE5, 0x64, 0x45, 0x63, 0x70, 0x10, 0x12, 0x09,
- 0x04, 0xC3, 0xE5, 0x3E, 0x12, 0x07, 0x69, 0x40,
-/*1330*/0x3B, 0x12, 0x08, 0x95, 0x80, 0x18, 0xE5, 0x3E,
- 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3E, 0x38,
-/*1340*/0xE5, 0x3E, 0x60, 0x05, 0x85, 0x3F, 0x39, 0x80,
- 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x07,
-/*1350*/0xA8, 0xE5, 0x3E, 0x12, 0x07, 0x53, 0xE5, 0x3F,
- 0xF0, 0x22, 0x80, 0x3B, 0xE5, 0x64, 0x45, 0x63,
-/*1360*/0x70, 0x11, 0x12, 0x07, 0x5F, 0x40, 0x05, 0x12,
- 0x08, 0x9E, 0x80, 0x1F, 0x12, 0x07, 0x3E, 0xE5,
-/*1370*/0x41, 0xF0, 0x22, 0xE5, 0x3C, 0xC3, 0x95, 0x38,
- 0x40, 0x1D, 0x85, 0x3C, 0x38, 0xE5, 0x3C, 0x60,
-/*1380*/0x05, 0x85, 0x3D, 0x39, 0x80, 0x03, 0x85, 0x39,
- 0x39, 0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3C,
-/*1390*/0x12, 0x07, 0x53, 0xE5, 0x3D, 0xF0, 0x22, 0x12,
- 0x07, 0x9F, 0xE5, 0x38, 0x12, 0x07, 0x53, 0xE5,
-/*13A0*/0x39, 0xF0, 0x22, 0xE5, 0x0D, 0xFE, 0xE5, 0x08,
- 0x8E, 0x54, 0x44, 0x05, 0xF5, 0x55, 0x75, 0x15,
-/*13B0*/0x0F, 0xF5, 0x82, 0x12, 0x0E, 0x7A, 0x12, 0x17,
- 0xA3, 0x20, 0x31, 0x05, 0x75, 0x15, 0x03, 0x80,
-/*13C0*/0x03, 0x75, 0x15, 0x0B, 0xE5, 0x0A, 0xC3, 0x94,
- 0x01, 0x50, 0x38, 0x12, 0x14, 0x20, 0x20, 0x31,
-/*13D0*/0x06, 0x05, 0x15, 0x05, 0x15, 0x80, 0x04, 0x15,
- 0x15, 0x15, 0x15, 0xE5, 0x0A, 0xC3, 0x94, 0x01,
-/*13E0*/0x50, 0x21, 0x12, 0x14, 0x20, 0x20, 0x31, 0x04,
- 0x05, 0x15, 0x80, 0x02, 0x15, 0x15, 0xE5, 0x0A,
-/*13F0*/0xC3, 0x94, 0x01, 0x50, 0x0E, 0x12, 0x0E, 0x77,
- 0x12, 0x17, 0xA3, 0x20, 0x31, 0x05, 0x05, 0x15,
-/*1400*/0x12, 0x0E, 0x77, 0xE5, 0x15, 0xB4, 0x08, 0x04,
- 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x15,
-/*1410*/0xB4, 0x07, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E,
- 0x00, 0xEE, 0x4F, 0x60, 0x02, 0x05, 0x7F, 0x22,
-/*1420*/0x85, 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15,
- 0xF0, 0x12, 0x17, 0xA3, 0x22, 0x12, 0x07, 0x2A,
-/*1430*/0x75, 0x83, 0xAE, 0x74, 0xFF, 0x12, 0x07, 0x29,
- 0xE0, 0x54, 0x1A, 0xF5, 0x34, 0xE0, 0xC4, 0x13,
-/*1440*/0x54, 0x07, 0xF5, 0x35, 0x24, 0xFE, 0x60, 0x24,
- 0x24, 0xFE, 0x60, 0x3C, 0x24, 0x04, 0x70, 0x63,
-/*1450*/0x75, 0x31, 0x2D, 0xE5, 0x08, 0xFD, 0x74, 0xB6,
- 0x12, 0x07, 0x92, 0x74, 0xBC, 0x90, 0x07, 0x22,
-/*1460*/0x12, 0x07, 0x95, 0x74, 0x90, 0x12, 0x07, 0xB3,
- 0x74, 0x92, 0x80, 0x3C, 0x75, 0x31, 0x3A, 0xE5,
-/*1470*/0x08, 0xFD, 0x74, 0xBA, 0x12, 0x07, 0x92, 0x74,
- 0xC0, 0x90, 0x07, 0x22, 0x12, 0x07, 0xB6, 0x74,
-/*1480*/0xC4, 0x12, 0x07, 0xB3, 0x74, 0xC8, 0x80, 0x20,
- 0x75, 0x31, 0x35, 0xE5, 0x08, 0xFD, 0x74, 0xB8,
-/*1490*/0x12, 0x07, 0x92, 0x74, 0xBE, 0xFF, 0xED, 0x44,
- 0x07, 0x90, 0x07, 0x22, 0xCF, 0xF0, 0xA3, 0xEF,
-/*14A0*/0xF0, 0x74, 0xC2, 0x12, 0x07, 0xB3, 0x74, 0xC6,
- 0xFF, 0xED, 0x44, 0x07, 0xA3, 0xCF, 0xF0, 0xA3,
-/*14B0*/0xEF, 0xF0, 0x22, 0x75, 0x34, 0x01, 0x22, 0x8E,
- 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D, 0x5B, 0x8A,
-/*14C0*/0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01, 0xE4, 0xF5,
- 0x5F, 0x12, 0x1E, 0xA5, 0x85, 0x59, 0x5E, 0xD3,
-/*14D0*/0xE5, 0x5E, 0x95, 0x5B, 0xE5, 0x5A, 0x12, 0x07,
- 0x6B, 0x50, 0x57, 0xE5, 0x5D, 0x45, 0x5C, 0x70,
-/*14E0*/0x30, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x92, 0xE5,
- 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE5,
-/*14F0*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8, 0xE5,
- 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0x90, 0xE5,
-/*1500*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
- 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x80,
-/*1510*/0x03, 0x12, 0x07, 0x32, 0xE5, 0x5E, 0xF0, 0xAF,
- 0x5F, 0x7E, 0x00, 0xAD, 0x5D, 0xAC, 0x5C, 0x12,
-/*1520*/0x04, 0x44, 0xAF, 0x5E, 0x7E, 0x00, 0xAD, 0x5D,
- 0xAC, 0x5C, 0x12, 0x0B, 0xD1, 0x05, 0x5E, 0x02,
-/*1530*/0x14, 0xCF, 0xAB, 0x5D, 0xAA, 0x5C, 0xAD, 0x5B,
- 0xAC, 0x5A, 0xAF, 0x59, 0xAE, 0x58, 0x02, 0x1B,
-/*1540*/0xFB, 0x8C, 0x5C, 0x8D, 0x5D, 0x8A, 0x5E, 0x8B,
- 0x5F, 0x75, 0x60, 0x01, 0xE4, 0xF5, 0x61, 0xF5,
-/*1550*/0x62, 0xF5, 0x63, 0x12, 0x1E, 0xA5, 0x8F, 0x60,
- 0xD3, 0xE5, 0x60, 0x95, 0x5D, 0xE5, 0x5C, 0x12,
-/*1560*/0x07, 0x6B, 0x50, 0x61, 0xE5, 0x5F, 0x45, 0x5E,
- 0x70, 0x27, 0x12, 0x07, 0x2A, 0x75, 0x83, 0xB6,
-/*1570*/0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xB8,
- 0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBA,
-/*1580*/0xE5, 0x60, 0xF0, 0xAF, 0x61, 0x7E, 0x00, 0xE5,
- 0x62, 0x12, 0x08, 0x7A, 0x12, 0x0A, 0xFF, 0x80,
-/*1590*/0x19, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE5,
- 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0x8E, 0xE4,
-/*15A0*/0x12, 0x07, 0x29, 0x74, 0x01, 0x12, 0x07, 0x29,
- 0xE4, 0xF0, 0xAF, 0x63, 0x7E, 0x00, 0xAD, 0x5F,
-/*15B0*/0xAC, 0x5E, 0x12, 0x04, 0x44, 0xAF, 0x60, 0x7E,
- 0x00, 0xAD, 0x5F, 0xAC, 0x5E, 0x12, 0x12, 0x8B,
-/*15C0*/0x05, 0x60, 0x02, 0x15, 0x58, 0x22, 0x90, 0x11,
- 0x4D, 0xE4, 0x93, 0x90, 0x07, 0x2E, 0xF0, 0x12,
-/*15D0*/0x08, 0x1F, 0x75, 0x83, 0xAE, 0xE0, 0x54, 0x1A,
- 0xF5, 0x34, 0x70, 0x67, 0xEF, 0x44, 0x07, 0xF5,
-/*15E0*/0x82, 0x75, 0x83, 0xCE, 0xE0, 0xFF, 0x13, 0x13,
- 0x13, 0x54, 0x07, 0xF5, 0x36, 0x54, 0x0F, 0xD3,
-/*15F0*/0x94, 0x00, 0x40, 0x06, 0x12, 0x14, 0x2D, 0x12,
- 0x1B, 0xA9, 0xE5, 0x36, 0x54, 0x0F, 0x24, 0xFE,
-/*1600*/0x60, 0x0C, 0x14, 0x60, 0x0C, 0x14, 0x60, 0x19,
- 0x24, 0x03, 0x70, 0x37, 0x80, 0x10, 0x02, 0x1E,
-/*1610*/0x91, 0x12, 0x1E, 0x91, 0x12, 0x07, 0x2A, 0x75,
- 0x83, 0xCE, 0xE0, 0x54, 0xEF, 0xF0, 0x02, 0x1D,
-/*1620*/0xAE, 0x12, 0x10, 0x14, 0xE4, 0xF5, 0x55, 0x12,
- 0x1D, 0x85, 0x05, 0x55, 0xE5, 0x55, 0xC3, 0x94,
-/*1630*/0x05, 0x40, 0xF4, 0x12, 0x07, 0x2A, 0x75, 0x83,
- 0xCE, 0xE0, 0x54, 0xC7, 0x12, 0x07, 0x29, 0xE0,
-/*1640*/0x44, 0x08, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
- 0x59, 0xAF, 0x08, 0xEF, 0x44, 0x07, 0xF5, 0x82,
-/*1650*/0x75, 0x83, 0xD0, 0xE0, 0xFD, 0xC4, 0x54, 0x0F,
- 0xF5, 0x5A, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0x75,
-/*1660*/0x83, 0x80, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x21,
- 0x75, 0x83, 0x82, 0xE5, 0x45, 0xF0, 0xEF, 0x44,
-/*1670*/0x07, 0xF5, 0x82, 0x75, 0x83, 0x8A, 0x74, 0xFF,
- 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x07, 0x2A, 0x75,
-/*1680*/0x83, 0xBC, 0xE0, 0x54, 0xEF, 0x12, 0x07, 0x29,
- 0x75, 0x83, 0xBE, 0xE0, 0x54, 0xEF, 0x12, 0x07,
-/*1690*/0x29, 0x75, 0x83, 0xC0, 0xE0, 0x54, 0xEF, 0x12,
- 0x07, 0x29, 0x75, 0x83, 0xBC, 0xE0, 0x44, 0x10,
-/*16A0*/0x12, 0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x44,
- 0x10, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0,
-/*16B0*/0x44, 0x10, 0xF0, 0xAF, 0x58, 0xE5, 0x59, 0x12,
- 0x08, 0x78, 0x02, 0x0A, 0xFF, 0xE4, 0xF5, 0x58,
-/*16C0*/0x7D, 0x01, 0xF5, 0x59, 0xAF, 0x35, 0xFE, 0xFC,
- 0x12, 0x09, 0x15, 0x12, 0x07, 0x2A, 0x75, 0x83,
-/*16D0*/0xB6, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
- 0xB8, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
-/*16E0*/0xBA, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
- 0xBC, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
-/*16F0*/0xBE, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
- 0xC0, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
-/*1700*/0x90, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2,
- 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4,
-/*1710*/0x12, 0x07, 0x29, 0x75, 0x83, 0x92, 0xE4, 0x12,
- 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE4, 0x12, 0x07,
-/*1720*/0x29, 0x75, 0x83, 0xC8, 0xE4, 0xF0, 0xAF, 0x58,
- 0xFE, 0xE5, 0x59, 0x12, 0x08, 0x7A, 0x02, 0x0A,
-/*1730*/0xFF, 0xE5, 0xE2, 0x30, 0xE4, 0x6C, 0xE5, 0xE7,
- 0x54, 0xC0, 0x64, 0x40, 0x70, 0x64, 0xE5, 0x09,
-/*1740*/0xC4, 0x54, 0x30, 0xFE, 0xE5, 0x08, 0x25, 0xE0,
- 0x25, 0xE0, 0x54, 0xC0, 0x4E, 0xFE, 0xEF, 0x54,
-/*1750*/0x3F, 0x4E, 0xFD, 0xE5, 0x2B, 0xAE, 0x2A, 0x78,
- 0x02, 0xC3, 0x33, 0xCE, 0x33, 0xCE, 0xD8, 0xF9,
-/*1760*/0xF5, 0x82, 0x8E, 0x83, 0xED, 0xF0, 0xE5, 0x2B,
- 0xAE, 0x2A, 0x78, 0x02, 0xC3, 0x33, 0xCE, 0x33,
-/*1770*/0xCE, 0xD8, 0xF9, 0xFF, 0xF5, 0x82, 0x8E, 0x83,
- 0xA3, 0xE5, 0xFE, 0xF0, 0x8F, 0x82, 0x8E, 0x83,
-/*1780*/0xA3, 0xA3, 0xE5, 0xFD, 0xF0, 0x8F, 0x82, 0x8E,
- 0x83, 0xA3, 0xA3, 0xA3, 0xE5, 0xFC, 0xF0, 0xC3,
-/*1790*/0xE5, 0x2B, 0x94, 0xFA, 0xE5, 0x2A, 0x94, 0x00,
- 0x50, 0x08, 0x05, 0x2B, 0xE5, 0x2B, 0x70, 0x02,
-/*17A0*/0x05, 0x2A, 0x22, 0xE4, 0xFF, 0xE4, 0xF5, 0x58,
- 0xF5, 0x56, 0xF5, 0x57, 0x74, 0x82, 0xFC, 0x12,
-/*17B0*/0x0E, 0x04, 0x8C, 0x83, 0xE0, 0xF5, 0x10, 0x54,
- 0x7F, 0xF0, 0xE5, 0x10, 0x44, 0x80, 0x12, 0x0E,
-/*17C0*/0x98, 0xED, 0xF0, 0x7E, 0x0A, 0x12, 0x0E, 0x04,
- 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0, 0x26, 0xDE,
-/*17D0*/0xF4, 0x05, 0x57, 0xE5, 0x57, 0x70, 0x02, 0x05,
- 0x56, 0xE5, 0x14, 0x24, 0x01, 0xFD, 0xE4, 0x33,
-/*17E0*/0xFC, 0xD3, 0xE5, 0x57, 0x9D, 0xE5, 0x56, 0x9C,
- 0x40, 0xD9, 0xE5, 0x0A, 0x94, 0x20, 0x50, 0x02,
-/*17F0*/0x05, 0x0A, 0x43, 0xE1, 0x08, 0xC2, 0x31, 0x12,
- 0x0E, 0x04, 0x75, 0x83, 0xA6, 0xE0, 0x55, 0x12,
-/*1800*/0x65, 0x12, 0x70, 0x03, 0xD2, 0x31, 0x22, 0xC2,
- 0x31, 0x22, 0x90, 0x07, 0x26, 0xE0, 0xFA, 0xA3,
-/*1810*/0xE0, 0xF5, 0x82, 0x8A, 0x83, 0xE0, 0xF5, 0x41,
- 0xE5, 0x39, 0xC3, 0x95, 0x41, 0x40, 0x26, 0xE5,
-/*1820*/0x39, 0x95, 0x41, 0xC3, 0x9F, 0xEE, 0x12, 0x07,
- 0x6B, 0x40, 0x04, 0x7C, 0x01, 0x80, 0x02, 0x7C,
-/*1830*/0x00, 0xE5, 0x41, 0x64, 0x3F, 0x60, 0x04, 0x7B,
- 0x01, 0x80, 0x02, 0x7B, 0x00, 0xEC, 0x5B, 0x60,
-/*1840*/0x29, 0x05, 0x41, 0x80, 0x28, 0xC3, 0xE5, 0x41,
- 0x95, 0x39, 0xC3, 0x9F, 0xEE, 0x12, 0x07, 0x6B,
-/*1850*/0x40, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00,
- 0xE5, 0x41, 0x60, 0x04, 0x7E, 0x01, 0x80, 0x02,
-/*1860*/0x7E, 0x00, 0xEF, 0x5E, 0x60, 0x04, 0x15, 0x41,
- 0x80, 0x03, 0x85, 0x39, 0x41, 0x85, 0x3A, 0x40,
-/*1870*/0x22, 0xE5, 0xE2, 0x30, 0xE4, 0x60, 0xE5, 0xE1,
- 0x30, 0xE2, 0x5B, 0xE5, 0x09, 0x70, 0x04, 0x7F,
-/*1880*/0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x08, 0x70,
- 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
-/*1890*/0x5F, 0x60, 0x43, 0x53, 0xF9, 0xF8, 0xE5, 0xE2,
- 0x30, 0xE4, 0x3B, 0xE5, 0xE1, 0x30, 0xE2, 0x2E,
-/*18A0*/0x43, 0xFA, 0x02, 0x53, 0xFA, 0xFB, 0xE4, 0xF5,
- 0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0xE5,
-/*18B0*/0xE1, 0x30, 0xE2, 0xE7, 0x90, 0x94, 0x70, 0xE0,
- 0x65, 0x10, 0x60, 0x03, 0x43, 0xFA, 0x04, 0x05,
-/*18C0*/0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0x70,
- 0xE6, 0x12, 0x00, 0x06, 0x80, 0xE1, 0x53, 0xFA,
-/*18D0*/0xFD, 0x53, 0xFA, 0xFB, 0x80, 0xC0, 0x22, 0x8F,
- 0x54, 0x12, 0x00, 0x06, 0xE5, 0xE1, 0x30, 0xE0,
-/*18E0*/0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5,
- 0x7E, 0xD3, 0x94, 0x05, 0x40, 0x04, 0x7E, 0x01,
-/*18F0*/0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60, 0x3D,
- 0x85, 0x54, 0x11, 0xE5, 0xE2, 0x20, 0xE1, 0x32,
-/*1900*/0x74, 0xCE, 0x12, 0x1A, 0x05, 0x30, 0xE7, 0x04,
- 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0x8F, 0x82,
-/*1910*/0x8E, 0x83, 0xE0, 0x30, 0xE6, 0x04, 0x7F, 0x01,
- 0x80, 0x02, 0x7F, 0x00, 0xEF, 0x5D, 0x70, 0x15,
-/*1920*/0x12, 0x15, 0xC6, 0x74, 0xCE, 0x12, 0x1A, 0x05,
- 0x30, 0xE6, 0x07, 0xE0, 0x44, 0x80, 0xF0, 0x43,
-/*1930*/0xF9, 0x80, 0x12, 0x18, 0x71, 0x22, 0x12, 0x0E,
- 0x44, 0xE5, 0x16, 0x25, 0xE0, 0x25, 0xE0, 0x24,
-/*1940*/0xB0, 0xF5, 0x82, 0xE4, 0x34, 0x1A, 0xF5, 0x83,
- 0xE4, 0x93, 0xF5, 0x0F, 0xE5, 0x16, 0x25, 0xE0,
-/*1950*/0x25, 0xE0, 0x24, 0xB1, 0xF5, 0x82, 0xE4, 0x34,
- 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0E, 0x12,
-/*1960*/0x0E, 0x65, 0xF5, 0x10, 0xE5, 0x0F, 0x54, 0xF0,
- 0x12, 0x0E, 0x17, 0x75, 0x83, 0x8C, 0xEF, 0xF0,
-/*1970*/0xE5, 0x0F, 0x30, 0xE0, 0x0C, 0x12, 0x0E, 0x04,
- 0x75, 0x83, 0x86, 0xE0, 0x44, 0x40, 0xF0, 0x80,
-/*1980*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x86, 0xE0,
- 0x54, 0xBF, 0xF0, 0x12, 0x0E, 0x91, 0x75, 0x83,
-/*1990*/0x82, 0xE5, 0x0E, 0xF0, 0x22, 0x7F, 0x05, 0x12,
- 0x17, 0x31, 0x12, 0x0E, 0x04, 0x12, 0x0E, 0x33,
-/*19A0*/0x74, 0x02, 0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E,
- 0x04, 0x12, 0x0E, 0x0B, 0xEF, 0xF0, 0x75, 0x15,
-/*19B0*/0x70, 0x12, 0x0F, 0xF7, 0x20, 0x34, 0x05, 0x75,
- 0x15, 0x10, 0x80, 0x03, 0x75, 0x15, 0x50, 0x12,
-/*19C0*/0x0F, 0xF7, 0x20, 0x34, 0x04, 0x74, 0x10, 0x80,
- 0x02, 0x74, 0xF0, 0x25, 0x15, 0xF5, 0x15, 0x12,
-/*19D0*/0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x20,
- 0x34, 0x17, 0xE5, 0x15, 0x64, 0x30, 0x60, 0x0C,
-/*19E0*/0x74, 0x10, 0x25, 0x15, 0xF5, 0x15, 0xB4, 0x80,
- 0x03, 0xE4, 0xF5, 0x15, 0x12, 0x0E, 0x21, 0xEF,
-/*19F0*/0xF0, 0x22, 0xF0, 0xE5, 0x0B, 0x25, 0xE0, 0x25,
- 0xE0, 0x24, 0x82, 0xF5, 0x82, 0xE4, 0x34, 0x07,
-/*1A00*/0xF5, 0x83, 0x22, 0x74, 0x88, 0xFE, 0xE5, 0x08,
- 0x44, 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0,
-/*1A10*/0x22, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
- 0x22, 0xF0, 0xE0, 0x54, 0xC0, 0x8F, 0x82, 0x8E,
-/*1A20*/0x83, 0xF0, 0x22, 0xEF, 0x44, 0x07, 0xF5, 0x82,
- 0x75, 0x83, 0x86, 0xE0, 0x54, 0x10, 0xD3, 0x94,
-/*1A30*/0x00, 0x22, 0xF0, 0x90, 0x07, 0x15, 0xE0, 0x04,
- 0xF0, 0x22, 0x44, 0x06, 0xF5, 0x82, 0x75, 0x83,
-/*1A40*/0x9E, 0xE0, 0x22, 0xFE, 0xEF, 0x44, 0x07, 0xF5,
- 0x82, 0x8E, 0x83, 0xE0, 0x22, 0xE4, 0x90, 0x07,
-/*1A50*/0x2A, 0xF0, 0xA3, 0xF0, 0x12, 0x07, 0x2A, 0x75,
- 0x83, 0x82, 0xE0, 0x54, 0x7F, 0x12, 0x07, 0x29,
-/*1A60*/0xE0, 0x44, 0x80, 0xF0, 0x12, 0x10, 0xFC, 0x12,
- 0x08, 0x1F, 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0,
-/*1A70*/0x1A, 0x90, 0x07, 0x2B, 0xE0, 0x04, 0xF0, 0x70,
- 0x06, 0x90, 0x07, 0x2A, 0xE0, 0x04, 0xF0, 0x90,
-/*1A80*/0x07, 0x2A, 0xE0, 0xB4, 0x10, 0xE1, 0xA3, 0xE0,
- 0xB4, 0x00, 0xDC, 0xEE, 0x44, 0xA6, 0xFC, 0xEF,
-/*1A90*/0x44, 0x07, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0xF5,
- 0x32, 0xEE, 0x44, 0xA8, 0xFE, 0xEF, 0x44, 0x07,
-/*1AA0*/0xF5, 0x82, 0x8E, 0x83, 0xE0, 0xF5, 0x33, 0x22,
- 0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x90,
-/*1AB0*/0x00, 0x20, 0x0F, 0x92, 0x00, 0x21, 0x0F, 0x94,
- 0x00, 0x22, 0x0F, 0x96, 0x00, 0x23, 0x0F, 0x98,
-/*1AC0*/0x00, 0x24, 0x0F, 0x9A, 0x00, 0x25, 0x0F, 0x9C,
- 0x00, 0x26, 0x0F, 0x9E, 0x00, 0x27, 0x0F, 0xA0,
-/*1AD0*/0x01, 0x20, 0x01, 0xA2, 0x01, 0x21, 0x01, 0xA4,
- 0x01, 0x22, 0x01, 0xA6, 0x01, 0x23, 0x01, 0xA8,
-/*1AE0*/0x01, 0x24, 0x01, 0xAA, 0x01, 0x25, 0x01, 0xAC,
- 0x01, 0x26, 0x01, 0xAE, 0x01, 0x27, 0x01, 0xB0,
-/*1AF0*/0x01, 0x28, 0x01, 0xB4, 0x00, 0x28, 0x0F, 0xB6,
- 0x40, 0x28, 0x0F, 0xB8, 0x61, 0x28, 0x01, 0xCB,
-/*1B00*/0xEF, 0xCB, 0xCA, 0xEE, 0xCA, 0x7F, 0x01, 0xE4,
- 0xFD, 0xEB, 0x4A, 0x70, 0x24, 0xE5, 0x08, 0xF5,
-/*1B10*/0x82, 0x74, 0xB6, 0x12, 0x08, 0x29, 0xE5, 0x08,
- 0xF5, 0x82, 0x74, 0xB8, 0x12, 0x08, 0x29, 0xE5,
-/*1B20*/0x08, 0xF5, 0x82, 0x74, 0xBA, 0x12, 0x08, 0x29,
- 0x7E, 0x00, 0x7C, 0x00, 0x12, 0x0A, 0xFF, 0x80,
-/*1B30*/0x12, 0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE5,
- 0x41, 0xF0, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35,
-/*1B40*/0xE5, 0x40, 0xF0, 0x12, 0x07, 0x2A, 0x75, 0x83,
- 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74, 0x01, 0x12,
-/*1B50*/0x07, 0x29, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x26,
- 0xF5, 0x27, 0x53, 0xE1, 0xFE, 0xF5, 0x2A, 0x75,
-/*1B60*/0x2B, 0x01, 0xF5, 0x08, 0x7F, 0x01, 0x12, 0x17,
- 0x31, 0x30, 0x30, 0x1C, 0x90, 0x1A, 0xA9, 0xE4,
-/*1B70*/0x93, 0xF5, 0x10, 0x90, 0x1F, 0xF9, 0xE4, 0x93,
- 0xF5, 0x10, 0x90, 0x00, 0x41, 0xE4, 0x93, 0xF5,
-/*1B80*/0x10, 0x90, 0x1E, 0xCA, 0xE4, 0x93, 0xF5, 0x10,
- 0x7F, 0x02, 0x12, 0x17, 0x31, 0x12, 0x0F, 0x54,
-/*1B90*/0x7F, 0x03, 0x12, 0x17, 0x31, 0x12, 0x00, 0x06,
- 0xE5, 0xE2, 0x30, 0xE7, 0x09, 0x12, 0x10, 0x00,
-/*1BA0*/0x30, 0x30, 0x03, 0x12, 0x11, 0x00, 0x02, 0x00,
- 0x47, 0x12, 0x08, 0x1F, 0x75, 0x83, 0xD0, 0xE0,
-/*1BB0*/0xC4, 0x54, 0x0F, 0xFD, 0x75, 0x43, 0x01, 0x75,
- 0x44, 0xFF, 0x12, 0x08, 0xAA, 0x74, 0x04, 0xF0,
-/*1BC0*/0x75, 0x3B, 0x01, 0xED, 0x14, 0x60, 0x0C, 0x14,
- 0x60, 0x0B, 0x14, 0x60, 0x0F, 0x24, 0x03, 0x70,
-/*1BD0*/0x0B, 0x80, 0x09, 0x80, 0x00, 0x12, 0x08, 0xA7,
- 0x04, 0xF0, 0x80, 0x06, 0x12, 0x08, 0xA7, 0x74,
-/*1BE0*/0x04, 0xF0, 0xEE, 0x44, 0x82, 0xFE, 0xEF, 0x44,
- 0x07, 0xF5, 0x82, 0x8E, 0x83, 0xE5, 0x45, 0x12,
-/*1BF0*/0x08, 0xBE, 0x75, 0x83, 0x82, 0xE5, 0x31, 0xF0,
- 0x02, 0x11, 0x4C, 0x8E, 0x60, 0x8F, 0x61, 0x12,
-/*1C00*/0x1E, 0xA5, 0xE4, 0xFF, 0xCE, 0xED, 0xCE, 0xEE,
- 0xD3, 0x95, 0x61, 0xE5, 0x60, 0x12, 0x07, 0x6B,
-/*1C10*/0x40, 0x39, 0x74, 0x20, 0x2E, 0xF5, 0x82, 0xE4,
- 0x34, 0x03, 0xF5, 0x83, 0xE0, 0x70, 0x03, 0xFF,
-/*1C20*/0x80, 0x26, 0x12, 0x08, 0xE2, 0xFD, 0xC3, 0x9F,
- 0x40, 0x1E, 0xCF, 0xED, 0xCF, 0xEB, 0x4A, 0x70,
-/*1C30*/0x0B, 0x8D, 0x42, 0x12, 0x08, 0xEE, 0xF5, 0x41,
- 0x8E, 0x40, 0x80, 0x0C, 0x12, 0x08, 0xE2, 0xF5,
-/*1C40*/0x38, 0x12, 0x08, 0xEE, 0xF5, 0x39, 0x8E, 0x3A,
- 0x1E, 0x80, 0xBC, 0x22, 0x75, 0x58, 0x01, 0xE5,
-/*1C50*/0x35, 0x70, 0x0C, 0x12, 0x07, 0xCC, 0xE0, 0xF5,
- 0x4A, 0x12, 0x07, 0xD8, 0xE0, 0xF5, 0x4C, 0xE5,
-/*1C60*/0x35, 0xB4, 0x04, 0x0C, 0x12, 0x07, 0xE4, 0xE0,
- 0xF5, 0x4A, 0x12, 0x07, 0xF0, 0xE0, 0xF5, 0x4C,
-/*1C70*/0xE5, 0x35, 0xB4, 0x01, 0x04, 0x7F, 0x01, 0x80,
- 0x02, 0x7F, 0x00, 0xE5, 0x35, 0xB4, 0x02, 0x04,
-/*1C80*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F,
- 0x60, 0x0C, 0x12, 0x07, 0xFC, 0xE0, 0xF5, 0x4A,
-/*1C90*/0x12, 0x08, 0x08, 0xE0, 0xF5, 0x4C, 0x85, 0x41,
- 0x49, 0x85, 0x40, 0x4B, 0x22, 0x75, 0x5B, 0x01,
-/*1CA0*/0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE0, 0x54,
- 0x1F, 0xFF, 0xD3, 0x94, 0x02, 0x50, 0x04, 0x8F,
-/*1CB0*/0x58, 0x80, 0x05, 0xEF, 0x24, 0xFE, 0xF5, 0x58,
- 0xEF, 0xC3, 0x94, 0x18, 0x40, 0x05, 0x75, 0x59,
-/*1CC0*/0x18, 0x80, 0x04, 0xEF, 0x04, 0xF5, 0x59, 0x85,
- 0x43, 0x5A, 0xAF, 0x58, 0x7E, 0x00, 0xAD, 0x59,
-/*1CD0*/0x7C, 0x00, 0xAB, 0x5B, 0x7A, 0x00, 0x12, 0x15,
- 0x41, 0xAF, 0x5A, 0x7E, 0x00, 0x12, 0x18, 0x0A,
-/*1CE0*/0xAF, 0x5B, 0x7E, 0x00, 0x02, 0x1A, 0xFF, 0xE5,
- 0xE2, 0x30, 0xE7, 0x0E, 0x12, 0x10, 0x03, 0xC2,
-/*1CF0*/0x30, 0x30, 0x30, 0x03, 0x12, 0x10, 0xFF, 0x20,
- 0x33, 0x28, 0xE5, 0xE7, 0x30, 0xE7, 0x05, 0x12,
-/*1D00*/0x0E, 0xA2, 0x80, 0x0D, 0xE5, 0xFE, 0xC3, 0x94,
- 0x20, 0x50, 0x06, 0x12, 0x0E, 0xA2, 0x43, 0xF9,
-/*1D10*/0x08, 0xE5, 0xF2, 0x30, 0xE7, 0x03, 0x53, 0xF9,
- 0x7F, 0xE5, 0xF1, 0x54, 0x70, 0xD3, 0x94, 0x00,
-/*1D20*/0x50, 0xD8, 0x22, 0x12, 0x0E, 0x04, 0x75, 0x83,
- 0x80, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0x12,
-/*1D30*/0x0D, 0xFD, 0x75, 0x83, 0x84, 0x12, 0x0E, 0x02,
- 0x75, 0x83, 0x86, 0x12, 0x0E, 0x02, 0x75, 0x83,
-/*1D40*/0x8C, 0xE0, 0x54, 0xF3, 0x12, 0x0E, 0x03, 0x75,
- 0x83, 0x8E, 0x12, 0x0E, 0x02, 0x75, 0x83, 0x94,
-/*1D50*/0xE0, 0x54, 0xFB, 0xF0, 0x22, 0x12, 0x07, 0x2A,
- 0x75, 0x83, 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74,
-/*1D60*/0x01, 0x12, 0x07, 0x29, 0xE4, 0x12, 0x08, 0xBE,
- 0x75, 0x83, 0x8C, 0xE0, 0x44, 0x20, 0x12, 0x08,
-/*1D70*/0xBE, 0xE0, 0x54, 0xDF, 0xF0, 0x74, 0x84, 0x85,
- 0x08, 0x82, 0xF5, 0x83, 0xE0, 0x54, 0x7F, 0xF0,
-/*1D80*/0xE0, 0x44, 0x80, 0xF0, 0x22, 0x75, 0x56, 0x01,
- 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE, 0xFC,
-/*1D90*/0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12, 0x1E,
- 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E, 0x00,
-/*1DA0*/0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
- 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0x75, 0x56,
-/*1DB0*/0x01, 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE,
- 0xFC, 0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12,
-/*1DC0*/0x1E, 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E,
- 0x00, 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44,
-/*1DD0*/0xAF, 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xE4,
- 0xF5, 0x16, 0x12, 0x0E, 0x44, 0xFE, 0xE5, 0x08,
-/*1DE0*/0x44, 0x05, 0xFF, 0x12, 0x0E, 0x65, 0x8F, 0x82,
- 0x8E, 0x83, 0xF0, 0x05, 0x16, 0xE5, 0x16, 0xC3,
-/*1DF0*/0x94, 0x14, 0x40, 0xE6, 0xE5, 0x08, 0x12, 0x0E,
- 0x2B, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
-/*1E00*/0x59, 0xF5, 0x5A, 0xFF, 0xFE, 0xAD, 0x58, 0xFC,
- 0x12, 0x09, 0x15, 0x7F, 0x04, 0x7E, 0x00, 0xAD,
-/*1E10*/0x58, 0x7C, 0x00, 0x12, 0x09, 0x15, 0x7F, 0x02,
- 0x7E, 0x00, 0xAD, 0x58, 0x7C, 0x00, 0x02, 0x09,
-/*1E20*/0x15, 0xE5, 0x3C, 0x25, 0x3E, 0xFC, 0xE5, 0x42,
- 0x24, 0x00, 0xFB, 0xE4, 0x33, 0xFA, 0xEC, 0xC3,
-/*1E30*/0x9B, 0xEA, 0x12, 0x07, 0x6B, 0x40, 0x0B, 0x8C,
- 0x42, 0xE5, 0x3D, 0x25, 0x3F, 0xF5, 0x41, 0x8F,
-/*1E40*/0x40, 0x22, 0x12, 0x09, 0x0B, 0x22, 0x74, 0x84,
- 0xF5, 0x18, 0x85, 0x08, 0x19, 0x85, 0x19, 0x82,
-/*1E50*/0x85, 0x18, 0x83, 0xE0, 0x54, 0x7F, 0xF0, 0xE0,
- 0x44, 0x80, 0xF0, 0xE0, 0x44, 0x80, 0xF0, 0x22,
-/*1E60*/0xEF, 0x4E, 0x70, 0x0B, 0x12, 0x07, 0x2A, 0x75,
- 0x83, 0xD2, 0xE0, 0x54, 0xDF, 0xF0, 0x22, 0x12,
-/*1E70*/0x07, 0x2A, 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x20,
- 0xF0, 0x22, 0x75, 0x58, 0x01, 0x90, 0x07, 0x26,
-/*1E80*/0x12, 0x07, 0x35, 0xE0, 0x54, 0x3F, 0xF5, 0x41,
- 0x12, 0x07, 0x32, 0xE0, 0x54, 0x3F, 0xF5, 0x40,
-/*1E90*/0x22, 0x75, 0x56, 0x02, 0xE4, 0xF5, 0x57, 0x12,
- 0x1D, 0xFC, 0xAF, 0x57, 0x7E, 0x00, 0xAD, 0x56,
-/*1EA0*/0x7C, 0x00, 0x02, 0x04, 0x44, 0xE4, 0xF5, 0x42,
- 0xF5, 0x41, 0xF5, 0x40, 0xF5, 0x38, 0xF5, 0x39,
-/*1EB0*/0xF5, 0x3A, 0x22, 0xEF, 0x54, 0x07, 0xFF, 0xE5,
- 0xF9, 0x54, 0xF8, 0x4F, 0xF5, 0xF9, 0x22, 0x7F,
-/*1EC0*/0x01, 0xE4, 0xFE, 0x0F, 0x0E, 0xBE, 0xFF, 0xFB,
- 0x22, 0x01, 0x20, 0x00, 0x01, 0x04, 0x20, 0x00,
-/*1ED0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1EE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1EF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F00*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F10*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F20*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F30*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F40*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F50*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F60*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F70*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F80*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1F90*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1FA0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1FB0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1FC0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1FD0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1FE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-/*1FF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x81
-};
-
-int ipath_sd7220_ib_load(struct ipath_devdata *dd)
-{
- return ipath_sd7220_prog_ld(dd, IB_7220_SERDES, ipath_sd7220_ib_img,
- sizeof(ipath_sd7220_ib_img), 0);
-}
-
-int ipath_sd7220_ib_vfy(struct ipath_devdata *dd)
-{
- return ipath_sd7220_prog_vfy(dd, IB_7220_SERDES, ipath_sd7220_ib_img,
- sizeof(ipath_sd7220_ib_img), 0);
-}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index eaba03273e4..17a517766ad 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -31,6 +31,7 @@
*/
#include <linux/spinlock.h>
+#include <linux/gfp.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -246,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque)
/* ipath_sdma_abort() is done, waiting for interrupt */
if (status == IPATH_SDMA_ABORT_DISARMED) {
- if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+ if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
goto resched_noprint;
/* give up, intr got lost somewhere */
ipath_dbg("give up waiting for SDMADISABLED intr\n");
@@ -340,7 +341,7 @@ resched:
* JAG - this is bad to just have default be a loop without
* state change
*/
- if (jiffies > dd->ipath_sdma_abort_jiffies) {
+ if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
ipath_dbg("looping with status 0x%08lx\n",
dd->ipath_sdma_status);
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
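The two hunks above swap raw '<'/'>' comparisons on jiffies for time_before()/time_after(). A minimal illustrative sketch of why (not part of the patch; the helper name below is made up):

#include <linux/jiffies.h>
#include <linux/types.h>

/*
 * A raw comparison breaks once jiffies wraps: a deadline taken just
 * before the wrap looks "larger" than jiffies sampled just after it.
 * time_after()/time_before() compare via a signed difference, so they
 * stay correct across the wrap.
 */
static bool sketch_timed_out(unsigned long deadline)
{
	/* wrong: return jiffies > deadline; */
	return time_after(jiffies, deadline);	/* wrap-safe */
}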
@@ -698,10 +699,8 @@ retry:
addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
tx->map_len, DMA_TO_DEVICE);
- if (dma_mapping_error(addr)) {
- ret = -EIO;
- goto unlock;
- }
+ if (dma_mapping_error(&dd->pcidev->dev, addr))
+ goto ioerr;
dwoffset = tx->map_len >> 2;
make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
@@ -741,6 +740,8 @@ retry:
dw = (len + 3) >> 2;
addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, addr))
+ goto unmap;
make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
/* SDmaUseLargeBuf has to be set in every descriptor */
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
@@ -781,10 +782,10 @@ retry:
descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0];
descqp -= 2;
/* SDmaLastDesc */
- descqp[0] |= __constant_cpu_to_le64(1ULL << 11);
+ descqp[0] |= cpu_to_le64(1ULL << 11);
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) {
/* SDmaIntReq */
- descqp[0] |= __constant_cpu_to_le64(1ULL << 15);
+ descqp[0] |= cpu_to_le64(1ULL << 15);
}
/* Commit writes to memory and advance the tail on the chip */
@@ -798,7 +799,18 @@ retry:
list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
vl15_watchdog_enq(dd);
-
+ goto unlock;
+
+unmap:
+ while (tail != dd->ipath_sdma_descq_tail) {
+ if (!tail)
+ tail = dd->ipath_sdma_descq_cnt - 1;
+ else
+ tail--;
+ unmap_desc(dd, tail);
+ }
+ioerr:
+ ret = -EIO;
unlock:
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
fail:
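The hunks above add the struct device argument that current dma_mapping_error() takes and, when a later mapping in the chain fails, walk back and unmap the descriptors that were already set up. A rough sketch of the same check-and-unwind pattern, assuming a hypothetical array of buffers rather than the driver's descriptor queue:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

/* Map several buffers for DMA; on failure undo the earlier mappings. */
static int sketch_map_bufs(struct device *dev, void **bufs, size_t *lens,
			   dma_addr_t *addrs, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		addrs[i] = dma_map_single(dev, bufs[i], lens[i],
					  DMA_TO_DEVICE);
		if (dma_mapping_error(dev, addrs[i]))
			goto unmap;
	}
	return 0;

unmap:
	while (--i >= 0)
		dma_unmap_single(dev, addrs[i], lens[i], DMA_TO_DEVICE);
	return -EIO;
}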
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index e3d80ca84c1..26271984b71 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -32,6 +32,7 @@
*/
#include <linux/err.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "ipath_verbs.h"
@@ -106,6 +107,11 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
u32 sz;
struct ib_srq *ret;
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+ ret = ERR_PTR(-ENOSYS);
+ goto done;
+ }
+
if (srq_init_attr->attr.max_wr == 0) {
ret = ERR_PTR(-EINVAL);
goto done;
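The ipath_create_srq() change rejects any SRQ type other than IB_SRQT_BASIC by returning ERR_PTR(-ENOSYS). For reference, a small sketch of the ERR_PTR/IS_ERR convention the function relies on (hypothetical object type, not driver code):

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct sketch_obj { int type; };

/* Encode an errno in the returned pointer instead of returning NULL. */
static struct sketch_obj *sketch_create(int type)
{
	struct sketch_obj *obj;

	if (type != 0)			/* only the "basic" type supported */
		return ERR_PTR(-ENOSYS);

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return ERR_PTR(-ENOMEM);
	obj->type = type;
	return obj;
}

/* Caller side: IS_ERR()/PTR_ERR() recover the error code. */
static int sketch_use(void)
{
	struct sketch_obj *obj = sketch_create(1);

	if (IS_ERR(obj))
		return PTR_ERR(obj);
	kfree(obj);
	return 0;
}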
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index c8e3d65f0de..f63e143e329 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -112,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
dd->ipath_lastrpkts = val;
}
val64 = dd->ipath_rpkts;
+ } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
+ if (dd->ibdeltainprog)
+ val64 -= val64 - dd->ibsymsnap;
+ val64 -= dd->ibsymdelta;
+ } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
+ if (dd->ibdeltainprog)
+ val64 -= val64 - dd->iblnkerrsnap;
+ val64 -= dd->iblnkerrdelta;
} else
val64 = (u64) val;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index a6c8efbdc0c..75558f33f1c 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -32,6 +32,7 @@
*/
#include <linux/ctype.h>
+#include <linux/stat.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
@@ -557,6 +558,7 @@ static ssize_t store_reset(struct device *dev,
dev_info(dev,"Unit %d is disabled, can't reset\n",
dd->ipath_unit);
ret = -EINVAL;
+ goto bail;
}
ret = ipath_reset_device(dd->ipath_unit);
bail:
@@ -1069,7 +1071,7 @@ static ssize_t show_tempsense(struct device *dev,
return ret;
}
-struct attribute_group *ipath_driver_attr_groups[] = {
+const struct attribute_group *ipath_driver_attr_groups[] = {
&driver_attr_group,
NULL,
};
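The store_reset() hunk adds the missing goto, so a disabled unit now returns -EINVAL instead of falling through and resetting the device anyway. A sketch of the intended control flow in a sysfs store handler (hypothetical handler; the disabled check is a stand-in):

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/types.h>

static ssize_t sketch_store_reset(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	int ret;
	bool disabled = true;		/* stand-in for the real check */

	if (disabled) {
		ret = -EINVAL;
		goto bail;		/* without this the reset still ran */
	}
	ret = 0;			/* stand-in for the actual reset */
bail:
	return ret < 0 ? ret : count;
}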
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 82cc588b8bf..22e60998f1a 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -419,7 +419,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
- __constant_cpu_to_be32(1 << 23)) != 0);
+ cpu_to_be32(1 << 23)) != 0);
break;
case OP(RDMA_WRITE_FIRST):
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 36aa242c487..e8a2a915251 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/sched.h>
#include <rdma/ib_smi.h>
#include "ipath_verbs.h"
@@ -70,8 +71,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
goto done;
}
- rsge.sg_list = NULL;
-
/*
* Check that the qkey matches (except for QP0, see 9.6.1.4.1).
* Qkeys with the high order bit set mean use the
@@ -87,7 +86,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
}
/*
- * A GRH is expected to preceed the data even if not
+ * A GRH is expected to precede the data even if not
* present on the wire.
*/
length = swqe->length;
@@ -115,21 +114,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
rq = &qp->r_rq;
}
- if (rq->max_sge > 1) {
- /*
- * XXX We could use GFP_KERNEL if ipath_do_send()
- * was always called from the tasklet instead of
- * from ipath_post_send().
- */
- rsge.sg_list = kmalloc((rq->max_sge - 1) *
- sizeof(struct ipath_sge),
- GFP_ATOMIC);
- if (!rsge.sg_list) {
- dev->n_pkt_drops++;
- goto drop;
- }
- }
-
/*
* Get the next work request entry to find where to put the data.
* Note that it is safe to drop the lock after changing rq->tail
@@ -147,6 +131,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
goto drop;
}
wqe = get_rwqe_ptr(rq, tail);
+ rsge.sg_list = qp->r_ud_sg_list;
if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
spin_unlock_irqrestore(&rq->lock, flags);
dev->n_pkt_drops++;
@@ -242,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
swqe->wr.send_flags & IB_SEND_SOLICITED);
drop:
- kfree(rsge.sg_list);
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
done:;
@@ -267,6 +251,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
u16 lrh0;
u16 lid;
int ret = 0;
+ int next_cur;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -290,8 +275,9 @@ int ipath_make_ud_req(struct ipath_qp *qp)
goto bail;
wqe = get_swqe_ptr(qp, qp->s_cur);
- if (++qp->s_cur >= qp->s_size)
- qp->s_cur = 0;
+ next_cur = qp->s_cur + 1;
+ if (next_cur >= qp->s_size)
+ next_cur = 0;
/* Construct the header. */
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
@@ -315,6 +301,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
qp->s_flags |= IPATH_S_WAIT_DMA;
goto bail;
}
+ qp->s_cur = next_cur;
spin_unlock_irqrestore(&qp->s_lock, flags);
ipath_ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -323,6 +310,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
}
}
+ qp->s_cur = next_cur;
extra_bytes = -wqe->length & 3;
nwords = (wqe->length + extra_bytes) >> 2;
@@ -383,7 +371,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
*/
ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
ah_attr->dlid != IPATH_PERMISSIVE_LID ?
- __constant_cpu_to_be32(IPATH_MULTICAST_QPN) :
+ cpu_to_be32(IPATH_MULTICAST_QPN) :
cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
/*
@@ -527,7 +515,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
}
/*
- * A GRH is expected to preceed the data even if not
+ * A GRH is expected to precede the data even if not
* present on the wire.
*/
wc.byte_len = tlen + sizeof(struct ib_grh);
@@ -586,7 +574,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
/* Signal completion event if the solicited bit is set. */
ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
(ohdr->bth[0] &
- __constant_cpu_to_be32(1 << 23)) != 0);
+ cpu_to_be32(1 << 23)) != 0);
bail:;
}
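The ipath_make_ud_req() hunks stop bumping qp->s_cur as soon as the work request is fetched; the next index is computed into next_cur and only committed once the request will definitely be handed off, so bailing out (for example to wait for DMA to drain) no longer skips a slot. A minimal compute-then-commit sketch with made-up names:

#include <linux/errno.h>

struct sketch_ring {
	unsigned int cur;
	unsigned int size;
};

/* Advance the ring index only after the slot is really consumed. */
static int sketch_consume(struct sketch_ring *r, int must_wait)
{
	unsigned int next_cur = r->cur + 1;

	if (next_cur >= r->size)
		next_cur = 0;

	if (must_wait)
		return -EBUSY;	/* bail: r->cur untouched, retry later */

	r->cur = next_cur;	/* commit only on success */
	return 0;
}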
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index 0190edc8044..dc66c450691 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -33,6 +33,8 @@
#include <linux/mm.h>
#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
#include "ipath_kernel.h"
@@ -51,15 +53,14 @@ static void __ipath_release_user_pages(struct page **p, size_t num_pages,
}
/* call with current->mm->mmap_sem held */
-static int __get_user_pages(unsigned long start_page, size_t num_pages,
- struct page **p, struct vm_area_struct **vma)
+static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
{
unsigned long lock_limit;
size_t got;
int ret;
- lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >>
- PAGE_SHIFT;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (num_pages > lock_limit) {
ret = -ENOMEM;
@@ -78,7 +79,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages,
goto bail_release;
}
- current->mm->locked_vm += num_pages;
+ current->mm->pinned_vm += num_pages;
ret = 0;
goto bail;
@@ -164,7 +165,7 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
- ret = __get_user_pages(start_page, num_pages, p, NULL);
+ ret = __ipath_get_user_pages(start_page, num_pages, p, NULL);
up_write(&current->mm->mmap_sem);
@@ -177,7 +178,7 @@ void ipath_release_user_pages(struct page **p, size_t num_pages)
__ipath_release_user_pages(p, num_pages, 1);
- current->mm->locked_vm -= num_pages;
+ current->mm->pinned_vm -= num_pages;
up_write(&current->mm->mmap_sem);
}
@@ -194,7 +195,7 @@ static void user_pages_account(struct work_struct *_work)
container_of(_work, struct ipath_user_pages_work, work);
down_write(&work->mm->mmap_sem);
- work->mm->locked_vm -= work->num_pages;
+ work->mm->pinned_vm -= work->num_pages;
up_write(&work->mm->mmap_sem);
mmput(work->mm);
kfree(work);
@@ -209,20 +210,20 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages)
mm = get_task_mm(current);
if (!mm)
- goto bail;
+ return;
work = kmalloc(sizeof(*work), GFP_KERNEL);
if (!work)
goto bail_mm;
- goto bail;
-
INIT_WORK(&work->work, user_pages_account);
work->mm = mm;
work->num_pages = num_pages;
+ queue_work(ib_wq, &work->work);
+ return;
+
bail_mm:
mmput(mm);
-bail:
return;
}
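Besides renaming the helper so it no longer collides with the core __get_user_pages(), this diff uses the rlimit() accessor for the RLIMIT_MEMLOCK check, accounts pinned pages in mm->pinned_vm rather than locked_vm, drops the stray goto that used to skip the deferred-accounting work setup, and queues that work on ib_wq. A rough sketch of the limit check, assuming it runs with mmap_sem held as in the driver:

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/errno.h>

/* Refuse to pin more pages than RLIMIT_MEMLOCK allows (illustrative). */
static int sketch_check_memlock(size_t num_pages)
{
	unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	if (num_pages > lock_limit)
		return -ENOMEM;

	/* the caller would pin the pages, then account for them: */
	/* current->mm->pinned_vm += num_pages; */
	return 0;
}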
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index 86e016916cd..cc04b7ba348 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -33,6 +33,7 @@
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
@@ -206,7 +207,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd,
dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
DMA_TO_DEVICE);
- if (dma_mapping_error(dma_addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto free_unmap;
}
@@ -235,7 +236,7 @@ static int ipath_user_sdma_num_pages(const struct iovec *iov)
return 1 + ((epage - spage) >> PAGE_SHIFT);
}
-/* truncate length to page boundry */
+/* truncate length to page boundary */
static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len)
{
const unsigned long offset = addr & ~PAGE_MASK;
@@ -279,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
int j;
int ret;
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
+ ret = get_user_pages_fast(addr, npages, 0, pages);
if (ret != npages) {
int i;
@@ -301,7 +300,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
pages[j], 0, flen, DMA_TO_DEVICE);
unsigned long fofs = addr & ~PAGE_MASK;
- if (dma_mapping_error(dma_addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto done;
}
@@ -508,7 +507,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd,
if (page) {
dma_addr = dma_map_page(&dd->pcidev->dev,
page, 0, len, DMA_TO_DEVICE);
- if (dma_mapping_error(dma_addr)) {
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
ret = -ENOMEM;
goto free_pbc;
}
@@ -667,13 +666,13 @@ static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd,
static inline __le64 ipath_sdma_make_first_desc0(__le64 descq)
{
- return descq | __constant_cpu_to_le64(1ULL << 12);
+ return descq | cpu_to_le64(1ULL << 12);
}
static inline __le64 ipath_sdma_make_last_desc0(__le64 descq)
{
/* last */ /* dma head */
- return descq | __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13);
+ return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}
static inline __le64 ipath_sdma_make_desc1(u64 addr)
@@ -763,7 +762,7 @@ static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd,
if (ofs >= IPATH_SMALLBUF_DWORDS) {
for (i = 0; i < pkt->naddr; i++) {
dd->ipath_sdma_descq[dtail].qw[0] |=
- __constant_cpu_to_le64(1ULL << 14);
+ cpu_to_le64(1ULL << 14);
if (++dtail == dd->ipath_sdma_descq_cnt)
dtail = 0;
}
@@ -810,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
while (dim) {
const int mxp = 8;
- down_write(&current->mm->mmap_sem);
ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
- up_write(&current->mm->mmap_sem);
-
if (ret <= 0)
goto done_unlock;
else {
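Switching to get_user_pages_fast() lets the pinning path handle its own locking, which is why the down_write()/up_write() pair on mmap_sem around ipath_user_sdma_queue_pkts() can be dropped. A sketch of a pin/unpin cycle using the same four-argument call the patch uses (a hypothetical wrapper; later kernels changed the signature to take gup_flags):

#include <linux/mm.h>
#include <linux/errno.h>

/* Pin a user buffer for DMA; release it again on partial failure. */
static int sketch_pin_user_buf(unsigned long addr, int npages,
			       struct page **pages)
{
	int got = get_user_pages_fast(addr, npages, 0, pages);
	int i;

	if (got < 0)
		return got;
	if (got == npages)
		return 0;

	for (i = 0; i < got; i++)
		put_page(pages[i]);
	return -ENOMEM;
}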
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 55c71882882..44ea9390417 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -34,6 +34,8 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
@@ -340,9 +342,16 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
int acc;
int ret;
unsigned long flags;
+ struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->ibqp.qp_type != IB_QPT_SMI &&
+ !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ ret = -ENETDOWN;
+ goto bail;
+ }
+
/* Check that state is OK to post send. */
if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
goto bail_inval;
@@ -611,7 +620,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
goto bail;
}
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
dev->opstats[opcode].n_bytes += tlen;
dev->opstats[opcode].n_packets++;
@@ -1021,7 +1030,7 @@ static void sdma_complete(void *cookie, int status)
struct ipath_verbs_txreq *tx = cookie;
struct ipath_qp *qp = tx->qp;
struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
- unsigned int flags;
+ unsigned long flags;
enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
@@ -1051,7 +1060,7 @@ static void sdma_complete(void *cookie, int status)
static void decrement_dma_busy(struct ipath_qp *qp)
{
- unsigned int flags;
+ unsigned long flags;
if (atomic_dec_and_test(&qp->s_dma_busy)) {
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1221,7 +1230,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
unsigned flush_wc;
u32 control;
int ret;
- unsigned int flags;
+ unsigned long flags;
piobuf = ipath_getpiobuf(dd, plen, NULL);
if (unlikely(piobuf == NULL)) {
@@ -1578,7 +1587,7 @@ static int ipath_query_port(struct ib_device *ibdev,
u64 ibcstat;
memset(props, 0, sizeof(*props));
- props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
+ props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = dd->ipath_lmc;
props->sm_lid = dev->sm_lid;
props->sm_sl = dev->sm_sl;
@@ -1845,7 +1854,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd)
}
/**
- * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * ipath_get_pkey - return the indexed PKEY from the port PKEY table
* @dd: the infinipath device
* @index: the PKEY index
*/
@@ -1853,6 +1862,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
unsigned ret;
+ /* always a kernel port, no locking needed */
if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
ret = 0;
else
@@ -2173,11 +2183,12 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
snprintf(dev->node_desc, sizeof(dev->node_desc),
IPATH_IDSTR " %s", init_utsname()->nodename);
- ret = ib_register_device(dev);
+ ret = ib_register_device(dev, NULL);
if (ret)
goto err_reg;
- if (ipath_verbs_register_sysfs(dev))
+ ret = ipath_verbs_register_sysfs(dev);
+ if (ret)
goto err_class;
enable_timer(dd);
@@ -2317,15 +2328,15 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
int i;
int ret;
- for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
- if (device_create_file(&dev->dev,
- ipath_class_attributes[i])) {
- ret = 1;
+ for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
+ ret = device_create_file(&dev->dev,
+ ipath_class_attributes[i]);
+ if (ret)
goto bail;
- }
-
- ret = 0;
-
+ }
+ return 0;
bail:
+ for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
+ device_remove_file(&dev->dev, ipath_class_attributes[i]);
return ret;
}
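ipath_verbs_register_sysfs() now propagates the device_create_file() error and, on failure, removes every attribute again; the patch's bail path simply walks the whole array, relying on removal of a never-created attribute being a no-op. A compact sketch of that register-or-roll-back pattern with a hypothetical attribute list:

#include <linux/device.h>

/* Create a set of sysfs files; on any failure remove them all again. */
static int sketch_create_files(struct device *dev,
			       struct device_attribute **attrs, int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = device_create_file(dev, attrs[i]);
		if (ret)
			goto undo;
	}
	return 0;

undo:
	for (i = 0; i < n; i++)
		device_remove_file(dev, attrs[i]);
	return ret;
}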
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 9d12ae8a778..ae6cff4abff 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -86,11 +86,11 @@
#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
/* Mandatory IB performance counter select values. */
-#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001)
-#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002)
-#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003)
-#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004)
-#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005)
+#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
+#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
struct ib_reth {
__be64 vaddr;
@@ -431,6 +431,7 @@ struct ipath_qp {
u32 s_lsn; /* limit sequence number (credit) */
struct ipath_swqe *s_wq; /* send work queue */
struct ipath_swqe *s_wqe;
+ struct ipath_sge *r_ud_sg_list;
struct ipath_rq r_rq; /* receive work queue */
struct ipath_sge r_sg_list[0]; /* verified SGEs */
};
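The __constant_htons() wrappers are no longer needed: cpu_to_be16() and the other byte-order helpers already fold to compile-time constants when given constant input, so they can be used directly in macro definitions like these. Illustrative only (the name below is made up, not one of the header's selectors):

#include <asm/byteorder.h>

/* cpu_to_be16() on a literal is evaluated at compile time. */
#define SKETCH_PMA_SELECT_EXAMPLE	cpu_to_be16(0x0001)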
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index d73e3223287..6216ea92385 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -32,6 +32,8 @@
*/
#include <linux/rculist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
#include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index 4175a4bd0c7..fc01deac1d3 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,5 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
+ depends on NETDEVICES && ETHERNET && PCI && INET
+ select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index 70f09c7826d..f4213b3a8fe 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
-mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
+mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index c75ac9463e2..2d8c3397774 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -30,64 +30,141 @@
* SOFTWARE.
*/
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include <linux/slab.h>
+#include <linux/inet.h>
+#include <linux/string.h>
+
#include "mlx4_ib.h"
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct mlx4_ib_ah *ah)
{
struct mlx4_dev *dev = to_mdev(pd->device)->dev;
- struct mlx4_ib_ah *ah;
- ah = kmalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
-
- memset(&ah->av, 0, sizeof ah->av);
-
- ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ah->av.g_slid = ah_attr->src_path_bits;
- ah->av.dlid = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
- ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
- while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
- !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
- --ah->av.stat_rate;
- }
- ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.ib.g_slid = ah_attr->src_path_bits;
if (ah_attr->ah_flags & IB_AH_GRH) {
- ah->av.g_slid |= 0x80;
- ah->av.gid_index = ah_attr->grh.sgid_index;
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.sl_tclass_flowlabel |=
+ ah->av.ib.g_slid |= 0x80;
+ ah->av.ib.gid_index = ah_attr->grh.sgid_index;
+ ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.ib.sl_tclass_flowlabel |=
cpu_to_be32((ah_attr->grh.traffic_class << 20) |
ah_attr->grh.flow_label);
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
}
+ ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
+ if (ah_attr->static_rate) {
+ ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.ib.stat_rate;
+ }
+ ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
+
return &ah->ibah;
}
+static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct mlx4_ib_ah *ah)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ struct mlx4_dev *dev = ibdev->dev;
+ int is_mcast = 0;
+ struct in6_addr in6;
+ u16 vlan_tag;
+
+ memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6)) {
+ is_mcast = 1;
+ rdma_get_mcast_mac(&in6, ah->av.eth.mac);
+ } else {
+ memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
+ }
+ vlan_tag = ah_attr->vlan_id;
+ if (vlan_tag < 0x1000)
+ vlan_tag |= (ah_attr->sl & 7) << 13;
+ ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
+ ah->av.eth.gid_index = ah_attr->grh.sgid_index;
+ ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+ if (ah_attr->static_rate) {
+ ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
+ --ah->av.eth.stat_rate;
+ }
+
+ /*
+ * HW requires multicast LID so we just choose one.
+ */
+ if (is_mcast)
+ ah->av.ib.dlid = cpu_to_be16(0xc000);
+
+ memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
+ ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
+
+ return &ah->ibah;
+}
+
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ struct mlx4_ib_ah *ah;
+ struct ib_ah *ret;
+
+ ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
+ if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ ret = ERR_PTR(-EINVAL);
+ } else {
+ /*
+ * TBD: need to handle the case when we get
+ * called in an atomic context and there we
+ * might sleep. We don't expect this
+ * currently since we're working with link
+ * local addresses which we can translate
+ * without going to sleep.
+ */
+ ret = create_iboe_ah(pd, ah_attr, ah);
+ }
+
+ if (IS_ERR(ret))
+ kfree(ah);
+
+ return ret;
+ } else
+ return create_ib_ah(pd, ah_attr, ah); /* never fails */
+}
+
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
+ enum rdma_link_layer ll;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(ah->av.dlid);
- ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
- ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
- if (ah->av.stat_rate)
- ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
+ ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+ ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
+ ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
+ ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
+ if (ah->av.ib.stat_rate)
+ ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
+ ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
if (mlx4_ib_ah_grh_present(ah)) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.traffic_class =
- be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
ah_attr->grh.flow_label =
- be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.sgid_index = ah->av.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
+ ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
+ ah_attr->grh.sgid_index = ah->av.ib.gid_index;
+ memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
}
return 0;
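mlx4_ib_create_ah() now allocates the AH once and branches on the port's link layer: an Ethernet (RoCE) port must supply a GRH and goes through create_iboe_ah(), while an IB port keeps the original path. A stripped-down sketch of that dispatch using the same rdma_port_get_link_layer() helper; the sketch_ah type and the elided fill-in steps are placeholders, not the driver's structures:

#include <rdma/ib_verbs.h>
#include <linux/err.h>
#include <linux/slab.h>

struct sketch_ah {
	struct ib_ah ibah;
	/* the driver-private address vector would live here */
};

static struct ib_ah *sketch_create_ah(struct ib_pd *pd,
				      struct ib_ah_attr *attr)
{
	struct sketch_ah *ah = kzalloc(sizeof(*ah), GFP_ATOMIC);

	if (!ah)
		return ERR_PTR(-ENOMEM);

	if (rdma_port_get_link_layer(pd->device, attr->port_num) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (!(attr->ah_flags & IB_AH_GRH)) {
			kfree(ah);	/* RoCE needs a GRH to derive the MAC */
			return ERR_PTR(-EINVAL);
		}
		/* fill the Ethernet (RoCE) address vector here */
	} else {
		/* fill the native IB address vector here */
	}
	return &ah->ibah;
}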
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
new file mode 100644
index 00000000000..0eb141c4141
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+ /***********************************************************/
+/*This file supports the handling of the Alias GUID feature. */
+/***********************************************************/
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_pack.h>
+#include <linux/mlx4/cmd.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/delay.h>
+#include "mlx4_ib.h"
+
+/*
+The driver keeps the current state of all GUIDs as they are in the HW.
+Whenever an SMP MAD GUIDInfo record is received, the data is cached.
+*/
+
+struct mlx4_alias_guid_work_context {
+ u8 port;
+ struct mlx4_ib_dev *dev ;
+ struct ib_sa_query *sa_query;
+ struct completion done;
+ int query_id;
+ struct list_head list;
+ int block_num;
+};
+
+struct mlx4_next_alias_guid_work {
+ u8 port;
+ u8 block_num;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+};
+
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
+ u8 port_num, u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ int port_index = port_num - 1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* The location of the specific index starts from bit number 4
+ * until bit num 11 */
+ if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
+ if (slave_id >= dev->dev->num_slaves) {
+ pr_debug("The last slave: %d\n", slave_id);
+ return;
+ }
+
+ /* cache the guid: */
+ memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
+ &p_data[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ } else
+ pr_debug("Guid number: %d in block: %d"
+ " was not updated\n", i, block_num);
+ }
+}
+
+static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
+{
+ if (index >= NUM_ALIAS_GUID_PER_PORT) {
+ pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
+ return (__force __be64) -1;
+ }
+ return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
+}
+
+
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
+{
+ return IB_SA_COMP_MASK(4 + index);
+}
+
+/*
+ * Whenever new GUID is set/unset (guid table change) create event and
+ * notify the relevant slave (master also should be notified).
+ * If the GUID value is not as we have in the cache the slave will not be
+ * updated; in this case it waits for the smp_snoop or the port management
+ * event to call the function and to update the slave.
+ * block_number - the index of the block (16 blocks available)
+ * port_number - 1 or 2
+ */
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data)
+{
+ int i;
+ u64 guid_indexes;
+ int slave_id;
+ enum slave_port_state new_state;
+ enum slave_port_state prev_state;
+ __be64 tmp_cur_ag, form_cache_ag;
+ enum slave_port_gen_event gen_event;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+
+ guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
+ ports_guid[port_num - 1].
+ all_rec_per_port[block_num].guid_indexes);
+ pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+
+ /*calculate the slaves and notify them*/
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ /* the location of the specific index runs from bits 4..11 */
+ if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
+ continue;
+
+ slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
+ if (slave_id >= dev->dev->num_vfs + 1)
+ return;
+ tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
+ form_cache_ag = get_cached_alias_guid(dev, port_num,
+ (NUM_ALIAS_GUID_IN_REC * block_num) + i);
+ /*
+ * Check if guid is not the same as in the cache,
+ * If it is different, wait for the snoop_smp or the port mgmt
+ * change event to update the slave on its port state change
+ */
+ if (tmp_cur_ag != form_cache_ag)
+ continue;
+ mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
+
+ /*2 cases: Valid GUID, and Invalid Guid*/
+
+ if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
+ prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
+ new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
+ &gen_event);
+ pr_debug("slave: %d, port: %d prev_port_state: %d,"
+ " new_port_state: %d, gen_event: %d\n",
+ slave_id, port_num, prev_state, new_state, gen_event);
+ if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
+ pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
+ port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
+ }
+ } else { /* request to invalidate GUID */
+ set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
+ MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
+ &gen_event);
+ pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ slave_id, port_num);
+ mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
+ MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+ }
+ }
+}
+
+static void aliasguid_query_handler(int status,
+ struct ib_sa_guidinfo_rec *guid_rec,
+ void *context)
+{
+ struct mlx4_ib_dev *dev;
+ struct mlx4_alias_guid_work_context *cb_ctx = context;
+ u8 port_index ;
+ int i;
+ struct mlx4_sriov_alias_guid_info_rec_det *rec;
+ unsigned long flags, flags1;
+
+ if (!context)
+ return;
+
+ dev = cb_ctx->dev;
+ port_index = cb_ctx->port - 1;
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[cb_ctx->block_num];
+
+ if (status) {
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ pr_debug("(port: %d) failed: status = %d\n",
+ cb_ctx->port, status);
+ goto out;
+ }
+
+ if (guid_rec->block_num != cb_ctx->block_num) {
+ pr_err("block num mismatch: %d != %d\n",
+ cb_ctx->block_num, guid_rec->block_num);
+ goto out;
+ }
+
+ pr_debug("lid/port: %d/%d, block_num: %d\n",
+ be16_to_cpu(guid_rec->lid), cb_ctx->port,
+ guid_rec->block_num);
+
+ rec = &dev->sriov.alias_guid.ports_guid[port_index].
+ all_rec_per_port[guid_rec->block_num];
+
+ rec->status = MLX4_GUID_INFO_STATUS_SET;
+ rec->method = MLX4_GUID_INFO_RECORD_SET;
+
+ for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ __be64 tmp_cur_ag;
+ tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+ /* Check whether the SM declined to assign this record.
+ * If it did, and the value was not a sysadmin request, mark
+ * the record so the SM is asked for a new GUID (instead of
+ * the driver-requested one) on the next pass.
+ */
+ if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
+ mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
+ "block_num: %d was declined by SM, "
+ "ownership by %d (0 = driver, 1=sysAdmin,"
+ " 2=None)\n", __func__, i,
+ guid_rec->block_num, rec->ownership);
+ if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
+ /* if it is driver-assigned, ask the SM for a new GUID */
+ *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+ MLX4_NOT_SET_GUID;
+
+ /* Mark the record as not assigned, and let it
+ * be sent again on the next scheduled work run. */
+ rec->status = MLX4_GUID_INFO_STATUS_IDLE;
+ rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ } else {
+ /* Properly assigned record.
+ * Save the GUID we just got from the SM in the admin_guid
+ * so it is persistent; subsequent requests to the SM will
+ * then ask for the same GUID. */
+ if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
+ tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
+ /* The sysadmin assignment failed. */
+ mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+ " admin guid after SysAdmin "
+ "configuration. "
+ "Record num %d in block_num:%d "
+ "was declined by SM, "
+ "new val(0x%llx) was kept\n",
+ __func__, i,
+ guid_rec->block_num,
+ be64_to_cpu(*(__be64 *) &
+ rec->all_recs[i * GUID_REC_SIZE]));
+ } else {
+ memcpy(&rec->all_recs[i * GUID_REC_SIZE],
+ &guid_rec->guid_info_list[i * GUID_REC_SIZE],
+ GUID_REC_SIZE);
+ }
+ }
+ }
+ /*
+ * This is called here to cover the case where the SM does not
+ * send an SMP; the driver then notifies the slaves from the SA
+ * response instead.
+ */
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
+ cb_ctx->port,
+ guid_rec->guid_info_list);
+out:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down)
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
+ &dev->sriov.alias_guid.ports_guid[port_index].
+ alias_guid_work, 0);
+ if (cb_ctx->sa_query) {
+ list_del(&cb_ctx->list);
+ kfree(cb_ctx);
+ } else
+ complete(&cb_ctx->done);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
+{
+ int i;
+ u64 cur_admin_val;
+ ib_sa_comp_mask comp_mask = 0;
+
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
+ = MLX4_GUID_INFO_STATUS_IDLE;
+ dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ /* Calculate the comp_mask for this record. */
+ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+ cur_admin_val =
+ *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
+ /*
+ * Check the admin value: if it is marked for deletion, or it is
+ * the first GUID of the first record (the HW GUID), or the record
+ * is not owned by the sysadmin and the SM doesn't need to assign
+ * GUIDs, then don't put it up for assignment.
+ */
+ if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
+ (!index && !i) ||
+ MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
+ ports_guid[port - 1].all_rec_per_port[index].ownership)
+ continue;
+ comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+ }
+ dev->sriov.alias_guid.ports_guid[port - 1].
+ all_rec_per_port[index].guid_indexes = comp_mask;
+}
+
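+/*
+ * Build a GuidInfoRecord for the given block and send it to the SA.
+ * On any failure the per-port work is rescheduled so the record is
+ * retried later.
+ */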
+static int set_guid_rec(struct ib_device *ibdev,
+ u8 port, int index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ int err;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_guidinfo_rec guid_info_rec;
+ ib_sa_comp_mask comp_mask;
+ struct ib_port_attr attr;
+ struct mlx4_alias_guid_work_context *callback_context;
+ unsigned long resched_delay, flags, flags1;
+ struct list_head *head =
+ &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+
+ err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
+ if (err) {
+ pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
+ err, port);
+ return err;
+ }
+ /* Check that the port was configured by the SM; otherwise there is no need to send */
+ if (attr.state != IB_PORT_ACTIVE) {
+ pr_debug("port %d not active...rescheduling\n", port);
+ resched_delay = 5 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+
+ callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
+ if (!callback_context) {
+ err = -ENOMEM;
+ resched_delay = HZ * 5;
+ goto new_schedule;
+ }
+ callback_context->port = port;
+ callback_context->dev = dev;
+ callback_context->block_num = index;
+
+ memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
+
+ guid_info_rec.lid = cpu_to_be16(attr.lid);
+ guid_info_rec.block_num = index;
+
+ memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
+ GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
+ comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
+ rec_det->guid_indexes;
+
+ init_completion(&callback_context->done);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_add_tail(&callback_context->list, head);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+ callback_context->query_id =
+ ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
+ ibdev, port, &guid_info_rec,
+ comp_mask, rec_det->method, 1000,
+ GFP_KERNEL, aliasguid_query_handler,
+ callback_context,
+ &callback_context->sa_query);
+ if (callback_context->query_id < 0) {
+ pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
+ "%d. will reschedule to the next 1 sec.\n",
+ callback_context->query_id);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ list_del(&callback_context->list);
+ kfree(callback_context);
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ resched_delay = 1 * HZ;
+ err = -EAGAIN;
+ goto new_schedule;
+ }
+ err = 0;
+ goto out;
+
+new_schedule:
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ invalidate_guid_record(dev, port, index);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ resched_delay);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+
+out:
+ return err;
+}
+
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
+{
+ int i;
+ unsigned long flags, flags1;
+
+ pr_debug("port %d\n", port);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
+ invalidate_guid_record(dev, port, i);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
+ /*
+ * Make sure no work waits in the queue. If the work is already
+ * queued (not on the timer) the cancel will fail; that is not a
+ * problem because we just want the work started.
+ */
+ cancel_delayed_work(&dev->sriov.alias_guid.
+ ports_guid[port - 1].alias_guid_work);
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
+ &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
+ 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+ struct mlx4_next_alias_guid_work *rec)
+{
+ int j;
+ unsigned long flags;
+
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+ if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
+ MLX4_GUID_INFO_STATUS_IDLE) {
+ memcpy(&rec->rec_det,
+ &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
+ sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
+ rec->port = port;
+ rec->block_num = j;
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
+ MLX4_GUID_INFO_STATUS_PENDING;
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+ }
+ return -ENOENT;
+}
+
+static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
+ int rec_index,
+ struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+{
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
+ rec_det->guid_indexes;
+ memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
+ rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
+ rec_det->status;
+}
+
+static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+{
+ int j;
+ struct mlx4_sriov_alias_guid_info_rec_det rec_det;
+
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
+ rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
+ IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
+ IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
+ IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
+ IB_SA_GUIDINFO_REC_GID7;
+ rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
+ set_administratively_guid_record(dev, port, j, &rec_det);
+ }
+}
+
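+/*
+ * Per-port work handler: pick the next IDLE alias GUID record and
+ * push it to the SA via set_guid_rec().
+ */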
+static void alias_guid_work(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ int ret = 0;
+ struct mlx4_next_alias_guid_work *rec;
+ struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
+ container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
+ alias_guid_work);
+ struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
+ struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
+ struct mlx4_ib_sriov,
+ alias_guid);
+ struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
+
+ rec = kzalloc(sizeof *rec, GFP_KERNEL);
+ if (!rec) {
+ pr_err("alias_guid_work: No Memory\n");
+ return;
+ }
+
+ pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
+ ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
+ if (ret) {
+ pr_debug("No more records to update.\n");
+ goto out;
+ }
+
+ set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
+ &rec->rec_det);
+
+out:
+ kfree(rec);
+}
+
+
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
+{
+ unsigned long flags, flags1;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ if (!dev->sriov.is_going_down) {
+ queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
+ &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
+ }
+ spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ int i;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct mlx4_alias_guid_work_context *cb_ctx;
+ struct mlx4_sriov_alias_guid_port_rec_det *det;
+ struct ib_sa_query *sa_query;
+ unsigned long flags;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
+ det = &sriov->alias_guid.ports_guid[i];
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ while (!list_empty(&det->cb_list)) {
+ cb_ctx = list_entry(det->cb_list.next,
+ struct mlx4_alias_guid_work_context,
+ list);
+ sa_query = cb_ctx->sa_query;
+ cb_ctx->sa_query = NULL;
+ list_del(&cb_ctx->list);
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ ib_sa_cancel_query(cb_ctx->query_id, sa_query);
+ wait_for_completion(&cb_ctx->done);
+ kfree(cb_ctx);
+ spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
+ }
+ for (i = 0; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ }
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+}
+
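+/*
+ * Master-only initialization: register the SA client, set up the
+ * per-port alias GUID records and create a single-threaded workqueue
+ * for each port.
+ */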
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
+{
+ char alias_wq_name[15];
+ int ret = 0;
+ int i, j, k;
+ union ib_gid gid;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+ dev->sriov.alias_guid.sa_client =
+ kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
+ if (!dev->sriov.alias_guid.sa_client)
+ return -ENOMEM;
+
+ ib_sa_register_client(dev->sriov.alias_guid.sa_client);
+
+ spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
+
+ for (i = 1; i <= dev->num_ports; ++i) {
+ if (dev->ib_dev.query_gid(&dev->ib_dev, i, 0, &gid)) {
+ ret = -EFAULT;
+ goto err_unregister;
+ }
+ }
+
+ for (i = 0; i < dev->num_ports; i++) {
+ memset(&dev->sriov.alias_guid.ports_guid[i], 0,
+ sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
+ /* Check if the SM doesn't need to assign the GUIDs */
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+ if (mlx4_ib_sm_guid_assign) {
+ dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].
+ ownership = MLX4_GUID_DRIVER_ASSIGN;
+ continue;
+ }
+ dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
+ ownership = MLX4_GUID_NONE_ASSIGN;
+ /* Mark each value as deleted until the
+ * sysadmin assigns it a valid value */
+ for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+ *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
+ all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
+ cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+ }
+ }
+ INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
+ /* Prepare the records; set them to be allocated by the SM */
+ for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
+ invalidate_guid_record(dev, i + 1, j);
+
+ dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
+ dev->sriov.alias_guid.ports_guid[i].port = i;
+ if (mlx4_ib_sm_guid_assign)
+ set_all_slaves_guids(dev, i);
+
+ snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
+ dev->sriov.alias_guid.ports_guid[i].wq =
+ create_singlethread_workqueue(alias_wq_name);
+ if (!dev->sriov.alias_guid.ports_guid[i].wq) {
+ ret = -ENOMEM;
+ goto err_thread;
+ }
+ INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
+ alias_guid_work);
+ }
+ return 0;
+
+err_thread:
+ for (--i; i >= 0; i--) {
+ destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ dev->sriov.alias_guid.ports_guid[i].wq = NULL;
+ }
+
+err_unregister:
+ ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
+ kfree(dev->sriov.alias_guid.sa_client);
+ dev->sriov.alias_guid.sa_client = NULL;
+ pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
new file mode 100644
index 00000000000..56a593e0ae5
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/rbtree.h>
+#include <linux/idr.h>
+#include <rdma/ib_cm.h>
+
+#include "mlx4_ib.h"
+
+#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
+
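+/*
+ * Maps a slave's local CM ID (sl_cm_id) to the paravirtualized CM ID
+ * (pv_cm_id) used on the wire. Entries are kept both in an rb-tree
+ * keyed by (slave_id, sl_cm_id) and in an IDR keyed by pv_cm_id.
+ */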
+struct id_map_entry {
+ struct rb_node node;
+
+ u32 sl_cm_id;
+ u32 pv_cm_id;
+ int slave_id;
+ int scheduled_delete;
+ struct mlx4_ib_dev *dev;
+
+ struct list_head list;
+ struct delayed_work timeout;
+};
+
+struct cm_generic_msg {
+ struct ib_mad_hdr hdr;
+
+ __be32 local_comm_id;
+ __be32 remote_comm_id;
+};
+
+struct cm_sidr_generic_msg {
+ struct ib_mad_hdr hdr;
+ __be32 request_id;
+};
+
+struct cm_req_msg {
+ unsigned char unused[0x60];
+ union ib_gid primary_path_sgid;
+};
+
+
+static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->local_comm_id = cpu_to_be32(cm_id);
+ }
+}
+
+static u32 get_local_comm_id(struct ib_mad *mad)
+{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return -1;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ return be32_to_cpu(msg->local_comm_id);
+ }
+}
+
+static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
+{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->remote_comm_id = cpu_to_be32(cm_id);
+ }
+}
+
+static u32 get_remote_comm_id(struct ib_mad *mad)
+{
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return -1;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ return be32_to_cpu(msg->remote_comm_id);
+ }
+}
+
+static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
+{
+ struct cm_req_msg *msg = (struct cm_req_msg *)mad;
+
+ return msg->primary_path_sgid;
+}
+
+/* Caller must hold the sriov id_map_lock */
+static struct id_map_entry *
+id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node *node = sl_id_map->rb_node;
+
+ while (node) {
+ struct id_map_entry *id_map_entry =
+ rb_entry(node, struct id_map_entry, node);
+
+ if (id_map_entry->sl_cm_id > sl_cm_id)
+ node = node->rb_left;
+ else if (id_map_entry->sl_cm_id < sl_cm_id)
+ node = node->rb_right;
+ else if (id_map_entry->slave_id > slave_id)
+ node = node->rb_left;
+ else if (id_map_entry->slave_id < slave_id)
+ node = node->rb_right;
+ else
+ return id_map_entry;
+ }
+ return NULL;
+}
+
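+/*
+ * Delayed-work handler: drop a stale mapping from both the rb-tree
+ * and the IDR once the cleanup timeout expires.
+ */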
+static void id_map_ent_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
+ struct id_map_entry *db_ent, *found_ent;
+ struct mlx4_ib_dev *dev = ent->dev;
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ int pv_id = (int) ent->pv_cm_id;
+
+ spin_lock(&sriov->id_map_lock);
+ db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
+ if (!db_ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_id);
+
+out:
+ list_del(&ent->list);
+ spin_unlock(&sriov->id_map_lock);
+ kfree(ent);
+}
+
+static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct id_map_entry *ent, *found_ent;
+
+ spin_lock(&sriov->id_map_lock);
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
+ if (!ent)
+ goto out;
+ found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
+ if (found_ent && found_ent == ent)
+ rb_erase(&found_ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, pv_cm_id);
+out:
+ spin_unlock(&sriov->id_map_lock);
+}
+
+static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
+{
+ struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
+ struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
+ struct id_map_entry *ent;
+ int slave_id = new->slave_id;
+ int sl_cm_id = new->sl_cm_id;
+
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
+ if (ent) {
+ pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
+ sl_cm_id);
+
+ rb_replace_node(&ent->node, &new->node, sl_id_map);
+ return;
+ }
+
+ /* Go to the bottom of the tree */
+ while (*link) {
+ parent = *link;
+ ent = rb_entry(parent, struct id_map_entry, node);
+
+ if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ rb_link_node(&new->node, parent, link);
+ rb_insert_color(&new->node, sl_id_map);
+}
+
+static struct id_map_entry *
+id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
+{
+ int ret;
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
+ if (!ent) {
+ mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ent->sl_cm_id = sl_cm_id;
+ ent->slave_id = slave_id;
+ ent->scheduled_delete = 0;
+ ent->dev = to_mdev(ibdev);
+ INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
+
+ ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT);
+ if (ret >= 0) {
+ ent->pv_cm_id = (u32)ret;
+ sl_id_map_add(ibdev, ent);
+ list_add_tail(&ent->list, &sriov->cm_list);
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+ idr_preload_end();
+
+ if (ret >= 0)
+ return ent;
+
+ /* error flow */
+ kfree(ent);
+ mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
+ return ERR_PTR(-ENOMEM);
+}
+
+static struct id_map_entry *
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+{
+ struct id_map_entry *ent;
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+
+ spin_lock(&sriov->id_map_lock);
+ if (*pv_cm_id == -1) {
+ ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ if (ent)
+ *pv_cm_id = (int) ent->pv_cm_id;
+ } else
+ ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
+ spin_unlock(&sriov->id_map_lock);
+
+ return ent;
+}
+
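+/* Arm the delayed cleanup of a mapping (used once a DREQ is seen) */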
+static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
+{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ unsigned long flags;
+
+ spin_lock(&sriov->id_map_lock);
+ spin_lock_irqsave(&sriov->going_down_lock, flags);
+ /* Don't schedule the delayed cleanup if the device is going down */
+ if (!sriov->is_going_down) {
+ id->scheduled_delete = 1;
+ schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ }
+ spin_unlock_irqrestore(&sriov->going_down_lock, flags);
+ spin_unlock(&sriov->id_map_lock);
+}
+
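+/*
+ * Multiplex path (slave to wire): replace the slave's local comm ID
+ * with the paravirtualized comm ID before the CM MAD is sent out.
+ */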
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad)
+{
+ struct id_map_entry *id;
+ u32 sl_cm_id;
+ int pv_cm_id = -1;
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ sl_cm_id = get_local_comm_id(mad);
+ id = id_map_alloc(ibdev, slave_id, sl_cm_id);
+ if (IS_ERR(id)) {
+ mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
+ __func__, slave_id, sl_cm_id);
+ return PTR_ERR(id);
+ }
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ return 0;
+ } else {
+ sl_cm_id = get_local_comm_id(mad);
+ id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+ }
+
+ if (!id) {
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
+ slave_id, sl_cm_id);
+ return -EINVAL;
+ }
+
+ set_local_comm_id(mad, id->pv_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
+ id_map_find_del(ibdev, pv_cm_id);
+
+ return 0;
+}
+
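+/*
+ * Demux path (wire to slave): resolve the destination slave and
+ * restore its original comm ID in the incoming CM MAD.
+ */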
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad)
+{
+ u32 pv_cm_id;
+ struct id_map_entry *id;
+
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ union ib_gid gid;
+
+ if (!slave)
+ return 0;
+
+ gid = gid_from_req_msg(ibdev, mad);
+ *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
+ if (*slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
+ gid.global.interface_id);
+ return -ENOENT;
+ }
+ return 0;
+ }
+
+ pv_cm_id = get_remote_comm_id(mad);
+ id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
+
+ if (!id) {
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ return -ENOENT;
+ }
+
+ if (slave)
+ *slave = id->slave_id;
+ set_remote_comm_id(mad, id->sl_cm_id);
+
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ schedule_delayed(ibdev, id);
+ else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
+ id_map_find_del(ibdev, (int) pv_cm_id);
+ }
+
+ return 0;
+}
+
+void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
+{
+ spin_lock_init(&dev->sriov.id_map_lock);
+ INIT_LIST_HEAD(&dev->sriov.cm_list);
+ dev->sriov.sl_id_map = RB_ROOT;
+ idr_init(&dev->sriov.pv_id_table);
+}
+
+/* slave = -1 ==> all slaves */
+/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
+void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
+{
+ struct mlx4_ib_sriov *sriov = &dev->sriov;
+ struct rb_root *sl_id_map = &sriov->sl_id_map;
+ struct list_head lh;
+ struct rb_node *nd;
+ int need_flush = 1;
+ struct id_map_entry *map, *tmp_map;
+ /* cancel all delayed work queue entries */
+ INIT_LIST_HEAD(&lh);
+ spin_lock(&sriov->id_map_lock);
+ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
+ if (slave < 0 || slave == map->slave_id) {
+ if (map->scheduled_delete)
+ need_flush &= !!cancel_delayed_work(&map->timeout);
+ }
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+
+ if (!need_flush)
+ flush_scheduled_work(); /* make sure all timers were flushed */
+
+ /* now, remove all leftover entries from the databases */
+ spin_lock(&sriov->id_map_lock);
+ if (slave < 0) {
+ while (rb_first(sl_id_map)) {
+ struct id_map_entry *ent =
+ rb_entry(rb_first(sl_id_map),
+ struct id_map_entry, node);
+
+ rb_erase(&ent->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
+ }
+ list_splice_init(&dev->sriov.cm_list, &lh);
+ } else {
+ /* first, move nodes belonging to slave to db remove list */
+ nd = rb_first(sl_id_map);
+ while (nd) {
+ struct id_map_entry *ent =
+ rb_entry(nd, struct id_map_entry, node);
+ nd = rb_next(nd);
+ if (ent->slave_id == slave)
+ list_move_tail(&ent->list, &lh);
+ }
+ /* remove those nodes from databases */
+ list_for_each_entry_safe(map, tmp_map, &lh, list) {
+ rb_erase(&map->node, sl_id_map);
+ idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
+ }
+
+ /* move remaining nodes belonging to this slave from cm_list */
+ list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
+ if (slave == map->slave_id)
+ list_move_tail(&map->list, &lh);
+ }
+ }
+
+ spin_unlock(&sriov->id_map_lock);
+
+ /* free any map entries left behind due to cancel_delayed_work above */
+ list_for_each_entry_safe(map, tmp_map, &lh, list) {
+ list_del(&map->list);
+ kfree(map);
+ }
+}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 299f20832ab..1066eec854a 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -32,6 +33,8 @@
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
+#include <linux/mlx4/srq.h>
+#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -48,7 +51,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
struct ib_cq *ibcq;
if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on CQ %06x\n", type, cq->cqn);
return;
}
@@ -64,7 +67,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
- return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+ return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -75,8 +78,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+ struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
- return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+ return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
@@ -97,18 +101,19 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
{
int err;
- err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
- PAGE_SIZE * 2, &buf->buf);
+ err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
+ PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
if (err)
goto out;
+ buf->entry_size = dev->dev->caps.cqe_size;
err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
&buf->mtt);
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
if (err)
goto err_mtt;
@@ -118,8 +123,7 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &buf->mtt);
err_buf:
- mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
- &buf->buf);
+ mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
out:
return err;
@@ -127,7 +131,7 @@ out:
static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
- mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+ mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -135,8 +139,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
u64 buf_addr, int cqe)
{
int err;
+ int cqe_size = dev->dev->caps.cqe_size;
- *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+ *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@@ -204,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &to_mucontext(context)->uar;
} else {
- err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
if (err)
goto err_cq;
@@ -220,8 +225,11 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &dev->priv_uar;
}
+ if (dev->eq_table)
+ vector = dev->eq_table[vector % ibdev->num_comp_vectors];
+
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq, 0);
+ cq->db.dma, &cq->mcq, vector, 0, 0);
if (err)
goto err_dbmap;
@@ -316,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
u32 i;
i = cq->mcq.cons_index;
- while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+ while (get_sw_cqe(cq, i))
++i;
return i - cq->mcq.cons_index;
@@ -324,16 +332,25 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
- struct mlx4_cqe *cqe;
+ struct mlx4_cqe *cqe, *new_cqe;
int i;
+ int cqe_size = cq->buf.entry_size;
+ int cqe_inc = cqe_size == 64 ? 1 : 0;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
+ cqe += cqe_inc;
+
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
- memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
- (i + 1) & cq->resize_buf->cqe),
- get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+ new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
+ (i + 1) & cq->resize_buf->cqe);
+ memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+ new_cqe += cqe_inc;
+
+ new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
+ (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+ cqe += cqe_inc;
}
++cq->mcq.cons_index;
}
@@ -342,12 +359,13 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
struct mlx4_ib_cq *cq = to_mcq(ibcq);
+ struct mlx4_mtt mtt;
int outst_cqe;
int err;
mutex_lock(&cq->resize_mutex);
- if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+ if (entries < 1) {
err = -EINVAL;
goto out;
}
@@ -358,12 +376,17 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
+ if (entries > dev->dev->caps.max_cqes) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (ibcq->uobject) {
err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
if (err)
goto out;
} else {
- /* Can't be smaller then the number of outstanding CQEs */
+ /* Can't be smaller than the number of outstanding CQEs */
outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
if (entries < outst_cqe + 1) {
err = 0;
@@ -375,10 +398,13 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
+ mtt = cq->buf.mtt;
+
err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt);
if (err)
goto err_buf;
+ mlx4_mtt_cleanup(dev->dev, &mtt);
if (ibcq->uobject) {
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
@@ -389,10 +415,14 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cq->resize_buf = NULL;
cq->resize_umem = NULL;
} else {
+ struct mlx4_ib_cq_buf tmp_buf;
+ int tmp_cqe = 0;
+
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
mlx4_ib_cq_resize_copy_cqes(cq);
- mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ tmp_buf = cq->buf;
+ tmp_cqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
@@ -400,11 +430,15 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
cq->resize_buf = NULL;
}
spin_unlock_irq(&cq->lock);
+
+ if (tmp_cqe)
+ mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe);
}
goto out;
err_buf:
+ mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt);
if (!ibcq->uobject)
mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf,
cq->resize_buf->cqe);
@@ -419,6 +453,7 @@ err_buf:
out:
mutex_unlock(&cq->resize_mutex);
+
return err;
}
@@ -447,7 +482,7 @@ static void dump_cqe(void *cqe)
{
__be32 *buf = cqe;
- printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
@@ -457,7 +492,7 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
struct ib_wc *wc)
{
if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
- printk(KERN_DEBUG "local QP operation err "
+ pr_debug("local QP operation err "
"(QPN %06x, WQE index %x, vendor syndrome %02x, "
"opcode = %02x)\n",
be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
@@ -514,20 +549,48 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
wc->vendor_err = cqe->vendor_err_syndrome;
}
-static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum)
+static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
{
- return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 |
- MLX4_CQE_IPOIB_STATUS_IPV4F |
- MLX4_CQE_IPOIB_STATUS_IPV4OPT |
- MLX4_CQE_IPOIB_STATUS_IPV6 |
- MLX4_CQE_IPOIB_STATUS_IPOK)) ==
- cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 |
- MLX4_CQE_IPOIB_STATUS_IPOK)) &&
- (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP |
- MLX4_CQE_IPOIB_STATUS_TCP)) &&
+ return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPV4F |
+ MLX4_CQE_STATUS_IPV4OPT |
+ MLX4_CQE_STATUS_IPV6 |
+ MLX4_CQE_STATUS_IPOK)) ==
+ cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
+ MLX4_CQE_STATUS_IPOK)) &&
+ (status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
+ MLX4_CQE_STATUS_TCP)) &&
checksum == cpu_to_be16(0xffff);
}
+static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+ unsigned tail, struct mlx4_cqe *cqe, int is_eth)
+{
+ struct mlx4_ib_proxy_sqp_hdr *hdr;
+
+ ib_dma_sync_single_for_cpu(qp->ibqp.device,
+ qp->sqp_proxy_rcv[tail].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
+ wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
+ wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
+ wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
+ wc->dlid_path_bits = 0;
+
+ if (is_eth) {
+ wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
+ memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
+ memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ } else {
+ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
+ wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
+ }
+
+ return 0;
+}
+
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -536,16 +599,22 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
+ struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
+ int is_eth;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
+ unsigned tail = 0;
repoll:
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
+ if (cq->buf.entry_size == 64)
+ cqe++;
+
++cq->mcq.cons_index;
/*
@@ -560,7 +629,7 @@ repoll:
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
is_send)) {
- printk(KERN_WARNING "Completion for NOP opcode detected!\n");
+ pr_warn("Completion for NOP opcode detected!\n");
return -EINVAL;
}
@@ -581,17 +650,17 @@ repoll:
}
if (!*cur_qp ||
- (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
+ (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
/*
* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
*/
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
- be32_to_cpu(cqe->my_qpn));
+ be32_to_cpu(cqe->vlan_my_qpn));
if (unlikely(!mqp)) {
- printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
- cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
+ pr_warn("CQ %06x with entry for unknown QPN %06x\n",
+ cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
return -EINVAL;
}
@@ -600,6 +669,20 @@ repoll:
wc->qp = &(*cur_qp)->ibqp;
+ if (wc->qp->qp_type == IB_QPT_XRC_TGT) {
+ u32 srq_num;
+ g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
+ srq_num = g_mlpath_rqpn & 0xffffff;
+ /* SRQ is also in the radix tree */
+ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
+ srq_num);
+ if (unlikely(!msrq)) {
+ pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
+ cq->mcq.cqn, srq_num);
+ return -EINVAL;
+ }
+ }
+
if (is_send) {
wq = &(*cur_qp)->sq;
if (!(*cur_qp)->sq_signal_bits) {
@@ -613,9 +696,15 @@ repoll:
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wc->wr_id = srq->wrid[wqe_ctr];
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
+ } else if (msrq) {
+ srq = to_mibsrq(msrq);
+ wqe_ctr = be16_to_cpu(cqe->wqe_index);
+ wc->wr_id = srq->wrid[wqe_ctr];
+ mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ tail = wq->tail & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[tail];
++wq->tail;
}
@@ -637,6 +726,7 @@ repoll:
case MLX4_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
case MLX4_OPCODE_SEND:
+ case MLX4_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
break;
case MLX4_OPCODE_RDMA_READ:
@@ -651,12 +741,26 @@ repoll:
wc->opcode = IB_WC_FETCH_ADD;
wc->byte_len = 8;
break;
+ case MLX4_OPCODE_MASKED_ATOMIC_CS:
+ wc->opcode = IB_WC_MASKED_COMP_SWAP;
+ wc->byte_len = 8;
+ break;
+ case MLX4_OPCODE_MASKED_ATOMIC_FA:
+ wc->opcode = IB_WC_MASKED_FETCH_ADD;
+ wc->byte_len = 8;
+ break;
case MLX4_OPCODE_BIND_MW:
wc->opcode = IB_WC_BIND_MW;
break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
+ case MLX4_OPCODE_FMR:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ case MLX4_OPCODE_LOCAL_INVAL:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
}
} else {
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
@@ -667,6 +771,11 @@ repoll:
wc->wc_flags = IB_WC_WITH_IMM;
wc->ex.imm_data = cqe->immed_rss_invalid;
break;
+ case MLX4_RECV_OPCODE_SEND_INVAL:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid);
+ break;
case MLX4_RECV_OPCODE_SEND:
wc->opcode = IB_WC_RECV;
wc->wc_flags = 0;
@@ -678,15 +787,40 @@ repoll:
break;
}
+ is_eth = (rdma_port_get_link_layer(wc->qp->device,
+ (*cur_qp)->port) ==
+ IB_LINK_LAYER_ETHERNET);
+ if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
+ if ((*cur_qp)->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
+ return use_tunnel_data(*cur_qp, cq, wc, tail,
+ cqe, is_eth);
+ }
+
wc->slid = be16_to_cpu(cqe->rlid);
- wc->sl = cqe->sl >> 4;
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
- wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,
- cqe->checksum);
+ wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
+ cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
+ if (is_eth) {
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
+ if (be32_to_cpu(cqe->vlan_my_qpn) &
+ MLX4_CQE_VLAN_PRESENT_MASK) {
+ wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
+ MLX4_CQE_VID_MASK;
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ } else {
+ wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
+ wc->vlan_id = 0xffff;
+ }
}
return 0;
@@ -708,8 +842,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
break;
}
- if (npolled)
- mlx4_cq_set_ci(&cq->mcq);
+ mlx4_cq_set_ci(&cq->mcq);
spin_unlock_irqrestore(&cq->lock, flags);
@@ -736,6 +869,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
int nfreed = 0;
struct mlx4_cqe *cqe, *dest;
u8 owner_bit;
+ int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
/*
* First we need to find the current producer index, so we
@@ -754,12 +888,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
- if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
+ cqe += cqe_inc;
+
+ if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+ dest += cqe_inc;
+
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 8aee4233b38..c5174098636 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
- struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
- db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index cdca3a511e1..287ad0564ac 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -32,8 +32,13 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
+#include <linux/random.h>
#include <linux/mlx4/cmd.h>
+#include <linux/gfp.h>
+#include <rdma/ib_pma.h>
#include "mlx4_ib.h"
@@ -42,7 +47,62 @@ enum {
MLX4_IB_VENDOR_CLASS2 = 0xa
};
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+#define MLX4_TUN_SEND_WRID_SHIFT 34
+#define MLX4_TUN_QPN_SHIFT 32
+#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
+#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
+
+#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
+#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
+
+ /* Port mgmt change event handling */
+
+#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
+#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
+#define NUM_IDX_IN_PKEY_TBL_BLK 32
+#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
+#define GUID_TBL_BLK_NUM_ENTRIES 8
+#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
+
+struct mlx4_mad_rcv_buf {
+ struct ib_grh grh;
+ u8 payload[256];
+} __packed;
+
+struct mlx4_mad_snd_buf {
+ u8 payload[256];
+} __packed;
+
+struct mlx4_tunnel_mad {
+ struct ib_grh grh;
+ struct mlx4_ib_tunnel_header hdr;
+ struct ib_mad mad;
+} __packed;
+
+struct mlx4_rcv_tunnel_mad {
+ struct mlx4_rcv_tunnel_hdr hdr;
+ struct ib_grh grh;
+ struct ib_mad mad;
+} __packed;
+
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ int block, u32 change_bitmap);
+
+__be64 mlx4_ib_gen_node_guid(void)
+{
+#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
+ return cpu_to_be64(NODE_GUID_HI | prandom_u32());
+}
+
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
+{
+ return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
+ cpu_to_be64(0xff00000000000000LL);
+}
+
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad)
{
@@ -69,10 +129,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
- if (ignore_mkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
op_modifier |= 0x1;
- if (ignore_bkey || !in_wc)
+ if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
op_modifier |= 0x2;
+ if (mlx4_is_mfunc(dev->dev) &&
+ (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
+ op_modifier |= 0x8;
if (in_wc) {
struct {
@@ -105,9 +168,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
in_modifier |= in_wc->slid << 16;
}
- err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
- in_modifier, op_modifier,
- MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+ err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
+ mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
+ MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+ (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
if (!err)
memcpy(response_mad, outmailbox->buf, 256);
@@ -122,6 +186,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
struct ib_ah_attr ah_attr;
+ unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
@@ -136,48 +201,134 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
if (IS_ERR(new_ah))
return;
- spin_lock(&dev->sm_lock);
+ spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
ib_destroy_ah(dev->sm_ah[port_num - 1]);
dev->sm_ah[port_num - 1] = new_ah;
- spin_unlock(&dev->sm_lock);
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
}
/*
- * Snoop SM MADs for port info and P_Key table sets, so we can
- * synthesize LID change and P_Key change events.
+ * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
+ * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
*/
-static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
+static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
+ u16 prev_lid)
{
- struct ib_event event;
+ struct ib_port_info *pinfo;
+ u16 lid;
+ __be16 *base;
+ u32 bn, pkey_change_bitmap;
+ int i;
+
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
- mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
- struct ib_port_info *pinfo =
- (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ mad->mad_hdr.method == IB_MGMT_METHOD_SET)
+ switch (mad->mad_hdr.attr_id) {
+ case IB_SMP_ATTR_PORT_INFO:
+ pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ lid = be16_to_cpu(pinfo->lid);
- update_sm_ah(to_mdev(ibdev), port_num,
+ update_sm_ah(dev, port_num,
be16_to_cpu(pinfo->sm_lid),
pinfo->neighbormtu_mastersmsl & 0xf);
- event.device = ibdev;
- event.element.port_num = port_num;
-
if (pinfo->clientrereg_resv_subnetto & 0x80)
- event.event = IB_EVENT_CLIENT_REREGISTER;
- else
- event.event = IB_EVENT_LID_CHANGE;
+ handle_client_rereg_event(dev, port_num);
+
+ if (prev_lid != lid)
+ handle_lid_change_event(dev, port_num);
+ break;
+
+ case IB_SMP_ATTR_PKEY_TABLE:
+ if (!mlx4_is_mfunc(dev->dev)) {
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ break;
+ }
+
+ /* at this point, we are running in the master.
+ * Slaves do not receive SMPs.
+ */
+ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
+ base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
+ pkey_change_bitmap = 0;
+ for (i = 0; i < 32; i++) {
+ pr_debug("PKEY[%d] = x%x\n",
+ i + bn*32, be16_to_cpu(base[i]));
+ if (be16_to_cpu(base[i]) !=
+ dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
+ pkey_change_bitmap |= (1 << i);
+ dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
+ be16_to_cpu(base[i]);
+ }
+ }
+ pr_debug("PKEY Change event: port=%d, "
+ "block=0x%x, change_bitmap=0x%x\n",
+ port_num, bn, pkey_change_bitmap);
- ib_dispatch_event(&event);
+ if (pkey_change_bitmap) {
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_PKEY_CHANGE);
+ if (!dev->sriov.is_going_down)
+ __propagate_pkey_ev(dev, port_num, bn,
+ pkey_change_bitmap);
+ }
+ break;
+
+ case IB_SMP_ATTR_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port_num,
+ IB_EVENT_GID_CHANGE);
+ /* if master, notify relevant slaves */
+ if (mlx4_is_master(dev->dev) &&
+ !dev->sriov.is_going_down) {
+ bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
+ mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
+ (u8 *)(&((struct ib_smp *)mad)->data));
+ mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
+ (u8 *)(&((struct ib_smp *)mad)->data));
+ }
+ break;
+
+ default:
+ break;
}
+}
+
+static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ int block, u32 change_bitmap)
+{
+ int i, ix, slave, err;
+ int have_event = 0;
- if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
- event.device = ibdev;
- event.event = IB_EVENT_PKEY_CHANGE;
- event.element.port_num = port_num;
- ib_dispatch_event(&event);
+ for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
+ if (slave == mlx4_master_func_num(dev->dev))
+ continue;
+ if (!mlx4_is_slave_active(dev->dev, slave))
+ continue;
+
+ have_event = 0;
+ for (i = 0; i < 32; i++) {
+ if (!(change_bitmap & (1 << i)))
+ continue;
+ for (ix = 0;
+ ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
+ [ix] == i + 32 * block) {
+ err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
+ pr_debug("propagate_pkey_ev: slave %d,"
+ " port %d, ix %d (%d)\n",
+ slave, port_num, ix, err);
+ have_event = 1;
+ break;
+ }
+ }
+ if (have_event)
+ break;
}
}
}
@@ -185,13 +336,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
static void node_desc_override(struct ib_device *dev,
struct ib_mad *mad)
{
+ unsigned long flags;
+
if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
- spin_lock(&to_mdev(dev)->sm_lock);
+ spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
- spin_unlock(&to_mdev(dev)->sm_lock);
+ spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
}
}
@@ -201,35 +354,391 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
int ret;
+ unsigned long flags;
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
* wrong following the IB spec strictly, but we know
* it's OK for our devices).
*/
- spin_lock(&dev->sm_lock);
+ spin_lock_irqsave(&dev->sm_lock, flags);
memcpy(send_buf->mad, mad, sizeof *mad);
if ((send_buf->ah = dev->sm_ah[port_num - 1]))
ret = ib_post_send_mad(send_buf, NULL);
else
ret = -EINVAL;
- spin_unlock(&dev->sm_lock);
+ spin_unlock_irqrestore(&dev->sm_lock, flags);
if (ret)
ib_free_send_mad(send_buf);
}
}
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *sa_mad)
+{
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int i;
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
+ return i;
+ }
+ return -1;
+}
+
+
+static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
+ u8 port, u16 pkey, u16 *ix)
+{
+ int i, ret;
+ u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
+ u16 slot_pkey;
+
+ if (slave == mlx4_master_func_num(dev->dev))
+ return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
+
+ unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
+ continue;
+
+ pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
+
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
+ if (ret)
+ continue;
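+		/* low 15 bits carry the pkey value; bit 15 set means full
+		 * membership, clear means partial membership */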
+ if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
+ if (slot_pkey & 0x8000) {
+ *ix = (u16) pkey_ix;
+ return 0;
+ } else {
+ /* take first partial pkey index found */
+ if (partial_ix == 0xFF)
+ partial_ix = pkey_ix;
+ }
+ }
+ }
+
+ if (partial_ix < 0xFF) {
+ *ix = (u16) partial_ix;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_mad *mad)
+{
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ struct mlx4_ib_demux_pv_ctx *tun_ctx;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct mlx4_rcv_tunnel_mad *tun_mad;
+ struct ib_ah_attr attr;
+ struct ib_ah *ah;
+ struct ib_qp *src_qp = NULL;
+ unsigned tun_tx_ix = 0;
+ int dqpn;
+ int ret = 0;
+ u16 tun_pkey_ix;
+ u16 cached_pkey;
+ u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
+
+ if (dest_qpt > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_ctx = dev->sriov.demux[port-1].tun[slave];
+
+ /* check if proxy qp created */
+ if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
+ return -EAGAIN;
+
+ if (!dest_qpt)
+ tun_qp = &tun_ctx->qp[0];
+ else
+ tun_qp = &tun_ctx->qp[1];
+
+ /* compute P_Key index to put in tunnel header for slave */
+ if (dest_qpt) {
+ u16 pkey_ix;
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
+ if (ret)
+ return -EINVAL;
+
+ ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
+ if (ret)
+ return -EINVAL;
+ tun_pkey_ix = pkey_ix;
+ } else
+ tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+
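+	/* destination is the slave's proxy QP for this port and QP type;
+	 * each slave owns a block of 8 proxy QPs starting at
+	 * base_proxy_sqpn + 8 * slave */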
+ dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
+
+ /* get tunnel tx data buf for slave */
+ src_qp = tun_qp->qp;
+
+ /* create ah. Just need an empty one with the port num for the post send.
+ * The driver will set the force loopback bit in post_send */
+ memset(&attr, 0, sizeof attr);
+ attr.port_num = port;
+ if (is_eth) {
+ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ attr.ah_flags = IB_AH_GRH;
+ }
+ ah = ib_create_ah(tun_ctx->pd, &attr);
+ if (IS_ERR(ah))
+ return -ENOMEM;
+
+	/* allocate a tunnel tx buffer slot only after the earlier
+	 * failure-return points have passed */
+ spin_lock(&tun_qp->tx_lock);
+ if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
+ (MLX4_NUM_TUNNEL_BUFS - 1))
+ ret = -EAGAIN;
+ else
+ tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ spin_unlock(&tun_qp->tx_lock);
+ if (ret)
+ goto out;
+
+ tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
+ if (tun_qp->tx_ring[tun_tx_ix].ah)
+ ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ tun_qp->tx_ring[tun_tx_ix].ah = ah;
+ ib_dma_sync_single_for_cpu(&dev->ib_dev,
+ tun_qp->tx_ring[tun_tx_ix].buf.map,
+ sizeof (struct mlx4_rcv_tunnel_mad),
+ DMA_TO_DEVICE);
+
+ /* copy over to tunnel buffer */
+ if (grh)
+ memcpy(&tun_mad->grh, grh, sizeof *grh);
+ memcpy(&tun_mad->mad, mad, sizeof *mad);
+
+ /* adjust tunnel data */
+ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
+ tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
+ tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+
+ if (is_eth) {
+ u16 vlan = 0;
+ if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
+ NULL)) {
+ /* VST mode */
+ if (vlan != wc->vlan_id)
+ /* Packet vlan is not the VST-assigned vlan.
+ * Drop the packet.
+ */
+ goto out;
+ else
+ /* Remove the vlan tag before forwarding
+ * the packet to the VF.
+ */
+ vlan = 0xffff;
+ } else {
+ vlan = wc->vlan_id;
+ }
+
+ tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
+ memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
+ memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
+ } else {
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ }
+
+ ib_dma_sync_single_for_device(&dev->ib_dev,
+ tun_qp->tx_ring[tun_tx_ix].buf.map,
+ sizeof (struct mlx4_rcv_tunnel_mad),
+ DMA_TO_DEVICE);
+
+ list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
+ list.length = sizeof (struct mlx4_rcv_tunnel_mad);
+ list.lkey = tun_ctx->mr->lkey;
+
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.port_num = port;
+ wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+ wr.wr.ud.remote_qpn = dqpn;
+ wr.next = NULL;
+ wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = ib_post_send(src_qp, &wr, &bad_wr);
+out:
+ if (ret)
+ ib_destroy_ah(ah);
+ return ret;
+}
+
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+ struct ib_wc *wc, struct ib_grh *grh,
+ struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ int slave;
+ u8 *slave_id;
+ int is_eth = 0;
+
+ if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ is_eth = 0;
+ else
+ is_eth = 1;
+
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH)) {
+ mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+ return -EINVAL;
+ }
+ if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+ mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+ return -EINVAL;
+ }
+ if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
+ return 0;
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+ }
+
+ /* Initially assume that this mad is for us */
+ slave = mlx4_master_func_num(dev->dev);
+
+ /* See if the slave id is encoded in a response mad */
+ if (mad->mad_hdr.method & 0x80) {
+ slave_id = (u8 *) &mad->mad_hdr.tid;
+ slave = *slave_id;
+		if (slave != 255) /* 255 indicates the dom0 */
+ *slave_id = 0; /* remap tid */
+ }
+
+ /* If a grh is present, we demux according to it */
+ if (wc->wc_flags & IB_WC_GRH) {
+ slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
+ if (slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ }
+ /* Class-specific handling */
+ switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ /* 255 indicates the dom0 */
+ if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+ if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+ return -EPERM;
+			/* for a VF, drop unsolicited MADs */
+ if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+ mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+ slave, mad->mad_hdr.mgmt_class,
+ mad->mad_hdr.method);
+ return -EINVAL;
+ }
+ }
+ break;
+ case IB_MGMT_CLASS_SUBN_ADM:
+ if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
+ (struct ib_sa_mad *) mad))
+ return 0;
+ break;
+ case IB_MGMT_CLASS_CM:
+ if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
+ return 0;
+ break;
+ case IB_MGMT_CLASS_DEVICE_MGMT:
+ if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
+ return 0;
+ break;
+ default:
+ /* Drop unsupported classes for slaves in tunnel mode */
+ if (slave != mlx4_master_func_num(dev->dev)) {
+ pr_debug("dropping unsupported ingress mad from class:%d "
+ "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
+ return 0;
+ }
+ }
+	/* make sure a slave value of 255 (dom0) that was not handled above is rejected here */
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+}
+
+static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
- u16 slid;
+ u16 slid, prev_lid = 0;
int err;
+ struct ib_port_attr pattr;
+
+ if (in_wc && in_wc->qp->qp_num) {
+ pr_debug("received MAD: slid:%d sqpn:%d "
+ "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
+ in_wc->slid, in_wc->src_qp,
+ in_wc->dlid_path_bits,
+ in_wc->qp->qp_num,
+ in_wc->wc_flags,
+ in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+ be16_to_cpu(in_mad->mad_hdr.attr_id));
+ if (in_wc->wc_flags & IB_WC_GRH) {
+ pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
+ be64_to_cpu(in_grh->sgid.global.subnet_prefix),
+ be64_to_cpu(in_grh->sgid.global.interface_id));
+ pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
+ be64_to_cpu(in_grh->dgid.global.subnet_prefix),
+ be64_to_cpu(in_grh->dgid.global.interface_id));
+ }
+ }
slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
@@ -246,12 +755,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS;
/*
- * Don't process SMInfo queries or vendor-specific
- * MADs -- the SMA can't handle them.
+ * Don't process SMInfo queries -- the SMA can't handle them.
*/
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
- ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
- IB_SMP_ATTR_VENDOR_MASK))
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
return IB_MAD_RESULT_SUCCESS;
} else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
@@ -263,16 +769,27 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
} else
return IB_MAD_RESULT_SUCCESS;
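+	/* a PortInfo SET may change the port LID; remember the current LID
+	 * so smp_snoop() can tell whether it actually changed */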
+ if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+ !ib_query_port(ibdev, port_num, &pattr))
+ prev_lid = pattr.lid;
+
err = mlx4_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
+ (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
+ (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
+ MLX4_MAD_IFC_NET_VIEW,
port_num, in_wc, in_grh, in_mad, out_mad);
if (err)
return IB_MAD_RESULT_FAILURE;
if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad);
- node_desc_override(ibdev, out_mad);
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
+ smp_snoop(ibdev, port_num, in_mad, prev_lid);
+ /* slaves get node desc from FW */
+ if (!mlx4_is_slave(to_mdev(ibdev)->dev))
+ node_desc_override(ibdev, out_mad);
}
/* set return bit in status of directed route responses */
@@ -286,9 +803,77 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
+static void edit_counter(struct mlx4_counter *cnt,
+ struct ib_pma_portcounters *pma_cnt)
+{
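+	/* PMA PortXmitData/PortRcvData count 32-bit words, hence the >> 2 */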
+ pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2));
+ pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2));
+ pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames));
+ pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames));
+}
+
+static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int err;
+ u32 inmod = dev->counters[port_num - 1] & 0xffff;
+ u8 mode;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return -EINVAL;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+ if (IS_ERR(mailbox))
+ return IB_MAD_RESULT_FAILURE;
+
+ err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
+ MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ err = IB_MAD_RESULT_FAILURE;
+ else {
+ memset(out_mad->data, 0, sizeof out_mad->data);
+ mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode;
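+		/* only counter mode 0 (basic) maps onto the PMA counters */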
+ switch (mode & 0xf) {
+ case 0:
+ edit_counter(mailbox->buf,
+ (void *)(out_mad->data + 40));
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ break;
+ default:
+ err = IB_MAD_RESULT_FAILURE;
+ }
+ }
+
+ mlx4_free_cmd_mailbox(dev->dev, mailbox);
+
+ return err;
+}
+
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ switch (rdma_port_get_link_layer(ibdev, port_num)) {
+ case IB_LINK_LAYER_INFINIBAND:
+ return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
+ in_grh, in_mad, out_mad);
+ case IB_LINK_LAYER_ETHERNET:
+ return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
+ in_grh, in_mad, out_mad);
+ default:
+ return -EINVAL;
+ }
+}
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
+ if (mad_send_wc->send_buf->context[0])
+ ib_destroy_ah(mad_send_wc->send_buf->context[0]);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -297,24 +882,30 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
struct ib_mad_agent *agent;
int p, q;
int ret;
+ enum rdma_link_layer ll;
- for (p = 0; p < dev->dev->caps.num_ports; ++p)
+ for (p = 0; p < dev->num_ports; ++p) {
+ ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
for (q = 0; q <= 1; ++q) {
- agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
- q ? IB_QPT_GSI : IB_QPT_SMI,
- NULL, 0, send_handler,
- NULL, NULL);
- if (IS_ERR(agent)) {
- ret = PTR_ERR(agent);
- goto err;
- }
- dev->send_agent[p][q] = agent;
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
+ agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
+ q ? IB_QPT_GSI : IB_QPT_SMI,
+ NULL, 0, send_handler,
+ NULL, NULL);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto err;
+ }
+ dev->send_agent[p][q] = agent;
+ } else
+ dev->send_agent[p][q] = NULL;
}
+ }
return 0;
err:
- for (p = 0; p < dev->dev->caps.num_ports; ++p)
+ for (p = 0; p < dev->num_ports; ++p)
for (q = 0; q <= 1; ++q)
if (dev->send_agent[p][q])
ib_unregister_mad_agent(dev->send_agent[p][q]);
@@ -327,14 +918,1246 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
struct ib_mad_agent *agent;
int p, q;
- for (p = 0; p < dev->dev->caps.num_ports; ++p) {
+ for (p = 0; p < dev->num_ports; ++p) {
for (q = 0; q <= 1; ++q) {
agent = dev->send_agent[p][q];
- dev->send_agent[p][q] = NULL;
- ib_unregister_mad_agent(agent);
+ if (agent) {
+ dev->send_agent[p][q] = NULL;
+ ib_unregister_mad_agent(agent);
+ }
}
if (dev->sm_ah[p])
ib_destroy_ah(dev->sm_ah[p]);
}
}
+
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
+
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
+ MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
+}
+
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
+{
+ /* re-configure the alias-guid and mcg's */
+ if (mlx4_is_master(dev->dev)) {
+ mlx4_ib_invalidate_all_guid_record(dev, port_num);
+
+ if (!dev->sriov.is_going_down) {
+ mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
+ MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
+ }
+ }
+ mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
+}
+
+static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
+ struct mlx4_eqe *eqe)
+{
+ __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
+ GET_MASK_FROM_EQE(eqe));
+}
+
+static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
+ u32 guid_tbl_blk_num, u32 change_bitmap)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ u16 i;
+
+ if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
+ return;
+
+ in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+ out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ if (!in_mad || !out_mad) {
+ mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
+ goto out;
+ }
+
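+	/* the EQE covers a block of 32 GUIDs; GUID_INFO records hold 8 each,
+	 * so convert the block number into a GUID_INFO attr_mod base */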
+ guid_tbl_blk_num *= 4;
+
+ for (i = 0; i < 4; i++) {
+ if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
+ continue;
+ memset(in_mad, 0, sizeof *in_mad);
+ memset(out_mad, 0, sizeof *out_mad);
+
+ in_mad->base_version = 1;
+ in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ in_mad->class_version = 1;
+ in_mad->method = IB_MGMT_METHOD_GET;
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
+
+ if (mlx4_MAD_IFC(dev,
+ MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
+ port_num, NULL, NULL, in_mad, out_mad)) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
+ goto out;
+ }
+
+ mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
+ port_num,
+ (u8 *)(&((struct ib_smp *)out_mad)->data));
+ mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
+ port_num,
+ (u8 *)(&((struct ib_smp *)out_mad)->data));
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return;
+}
+
+void handle_port_mgmt_change_event(struct work_struct *work)
+{
+ struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
+ struct mlx4_ib_dev *dev = ew->ib_dev;
+ struct mlx4_eqe *eqe = &(ew->ib_eqe);
+ u8 port = eqe->event.port_mgmt_change.port;
+ u32 changed_attr;
+ u32 tbl_block;
+ u32 change_bitmap;
+
+ switch (eqe->subtype) {
+ case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
+ changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
+
+ /* Update the SM ah - This should be done before handling
+		 * the other changed attributes so that MADs can be sent to the SM */
+ if (changed_attr & MSTR_SM_CHANGE_MASK) {
+ u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
+ u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
+ update_sm_ah(dev, port, lid, sl);
+ }
+
+ /* Check if it is a lid change event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
+ handle_lid_change_event(dev, port);
+
+ /* Generate GUID changed event */
+ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+			/* if master, notify all slaves */
+ if (mlx4_is_master(dev->dev))
+ mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
+ MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
+ }
+
+ if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
+ handle_client_rereg_event(dev, port);
+ break;
+
+ case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
+ if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
+ propagate_pkey_ev(dev, port, eqe);
+ break;
+ case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
+ /* paravirtualized master's guid is guid 0 -- does not change */
+ if (!mlx4_is_master(dev->dev))
+ mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
+		/* if master, notify relevant slaves */
+ else if (!dev->sriov.is_going_down) {
+ tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
+ change_bitmap = GET_MASK_FROM_EQE(eqe);
+ handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
+ }
+ break;
+ default:
+ pr_warn("Unsupported subtype 0x%x for "
+ "Port Management Change event\n", eqe->subtype);
+ }
+
+ kfree(ew);
+}
+
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type)
+{
+ struct ib_event event;
+
+ event.device = &dev->ib_dev;
+ event.element.port_num = port_num;
+ event.event = type;
+
+ ib_dispatch_event(&event);
+}
+
+static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
+ struct mlx4_ib_demux_pv_qp *tun_qp,
+ int index)
+{
+ struct ib_sge sg_list;
+ struct ib_recv_wr recv_wr, *bad_recv_wr;
+ int size;
+
+ size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
+ sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
+
+ sg_list.addr = tun_qp->ring[index].map;
+ sg_list.length = size;
+ sg_list.lkey = ctx->mr->lkey;
+
+ recv_wr.next = NULL;
+ recv_wr.sg_list = &sg_list;
+ recv_wr.num_sge = 1;
+ recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
+ MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
+ ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
+ size, DMA_FROM_DEVICE);
+ return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
+}
+
+static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
+ int slave, struct ib_sa_mad *sa_mad)
+{
+ int ret = 0;
+
+ /* dispatch to different sa handlers */
+ switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
+ case IB_SA_ATTR_MC_MEMBER_REC:
+ ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
+
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, u16 pkey_index,
+ u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u8 *s_mac, struct ib_mad *mad)
+{
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ struct mlx4_ib_demux_pv_ctx *sqp_ctx;
+ struct mlx4_ib_demux_pv_qp *sqp;
+ struct mlx4_mad_snd_buf *sqp_mad;
+ struct ib_ah *ah;
+ struct ib_qp *send_qp = NULL;
+ unsigned wire_tx_ix = 0;
+ int ret = 0;
+ u16 wire_pkey_ix;
+ int src_qpnum;
+ u8 sgid_index;
+
+
+ sqp_ctx = dev->sriov.sqps[port-1];
+
+ /* check if proxy qp created */
+ if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
+ return -EAGAIN;
+
+ if (dest_qpt == IB_QPT_SMI) {
+ src_qpnum = 0;
+ sqp = &sqp_ctx->qp[0];
+ wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+ } else {
+ src_qpnum = 1;
+ sqp = &sqp_ctx->qp[1];
+ wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
+ }
+
+ send_qp = sqp->qp;
+
+ /* create ah */
+ sgid_index = attr->grh.sgid_index;
+ attr->grh.sgid_index = 0;
+ ah = ib_create_ah(sqp_ctx->pd, attr);
+ if (IS_ERR(ah))
+ return -ENOMEM;
+ attr->grh.sgid_index = sgid_index;
+ to_mah(ah)->av.ib.gid_index = sgid_index;
+ /* get rid of force-loopback bit */
+ to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+ spin_lock(&sqp->tx_lock);
+ if (sqp->tx_ix_head - sqp->tx_ix_tail >=
+ (MLX4_NUM_TUNNEL_BUFS - 1))
+ ret = -EAGAIN;
+ else
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ spin_unlock(&sqp->tx_lock);
+ if (ret)
+ goto out;
+
+ sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
+ if (sqp->tx_ring[wire_tx_ix].ah)
+ ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ sqp->tx_ring[wire_tx_ix].ah = ah;
+ ib_dma_sync_single_for_cpu(&dev->ib_dev,
+ sqp->tx_ring[wire_tx_ix].buf.map,
+ sizeof (struct mlx4_mad_snd_buf),
+ DMA_TO_DEVICE);
+
+ memcpy(&sqp_mad->payload, mad, sizeof *mad);
+
+ ib_dma_sync_single_for_device(&dev->ib_dev,
+ sqp->tx_ring[wire_tx_ix].buf.map,
+ sizeof (struct mlx4_mad_snd_buf),
+ DMA_TO_DEVICE);
+
+ list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
+ list.length = sizeof (struct mlx4_mad_snd_buf);
+ list.lkey = sqp_ctx->mr->lkey;
+
+ wr.wr.ud.ah = ah;
+ wr.wr.ud.port_num = port;
+ wr.wr.ud.pkey_index = wire_pkey_ix;
+ wr.wr.ud.remote_qkey = qkey;
+ wr.wr.ud.remote_qpn = remote_qpn;
+ wr.next = NULL;
+ wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+ if (s_mac)
+ memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+
+
+ ret = ib_post_send(send_qp, &wr, &bad_wr);
+out:
+ if (ret)
+ ib_destroy_ah(ah);
+ return ret;
+}
+
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ return slave;
+ return mlx4_get_base_gid_ix(dev->dev, slave, port);
+}
+
+static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+ struct ib_ah_attr *ah_attr)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ ah_attr->grh.sgid_index = slave;
+ else
+ ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+}
+
+static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
+ int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
+ struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
+ struct mlx4_ib_ah ah;
+ struct ib_ah_attr ah_attr;
+ u8 *slave_id;
+ int slave;
+ int port;
+
+ /* Get slave that sent this packet */
+ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
+ wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
+ (wc->src_qp & 0x1) != ctx->port - 1 ||
+ wc->src_qp & 0x4) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
+ return;
+ }
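+	/* proxy QPs come in blocks of 8 per slave, so the slave number can
+	 * be recovered from the source QP number */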
+ slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
+ if (slave != ctx->slave) {
+ mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+ "belongs to another slave\n", wc->src_qp);
+ return;
+ }
+
+ /* Map transaction ID */
+ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
+ sizeof (struct mlx4_tunnel_mad),
+ DMA_FROM_DEVICE);
+ switch (tunnel->mad.mad_hdr.method) {
+ case IB_MGMT_METHOD_SET:
+ case IB_MGMT_METHOD_GET:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_SA_METHOD_DELETE:
+ case IB_SA_METHOD_GET_MULTI:
+ case IB_SA_METHOD_GET_TRACE_TBL:
+ slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
+ if (*slave_id) {
+ mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
+ "class:%d slave:%d\n", *slave_id,
+ tunnel->mad.mad_hdr.mgmt_class, slave);
+ return;
+ } else
+ *slave_id = slave;
+ default:
+ /* nothing */;
+ }
+
+ /* Class-specific handling */
+ switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ if (slave != mlx4_master_func_num(dev->dev) &&
+ !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+ return;
+ break;
+ case IB_MGMT_CLASS_SUBN_ADM:
+ if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
+ (struct ib_sa_mad *) &tunnel->mad))
+ return;
+ break;
+ case IB_MGMT_CLASS_CM:
+ if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
+ (struct ib_mad *) &tunnel->mad))
+ return;
+ break;
+ case IB_MGMT_CLASS_DEVICE_MGMT:
+ if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
+ tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
+ return;
+ break;
+ default:
+ /* Drop unsupported classes for slaves in tunnel mode */
+ if (slave != mlx4_master_func_num(dev->dev)) {
+ mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
+ "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
+ return;
+ }
+ }
+
+ /* We are using standard ib_core services to send the mad, so generate a
+	 * standard address handle by decoding the tunnelled mlx4_ah fields */
+ memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
+ ah.ibah.device = ctx->ib_dev;
+ mlx4_ib_query_ah(&ah.ibah, &ah_attr);
+ if (ah_attr.ah_flags & IB_AH_GRH)
+ fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
+
+ port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num);
+ if (port < 0)
+ return;
+ ah_attr.port_num = port;
+ memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+	/* if the slave has a default vlan, use it */
+ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &ah_attr.vlan_id, &ah_attr.sl);
+
+ mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, wc->smac, &tunnel->mad);
+}
+
+static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int is_tun)
+{
+ int i;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ int rx_buf_size, tx_buf_size;
+
+ if (qp_type > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_qp = &ctx->qp[qp_type];
+
+ tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
+ GFP_KERNEL);
+ if (!tun_qp->ring)
+ return -ENOMEM;
+
+ tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ sizeof (struct mlx4_ib_tun_tx_buf),
+ GFP_KERNEL);
+ if (!tun_qp->tx_ring) {
+ kfree(tun_qp->ring);
+ tun_qp->ring = NULL;
+ return -ENOMEM;
+ }
+
+ if (is_tun) {
+ rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+ tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+ } else {
+ rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+ tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
+ if (!tun_qp->ring[i].addr)
+ goto err;
+ tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
+ tun_qp->ring[i].addr,
+ rx_buf_size,
+ DMA_FROM_DEVICE);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ tun_qp->tx_ring[i].buf.addr =
+ kmalloc(tx_buf_size, GFP_KERNEL);
+ if (!tun_qp->tx_ring[i].buf.addr)
+ goto tx_err;
+ tun_qp->tx_ring[i].buf.map =
+ ib_dma_map_single(ctx->ib_dev,
+ tun_qp->tx_ring[i].buf.addr,
+ tx_buf_size,
+ DMA_TO_DEVICE);
+ tun_qp->tx_ring[i].ah = NULL;
+ }
+ spin_lock_init(&tun_qp->tx_lock);
+ tun_qp->tx_ix_head = 0;
+ tun_qp->tx_ix_tail = 0;
+ tun_qp->proxy_qpt = qp_type;
+
+ return 0;
+
+tx_err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+ tx_buf_size, DMA_TO_DEVICE);
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ }
+ kfree(tun_qp->tx_ring);
+ tun_qp->tx_ring = NULL;
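+	/* all rx buffers were mapped above, so unwind every one of them */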
+ i = MLX4_NUM_TUNNEL_BUFS;
+err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+ rx_buf_size, DMA_FROM_DEVICE);
+ kfree(tun_qp->ring[i].addr);
+ }
+ kfree(tun_qp->ring);
+ tun_qp->ring = NULL;
+ return -ENOMEM;
+}
+
+static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int is_tun)
+{
+ int i;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ int rx_buf_size, tx_buf_size;
+
+ if (qp_type > IB_QPT_GSI)
+ return;
+
+ tun_qp = &ctx->qp[qp_type];
+ if (is_tun) {
+ rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+ tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+ } else {
+ rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+ tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+ }
+
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+ rx_buf_size, DMA_FROM_DEVICE);
+ kfree(tun_qp->ring[i].addr);
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+ tx_buf_size, DMA_TO_DEVICE);
+ kfree(tun_qp->tx_ring[i].buf.addr);
+ if (tun_qp->tx_ring[i].ah)
+ ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ }
+ kfree(tun_qp->tx_ring);
+ kfree(tun_qp->ring);
+}
+
+static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct ib_wc wc;
+ int ret;
+ ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+ ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+ while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+ tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_RECV:
+ mlx4_ib_multiplex_mad(ctx, &wc);
+ ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
+ wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1));
+ if (ret)
+ pr_err("Failed reposting tunnel "
+ "buf:%lld\n", wc.wr_id);
+ break;
+ case IB_WC_SEND:
+				pr_debug("received tunnel send completion: "
+ "wrid=0x%llx, status=0x%x\n",
+ wc.wr_id, wc.status);
+ ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&tun_qp->tx_lock);
+ tun_qp->tx_ix_tail++;
+ spin_unlock(&tun_qp->tx_lock);
+
+ break;
+ default:
+ break;
+ }
+ } else {
+ pr_debug("mlx4_ib: completion error in tunnel: %d."
+ " status = %d, wrid = 0x%llx\n",
+ ctx->slave, wc.status, wc.wr_id);
+ if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+ ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&tun_qp->tx_lock);
+ tun_qp->tx_ix_tail++;
+ spin_unlock(&tun_qp->tx_lock);
+ }
+ }
+ }
+}
+
+static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
+{
+ struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
+
+ /* It's worse than that! He's dead, Jim! */
+ pr_err("Fatal error (%d) on a MAD QP on port %d\n",
+ event->event, sqp->port);
+}
+
+static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
+ enum ib_qp_type qp_type, int create_tun)
+{
+ int i, ret;
+ struct mlx4_ib_demux_pv_qp *tun_qp;
+ struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
+ struct ib_qp_attr attr;
+ int qp_attr_mask_INIT;
+
+ if (qp_type > IB_QPT_GSI)
+ return -EINVAL;
+
+ tun_qp = &ctx->qp[qp_type];
+
+ memset(&qp_init_attr, 0, sizeof qp_init_attr);
+ qp_init_attr.init_attr.send_cq = ctx->cq;
+ qp_init_attr.init_attr.recv_cq = ctx->cq;
+ qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_sge = 1;
+ qp_init_attr.init_attr.cap.max_recv_sge = 1;
+ if (create_tun) {
+ qp_init_attr.init_attr.qp_type = IB_QPT_UD;
+ qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
+ qp_init_attr.port = ctx->port;
+ qp_init_attr.slave = ctx->slave;
+ qp_init_attr.proxy_qp_type = qp_type;
+ qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_QKEY | IB_QP_PORT;
+ } else {
+ qp_init_attr.init_attr.qp_type = qp_type;
+ qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
+ qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
+ }
+ qp_init_attr.init_attr.port_num = ctx->port;
+ qp_init_attr.init_attr.qp_context = ctx;
+ qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
+ tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
+ if (IS_ERR(tun_qp->qp)) {
+ ret = PTR_ERR(tun_qp->qp);
+ tun_qp->qp = NULL;
+ pr_err("Couldn't create %s QP (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof attr);
+ attr.qp_state = IB_QPS_INIT;
+ ret = 0;
+ if (create_tun)
+ ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
+ ctx->port, IB_DEFAULT_PKEY_FULL,
+ &attr.pkey_index);
+ if (ret || !create_tun)
+ attr.pkey_index =
+ to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
+ attr.qkey = IB_QP1_QKEY;
+ attr.port_num = ctx->port;
+ ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to INIT (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+ attr.qp_state = IB_QPS_RTR;
+ ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to RTR (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+ attr.qp_state = IB_QPS_RTS;
+ attr.sq_psn = 0;
+ ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ pr_err("Couldn't change %s qp state to RTS (%d)\n",
+ create_tun ? "tunnel" : "special", ret);
+ goto err_qp;
+ }
+
+ for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
+ if (ret) {
+			pr_err("mlx4_ib_post_pv_qp_buf error"
+ " (err = %d, i = %d)\n", ret, i);
+ goto err_qp;
+ }
+ }
+ return 0;
+
+err_qp:
+ ib_destroy_qp(tun_qp->qp);
+ tun_qp->qp = NULL;
+ return ret;
+}
+
+/*
+ * IB MAD completion callback for real SQPs
+ */
+static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+ struct mlx4_ib_demux_pv_qp *sqp;
+ struct ib_wc wc;
+ struct ib_grh *grh;
+ struct ib_mad *mad;
+
+ ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+ ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+ while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+ sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+ if (wc.status == IB_WC_SUCCESS) {
+ switch (wc.opcode) {
+ case IB_WC_SEND:
+ ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&sqp->tx_lock);
+ sqp->tx_ix_tail++;
+ spin_unlock(&sqp->tx_lock);
+ break;
+ case IB_WC_RECV:
+ mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
+ (sqp->ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ grh = &(((struct mlx4_mad_rcv_buf *)
+ (sqp->ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
+ if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)))
+ pr_err("Failed reposting SQP "
+ "buf:%lld\n", wc.wr_id);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
+ } else {
+ pr_debug("mlx4_ib: completion error in tunnel: %d."
+ " status = %d, wrid = 0x%llx\n",
+ ctx->slave, wc.status, wc.wr_id);
+ if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+ ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ = NULL;
+ spin_lock(&sqp->tx_lock);
+ sqp->tx_ix_tail++;
+ spin_unlock(&sqp->tx_lock);
+ }
+ }
+ }
+}
+
+static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
+ struct mlx4_ib_demux_pv_ctx **ret_ctx)
+{
+ struct mlx4_ib_demux_pv_ctx *ctx;
+
+ *ret_ctx = NULL;
+ ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
+ if (!ctx) {
+ pr_err("failed allocating pv resource context "
+ "for port %d, slave %d\n", port, slave);
+ return -ENOMEM;
+ }
+
+ ctx->ib_dev = &dev->ib_dev;
+ ctx->port = port;
+ ctx->slave = slave;
+ *ret_ctx = ctx;
+ return 0;
+}
+
+static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (dev->sriov.demux[port - 1].tun[slave]) {
+ kfree(dev->sriov.demux[port - 1].tun[slave]);
+ dev->sriov.demux[port - 1].tun[slave] = NULL;
+ }
+}
+
+static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
+ int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
+{
+ int ret, cq_size;
+
+ if (ctx->state != DEMUX_PV_STATE_DOWN)
+ return -EEXIST;
+
+ ctx->state = DEMUX_PV_STATE_STARTING;
+ /* have QP0 only if link layer is IB */
+ if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+ IB_LINK_LAYER_INFINIBAND)
+ ctx->has_smi = 1;
+
+ if (ctx->has_smi) {
+ ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
+ if (ret) {
+ pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
+ goto err_out;
+ }
+ }
+
+ ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
+ if (ret) {
+ pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
+ goto err_out_qp0;
+ }
+
+ cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ if (ctx->has_smi)
+ cq_size *= 2;
+
+ ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ NULL, ctx, cq_size, 0);
+ if (IS_ERR(ctx->cq)) {
+ ret = PTR_ERR(ctx->cq);
+ pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+ goto err_buf;
+ }
+
+ ctx->pd = ib_alloc_pd(ctx->ib_dev);
+ if (IS_ERR(ctx->pd)) {
+ ret = PTR_ERR(ctx->pd);
+ pr_err("Couldn't create tunnel PD (%d)\n", ret);
+ goto err_cq;
+ }
+
+ ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(ctx->mr)) {
+ ret = PTR_ERR(ctx->mr);
+ pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
+ goto err_pd;
+ }
+
+ if (ctx->has_smi) {
+ ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
+ if (ret) {
+ pr_err("Couldn't create %s QP0 (%d)\n",
+ create_tun ? "tunnel for" : "", ret);
+ goto err_mr;
+ }
+ }
+
+ ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
+ if (ret) {
+ pr_err("Couldn't create %s QP1 (%d)\n",
+ create_tun ? "tunnel for" : "", ret);
+ goto err_qp0;
+ }
+
+ if (create_tun)
+ INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
+ else
+ INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
+
+ ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+
+ ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+ if (ret) {
+ pr_err("Couldn't arm tunnel cq (%d)\n", ret);
+ goto err_wq;
+ }
+ ctx->state = DEMUX_PV_STATE_ACTIVE;
+ return 0;
+
+err_wq:
+ ctx->wq = NULL;
+ ib_destroy_qp(ctx->qp[1].qp);
+ ctx->qp[1].qp = NULL;
+
+
+err_qp0:
+ if (ctx->has_smi)
+ ib_destroy_qp(ctx->qp[0].qp);
+ ctx->qp[0].qp = NULL;
+
+err_mr:
+ ib_dereg_mr(ctx->mr);
+ ctx->mr = NULL;
+
+err_pd:
+ ib_dealloc_pd(ctx->pd);
+ ctx->pd = NULL;
+
+err_cq:
+ ib_destroy_cq(ctx->cq);
+ ctx->cq = NULL;
+
+err_buf:
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
+
+err_out_qp0:
+ if (ctx->has_smi)
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
+err_out:
+ ctx->state = DEMUX_PV_STATE_DOWN;
+ return ret;
+}
+
+static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
+ struct mlx4_ib_demux_pv_ctx *ctx, int flush)
+{
+ if (!ctx)
+ return;
+ if (ctx->state > DEMUX_PV_STATE_DOWN) {
+ ctx->state = DEMUX_PV_STATE_DOWNING;
+ if (flush)
+ flush_workqueue(ctx->wq);
+ if (ctx->has_smi) {
+ ib_destroy_qp(ctx->qp[0].qp);
+ ctx->qp[0].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
+ }
+ ib_destroy_qp(ctx->qp[1].qp);
+ ctx->qp[1].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
+ ib_dereg_mr(ctx->mr);
+ ctx->mr = NULL;
+ ib_dealloc_pd(ctx->pd);
+ ctx->pd = NULL;
+ ib_destroy_cq(ctx->cq);
+ ctx->cq = NULL;
+ ctx->state = DEMUX_PV_STATE_DOWN;
+ }
+}
+
+static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
+ int port, int do_init)
+{
+ int ret = 0;
+
+ if (!do_init) {
+ clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
+ /* for master, destroy real sqp resources */
+ if (slave == mlx4_master_func_num(dev->dev))
+ destroy_pv_resources(dev, slave, port,
+ dev->sriov.sqps[port - 1], 1);
+ /* destroy the tunnel qp resources */
+ destroy_pv_resources(dev, slave, port,
+ dev->sriov.demux[port - 1].tun[slave], 1);
+ return 0;
+ }
+
+ /* create the tunnel qp resources */
+ ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
+ dev->sriov.demux[port - 1].tun[slave]);
+
+ /* for master, create the real sqp resources */
+ if (!ret && slave == mlx4_master_func_num(dev->dev))
+ ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
+ dev->sriov.sqps[port - 1]);
+ return ret;
+}
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work)
+{
+ struct mlx4_ib_demux_work *dmxw;
+
+ dmxw = container_of(work, struct mlx4_ib_demux_work, work);
+ mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
+ dmxw->do_init);
+ kfree(dmxw);
+ return;
+}
+
+static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_demux_ctx *ctx,
+ int port)
+{
+ char name[12];
+ int ret = 0;
+ int i;
+
+ ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
+ sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
+ if (!ctx->tun)
+ return -ENOMEM;
+
+ ctx->dev = dev;
+ ctx->port = port;
+ ctx->ib_dev = &dev->ib_dev;
+
+ for (i = 0;
+ i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+ i++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev->dev, i);
+
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+
+ ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
+ if (ret) {
+ ret = -ENOMEM;
+ goto err_mcg;
+ }
+ }
+
+ ret = mlx4_ib_mcg_port_init(ctx);
+ if (ret) {
+ pr_err("Failed initializing mcg para-virt (%d)\n", ret);
+ goto err_mcg;
+ }
+
+ snprintf(name, sizeof name, "mlx4_ibt%d", port);
+ ctx->wq = create_singlethread_workqueue(name);
+ if (!ctx->wq) {
+ pr_err("Failed to create tunnelling WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wq;
+ }
+
+ snprintf(name, sizeof name, "mlx4_ibud%d", port);
+ ctx->ud_wq = create_singlethread_workqueue(name);
+ if (!ctx->ud_wq) {
+ pr_err("Failed to create up/down WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_udwq;
+ }
+
+ return 0;
+
+err_udwq:
+ destroy_workqueue(ctx->wq);
+ ctx->wq = NULL;
+
+err_wq:
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+err_mcg:
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++)
+ free_pv_object(dev, i, port);
+ kfree(ctx->tun);
+ ctx->tun = NULL;
+ return ret;
+}
+
+static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
+{
+ if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
+ sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
+ flush_workqueue(sqp_ctx->wq);
+ if (sqp_ctx->has_smi) {
+ ib_destroy_qp(sqp_ctx->qp[0].qp);
+ sqp_ctx->qp[0].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
+ }
+ ib_destroy_qp(sqp_ctx->qp[1].qp);
+ sqp_ctx->qp[1].qp = NULL;
+ mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
+ ib_dereg_mr(sqp_ctx->mr);
+ sqp_ctx->mr = NULL;
+ ib_dealloc_pd(sqp_ctx->pd);
+ sqp_ctx->pd = NULL;
+ ib_destroy_cq(sqp_ctx->cq);
+ sqp_ctx->cq = NULL;
+ sqp_ctx->state = DEMUX_PV_STATE_DOWN;
+ }
+}
+
+static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
+{
+ int i;
+ if (ctx) {
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+ mlx4_ib_mcg_port_cleanup(ctx, 1);
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (!ctx->tun[i])
+ continue;
+ if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
+ ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
+ }
+ flush_workqueue(ctx->wq);
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
+ free_pv_object(dev, i, ctx->port);
+ }
+ kfree(ctx->tun);
+ destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wq);
+ }
+}
+
+static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
+{
+ int i;
+
+ if (!mlx4_is_master(dev->dev))
+ return;
+ /* initialize or tear down tunnel QPs for the master */
+ for (i = 0; i < dev->dev->caps.num_ports; i++)
+ mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
+ return;
+}
+
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
+{
+ int i = 0;
+ int err;
+
+ if (!mlx4_is_mfunc(dev->dev))
+ return 0;
+
+ dev->sriov.is_going_down = 0;
+ spin_lock_init(&dev->sriov.going_down_lock);
+ mlx4_ib_cm_paravirt_init(dev);
+
+ mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
+
+ if (mlx4_is_slave(dev->dev)) {
+ mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
+ return 0;
+ }
+
+ for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ if (i == mlx4_master_func_num(dev->dev))
+ mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
+ else
+ mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
+ }
+
+ err = mlx4_ib_init_alias_guid_service(dev);
+ if (err) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
+ goto paravirt_err;
+ }
+ err = mlx4_ib_device_register_sysfs(dev);
+ if (err) {
+ mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
+ goto sysfs_err;
+ }
+
+ mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
+ dev->dev->caps.sqp_demux);
+ for (i = 0; i < dev->num_ports; i++) {
+ union ib_gid gid;
+ err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
+ if (err)
+ goto demux_err;
+ dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+ err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
+ &dev->sriov.sqps[i]);
+ if (err)
+ goto demux_err;
+ err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
+ if (err)
+ goto free_pv;
+ }
+ mlx4_ib_master_tunnels(dev, 1);
+ return 0;
+
+free_pv:
+ free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
+demux_err:
+ while (--i >= 0) {
+ free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
+ mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+ }
+ mlx4_ib_device_unregister_sysfs(dev);
+
+sysfs_err:
+ mlx4_ib_destroy_alias_guid_service(dev);
+
+paravirt_err:
+ mlx4_ib_cm_paravirt_clean(dev, -1);
+
+ return err;
+}
+
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
+{
+ int i;
+ unsigned long flags;
+
+ if (!mlx4_is_mfunc(dev->dev))
+ return;
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ dev->sriov.is_going_down = 1;
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+ if (mlx4_is_master(dev->dev)) {
+ for (i = 0; i < dev->num_ports; i++) {
+ flush_workqueue(dev->sriov.demux[i].ud_wq);
+ mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
+ kfree(dev->sriov.sqps[i]);
+ dev->sriov.sqps[i] = NULL;
+ mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+ }
+
+ mlx4_ib_cm_paravirt_clean(dev, -1);
+ mlx4_ib_destroy_alias_guid_service(dev);
+ mlx4_ib_device_unregister_sysfs(dev);
+ }
+}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index bcf50648fa1..0f7027e7db1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -32,30 +33,57 @@
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
#include "user.h"
-#define DRV_NAME "mlx4_ib"
-#define DRV_VERSION "1.0"
-#define DRV_RELDATE "April 4, 2008"
+#define DRV_NAME MLX4_IB_DRV_NAME
+#define DRV_VERSION "2.2-1"
+#define DRV_RELDATE "Feb 2014"
+
+#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
+int mlx4_ib_sm_guid_assign = 1;
+module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
+struct update_gid_work {
+ struct work_struct work;
+ union ib_gid gids[128];
+ struct mlx4_ib_dev *dev;
+ int port;
+};
+
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+
+static struct workqueue_struct *wq;
+
static void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -64,6 +92,33 @@ static void init_query_mad(struct ib_smp *mad)
mad->method = IB_MGMT_METHOD_GET;
}
+static union ib_gid zgid;
+
+static int check_flow_steering_support(struct mlx4_dev *dev)
+{
+ int eth_num_ports = 0;
+ int ib_num_ports = 0;
+
+ int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+ if (dmfs) {
+ int i;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ eth_num_ports++;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+ dmfs &= (!ib_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+ (!eth_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+ if (ib_num_ports && mlx4_is_mfunc(dev)) {
+ pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
+ dmfs = 0;
+ }
+ }
+ return dmfs;
+}
+
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
@@ -80,7 +135,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -102,40 +158,60 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- if (dev->dev->caps.max_gso_sz)
+ if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
+ props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
+ (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
+ (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
+ props->device_cap_flags |= IB_DEVICE_XRC;
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
+ if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
+ else
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
+ if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
+ }
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
- props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
+ props->vendor_part_id = dev->dev->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
- props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
- props->max_qp_wr = dev->dev->caps.max_wqes;
+ props->max_qp = dev->dev->quotas.qp;
+ props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
- props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
+ props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
+ props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
out:
kfree(in_mad);
@@ -144,11 +220,22 @@ out:
return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props)
+static enum rdma_link_layer
+mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+ struct mlx4_dev *dev = to_mdev(device)->dev;
+
+ return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+}
+
+static int ib_link_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int ext_active_speed;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -156,16 +243,19 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof *props);
-
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
if (err)
goto out;
+
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -173,7 +263,10 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
- props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
+ if (netw_view)
+ props->gid_tbl_len = out_mad->data[50];
+ else
+ props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -186,19 +279,132 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ /* Check if extended speeds (EDR/FDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+ ext_active_speed = out_mad->data[62] >> 4;
+
+ switch (ext_active_speed) {
+ case 1:
+ props->active_speed = IB_SPEED_FDR;
+ break;
+ case 2:
+ props->active_speed = IB_SPEED_EDR;
+ break;
+ }
+ }
+
+ /* If reported active speed is QDR, check if is FDR-10 */
+ if (props->active_speed == IB_SPEED_QDR) {
+ init_query_mad(in_mad);
+ in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
+ NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ /* Checking LinkSpeedActive for FDR-10 */
+ if (out_mad->data[15] & 0x1)
+ props->active_speed = IB_SPEED_FDR10;
+ }
+
+ /* Avoid wrong speed value returned by FW if the IB link is down. */
+ if (props->state == IB_PORT_DOWN)
+ props->active_speed = IB_SPEED_SDR;
+
out:
kfree(in_mad);
kfree(out_mad);
+ return err;
+}
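+
+/* Map the logical port state onto the IB PortPhysicalState encoding:
+ * 5 = LinkUp, 3 = Disabled.
+ */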
+static u8 state_to_phys_state(enum ib_port_state state)
+{
+ return state == IB_PORT_ACTIVE ? 5 : 3;
+}
+
+static int eth_link_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
+{
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ struct mlx4_ib_iboe *iboe = &mdev->iboe;
+ struct net_device *ndev;
+ enum ib_mtu tmp;
+ struct mlx4_cmd_mailbox *mailbox;
+ int err = 0;
+
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
+ MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ goto out;
+
+ props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ props->active_speed = IB_SPEED_QDR;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+ props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
+ props->max_msg_sz = mdev->dev->caps.max_msg_sz;
+ props->pkey_tbl_len = 1;
+ props->max_mtu = IB_MTU_4096;
+ props->max_vl_num = 2;
+ props->state = IB_PORT_DOWN;
+ props->phys_state = state_to_phys_state(props->state);
+ props->active_mtu = IB_MTU_256;
+ spin_lock(&iboe->lock);
+ ndev = iboe->netdevs[port - 1];
+ if (!ndev)
+ goto out_unlock;
+
+ tmp = iboe_get_mtu(ndev->mtu);
+ props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
+
+ props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ props->phys_state = state_to_phys_state(props->state);
+out_unlock:
+ spin_unlock(&iboe->lock);
+out:
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
}
-static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *gid)
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view)
+{
+ int err;
+
+ memset(props, 0, sizeof *props);
+
+ err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
+ ib_link_query_port(ibdev, port, props, netw_view) :
+ eth_link_query_port(ibdev, port, props, netw_view);
+
+ return err;
+}
+
+static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ /* returns host view */
+ return __mlx4_ib_query_port(ibdev, port, props, 0);
+}
+
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ int clear = 0;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -209,33 +415,68 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(dev->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw, out_mad->data + 8, 8);
+ if (mlx4_is_mfunc(dev->dev) && !netw_view) {
+ if (index) {
+ /* For any index > 0, return the null guid */
+ err = 0;
+ clear = 1;
+ goto out;
+ }
+ }
+
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
+ NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
+ if (clear)
+ memset(gid->raw + 8, 0, 8);
kfree(in_mad);
kfree(out_mad);
return err;
}
-static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
+static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+
+ *gid = dev->iboe.gid_table[port - 1][index];
+
+ return 0;
+}
+
+static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
+ else
+ return iboe_query_gid(ibdev, port, index, gid);
+}
+
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -247,7 +488,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
- err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
+
+ err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
+ in_mad, out_mad);
if (err)
goto out;
@@ -259,23 +504,49 @@ out:
return err;
}
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
+}
+
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
+ struct mlx4_cmd_mailbox *mailbox;
+ unsigned long flags;
+
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
- if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
- spin_lock(&to_mdev(ibdev)->sm_lock);
- memcpy(ibdev->node_desc, props->node_desc, 64);
- spin_unlock(&to_mdev(ibdev)->sm_lock);
- }
+ if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
+ return 0;
+
+ if (mlx4_is_slave(to_mdev(ibdev)->dev))
+ return -EOPNOTSUPP;
+
+ spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
+
+ /*
+ * If possible, pass the node desc on to FW as well, so it can generate
+ * a trap 144 (local change notice). If the command fails, just ignore it.
+ */
+ mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
+ if (IS_ERR(mailbox))
+ return 0;
+
+ memcpy(mailbox->buf, props->node_desc, 64);
+ mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
+ MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+ mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
return 0;
}
-static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
- u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+ u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -284,8 +555,6 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memset(mailbox->buf, 0, 256);
-
if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
*(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
@@ -295,7 +564,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
}
err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B);
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -304,11 +573,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
struct ib_port_attr attr;
u32 cap_mask;
int err;
- mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+ /* return OK if this is RoCE. CM calls ib_modify_port() regardless
+ * of whether port link layer is ETH or IB. For ETH ports, qkey
+ * violations and port capabilities are not meaningful.
+ */
+ if (is_eth)
+ return 0;
+
+ mutex_lock(&mdev->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
if (err)
@@ -317,9 +595,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
- err = mlx4_SET_PORT(to_mdev(ibdev), port,
- !!(mask & IB_PORT_RESET_QKEY_CNTR),
- cap_mask);
+ err = mlx4_ib_SET_PORT(mdev, port,
+ !!(mask & IB_PORT_RESET_QKEY_CNTR),
+ cap_mask);
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
@@ -331,12 +609,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
- resp.qp_tab_size = dev->dev->caps.num_qps;
- resp.bf_reg_size = dev->dev->caps.bf_reg_size;
- resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ if (!dev->ib_active)
+ return ERR_PTR(-EAGAIN);
+
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ resp_v3.qp_tab_size = dev->dev->caps.num_qps;
+ resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ } else {
+ resp.dev_caps = dev->dev->caps.userspace_caps;
+ resp.qp_tab_size = dev->dev->caps.num_qps;
+ resp.bf_reg_size = dev->dev->caps.bf_reg_size;
+ resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+ resp.cqe_size = dev->dev->caps.cqe_size;
+ }
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
@@ -351,7 +641,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- err = ib_copy_to_udata(udata, &resp, sizeof resp);
+ if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+ else
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
@@ -386,8 +680,7 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
- /* FIXME want pgprot_writecombine() for BlueFlame pages */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn +
@@ -435,24 +728,564 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
return 0;
}
+static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_xrcd *xrcd;
+ int err;
+
+ if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return ERR_PTR(-ENOSYS);
+
+ xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ if (err)
+ goto err1;
+
+ xrcd->pd = ib_alloc_pd(ibdev);
+ if (IS_ERR(xrcd->pd)) {
+ err = PTR_ERR(xrcd->pd);
+ goto err2;
+ }
+
+ xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+ if (IS_ERR(xrcd->cq)) {
+ err = PTR_ERR(xrcd->cq);
+ goto err3;
+ }
+
+ return &xrcd->ibxrcd;
+
+err3:
+ ib_dealloc_pd(xrcd->pd);
+err2:
+ mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
+err1:
+ kfree(xrcd);
+ return ERR_PTR(err);
+}
+
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ ib_destroy_cq(to_mxrcd(xrcd)->cq);
+ ib_dealloc_pd(to_mxrcd(xrcd)->pd);
+ mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+ kfree(xrcd);
+
+ return 0;
+}
+
+static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_ib_gid_entry *ge;
+
+ ge = kzalloc(sizeof *ge, GFP_KERNEL);
+ if (!ge)
+ return -ENOMEM;
+
+ ge->gid = *gid;
+ if (mlx4_ib_add_mc(mdev, mqp, gid)) {
+ ge->port = mqp->port;
+ ge->added = 1;
+ }
+
+ mutex_lock(&mqp->mutex);
+ list_add_tail(&ge->list, &mqp->gid_list);
+ mutex_unlock(&mqp->mutex);
+
+ return 0;
+}
+
+int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ union ib_gid *gid)
+{
+ struct net_device *ndev;
+ int ret = 0;
+
+ if (!mqp->port)
+ return 0;
+
+ spin_lock(&mdev->iboe.lock);
+ ndev = mdev->iboe.netdevs[mqp->port - 1];
+ if (ndev)
+ dev_hold(ndev);
+ spin_unlock(&mdev->iboe.lock);
+
+ if (ndev) {
+ ret = 1;
+ dev_put(ndev);
+ }
+
+ return ret;
+}
+
+struct mlx4_ib_steering {
+ struct list_head list;
+ u64 reg_id;
+ union ib_gid gid;
+};
+
+static int parse_flow_attr(struct mlx4_dev *dev,
+ u32 qp_num,
+ union ib_flow_spec *ib_spec,
+ struct _rule_hw *mlx4_spec)
+{
+ enum mlx4_net_trans_rule_id type;
+
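+ /* Translate one verbs flow spec into the matching mlx4 HW rule entry;
+ * on success the HW rule size in bytes is returned so the caller can
+ * advance to the next spec.
+ */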
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ type = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
+ ETH_ALEN);
+ memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
+ ETH_ALEN);
+ mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
+ mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
+ break;
+ case IB_FLOW_SPEC_IB:
+ type = MLX4_NET_TRANS_RULE_ID_IB;
+ mlx4_spec->ib.l3_qpn =
+ cpu_to_be32(qp_num);
+ mlx4_spec->ib.qpn_mask =
+ cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+ break;
+
+ case IB_FLOW_SPEC_IPV4:
+ type = MLX4_NET_TRANS_RULE_ID_IPV4;
+ mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
+ mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
+ mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
+ mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
+ break;
+
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ type = ib_spec->type == IB_FLOW_SPEC_TCP ?
+ MLX4_NET_TRANS_RULE_ID_TCP :
+ MLX4_NET_TRANS_RULE_ID_UDP;
+ mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
+ mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
+ mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
+ mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
+ mlx4_hw_rule_sz(dev, type) < 0)
+ return -EINVAL;
+ mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
+ mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
+ return mlx4_hw_rule_sz(dev, type);
+}
+
+struct default_rules {
+ __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u8 link_layer;
+};
+static const struct default_rules default_table[] = {
+ {
+ .mandatory_fields = {IB_FLOW_SPEC_IPV4},
+ .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+ .rules_create_list = {IB_FLOW_SPEC_IB},
+ .link_layer = IB_LINK_LAYER_INFINIBAND
+ }
+};
+
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr)
+{
+ int i, j, k;
+ void *ib_flow;
+ const struct default_rules *pdefault_rules = default_table;
+ u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
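+ /* Scan the default-rule table: an entry matches only if the link layer
+ * agrees, every mandatory spec is present in the flow and none of the
+ * mandatory-not specs appear.
+ */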
+ for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
+ pdefault_rules++) {
+ __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ memset(&field_types, 0, sizeof(field_types));
+
+ if (link_layer != pdefault_rules->link_layer)
+ continue;
+
+ ib_flow = flow_attr + 1;
+ /* we assume the specs are sorted */
+ for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+ j < flow_attr->num_of_specs; k++) {
+ union ib_flow_spec *current_flow =
+ (union ib_flow_spec *)ib_flow;
+
+ /* same layer but different type */
+ if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+ (pdefault_rules->mandatory_fields[k] &
+ IB_FLOW_SPEC_LAYER_MASK)) &&
+ (current_flow->type !=
+ pdefault_rules->mandatory_fields[k]))
+ goto out;
+
+ /* same layer, try match next one */
+ if (current_flow->type ==
+ pdefault_rules->mandatory_fields[k]) {
+ j++;
+ ib_flow +=
+ ((union ib_flow_spec *)ib_flow)->size;
+ }
+ }
+
+ ib_flow = flow_attr + 1;
+ for (j = 0; j < flow_attr->num_of_specs;
+ j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+ for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+ /* same layer and same type */
+ if (((union ib_flow_spec *)ib_flow)->type ==
+ pdefault_rules->mandatory_not_fields[k])
+ goto out;
+
+ return i;
+ }
+out:
+ return -1;
+}
+
+static int __mlx4_ib_create_default_rules(
+ struct mlx4_ib_dev *mdev,
+ struct ib_qp *qp,
+ const struct default_rules *pdefault_rules,
+ struct _rule_hw *mlx4_spec) {
+ int size = 0;
+ int i;
+
+ for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
+ sizeof(pdefault_rules->rules_create_list[0]); i++) {
+ int ret;
+ union ib_flow_spec ib_spec;
+ switch (pdefault_rules->rules_create_list[i]) {
+ case 0:
+ /* no rule */
+ continue;
+ case IB_FLOW_SPEC_IB:
+ ib_spec.type = IB_FLOW_SPEC_IB;
+ ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+ break;
+ default:
+ /* invalid rule */
+ return -EINVAL;
+ }
+ /* We must put empty rule, qpn is being ignored */
+ ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+ mlx4_spec);
+ if (ret < 0) {
+ pr_info("invalid parsing\n");
+ return -EINVAL;
+ }
+
+ mlx4_spec = (void *)mlx4_spec + ret;
+ size += ret;
+ }
+ return size;
+}
+
+static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
+ int domain,
+ enum mlx4_net_trans_promisc_mode flow_type,
+ u64 *reg_id)
+{
+ int ret, i;
+ int size = 0;
+ void *ib_flow;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->device);
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+ int default_flow;
+
+ static const u16 __mlx4_domain[] = {
+ [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
+ [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
+ [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
+ [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
+ };
+
+ if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
+ pr_err("Invalid priority value %d\n", flow_attr->priority);
+ return -EINVAL;
+ }
+
+ if (domain >= IB_FLOW_DOMAIN_NUM) {
+ pr_err("Invalid domain value %d\n", domain);
+ return -EINVAL;
+ }
+
+ if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
+ return -EINVAL;
+
+ mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ ctrl = mailbox->buf;
+
+ ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
+ flow_attr->priority);
+ ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
+ ctrl->port = flow_attr->port;
+ ctrl->qpn = cpu_to_be32(qp->qp_num);
+
+ ib_flow = flow_attr + 1;
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ /* Add default flows */
+ default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+ if (default_flow >= 0) {
+ ret = __mlx4_ib_create_default_rules(
+ mdev, qp, default_table + default_flow,
+ mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
+ for (i = 0; i < flow_attr->num_of_specs; i++) {
+ ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+ mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ ib_flow += ((union ib_flow_spec *) ib_flow)->size;
+ size += ret;
+ }
+
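+ /* The mailbox now holds the ctrl header followed by the parsed specs;
+ * the attach command takes the total rule size in dwords, hence size >> 2.
+ */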
+ ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (ret == -ENOMEM)
+ pr_err("mcg table is full. Failed to register network rule.\n");
+ else if (ret == -ENXIO)
+ pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
+ else if (ret)
+ pr_err("Invalid argument. Failed to register network rule.\n");
+
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return ret;
+}
+
+static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+ err = mlx4_cmd(dev, reg_id, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ pr_err("Failed to detach network rule. Registration id = 0x%llx\n",
+ reg_id);
+ return err;
+}
+
+static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ int domain)
+{
+ int err = 0, i = 0;
+ struct mlx4_ib_flow *mflow;
+ enum mlx4_net_trans_promisc_mode type[2];
+
+ memset(type, 0, sizeof(type));
+
+ mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
+ if (!mflow) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ type[0] = MLX4_FS_REGULAR;
+ break;
+
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ type[0] = MLX4_FS_ALL_DEFAULT;
+ break;
+
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ type[0] = MLX4_FS_MC_DEFAULT;
+ break;
+
+ case IB_FLOW_ATTR_SNIFFER:
+ type[0] = MLX4_FS_UC_SNIFFER;
+ type[1] = MLX4_FS_MC_SNIFFER;
+ break;
+
+ default:
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ while (i < ARRAY_SIZE(type) && type[i]) {
+ err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
+ &mflow->reg_id[i]);
+ if (err)
+ goto err_free;
+ i++;
+ }
+
+ return &mflow->ibflow;
+
+err_free:
+ kfree(mflow);
+ return ERR_PTR(err);
+}
+
+static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ int err, ret = 0;
+ int i = 0;
+ struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
+ struct mlx4_ib_flow *mflow = to_mflow(flow_id);
+
+ while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]);
+ if (err)
+ ret = err;
+ i++;
+ }
+
+ kfree(mflow);
+ return ret;
+}
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
- return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
- &to_mqp(ibqp)->mqp, gid->raw,
- !!(to_mqp(ibqp)->flags &
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK));
+ int err;
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ u64 reg_id;
+ struct mlx4_ib_steering *ib_steering = NULL;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
+ if (!ib_steering)
+ return -ENOMEM;
+ }
+
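+ /* In device-managed steering mode the attach returns a registration id
+ * that must be kept so the rule can be removed again on mcg_detach.
+ */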
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ prot, &reg_id);
+ if (err)
+ goto err_malloc;
+
+ err = add_gid_entry(ibqp, gid);
+ if (err)
+ goto err_add;
+
+ if (ib_steering) {
+ memcpy(ib_steering->gid.raw, gid->raw, 16);
+ ib_steering->reg_id = reg_id;
+ mutex_lock(&mqp->mutex);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ mutex_unlock(&mqp->mutex);
+ }
+ return 0;
+
+err_add:
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id);
+err_malloc:
+ kfree(ib_steering);
+
+ return err;
+}
+
+static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
+{
+ struct mlx4_ib_gid_entry *ge;
+ struct mlx4_ib_gid_entry *tmp;
+ struct mlx4_ib_gid_entry *ret = NULL;
+
+ list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
+ if (!memcmp(raw, ge->gid.raw, 16)) {
+ ret = ge;
+ break;
+ }
+ }
+
+ return ret;
}
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
- return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
- &to_mqp(ibqp)->mqp, gid->raw);
+ int err;
+ struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ struct net_device *ndev;
+ struct mlx4_ib_gid_entry *ge;
+ u64 reg_id = 0;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ struct mlx4_ib_steering *ib_steering;
+
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
+ if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
+ list_del(&ib_steering->list);
+ break;
+ }
+ }
+ mutex_unlock(&mqp->mutex);
+ if (&ib_steering->list == &mqp->steering_rules) {
+ pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
+ return -EINVAL;
+ }
+ reg_id = ib_steering->reg_id;
+ kfree(ib_steering);
+ }
+
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ prot, reg_id);
+ if (err)
+ return err;
+
+ mutex_lock(&mqp->mutex);
+ ge = find_gid_entry(mqp, gid->raw);
+ if (ge) {
+ spin_lock(&mdev->iboe.lock);
+ ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
+ if (ndev)
+ dev_hold(ndev);
+ spin_unlock(&mdev->iboe.lock);
+ if (ndev)
+ dev_put(ndev);
+ list_del(&ge->list);
+ kfree(ge);
+ } else
+ pr_warn("could not find mgid entry\n");
+
+ mutex_unlock(&mqp->mutex);
+
+ return 0;
}
static int init_node_data(struct mlx4_ib_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
+ int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -462,8 +1295,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+ if (mlx4_is_master(dev->dev))
+ mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -471,7 +1306,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
- err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@@ -531,31 +1366,620 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-static void *mlx4_ib_add(struct mlx4_dev *dev)
+static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
+ struct net_device *dev)
{
- static int mlx4_ib_version_printed;
+ memcpy(eui, dev->dev_addr, 3);
+ memcpy(eui + 5, dev->dev_addr + 3, 3);
+ if (vlan_id < 0x1000) {
+ eui[3] = vlan_id >> 8;
+ eui[4] = vlan_id & 0xff;
+ } else {
+ eui[3] = 0xff;
+ eui[4] = 0xfe;
+ }
+ eui[0] ^= 2;
+}
+
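+/* Deferred work: copy the cached GID table for one port into a mailbox,
+ * program it into the HCA with SET_PORT and raise a GID change event.
+ */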
+static void update_gids_task(struct work_struct *work)
+{
+ struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids;
+ int err;
+ struct mlx4_dev *dev = gw->dev->dev;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
+ return;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, gw->gids, sizeof gw->gids);
+
+ err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ pr_warn("set port command failed\n");
+ else
+ mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ kfree(gw);
+}
+
+static void reset_gids_task(struct work_struct *work)
+{
+ struct update_gid_work *gw =
+ container_of(work, struct update_gid_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids;
+ int err;
+ struct mlx4_dev *dev = gw->dev->dev;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ pr_warn("reset gid table failed\n");
+ goto free;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, gw->gids, sizeof(gw->gids));
+
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+ 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (err)
+ pr_warn("set port %d command failed\n", gw->port);
+ }
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+free:
+ kfree(gw);
+}
+
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+ union ib_gid *gid, int clear,
+ int default_gid)
+{
+ struct update_gid_work *work;
+ int i;
+ int need_update = 0;
+ int free = -1;
+ int found = -1;
+ int max_gids;
+
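+ /* Slot 0 is reserved for the default GID; other addresses live in slots
+ * 1..max. Any table change is pushed to FW asynchronously via the workqueue.
+ */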
+ if (default_gid) {
+ free = 0;
+ } else {
+ max_gids = dev->dev->caps.gid_table_len[port];
+ for (i = 1; i < max_gids; ++i) {
+ if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+ sizeof(*gid)))
+ found = i;
+
+ if (clear) {
+ if (found >= 0) {
+ need_update = 1;
+ dev->iboe.gid_table[port - 1][found] =
+ zgid;
+ break;
+ }
+ } else {
+ if (found >= 0)
+ break;
+
+ if (free < 0 &&
+ !memcmp(&dev->iboe.gid_table[port - 1][i],
+ &zgid, sizeof(*gid)))
+ free = i;
+ }
+ }
+ }
+
+ if (found == -1 && !clear && free >= 0) {
+ dev->iboe.gid_table[port - 1][free] = *gid;
+ need_update = 1;
+ }
+
+ if (!need_update)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
+ INIT_WORK(&work->work, update_gids_task);
+ work->port = port;
+ work->dev = dev;
+ queue_work(wq, &work->work);
+
+ return 0;
+}
+
+static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
+{
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
+}
+
+static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
+{
+ struct update_gid_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
+ memset(work->gids, 0, sizeof(work->gids));
+ INIT_WORK(&work->work, reset_gids_task);
+ work->dev = dev;
+ work->port = port;
+ queue_work(wq, &work->work);
+ return 0;
+}
+
+static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
+ struct mlx4_ib_dev *ibdev, union ib_gid *gid)
+{
+ struct mlx4_ib_iboe *iboe;
+ int port = 0;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+ rdma_vlan_dev_real_dev(event_netdev) :
+ event_netdev;
+ union ib_gid default_gid;
+
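+ /* Map an inet/inet6 address notification onto the matching IB port(s) and
+ * add or clear the corresponding GID; the per-port default GID is managed
+ * separately.
+ */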
+ mlx4_make_default_gid(real_dev, &default_gid);
+
+ if (!memcmp(gid, &default_gid, sizeof(*gid)))
+ return 0;
+
+ if (event != NETDEV_DOWN && event != NETDEV_UP)
+ return 0;
+
+ if ((real_dev != event_netdev) &&
+ (event == NETDEV_DOWN) &&
+ rdma_link_local_addr((struct in6_addr *)gid))
+ return 0;
+
+ iboe = &ibdev->iboe;
+ spin_lock(&iboe->lock);
+
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ update_gid_table(ibdev, port, gid,
+ event == NETDEV_DOWN, 0);
+
+ spin_unlock(&iboe->lock);
+ return 0;
+
+}
+
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev)
+{
+ u8 port = 0;
+ struct mlx4_ib_iboe *iboe;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+ rdma_vlan_dev_real_dev(dev) : dev;
+
+ iboe = &ibdev->iboe;
+
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ break;
+
+ if ((port == 0) || (port > ibdev->dev->caps.num_ports))
+ return 0;
+ else
+ return port;
+}
+
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct mlx4_ib_dev *ibdev;
+ struct in_ifaddr *ifa = ptr;
+ union ib_gid gid;
+ struct net_device *event_netdev = ifa->ifa_dev->dev;
+
+ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
+ return NOTIFY_DONE;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct mlx4_ib_dev *ibdev;
+ struct inet6_ifaddr *ifa = ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *event_netdev = ifa->idev->dev;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
+ return NOTIFY_DONE;
+}
+#endif
+
+#define MLX4_IB_INVALID_MAC ((u64)-1)
+static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ int port)
+{
+ u64 new_smac = 0;
+ u64 release_mac = MLX4_IB_INVALID_MAC;
+ struct mlx4_ib_qp *qp;
+
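+ /* The netdev MAC changed: register the new MAC, move the QP1 proxy to it
+ * via UPDATE_QP, then release whichever MAC (old or new) is no longer used.
+ */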
+ read_lock(&dev_base_lock);
+ new_smac = mlx4_mac_to_u64(dev->dev_addr);
+ read_unlock(&dev_base_lock);
+
+ mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
+ qp = ibdev->qp1_proxy[port - 1];
+ if (qp) {
+ int new_smac_index;
+ u64 old_smac = qp->pri.smac;
+ struct mlx4_update_qp_params update_params;
+
+ if (new_smac == old_smac)
+ goto unlock;
+
+ new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
+
+ if (new_smac_index < 0)
+ goto unlock;
+
+ update_params.smac_index = new_smac_index;
+ if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC,
+ &update_params)) {
+ release_mac = new_smac;
+ goto unlock;
+ }
+
+ qp->pri.smac = new_smac;
+ qp->pri.smac_index = new_smac_index;
+
+ release_mac = old_smac;
+ }
+
+unlock:
+ mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
+ if (release_mac != MLX4_IB_INVALID_MAC)
+ mlx4_unregister_mac(ibdev->dev, port, release_mac);
+}
+
+static void mlx4_ib_get_dev_addr(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev, u8 port)
+{
+ struct in_device *in_dev;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+#endif
+ union ib_gid gid;
+
+ if ((port == 0) || (port > ibdev->dev->caps.num_ports))
+ return;
+
+ /* IPv4 gids */
+ in_dev = in_dev_get(dev);
+ if (in_dev) {
+ for_ifa(in_dev) {
+ /*ifa->ifa_address;*/
+ ipv6_addr_set_v4mapped(ifa->ifa_address,
+ (struct in6_addr *)&gid);
+ update_gid_table(ibdev, port, &gid, 0, 0);
+ }
+ endfor_ifa(in_dev);
+ in_dev_put(in_dev);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ /* IPv6 gids */
+ in6_dev = in6_dev_get(dev);
+ if (in6_dev) {
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ update_gid_table(ibdev, port, pgid, 0, 0);
+ }
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
+}
+
+static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev, u8 port)
+{
+ union ib_gid gid;
+ mlx4_make_default_gid(dev, &gid);
+ update_gid_table(ibdev, port, &gid, 0, 1);
+}
+
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
+{
+ struct net_device *dev;
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ int i;
+
+ for (i = 1; i <= ibdev->num_ports; ++i)
+ if (reset_gid_table(ibdev, i))
+ return -1;
+
+ read_lock(&dev_base_lock);
+ spin_lock(&iboe->lock);
+
+ for_each_netdev(&init_net, dev) {
+ u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+ if (port)
+ mlx4_ib_get_dev_addr(dev, ibdev, port);
+ }
+
+ spin_unlock(&iboe->lock);
+ read_unlock(&dev_base_lock);
+
+ return 0;
+}
+
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ unsigned long event)
+{
+ struct mlx4_ib_iboe *iboe;
+ int update_qps_port = -1;
+ int port;
+
+ iboe = &ibdev->iboe;
+
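+ /* Re-sync the cached netdev and bond-master pointers and the RoCE GID
+ * tables with the current netdev state; remember which port needs its
+ * QP1 SMAC refreshed.
+ */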
+ spin_lock(&iboe->lock);
+ mlx4_foreach_ib_transport_port(port, ibdev->dev) {
+ enum ib_port_state port_state = IB_PORT_NOP;
+ struct net_device *old_master = iboe->masters[port - 1];
+ struct net_device *curr_netdev;
+ struct net_device *curr_master;
+
+ iboe->netdevs[port - 1] =
+ mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
+ if (iboe->netdevs[port - 1])
+ mlx4_ib_set_default_gid(ibdev,
+ iboe->netdevs[port - 1], port);
+ curr_netdev = iboe->netdevs[port - 1];
+
+ if (iboe->netdevs[port - 1] &&
+ netif_is_bond_slave(iboe->netdevs[port - 1])) {
+ iboe->masters[port - 1] = netdev_master_upper_dev_get(
+ iboe->netdevs[port - 1]);
+ } else {
+ iboe->masters[port - 1] = NULL;
+ }
+ curr_master = iboe->masters[port - 1];
+
+ if (dev == iboe->netdevs[port - 1] &&
+ (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
+ event == NETDEV_UP || event == NETDEV_CHANGE))
+ update_qps_port = port;
+
+ if (curr_netdev) {
+ port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ } else {
+ reset_gid_table(ibdev, port);
+ }
+ /* if using bonding/team and a slave port is down, we don't want the bond
+ * IP-based gids in the table, since flows that select a port by gid may
+ * get the down port.
+ */
+ if (curr_master && (port_state == IB_PORT_DOWN)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ }
+ /* if bonding is used it is possible that we add it to masters
+ * only after IP address is assigned to the net bonding
+ * interface.
+ */
+ if (curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_master, ibdev, port);
+ }
+
+ if (!curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
+ }
+ }
+
+ spin_unlock(&iboe->lock);
+
+ if (update_qps_port > 0)
+ mlx4_ib_update_qps(ibdev, dev, update_qps_port);
+}
+
+static int mlx4_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlx4_ib_dev *ibdev;
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+ mlx4_ib_scan_netdevs(ibdev, dev, event);
+
+ return NOTIFY_DONE;
+}
+
+static void init_pkeys(struct mlx4_ib_dev *ibdev)
+{
+ int port;
+ int slave;
int i;
+ if (mlx4_is_master(ibdev->dev)) {
+ for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
+ for (i = 0;
+ i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
+ ++i) {
+ ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
+ /* master has the identity virt2phys pkey mapping */
+ (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
+ ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
+ mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
+ ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
+ }
+ }
+ }
+ /* initialize pkey cache */
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
+ for (i = 0;
+ i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
+ ++i)
+ ibdev->pkeys.phys_pkey_cache[port-1][i] =
+ (i) ? 0 : 0xFFFF;
+ }
+ }
+}
+
+static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
+{
+ char name[80];
+ int eq_per_port = 0;
+ int added_eqs = 0;
+ int total_eqs = 0;
+ int i, j, eq;
+
+ /* Legacy mode or comp_pool is not large enough */
+ if (dev->caps.comp_pool == 0 ||
+ dev->caps.num_ports > dev->caps.comp_pool)
+ return;
+
+ eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
+ dev->caps.num_ports);
+
+ /* Init eq table */
+ added_eqs = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ added_eqs += eq_per_port;
+
+ total_eqs = dev->caps.num_comp_vectors + added_eqs;
+
+ ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
+ if (!ibdev->eq_table)
+ return;
- if (!mlx4_ib_version_printed) {
- printk(KERN_INFO "%s", mlx4_ib_version);
- ++mlx4_ib_version_printed;
+ ibdev->eq_added = added_eqs;
+
+ eq = 0;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
+ for (j = 0; j < eq_per_port; j++) {
+ snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
+ i, j, dev->pdev->bus->name);
+ /* Set IRQ for specific name (per ring) */
+ if (mlx4_assign_eq(dev, name, NULL,
+ &ibdev->eq_table[eq])) {
+ /* Use legacy (same as mlx4_en driver) */
+ pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
+ ibdev->eq_table[eq] =
+ (eq % dev->caps.num_comp_vectors);
+ }
+ eq++;
+ }
}
+ /* Fill the rest of the vector with legacy EQs */
+ for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
+ ibdev->eq_table[eq++] = i;
+
+ /* Advertise the new number of EQs to clients */
+ ibdev->ib_dev.num_comp_vectors = total_eqs;
+}
+
+static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
+{
+ int i;
+
+ /* no additional eqs were added */
+ if (!ibdev->eq_table)
+ return;
+
+ /* Reset the advertised EQ number */
+ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
+
+ /* Free only the added eqs */
+ for (i = 0; i < ibdev->eq_added; i++) {
+ /* Don't free legacy eqs if used */
+ if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
+ continue;
+ mlx4_release_eq(dev, ibdev->eq_table[i]);
+ }
+
+ kfree(ibdev->eq_table);
+}
+
+static void *mlx4_ib_add(struct mlx4_dev *dev)
+{
+ struct mlx4_ib_dev *ibdev;
+ int num_ports = 0;
+ int i, j;
+ int err;
+ struct mlx4_ib_iboe *iboe;
+ int ib_num_ports = 0;
+
+ pr_info_once("%s", mlx4_ib_version);
+
+ num_ports = 0;
+ mlx4_foreach_ib_transport_port(i, dev)
+ num_ports++;
+
+ /* No point in registering a device with no ports... */
+ if (num_ports == 0)
+ return NULL;
+
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
return NULL;
}
+ iboe = &ibdev->iboe;
+
if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
goto err_dealloc;
if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
goto err_pd;
- ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
+ PAGE_SIZE);
if (!ibdev->uar_map)
goto err_uar;
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
@@ -565,11 +1989,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
- ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
- ibdev->ib_dev.num_comp_vectors = 1;
+ ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
+ ibdev->num_ports = num_ports;
+ ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
+ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ if (dev->caps.userspace_caps)
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+ else
+ ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -591,10 +2021,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
ibdev->ib_dev.query_device = mlx4_ib_query_device;
ibdev->ib_dev.query_port = mlx4_ib_query_port;
+ ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
@@ -627,38 +2060,208 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
+ ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
+ ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
+ ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
- ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
- ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
- ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
- ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+ if (!mlx4_is_slave(ibdev->dev)) {
+ ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
+ ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
+ ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
+ ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
+ }
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
+ dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
+ ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
+ ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
+ ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
+
+ ibdev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ }
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
+ ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
+ ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+ ibdev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ }
+
+ if (check_flow_steering_support(dev)) {
+ ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
+ ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
+
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ }
+
+ mlx4_ib_alloc_eqs(dev, ibdev);
+
+ spin_lock_init(&iboe->lock);
if (init_node_data(ibdev))
goto err_map;
+ for (i = 0; i < ibdev->num_ports; ++i) {
+ mutex_init(&ibdev->qp1_proxy_lock[i]);
+ if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
+ if (err)
+ ibdev->counters[i] = -1;
+ } else {
+ ibdev->counters[i] = -1;
+ }
+ }
+
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
- if (ib_register_device(&ibdev->ib_dev))
- goto err_map;
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ ib_num_ports) {
+ ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+ err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+ MLX4_IB_UC_STEER_QPN_ALIGN,
+ &ibdev->steer_qpn_base);
+ if (err)
+ goto err_counter;
+
+ ibdev->ib_uc_qpns_bitmap =
+ kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+ sizeof(long),
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ goto err_steer_qp_release;
+ }
+
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+ dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base +
+ ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ }
+
+ if (ib_register_device(&ibdev->ib_dev, NULL))
+ goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
- for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
+ if (mlx4_ib_init_sriov(ibdev))
+ goto err_mad;
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+ if (!iboe->nb_inet.notifier_call) {
+ iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+ err = register_inetaddr_notifier(&iboe->nb_inet);
+ if (err) {
+ iboe->nb_inet.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!iboe->nb_inet6.notifier_call) {
+ iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
+ err = register_inet6addr_notifier(&iboe->nb_inet6);
+ if (err) {
+ iboe->nb_inet6.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#endif
+ for (i = 1 ; i <= ibdev->num_ports ; ++i)
+ reset_gid_table(ibdev, i);
+ rtnl_lock();
+ mlx4_ib_scan_netdevs(ibdev, NULL, 0);
+ rtnl_unlock();
+ mlx4_ib_init_gid_table(ibdev);
+ }
+
+ for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
if (device_create_file(&ibdev->ib_dev.dev,
- mlx4_class_attributes[i]))
- goto err_reg;
+ mlx4_class_attributes[j]))
+ goto err_notif;
}
+ ibdev->ib_active = true;
+
+ if (mlx4_is_mfunc(ibdev->dev))
+ init_pkeys(ibdev);
+
+ /* create paravirt contexts for any VFs which are active */
+ if (mlx4_is_master(ibdev->dev)) {
+ for (j = 0; j < MLX4_MFUNC_MAX; j++) {
+ if (j == mlx4_master_func_num(ibdev->dev))
+ continue;
+ if (mlx4_is_slave_active(ibdev->dev, j))
+ do_slave_init(ibdev, j, 1);
+ }
+ }
return ibdev;
+err_notif:
+ if (ibdev->iboe.nb.notifier_call) {
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb.notifier_call = NULL;
+ }
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
+ flush_workqueue(wq);
+
+ mlx4_ib_close_sriov(ibdev);
+
+err_mad:
+ mlx4_ib_mad_cleanup(ibdev);
+
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_steer_free_bitmap:
+ kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+err_counter:
+ for (; i; --i)
+ if (ibdev->counters[i - 1] != -1)
+ mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]);
+
err_map:
iounmap(ibdev->uar_map);
@@ -674,64 +2277,279 @@ err_dealloc:
return NULL;
}
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+ int offset;
+
+ WARN_ON(!dev->ib_uc_qpns_bitmap);
+
+ offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+ dev->steer_qpn_count,
+ get_count_order(count));
+ if (offset < 0)
+ return offset;
+
+ *qpn = dev->steer_qpn_base + offset;
+ return 0;
+}
+
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+ if (!qpn ||
+ dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return;
+
+ BUG_ON(qpn < dev->steer_qpn_base);
+
+ bitmap_release_region(dev->ib_uc_qpns_bitmap,
+ qpn - dev->steer_qpn_base,
+ get_count_order(count));
+}
+
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach)
+{
+ int err;
+ size_t flow_size;
+ struct ib_flow_attr *flow = NULL;
+ struct ib_flow_spec_ib *ib_spec;
+
+ if (is_attach) {
+ flow_size = sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_ib);
+ flow = kzalloc(flow_size, GFP_KERNEL);
+ if (!flow)
+ return -ENOMEM;
+ flow->port = mqp->port;
+ flow->num_of_specs = 1;
+ flow->size = flow_size;
+ ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
+ ib_spec->type = IB_FLOW_SPEC_IB;
+ ib_spec->size = sizeof(struct ib_flow_spec_ib);
+ /* Add an empty rule for IB L2 */
+ memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
+
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
+ IB_FLOW_DOMAIN_NIC,
+ MLX4_FS_REGULAR,
+ &mqp->reg_id);
+ } else {
+ err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+ }
+ kfree(flow);
+ return err;
+}
+
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
- for (p = 1; p <= dev->caps.num_ports; ++p)
- mlx4_CLOSE_PORT(dev, p);
-
+ mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
+ if (ibdev->iboe.nb.notifier_call) {
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb.notifier_call = NULL;
+ }
+
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
+ }
+
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
+
iounmap(ibdev->uar_map);
+ for (p = 0; p < ibdev->num_ports; ++p)
+ if (ibdev->counters[p] != -1)
+ mlx4_counter_free(ibdev->dev, ibdev->counters[p]);
+ mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
+ mlx4_CLOSE_PORT(dev, p);
+
+ mlx4_ib_free_eqs(dev, ibdev);
+
mlx4_uar_free(dev, &ibdev->priv_uar);
mlx4_pd_free(dev, ibdev->priv_pdn);
ib_dealloc_device(&ibdev->ib_dev);
}
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
+{
+ struct mlx4_ib_demux_work **dm = NULL;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ unsigned long flags;
+ struct mlx4_active_ports actv_ports;
+ unsigned int ports;
+ unsigned int first_port;
+
+ if (!mlx4_is_master(dev))
+ return;
+
+ actv_ports = mlx4_get_active_ports(dev, slave);
+ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+ first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
+
+ dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
+ if (!dm) {
+ pr_err("failed to allocate memory for tunneling qp update\n");
+ goto out;
+ }
+
+ for (i = 0; i < ports; i++) {
+ dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
+ if (!dm[i]) {
+ pr_err("failed to allocate memory for tunneling qp update work struct\n");
+ while (--i >= 0)
+ kfree(dm[i]);
+ goto out;
+ }
+ }
+ /* initialize or tear down tunnel QPs for the slave */
+ for (i = 0; i < ports; i++) {
+ INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
+ dm[i]->port = first_port + i + 1;
+ dm[i]->slave = slave;
+ dm[i]->do_init = do_init;
+ dm[i]->dev = ibdev;
+ spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
+ if (!ibdev->sriov.is_going_down)
+ queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
+ spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+ }
+out:
+ kfree(dm);
+ return;
+}
+
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, int port)
+ enum mlx4_dev_event event, unsigned long param)
{
struct ib_event ibev;
+ struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
+ struct mlx4_eqe *eqe = NULL;
+ struct ib_event_work *ew;
+ int p = 0;
+
+ if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ eqe = (struct mlx4_eqe *)param;
+ else
+ p = (int) param;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
+ if (p > ibdev->num_ports)
+ return;
+ if (mlx4_is_master(dev) &&
+ rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
+ IB_LINK_LAYER_INFINIBAND) {
+ mlx4_ib_invalidate_all_guid_record(ibdev, p);
+ }
ibev.event = IB_EVENT_PORT_ACTIVE;
break;
case MLX4_DEV_EVENT_PORT_DOWN:
+ if (p > ibdev->num_ports)
+ return;
ibev.event = IB_EVENT_PORT_ERR;
break;
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
+ ew = kmalloc(sizeof *ew, GFP_ATOMIC);
+ if (!ew) {
+ pr_err("failed to allocate memory for events work\n");
+ break;
+ }
+
+ INIT_WORK(&ew->work, handle_port_mgmt_change_event);
+ memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
+ ew->ib_dev = ibdev;
+ /* need to queue only for port owner, which uses GEN_EQE */
+ if (mlx4_is_master(dev))
+ queue_work(wq, &ew->work);
+ else
+ handle_port_mgmt_change_event(&ew->work);
+ return;
+
+ case MLX4_DEV_EVENT_SLAVE_INIT:
+ /* here, p is the slave id */
+ do_slave_init(ibdev, p, 1);
+ return;
+
+ case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+ /* here, p is the slave id */
+ do_slave_init(ibdev, p, 0);
+ return;
+
default:
return;
}
ibev.device = ibdev_ptr;
- ibev.element.port_num = port;
+ ibev.element.port_num = (u8) p;
ib_dispatch_event(&ibev);
}
static struct mlx4_interface mlx4_ib_interface = {
- .add = mlx4_ib_add,
- .remove = mlx4_ib_remove,
- .event = mlx4_ib_event
+ .add = mlx4_ib_add,
+ .remove = mlx4_ib_remove,
+ .event = mlx4_ib_event,
+ .protocol = MLX4_PROT_IB_IPV6
};
static int __init mlx4_ib_init(void)
{
- return mlx4_register_interface(&mlx4_ib_interface);
+ int err;
+
+ wq = create_singlethread_workqueue("mlx4_ib");
+ if (!wq)
+ return -ENOMEM;
+
+ err = mlx4_ib_mcg_init();
+ if (err)
+ goto clean_wq;
+
+ err = mlx4_register_interface(&mlx4_ib_interface);
+ if (err)
+ goto clean_mcg;
+
+ return 0;
+
+clean_mcg:
+ mlx4_ib_mcg_destroy();
+
+clean_wq:
+ destroy_workqueue(wq);
+ return err;
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
+ mlx4_ib_mcg_destroy();
+ destroy_workqueue(wq);
}
module_init(mlx4_ib_init);
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
new file mode 100644
index 00000000000..ed327e6c8fd
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -0,0 +1,1257 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
+
+#include <linux/mlx4/cmd.h>
+#include <linux/rbtree.h>
+#include <linux/delay.h>
+
+#include "mlx4_ib.h"
+
+#define MAX_VFS 80
+#define MAX_PEND_REQS_PER_FUNC 4
+#define MAD_TIMEOUT_MS 2000
+
+#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
+#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
+#define mcg_warn_group(group, format, arg...) \
+ pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
+ (group)->name, (group)->demux->port, ## arg)
+
+#define mcg_error_group(group, format, arg...) \
+ pr_err(" %16s: " format, (group)->name, ## arg)
+
+
+static union ib_gid mgid0;
+
+static struct workqueue_struct *clean_wq;
+
+enum mcast_state {
+ MCAST_NOT_MEMBER = 0,
+ MCAST_MEMBER,
+};
+
+enum mcast_group_state {
+ MCAST_IDLE,
+ MCAST_JOIN_SENT,
+ MCAST_LEAVE_SENT,
+ MCAST_RESP_READY
+};
+
+struct mcast_member {
+ enum mcast_state state;
+ uint8_t join_state;
+ int num_pend_reqs;
+ struct list_head pending;
+};
+
+struct ib_sa_mcmember_data {
+ union ib_gid mgid;
+ union ib_gid port_gid;
+ __be32 qkey;
+ __be16 mlid;
+ u8 mtusel_mtu;
+ u8 tclass;
+ __be16 pkey;
+ u8 ratesel_rate;
+ u8 lifetmsel_lifetm;
+ __be32 sl_flowlabel_hoplimit;
+ u8 scope_join_state;
+ u8 proxy_join;
+ u8 reserved[2];
+};
+
+struct mcast_group {
+ struct ib_sa_mcmember_data rec;
+ struct rb_node node;
+ struct list_head mgid0_list;
+ struct mlx4_ib_demux_ctx *demux;
+ struct mcast_member func[MAX_VFS];
+ struct mutex lock;
+ struct work_struct work;
+ struct list_head pending_list;
+ int members[3];
+ enum mcast_group_state state;
+ enum mcast_group_state prev_state;
+ struct ib_sa_mad response_sa_mad;
+ __be64 last_req_tid;
+
+ char name[33]; /* MGID string */
+ struct device_attribute dentry;
+
+ /* refcount is the reference count for the following:
+ 1. Each queued request
+ 2. Each invocation of the worker thread
+ 3. Membership of the port at the SA
+ */
+ atomic_t refcount;
+
+ /* delayed work to clean pending SM request */
+ struct delayed_work timeout_work;
+ struct list_head cleanup_list;
+};
+
+struct mcast_req {
+ int func;
+ struct ib_sa_mad sa_mad;
+ struct list_head group_list;
+ struct list_head func_list;
+ struct mcast_group *group;
+ int clean;
+};
+
+
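+/* drop a group reference that is never expected to be the last one; warn if it hits zero */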
+#define safe_atomic_dec(ref) \
+ do {\
+ if (atomic_dec_and_test(ref)) \
+ mcg_warn_group(group, "did not expect to reach zero\n"); \
+ } while (0)
+
+static const char *get_state_string(enum mcast_group_state state)
+{
+ switch (state) {
+ case MCAST_IDLE:
+ return "MCAST_IDLE";
+ case MCAST_JOIN_SENT:
+ return "MCAST_JOIN_SENT";
+ case MCAST_LEAVE_SENT:
+ return "MCAST_LEAVE_SENT";
+ case MCAST_RESP_READY:
+ return "MCAST_RESP_READY";
+ }
+ return "Invalid State";
+}
+
+static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
+ union ib_gid *mgid)
+{
+ struct rb_node *node = ctx->mcg_table.rb_node;
+ struct mcast_group *group;
+ int ret;
+
+ while (node) {
+ group = rb_entry(node, struct mcast_group, node);
+ ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+ if (!ret)
+ return group;
+
+ if (ret < 0)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+ return NULL;
+}
+
+static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
+ struct mcast_group *group)
+{
+ struct rb_node **link = &ctx->mcg_table.rb_node;
+ struct rb_node *parent = NULL;
+ struct mcast_group *cur_group;
+ int ret;
+
+ while (*link) {
+ parent = *link;
+ cur_group = rb_entry(parent, struct mcast_group, node);
+
+ ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+ sizeof group->rec.mgid);
+ if (ret < 0)
+ link = &(*link)->rb_left;
+ else if (ret > 0)
+ link = &(*link)->rb_right;
+ else
+ return cur_group;
+ }
+ rb_link_node(&group->node, parent, link);
+ rb_insert_color(&group->node, &ctx->mcg_table);
+ return NULL;
+}
+
+static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = ctx->dev;
+ struct ib_ah_attr ah_attr;
+
+ spin_lock(&dev->sm_lock);
+ if (!dev->sm_ah[ctx->port - 1]) {
+ /* port is not yet Active, sm_ah not ready */
+ spin_unlock(&dev->sm_lock);
+ return -EAGAIN;
+ }
+ mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+ spin_unlock(&dev->sm_lock);
+ return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
+ ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
+ &ah_attr, NULL, mad);
+}
+
+static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
+ struct ib_mad *mad)
+{
+ struct mlx4_ib_dev *dev = ctx->dev;
+ struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
+ struct ib_wc wc;
+ struct ib_ah_attr ah_attr;
+
+ /* Our agent might not yet be registered when mads start to arrive */
+ if (!agent)
+ return -EAGAIN;
+
+ ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+
+ if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
+ return -EINVAL;
+ wc.sl = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = ctx->port;
+ wc.slid = ah_attr.dlid; /* opensm lid */
+ wc.src_qp = 1;
+ return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
+}
+
+static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
+ int ret;
+
+ /* we rely on a mad request as arrived from a VF */
+ memcpy(&mad, sa_mad, sizeof mad);
+
+ /* fix port GID to be the real one (slave 0) */
+ sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
+
+ /* assign our own TID */
+ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
+ group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
+
+ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
+ /* set timeout handler */
+ if (!ret) {
+ /* calls mlx4_ib_mcg_timeout_handler */
+ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
+ msecs_to_jiffies(MAD_TIMEOUT_MS));
+ }
+
+ return ret;
+}
+
+static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
+ int ret;
+
+ memset(&mad, 0, sizeof mad);
+ mad.mad_hdr.base_version = 1;
+ mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad.mad_hdr.class_version = 2;
+ mad.mad_hdr.method = IB_SA_METHOD_DELETE;
+ mad.mad_hdr.status = cpu_to_be16(0);
+ mad.mad_hdr.class_specific = cpu_to_be16(0);
+ mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
+ group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
+ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad.mad_hdr.attr_mod = cpu_to_be32(0);
+ mad.sa_hdr.sm_key = 0x0;
+ mad.sa_hdr.attr_offset = cpu_to_be16(7);
+ mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+ *sa_data = group->rec;
+ sa_data->scope_join_state = join_state;
+
+ ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
+ if (ret)
+ group->state = MCAST_IDLE;
+
+ /* set timeout handler */
+ if (!ret) {
+ /* calls mlx4_ib_mcg_timeout_handler */
+ queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
+ msecs_to_jiffies(MAD_TIMEOUT_MS));
+ }
+
+ return ret;
+}
+
+static int send_reply_to_slave(int slave, struct mcast_group *group,
+ struct ib_sa_mad *req_sa_mad, u16 status)
+{
+ struct ib_sa_mad mad;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
+ struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
+ int ret;
+
+ memset(&mad, 0, sizeof mad);
+ mad.mad_hdr.base_version = 1;
+ mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
+ mad.mad_hdr.class_version = 2;
+ mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
+ mad.mad_hdr.status = cpu_to_be16(status);
+ mad.mad_hdr.class_specific = cpu_to_be16(0);
+ mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
+ *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
+ mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+ mad.mad_hdr.attr_mod = cpu_to_be32(0);
+ mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
+ mad.sa_hdr.attr_offset = cpu_to_be16(7);
+ mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
+
+ *sa_data = group->rec;
+
+ /* reconstruct VF's requested join_state and port_gid */
+ sa_data->scope_join_state &= 0xf0;
+ sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
+ memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
+
+ ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
+ return ret;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+ ib_sa_comp_mask selector_mask,
+ ib_sa_comp_mask value_mask,
+ u8 src_value, u8 dst_value)
+{
+ int err;
+ u8 selector = dst_value >> 6;
+ dst_value &= 0x3f;
+ src_value &= 0x3f;
+
+ if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+ return 0;
+
+ switch (selector) {
+ case IB_SA_GT:
+ err = (src_value <= dst_value);
+ break;
+ case IB_SA_LT:
+ err = (src_value >= dst_value);
+ break;
+ case IB_SA_EQ:
+ err = (src_value != dst_value);
+ break;
+ default:
+ err = 0;
+ break;
+ }
+
+ return err;
+}
+
+static u16 cmp_rec(struct ib_sa_mcmember_data *src,
+ struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
+{
+ /* src is group record, dst is request record */
+ /* MGID must already match */
+ /* Port_GID is always replaced with our own Port_GID, so it is a match */
+
+#define MAD_STATUS_REQ_INVALID 0x0200
+ if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+ IB_SA_MCMEMBER_REC_MTU,
+ src->mtusel_mtu, dst->mtusel_mtu))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+ src->tclass != dst->tclass)
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+ IB_SA_MCMEMBER_REC_RATE,
+ src->ratesel_rate, dst->ratesel_rate))
+ return MAD_STATUS_REQ_INVALID;
+ if (check_selector(comp_mask,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+ IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+ src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+ (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
+ (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
+ return MAD_STATUS_REQ_INVALID;
+ if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
+ (src->scope_join_state & 0xf0) !=
+ (dst->scope_join_state & 0xf0))
+ return MAD_STATUS_REQ_INVALID;
+
+ /* join_state checked separately, proxy_join ignored */
+
+ return 0;
+}
+
+/* Release the group; return 1 if this was the last release and the group
+ * was destroyed. Timeout work is cancelled synchronously. */
+static int release_group(struct mcast_group *group, int from_timeout_handler)
+{
+ struct mlx4_ib_demux_ctx *ctx = group->demux;
+ int nzgroup;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ mutex_lock(&group->lock);
+ if (atomic_dec_and_test(&group->refcount)) {
+ if (!from_timeout_handler) {
+ if (group->state != MCAST_IDLE &&
+ !cancel_delayed_work(&group->timeout_work)) {
+ atomic_inc(&group->refcount);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ return 0;
+ }
+ }
+
+ nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
+ if (nzgroup)
+ del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+ if (!list_empty(&group->pending_list))
+ mcg_warn_group(group, "releasing a group with non empty pending list\n");
+ if (nzgroup)
+ rb_erase(&group->node, &ctx->mcg_table);
+ list_del_init(&group->mgid0_list);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ kfree(group);
+ return 1;
+ } else {
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ }
+ return 0;
+}
+
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+ int i;
+
+ for (i = 0; i < 3; i++, join_state >>= 1)
+ if (join_state & 0x1)
+ group->members[i] += inc;
+}
+
+static u8 get_leave_state(struct mcast_group *group)
+{
+ u8 leave_state = 0;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (!group->members[i])
+ leave_state |= (1 << i);
+
+ return leave_state & (group->rec.scope_join_state & 7);
+}
+
+static int join_group(struct mcast_group *group, int slave, u8 join_mask)
+{
+ int ret = 0;
+ u8 join_state;
+
+ /* remove bits that slave is already member of, and adjust */
+ join_state = join_mask & (~group->func[slave].join_state);
+ adjust_membership(group, join_state, 1);
+ group->func[slave].join_state |= join_state;
+ if (group->func[slave].state != MCAST_MEMBER && join_state) {
+ group->func[slave].state = MCAST_MEMBER;
+ ret = 1;
+ }
+ return ret;
+}
+
+static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
+{
+ int ret = 0;
+
+ adjust_membership(group, leave_state, -1);
+ group->func[slave].join_state &= ~leave_state;
+ if (!group->func[slave].join_state) {
+ group->func[slave].state = MCAST_NOT_MEMBER;
+ ret = 1;
+ }
+ return ret;
+}
+
+static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
+{
+ if (group->func[slave].state != MCAST_MEMBER)
+ return MAD_STATUS_REQ_INVALID;
+
+ /* make sure we're not deleting unset bits */
+ if (~group->func[slave].join_state & leave_mask)
+ return MAD_STATUS_REQ_INVALID;
+
+ if (!leave_mask)
+ return MAD_STATUS_REQ_INVALID;
+
+ return 0;
+}
+
+static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct mcast_group *group;
+ struct mcast_req *req = NULL;
+
+ group = container_of(delay, typeof(*group), timeout_work);
+
+ mutex_lock(&group->lock);
+ if (group->state == MCAST_JOIN_SENT) {
+ if (!list_empty(&group->pending_list)) {
+ req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ --group->func[req->func].num_pend_reqs;
+ mutex_unlock(&group->lock);
+ kfree(req);
+ if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
+ if (release_group(group, 1))
+ return;
+ } else {
+ kfree(group);
+ return;
+ }
+ mutex_lock(&group->lock);
+ } else
+ mcg_warn_group(group, "DRIVER BUG\n");
+ } else if (group->state == MCAST_LEAVE_SENT) {
+ if (group->rec.scope_join_state & 7)
+ group->rec.scope_join_state &= 0xf8;
+ group->state = MCAST_IDLE;
+ mutex_unlock(&group->lock);
+ if (release_group(group, 1))
+ return;
+ mutex_lock(&group->lock);
+ } else
+ mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
+ group->state = MCAST_IDLE;
+ atomic_inc(&group->refcount);
+ if (!queue_work(group->demux->mcg_wq, &group->work))
+ safe_atomic_dec(&group->refcount);
+
+ mutex_unlock(&group->lock);
+}
+
+static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
+ struct mcast_req *req)
+{
+ u16 status;
+
+ if (req->clean)
+ leave_mask = group->func[req->func].join_state;
+
+ status = check_leave(group, req->func, leave_mask);
+ if (!status)
+ leave_group(group, req->func, leave_mask);
+
+ if (!req->clean)
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ return 1;
+}
+
+static int handle_join_req(struct mcast_group *group, u8 join_mask,
+ struct mcast_req *req)
+{
+ u8 group_join_state = group->rec.scope_join_state & 7;
+ int ref = 0;
+ u16 status;
+ struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+
+ if (join_mask == (group_join_state & join_mask)) {
+ /* port's membership need not change */
+ status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
+ if (!status)
+ join_group(group, req->func, join_mask);
+
+ --group->func[req->func].num_pend_reqs;
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ++ref;
+ } else {
+ /* port's membership needs to be updated */
+ group->prev_state = group->state;
+ if (send_join_to_wire(group, &req->sa_mad)) {
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ref = 1;
+ group->state = group->prev_state;
+ } else
+ group->state = MCAST_JOIN_SENT;
+ }
+
+ return ref;
+}
+
+static void mlx4_ib_mcg_work_handler(struct work_struct *work)
+{
+ struct mcast_group *group;
+ struct mcast_req *req = NULL;
+ struct ib_sa_mcmember_data *sa_data;
+ u8 req_join_state;
+ int rc = 1; /* release_count - this is for the scheduled work */
+ u16 status;
+ u8 method;
+
+ group = container_of(work, typeof(*group), work);
+
+ mutex_lock(&group->lock);
+
+ /* First, let's see if a response from SM is waiting regarding this group.
+ * If so, we need to update the group's REC. If this is a bad response, we
+ * may need to send a bad response to a VF waiting for it. If VF is waiting
+ * and this is a good response, the VF will be answered later in this func. */
+ if (group->state == MCAST_RESP_READY) {
+ /* cancels mlx4_ib_mcg_timeout_handler */
+ cancel_delayed_work(&group->timeout_work);
+ status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
+ method = group->response_sa_mad.mad_hdr.method;
+ if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
+ mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
+ be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
+ be64_to_cpu(group->last_req_tid));
+ group->state = group->prev_state;
+ goto process_requests;
+ }
+ if (status) {
+ if (!list_empty(&group->pending_list))
+ req = list_first_entry(&group->pending_list,
+ struct mcast_req, group_list);
+ if (method == IB_MGMT_METHOD_GET_RESP) {
+ if (req) {
+ send_reply_to_slave(req->func, group, &req->sa_mad, status);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ ++rc;
+ } else
+ mcg_warn_group(group, "no request for failed join\n");
+ } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
+ ++rc;
+ } else {
+ u8 resp_join_state;
+ u8 cur_join_state;
+
+ resp_join_state = ((struct ib_sa_mcmember_data *)
+ group->response_sa_mad.data)->scope_join_state & 7;
+ cur_join_state = group->rec.scope_join_state & 7;
+
+ if (method == IB_MGMT_METHOD_GET_RESP) {
+ /* successful join */
+ if (!cur_join_state && resp_join_state)
+ --rc;
+ } else if (!resp_join_state)
+ ++rc;
+ memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
+ }
+ group->state = MCAST_IDLE;
+ }
+
+process_requests:
+ /* We should now go over pending join/leave requests, as long as we are idle. */
+ while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
+ req_join_state = sa_data->scope_join_state & 0x7;
+
+ /* For a leave request, we will immediately answer the VF, and
+ * update our internal counters. The actual leave will be sent
+ * to SM later, if at all needed. We dequeue the request now. */
+ if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
+ rc += handle_leave_req(group, req_join_state, req);
+ else
+ rc += handle_join_req(group, req_join_state, req);
+ }
+
+ /* Handle leaves */
+ if (group->state == MCAST_IDLE) {
+ req_join_state = get_leave_state(group);
+ if (req_join_state) {
+ group->rec.scope_join_state &= ~req_join_state;
+ group->prev_state = group->state;
+ if (send_leave_to_wire(group, req_join_state)) {
+ group->state = group->prev_state;
+ ++rc;
+ } else
+ group->state = MCAST_LEAVE_SENT;
+ }
+ }
+
+ if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
+ goto process_requests;
+ mutex_unlock(&group->lock);
+
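+ /* rc is the number of group references to drop: the scheduled work, plus the requests and SA membership changes accounted above */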
+ while (rc--)
+ release_group(group, 0);
+}
+
+static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
+ __be64 tid,
+ union ib_gid *new_mgid)
+{
+ struct mcast_group *group = NULL, *cur_group;
+ struct mcast_req *req;
+ struct list_head *pos;
+ struct list_head *n;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
+ group = list_entry(pos, struct mcast_group, mgid0_list);
+ mutex_lock(&group->lock);
+ if (group->last_req_tid == tid) {
+ if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
+ group->rec.mgid = *new_mgid;
+ sprintf(group->name, "%016llx%016llx",
+ be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ be64_to_cpu(group->rec.mgid.global.interface_id));
+ list_del_init(&group->mgid0_list);
+ cur_group = mcast_insert(ctx, group);
+ if (cur_group) {
+ /* A race between our code and SM. Silently cleaning the new one */
+ req = list_first_entry(&group->pending_list,
+ struct mcast_req, group_list);
+ --group->func[req->func].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ release_group(group, 0);
+ return NULL;
+ }
+
+ atomic_inc(&group->refcount);
+ add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ return group;
+ } else {
+ struct mcast_req *tmp1, *tmp2;
+
+ list_del(&group->mgid0_list);
+ if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
+ cancel_delayed_work_sync(&group->timeout_work);
+
+ list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
+ list_del(&tmp1->group_list);
+ kfree(tmp1);
+ }
+ mutex_unlock(&group->lock);
+ mutex_unlock(&ctx->mcg_table_lock);
+ kfree(group);
+ return NULL;
+ }
+ }
+ mutex_unlock(&group->lock);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+
+ return NULL;
+}
+
+static ssize_t sysfs_show_group(struct device *dev,
+ struct device_attribute *attr, char *buf);
+
+static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
+ union ib_gid *mgid, int create,
+ gfp_t gfp_mask)
+{
+ struct mcast_group *group, *cur_group;
+ int is_mgid0;
+ int i;
+
+ is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+ if (!is_mgid0) {
+ group = mcast_find(ctx, mgid);
+ if (group)
+ goto found;
+ }
+
+ if (!create)
+ return ERR_PTR(-ENOENT);
+
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ group->demux = ctx;
+ group->rec.mgid = *mgid;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->mgid0_list);
+ for (i = 0; i < MAX_VFS; ++i)
+ INIT_LIST_HEAD(&group->func[i].pending);
+ INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
+ INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
+ mutex_init(&group->lock);
+ sprintf(group->name, "%016llx%016llx",
+ be64_to_cpu(group->rec.mgid.global.subnet_prefix),
+ be64_to_cpu(group->rec.mgid.global.interface_id));
+ sysfs_attr_init(&group->dentry.attr);
+ group->dentry.show = sysfs_show_group;
+ group->dentry.store = NULL;
+ group->dentry.attr.name = group->name;
+ group->dentry.attr.mode = 0400;
+ group->state = MCAST_IDLE;
+
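+ /* joins with the zero MGID are parked on mcg_mgid0_list until the SM reply supplies the real MGID (see search_relocate_mgid0_group) */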
+ if (is_mgid0) {
+ list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
+ goto found;
+ }
+
+ cur_group = mcast_insert(ctx, group);
+ if (cur_group) {
+ mcg_warn("group just showed up %s - confused\n", cur_group->name);
+ kfree(group);
+ return ERR_PTR(-EINVAL);
+ }
+
+ add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
+
+found:
+ atomic_inc(&group->refcount);
+ return group;
+}
+
+static void queue_req(struct mcast_req *req)
+{
+ struct mcast_group *group = req->group;
+
+ atomic_inc(&group->refcount); /* for the request */
+ atomic_inc(&group->refcount); /* for scheduling the work */
+ list_add_tail(&req->group_list, &group->pending_list);
+ list_add_tail(&req->func_list, &group->func[req->func].pending);
+ /* calls mlx4_ib_mcg_work_handler */
+ if (!queue_work(group->demux->mcg_wq, &group->work))
+ safe_atomic_dec(&group->refcount);
+}
+
+int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
+ struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
+ struct mcast_group *group;
+
+ switch (mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_SA_METHOD_DELETE_RESP:
+ mutex_lock(&ctx->mcg_table_lock);
+ group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (IS_ERR(group)) {
+ if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
+ __be64 tid = mad->mad_hdr.tid;
+ *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
+ group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
+ } else
+ group = NULL;
+ }
+
+ if (!group)
+ return 1;
+
+ mutex_lock(&group->lock);
+ group->response_sa_mad = *mad;
+ group->prev_state = group->state;
+ group->state = MCAST_RESP_READY;
+ /* calls mlx4_ib_mcg_work_handler */
+ atomic_inc(&group->refcount);
+ if (!queue_work(ctx->mcg_wq, &group->work))
+ safe_atomic_dec(&group->refcount);
+ mutex_unlock(&group->lock);
+ release_group(group, 0);
+ return 1; /* consumed */
+ case IB_MGMT_METHOD_SET:
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_SA_METHOD_GET_TABLE_RESP:
+ case IB_SA_METHOD_DELETE:
+ return 0; /* not consumed, pass-through to guest over tunnel */
+ default:
+ mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
+ port, mad->mad_hdr.method);
+ return 1; /* consumed */
+ }
+}
+
+int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
+ int slave, struct ib_sa_mad *sa_mad)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
+ struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
+ struct mcast_group *group;
+ struct mcast_req *req;
+ int may_create = 0;
+
+ if (ctx->flushing)
+ return -EAGAIN;
+
+ switch (sa_mad->mad_hdr.method) {
+ case IB_MGMT_METHOD_SET:
+ may_create = 1;
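+ /* fall through */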
+ case IB_SA_METHOD_DELETE:
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ req->func = slave;
+ req->sa_mad = *sa_mad;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (IS_ERR(group)) {
+ kfree(req);
+ return PTR_ERR(group);
+ }
+ mutex_lock(&group->lock);
+ if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
+ mutex_unlock(&group->lock);
+ mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
+ port, slave, MAX_PEND_REQS_PER_FUNC);
+ release_group(group, 0);
+ kfree(req);
+ return -ENOMEM;
+ }
+ ++group->func[slave].num_pend_reqs;
+ req->group = group;
+ queue_req(req);
+ mutex_unlock(&group->lock);
+ release_group(group, 0);
+ return 1; /* consumed */
+ case IB_SA_METHOD_GET_TABLE:
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_SA_METHOD_GET_TABLE_RESP:
+ case IB_SA_METHOD_DELETE_RESP:
+ return 0; /* not consumed, pass-through */
+ default:
+ mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
+ port, slave, sa_mad->mad_hdr.method);
+ return 1; /* consumed */
+ }
+}
+
+static ssize_t sysfs_show_group(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mcast_group *group =
+ container_of(attr, struct mcast_group, dentry);
+ struct mcast_req *req = NULL;
+ char pending_str[40];
+ char state_str[40];
+ ssize_t len = 0;
+ int f;
+
+ if (group->state == MCAST_IDLE)
+ sprintf(state_str, "%s", get_state_string(group->state));
+ else
+ sprintf(state_str, "%s(TID=0x%llx)",
+ get_state_string(group->state),
+ be64_to_cpu(group->last_req_tid));
+ if (list_empty(&group->pending_list)) {
+ sprintf(pending_str, "No");
+ } else {
+ req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+ sprintf(pending_str, "Yes(TID=0x%llx)",
+ be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ }
+ len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
+ group->rec.scope_join_state & 0xf,
+ group->members[2], group->members[1], group->members[0],
+ atomic_read(&group->refcount),
+ pending_str,
+ state_str);
+ for (f = 0; f < MAX_VFS; ++f)
+ if (group->func[f].state == MCAST_MEMBER)
+ len += sprintf(buf + len, "%d[%1x] ",
+ f, group->func[f].join_state);
+
+ len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
+ "%4x %4x %2x %2x)\n",
+ be16_to_cpu(group->rec.pkey),
+ be32_to_cpu(group->rec.qkey),
+ (group->rec.mtusel_mtu & 0xc0) >> 6,
+ group->rec.mtusel_mtu & 0x3f,
+ group->rec.tclass,
+ (group->rec.ratesel_rate & 0xc0) >> 6,
+ group->rec.ratesel_rate & 0x3f,
+ (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
+ (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
+ be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
+ group->rec.proxy_join);
+
+ return len;
+}
+
+int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
+{
+ char name[20];
+
+ atomic_set(&ctx->tid, 0);
+ sprintf(name, "mlx4_ib_mcg%d", ctx->port);
+ ctx->mcg_wq = create_singlethread_workqueue(name);
+ if (!ctx->mcg_wq)
+ return -ENOMEM;
+
+ mutex_init(&ctx->mcg_table_lock);
+ ctx->mcg_table = RB_ROOT;
+ INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
+ ctx->flushing = 0;
+
+ return 0;
+}
+
+static void force_clean_group(struct mcast_group *group)
+{
+ struct mcast_req *req, *tmp;
+
+ list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
+ list_del(&req->group_list);
+ kfree(req);
+ }
+ del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
+ rb_erase(&group->node, &group->demux->mcg_table);
+ kfree(group);
+}
+
+static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
+{
+ int i;
+ struct rb_node *p;
+ struct mcast_group *group;
+ unsigned long end;
+ int count;
+
+ for (i = 0; i < MAX_VFS; ++i)
+ clean_vf_mcast(ctx, i);
+
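+ /* wait (bounded by MAD_TIMEOUT_MS plus slack) for all groups to be released; anything left is force-cleaned below */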
+ end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
+ do {
+ count = 0;
+ mutex_lock(&ctx->mcg_table_lock);
+ for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
+ ++count;
+ mutex_unlock(&ctx->mcg_table_lock);
+ if (!count)
+ break;
+
+ msleep(1);
+ } while (time_after(end, jiffies));
+
+ flush_workqueue(ctx->mcg_wq);
+ if (destroy_wq)
+ destroy_workqueue(ctx->mcg_wq);
+
+ mutex_lock(&ctx->mcg_table_lock);
+ while ((p = rb_first(&ctx->mcg_table)) != NULL) {
+ group = rb_entry(p, struct mcast_group, node);
+ if (atomic_read(&group->refcount))
+ mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+
+ force_clean_group(group);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+}
+
+struct clean_work {
+ struct work_struct work;
+ struct mlx4_ib_demux_ctx *ctx;
+ int destroy_wq;
+};
+
+static void mcg_clean_task(struct work_struct *work)
+{
+ struct clean_work *cw = container_of(work, struct clean_work, work);
+
+ _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
+ cw->ctx->flushing = 0;
+ kfree(cw);
+}
+
+void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
+{
+ struct clean_work *work;
+
+ if (ctx->flushing)
+ return;
+
+ ctx->flushing = 1;
+
+ if (destroy_wq) {
+ _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
+ ctx->flushing = 0;
+ return;
+ }
+
+ work = kmalloc(sizeof *work, GFP_KERNEL);
+ if (!work) {
+ ctx->flushing = 0;
+ mcg_warn("failed allocating work for cleanup\n");
+ return;
+ }
+
+ work->ctx = ctx;
+ work->destroy_wq = destroy_wq;
+ INIT_WORK(&work->work, mcg_clean_task);
+ queue_work(clean_wq, &work->work);
+}
+
+static void build_leave_mad(struct mcast_req *req)
+{
+ struct ib_sa_mad *mad = &req->sa_mad;
+
+ mad->mad_hdr.method = IB_SA_METHOD_DELETE;
+}
+
+
+static void clear_pending_reqs(struct mcast_group *group, int vf)
+{
+ struct mcast_req *req, *tmp, *group_first = NULL;
+ int clear;
+ int pend = 0;
+
+ if (!list_empty(&group->pending_list))
+ group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);
+
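+ /* a request at the head of the group's pending list may have a MAD in flight; it can only be dropped if its timeout work is cancelled in time */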
+ list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
+ clear = 1;
+ if (group_first == req &&
+ (group->state == MCAST_JOIN_SENT ||
+ group->state == MCAST_LEAVE_SENT)) {
+ clear = cancel_delayed_work(&group->timeout_work);
+ pend = !clear;
+ group->state = MCAST_IDLE;
+ }
+ if (clear) {
+ --group->func[vf].num_pend_reqs;
+ list_del(&req->group_list);
+ list_del(&req->func_list);
+ kfree(req);
+ atomic_dec(&group->refcount);
+ }
+ }
+
+ if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
+ mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
+ list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
+ }
+}
+
+static int push_deleting_req(struct mcast_group *group, int slave)
+{
+ struct mcast_req *req;
+ struct mcast_req *pend_req;
+
+ if (!group->func[slave].join_state)
+ return 0;
+
+ req = kzalloc(sizeof *req, GFP_KERNEL);
+ if (!req) {
+ mcg_warn_group(group, "failed allocation - may leave stall groups\n");
+ return -ENOMEM;
+ }
+
+ if (!list_empty(&group->func[slave].pending)) {
+ pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
+ if (pend_req->clean) {
+ kfree(req);
+ return 0;
+ }
+ }
+
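+ /* mark as a driver-generated leave: no reply is sent back to the VF for clean requests */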
+ req->clean = 1;
+ req->func = slave;
+ req->group = group;
+ ++group->func[slave].num_pend_reqs;
+ build_leave_mad(req);
+ queue_req(req);
+ return 0;
+}
+
+void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
+{
+ struct mcast_group *group;
+ struct rb_node *p;
+
+ mutex_lock(&ctx->mcg_table_lock);
+ for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
+ group = rb_entry(p, struct mcast_group, node);
+ mutex_lock(&group->lock);
+ if (atomic_read(&group->refcount)) {
+ /* clear pending requests of this VF */
+ clear_pending_reqs(group, slave);
+ push_deleting_req(group, slave);
+ }
+ mutex_unlock(&group->lock);
+ }
+ mutex_unlock(&ctx->mcg_table_lock);
+}
+
+
+int mlx4_ib_mcg_init(void)
+{
+ clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
+ if (!clean_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_mcg_destroy(void)
+{
+ destroy_workqueue(clean_wq);
+}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c4cf5b69eef..369da3ca5d6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,13 +37,39 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_sa.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+#define MLX4_IB_DRV_NAME "mlx4_ib"
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
+
+#define mlx4_ib_warn(ibdev, format, arg...) \
+ dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
+
+enum {
+ MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
+ MLX4_IB_MAX_HEADROOM = 2048
+};
+
+#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
+#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
+
+/* module param to indicate whether the SM assigns the alias_GUID */
+extern int mlx4_ib_sm_guid_assign;
+
+#define MLX4_IB_UC_STEER_QPN_ALIGN 1
+#define MLX4_IB_UC_MAX_NUM_QPS 256
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@@ -55,9 +82,17 @@ struct mlx4_ib_pd {
u32 pdn;
};
+struct mlx4_ib_xrcd {
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+};
+
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
+ int entry_size;
};
struct mlx4_ib_cq_resize {
@@ -83,11 +118,28 @@ struct mlx4_ib_mr {
struct ib_umem *umem;
};
+struct mlx4_ib_mw {
+ struct ib_mw ibmw;
+ struct mlx4_mw mmw;
+};
+
+struct mlx4_ib_fast_reg_page_list {
+ struct ib_fast_reg_page_list ibfrpl;
+ __be64 *mapped_page_list;
+ dma_addr_t map;
+};
+
struct mlx4_ib_fmr {
struct ib_fmr ibfmr;
struct mlx4_fmr mfmr;
};
+struct mlx4_ib_flow {
+ struct ib_flow ibflow;
+ /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
+ u64 reg_id[2];
+};
+
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
@@ -101,8 +153,109 @@ struct mlx4_ib_wq {
};
enum mlx4_ib_qp_flags {
- MLX4_IB_QP_LSO = 1 << 0,
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+ MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
+ MLX4_IB_SRIOV_SQP = 1 << 31,
+};
+
+struct mlx4_ib_gid_entry {
+ struct list_head list;
+ union ib_gid gid;
+ int added;
+ u8 port;
+};
+
+enum mlx4_ib_qp_type {
+ /*
+ * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
+ * here (and in that order) since the MAD layer uses them as
+ * indices into a 2-entry table.
+ */
+ MLX4_IB_QPT_SMI = IB_QPT_SMI,
+ MLX4_IB_QPT_GSI = IB_QPT_GSI,
+
+ MLX4_IB_QPT_RC = IB_QPT_RC,
+ MLX4_IB_QPT_UC = IB_QPT_UC,
+ MLX4_IB_QPT_UD = IB_QPT_UD,
+ MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
+ MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
+ MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
+ MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
+ MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
+
+ MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
+ MLX4_IB_QPT_PROXY_SMI = 1 << 17,
+ MLX4_IB_QPT_PROXY_GSI = 1 << 18,
+ MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
+ MLX4_IB_QPT_TUN_SMI = 1 << 20,
+ MLX4_IB_QPT_TUN_GSI = 1 << 21,
+};
+
+#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
+ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
+
+enum mlx4_ib_mad_ifc_flags {
+ MLX4_MAD_IFC_IGNORE_MKEY = 1,
+ MLX4_MAD_IFC_IGNORE_BKEY = 2,
+ MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
+ MLX4_MAD_IFC_IGNORE_BKEY),
+ MLX4_MAD_IFC_NET_VIEW = 4,
+};
+
+enum {
+ MLX4_NUM_TUNNEL_BUFS = 256,
+};
+
+struct mlx4_ib_tunnel_header {
+ struct mlx4_av av;
+ __be32 remote_qpn;
+ __be32 qkey;
+ __be16 vlan;
+ u8 mac[6];
+ __be16 pkey_index;
+ u8 reserved[6];
+};
+
+struct mlx4_ib_buf {
+ void *addr;
+ dma_addr_t map;
+};
+
+struct mlx4_rcv_tunnel_hdr {
+ __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
+ * 0x0 - no vlan was in the packet
+ * 0x01 - C-VLAN was in the packet */
+ u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
+ u8 reserved;
+ __be16 pkey_index;
+ __be16 sl_vid;
+ __be16 slid_mac_47_32;
+ __be32 mac_31_0;
+};
+
+struct mlx4_ib_proxy_sqp_hdr {
+ struct ib_grh grh;
+ struct mlx4_rcv_tunnel_hdr tun;
+} __packed;
+
+struct mlx4_roce_smac_vlan_info {
+ u64 smac;
+ int smac_index;
+ int smac_port;
+ u64 candidate_smac;
+ int candidate_smac_index;
+ int candidate_smac_port;
+ u16 vid;
+ int vlan_index;
+ int vlan_port;
+ u16 candidate_vid;
+ int candidate_vlan_index;
+ int candidate_vlan_port;
+ int update_vid;
};
struct mlx4_ib_qp {
@@ -120,10 +273,12 @@ struct mlx4_ib_qp {
int sq_spare_wqes;
struct mlx4_ib_wq sq;
+ enum mlx4_ib_qp_type mlx4_ib_qp_type;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
struct mutex mutex;
+ u16 xrcdn;
u32 flags;
u8 port;
u8 alt_port;
@@ -131,6 +286,13 @@ struct mlx4_ib_qp {
u8 resp_depth;
u8 sq_no_prefetch;
u8 state;
+ int mlx_type;
+ struct list_head gid_list;
+ struct list_head steering_rules;
+ struct mlx4_ib_buf *sqp_proxy_rcv;
+ struct mlx4_roce_smac_vlan_info pri;
+ struct mlx4_roce_smac_vlan_info alt;
+ u64 reg_id;
};
struct mlx4_ib_srq {
@@ -150,12 +312,191 @@ struct mlx4_ib_srq {
struct mlx4_ib_ah {
struct ib_ah ibah;
- struct mlx4_av av;
+ union mlx4_ext_av av;
+};
+
+/****************************************/
+/* alias guid support */
+/****************************************/
+#define NUM_PORT_ALIAS_GUID 2
+#define NUM_ALIAS_GUID_IN_REC 8
+#define NUM_ALIAS_GUID_REC_IN_PORT 16
+#define GUID_REC_SIZE 8
+#define NUM_ALIAS_GUID_PER_PORT 128
+#define MLX4_NOT_SET_GUID (0x00LL)
+#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
+
+enum mlx4_guid_alias_rec_status {
+ MLX4_GUID_INFO_STATUS_IDLE,
+ MLX4_GUID_INFO_STATUS_SET,
+ MLX4_GUID_INFO_STATUS_PENDING,
+};
+
+enum mlx4_guid_alias_rec_ownership {
+ MLX4_GUID_DRIVER_ASSIGN,
+ MLX4_GUID_SYSADMIN_ASSIGN,
+ MLX4_GUID_NONE_ASSIGN, /* initial state of each record */
+};
+
+enum mlx4_guid_alias_rec_method {
+ MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
+ MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
+};
+
+struct mlx4_sriov_alias_guid_info_rec_det {
+ u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
+ ib_sa_comp_mask guid_indexes; /* indicates which of the 8 records are valid */
+ enum mlx4_guid_alias_rec_status status; /* administrative status of the record */
+ u8 method; /* set or delete */
+ enum mlx4_guid_alias_rec_ownership ownership; /* indicates who assigned this alias_guid record */
+};
+
+struct mlx4_sriov_alias_guid_port_rec_det {
+ struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
+ struct workqueue_struct *wq;
+ struct delayed_work alias_guid_work;
+ u8 port;
+ struct mlx4_sriov_alias_guid *parent;
+ struct list_head cb_list;
+};
+
+struct mlx4_sriov_alias_guid {
+ struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
+ spinlock_t ag_work_lock;
+ struct ib_sa_client *sa_client;
+};
+
+struct mlx4_ib_demux_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *dev;
+ int slave;
+ int do_init;
+ u8 port;
+
+};
+
+struct mlx4_ib_tun_tx_buf {
+ struct mlx4_ib_buf buf;
+ struct ib_ah *ah;
+};
+
+struct mlx4_ib_demux_pv_qp {
+ struct ib_qp *qp;
+ enum ib_qp_type proxy_qpt;
+ struct mlx4_ib_buf *ring;
+ struct mlx4_ib_tun_tx_buf *tx_ring;
+ spinlock_t tx_lock;
+ unsigned tx_ix_head;
+ unsigned tx_ix_tail;
+};
+
+enum mlx4_ib_demux_pv_state {
+ DEMUX_PV_STATE_DOWN,
+ DEMUX_PV_STATE_STARTING,
+ DEMUX_PV_STATE_ACTIVE,
+ DEMUX_PV_STATE_DOWNING,
+};
+
+struct mlx4_ib_demux_pv_ctx {
+ int port;
+ int slave;
+ enum mlx4_ib_demux_pv_state state;
+ int has_smi;
+ struct ib_device *ib_dev;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct work_struct work;
+ struct workqueue_struct *wq;
+ struct mlx4_ib_demux_pv_qp qp[2];
+};
+
+struct mlx4_ib_demux_ctx {
+ struct ib_device *ib_dev;
+ int port;
+ struct workqueue_struct *wq;
+ struct workqueue_struct *ud_wq;
+ spinlock_t ud_lock;
+ __be64 subnet_prefix;
+ __be64 guid_cache[128];
+ struct mlx4_ib_dev *dev;
+ /* the following lock protects both mcg_table and mcg_mgid0_list */
+ struct mutex mcg_table_lock;
+ struct rb_root mcg_table;
+ struct list_head mcg_mgid0_list;
+ struct workqueue_struct *mcg_wq;
+ struct mlx4_ib_demux_pv_ctx **tun;
+ atomic_t tid;
+ int flushing; /* flushing the work queue */
+};
+
+struct mlx4_ib_sriov {
+ struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
+ struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
+ /* take this spinlock with the irq-saving variants because
+ * it may be acquired from interrupt context. */
+ spinlock_t going_down_lock;
+ int is_going_down;
+
+ struct mlx4_sriov_alias_guid alias_guid;
+
+ /* CM paravirtualization fields */
+ struct list_head cm_list;
+ spinlock_t id_map_lock;
+ struct rb_root sl_id_map;
+ struct idr pv_id_table;
+};
+
+struct mlx4_ib_iboe {
+ spinlock_t lock;
+ struct net_device *netdevs[MLX4_MAX_PORTS];
+ struct net_device *masters[MLX4_MAX_PORTS];
+ struct notifier_block nb;
+ struct notifier_block nb_inet;
+ struct notifier_block nb_inet6;
+ union ib_gid gid_table[MLX4_MAX_PORTS][128];
+};
+
+struct pkey_mgt {
+ u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+ struct list_head pkey_port_list[MLX4_MFUNC_MAX];
+ struct kobject *device_parent[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_ib_iov_sysfs_attr {
+ void *ctx;
+ struct kobject *kobj;
+ unsigned long data;
+ u32 entry_num;
+ char name[15];
+ struct device_attribute dentry;
+ struct device *dev;
+};
+
+struct mlx4_ib_iov_sysfs_attr_ar {
+ struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
+};
+
+struct mlx4_ib_iov_port {
+ char name[100];
+ u8 num;
+ struct mlx4_ib_dev *dev;
+ struct list_head list;
+ struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
+ struct ib_port_attr attr;
+ struct kobject *cur_port;
+ struct kobject *admin_alias_parent;
+ struct kobject *gids_parent;
+ struct kobject *pkeys_parent;
+ struct kobject *mcgs_parent;
+ struct mlx4_ib_iov_sysfs_attr mcg_dentry;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
+ int num_ports;
void __iomem *uar_map;
struct mlx4_uar priv_uar;
@@ -165,8 +506,39 @@ struct mlx4_ib_dev {
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
+ struct mlx4_ib_sriov sriov;
struct mutex cap_mask_mutex;
+ bool ib_active;
+ struct mlx4_ib_iboe iboe;
+ int counters[MLX4_MAX_PORTS];
+ int *eq_table;
+ int eq_added;
+ struct kobject *iov_parent;
+ struct kobject *ports_parent;
+ struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
+ struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
+ struct pkey_mgt pkeys;
+ unsigned long *ib_uc_qpns_bitmap;
+ int steer_qpn_count;
+ int steer_qpn_base;
+ int steering_support;
+ struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
+ /* lock when destroying qp1_proxy and getting netdev events */
+ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
+};
+
+struct ib_event_work {
+ struct work_struct work;
+ struct mlx4_ib_dev *ib_dev;
+ struct mlx4_eqe ib_eqe;
+};
+
+struct mlx4_ib_qp_tunnel_init_attr {
+ struct ib_qp_init_attr init_attr;
+ int slave;
+ enum ib_qp_type proxy_qp_type;
+ u8 port;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -184,6 +556,11 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
return container_of(ibpd, struct mlx4_ib_pd, ibpd);
}
+static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd);
+}
+
static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx4_ib_cq, ibcq);
@@ -199,10 +576,26 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
+static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct mlx4_ib_mw, ibmw);
+}
+
+static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+ return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
+}
+
static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
}
+
+static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
+{
+ return container_of(ibflow, struct mlx4_ib_flow, ibflow);
+}
+
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
@@ -228,6 +621,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
+
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -239,6 +635,15 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int mlx4_ib_dealloc_mw(struct ib_mw *mw);
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len);
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len);
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
@@ -279,7 +684,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -294,10 +699,94 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props, int netw_view);
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey, int netw_view);
+
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid, int netw_view);
-static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
+static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
- return !!(ah->av.g_slid & 0x80);
+ u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
+
+ if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET)
+ return true;
+
+ return !!(ah->av.ib.g_slid & 0x80);
}
+int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
+void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
+void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
+int mlx4_ib_mcg_init(void);
+void mlx4_ib_mcg_destroy(void);
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
+
+int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *sa_mad);
+int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
+ struct ib_sa_mad *mad);
+
+int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ union ib_gid *gid);
+
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+ enum ib_event_type type);
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work);
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type qpt, struct ib_wc *wc,
+ struct ib_grh *grh, struct ib_mad *mad);
+
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+ enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+ u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+ struct ib_mad *mad);
+
+__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
+
+int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
+ struct ib_mad *mad);
+
+int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
+ struct ib_mad *mad);
+
+void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
+void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
+
+/* alias guid support */
+void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
+int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
+void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
+void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
+
+void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num,
+ u8 port_num, u8 *p_data);
+
+void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
+ int block_num, u8 port_num,
+ u8 *p_data);
+
+int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr);
+void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr);
+ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+
+int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device);
+
+void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
+
+__be64 mlx4_ib_gen_node_guid(void);
+
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 68e92485fc7..cb2a8727f3f 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,6 +31,8 @@
* SOFTWARE.
*/
+#include <linux/slab.h>
+
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@@ -38,9 +41,19 @@ static u32 convert_access(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
+ (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
MLX4_PERM_LOCAL_READ;
}
+static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
+{
+ switch (type) {
+ case IB_MW_TYPE_1: return MLX4_MW_TYPE_1;
+ case IB_MW_TYPE_2: return MLX4_MW_TYPE_2;
+ default: return -1;
+ }
+}
+
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
@@ -65,7 +78,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
return &mr->ibmr;
err_mr:
- mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+ (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_free:
kfree(mr);
@@ -77,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
- struct ib_umem_chunk *chunk;
- int i, j, k;
+ int i, k, entry;
int n;
int len;
int err = 0;
+ struct scatterlist *sg;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
@@ -89,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
i = n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- umem->page_size * k;
- /*
- * Be friendly to mlx4_write_mtt() and
- * pass it chunks of appropriate size.
- */
- if (i == PAGE_SIZE / sizeof (u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, n,
- i, pages);
- if (err)
- goto out;
- n += i;
- i = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> mtt->page_shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ umem->page_size * k;
+ /*
+ * Be friendly to mlx4_write_mtt() and
+ * pass it chunks of appropriate size.
+ */
+ if (i == PAGE_SIZE / sizeof (u64)) {
+ err = mlx4_write_mtt(dev->dev, mtt, n,
+ i, pages);
+ if (err)
+ goto out;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
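Editor's note: the hunk above batches DMA addresses into a page-sized u64 buffer and flushes it to mlx4_write_mtt() whenever the buffer fills, plus once for the tail. A minimal userspace sketch of that accumulate-and-flush pattern, with a hypothetical write_batch() standing in for the driver call:

/*
 * Sketch only: write_batch() and BATCH are illustrative stand-ins for
 * mlx4_write_mtt() and PAGE_SIZE / sizeof(u64).
 */
#include <stdio.h>
#include <stdint.h>

#define BATCH 512

static int write_batch(int start, int n, const uint64_t *pages)
{
	printf("flush %d entries at index %d\n", n, start);
	return 0;			/* 0 on success, like mlx4_write_mtt() */
}

static int write_all(const uint64_t *addrs, int total)
{
	uint64_t pages[BATCH];
	int i = 0, n = 0, k, err;

	for (k = 0; k < total; ++k) {
		pages[i++] = addrs[k];
		if (i == BATCH) {	/* staging buffer full: flush it */
			err = write_batch(n, i, pages);
			if (err)
				return err;
			n += i;
			i = 0;
		}
	}
	return i ? write_batch(n, i, pages) : 0;	/* flush the tail */
}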
@@ -160,7 +172,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return &mr->ibmr;
err_mr:
- mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
+ (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_umem:
ib_umem_release(mr->umem);
@@ -174,8 +186,11 @@ err_free:
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
+ int ret;
- mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
+ if (ret)
+ return ret;
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
@@ -183,6 +198,149 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mw *mw;
+ int err;
+
+ mw = kmalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
+ to_mlx4_type(type), &mw->mmw);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mw_enable(dev->dev, &mw->mmw);
+ if (err)
+ goto err_mw;
+
+ mw->ibmw.rkey = mw->mmw.key;
+
+ return &mw->ibmw;
+
+err_mw:
+ mlx4_mw_free(dev->dev, &mw->mmw);
+
+err_free:
+ kfree(mw);
+
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind)
+{
+ struct ib_send_wr wr;
+ struct ib_send_wr *bad_wr;
+ int ret;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.opcode = IB_WR_BIND_MW;
+ wr.wr_id = mw_bind->wr_id;
+ wr.send_flags = mw_bind->send_flags;
+ wr.wr.bind_mw.mw = mw;
+ wr.wr.bind_mw.bind_info = mw_bind->bind_info;
+ wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
+
+ ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
+ if (!ret)
+ mw->rkey = wr.wr.bind_mw.rkey;
+
+ return ret;
+}
+
+int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct mlx4_ib_mw *mw = to_mmw(ibmw);
+
+ mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
+ kfree(mw);
+
+ return 0;
+}
+
+struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ struct mlx4_ib_mr *mr;
+ int err;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
+ max_page_list_len, 0, &mr->mmr);
+ if (err)
+ goto err_free;
+
+ err = mlx4_mr_enable(dev->dev, &mr->mmr);
+ if (err)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_mr:
+ (void) mlx4_mr_free(dev->dev, &mr->mmr);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_fast_reg_page_list *mfrpl;
+ int size = page_list_len * sizeof (u64);
+
+ if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
+ return ERR_PTR(-EINVAL);
+
+ mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
+ if (!mfrpl)
+ return ERR_PTR(-ENOMEM);
+
+ mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
+ if (!mfrpl->ibfrpl.page_list)
+ goto err_free;
+
+ mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
+ size, &mfrpl->map,
+ GFP_KERNEL);
+ if (!mfrpl->mapped_page_list)
+ goto err_free;
+
+ WARN_ON(mfrpl->map & 0x3f);
+
+ return &mfrpl->ibfrpl;
+
+err_free:
+ kfree(mfrpl->ibfrpl.page_list);
+ kfree(mfrpl);
+ return ERR_PTR(-ENOMEM);
+}
+
+void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ struct mlx4_ib_dev *dev = to_mdev(page_list->device);
+ struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+ int size = page_list->max_page_list_len * sizeof (u64);
+
+ dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
+ mfrpl->map);
+ kfree(mfrpl->ibfrpl.page_list);
+ kfree(mfrpl);
+}
+
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
struct ib_fmr_attr *fmr_attr)
{
@@ -209,7 +367,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
return &fmr->ibfmr;
err_mr:
- mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
+ (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
err_free:
kfree(fmr);
@@ -256,7 +414,7 @@ int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
err = mlx4_SYNC_TPT(mdev);
if (err)
- printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
+ pr_warn("SYNC_TPT error %d when "
"unmapping FMRs\n", err);
return 0;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 89eb6cbe592..67780452f0c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -31,9 +32,13 @@
*/
#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
#include <linux/mlx4/qp.h>
@@ -46,14 +51,24 @@ enum {
enum {
MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
- MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
+ MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
+ MLX4_IB_LINK_TYPE_IB = 0,
+ MLX4_IB_LINK_TYPE_ETH = 1
};
enum {
/*
- * Largest possible UD header: send with GRH and immediate data.
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
*/
- MLX4_IB_UD_HEADER_SIZE = 72
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
+};
+
+enum {
+ MLX4_IB_IBOE_ETHERTYPE = 0x8915
};
struct mlx4_ib_sqp {
@@ -66,18 +81,45 @@ struct mlx4_ib_sqp {
};
enum {
- MLX4_IB_MIN_SQ_STRIDE = 6
+ MLX4_IB_MIN_SQ_STRIDE = 6,
+ MLX4_IB_CACHE_LINE_SIZE = 64,
+};
+
+enum {
+ MLX4_RAW_QP_MTU = 7,
+ MLX4_RAW_QP_MSGMAX = 31,
};
+#ifndef ETH_ALEN
+#define ETH_ALEN 6
+#endif
+static inline u64 mlx4_mac_to_u64(u8 *addr)
+{
+ u64 mac = 0;
+ int i;
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac <<= 8;
+ mac |= addr[i];
+ }
+ return mac;
+}
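Editor's note: mlx4_mac_to_u64() above packs the six address bytes into the low 48 bits of a u64, most significant byte first. A small userspace check of that packing (the helper is copied here only for illustration):

#include <stdio.h>
#include <stdint.h>

static uint64_t mac_to_u64(const uint8_t *addr)
{
	uint64_t mac = 0;
	int i;

	for (i = 0; i < 6; i++) {
		mac <<= 8;
		mac |= addr[i];
	}
	return mac;
}

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	/* prints 0x001122334455 */
	printf("0x%012llx\n", (unsigned long long)mac_to_u64(mac));
	return 0;
}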
+
static const __be32 mlx4_ib_opcode[] = {
- [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
- [IB_WR_LSO] = __constant_cpu_to_be32(MLX4_OPCODE_LSO),
- [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
- [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
- [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
- [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
- [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
- [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+ [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
+ [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
+ [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+ [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+ [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+ [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+ [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+ [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+ [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+ [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
+ [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
+ [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
+ [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -85,16 +127,62 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
return container_of(mqp, struct mlx4_ib_sqp, qp);
}
+static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+
+ return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
+ qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
+ 8 * MLX4_MFUNC_MAX;
+}
+
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
- qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
+ int proxy_sqp = 0;
+ int real_sqp = 0;
+ int i;
+ /* PPF or Native -- real SQP */
+ real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+ qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+ qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
+ if (real_sqp)
+ return 1;
+ /* VF or PF -- proxy SQP */
+ if (mlx4_is_mfunc(dev->dev)) {
+ for (i = 0; i < dev->dev->caps.num_ports; i++) {
+ if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
+ qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
+ proxy_sqp = 1;
+ break;
+ }
+ }
+ }
+ return proxy_sqp;
}
+/* used for INIT/CLOSE port logic */
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
- return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
- qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
+ int proxy_qp0 = 0;
+ int real_qp0 = 0;
+ int i;
+ /* PPF or Native -- real QP0 */
+ real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
+ qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
+ qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
+ if (real_qp0)
+ return 1;
+ /* VF or PF -- proxy QP0 */
+ if (mlx4_is_mfunc(dev->dev)) {
+ for (i = 0; i < dev->dev->caps.num_ports; i++) {
+ if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
+ proxy_qp0 = 1;
+ break;
+ }
+ }
+ }
+ return proxy_qp0;
}
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -236,7 +324,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on QP %06x\n", type, qp->qpn);
return;
}
@@ -245,7 +333,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
}
}
-static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
+static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
/*
* UD WQEs must have a datagram segment.
@@ -254,19 +342,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
* header and space for the ICRC).
*/
switch (type) {
- case IB_QPT_UD:
+ case MLX4_IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
- ((flags & MLX4_IB_QP_LSO) ? 64 : 0);
- case IB_QPT_UC:
+ ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg) + 64;
+ case MLX4_IB_QPT_TUN_SMI_OWNER:
+ case MLX4_IB_QPT_TUN_GSI:
+ return sizeof (struct mlx4_wqe_ctrl_seg) +
+ sizeof (struct mlx4_wqe_datagram_seg);
+
+ case MLX4_IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
- case IB_QPT_RC:
+ case MLX4_IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
- case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
ALIGN(MLX4_IB_UD_HEADER_SIZE +
DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -282,15 +380,14 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_srq, struct mlx4_ib_qp *qp)
+ int is_user, int has_rq, struct mlx4_ib_qp *qp)
{
/* Sanity check RQ size before proceeding */
- if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
- cap->max_recv_sge > dev->dev->caps.max_rq_sg)
+ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
+ cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg))
return -EINVAL;
- if (has_srq) {
- /* QPs attached to an SRQ should have no RQ */
+ if (!has_rq) {
if (cap->max_recv_wr)
return -EINVAL;
@@ -305,20 +402,29 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
}
- cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
- cap->max_recv_sge = qp->rq.max_gs;
+ /* leave userspace return values as they were, so as not to break ABI */
+ if (is_user) {
+ cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
+ cap->max_recv_sge = qp->rq.max_gs;
+ } else {
+ cap->max_recv_wr = qp->rq.max_post =
+ min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
+ cap->max_recv_sge = min(qp->rq.max_gs,
+ min(dev->dev->caps.max_sq_sg,
+ dev->dev->caps.max_rq_sg));
+ }
return 0;
}
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- enum ib_qp_type type, struct mlx4_ib_qp *qp)
+ enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
{
int s;
/* Sanity check SQ size before proceeding */
- if (cap->max_send_wr > dev->dev->caps.max_wqes ||
- cap->max_send_sge > dev->dev->caps.max_sq_sg ||
+ if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) ||
+ cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) ||
cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
return -EINVAL;
@@ -327,7 +433,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
* For MLX transport we need 2 extra S/G entries:
* one for the header and one for the checksum at the end
*/
- if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
+ if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
+ type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
return -EINVAL;
@@ -348,7 +455,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
* anymore, so we do this only if selective signaling is off.
*
* Further, on 32-bit platforms, we can't use vmap() to make
- * the QP buffer virtually contigious. Thus we have to use
+ * the QP buffer virtually contiguous. Thus we have to use
* constant-sized WRs to make sure a WR is always fully within
* a single page-sized chunk.
*
@@ -371,7 +478,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
*/
if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
qp->sq_signal_bits && BITS_PER_LONG == 64 &&
- type != IB_QPT_SMI && type != IB_QPT_GSI)
+ type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
+ !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
qp->sq.wqe_shift = ilog2(64);
else
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -443,21 +552,157 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
return 0;
}
+static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
+{
+ int i;
+
+ qp->sqp_proxy_rcv =
+ kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
+ GFP_KERNEL);
+ if (!qp->sqp_proxy_rcv)
+ return -ENOMEM;
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ qp->sqp_proxy_rcv[i].addr =
+ kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ GFP_KERNEL);
+ if (!qp->sqp_proxy_rcv[i].addr)
+ goto err;
+ qp->sqp_proxy_rcv[i].map =
+ ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ }
+ return 0;
+
+err:
+ while (i > 0) {
+ --i;
+ ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ }
+ kfree(qp->sqp_proxy_rcv);
+ qp->sqp_proxy_rcv = NULL;
+ return -ENOMEM;
+}
+
+static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
+{
+ int i;
+
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ kfree(qp->sqp_proxy_rcv[i].addr);
+ }
+ kfree(qp->sqp_proxy_rcv);
+}
+
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
+ return 0;
+
+ return !attr->srq;
+}
+
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i])
+ return !!dev->caps.qp0_qkey[i];
+ }
+ return 0;
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+ gfp_t gfp)
{
+ int qpn;
int err;
+ struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_qp *qp;
+ enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
+
+ /* When tunneling special qps, we use a plain UD qp */
+ if (sqpn) {
+ if (mlx4_is_mfunc(dev->dev) &&
+ (!mlx4_is_master(dev->dev) ||
+ !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
+ if (init_attr->qp_type == IB_QPT_GSI)
+ qp_type = MLX4_IB_QPT_PROXY_GSI;
+ else {
+ if (mlx4_is_master(dev->dev) ||
+ qp0_enabled_vf(dev->dev, sqpn))
+ qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_PROXY_SMI;
+ }
+ }
+ qpn = sqpn;
+ /* add extra sg entry for tunneling */
+ init_attr->cap.max_recv_sge++;
+ } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
+ struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
+ container_of(init_attr,
+ struct mlx4_ib_qp_tunnel_init_attr, init_attr);
+ if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
+ tnl_init->proxy_qp_type != IB_QPT_GSI) ||
+ !mlx4_is_master(dev->dev))
+ return -EINVAL;
+ if (tnl_init->proxy_qp_type == IB_QPT_GSI)
+ qp_type = MLX4_IB_QPT_TUN_GSI;
+ else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+ mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+ tnl_init->port))
+ qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_TUN_SMI;
+ /* we are definitely in the PPF here, since we are creating
+ * tunnel QPs. base_tunnel_sqpn is therefore valid. */
+ qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
+ + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
+ sqpn = qpn;
+ }
+
+ if (!*caller_qp) {
+ if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
+ if (!sqp)
+ return -ENOMEM;
+ qp = &sqp->qp;
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ } else {
+ qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
+ if (!qp)
+ return -ENOMEM;
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ }
+ } else
+ qp = *caller_qp;
+
+ qp->mlx4_ib_qp_type = qp_type;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
if (err)
goto err;
@@ -491,7 +736,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- if (!init_attr->srq) {
+ if (qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
if (err)
@@ -506,19 +751,27 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
- err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (dev->steering_support ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ qp->flags |= MLX4_IB_QP_NETIF;
+ else
+ goto err;
+ }
+
+ err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
- if (!init_attr->srq) {
- err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+ if (qp_has_rq(init_attr)) {
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
if (err)
goto err;
*qp->db.db = 0;
}
- if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
err = -ENOMEM;
goto err_db;
}
@@ -528,22 +781,47 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
}
}
- err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
+ if (sqpn) {
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ if (alloc_proxy_bufs(pd->device, qp)) {
+ err = -ENOMEM;
+ goto err_wrid;
+ }
+ }
+ } else {
+ /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
+ * BlueFlame setup flow wrongly causes VLAN insertion. */
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET)
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
+ else
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1,
+ &qpn);
+ if (err)
+ goto err_proxy;
+ }
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
if (err)
- goto err_wrid;
+ goto err_qpn;
+
+ if (init_attr->qp_type == IB_QPT_XRC_TGT)
+ qp->mqp.qpn |= (1 << 23);
/*
* Hardware wants QPN written in big-endian order (after
@@ -553,14 +831,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
qp->mqp.event = mlx4_ib_qp_event;
-
+ if (!*caller_qp)
+ *caller_qp = qp;
return 0;
+err_qpn:
+ if (!sqpn) {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ }
+err_proxy:
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
+ free_proxy_bufs(pd->device, qp);
err_wrid:
if (pd->uobject) {
- if (!init_attr->srq)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
- &qp->db);
+ if (qp_has_rq(init_attr))
+ mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
@@ -576,10 +864,12 @@ err_buf:
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
- if (!pd->uobject && !init_attr->srq)
+ if (!pd->uobject && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
err:
+ if (!*caller_qp)
+ kfree(qp);
return err;
}
@@ -598,10 +888,12 @@ static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
}
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
@@ -611,10 +903,12 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
}
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
@@ -623,19 +917,76 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
}
}
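Editor's note: mlx4_ib_lock_cqs()/mlx4_ib_unlock_cqs() above avoid AB-BA deadlocks by always taking the lock of the CQ with the lower CQN first, and by taking the lock only once when the send and receive CQ are the same object. A minimal pthreads sketch of that lock-ordering idiom (userspace, no sparse annotations):

#include <pthread.h>

struct cq { int cqn; pthread_mutex_t lock; };

static void lock_cqs(struct cq *send_cq, struct cq *recv_cq)
{
	if (send_cq == recv_cq) {
		pthread_mutex_lock(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_mutex_lock(&send_cq->lock);
		pthread_mutex_lock(&recv_cq->lock);
	} else {
		pthread_mutex_lock(&recv_cq->lock);
		pthread_mutex_lock(&send_cq->lock);
	}
}

static void unlock_cqs(struct cq *send_cq, struct cq *recv_cq)
{
	if (send_cq == recv_cq) {
		pthread_mutex_unlock(&send_cq->lock);
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_mutex_unlock(&recv_cq->lock);
		pthread_mutex_unlock(&send_cq->lock);
	} else {
		pthread_mutex_unlock(&send_cq->lock);
		pthread_mutex_unlock(&recv_cq->lock);
	}
}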
+static void del_gid_entries(struct mlx4_ib_qp *qp)
+{
+ struct mlx4_ib_gid_entry *ge, *tmp;
+
+ list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
+ list_del(&ge->list);
+ kfree(ge);
+ }
+}
+
+static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
+{
+ if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
+ return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
+ else
+ return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx4_ib_qp *qp,
+ struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_XRC_TGT:
+ *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
+ *recv_cq = *send_cq;
+ break;
+ case IB_QPT_XRC_INI:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = *send_cq;
+ break;
+ default:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ break;
+ }
+}
+
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
- if (qp->state != IB_QPS_RESET)
+ if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
- printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
+ pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+ }
- send_cq = to_mcq(qp->ibqp.send_cq);
- recv_cq = to_mcq(qp->ibqp.recv_cq);
+ get_cqs(qp, &send_cq, &recv_cq);
mlx4_ib_lock_cqs(send_cq, recv_cq);
@@ -651,59 +1002,117 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_ib_unlock_cqs(send_cq, recv_cq);
mlx4_qp_free(dev->dev, &qp->mqp);
+
+ if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ }
+
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
- if (!qp->ibqp.srq)
+ if (qp->rq.wqe_cnt)
mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
&qp->db);
ib_umem_release(qp->umem);
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
+ free_proxy_bufs(&dev->ib_dev, qp);
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
- if (!qp->ibqp.srq)
+ if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
+
+ del_gid_entries(qp);
+}
+
+static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
+{
+ /* Native or PPF */
+ if (!mlx4_is_mfunc(dev->dev) ||
+ (mlx4_is_master(dev->dev) &&
+ attr->create_flags & MLX4_IB_SRIOV_SQP)) {
+ return dev->dev->phys_caps.base_sqpn +
+ (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
+ attr->port_num - 1;
+ }
+ /* PF or VF -- creating proxies */
+ if (attr->qp_type == IB_QPT_SMI)
+ return dev->dev->caps.qp0_proxy[attr->port_num - 1];
+ else
+ return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
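Editor's note: for the native/PPF case, get_sqp_num() above places QP0 for ports 1 and 2 at base_sqpn + 0/1 and QP1 at base_sqpn + 2/3. A tiny sketch of that numbering, using a hypothetical base_sqpn value purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned base_sqpn = 0x40;	/* hypothetical value, for illustration only */
	int port, is_smi;

	for (is_smi = 1; is_smi >= 0; is_smi--)
		for (port = 1; port <= 2; port++)
			printf("%s port %d -> qpn 0x%x\n",
			       is_smi ? "QP0" : "QP1", port,
			       base_sqpn + (is_smi ? 0 : 2) + port - 1);
	return 0;
}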
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_sqp *sqp;
- struct mlx4_ib_qp *qp;
+ struct mlx4_ib_qp *qp = NULL;
int err;
+ u16 xrcdn = 0;
+ gfp_t gfp;
+ gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+ GFP_NOIO : GFP_KERNEL;
/*
- * We only support LSO and multicast loopback blocking, and
- * only for kernel UD QPs.
+ * We only support LSO, vendor flag1, and multicast loopback blocking,
+ * and only for kernel UD QPs.
*/
- if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
+ MLX4_IB_SRIOV_TUNNEL_QP |
+ MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (init_attr->qp_type != IB_QPT_UD)
+ return ERR_PTR(-EINVAL);
+ }
+
if (init_attr->create_flags &&
- (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+ (udata ||
+ ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
+ init_attr->qp_type > IB_QPT_GSI)))
return ERR_PTR(-EINVAL);
switch (init_attr->qp_type) {
+ case IB_QPT_XRC_TGT:
+ pd = to_mxrcd(init_attr->xrcd)->pd;
+ xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+ init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
+ /* fall through */
+ case IB_QPT_XRC_INI:
+ if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return ERR_PTR(-ENOSYS);
+ init_attr->recv_cq = init_attr->send_cq;
+ /* fall through */
case IB_QPT_RC:
case IB_QPT_UC:
- case IB_QPT_UD:
- {
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ case IB_QPT_RAW_PACKET:
+ qp = kzalloc(sizeof *qp, gfp);
if (!qp)
return ERR_PTR(-ENOMEM);
-
- err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
- if (err) {
- kfree(qp);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ /* fall through */
+ case IB_QPT_UD:
+ {
+ err = create_qp_common(to_mdev(pd->device), pd, init_attr,
+ udata, 0, &qp, gfp);
+ if (err)
return ERR_PTR(err);
- }
qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->xrcdn = xrcdn;
break;
}
@@ -711,24 +1120,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_GSI:
{
/* Userspace is not allowed to create special QPs: */
- if (pd->uobject)
+ if (udata)
return ERR_PTR(-EINVAL);
- sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
- if (!sqp)
- return ERR_PTR(-ENOMEM);
-
- qp = &sqp->qp;
-
- err = create_qp_common(dev, pd, init_attr, udata,
- dev->dev->caps.sqp_start +
- (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
- init_attr->port_num - 1,
- qp);
- if (err) {
- kfree(sqp);
+ err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
+ get_sqp_num(to_mdev(pd->device), init_attr),
+ &qp, gfp);
+ if (err)
return ERR_PTR(err);
- }
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -747,11 +1146,19 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
+ struct mlx4_ib_pd *pd;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
- destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+ if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+ mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
+ dev->qp1_proxy[mqp->port - 1] = NULL;
+ mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
+ }
+
+ pd = get_pd(mqp);
+ destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -761,15 +1168,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
-static int to_mlx4_st(enum ib_qp_type type)
+static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
- case IB_QPT_RC: return MLX4_QP_ST_RC;
- case IB_QPT_UC: return MLX4_QP_ST_UC;
- case IB_QPT_UD: return MLX4_QP_ST_UD;
- case IB_QPT_SMI:
- case IB_QPT_GSI: return MLX4_QP_ST_MLX;
- default: return -1;
+ case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
+ case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
+ case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
+ case MLX4_IB_QPT_XRC_INI:
+ case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
+ case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
+
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
+ MLX4_QP_ST_MLX : -1);
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_TUN_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
+ case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
+ MLX4_QP_ST_UD : -1);
+ default: return -1;
}
}
@@ -819,9 +1238,17 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
-static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
- struct mlx4_qp_path *path, u8 port)
+static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+ u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
+ struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
+ int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET;
+ int vidx;
+ int smac_index;
+ int err;
+
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
if (ah->static_rate) {
@@ -831,11 +1258,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
--path->static_rate;
} else
path->static_rate = 0;
- path->counter_index = 0xff;
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
- printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
+ pr_err("sgid_index (%u) too large. max is %d\n",
ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
return -1;
}
@@ -849,9 +1275,146 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
memcpy(path->rgid, ah->grh.dgid.raw, 16);
}
- path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ if (is_eth) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -1;
+
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 7) << 3);
+
+ path->feup |= MLX4_FEUP_FORCE_ETH_UP;
+ if (vlan_tag < 0x1000) {
+ if (smac_info->vid < 0x1000) {
+ /* both valid vlan ids */
+ if (smac_info->vid != vlan_tag) {
+ /* different VIDs. unreg old and reg new */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ } else {
+ path->vlan_index = smac_info->vlan_index;
+ }
+ } else {
+ /* no current vlan tag in qp */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ }
+ path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
+ path->fl = 1 << 6;
+ } else {
+ /* have current vlan tag. unregister it at modify-qp success */
+ if (smac_info->vid < 0x1000) {
+ smac_info->candidate_vid = 0xFFFF;
+ smac_info->update_vid = 1;
+ }
+ }
+
+ /* get smac_index for RoCE use.
+ * If no smac was yet assigned, register one.
+ * If one was already assigned, but the new mac differs,
+ * unregister the old one and register the new one.
+ */
+ if (!smac_info->smac || smac_info->smac != smac) {
+ /* register candidate now, unreg if needed, after success */
+ smac_index = mlx4_register_mac(dev->dev, port, smac);
+ if (smac_index >= 0) {
+ smac_info->candidate_smac_index = smac_index;
+ smac_info->candidate_smac = smac;
+ smac_info->candidate_smac_port = port;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ smac_index = smac_info->smac_index;
+ }
+
+ memcpy(path->dmac, ah->dmac, 6);
+ path->ackto = MLX4_IB_LINK_TYPE_ETH;
+ /* put MAC table smac index for IBoE */
+ path->grh_mylmc = (u8) (smac_index) | 0x80;
+ } else {
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ }
+
+ return 0;
+}
+
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
+ enum ib_qp_attr_mask qp_attr_mask,
+ struct mlx4_ib_qp *mqp,
+ struct mlx4_qp_path *path, u8 port)
+{
+ return _mlx4_set_path(dev, &qp->ah_attr,
+ mlx4_mac_to_u64((u8 *)qp->smac),
+ (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+ path, &mqp->pri, port);
+}
+
+static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
+ const struct ib_qp_attr *qp,
+ enum ib_qp_attr_mask qp_attr_mask,
+ struct mlx4_ib_qp *mqp,
+ struct mlx4_qp_path *path, u8 port)
+{
+ return _mlx4_set_path(dev, &qp->alt_ah_attr,
+ mlx4_mac_to_u64((u8 *)qp->alt_smac),
+ (qp_attr_mask & IB_QP_ALT_VID) ?
+ qp->alt_vlan_id : 0xffff,
+ path, &mqp->alt, port);
+}
+
+static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ struct mlx4_ib_gid_entry *ge, *tmp;
+
+ list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
+ if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) {
+ ge->added = 1;
+ ge->port = qp->port;
+ }
+ }
+}
+
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+ struct mlx4_qp_context *context)
+{
+ struct net_device *ndev;
+ u64 u64_mac;
+ int smac_index;
+
+ ndev = dev->iboe.netdevs[qp->port - 1];
+ if (ndev) {
+ smac = ndev->dev_addr;
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else {
+ u64_mac = dev->dev->caps.def_mac[qp->port];
+ }
+
+ context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
+ if (!qp->pri.smac) {
+ smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
+ if (smac_index >= 0) {
+ qp->pri.candidate_smac_index = smac_index;
+ qp->pri.candidate_smac = u64_mac;
+ qp->pri.candidate_smac_port = qp->port;
+ context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
+ } else {
+ return -ENOENT;
+ }
+ }
return 0;
}
@@ -861,9 +1424,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct mlx4_ib_pd *pd;
+ struct mlx4_ib_cq *send_cq, *recv_cq;
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
+ int steer_qp = 0;
int err = -EINVAL;
context = kzalloc(sizeof *context, GFP_KERNEL);
@@ -871,8 +1437,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
return -ENOMEM;
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
- (to_mlx4_st(ibqp->qp_type) << 16));
- context->flags |= cpu_to_be32(1 << 8); /* DE? */
+ (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -893,15 +1458,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+ else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
else if (ibqp->qp_type == IB_QPT_UD) {
if (qp->flags & MLX4_IB_QP_LSO)
context->mtu_msgmax = (IB_MTU_4096 << 5) |
ilog2(dev->dev->caps.max_gso_sz);
else
- context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
- printk(KERN_ERR "path MTU (%u) is invalid\n",
+ pr_err("path MTU (%u) is invalid\n",
attr->path_mtu);
goto out;
}
@@ -917,8 +1484,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
context->sq_size_stride |= qp->sq.wqe_shift - 4;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
+ context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+ if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ context->param3 |= cpu_to_be32(1 << 30);
+ }
if (qp->ibqp.uobject)
context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
@@ -936,14 +1507,31 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ if (dev->counters[qp->port - 1] != -1) {
+ context->pri_path.counter_index =
+ dev->counters[qp->port - 1];
+ optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+ } else
+ context->pri_path.counter_index = 0xff;
+
+ if (qp->flags & MLX4_IB_QP_NETIF) {
+ mlx4_ib_steer_qp_reg(dev, qp, 1);
+ steer_qp = 1;
+ }
+ }
+
if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
+ context->pri_path.disable_pkey_check = 0x40;
context->pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
+ if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
+ attr_mask & IB_QP_PORT ?
+ attr->port_num : qp->port))
goto out;
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -951,7 +1539,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_TIMEOUT) {
- context->pri_path.ackto = attr->timeout << 3;
+ context->pri_path.ackto |= attr->timeout << 3;
optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
}
@@ -964,8 +1552,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
- attr->alt_port_num))
+ if (mlx4_set_alt_path(dev, attr, attr_mask, qp,
+ &context->alt_path,
+ attr->alt_port_num))
goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
@@ -973,8 +1562,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
- context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
- context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+ pd = get_pd(qp);
+ get_cqs(qp, &send_cq, &recv_cq);
+ context->pd = cpu_to_be32(pd->pdn);
+ context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
+ context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
+ context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+
+ /* Set "fast registration enabled" for all kernel QPs */
+ if (!qp->ibqp.uobject)
+ context->params1 |= cpu_to_be32(1 << 11);
if (attr_mask & IB_QP_RNR_RETRY) {
context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
@@ -996,8 +1593,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_SQ_PSN)
context->next_send_psn = cpu_to_be32(attr->sq_psn);
- context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
-
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
if (attr->max_dest_rd_atomic)
context->params2 |=
@@ -1020,36 +1615,92 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_RQ_PSN)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
- context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
-
+ /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
if (attr_mask & IB_QP_QKEY) {
- context->qkey = cpu_to_be32(attr->qkey);
+ if (qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
+ context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
+ else {
+ if (mlx4_is_mfunc(dev->dev) &&
+ !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
+ (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
+ MLX4_RESERVED_QKEY_BASE) {
+ pr_err("Cannot use reserved QKEY"
+ " 0x%x (range 0xffff0000..0xffffffff"
+ " is reserved)\n", attr->qkey);
+ err = -EINVAL;
+ goto out;
+ }
+ context->qkey = cpu_to_be32(attr->qkey);
+ }
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
if (ibqp->srq)
context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
- if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
(ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_UD)) {
+ ibqp->qp_type == IB_QPT_UD ||
+ ibqp->qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
- if (is_qp0(dev, qp))
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
+ qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
- else
+ if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
+ context->pri_path.fl = 0x80;
+ } else {
+ if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
+ context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
+ }
+ if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+ context->pri_path.feup = 1 << 7; /* don't fsm */
+ /* handle smac_index */
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
+ err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+ if (err)
+ return -EINVAL;
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
+ dev->qp1_proxy[qp->port - 1] = qp;
+ }
+ }
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
+ MLX4_IB_LINK_TYPE_ETH;
+
+ if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+ int is_eth = rdma_port_get_link_layer(
+ &dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET;
+ if (is_eth) {
+ context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
+ optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
+ }
}
+
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
else
sqd_event = 0;
+ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->rlkey |= (1 << 4);
+
/*
* Before passing a kernel QP to the HW, make sure that the
* ownership bits of the send queue are set and the SQ
@@ -1082,8 +1733,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
qp->atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
qp->resp_depth = attr->max_dest_rd_atomic;
- if (attr_mask & IB_QP_PORT)
+ if (attr_mask & IB_QP_PORT) {
qp->port = attr->port_num;
+ update_mcg_macs(dev, qp);
+ }
if (attr_mask & IB_QP_ALT_PATH)
qp->alt_port = attr->alt_port_num;
@@ -1097,7 +1750,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (is_qp0(dev, qp)) {
if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
if (mlx4_INIT_PORT(dev->dev, qp->port))
- printk(KERN_WARNING "INIT_PORT failed for port %d\n",
+ pr_warn("INIT_PORT failed for port %d\n",
qp->port);
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
@@ -1109,23 +1762,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq): NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+ if (new_state == IB_QPS_RESET) {
+ if (!ibqp->uobject) {
+ mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (send_cq != recv_cq)
+ mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq_next_wqe = 0;
+ if (qp->rq.wqe_cnt)
+ *qp->db.db = 0;
+
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
+ }
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
- qp->rq.head = 0;
- qp->rq.tail = 0;
- qp->sq.head = 0;
- qp->sq.tail = 0;
- qp->sq_next_wqe = 0;
- if (!ibqp->srq)
- *qp->db.db = 0;
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
}
-
out:
+ if (err && steer_qp)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
+ if (qp->pri.candidate_smac) {
+ if (err) {
+ mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
+ } else {
+ if (qp->pri.smac)
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = qp->pri.candidate_smac;
+ qp->pri.smac_index = qp->pri.candidate_smac_index;
+ qp->pri.smac_port = qp->pri.candidate_smac_port;
+ }
+ qp->pri.candidate_smac = 0;
+ qp->pri.candidate_smac_index = 0;
+ qp->pri.candidate_smac_port = 0;
+ }
+ if (qp->alt.candidate_smac) {
+ if (err) {
+ mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
+ } else {
+ if (qp->alt.smac)
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = qp->alt.candidate_smac;
+ qp->alt.smac_index = qp->alt.candidate_smac_index;
+ qp->alt.smac_port = qp->alt.candidate_smac_port;
+ }
+ qp->alt.candidate_smac = 0;
+ qp->alt.candidate_smac_index = 0;
+ qp->alt.candidate_smac_port = 0;
+ }
+
+ if (qp->pri.update_vid) {
+ if (err) {
+ if (qp->pri.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
+ qp->pri.candidate_vid);
+ } else {
+ if (qp->pri.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
+ qp->pri.vid);
+ qp->pri.vid = qp->pri.candidate_vid;
+ qp->pri.vlan_port = qp->pri.candidate_vlan_port;
+ qp->pri.vlan_index = qp->pri.candidate_vlan_index;
+ }
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+
+ if (qp->alt.update_vid) {
+ if (err) {
+ if (qp->alt.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
+ qp->alt.candidate_vid);
+ } else {
+ if (qp->alt.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
+ qp->alt.vid);
+ qp->alt.vid = qp->alt.candidate_vid;
+ qp->alt.vlan_port = qp->alt.candidate_vlan_port;
+ qp->alt.vlan_index = qp->alt.candidate_vlan_index;
+ }
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+
return err;
}
@@ -1136,33 +1879,69 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
-
+ int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ ll = IB_LINK_LAYER_UNSPECIFIED;
+ } else {
+ int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ }
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask, ll)) {
+ pr_debug("qpn 0x%x: invalid attribute mask specified "
+ "for transition %d to %d. qp_type %d,"
+ " attr_mask 0x%x\n",
+ ibqp->qp_num, cur_state, new_state,
+ ibqp->qp_type, attr_mask);
goto out;
+ }
if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
+ (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
+ pr_debug("qpn 0x%x: invalid port number (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->port_num, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
+ if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
+ (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
+ IB_LINK_LAYER_ETHERNET))
+ goto out;
+
if (attr_mask & IB_QP_PKEY_INDEX) {
int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
+ if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
+ pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
+ "for transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->pkey_index, cur_state,
+ new_state, ibqp->qp_type);
goto out;
+ }
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
+ pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
+ pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
+ "Transition %d to %d. qp_type %d\n",
+ ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
+ new_state, ibqp->qp_type);
goto out;
}
@@ -1178,47 +1957,230 @@ out:
return err;
}
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i] ||
+ qpn == dev->caps.qp0_tunnel[i]) {
+ *qkey = dev->caps.qp0_qkey[i];
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
+static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+ struct ib_send_wr *wr,
+ void *wqe, unsigned *mlx_seg_len)
+{
+ struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
+ struct ib_device *ib_dev = &mdev->ib_dev;
+ struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ u16 pkey;
+ u32 qkey;
+ int send_size;
+ int header_size;
+ int spc;
+ int i;
+
+ if (wr->opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ send_size = 0;
+
+ for (i = 0; i < wr->num_sge; ++i)
+ send_size += wr->sg_list[i].length;
+
+ /* for proxy-qp0 sends, need to add in size of tunnel header */
+ /* for tunnel-qp0 sends, tunnel header is already in s/g list */
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ send_size += sizeof (struct mlx4_ib_tunnel_header);
+
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ sqp->ud_header.lrh.service_level =
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+ sqp->ud_header.lrh.destination_lid =
+ cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ sqp->ud_header.lrh.source_lid =
+ cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ }
+
+ mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
+
+ /* force loopback */
+ mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
+
+ sqp->ud_header.lrh.virtual_lane = 0;
+ sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+ ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
+ if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ else
+ sqp->ud_header.bth.destination_qpn =
+ cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
+
+ sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
+ if (mlx4_is_master(mdev->dev)) {
+ if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ } else {
+ if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ }
+ sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
+
+ sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ sqp->ud_header.immediate_present = 0;
+
+ header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
+
+ /*
+ * Inline data segments may not cross a 64 byte boundary. If
+ * our UD header is bigger than the space available up to the
+ * next 64 byte boundary in the WQE, use two inline data
+ * segments to hold the UD header.
+ */
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (header_size <= spc) {
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
+ memcpy(inl + 1, sqp->header_buf, header_size);
+ i = 1;
+ } else {
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
+ memcpy(inl + 1, sqp->header_buf, spc);
+
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
+ /*
+ * Need a barrier here to make sure all the data is
+ * visible before the byte_count field is set.
+ * Otherwise the HCA prefetcher could grab the 64-byte
+ * chunk with this inline segment and get a valid (!=
+ * 0xffffffff) byte count but stale data, and end up
+ * generating a packet with bad headers.
+ *
+ * The first inline segment's byte_count field doesn't
+ * need a barrier, because it comes after a
+ * control/MLX segment and therefore is at an offset
+ * of 16 mod 64.
+ */
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
+ i = 2;
+ }
+
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
+ return 0;
+}
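/*
 * Editorial note (illustrative, not part of this patch): a worked example of
 * the inline-segment split used above. Inline data may not cross a 64-byte
 * boundary, so the UD header goes into one or two inline segments depending
 * on the room 'spc' left before the next boundary. Assuming MLX4_INLINE_ALIGN
 * is 64 and struct mlx4_wqe_inline_seg is the usual 4-byte byte_count header:
 *
 *	inl + 1 lands at offset 20 within a 64-byte line -> spc = 64 - 20 = 44
 *	header_size = 66 bytes                           -> 66 > 44, so split
 *	segment 1: byte_count = 44, segment 2: byte_count = 66 - 44 = 22
 *	*mlx_seg_len = ALIGN(2 * 4 + 66, 16) = ALIGN(74, 16) = 80
 *
 * Only the second segment's byte_count is published after the wmb(), as the
 * comment above explains.
 */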
+
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
+ struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ union ib_gid sgid;
u16 pkey;
int send_size;
int header_size;
int spc;
int i;
+ int err = 0;
+ u16 vlan = 0xffff;
+ bool is_eth;
+ bool is_vlan = false;
+ bool is_grh;
send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
send_size += wr->sg_list[i].length;
- ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
+ is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
+ is_grh = mlx4_ib_ah_grh_present(ah);
+ if (is_eth) {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache */
+ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid.raw[0]);
+ if (err)
+ return err;
+ } else {
+ err = ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid);
+ if (err)
+ return err;
+ }
+
+ if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
+ vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
+ is_vlan = 1;
+ }
+ }
+ ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+
+ if (!is_eth) {
+ sqp->ud_header.lrh.service_level =
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
+ sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid;
+ sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
+ }
- sqp->ud_header.lrh.service_level =
- be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
- sqp->ud_header.lrh.destination_lid = ah->av.dlid;
- sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
- if (mlx4_ib_ah_grh_present(ah)) {
+ if (is_grh) {
sqp->ud_header.grh.traffic_class =
- (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
+ (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
- ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
- sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
- ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
- ah->av.gid_index, &sqp->ud_header.grh.source_gid);
+ ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
+ sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
+ if (is_eth)
+ memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+ else {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache */
+ sqp->ud_header.grh.source_gid.global.subnet_prefix =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ subnet_prefix;
+ sqp->ud_header.grh.source_gid.global.interface_id =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ guid_cache[ah->av.ib.gid_index];
+ } else
+ ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index,
+ &sqp->ud_header.grh.source_gid);
+ }
memcpy(sqp->ud_header.grh.destination_gid.raw,
- ah->av.dgid, 16);
+ ah->av.ib.dgid, 16);
}
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid ==
- IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
- mlx->rlid = sqp->ud_header.lrh.destination_lid;
+
+ if (!is_eth) {
+ mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
+ (sqp->ud_header.lrh.service_level << 8));
+ if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
+ mlx->flags |= cpu_to_be32(0x1); /* force loopback */
+ mlx->rlid = sqp->ud_header.lrh.destination_lid;
+ }
switch (wr->opcode) {
case IB_WR_SEND:
@@ -1234,9 +2196,39 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
return -EINVAL;
}
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
- if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
- sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
+ if (is_eth) {
+ u8 *smac;
+ struct in6_addr in6;
+
+ u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+
+ mlx->sched_prio = cpu_to_be16(pcp);
+
+ memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
+ /* FIXME: cache smac value? */
+ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
+ memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
+ memcpy(&in6, sgid.raw, sizeof(in6));
+
+ if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
+ smac = to_mdev(sqp->qp.ibqp.device)->
+ iboe.netdevs[sqp->qp.port - 1]->dev_addr;
+ else /* use the src mac of the tunnel */
+ smac = ah->av.eth.s_mac;
+ memcpy(sqp->ud_header.eth.smac_h, smac, 6);
+ if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
+ mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
+ if (!is_vlan) {
+ sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ } else {
+ sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
+ }
+ } else {
+ sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
+ sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
+ }
sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
if (!sqp->qp.ibqp.qp_num)
ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
@@ -1252,16 +2244,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
if (0) {
- printk(KERN_ERR "built UD header of size %d:\n", header_size);
+ pr_err("built UD header of size %d:\n", header_size);
for (i = 0; i < header_size / 4; ++i) {
if (i % 8 == 0)
- printk(" [%02x] ", i * 4);
- printk(" %08x",
- be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
+ pr_err(" [%02x] ", i * 4);
+ pr_cont(" %08x",
+ be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
if ((i + 1) % 8 == 0)
- printk("\n");
+ pr_cont("\n");
}
- printk("\n");
+ pr_err("\n");
}
/*
@@ -1322,6 +2314,63 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
return cur + nreq >= wq->max_post;
}
+static __be32 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ?
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ?
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ?
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
+ cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
+}
+
+static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+{
+ struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+ int i;
+
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
+ mfrpl->mapped_page_list[i] =
+ cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
+ MLX4_MTT_FLAG_PRESENT);
+
+ fseg->flags = convert_access(wr->wr.fast_reg.access_flags);
+ fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey);
+ fseg->buf_list = cpu_to_be64(mfrpl->map);
+ fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+ fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length);
+ fseg->offset = 0; /* XXX -- is this just for ZBVA? */
+ fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift);
+ fseg->reserved[0] = 0;
+ fseg->reserved[1] = 0;
+}
+
+static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
+{
+ bseg->flags1 =
+ convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
+ cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
+ MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
+ MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
+ bseg->flags2 = 0;
+ if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
+ bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
+ if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
+ bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
+ bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
+ bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
+ bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
+ bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
+}
+
+static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
+{
+ memset(iseg, 0, sizeof(*iseg));
+ iseg->mem_key = cpu_to_be32(rkey);
+}
+
static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
u64 remote_addr, u32 rkey)
{
@@ -1335,6 +2384,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
} else {
aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
aseg->compare = 0;
@@ -1342,12 +2394,85 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
}
+static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
+ struct ib_send_wr *wr)
+{
+ aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+ aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
+ aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
+ aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+}
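/*
 * Editorial note (illustrative, not part of this patch): the masked-atomic
 * segment layouts follow the usual extended-atomics semantics. For
 * IB_WR_MASKED_ATOMIC_CMP_AND_SWP the device performs, roughly:
 *
 *	if ((old & compare_mask) == (compare & compare_mask))
 *		new = (old & ~swap_mask) | (swap & swap_mask);
 *
 * For IB_WR_MASKED_ATOMIC_FETCH_AND_ADD the add operand travels in the
 * swap_add slot and the mask (field boundaries across which carries do not
 * propagate) in the compare slot, which is why set_atomic_seg() above maps
 * compare_add/compare_add_mask that way for that opcode.
 */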
+
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
+ memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
+}
+
+static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
+ struct mlx4_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr,
+ enum mlx4_ib_qp_type qpt)
+{
+ union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+ struct mlx4_av sqp_av = {0};
+ int port = *((u8 *) &av->ib.port_pd) & 0x3;
+
+ /* force loopback */
+ sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
+ sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
+ sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
+ cpu_to_be32(0xf0000000);
+
+ memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
+ if (qpt == MLX4_IB_QPT_PROXY_GSI)
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ else
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
+ /* Use QKEY from the QP context, which is set by master */
+ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
+}
+
+static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+{
+ struct mlx4_wqe_inline_seg *inl = wqe;
+ struct mlx4_ib_tunnel_header hdr;
+ struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+ int spc;
+ int i;
+
+ memcpy(&hdr.av, &ah->av, sizeof hdr.av);
+ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+ hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
+ hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(hdr.mac, ah->av.eth.mac, 6);
+ hdr.vlan = ah->av.eth.vlan;
+
+ spc = MLX4_INLINE_ALIGN -
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
+ if (sizeof (hdr) <= spc) {
+ memcpy(inl + 1, &hdr, sizeof (hdr));
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
+ i = 1;
+ } else {
+ memcpy(inl + 1, &hdr, spc);
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
+
+ inl = (void *) (inl + 1) + spc;
+ memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
+ wmb();
+ inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
+ i = 2;
+ }
+
+ *mlx_seg_len =
+ ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
}
static void set_mlx_icrc_seg(void *dseg)
@@ -1395,17 +2520,14 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
- struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
+ struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
+ __be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
- /*
- * This is a temporary limitation and will be removed in
- * a forthcoming FW release:
- */
- if (unlikely(halign > 64))
- return -EINVAL;
+ if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+ *blh = cpu_to_be32(1 << 6);
if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1413,16 +2535,34 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
- /* make sure LSO header is written before overwriting stamping */
- wmb();
-
- wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
- wr->wr.ud.hlen);
-
+ *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
+ wr->wr.ud.hlen);
*lso_seg_len = halign;
return 0;
}
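/*
 * Editorial note (not part of this patch): with the old -EINVAL check
 * removed, LSO headers larger than MLX4_IB_CACHE_LINE_SIZE (64 bytes in this
 * driver) are no longer rejected; instead 'blh' (presumably the "big LSO
 * header" bit, 1 << 6) is ORed into ctrl->owner_opcode later in
 * mlx4_ib_post_send(), telling the HCA that the header spills past one
 * cache line.
 */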
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return wr->ex.imm_data;
+
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_be32(wr->ex.invalidate_rkey);
+
+ default:
+ return 0;
+ }
+}
+
+static void add_zero_len_inline(void *wqe)
+{
+ struct mlx4_wqe_inline_seg *inl = wqe;
+ memset(wqe, 0, 16);
+ inl->byte_count = cpu_to_be32(1 << 31);
+}
+
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1437,6 +2577,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int uninitialized_var(stamp);
int uninitialized_var(size);
unsigned uninitialized_var(seglen);
+ __be32 dummy;
+ __be32 *lso_wqe;
+ __be32 uninitialized_var(lso_hdr_sz);
+ __be32 blh;
int i;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1444,6 +2588,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ind = qp->sq_next_wqe;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ lso_wqe = &dummy;
+ blh = 0;
+
if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
*bad_wr = wr;
@@ -1469,21 +2616,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
qp->sq_signal_bits;
- if (wr->opcode == IB_WR_SEND_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- ctrl->imm = wr->ex.imm_data;
- else
- ctrl->imm = 0;
+ ctrl->imm = send_ieth(wr);
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
- switch (ibqp->qp_type) {
- case IB_QPT_RC:
- case IB_QPT_UC:
+ switch (qp->mlx4_ib_qp_type) {
+ case MLX4_IB_QPT_RC:
+ case MLX4_IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
+ case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
wr->wr.atomic.rkey);
wqe += sizeof (struct mlx4_wqe_raddr_seg);
@@ -1496,6 +2640,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
+ case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+ set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+ wr->wr.atomic.rkey);
+ wqe += sizeof (struct mlx4_wqe_raddr_seg);
+
+ set_masked_atomic_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
+
+ size += (sizeof (struct mlx4_wqe_raddr_seg) +
+ sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
+
+ break;
+
case IB_WR_RDMA_READ:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -1505,30 +2662,103 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
break;
+ case IB_WR_LOCAL_INV:
+ ctrl->srcrb_flags |=
+ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+ set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
+ wqe += sizeof (struct mlx4_wqe_local_inval_seg);
+ size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
+ break;
+
+ case IB_WR_FAST_REG_MR:
+ ctrl->srcrb_flags |=
+ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+ set_fmr_seg(wqe, wr);
+ wqe += sizeof (struct mlx4_wqe_fmr_seg);
+ size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+ break;
+
+ case IB_WR_BIND_MW:
+ ctrl->srcrb_flags |=
+ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
+ set_bind_seg(wqe, wr);
+ wqe += sizeof(struct mlx4_wqe_bind_seg);
+ size += sizeof(struct mlx4_wqe_bind_seg) / 16;
+ break;
default:
/* No extra segments required for sends */
break;
}
break;
- case IB_QPT_UD:
+ case MLX4_IB_QPT_TUN_SMI_OWNER:
+ err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+ case MLX4_IB_QPT_TUN_SMI:
+ case MLX4_IB_QPT_TUN_GSI:
+ /* this is a UD qp used in MAD responses to slaves. */
+ set_datagram_seg(wqe, wr);
+ /* set the forced-loopback bit in the data seg av */
+ *(__be32 *) wqe |= cpu_to_be32(0x80000000);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ break;
+ case MLX4_IB_QPT_UD:
set_datagram_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
if (wr->opcode == IB_WR_LSO) {
- err = build_lso_seg(wqe, wr, qp, &seglen);
+ err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
+ lso_wqe = (__be32 *) wqe;
wqe += seglen;
size += seglen / 16;
}
break;
- case IB_QPT_SMI:
- case IB_QPT_GSI:
+ case MLX4_IB_QPT_PROXY_SMI_OWNER:
+ err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ wqe += seglen;
+ size += seglen / 16;
+ /* to start tunnel header on a cache-line boundary */
+ add_zero_len_inline(wqe);
+ wqe += 16;
+ size++;
+ build_tunnel_header(wr, wqe, &seglen);
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+ case MLX4_IB_QPT_PROXY_SMI:
+ case MLX4_IB_QPT_PROXY_GSI:
+ /* If we are tunneling special qps, this is a UD qp.
+ * In this case we first add a UD segment targeting
+ * the tunnel qp, and then add a header with address
+ * information */
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+ qp->mlx4_ib_qp_type);
+ wqe += sizeof (struct mlx4_wqe_datagram_seg);
+ size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+ build_tunnel_header(wr, wqe, &seglen);
+ wqe += seglen;
+ size += seglen / 16;
+ break;
+
+ case MLX4_IB_QPT_SMI:
+ case MLX4_IB_QPT_GSI:
err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
@@ -1554,8 +2784,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
/* Add one more inline data segment for ICRC for MLX sends */
- if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)) {
+ if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
+ qp->mlx4_ib_qp_type &
+ (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
@@ -1563,6 +2795,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i);
+ /*
+ * Possibly overwrite stamping in cacheline with LSO
+ * segment only after making sure all data segments
+ * are written.
+ */
+ wmb();
+ *lso_wqe = lso_hdr_sz;
+
ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;
@@ -1574,12 +2814,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wmb();
if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
+ *bad_wr = wr;
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
- (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
@@ -1597,7 +2838,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
stamp_send_wqe(qp, stamp, size * 16);
ind = pad_wraparound(qp, ind);
}
-
}
out:
@@ -1639,14 +2879,16 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int err = 0;
int nreq;
int ind;
+ int max_gs;
int i;
+ max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
+ if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;
@@ -1660,10 +2902,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
scat = get_recv_wqe(qp, ind);
+ if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ ib_dma_sync_single_for_device(ibqp->device,
+ qp->sqp_proxy_rcv[ind].map,
+ sizeof (struct mlx4_ib_proxy_sqp_hdr),
+ DMA_FROM_DEVICE);
+ scat->byte_count =
+ cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
+ /* use dma lkey from upper layer entry */
+ scat->lkey = cpu_to_be32(wr->sg_list->lkey);
+ scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
+ scat++;
+ max_gs--;
+ }
+
for (i = 0; i < wr->num_sge; ++i)
__set_data_seg(scat + i, wr->sg_list + i);
- if (i < qp->rq.max_gs) {
+ if (i < max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
@@ -1731,17 +2988,27 @@ static int to_ib_qp_access_flags(int mlx4_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
+static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
struct mlx4_qp_path *path)
{
+ struct mlx4_dev *dev = ibdev->dev;
+ int is_eth;
+
memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
return;
+ is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+ if (is_eth)
+ ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
+ ((path->sched_queue & 4) << 1);
+ else
+ ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
+
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
@@ -1794,8 +3061,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
to_ib_qp_access_flags(be32_to_cpu(context.params2));
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
+ to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
}
@@ -1848,6 +3115,13 @@ done:
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
+
+ qp_init_attr->sq_sig_type =
+ qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 12d6bc6f800..62d9285300a 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -32,6 +33,7 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
+#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
@@ -57,7 +59,7 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
event.event = IB_EVENT_SRQ_ERR;
break;
default:
- printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
+ pr_warn("Unexpected event type %d "
"on SRQ %06x\n", type, srq->srqn);
return;
}
@@ -73,6 +75,9 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
+ struct mlx4_wqe_data_seg *scatter;
+ u32 cqn;
+ u16 xrcdn;
int desc_size;
int buf_size;
int err;
@@ -129,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
goto err_srq;
*srq->db.db = 0;
- if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+ GFP_KERNEL)) {
err = -ENOMEM;
goto err_db;
}
@@ -148,6 +154,11 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
next = get_wqe(srq, i);
next->next_wqe_index =
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+
+ for (scatter = (void *) (next + 1);
+ (void *) scatter < (void *) next + desc_size;
+ ++scatter)
+ scatter->lkey = cpu_to_be32(MLX4_INVALID_LKEY);
}
err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
@@ -155,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
if (err)
goto err_mtt;
@@ -166,12 +177,18 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
}
}
- err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
+ cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+ xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
+ (u16) dev->dev->caps.reserved_xrcds;
+ err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
if (pd->uobject)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
new file mode 100644
index 00000000000..cb4c66e723b
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*#include "core_priv.h"*/
+#include "mlx4_ib.h"
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+
+#include <rdma/ib_mad.h>
+/* show_admin_alias_guid returns the administratively assigned value of that GUID.
+ * Values returned in buf parameter string:
+ * 0 - requests opensm to assign a value.
+ * ffffffffffffffff - delete this entry.
+ * other - value assigned by administrator.
+ */
+static ssize_t show_admin_alias_guid(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int record_num;/*0-15*/
+ int guid_index_in_rec; /*0 - 7*/
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+
+ record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
+ guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+
+ return sprintf(buf, "%llx\n",
+ be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
+ ports_guid[port->num - 1].
+ all_rec_per_port[record_num].
+ all_recs[8 * guid_index_in_rec]));
+}
+
+/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
+ * Values in buf parameter string:
+ * 0 - requests opensm to assign a value.
+ * 0xffffffffffffffff - delete this entry.
+ * other - guid value assigned by the administrator.
+ */
+static ssize_t store_admin_alias_guid(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ int record_num;/*0-15*/
+ int guid_index_in_rec; /*0 - 7*/
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ u64 sysadmin_ag_val;
+
+ record_num = mlx4_ib_iov_dentry->entry_num / 8;
+ guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
+ if (0 == record_num && 0 == guid_index_in_rec) {
+ pr_err("GUID 0 block 0 is RO\n");
+ return count;
+ }
+ sscanf(buf, "%llx", &sysadmin_ag_val);
+ *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
+ all_rec_per_port[record_num].
+ all_recs[GUID_REC_SIZE * guid_index_in_rec] =
+ cpu_to_be64(sysadmin_ag_val);
+
+ /* Change the state to be pending for update */
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
+ = MLX4_GUID_INFO_STATUS_IDLE ;
+
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
+ = MLX4_GUID_INFO_RECORD_SET;
+
+ switch (sysadmin_ag_val) {
+ case MLX4_GUID_FOR_DELETE_VAL:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
+ = MLX4_GUID_INFO_RECORD_DELETE;
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_SYSADMIN_ASSIGN;
+ break;
+ /* The sysadmin requests the SM to re-assign */
+ case MLX4_NOT_SET_GUID:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_DRIVER_ASSIGN;
+ break;
+ /* The sysadmin requests a specific value.*/
+ default:
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
+ = MLX4_GUID_SYSADMIN_ASSIGN;
+ break;
+ }
+
+ /* set the record index */
+ mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
+ = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+
+ mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
+
+ return count;
+}
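/*
 * Editorial note (illustrative, not part of this patch): with the sysfs tree
 * built below, an administrator would drive these handlers roughly as
 * follows. The device name, port and index are examples, and the exact path
 * is an assumption based on the iov/ports/<port>/admin_guids hierarchy
 * created in add_port_entries():
 *
 *	# assign a specific alias GUID to entry 3 of port 1
 *	echo 0011223344556677 > /sys/class/infiniband/mlx4_0/iov/ports/1/admin_guids/3
 *	# let the SM assign a value
 *	echo 0 > /sys/class/infiniband/mlx4_0/iov/ports/1/admin_guids/3
 *	# delete the entry
 *	echo 0xffffffffffffffff > /sys/class/infiniband/mlx4_0/iov/ports/1/admin_guids/3
 */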
+
+static ssize_t show_port_gid(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ union ib_gid gid;
+ ssize_t ret;
+
+ ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
+ mlx4_ib_iov_dentry->entry_num, &gid, 1);
+ if (ret)
+ return ret;
+ ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(((__be16 *) gid.raw)[0]),
+ be16_to_cpu(((__be16 *) gid.raw)[1]),
+ be16_to_cpu(((__be16 *) gid.raw)[2]),
+ be16_to_cpu(((__be16 *) gid.raw)[3]),
+ be16_to_cpu(((__be16 *) gid.raw)[4]),
+ be16_to_cpu(((__be16 *) gid.raw)[5]),
+ be16_to_cpu(((__be16 *) gid.raw)[6]),
+ be16_to_cpu(((__be16 *) gid.raw)[7]));
+ return ret;
+}
+
+static ssize_t show_phys_port_pkey(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
+ container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
+ struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
+ struct mlx4_ib_dev *mdev = port->dev;
+ u16 pkey;
+ ssize_t ret;
+
+ ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
+ mlx4_ib_iov_dentry->entry_num, &pkey, 1);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "0x%04x\n", pkey);
+}
+
+#define DENTRY_REMOVE(_dentry) \
+do { \
+ sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
+} while (0);
+
+static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
+ char *_name, struct kobject *_kobj,
+ ssize_t (*show)(struct device *dev,
+ struct device_attribute *attr,
+ char *buf),
+ ssize_t (*store)(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+ )
+{
+ int ret = 0;
+ struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
+
+ vdentry->ctx = _ctx;
+ vdentry->dentry.show = show;
+ vdentry->dentry.store = store;
+ sysfs_attr_init(&vdentry->dentry.attr);
+ vdentry->dentry.attr.name = vdentry->name;
+ vdentry->dentry.attr.mode = 0;
+ vdentry->kobj = _kobj;
+ snprintf(vdentry->name, 15, "%s", _name);
+
+ if (vdentry->dentry.store)
+ vdentry->dentry.attr.mode |= S_IWUSR;
+
+ if (vdentry->dentry.show)
+ vdentry->dentry.attr.mode |= S_IRUGO;
+
+ ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
+ if (ret) {
+ pr_err("failed to create %s\n", vdentry->dentry.attr.name);
+ vdentry->ctx = NULL;
+ return ret;
+ }
+
+ return ret;
+}
+
+int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr)
+{
+ struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
+ int ret;
+
+ ret = sysfs_create_file(port->mcgs_parent, attr);
+ if (ret)
+ pr_err("failed to create %s\n", attr->name);
+
+ return ret;
+}
+
+void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
+ struct attribute *attr)
+{
+ struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
+
+ sysfs_remove_file(port->mcgs_parent, attr);
+}
+
+static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
+{
+ int i;
+ char buff[10];
+ struct mlx4_ib_iov_port *port = NULL;
+ int ret = 0 ;
+ struct ib_port_attr attr;
+
+ /* get the physical gid and pkey table sizes.*/
+ ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
+ if (ret)
+ goto err;
+
+ port = &device->iov_ports[port_num - 1];
+ port->dev = device;
+ port->num = port_num;
+ /* Directory structure:
+ * iov -
+ * port num -
+ * admin_guids
+ * gids (operational)
+ * mcg_table
+ */
+ port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
+ GFP_KERNEL);
+ if (!port->dentr_ar) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ sprintf(buff, "%d", port_num);
+ port->cur_port = kobject_create_and_add(buff,
+ kobject_get(device->ports_parent));
+ if (!port->cur_port) {
+ ret = -ENOMEM;
+ goto kobj_create_err;
+ }
+ /* admin GUIDs */
+ port->admin_alias_parent = kobject_create_and_add("admin_guids",
+ kobject_get(port->cur_port));
+ if (!port->admin_alias_parent) {
+ ret = -ENOMEM;
+ goto err_admin_guids;
+ }
+ for (i = 0 ; i < attr.gid_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[i].entry_num = i;
+ ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
+ buff, port->admin_alias_parent,
+ show_admin_alias_guid, store_admin_alias_guid);
+ if (ret)
+ goto err_admin_alias_parent;
+ }
+
+ /* gids subdirectory (operational gids) */
+ port->gids_parent = kobject_create_and_add("gids",
+ kobject_get(port->cur_port));
+ if (!port->gids_parent) {
+ ret = -ENOMEM;
+ goto err_gids;
+ }
+
+ for (i = 0 ; i < attr.gid_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
+ ret = create_sysfs_entry(port,
+ &port->dentr_ar->dentries[attr.gid_tbl_len + i],
+ buff,
+ port->gids_parent, show_port_gid, NULL);
+ if (ret)
+ goto err_gids_parent;
+ }
+
+ /* physical port pkey table */
+ port->pkeys_parent =
+ kobject_create_and_add("pkeys", kobject_get(port->cur_port));
+ if (!port->pkeys_parent) {
+ ret = -ENOMEM;
+ goto err_pkeys;
+ }
+
+ for (i = 0 ; i < attr.pkey_tbl_len; i++) {
+ sprintf(buff, "%d", i);
+ port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
+ ret = create_sysfs_entry(port,
+ &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
+ buff, port->pkeys_parent,
+ show_phys_port_pkey, NULL);
+ if (ret)
+ goto err_pkeys_parent;
+ }
+
+ /* MCGs table */
+ port->mcgs_parent =
+ kobject_create_and_add("mcgs", kobject_get(port->cur_port));
+ if (!port->mcgs_parent) {
+ ret = -ENOMEM;
+ goto err_mcgs;
+ }
+ return 0;
+
+err_mcgs:
+ kobject_put(port->cur_port);
+
+err_pkeys_parent:
+ kobject_put(port->pkeys_parent);
+
+err_pkeys:
+ kobject_put(port->cur_port);
+
+err_gids_parent:
+ kobject_put(port->gids_parent);
+
+err_gids:
+ kobject_put(port->cur_port);
+
+err_admin_alias_parent:
+ kobject_put(port->admin_alias_parent);
+
+err_admin_guids:
+ kobject_put(port->cur_port);
+ kobject_put(port->cur_port); /* once more for create_and_add buff */
+
+kobj_create_err:
+ kobject_put(device->ports_parent);
+ kfree(port->dentr_ar);
+
+err:
+ pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
+ port_num, ret);
+ return ret;
+}
+
+static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
+{
+ char base_name[9];
+
+ /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
+ strlcpy(name, pci_name(dev->dev->pdev), max);
+ strncpy(base_name, name, 8); /*till xxxx:yy:*/
+ base_name[8] = '\0';
+ /* Without ARI only the last 3 bits encode the function number, so a
+ * function of 8 or above spills into the device number; the last digit
+ * is therefore the function number modulo 8 */
+ sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+}
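/*
 * Editorial note (illustrative, not part of this patch): example of the
 * naming scheme above, assuming pci_name() returns "0000:08:00.0":
 *
 *	base_name = "0000:08:"           (first 8 characters)
 *	slave i = 0   ->  "0000:08:00.0"
 *	slave i = 5   ->  "0000:08:00.5"
 *	slave i = 10  ->  "0000:08:01.2"  (10 / 8 = 1, 10 % 8 = 2)
 */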
+
+struct mlx4_port {
+ struct kobject kobj;
+ struct mlx4_ib_dev *dev;
+ struct attribute_group pkey_group;
+ struct attribute_group gid_group;
+ struct device_attribute enable_smi_admin;
+ struct device_attribute smi_enabled;
+ int slave;
+ u8 port_num;
+};
+
+
+static void mlx4_port_release(struct kobject *kobj)
+{
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+ struct attribute *a;
+ int i;
+
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
+ kfree(p->pkey_group.attrs);
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
+ kfree(p->gid_group.attrs);
+ kfree(p);
+}
+
+struct port_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
+ ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
+ const char *buf, size_t count);
+};
+
+static ssize_t port_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+
+ if (!port_attr->show)
+ return -EIO;
+ return port_attr->show(p, port_attr, buf);
+}
+
+static ssize_t port_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t size)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
+
+ if (!port_attr->store)
+ return -EIO;
+ return port_attr->store(p, port_attr, buf, size);
+}
+
+static const struct sysfs_ops port_sysfs_ops = {
+ .show = port_attr_show,
+ .store = port_attr_store,
+};
+
+static struct kobj_type port_type = {
+ .release = mlx4_port_release,
+ .sysfs_ops = &port_sysfs_ops,
+};
+
+struct port_table_attribute {
+ struct port_attribute attr;
+ char name[8];
+ int index;
+};
+
+static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ ssize_t ret = -ENODEV;
+
+ if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
+ (p->dev->dev->caps.pkey_table_len[p->port_num]))
+ ret = sprintf(buf, "none\n");
+ else
+ ret = sprintf(buf, "%d\n",
+ p->dev->pkeys.virt2phys_pkey[p->slave]
+ [p->port_num - 1][tab_attr->index]);
+ return ret;
+}
+
+static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int idx;
+ int err;
+
+ /* do not allow remapping Dom0 virtual pkey table */
+ if (p->slave == mlx4_master_func_num(p->dev->dev))
+ return -EINVAL;
+
+ if (!strncasecmp(buf, "no", 2))
+ idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
+ else if (sscanf(buf, "%i", &idx) != 1 ||
+ idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
+ idx < 0)
+ return -EINVAL;
+
+ p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
+ [tab_attr->index] = idx;
+ mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
+ tab_attr->index, idx);
+ err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
+ if (err) {
+ pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
+ " port %d, index %d\n", p->slave, p->port_num, idx);
+ return err;
+ }
+ return count;
+}
+
+static ssize_t show_port_gid_idx(struct mlx4_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", p->slave);
+}
+
+static struct attribute **
+alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
+ struct port_attribute *, char *buf),
+ ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
+ const char *buf, size_t count),
+ int len)
+{
+ struct attribute **tab_attr;
+ struct port_table_attribute *element;
+ int i;
+
+ tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
+ if (!tab_attr)
+ return NULL;
+
+ for (i = 0; i < len; i++) {
+ element = kzalloc(sizeof (struct port_table_attribute),
+ GFP_KERNEL);
+ if (!element)
+ goto err;
+ if (snprintf(element->name, sizeof (element->name),
+ "%d", i) >= sizeof (element->name)) {
+ kfree(element);
+ goto err;
+ }
+ sysfs_attr_init(&element->attr.attr);
+ element->attr.attr.name = element->name;
+ if (store) {
+ element->attr.attr.mode = S_IWUSR | S_IRUGO;
+ element->attr.store = store;
+ } else
+ element->attr.attr.mode = S_IRUGO;
+
+ element->attr.show = show;
+ element->index = i;
+ tab_attr[i] = &element->attr.attr;
+ }
+ return tab_attr;
+
+err:
+ while (--i >= 0)
+ kfree(tab_attr[i]);
+ kfree(tab_attr);
+ return NULL;
+}
+
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, smi_enabled);
+ ssize_t len = 0;
+
+ if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ ssize_t len = 0;
+
+ if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ int enable;
+
+ if (sscanf(buf, "%i", &enable) != 1 ||
+ enable < 0 || enable > 1)
+ return -EINVAL;
+
+ if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))
+ return -EINVAL;
+ return count;
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+ int ret;
+
+ /* do not display entries if eth transport, or if master */
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return 0;
+
+ sysfs_attr_init(&p->smi_enabled.attr);
+ p->smi_enabled.show = sysfs_show_smi_enabled;
+ p->smi_enabled.store = NULL;
+ p->smi_enabled.attr.name = "smi_enabled";
+ p->smi_enabled.attr.mode = 0444;
+ ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+ if (ret) {
+ pr_err("failed to create smi_enabled\n");
+ return ret;
+ }
+
+ sysfs_attr_init(&p->enable_smi_admin.attr);
+ p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+ p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+ p->enable_smi_admin.attr.name = "enable_smi_admin";
+ p->enable_smi_admin.attr.mode = 0644;
+ ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+ if (ret) {
+ pr_err("failed to create enable_smi_admin\n");
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ return ret;
+ }
+ return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return;
+
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
+static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
+{
+ struct mlx4_port *p;
+ int i;
+ int ret;
+
+ p = kzalloc(sizeof *p, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->dev = dev;
+ p->port_num = port_num;
+ p->slave = slave;
+
+ ret = kobject_init_and_add(&p->kobj, &port_type,
+ kobject_get(dev->dev_ports_parent[slave]),
+ "%d", port_num);
+ if (ret)
+ goto err_alloc;
+
+ p->pkey_group.name = "pkey_idx";
+ p->pkey_group.attrs =
+ alloc_group_attrs(show_port_pkey, store_port_pkey,
+ dev->dev->caps.pkey_table_len[port_num]);
+ if (!p->pkey_group.attrs) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
+
+ ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ if (ret)
+ goto err_free_pkey;
+
+ p->gid_group.name = "gid_idx";
+ p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
+ if (!p->gid_group.attrs) {
+ ret = -ENOMEM;
+ goto err_free_pkey;
+ }
+
+ ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ if (ret)
+ goto err_free_gid;
+
+ ret = add_vf_smi_entries(p);
+ if (ret)
+ goto err_free_gid;
+
+ list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
+ return 0;
+
+err_free_gid:
+ kfree(p->gid_group.attrs[0]);
+ kfree(p->gid_group.attrs);
+
+err_free_pkey:
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
+ kfree(p->pkey_group.attrs[i]);
+ kfree(p->pkey_group.attrs);
+
+err_alloc:
+ kobject_put(dev->dev_ports_parent[slave]);
+ kfree(p);
+ return ret;
+}
+
+static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
+{
+ char name[32];
+ int err;
+ int port;
+ struct kobject *p, *t;
+ struct mlx4_port *mport;
+ struct mlx4_active_ports actv_ports;
+
+ get_name(dev, name, slave, sizeof name);
+
+ dev->pkeys.device_parent[slave] =
+ kobject_create_and_add(name, kobject_get(dev->iov_parent));
+
+ if (!dev->pkeys.device_parent[slave]) {
+ err = -ENOMEM;
+ goto fail_dev;
+ }
+
+ INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
+
+ dev->dev_ports_parent[slave] =
+ kobject_create_and_add("ports",
+ kobject_get(dev->pkeys.device_parent[slave]));
+
+ if (!dev->dev_ports_parent[slave]) {
+ err = -ENOMEM;
+ goto err_ports;
+ }
+
+ actv_ports = mlx4_get_active_ports(dev->dev, slave);
+
+ for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+ err = add_port(dev, port, slave);
+ if (err)
+ goto err_add;
+ }
+ return 0;
+
+err_add:
+ list_for_each_entry_safe(p, t,
+ &dev->pkeys.pkey_port_list[slave],
+ entry) {
+ list_del(&p->entry);
+ mport = container_of(p, struct mlx4_port, kobj);
+ sysfs_remove_group(p, &mport->pkey_group);
+ sysfs_remove_group(p, &mport->gid_group);
+ remove_vf_smi_entries(mport);
+ kobject_put(p);
+ }
+ kobject_put(dev->dev_ports_parent[slave]);
+
+err_ports:
+ kobject_put(dev->pkeys.device_parent[slave]);
+ /* extra put for the device_parent create_and_add */
+ kobject_put(dev->pkeys.device_parent[slave]);
+
+fail_dev:
+ kobject_put(dev->iov_parent);
+ return err;
+}
+
+static int register_pkey_tree(struct mlx4_ib_dev *device)
+{
+ int i;
+
+ if (!mlx4_is_master(device->dev))
+ return 0;
+
+ for (i = 0; i <= device->dev->num_vfs; ++i)
+ register_one_pkey_tree(device, i);
+
+ return 0;
+}
+
+static void unregister_pkey_tree(struct mlx4_ib_dev *device)
+{
+ int slave;
+ struct kobject *p, *t;
+ struct mlx4_port *port;
+
+ if (!mlx4_is_master(device->dev))
+ return;
+
+ for (slave = device->dev->num_vfs; slave >= 0; --slave) {
+ list_for_each_entry_safe(p, t,
+ &device->pkeys.pkey_port_list[slave],
+ entry) {
+ list_del(&p->entry);
+ port = container_of(p, struct mlx4_port, kobj);
+ sysfs_remove_group(p, &port->pkey_group);
+ sysfs_remove_group(p, &port->gid_group);
+ remove_vf_smi_entries(port);
+ kobject_put(p);
+ kobject_put(device->dev_ports_parent[slave]);
+ }
+ kobject_put(device->dev_ports_parent[slave]);
+ kobject_put(device->pkeys.device_parent[slave]);
+ kobject_put(device->pkeys.device_parent[slave]);
+ kobject_put(device->iov_parent);
+ }
+}
+
+int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
+{
+ int i;
+ int ret = 0;
+
+ if (!mlx4_is_master(dev->dev))
+ return 0;
+
+ dev->iov_parent =
+ kobject_create_and_add("iov",
+ kobject_get(dev->ib_dev.ports_parent->parent));
+ if (!dev->iov_parent) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ dev->ports_parent =
+ kobject_create_and_add("ports",
+ kobject_get(dev->iov_parent));
+ if (!dev->ports_parent) {
+ ret = -ENOMEM;
+ goto err_ports;
+ }
+
+ for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
+ ret = add_port_entries(dev, i);
+ if (ret)
+ goto err_add_entries;
+ }
+
+ ret = register_pkey_tree(dev);
+ if (ret)
+ goto err_add_entries;
+ return 0;
+
+err_add_entries:
+ kobject_put(dev->ports_parent);
+
+err_ports:
+ kobject_put(dev->iov_parent);
+err:
+ kobject_put(dev->ib_dev.ports_parent->parent);
+ pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
+ return ret;
+}
+
+static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
+{
+ struct mlx4_ib_iov_port *p;
+ int i;
+
+ if (!mlx4_is_master(device->dev))
+ return;
+
+ for (i = 0; i < device->dev->caps.num_ports; i++) {
+ p = &device->iov_ports[i];
+ kobject_put(p->admin_alias_parent);
+ kobject_put(p->gids_parent);
+ kobject_put(p->pkeys_parent);
+ kobject_put(p->mcgs_parent);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->cur_port);
+ kobject_put(p->dev->ports_parent);
+ kfree(p->dentr_ar);
+ }
+}
+
+void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
+{
+ unregister_alias_guid_tree(device);
+ unregister_pkey_tree(device);
+ kobject_put(device->ports_parent);
+ kobject_put(device->iov_parent);
+ kobject_put(device->iov_parent);
+ kobject_put(device->ib_dev.ports_parent->parent);
+}
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index e2d11be4525..07e6769ef43 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -39,7 +40,9 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
-#define MLX4_IB_UVERBS_ABI_VERSION 3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
+#define MLX4_IB_UVERBS_ABI_VERSION 4
/*
* Make sure that all structs defined in this file remain laid out so
@@ -49,10 +52,18 @@
* instead.
*/
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+ __u32 qp_tab_size;
+ __u16 bf_reg_size;
+ __u16 bf_regs_per_page;
+};
+
struct mlx4_ib_alloc_ucontext_resp {
+ __u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
+ __u32 cqe_size;
};
struct mlx4_ib_alloc_pd_resp {
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
new file mode 100644
index 00000000000..10df386c634
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -0,0 +1,10 @@
+config MLX5_INFINIBAND
+ tristate "Mellanox Connect-IB HCA support"
+ depends on NETDEVICES && ETHERNET && PCI
+ select NET_VENDOR_MELLANOX
+ select MLX5_CORE
+ ---help---
+ This driver provides low-level InfiniBand support for
+ Mellanox Connect-IB PCI Express host channel adapters (HCAs).
+ This is required to use InfiniBand protocols such as
+ IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
new file mode 100644
index 00000000000..4ea0135af48
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
+
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
new file mode 100644
index 00000000000..39ab0caefdf
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx5_ib.h"
+
+struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
+ struct mlx5_ib_ah *ah)
+{
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
+ ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
+ (1 << 30) |
+ ah_attr->grh.sgid_index << 20);
+ ah->av.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.tclass = ah_attr->grh.traffic_class;
+ }
+
+ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+ ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+
+ return &ah->ibah;
+}
+
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+ struct mlx5_ib_ah *ah;
+
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ return create_ib_ah(ah_attr, ah); /* never fails */
+}
+
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct mlx5_ib_ah *ah = to_mah(ibah);
+ u32 tmp;
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+
+ tmp = be32_to_cpu(ah->av.grh_gid_fl);
+ if (tmp & (1 << 30)) {
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
+ ah_attr->grh.flow_label = tmp & 0xfffff;
+ memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
+ ah_attr->grh.hop_limit = ah->av.hop_limit;
+ ah_attr->grh.traffic_class = ah->av.tclass;
+ }
+ ah_attr->dlid = be16_to_cpu(ah->av.rlid);
+ ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
+ ah_attr->sl = ah->av.stat_rate_sl & 0xf;
+
+ return 0;
+}
+
+int mlx5_ib_destroy_ah(struct ib_ah *ah)
+{
+ kfree(to_mah(ah));
+ return 0;
+}
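
A minimal standalone sketch of the address-vector packing used above: create_ib_ah() folds the flow label, the SGID index and a GRH-present flag into the single 32-bit grh_gid_fl field, and mlx5_ib_query_ah() unpacks it again. The sketch models only that bit layout; byte swapping (the real field is big-endian on the wire) and the driver structs are left out, and the sample values are made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t flow_label = 0x12345;	/* 20-bit flow label, illustrative */
	uint32_t sgid_index = 5;	/* illustrative */
	uint32_t grh_gid_fl = flow_label | (1u << 30) | (sgid_index << 20);

	/* the same unpacking mlx5_ib_query_ah() performs */
	printf("grh present %u, sgid index %u, flow label 0x%x\n",
	       (grh_gid_fl >> 30) & 1,
	       (grh_gid_fl >> 20) & 0xff,
	       grh_gid_fl & 0xfffff);
	return 0;
}
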
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
new file mode 100644
index 00000000000..8ae4f896cb4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -0,0 +1,1187 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include "mlx5_ib.h"
+#include "user.h"
+
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+{
+ struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
+
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
+}
+
+static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
+{
+ struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct ib_cq *ibcq = &cq->ibcq;
+ struct ib_event event;
+
+ if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
+ mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
+ type, mcq->cqn);
+ return;
+ }
+
+ if (ibcq->event_handler) {
+ event.device = &dev->ib_dev;
+ event.event = IB_EVENT_CQ_ERR;
+ event.element.cq = ibcq;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
+}
+
+static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
+{
+ return mlx5_buf_offset(&buf->buf, n * size);
+}
+
+static void *get_cqe(struct mlx5_ib_cq *cq, int n)
+{
+ return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+}
+
+static u8 sw_ownership_bit(int n, int nent)
+{
+ return (n & nent) ? 1 : 0;
+}
+
+static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
+{
+ void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+ struct mlx5_cqe64 *cqe64;
+
+ cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+
+ if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+ !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
+ return cqe;
+ } else {
+ return NULL;
+ }
+}
+
+static void *next_cqe_sw(struct mlx5_ib_cq *cq)
+{
+ return get_sw_cqe(cq, cq->mcq.cons_index);
+}
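
A minimal standalone model of the ownership test in get_sw_cqe()/sw_ownership_bit() above, assuming the ring size is a power of two (mlx5_ib_create_cq() below rounds it up) and that MLX5_CQE_OWNER_MASK is the low bit of op_own. The producer writes each entry with the owner bit equal to the parity of its pass over the ring, so an entry is software-owned exactly when that bit matches bit log2(ring size) of the consumer index; never-written entries are instead rejected by the MLX5_CQE_INVALID opcode check set up in init_cq_buf(), which this model leaves out. Note also that for 128-byte CQEs the 64-byte layout sits in the second half of the entry, which is why the code above computes cqe + 64.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8u	/* illustrative; always a power of two */

/* owner bit the producer writes at absolute index n (toggles once per pass) */
static uint8_t hw_owner_bit(uint32_t n)
{
	return (n & RING_SIZE) ? 1 : 0;
}

/* the check get_sw_cqe() performs at consumer index n */
static int sw_owned(uint8_t op_own, uint32_t n)
{
	return !((op_own & 1) ^ !!(n & RING_SIZE));
}

int main(void)
{
	uint8_t owner[RING_SIZE] = { 0 };
	uint32_t n;

	for (n = 0; n < 3 * RING_SIZE; n++) {
		owner[n % RING_SIZE] = hw_owner_bit(n);	/* producer writes entry n */
		/* the freshly written entry always tests as software-owned */
		printf("ci=%2u sw_owned=%d\n", n, sw_owned(owner[n % RING_SIZE], n));
	}
	return 0;
}
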
+
+static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
+{
+ switch (wq->wr_data[idx]) {
+ case MLX5_IB_WR_UMR:
+ return 0;
+
+ case IB_WR_LOCAL_INV:
+ return IB_WC_LOCAL_INV;
+
+ case IB_WR_FAST_REG_MR:
+ return IB_WC_FAST_REG_MR;
+
+ default:
+ pr_warn("unknown completion status\n");
+ return 0;
+ }
+}
+
+static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
+ struct mlx5_ib_wq *wq, int idx)
+{
+ wc->wc_flags = 0;
+ switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
+ case MLX5_OPCODE_RDMA_WRITE_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ case MLX5_OPCODE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case MLX5_OPCODE_SEND_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ case MLX5_OPCODE_SEND:
+ case MLX5_OPCODE_SEND_INVAL:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case MLX5_OPCODE_RDMA_READ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+ break;
+ case MLX5_OPCODE_ATOMIC_CS:
+ wc->opcode = IB_WC_COMP_SWAP;
+ wc->byte_len = 8;
+ break;
+ case MLX5_OPCODE_ATOMIC_FA:
+ wc->opcode = IB_WC_FETCH_ADD;
+ wc->byte_len = 8;
+ break;
+ case MLX5_OPCODE_ATOMIC_MASKED_CS:
+ wc->opcode = IB_WC_MASKED_COMP_SWAP;
+ wc->byte_len = 8;
+ break;
+ case MLX5_OPCODE_ATOMIC_MASKED_FA:
+ wc->opcode = IB_WC_MASKED_FETCH_ADD;
+ wc->byte_len = 8;
+ break;
+ case MLX5_OPCODE_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+ case MLX5_OPCODE_UMR:
+ wc->opcode = get_umr_comp(wq, idx);
+ break;
+ }
+}
+
+enum {
+ MLX5_GRH_IN_BUFFER = 1,
+ MLX5_GRH_IN_CQE = 2,
+};
+
+static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
+ struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+ struct mlx5_ib_srq *srq;
+ struct mlx5_ib_wq *wq;
+ u16 wqe_ctr;
+ u8 g;
+
+ if (qp->ibqp.srq || qp->ibqp.xrcd) {
+ struct mlx5_core_srq *msrq = NULL;
+
+ if (qp->ibqp.xrcd) {
+ msrq = mlx5_core_get_srq(&dev->mdev,
+ be32_to_cpu(cqe->srqn));
+ srq = to_mibsrq(msrq);
+ } else {
+ srq = to_msrq(qp->ibqp.srq);
+ }
+ if (srq) {
+ wqe_ctr = be16_to_cpu(cqe->wqe_counter);
+ wc->wr_id = srq->wrid[wqe_ctr];
+ mlx5_ib_free_srq_wqe(srq, wqe_ctr);
+ if (msrq && atomic_dec_and_test(&msrq->refcount))
+ complete(&msrq->free);
+ }
+ } else {
+ wq = &qp->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
+
+ switch (cqe->op_own >> 4) {
+ case MLX5_CQE_RESP_WR_IMM:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->imm_inval_pkey;
+ break;
+ case MLX5_CQE_RESP_SEND:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+ break;
+ case MLX5_CQE_RESP_SEND_IMM:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->imm_inval_pkey;
+ break;
+ case MLX5_CQE_RESP_SEND_INV:
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
+ break;
+ }
+ wc->slid = be16_to_cpu(cqe->slid);
+ wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
+ wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
+ wc->dlid_path_bits = cqe->ml_path;
+ g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
+ wc->wc_flags |= g ? IB_WC_GRH : 0;
+ wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
+}
+
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+{
+ __be32 *p = (__be32 *)cqe;
+ int i;
+
+ mlx5_ib_warn(dev, "dump error cqe\n");
+ for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
+ pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
+ be32_to_cpu(p[1]), be32_to_cpu(p[2]),
+ be32_to_cpu(p[3]));
+}
+
+static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
+ struct mlx5_err_cqe *cqe,
+ struct ib_wc *wc)
+{
+ int dump = 1;
+
+ switch (cqe->syndrome) {
+ case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
+ wc->status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
+ dump = 0;
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_MW_BIND_ERR:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
+ wc->status = IB_WC_BAD_RESP_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
+ wc->status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ wc->status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+ wc->status = IB_WC_REM_OP_ERR;
+ break;
+ case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ wc->status = IB_WC_RETRY_EXC_ERR;
+ dump = 0;
+ break;
+ case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+ dump = 0;
+ break;
+ case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
+ wc->status = IB_WC_REM_ABORT_ERR;
+ break;
+ default:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ }
+
+ wc->vendor_err = cqe->vendor_err_synd;
+ if (dump)
+ dump_cqe(dev, cqe);
+}
+
+static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
+{
+ /* TBD: waiting decision
+ */
+ return 0;
+}
+
+static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
+{
+ struct mlx5_wqe_data_seg *dpseg;
+ void *addr;
+
+ dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg) +
+ sizeof(struct mlx5_wqe_atomic_seg);
+ addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
+ return addr;
+}
+
+static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ uint16_t idx)
+{
+ void *addr;
+ int byte_count;
+ int i;
+
+ if (!is_atomic_response(qp, idx))
+ return;
+
+ byte_count = be32_to_cpu(cqe64->byte_cnt);
+ addr = mlx5_get_atomic_laddr(qp, idx);
+
+ if (byte_count == 4) {
+ *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
+ } else {
+ for (i = 0; i < byte_count; i += 8) {
+ *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
+ addr += 8;
+ }
+ }
+
+ return;
+}
+
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ u16 tail, u16 head)
+{
+ int idx;
+
+ do {
+ idx = tail & (qp->sq.wqe_cnt - 1);
+ handle_atomic(qp, cqe64, idx);
+ if (idx == head)
+ break;
+
+ tail = qp->sq.w_list[idx].next;
+ } while (1);
+ tail = qp->sq.w_list[idx].next;
+ qp->sq.last_poll = tail;
+}
+
+static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+{
+ mlx5_buf_free(&dev->mdev, &buf->buf);
+}
+
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+ struct ib_sig_err *item)
+{
+ u16 syndrome = be16_to_cpu(cqe->syndrome);
+
+#define GUARD_ERR (1 << 13)
+#define APPTAG_ERR (1 << 12)
+#define REFTAG_ERR (1 << 11)
+
+ if (syndrome & GUARD_ERR) {
+ item->err_type = IB_SIG_BAD_GUARD;
+ item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+ item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+	} else if (syndrome & REFTAG_ERR) {
+ item->err_type = IB_SIG_BAD_REFTAG;
+ item->expected = be32_to_cpu(cqe->expected_reftag);
+ item->actual = be32_to_cpu(cqe->actual_reftag);
+	} else if (syndrome & APPTAG_ERR) {
+ item->err_type = IB_SIG_BAD_APPTAG;
+ item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
+ item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
+ } else {
+ pr_err("Got signature completion error with bad syndrome %04x\n",
+ syndrome);
+ }
+
+ item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+ item->key = be32_to_cpu(cqe->mkey);
+}
+
+static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+ struct mlx5_ib_qp **cur_qp,
+ struct ib_wc *wc)
+{
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct mlx5_err_cqe *err_cqe;
+ struct mlx5_cqe64 *cqe64;
+ struct mlx5_core_qp *mqp;
+ struct mlx5_ib_wq *wq;
+ struct mlx5_sig_err_cqe *sig_err_cqe;
+ struct mlx5_core_mr *mmr;
+ struct mlx5_ib_mr *mr;
+ uint8_t opcode;
+ uint32_t qpn;
+ u16 wqe_ctr;
+ void *cqe;
+ int idx;
+
+repoll:
+ cqe = next_cqe_sw(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+
+ ++cq->mcq.cons_index;
+
+ /* Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+
+ opcode = cqe64->op_own >> 4;
+ if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
+ if (likely(cq->resize_buf)) {
+ free_cq_buf(dev, &cq->buf);
+ cq->buf = *cq->resize_buf;
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ goto repoll;
+ } else {
+ mlx5_ib_warn(dev, "unexpected resize cqe\n");
+ }
+ }
+
+ qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
+ if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ /* We do not have to take the QP table lock here,
+ * because CQs will be locked while QPs are removed
+ * from the table.
+ */
+ mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
+ if (unlikely(!mqp)) {
+ mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
+ cq->mcq.cqn, qpn);
+ return -EINVAL;
+ }
+
+ *cur_qp = to_mibqp(mqp);
+ }
+
+ wc->qp = &(*cur_qp)->ibqp;
+ switch (opcode) {
+ case MLX5_CQE_REQ:
+ wq = &(*cur_qp)->sq;
+ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+ idx = wqe_ctr & (wq->wqe_cnt - 1);
+ handle_good_req(wc, cqe64, wq, idx);
+ handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
+ wc->wr_id = wq->wrid[idx];
+ wq->tail = wq->wqe_head[idx] + 1;
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case MLX5_CQE_RESP_WR_IMM:
+ case MLX5_CQE_RESP_SEND:
+ case MLX5_CQE_RESP_SEND_IMM:
+ case MLX5_CQE_RESP_SEND_INV:
+ handle_responder(wc, cqe64, *cur_qp);
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case MLX5_CQE_RESIZE_CQ:
+ break;
+ case MLX5_CQE_REQ_ERR:
+ case MLX5_CQE_RESP_ERR:
+ err_cqe = (struct mlx5_err_cqe *)cqe64;
+ mlx5_handle_error_cqe(dev, err_cqe, wc);
+ mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
+ opcode == MLX5_CQE_REQ_ERR ?
+ "Requestor" : "Responder", cq->mcq.cqn);
+ mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (opcode == MLX5_CQE_REQ_ERR) {
+ wq = &(*cur_qp)->sq;
+ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+ idx = wqe_ctr & (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
+ wq->tail = wq->wqe_head[idx] + 1;
+ } else {
+ struct mlx5_ib_srq *srq;
+
+ if ((*cur_qp)->ibqp.srq) {
+ srq = to_msrq((*cur_qp)->ibqp.srq);
+ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+ wc->wr_id = srq->wrid[wqe_ctr];
+ mlx5_ib_free_srq_wqe(srq, wqe_ctr);
+ } else {
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+ }
+ break;
+ case MLX5_CQE_SIG_ERR:
+ sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+
+ read_lock(&dev->mdev.priv.mr_table.lock);
+ mmr = __mlx5_mr_lookup(&dev->mdev,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ if (unlikely(!mmr)) {
+ read_unlock(&dev->mdev.priv.mr_table.lock);
+ mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
+ cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
+ return -EINVAL;
+ }
+
+ mr = to_mibmr(mmr);
+ get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
+ mr->sig->sig_err_exists = true;
+ mr->sig->sigerr_count++;
+
+ mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
+ cq->mcq.cqn, mr->sig->err_item.key,
+ mr->sig->err_item.err_type,
+ mr->sig->err_item.sig_err_offset,
+ mr->sig->err_item.expected,
+ mr->sig->err_item.actual);
+
+ read_unlock(&dev->mdev.priv.mr_table.lock);
+ goto repoll;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ struct mlx5_ib_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+ int err = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ for (npolled = 0; npolled < num_entries; npolled++) {
+ err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
+ if (err)
+ break;
+ }
+
+ if (npolled)
+ mlx5_cq_set_ci(&cq->mcq);
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ if (err == 0 || err == -EAGAIN)
+ return npolled;
+ else
+ return err;
+}
+
+int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ mlx5_cq_arm(&to_mcq(ibcq)->mcq,
+ (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
+ to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
+ MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));
+
+ return 0;
+}
+
+static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
+ int nent, int cqe_size)
+{
+ int err;
+
+ err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
+ PAGE_SIZE * 2, &buf->buf);
+ if (err)
+ return err;
+
+ buf->cqe_size = cqe_size;
+ buf->nent = nent;
+
+ return 0;
+}
+
+static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+ struct ib_ucontext *context, struct mlx5_ib_cq *cq,
+ int entries, struct mlx5_create_cq_mbox_in **cqb,
+ int *cqe_size, int *index, int *inlen)
+{
+ struct mlx5_ib_create_cq ucmd;
+ size_t ucmdlen;
+ int page_shift;
+ int npages;
+ int ncont;
+ int err;
+
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
+ return -EFAULT;
+
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
+ if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
+ return -EINVAL;
+
+ *cqe_size = ucmd.cqe_size;
+
+ cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
+ entries * ucmd.cqe_size,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(cq->buf.umem)) {
+ err = PTR_ERR(cq->buf.umem);
+ return err;
+ }
+
+ err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+ &cq->db);
+ if (err)
+ goto err_umem;
+
+ mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+ &ncont, NULL);
+ mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
+ ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+
+ *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+ *cqb = mlx5_vzalloc(*inlen);
+ if (!*cqb) {
+ err = -ENOMEM;
+ goto err_db;
+ }
+ mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
+ (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+ *index = to_mucontext(context)->uuari.uars[0].index;
+
+ return 0;
+
+err_db:
+ mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+
+err_umem:
+ ib_umem_release(cq->buf.umem);
+ return err;
+}
+
+static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+{
+ mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ ib_umem_release(cq->buf.umem);
+}
+
+static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+{
+ int i;
+ void *cqe;
+ struct mlx5_cqe64 *cqe64;
+
+ for (i = 0; i < buf->nent; i++) {
+ cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+ cqe64->op_own = MLX5_CQE_INVALID << 4;
+ }
+}
+
+static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ int entries, int cqe_size,
+ struct mlx5_create_cq_mbox_in **cqb,
+ int *index, int *inlen)
+{
+ int err;
+
+ err = mlx5_db_alloc(&dev->mdev, &cq->db);
+ if (err)
+ return err;
+
+ cq->mcq.set_ci_db = cq->db.db;
+ cq->mcq.arm_db = cq->db.db + 1;
+ *cq->mcq.set_ci_db = 0;
+ *cq->mcq.arm_db = 0;
+ cq->mcq.cqe_sz = cqe_size;
+
+ err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ if (err)
+ goto err_db;
+
+ init_cq_buf(cq, &cq->buf);
+
+ *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+ *cqb = mlx5_vzalloc(*inlen);
+ if (!*cqb) {
+ err = -ENOMEM;
+ goto err_buf;
+ }
+ mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
+
+ (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ *index = dev->mdev.priv.uuari.uars[0].index;
+
+ return 0;
+
+err_buf:
+ free_cq_buf(dev, &cq->buf);
+
+err_db:
+ mlx5_db_free(&dev->mdev, &cq->db);
+ return err;
+}
+
+static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+ free_cq_buf(dev, &cq->buf);
+ mlx5_db_free(&dev->mdev, &cq->db);
+}
+
+struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx5_create_cq_mbox_in *cqb = NULL;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_cq *cq;
+ int uninitialized_var(index);
+ int uninitialized_var(inlen);
+ int cqe_size;
+ int irqn;
+ int eqn;
+ int err;
+
+ if (entries < 0)
+ return ERR_PTR(-EINVAL);
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries > dev->mdev.caps.max_cqes)
+ return ERR_PTR(-EINVAL);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ cq->ibcq.cqe = entries - 1;
+ mutex_init(&cq->resize_mutex);
+ spin_lock_init(&cq->lock);
+ cq->resize_buf = NULL;
+ cq->resize_umem = NULL;
+
+ if (context) {
+ err = create_cq_user(dev, udata, context, cq, entries,
+ &cqb, &cqe_size, &index, &inlen);
+ if (err)
+ goto err_create;
+ } else {
+ /* for now choose 64 bytes till we have a proper interface */
+ cqe_size = 64;
+ err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
+ &index, &inlen);
+ if (err)
+ goto err_create;
+ }
+
+ cq->cqe_size = cqe_size;
+ cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+ cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
+ err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
+ if (err)
+ goto err_cqb;
+
+ cqb->ctx.c_eqn = cpu_to_be16(eqn);
+ cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
+
+ err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
+ if (err)
+ goto err_cqb;
+
+ mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
+ cq->mcq.irqn = irqn;
+ cq->mcq.comp = mlx5_ib_cq_comp;
+ cq->mcq.event = mlx5_ib_cq_event;
+
+ if (context)
+ if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
+ err = -EFAULT;
+ goto err_cmd;
+ }
+
+
+ mlx5_vfree(cqb);
+ return &cq->ibcq;
+
+err_cmd:
+ mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);
+
+err_cqb:
+ mlx5_vfree(cqb);
+ if (context)
+ destroy_cq_user(cq, context);
+ else
+ destroy_cq_kernel(dev, cq);
+
+err_create:
+ kfree(cq);
+
+ return ERR_PTR(err);
+}
+
+
+int mlx5_ib_destroy_cq(struct ib_cq *cq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(cq->device);
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
+ struct ib_ucontext *context = NULL;
+
+ if (cq->uobject)
+ context = cq->uobject->context;
+
+ mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
+ if (context)
+ destroy_cq_user(mcq, context);
+ else
+ destroy_cq_kernel(dev, mcq);
+
+ kfree(mcq);
+
+ return 0;
+}
+
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
+{
+ return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
+}
+
+void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
+{
+ struct mlx5_cqe64 *cqe64, *dest64;
+ void *cqe, *dest;
+ u32 prod_index;
+ int nfreed = 0;
+ u8 owner_bit;
+
+ if (!cq)
+ return;
+
+ /* First we need to find the current producer index, so we
+ * know where to start cleaning from. It doesn't matter if HW
+ * adds new entries after this loop -- the QP we're worried
+ * about is already in RESET, so the new entries won't come
+ * from our QP and therefore don't need to be checked.
+ */
+ for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
+ if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
+ break;
+
+ /* Now sweep backwards through the CQ, removing CQ entries
+ * that match our QP by copying older entries on top of them.
+ */
+ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
+ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+ cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+ if (is_equal_rsn(cqe64, rsn)) {
+ if (srq && (ntohl(cqe64->srqn) & 0xffffff))
+ mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
+ ++nfreed;
+ } else if (nfreed) {
+ dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+ dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
+ owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
+ memcpy(dest, cqe, cq->mcq.cqe_sz);
+ dest64->op_own = owner_bit |
+ (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
+ }
+ }
+
+ if (nfreed) {
+ cq->mcq.cons_index += nfreed;
+ /* Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
+ wmb();
+ mlx5_cq_set_ci(&cq->mcq);
+ }
+}
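
The comments above describe the compaction pass: find the producer side, sweep backwards, count entries that belong to the reset QP (or its SRQ), copy every surviving entry up over the freed slots, and finally advance the consumer index past them. The standalone sketch below reproduces that movement on a plain array; ring wrap-around and the owner-bit fix-up are left out for clarity, and the values are made up.

#include <stdio.h>

/* drop every entry equal to key between ci and pi, sweeping backwards and
 * shifting survivors toward the producer end, the way __mlx5_ib_cq_clean()
 * does on the CQE ring */
static int clean(int *ring, int ci, int pi, int key)
{
	int nfreed = 0;
	int i;

	for (i = pi - 1; i >= ci; i--) {
		if (ring[i] == key)
			nfreed++;
		else if (nfreed)
			ring[i + nfreed] = ring[i];	/* copy survivor over a freed slot */
	}
	return ci + nfreed;	/* new consumer index */
}

int main(void)
{
	int ring[8] = { 7, 3, 7, 5, 3, 9, 0, 0 };	/* QPN of each queued CQE */
	int ci = clean(ring, 0, 6, 3);			/* QP 3 went to RESET */
	int i;

	for (i = ci; i < 6; i++)
		printf("%d ", ring[i]);			/* prints: 7 7 5 9 */
	printf("(cons_index advanced to %d)\n", ci);
	return 0;
}
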
+
+void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
+{
+ if (!cq)
+ return;
+
+ spin_lock_irq(&cq->lock);
+ __mlx5_ib_cq_clean(cq, qpn, srq);
+ spin_unlock_irq(&cq->lock);
+}
+
+int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ struct mlx5_modify_cq_mbox_in *in;
+ struct mlx5_ib_dev *dev = to_mdev(cq->device);
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
+ int err;
+ u32 fsel;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+ return -ENOSYS;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->cqn = cpu_to_be32(mcq->mcq.cqn);
+ fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+ in->ctx.cq_period = cpu_to_be16(cq_period);
+ in->ctx.cq_max_count = cpu_to_be16(cq_count);
+ in->field_select = cpu_to_be32(fsel);
+ err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
+ kfree(in);
+
+ if (err)
+ mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
+
+ return err;
+}
+
+static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ int entries, struct ib_udata *udata, int *npas,
+ int *page_shift, int *cqe_size)
+{
+ struct mlx5_ib_resize_cq ucmd;
+ struct ib_umem *umem;
+ int err;
+ int npages;
+ struct ib_ucontext *context = cq->buf.umem->context;
+
+ err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ if (err)
+ return err;
+
+ if (ucmd.reserved0 || ucmd.reserved1)
+ return -EINVAL;
+
+ umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem)) {
+ err = PTR_ERR(umem);
+ return err;
+ }
+
+ mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+ npas, NULL);
+
+ cq->resize_umem = umem;
+ *cqe_size = ucmd.cqe_size;
+
+ return 0;
+}
+
+static void un_resize_user(struct mlx5_ib_cq *cq)
+{
+ ib_umem_release(cq->resize_umem);
+}
+
+static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ int entries, int cqe_size)
+{
+ int err;
+
+ cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
+ if (!cq->resize_buf)
+ return -ENOMEM;
+
+ err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ if (err)
+ goto ex;
+
+ init_cq_buf(cq, cq->resize_buf);
+
+ return 0;
+
+ex:
+ kfree(cq->resize_buf);
+ return err;
+}
+
+static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+}
+
+static int copy_resize_cqes(struct mlx5_ib_cq *cq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+ struct mlx5_cqe64 *scqe64;
+ struct mlx5_cqe64 *dcqe64;
+ void *start_cqe;
+ void *scqe;
+ void *dcqe;
+ int ssize;
+ int dsize;
+ int i;
+ u8 sw_own;
+
+ ssize = cq->buf.cqe_size;
+ dsize = cq->resize_buf->cqe_size;
+ if (ssize != dsize) {
+ mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
+ return -EINVAL;
+ }
+
+ i = cq->mcq.cons_index;
+ scqe = get_sw_cqe(cq, i);
+ scqe64 = ssize == 64 ? scqe : scqe + 64;
+ start_cqe = scqe;
+ if (!scqe) {
+ mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+ return -EINVAL;
+ }
+
+ while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
+ dcqe = get_cqe_from_buf(cq->resize_buf,
+ (i + 1) & (cq->resize_buf->nent),
+ dsize);
+ dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
+ sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
+ memcpy(dcqe, scqe, dsize);
+ dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
+
+ ++i;
+ scqe = get_sw_cqe(cq, i);
+ scqe64 = ssize == 64 ? scqe : scqe + 64;
+ if (!scqe) {
+ mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+ return -EINVAL;
+ }
+
+ if (scqe == start_cqe) {
+ pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
+ cq->mcq.cqn);
+ return -ENOMEM;
+ }
+ }
+ ++cq->mcq.cons_index;
+ return 0;
+}
+
+int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ struct mlx5_modify_cq_mbox_in *in;
+ int err;
+ int npas;
+ int page_shift;
+ int inlen;
+ int uninitialized_var(cqe_size);
+ unsigned long flags;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+ pr_info("Firmware does not support resize CQ\n");
+ return -ENOSYS;
+ }
+
+ if (entries < 1)
+ return -EINVAL;
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries > dev->mdev.caps.max_cqes + 1)
+ return -EINVAL;
+
+ if (entries == ibcq->cqe + 1)
+ return 0;
+
+ mutex_lock(&cq->resize_mutex);
+ if (udata) {
+ err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
+ &cqe_size);
+ } else {
+ cqe_size = 64;
+ err = resize_kernel(dev, cq, entries, cqe_size);
+ if (!err) {
+ npas = cq->resize_buf->buf.npages;
+ page_shift = cq->resize_buf->buf.page_shift;
+ }
+ }
+
+ if (err)
+ goto ex;
+
+ inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto ex_resize;
+ }
+
+ if (udata)
+ mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
+ in->pas, 0);
+ else
+ mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
+
+ in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE |
+ MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+ MLX5_MODIFY_CQ_MASK_PG_SIZE);
+ in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+ in->ctx.page_offset = 0;
+ in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
+ in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
+ in->cqn = cpu_to_be32(cq->mcq.cqn);
+
+ err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
+ if (err)
+ goto ex_alloc;
+
+ if (udata) {
+ cq->ibcq.cqe = entries - 1;
+ ib_umem_release(cq->buf.umem);
+ cq->buf.umem = cq->resize_umem;
+ cq->resize_umem = NULL;
+ } else {
+ struct mlx5_ib_cq_buf tbuf;
+ int resized = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+ if (cq->resize_buf) {
+ err = copy_resize_cqes(cq);
+ if (!err) {
+ tbuf = cq->buf;
+ cq->buf = *cq->resize_buf;
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ resized = 1;
+ }
+ }
+ cq->ibcq.cqe = entries - 1;
+ spin_unlock_irqrestore(&cq->lock, flags);
+ if (resized)
+ free_cq_buf(dev, &tbuf);
+ }
+ mutex_unlock(&cq->resize_mutex);
+
+ mlx5_vfree(in);
+ return 0;
+
+ex_alloc:
+ mlx5_vfree(in);
+
+ex_resize:
+ if (udata)
+ un_resize_user(cq);
+ else
+ un_resize_kernel(dev, cq);
+ex:
+ mutex_unlock(&cq->resize_mutex);
+ return err;
+}
+
+int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+{
+ struct mlx5_ib_cq *cq;
+
+ if (!ibcq)
+ return 128;
+
+ cq = to_mcq(ibcq);
+ return cq->cqe_size;
+}
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
new file mode 100644
index 00000000000..ece028fc47d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
+
+#include "mlx5_ib.h"
+
+struct mlx5_ib_user_db_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ unsigned long user_virt;
+ int refcnt;
+};
+
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+ struct mlx5_db *db)
+{
+ struct mlx5_ib_user_db_page *page;
+ int err = 0;
+
+ mutex_lock(&context->db_page_mutex);
+
+ list_for_each_entry(page, &context->db_page_list, list)
+ if (page->user_virt == (virt & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ page->user_virt = (virt & PAGE_MASK);
+ page->refcnt = 0;
+ page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
+ PAGE_SIZE, 0, 0);
+ if (IS_ERR(page->umem)) {
+ err = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &context->db_page_list);
+
+found:
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+ db->u.user_page = page;
+ ++page->refcnt;
+
+out:
+ mutex_unlock(&context->db_page_mutex);
+
+ return err;
+}
+
+void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
+{
+ mutex_lock(&context->db_page_mutex);
+
+ if (!--db->u.user_page->refcnt) {
+ list_del(&db->u.user_page->list);
+ ib_umem_release(db->u.user_page->umem);
+ kfree(db->u.user_page);
+ }
+
+ mutex_unlock(&context->db_page_mutex);
+}
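
mlx5_ib_db_map_user() above pins the page holding a userspace doorbell record only once per page-aligned virtual address and refcounts it, so several doorbell records in the same page share one umem; the record's DMA address is the page's bus address plus the offset within the page. The sketch below models only that address split, with made-up addresses and a fixed 4 KiB page size.

#include <stdint.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096ULL
#define MODEL_PAGE_MASK (~(MODEL_PAGE_SIZE - 1))

int main(void)
{
	uint64_t virt = 0x7f2a12345678ULL;	/* hypothetical user VA of a db record */
	uint64_t page_dma = 0xabcde000ULL;	/* hypothetical bus address of the pinned page */

	uint64_t key = virt & MODEL_PAGE_MASK;			/* lookup key in db_page_list */
	uint64_t dma = page_dma + (virt & ~MODEL_PAGE_MASK);	/* what db->dma receives */

	printf("page key 0x%llx, db dma 0x%llx\n",
	       (unsigned long long)key, (unsigned long long)dma);
	return 0;
}
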
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
new file mode 100644
index 00000000000..5c8938be0e0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/cmd.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+#include "mlx5_ib.h"
+
+enum {
+ MLX5_IB_VENDOR_CLASS1 = 0x9,
+ MLX5_IB_VENDOR_CLASS2 = 0xa
+};
+
+int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad)
+{
+ u8 op_modifier = 0;
+
+ /* Key check traps can't be generated unless we have in_wc to
+ * tell us where to send the trap.
+ */
+ if (ignore_mkey || !in_wc)
+ op_modifier |= 0x1;
+ if (ignore_bkey || !in_wc)
+ op_modifier |= 0x2;
+
+ return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port);
+}
+
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ u16 slid;
+ int err;
+
+ slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /* Don't process SMInfo queries -- the SMA can't handle them.
+ */
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+ return IB_MAD_RESULT_SUCCESS;
+ } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
+ in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
+ if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } else {
+ return IB_MAD_RESULT_SUCCESS;
+ }
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev),
+ mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY,
+ port_num, in_wc, in_grh, in_mad, out_mad);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ /* set return bit in status of directed route responses */
+ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ u16 packet_error;
+
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+
+ packet_error = be16_to_cpu(out_mad->status);
+
+ dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
+ MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
new file mode 100644
index 00000000000..364d4b6937f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -0,0 +1,1543 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <asm-generic/kmap_types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/io-mapping.h>
+#include <linux/sched.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_umem.h>
+#include "user.h"
+#include "mlx5_ib.h"
+
+#define DRIVER_NAME "mlx5_ib"
+#define DRIVER_VERSION "2.2-1"
+#define DRIVER_RELDATE "Feb 2014"
+
+MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+static int prof_sel = 2;
+module_param_named(prof_sel, prof_sel, int, 0444);
+MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+static char mlx5_version[] =
+ DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
+ DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+
+static struct mlx5_profile profile[] = {
+ [0] = {
+ .mask = 0,
+ },
+ [1] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE,
+ .log_max_qp = 12,
+ },
+ [2] = {
+ .mask = MLX5_PROF_MASK_QP_SIZE |
+ MLX5_PROF_MASK_MR_CACHE,
+ .log_max_qp = 17,
+ .mr_cache[0] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[1] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[2] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[3] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[4] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[5] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[6] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[7] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[8] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[9] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[10] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[11] = {
+ .size = 500,
+ .limit = 250
+ },
+ .mr_cache[12] = {
+ .size = 64,
+ .limit = 32
+ },
+ .mr_cache[13] = {
+ .size = 32,
+ .limit = 16
+ },
+ .mr_cache[14] = {
+ .size = 16,
+ .limit = 8
+ },
+ .mr_cache[15] = {
+ .size = 8,
+ .limit = 4
+ },
+ },
+};
+
+int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
+{
+ struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+ struct mlx5_eq *eq, *n;
+ int err = -ENOENT;
+
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+ if (eq->index == vector) {
+ *eqn = eq->eqn;
+ *irqn = eq->irqn;
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&table->lock);
+
+ return err;
+}
+
+static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+ char name[MLX5_MAX_EQ_NAME];
+ struct mlx5_eq *eq, *n;
+ int ncomp_vec;
+ int nent;
+ int err;
+ int i;
+
+ INIT_LIST_HEAD(&dev->eqs_list);
+ ncomp_vec = table->num_comp_vectors;
+ nent = MLX5_COMP_EQ_SIZE;
+ for (i = 0; i < ncomp_vec; i++) {
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq) {
+ err = -ENOMEM;
+ goto clean;
+ }
+
+ snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+ err = mlx5_create_map_eq(&dev->mdev, eq,
+ i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
+ name, &dev->mdev.priv.uuari.uars[0]);
+ if (err) {
+ kfree(eq);
+ goto clean;
+ }
+ mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
+ eq->index = i;
+ spin_lock(&table->lock);
+ list_add_tail(&eq->list, &dev->eqs_list);
+ spin_unlock(&table->lock);
+ }
+
+ dev->num_comp_vectors = ncomp_vec;
+ return 0;
+
+clean:
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+ list_del(&eq->list);
+ spin_unlock(&table->lock);
+ if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
+ mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
+ kfree(eq);
+ spin_lock(&table->lock);
+ }
+ spin_unlock(&table->lock);
+ return err;
+}
+
+static void free_comp_eqs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+ struct mlx5_eq *eq, *n;
+
+ spin_lock(&table->lock);
+ list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
+ list_del(&eq->list);
+ spin_unlock(&table->lock);
+ if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
+ mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
+ kfree(eq);
+ spin_lock(&table->lock);
+ }
+ spin_unlock(&table->lock);
+}
+
+static int mlx5_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+ int max_rq_sg;
+ int max_sq_sg;
+ u64 flags;
+
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memset(props, 0, sizeof(*props));
+
+ props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) |
+ (fw_rev_min(&dev->mdev) << 16) |
+ fw_rev_sub(&dev->mdev);
+ props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
+ IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_RC_RNR_NAK_GEN;
+ flags = dev->mdev.caps.flags;
+ if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
+ props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
+ if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
+ props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
+ if (flags & MLX5_DEV_CAP_FLAG_APM)
+ props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
+ props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ if (flags & MLX5_DEV_CAP_FLAG_XRC)
+ props->device_cap_flags |= IB_DEVICE_XRC;
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) {
+ props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ /* At this stage no support for signature handover */
+ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
+ IB_PROT_T10DIF_TYPE_2 |
+ IB_PROT_T10DIF_TYPE_3;
+ props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
+ IB_GUARD_T10DIF_CSUM;
+ }
+ if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)
+ props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+
+ props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
+ 0xffffff;
+ props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30));
+ props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32));
+ memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
+
+ props->max_mr_size = ~0ull;
+ props->page_size_cap = dev->mdev.caps.min_page_sz;
+ props->max_qp = 1 << dev->mdev.caps.log_max_qp;
+ props->max_qp_wr = dev->mdev.caps.max_wqes;
+ max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
+ max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
+ props->max_sge = min(max_rq_sg, max_sq_sg);
+ props->max_cq = 1 << dev->mdev.caps.log_max_cq;
+ props->max_cqe = dev->mdev.caps.max_cqes - 1;
+ props->max_mr = 1 << dev->mdev.caps.log_max_mkey;
+ props->max_pd = 1 << dev->mdev.caps.log_max_pd;
+ props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp;
+ props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_srq = 1 << dev->mdev.caps.log_max_srq;
+ props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1;
+ props->max_srq_sge = max_rq_sg - 1;
+ props->max_fast_reg_page_list_len = (unsigned int)-1;
+ props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
+ props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
+ props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg;
+ props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+ props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return err;
+}
+
+int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int ext_active_speed;
+ int err = -ENOMEM;
+
+ if (port < 1 || port > dev->mdev.caps.num_ports) {
+ mlx5_ib_warn(dev, "invalid port number %d\n", port);
+ return -EINVAL;
+ }
+
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ memset(props, 0, sizeof(*props));
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err) {
+ mlx5_ib_warn(dev, "err %d\n", err);
+ goto out;
+ }
+
+
+ props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16));
+ props->lmc = out_mad->data[34] & 0x7;
+ props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18));
+ props->sm_sl = out_mad->data[36] & 0xf;
+ props->state = out_mad->data[32] & 0xf;
+ props->phys_state = out_mad->data[33] >> 4;
+ props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
+ props->gid_tbl_len = out_mad->data[50];
+ props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg;
+ props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len;
+ props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
+ props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
+ props->active_width = out_mad->data[31] & 0xf;
+ props->active_speed = out_mad->data[35] >> 4;
+ props->max_mtu = out_mad->data[41] & 0xf;
+ props->active_mtu = out_mad->data[36] >> 4;
+ props->subnet_timeout = out_mad->data[51] & 0x1f;
+ props->max_vl_num = out_mad->data[37] >> 4;
+ props->init_type_reply = out_mad->data[41] >> 4;
+
+ /* Check if extended speeds (EDR/FDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+ ext_active_speed = out_mad->data[62] >> 4;
+
+ switch (ext_active_speed) {
+ case 1:
+ props->active_speed = 16; /* FDR */
+ break;
+ case 2:
+ props->active_speed = 32; /* EDR */
+ break;
+ }
+ }
+
+	/* If reported active speed is QDR, check if it is FDR-10 */
+ if (props->active_speed == 4) {
+ if (dev->mdev.caps.ext_port_cap[port - 1] &
+ MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
+ init_query_mad(in_mad);
+ in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx5_MAD_IFC(dev, 1, 1, port,
+ NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ /* Checking LinkSpeedActive for FDR-10 */
+ if (out_mad->data[15] & 0x1)
+ props->active_speed = 8;
+ }
+ }
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+
+ return err;
+}
+
+static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
+ in_mad->attr_mod = cpu_to_be32(port);
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(gid->raw, out_mad->data + 8, 8);
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
+ in_mad->attr_mod = cpu_to_be32(index / 8);
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
+ in_mad->attr_mod = cpu_to_be32(index / 32);
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+struct mlx5_reg_node_desc {
+ u8 desc[64];
+};
+
+static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_reg_node_desc in;
+ struct mlx5_reg_node_desc out;
+ int err;
+
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
+ return 0;
+
+ /*
+ * If possible, pass node desc to FW, so it can generate
+ * a 144 trap. If cmd fails, just ignore.
+ */
+ memcpy(&in, props->node_desc, 64);
+ err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out,
+ sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
+ if (err)
+ return err;
+
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+
+ return err;
+}
+
+static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+ struct ib_port_modify *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct ib_port_attr attr;
+ u32 tmp;
+ int err;
+
+ mutex_lock(&dev->cap_mask_mutex);
+
+ err = mlx5_ib_query_port(ibdev, port, &attr);
+ if (err)
+ goto out;
+
+ tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
+ ~props->clr_port_cap_mask;
+
+ err = mlx5_set_port_caps(&dev->mdev, port, tmp);
+
+out:
+ mutex_unlock(&dev->cap_mask_mutex);
+ return err;
+}
+
+static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_alloc_ucontext_req_v2 req;
+ struct mlx5_ib_alloc_ucontext_resp resp;
+ struct mlx5_ib_ucontext *context;
+ struct mlx5_uuar_info *uuari;
+ struct mlx5_uar *uars;
+ int gross_uuars;
+ int num_uars;
+ int ver;
+ int uuarn;
+ int err;
+ int i;
+ int reqlen;
+
+ if (!dev->ib_active)
+ return ERR_PTR(-EAGAIN);
+
+ memset(&req, 0, sizeof(req));
+ reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
+ if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ ver = 0;
+ else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ ver = 2;
+ else
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&req, udata, reqlen);
+ if (err)
+ return ERR_PTR(err);
+
+ if (req.flags || req.reserved)
+ return ERR_PTR(-EINVAL);
+
+ if (req.total_num_uuars > MLX5_MAX_UUARS)
+ return ERR_PTR(-ENOMEM);
+
+ if (req.total_num_uuars == 0)
+ return ERR_PTR(-EINVAL);
+
+ req.total_num_uuars = ALIGN(req.total_num_uuars,
+ MLX5_NON_FP_BF_REGS_PER_PAGE);
+ if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+ return ERR_PTR(-EINVAL);
+
+ num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
+ gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
+ resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
+ resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
+ resp.cache_line_size = L1_CACHE_BYTES;
+ resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz;
+ resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz;
+ resp.max_send_wqebb = dev->mdev.caps.max_wqes;
+ resp.max_recv_wr = dev->mdev.caps.max_wqes;
+ resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes;
+
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ uuari = &context->uuari;
+ mutex_init(&uuari->lock);
+ uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
+ if (!uars) {
+ err = -ENOMEM;
+ goto out_ctx;
+ }
+
+ uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
+ sizeof(*uuari->bitmap),
+ GFP_KERNEL);
+ if (!uuari->bitmap) {
+ err = -ENOMEM;
+ goto out_uar_ctx;
+ }
+	/*
+	 * mark all fast path uuars as taken so the general allocator
+	 * does not hand them out
+	 */
+ for (i = 0; i < gross_uuars; i++) {
+ uuarn = i & 3;
+ if (uuarn == 2 || uuarn == 3)
+ set_bit(i, uuari->bitmap);
+ }
+
+ uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
+ if (!uuari->count) {
+ err = -ENOMEM;
+ goto out_bitmap;
+ }
+
+ for (i = 0; i < num_uars; i++) {
+ err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index);
+ if (err)
+ goto out_count;
+ }
+
+ INIT_LIST_HEAD(&context->db_page_list);
+ mutex_init(&context->db_page_mutex);
+
+ resp.tot_uuars = req.total_num_uuars;
+ resp.num_ports = dev->mdev.caps.num_ports;
+ err = ib_copy_to_udata(udata, &resp,
+ sizeof(resp) - sizeof(resp.reserved));
+ if (err)
+ goto out_uars;
+
+ uuari->ver = ver;
+ uuari->num_low_latency_uuars = req.num_low_latency_uuars;
+ uuari->uars = uars;
+ uuari->num_uars = num_uars;
+ return &context->ibucontext;
+
+out_uars:
+ for (i--; i >= 0; i--)
+ mlx5_cmd_free_uar(&dev->mdev, uars[i].index);
+out_count:
+ kfree(uuari->count);
+
+out_bitmap:
+ kfree(uuari->bitmap);
+
+out_uar_ctx:
+ kfree(uars);
+
+out_ctx:
+ kfree(context);
+ return ERR_PTR(err);
+}
+
+static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+ struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+ struct mlx5_uuar_info *uuari = &context->uuari;
+ int i;
+
+ for (i = 0; i < uuari->num_uars; i++) {
+ if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index))
+ mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
+ }
+
+ kfree(uuari->count);
+ kfree(uuari->bitmap);
+ kfree(uuari->uars);
+ kfree(context);
+
+ return 0;
+}
+
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+{
+ return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index;
+}
+
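+/*
+ * mmap offsets encode a command in the bits above MLX5_IB_MMAP_CMD_SHIFT
+ * of vma->vm_pgoff and an argument (e.g. a UAR index) in the low bits.
+ */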
+static int get_command(unsigned long offset)
+{
+ return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
+}
+
+static int get_arg(unsigned long offset)
+{
+ return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
+}
+
+static int get_index(unsigned long offset)
+{
+ return get_arg(offset);
+}
+
+static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+ struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
+ struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
+ struct mlx5_uuar_info *uuari = &context->uuari;
+ unsigned long command;
+ unsigned long idx;
+ phys_addr_t pfn;
+
+ command = get_command(vma->vm_pgoff);
+ switch (command) {
+ case MLX5_IB_MMAP_REGULAR_PAGE:
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+		idx = get_index(vma->vm_pgoff);
+		if (idx >= uuari->num_uars)
+			return -EINVAL;
+
+		pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+		mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
+			    (unsigned long long)pfn);
+
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+ break;
+
+ case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
+ return -ENOSYS;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
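+/*
+ * Create a physical-address (PA) mkey covering the whole address space
+ * (MLX5_MKEY_LEN64); it serves as the pa_lkey of protection domains
+ * allocated from the kernel, where no userspace registration exists.
+ */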
+static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
+{
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_mkey_seg *seg;
+ struct mlx5_core_mr mr;
+ int err;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ seg = &in->seg;
+ seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
+ seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->start_addr = 0;
+
+ err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+ NULL, NULL, NULL);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
+ goto err_in;
+ }
+
+ kfree(in);
+ *key = mr.key;
+
+ return 0;
+
+err_in:
+ kfree(in);
+
+ return err;
+}
+
+static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
+{
+ struct mlx5_core_mr mr;
+ int err;
+
+ memset(&mr, 0, sizeof(mr));
+ mr.key = key;
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
+}
+
+static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_alloc_pd_resp resp;
+ struct mlx5_ib_pd *pd;
+ int err;
+
+ pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn);
+ if (err) {
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+
+ if (context) {
+ resp.pdn = pd->pdn;
+ if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+ mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ } else {
+ err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
+ if (err) {
+ mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
+ kfree(pd);
+ return ERR_PTR(err);
+ }
+ }
+
+ return &pd->ibpd;
+}
+
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(pd->device);
+ struct mlx5_ib_pd *mpd = to_mpd(pd);
+
+ if (!pd->uobject)
+ free_pa_mkey(mdev, mpd->pa_lkey);
+
+ mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn);
+ kfree(mpd);
+
+ return 0;
+}
+
+static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ int err;
+
+ err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num);
+ if (err)
+ mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
+ ibqp->qp_num, gid->raw);
+
+ return err;
+}
+
+static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ int err;
+
+ err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num);
+ if (err)
+ mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
+ ibqp->qp_num, gid->raw);
+
+ return err;
+}
+
+static int init_node_data(struct mlx5_ib_dev *dev)
+{
+ struct ib_smp *in_mad = NULL;
+ struct ib_smp *out_mad = NULL;
+ int err = -ENOMEM;
+
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
+ if (!in_mad || !out_mad)
+ goto out;
+
+ init_query_mad(in_mad);
+ in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
+
+ err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
+
+ in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
+
+ err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
+ if (err)
+ goto out;
+
+ dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32));
+ memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
+
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return err;
+}
+
+static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+
+ return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages);
+}
+
+static ssize_t show_reg_pages(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+
+ return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ return sprintf(buf, "MT%d\n", dev->mdev.pdev->device);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev),
+ fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev));
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ return sprintf(buf, "%x\n", dev->mdev.rev_id);
+}
+
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+ dev->mdev.board_id);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
+static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
+
+static struct device_attribute *mlx5_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+ &dev_attr_fw_pages,
+ &dev_attr_reg_pages,
+};
+
+static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+ void *data)
+{
+ struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev);
+ struct ib_event ibev;
+ u8 port = 0;
+
+ switch (event) {
+ case MLX5_DEV_EVENT_SYS_ERROR:
+ ibdev->ib_active = false;
+ ibev.event = IB_EVENT_DEVICE_FATAL;
+ break;
+
+ case MLX5_DEV_EVENT_PORT_UP:
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ port = *(u8 *)data;
+ break;
+
+ case MLX5_DEV_EVENT_PORT_DOWN:
+ ibev.event = IB_EVENT_PORT_ERR;
+ port = *(u8 *)data;
+ break;
+
+ case MLX5_DEV_EVENT_PORT_INITIALIZED:
+ /* not used by ULPs */
+ return;
+
+ case MLX5_DEV_EVENT_LID_CHANGE:
+ ibev.event = IB_EVENT_LID_CHANGE;
+ port = *(u8 *)data;
+ break;
+
+ case MLX5_DEV_EVENT_PKEY_CHANGE:
+ ibev.event = IB_EVENT_PKEY_CHANGE;
+ port = *(u8 *)data;
+ break;
+
+ case MLX5_DEV_EVENT_GUID_CHANGE:
+ ibev.event = IB_EVENT_GID_CHANGE;
+ port = *(u8 *)data;
+ break;
+
+ case MLX5_DEV_EVENT_CLIENT_REREG:
+ ibev.event = IB_EVENT_CLIENT_REREGISTER;
+ port = *(u8 *)data;
+ break;
+ }
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = port;
+
+ if (port < 1 || port > ibdev->num_ports) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+ return;
+ }
+
+ if (ibdev->ib_active)
+ ib_dispatch_event(&ibev);
+}
+
+static void get_ext_port_caps(struct mlx5_ib_dev *dev)
+{
+ int port;
+
+ for (port = 1; port <= dev->mdev.caps.num_ports; port++)
+ mlx5_query_ext_port_caps(dev, port);
+}
+
+static int get_port_caps(struct mlx5_ib_dev *dev)
+{
+ struct ib_device_attr *dprops = NULL;
+ struct ib_port_attr *pprops = NULL;
+	int err = -ENOMEM;
+ int port;
+
+ pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
+ if (!pprops)
+ goto out;
+
+ dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
+ if (!dprops)
+ goto out;
+
+ err = mlx5_ib_query_device(&dev->ib_dev, dprops);
+ if (err) {
+ mlx5_ib_warn(dev, "query_device failed %d\n", err);
+ goto out;
+ }
+
+ for (port = 1; port <= dev->mdev.caps.num_ports; port++) {
+ err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
+ if (err) {
+ mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
+ break;
+ }
+ dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
+ dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
+ mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
+ dprops->max_pkeys, pprops->gid_tbl_len);
+ }
+
+out:
+ kfree(pprops);
+ kfree(dprops);
+
+ return err;
+}
+
+static void destroy_umrc_res(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ err = mlx5_mr_cache_cleanup(dev);
+ if (err)
+ mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+ mlx5_ib_destroy_qp(dev->umrc.qp);
+ ib_destroy_cq(dev->umrc.cq);
+ ib_dereg_mr(dev->umrc.mr);
+ ib_dealloc_pd(dev->umrc.pd);
+}
+
+enum {
+ MAX_UMR_WR = 128,
+};
+
+static int create_umr_res(struct mlx5_ib_dev *dev)
+{
+ struct ib_qp_init_attr *init_attr = NULL;
+ struct ib_qp_attr *attr = NULL;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ struct ib_mr *mr;
+ int ret;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto error_0;
+ }
+
+ pd = ib_alloc_pd(&dev->ib_dev);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ ret = PTR_ERR(pd);
+ goto error_0;
+ }
+
+ mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(mr)) {
+ mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
+ ret = PTR_ERR(mr);
+ goto error_1;
+ }
+
+ cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128,
+ 0);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto error_2;
+ }
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+ init_attr->send_cq = cq;
+ init_attr->recv_cq = cq;
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->cap.max_send_wr = MAX_UMR_WR;
+ init_attr->cap.max_send_sge = 1;
+ init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr->port_num = 1;
+ qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto error_3;
+ }
+ qp->device = &dev->ib_dev;
+ qp->real_qp = qp;
+ qp->uobject = NULL;
+ qp->qp_type = MLX5_IB_QPT_REG_UMR;
+
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
+ IB_QP_PORT, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTR;
+ attr->path_mtu = IB_MTU_256;
+
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ goto error_4;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+ attr->qp_state = IB_QPS_RTS;
+ ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ goto error_4;
+ }
+
+ dev->umrc.qp = qp;
+ dev->umrc.cq = cq;
+ dev->umrc.mr = mr;
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ ret = mlx5_mr_cache_init(dev);
+ if (ret) {
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ goto error_4;
+ }
+
+ kfree(attr);
+ kfree(init_attr);
+
+ return 0;
+
+error_4:
+ mlx5_ib_destroy_qp(qp);
+
+error_3:
+ ib_destroy_cq(cq);
+
+error_2:
+ ib_dereg_mr(mr);
+
+error_1:
+ ib_dealloc_pd(pd);
+
+error_0:
+ kfree(attr);
+ kfree(init_attr);
+ return ret;
+}
+
+static int create_dev_resources(struct mlx5_ib_resources *devr)
+{
+ struct ib_srq_init_attr attr;
+ struct mlx5_ib_dev *dev;
+ int ret = 0;
+
+ dev = container_of(devr, struct mlx5_ib_dev, devr);
+
+ devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
+ if (IS_ERR(devr->p0)) {
+ ret = PTR_ERR(devr->p0);
+ goto error0;
+ }
+ devr->p0->device = &dev->ib_dev;
+ devr->p0->uobject = NULL;
+ atomic_set(&devr->p0->usecnt, 0);
+
+ devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
+ if (IS_ERR(devr->c0)) {
+ ret = PTR_ERR(devr->c0);
+ goto error1;
+ }
+ devr->c0->device = &dev->ib_dev;
+ devr->c0->uobject = NULL;
+ devr->c0->comp_handler = NULL;
+ devr->c0->event_handler = NULL;
+ devr->c0->cq_context = NULL;
+ atomic_set(&devr->c0->usecnt, 0);
+
+ devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+ if (IS_ERR(devr->x0)) {
+ ret = PTR_ERR(devr->x0);
+ goto error2;
+ }
+ devr->x0->device = &dev->ib_dev;
+ devr->x0->inode = NULL;
+ atomic_set(&devr->x0->usecnt, 0);
+ mutex_init(&devr->x0->tgt_qp_mutex);
+ INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
+
+ devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
+ if (IS_ERR(devr->x1)) {
+ ret = PTR_ERR(devr->x1);
+ goto error3;
+ }
+ devr->x1->device = &dev->ib_dev;
+ devr->x1->inode = NULL;
+ atomic_set(&devr->x1->usecnt, 0);
+ mutex_init(&devr->x1->tgt_qp_mutex);
+ INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr.max_sge = 1;
+ attr.attr.max_wr = 1;
+ attr.srq_type = IB_SRQT_XRC;
+ attr.ext.xrc.cq = devr->c0;
+ attr.ext.xrc.xrcd = devr->x0;
+
+ devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
+ if (IS_ERR(devr->s0)) {
+ ret = PTR_ERR(devr->s0);
+ goto error4;
+ }
+ devr->s0->device = &dev->ib_dev;
+ devr->s0->pd = devr->p0;
+ devr->s0->uobject = NULL;
+ devr->s0->event_handler = NULL;
+ devr->s0->srq_context = NULL;
+ devr->s0->srq_type = IB_SRQT_XRC;
+ devr->s0->ext.xrc.xrcd = devr->x0;
+ devr->s0->ext.xrc.cq = devr->c0;
+ atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
+ atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
+ atomic_inc(&devr->p0->usecnt);
+ atomic_set(&devr->s0->usecnt, 0);
+
+ return 0;
+
+error4:
+ mlx5_ib_dealloc_xrcd(devr->x1);
+error3:
+ mlx5_ib_dealloc_xrcd(devr->x0);
+error2:
+ mlx5_ib_destroy_cq(devr->c0);
+error1:
+ mlx5_ib_dealloc_pd(devr->p0);
+error0:
+ return ret;
+}
+
+static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+{
+ mlx5_ib_destroy_srq(devr->s0);
+ mlx5_ib_dealloc_xrcd(devr->x0);
+ mlx5_ib_dealloc_xrcd(devr->x1);
+ mlx5_ib_destroy_cq(devr->c0);
+ mlx5_ib_dealloc_pd(devr->p0);
+}
+
+static int init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct mlx5_core_dev *mdev;
+ struct mlx5_ib_dev *dev;
+ int err;
+ int i;
+
+ printk_once(KERN_INFO "%s", mlx5_version);
+
+ dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev)
+ return -ENOMEM;
+
+ mdev = &dev->mdev;
+ mdev->event = mlx5_ib_event;
+ if (prof_sel >= ARRAY_SIZE(profile)) {
+		pr_warn("selected profile out of range, selecting default\n");
+ prof_sel = 0;
+ }
+ mdev->profile = &profile[prof_sel];
+ err = mlx5_dev_init(mdev, pdev);
+ if (err)
+ goto err_free;
+
+ err = get_port_caps(dev);
+ if (err)
+ goto err_cleanup;
+
+ get_ext_port_caps(dev);
+
+ err = alloc_comp_eqs(dev);
+ if (err)
+ goto err_cleanup;
+
+ MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
+
+ strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.owner = THIS_MODULE;
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey;
+ dev->num_ports = mdev->caps.num_ports;
+ dev->ib_dev.phys_port_cnt = dev->num_ports;
+ dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
+ dev->ib_dev.dma_device = &mdev->pdev->dev;
+
+ dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+ dev->ib_dev.query_device = mlx5_ib_query_device;
+ dev->ib_dev.query_port = mlx5_ib_query_port;
+ dev->ib_dev.query_gid = mlx5_ib_query_gid;
+ dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
+ dev->ib_dev.modify_device = mlx5_ib_modify_device;
+ dev->ib_dev.modify_port = mlx5_ib_modify_port;
+ dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
+ dev->ib_dev.mmap = mlx5_ib_mmap;
+ dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
+ dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
+ dev->ib_dev.create_ah = mlx5_ib_create_ah;
+ dev->ib_dev.query_ah = mlx5_ib_query_ah;
+ dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
+ dev->ib_dev.create_srq = mlx5_ib_create_srq;
+ dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
+ dev->ib_dev.query_srq = mlx5_ib_query_srq;
+ dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
+ dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
+ dev->ib_dev.create_qp = mlx5_ib_create_qp;
+ dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
+ dev->ib_dev.query_qp = mlx5_ib_query_qp;
+ dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
+ dev->ib_dev.post_send = mlx5_ib_post_send;
+ dev->ib_dev.post_recv = mlx5_ib_post_recv;
+ dev->ib_dev.create_cq = mlx5_ib_create_cq;
+ dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
+ dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
+ dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
+ dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
+ dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
+ dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
+ dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
+ dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
+ dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr;
+ dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
+ dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
+ dev->ib_dev.process_mad = mlx5_ib_process_mad;
+ dev->ib_dev.create_mr = mlx5_ib_create_mr;
+ dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
+ dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
+ dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
+ dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
+
+ if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
+ dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
+ dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
+ dev->ib_dev.uverbs_cmd_mask |=
+ (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ }
+
+ err = init_node_data(dev);
+ if (err)
+ goto err_eqs;
+
+ mutex_init(&dev->cap_mask_mutex);
+ spin_lock_init(&dev->mr_lock);
+
+ err = create_dev_resources(&dev->devr);
+ if (err)
+ goto err_eqs;
+
+ err = ib_register_device(&dev->ib_dev, NULL);
+ if (err)
+ goto err_rsrc;
+
+ err = create_umr_res(dev);
+ if (err)
+ goto err_dev;
+
+ for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
+ err = device_create_file(&dev->ib_dev.dev,
+ mlx5_class_attributes[i]);
+ if (err)
+ goto err_umrc;
+ }
+
+ dev->ib_active = true;
+
+ return 0;
+
+err_umrc:
+ destroy_umrc_res(dev);
+
+err_dev:
+ ib_unregister_device(&dev->ib_dev);
+
+err_rsrc:
+ destroy_dev_resources(&dev->devr);
+
+err_eqs:
+ free_comp_eqs(dev);
+
+err_cleanup:
+ mlx5_dev_cleanup(mdev);
+
+err_free:
+ ib_dealloc_device((struct ib_device *)dev);
+
+ return err;
+}
+
+static void remove_one(struct pci_dev *pdev)
+{
+ struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev);
+
+ destroy_umrc_res(dev);
+ ib_unregister_device(&dev->ib_dev);
+ destroy_dev_resources(&dev->devr);
+ free_comp_eqs(dev);
+ mlx5_dev_cleanup(&dev->mdev);
+ ib_dealloc_device(&dev->ib_dev);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = {
+ { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table);
+
+static struct pci_driver mlx5_ib_driver = {
+ .name = DRIVER_NAME,
+ .id_table = mlx5_ib_pci_table,
+ .probe = init_one,
+ .remove = remove_one
+};
+
+static int __init mlx5_ib_init(void)
+{
+ return pci_register_driver(&mlx5_ib_driver);
+}
+
+static void __exit mlx5_ib_cleanup(void)
+{
+ pci_unregister_driver(&mlx5_ib_driver);
+}
+
+module_init(mlx5_ib_init);
+module_exit(mlx5_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
new file mode 100644
index 00000000000..8499aec94db
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+
+/* @umem: umem object to scan
+ * @addr: ib virtual address requested by the user
+ * @count: number of PAGE_SIZE pages covered by umem
+ * @shift: page shift for the compound pages found in the region
+ * @ncont: number of compound pages
+ * @order: log2 of the number of compound pages
+ */
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+ int *ncont, int *order)
+{
+ unsigned long tmp;
+ unsigned long m;
+ int i, k;
+ u64 base = 0;
+ int p = 0;
+ int skip;
+ int mask;
+ u64 len;
+ u64 pfn;
+ struct scatterlist *sg;
+ int entry;
+
+ addr = addr >> PAGE_SHIFT;
+ tmp = (unsigned long)addr;
+ m = find_first_bit(&tmp, sizeof(tmp));
+ skip = 1 << m;
+ mask = skip - 1;
+ i = 0;
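+	/*
+	 * Walk the DMA-mapped scatterlist and track the largest power-of-two
+	 * block size (2^m pages) at which every chunk is both aligned and
+	 * physically contiguous; this determines the compound page shift
+	 * reported back to the caller.
+	 */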
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+ for (k = 0; k < len; k++) {
+ if (!(i & mask)) {
+ tmp = (unsigned long)pfn;
+ m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+ skip = 1 << m;
+ mask = skip - 1;
+ base = pfn;
+ p = 0;
+ } else {
+ if (base + p != pfn) {
+ tmp = (unsigned long)p;
+ m = find_first_bit(&tmp, sizeof(tmp));
+ skip = 1 << m;
+ mask = skip - 1;
+ base = pfn;
+ p = 0;
+ }
+ }
+ p++;
+ i++;
+ }
+ }
+
+ if (i) {
+ m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
+
+ if (order)
+ *order = ilog2(roundup_pow_of_two(i) >> m);
+
+ *ncont = DIV_ROUND_UP(i, (1 << m));
+ } else {
+ m = 0;
+
+ if (order)
+ *order = 0;
+
+ *ncont = 0;
+ }
+ *shift = PAGE_SHIFT + m;
+ *count = i;
+}
+
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ int page_shift, __be64 *pas, int umr)
+{
+ int shift = page_shift - PAGE_SHIFT;
+ int mask = (1 << shift) - 1;
+ int i, k;
+ u64 cur = 0;
+ u64 base;
+ int len;
+ struct scatterlist *sg;
+ int entry;
+
+ i = 0;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ base = sg_dma_address(sg);
+ for (k = 0; k < len; k++) {
+ if (!(i & mask)) {
+ cur = base + (k << PAGE_SHIFT);
+ if (umr)
+ cur |= 3;
+
+ pas[i >> shift] = cpu_to_be64(cur);
+ mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
+ i >> shift, be64_to_cpu(pas[i >> shift]));
+ } else
+ mlx5_ib_dbg(dev, "=====> 0x%llx\n",
+ base + (k << PAGE_SHIFT));
+ i++;
+ }
+ }
+}
+
+int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
+{
+ u64 page_size;
+ u64 page_mask;
+ u64 off_size;
+ u64 off_mask;
+ u64 buf_off;
+
+ page_size = 1 << page_shift;
+ page_mask = page_size - 1;
+ buf_off = addr & page_mask;
+ off_size = page_size >> 6;
+ off_mask = off_size - 1;
+
+ if (buf_off & off_mask)
+ return -EINVAL;
+
+ *offset = buf_off >> ilog2(off_size);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
new file mode 100644
index 00000000000..f2ccf1a5a29
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_H
+#define MLX5_IB_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/types.h>
+
+#define mlx5_ib_dbg(dev, format, arg...) \
+pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
+ __LINE__, current->pid, ##arg)
+
+#define mlx5_ib_err(dev, format, arg...) \
+pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
+ __LINE__, current->pid, ##arg)
+
+#define mlx5_ib_warn(dev, format, arg...) \
+pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
+ __LINE__, current->pid, ##arg)
+
+enum {
+ MLX5_IB_MMAP_CMD_SHIFT = 8,
+ MLX5_IB_MMAP_CMD_MASK = 0xff,
+};
+
+enum mlx5_ib_mmap_cmd {
+ MLX5_IB_MMAP_REGULAR_PAGE = 0,
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
+};
+
+enum {
+ MLX5_RES_SCAT_DATA32_CQE = 0x1,
+ MLX5_RES_SCAT_DATA64_CQE = 0x2,
+ MLX5_REQ_SCAT_DATA32_CQE = 0x11,
+ MLX5_REQ_SCAT_DATA64_CQE = 0x22,
+};
+
+enum mlx5_ib_latency_class {
+ MLX5_IB_LATENCY_CLASS_LOW,
+ MLX5_IB_LATENCY_CLASS_MEDIUM,
+ MLX5_IB_LATENCY_CLASS_HIGH,
+ MLX5_IB_LATENCY_CLASS_FAST_PATH
+};
+
+enum mlx5_ib_mad_ifc_flags {
+ MLX5_MAD_IFC_IGNORE_MKEY = 1,
+ MLX5_MAD_IFC_IGNORE_BKEY = 2,
+ MLX5_MAD_IFC_NET_VIEW = 4,
+};
+
+struct mlx5_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ struct list_head db_page_list;
+
+ /* protect doorbell record alloc/free
+ */
+ struct mutex db_page_mutex;
+ struct mlx5_uuar_info uuari;
+};
+
+static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
+}
+
+struct mlx5_ib_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+ u32 pa_lkey;
+};
+
+/* Use macros here so that we don't have to duplicate
+ * enum ib_send_flags and enum ib_qp_type for the low-level driver
+ */
+
+#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
+#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
+#define MLX5_IB_WR_UMR IB_WR_RESERVED1
+
+struct wr_list {
+ u16 opcode;
+ u16 next;
+};
+
+struct mlx5_ib_wq {
+ u64 *wrid;
+ u32 *wr_data;
+ struct wr_list *w_list;
+ unsigned *wqe_head;
+ u16 unsig_count;
+
+ /* serialize post to the work queue
+ */
+ spinlock_t lock;
+ int wqe_cnt;
+ int max_post;
+ int max_gs;
+ int offset;
+ int wqe_shift;
+ unsigned head;
+ unsigned tail;
+ u16 cur_post;
+ u16 last_poll;
+ void *qend;
+};
+
+enum {
+ MLX5_QP_USER,
+ MLX5_QP_KERNEL,
+ MLX5_QP_EMPTY
+};
+
+struct mlx5_ib_qp {
+ struct ib_qp ibqp;
+ struct mlx5_core_qp mqp;
+ struct mlx5_buf buf;
+
+ struct mlx5_db db;
+ struct mlx5_ib_wq rq;
+
+ u32 doorbell_qpn;
+ u8 sq_signal_bits;
+ u8 fm_cache;
+ int sq_max_wqes_per_wr;
+ int sq_spare_wqes;
+ struct mlx5_ib_wq sq;
+
+ struct ib_umem *umem;
+ int buf_size;
+
+ /* serialize qp state modifications
+ */
+ struct mutex mutex;
+ u16 xrcdn;
+ u32 flags;
+ u8 port;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+ u8 state;
+ int mlx_type;
+ int wq_sig;
+ int scat_cqe;
+ int max_inline_data;
+ struct mlx5_bf *bf;
+ int has_rq;
+
+ /* only for user space QPs. For kernel
+ * we have it from the bf object
+ */
+ int uuarn;
+
+ int create_type;
+ u32 pa_lkey;
+
+ /* Store signature errors */
+ bool signature_en;
+};
+
+struct mlx5_ib_cq_buf {
+ struct mlx5_buf buf;
+ struct ib_umem *umem;
+ int cqe_size;
+ int nent;
+};
+
+enum mlx5_ib_qp_flags {
+ MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
+ MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
+};
+
+struct mlx5_shared_mr_info {
+ int mr_id;
+ struct ib_umem *umem;
+};
+
+struct mlx5_ib_cq {
+ struct ib_cq ibcq;
+ struct mlx5_core_cq mcq;
+ struct mlx5_ib_cq_buf buf;
+ struct mlx5_db db;
+
+ /* serialize access to the CQ
+ */
+ spinlock_t lock;
+
+ /* protect resize cq
+ */
+ struct mutex resize_mutex;
+ struct mlx5_ib_cq_buf *resize_buf;
+ struct ib_umem *resize_umem;
+ int cqe_size;
+};
+
+struct mlx5_ib_srq {
+ struct ib_srq ibsrq;
+ struct mlx5_core_srq msrq;
+ struct mlx5_buf buf;
+ struct mlx5_db db;
+ u64 *wrid;
+	/* protect SRQ handling
+ */
+ spinlock_t lock;
+ int head;
+ int tail;
+ u16 wqe_ctr;
+ struct ib_umem *umem;
+ /* serialize arming a SRQ
+ */
+ struct mutex mutex;
+ int wq_sig;
+};
+
+struct mlx5_ib_xrcd {
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+};
+
+struct mlx5_ib_mr {
+ struct ib_mr ibmr;
+ struct mlx5_core_mr mmr;
+ struct ib_umem *umem;
+ struct mlx5_shared_mr_info *smr_info;
+ struct list_head list;
+ int order;
+ int umred;
+ __be64 *pas;
+ dma_addr_t dma;
+ int npages;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_create_mkey_mbox_out out;
+ struct mlx5_core_sig_ctx *sig;
+};
+
+struct mlx5_ib_fast_reg_page_list {
+ struct ib_fast_reg_page_list ibfrpl;
+ __be64 *mapped_page_list;
+ dma_addr_t map;
+};
+
+struct mlx5_ib_umr_context {
+ enum ib_wc_status status;
+ struct completion done;
+};
+
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+ context->status = -1;
+ init_completion(&context->done);
+}
+
+struct umr_common {
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ struct ib_mr *mr;
+ /* control access to UMR QP
+ */
+ struct semaphore sem;
+};
+
+enum {
+ MLX5_FMR_INVALID,
+ MLX5_FMR_VALID,
+ MLX5_FMR_BUSY,
+};
+
+struct mlx5_ib_fmr {
+ struct ib_fmr ibfmr;
+ struct mlx5_core_mr mr;
+ int access_flags;
+ int state;
+ /* protect fmr state
+ */
+ spinlock_t lock;
+ u64 wrid;
+ struct ib_send_wr wr[2];
+ u8 page_shift;
+ struct ib_fast_reg_page_list page_list;
+};
+
+struct mlx5_cache_ent {
+ struct list_head head;
+	/* sync access to the cache entry
+ */
+ spinlock_t lock;
+
+
+ struct dentry *dir;
+ char name[4];
+ u32 order;
+ u32 size;
+ u32 cur;
+ u32 miss;
+ u32 limit;
+
+ struct dentry *fsize;
+ struct dentry *fcur;
+ struct dentry *fmiss;
+ struct dentry *flimit;
+
+ struct mlx5_ib_dev *dev;
+ struct work_struct work;
+ struct delayed_work dwork;
+ int pending;
+};
+
+struct mlx5_mr_cache {
+ struct workqueue_struct *wq;
+ struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
+ int stopped;
+ struct dentry *root;
+ unsigned long last_add;
+};
+
+struct mlx5_ib_resources {
+ struct ib_cq *c0;
+ struct ib_xrcd *x0;
+ struct ib_xrcd *x1;
+ struct ib_pd *p0;
+ struct ib_srq *s0;
+};
+
+struct mlx5_ib_dev {
+ struct ib_device ib_dev;
+ struct mlx5_core_dev mdev;
+ MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
+ struct list_head eqs_list;
+ int num_ports;
+ int num_comp_vectors;
+ /* serialize update of capability mask
+ */
+ struct mutex cap_mask_mutex;
+ bool ib_active;
+ struct umr_common umrc;
+ /* sync used page count stats
+ */
+ spinlock_t mr_lock;
+ struct mlx5_ib_resources devr;
+ struct mlx5_mr_cache cache;
+ struct timer_list delay_timer;
+ int fill_delay;
+};
+
+static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
+{
+ return container_of(mcq, struct mlx5_ib_cq, mcq);
+}
+
+static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
+}
+
+static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
+}
+
+static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
+}
+
+static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct mlx5_ib_cq, ibcq);
+}
+
+static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
+{
+ return container_of(mqp, struct mlx5_ib_qp, mqp);
+}
+
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+{
+ return container_of(mmr, struct mlx5_ib_mr, mmr);
+}
+
+static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct mlx5_ib_pd, ibpd);
+}
+
+static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
+}
+
+static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct mlx5_ib_qp, ibqp);
+}
+
+static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
+{
+ return container_of(msrq, struct mlx5_ib_srq, msrq);
+}
+
+static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct mlx5_ib_mr, ibmr);
+}
+
+static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
+{
+ return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
+}
+
+struct mlx5_ib_ah {
+ struct ib_ah ibah;
+ struct mlx5_av av;
+};
+
+static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct mlx5_ib_ah, ibah);
+}
+
+static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev)
+{
+ return container_of(dev, struct mlx5_ib_dev, mdev);
+}
+
+static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev)
+{
+ return mlx5_core2ibdev(pci2mlx5_core_dev(pdev));
+}
+
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+ struct mlx5_db *db);
+void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
+void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
+void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
+void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
+int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
+ int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
+ void *in_mad, void *response_mad);
+struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
+ struct mlx5_ib_ah *ah);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int mlx5_ib_destroy_ah(struct ib_ah *ah);
+struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
+int mlx5_ib_destroy_srq(struct ib_srq *srq);
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int mlx5_ib_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+int mlx5_ib_destroy_cq(struct ib_cq *cq);
+int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
+struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len);
+struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len);
+void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
+ struct ib_fmr_attr *fmr_attr);
+int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int npages, u64 iova);
+int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
+int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
+int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
+int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
+void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+ int *ncont, int *order);
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ int page_shift, __be64 *pas, int umr);
+void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
+int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
+int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
+int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+ struct ib_mr_status *mr_status);
+
+static inline void init_query_mad(struct ib_smp *mad)
+{
+ mad->base_version = 1;
+ mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ mad->class_version = 1;
+ mad->method = IB_MGMT_METHOD_GET;
+}
+
+static inline u8 convert_access(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
+ MLX5_PERM_LOCAL_READ;
+}
+
+#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
new file mode 100644
index 00000000000..afa873bd028
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -0,0 +1,1250 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#include <linux/kref.h>
+#include <linux/random.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+
+enum {
+ MAX_PENDING_REG_MR = 8,
+};
+
+enum {
+ MLX5_UMR_ALIGN = 2048
+};
+
+static __be64 *mr_align(__be64 *ptr, int align)
+{
+ unsigned long mask = align - 1;
+
+ return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+}
+
+static int order2idx(struct mlx5_ib_dev *dev, int order)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+
+ if (order < cache->ent[0].order)
+ return 0;
+ else
+ return order - cache->ent[0].order;
+}
+
+static void reg_mr_callback(int status, void *context)
+{
+ struct mlx5_ib_mr *mr = context;
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct mlx5_mr_cache *cache = &dev->cache;
+ int c = order2idx(dev, mr->order);
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ u8 key;
+ unsigned long flags;
+ struct mlx5_mr_table *table = &dev->mdev.priv.mr_table;
+ int err;
+
+ spin_lock_irqsave(&ent->lock, flags);
+ ent->pending--;
+ spin_unlock_irqrestore(&ent->lock, flags);
+ if (status) {
+ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+ kfree(mr);
+ dev->fill_delay = 1;
+ mod_timer(&dev->delay_timer, jiffies + HZ);
+ return;
+ }
+
+ if (mr->out.hdr.status) {
+		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
+ mr->out.hdr.status,
+ be32_to_cpu(mr->out.hdr.syndrome));
+ kfree(mr);
+ dev->fill_delay = 1;
+ mod_timer(&dev->delay_timer, jiffies + HZ);
+ return;
+ }
+
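+	/*
+	 * Combine the 24-bit mkey index returned by the firmware with a
+	 * rotating 8-bit key byte to form the full memory key.
+	 */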
+ spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+ key = dev->mdev.priv.mkey_key++;
+ spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+ mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+ cache->last_add = jiffies;
+
+ spin_lock_irqsave(&ent->lock, flags);
+ list_add_tail(&mr->list, &ent->head);
+ ent->cur++;
+ ent->size++;
+ spin_unlock_irqrestore(&ent->lock, flags);
+
+ write_lock_irqsave(&table->lock, flags);
+ err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
+ &mr->mmr);
+ if (err)
+ pr_err("Error inserting to mr tree. 0x%x\n", -err);
+ write_unlock_irqrestore(&table->lock, flags);
+}
+
+static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_ib_mr *mr;
+ int npages = 1 << ent->order;
+ int err = 0;
+ int i;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ for (i = 0; i < num; i++) {
+ if (ent->pending >= MAX_PENDING_REG_MR) {
+ err = -EAGAIN;
+ break;
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ err = -ENOMEM;
+ break;
+ }
+ mr->order = ent->order;
+ mr->umred = 1;
+ mr->dev = dev;
+ in->seg.status = 1 << 6;
+ in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
+ in->seg.log2_page_size = 12;
+
+ spin_lock_irq(&ent->lock);
+ ent->pending++;
+ spin_unlock_irq(&ent->lock);
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
+ sizeof(*in), reg_mr_callback,
+ mr, &mr->out);
+ if (err) {
+ mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+ kfree(mr);
+ break;
+ }
+ }
+
+ kfree(in);
+ return err;
+}
+
+static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_ib_mr *mr;
+ int err;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+ spin_unlock_irq(&ent->lock);
+ return;
+ }
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->cur--;
+ ent->size--;
+ spin_unlock_irq(&ent->lock);
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err)
+ mlx5_ib_warn(dev, "failed destroy mkey\n");
+ else
+ kfree(mr);
+ }
+}
+
+static ssize_t size_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_cache_ent *ent = filp->private_data;
+ struct mlx5_ib_dev *dev = ent->dev;
+ char lbuf[20];
+ u32 var;
+ int err;
+ int c;
+
+ if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+ return -EFAULT;
+
+ c = order2idx(dev, ent->order);
+ lbuf[sizeof(lbuf) - 1] = 0;
+
+ if (sscanf(lbuf, "%u", &var) != 1)
+ return -EINVAL;
+
+ if (var < ent->limit)
+ return -EINVAL;
+
+ if (var > ent->size) {
+ do {
+ err = add_keys(dev, c, var - ent->size);
+ if (err && err != -EAGAIN)
+ return err;
+
+ usleep_range(3000, 5000);
+ } while (err);
+ } else if (var < ent->size) {
+ remove_keys(dev, c, ent->size - var);
+ }
+
+ return count;
+}
+
+static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cache_ent *ent = filp->private_data;
+ char lbuf[20];
+ int err;
+
+ if (*pos)
+ return 0;
+
+ err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(buf, lbuf, err))
+ return -EFAULT;
+
+ *pos += err;
+
+ return err;
+}
+
+static const struct file_operations size_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = size_write,
+ .read = size_read,
+};
+
+static ssize_t limit_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_cache_ent *ent = filp->private_data;
+ struct mlx5_ib_dev *dev = ent->dev;
+ char lbuf[20];
+ u32 var;
+ int err;
+ int c;
+
+ if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+ return -EFAULT;
+
+ c = order2idx(dev, ent->order);
+ lbuf[sizeof(lbuf) - 1] = 0;
+
+ if (sscanf(lbuf, "%u", &var) != 1)
+ return -EINVAL;
+
+ if (var > ent->size)
+ return -EINVAL;
+
+ ent->limit = var;
+
+ if (ent->cur < ent->limit) {
+ err = add_keys(dev, c, 2 * ent->limit - ent->cur);
+ if (err)
+ return err;
+ }
+
+ return count;
+}
+
+static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_cache_ent *ent = filp->private_data;
+ char lbuf[20];
+ int err;
+
+ if (*pos)
+ return 0;
+
+ err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(buf, lbuf, err))
+ return -EFAULT;
+
+ *pos += err;
+
+ return err;
+}
+
+static const struct file_operations limit_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = limit_write,
+ .read = limit_read,
+};
+
+static int someone_adding(struct mlx5_mr_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ if (cache->ent[i].cur < cache->ent[i].limit)
+ return 1;
+ }
+
+ return 0;
+}
+
+static void __cache_work_func(struct mlx5_cache_ent *ent)
+{
+ struct mlx5_ib_dev *dev = ent->dev;
+ struct mlx5_mr_cache *cache = &dev->cache;
+ int i = order2idx(dev, ent->order);
+ int err;
+
+ if (cache->stopped)
+ return;
+
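+	/*
+	 * Keep the entry populated at roughly twice its limit: add keys while
+	 * below that watermark, and shrink back above it once no one has been
+	 * adding for a grace period.
+	 */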
+ ent = &dev->cache.ent[i];
+ if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+ err = add_keys(dev, i, 1);
+ if (ent->cur < 2 * ent->limit) {
+ if (err == -EAGAIN) {
+ mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+ i + 2);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(3));
+ } else if (err) {
+ mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+ i + 2, err);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(1000));
+ } else {
+ queue_work(cache->wq, &ent->work);
+ }
+ }
+ } else if (ent->cur > 2 * ent->limit) {
+ if (!someone_adding(cache) &&
+ time_after(jiffies, cache->last_add + 300 * HZ)) {
+ remove_keys(dev, i, 1);
+ if (ent->cur > ent->limit)
+ queue_work(cache->wq, &ent->work);
+ } else {
+ queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+ }
+ }
+}
+
+static void delayed_cache_work_func(struct work_struct *work)
+{
+ struct mlx5_cache_ent *ent;
+
+ ent = container_of(work, struct mlx5_cache_ent, dwork.work);
+ __cache_work_func(ent);
+}
+
+static void cache_work_func(struct work_struct *work)
+{
+ struct mlx5_cache_ent *ent;
+
+ ent = container_of(work, struct mlx5_cache_ent, work);
+ __cache_work_func(ent);
+}
+
+static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_ib_mr *mr = NULL;
+ struct mlx5_cache_ent *ent;
+ int c;
+ int i;
+
+ c = order2idx(dev, order);
+ if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+ mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
+ return NULL;
+ }
+
+ for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+ ent = &cache->ent[i];
+
+ mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+
+ spin_lock_irq(&ent->lock);
+ if (!list_empty(&ent->head)) {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+ list);
+ list_del(&mr->list);
+ ent->cur--;
+ spin_unlock_irq(&ent->lock);
+ if (ent->cur < ent->limit)
+ queue_work(cache->wq, &ent->work);
+ break;
+ }
+ spin_unlock_irq(&ent->lock);
+
+ queue_work(cache->wq, &ent->work);
+
+ if (mr)
+ break;
+ }
+
+ if (!mr)
+ cache->ent[c].miss++;
+
+ return mr;
+}
+
+static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int shrink = 0;
+ int c;
+
+ c = order2idx(dev, mr->order);
+ if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+ mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
+ return;
+ }
+ ent = &cache->ent[c];
+ spin_lock_irq(&ent->lock);
+ list_add_tail(&mr->list, &ent->head);
+ ent->cur++;
+ if (ent->cur > 2 * ent->limit)
+ shrink = 1;
+ spin_unlock_irq(&ent->lock);
+
+ if (shrink)
+ queue_work(cache->wq, &ent->work);
+}
+
+static void clean_keys(struct mlx5_ib_dev *dev, int c)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent = &cache->ent[c];
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ cancel_delayed_work(&ent->dwork);
+ while (1) {
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+ spin_unlock_irq(&ent->lock);
+ return;
+ }
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ list_del(&mr->list);
+ ent->cur--;
+ ent->size--;
+ spin_unlock_irq(&ent->lock);
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err)
+ mlx5_ib_warn(dev, "failed destroy mkey\n");
+ else
+ kfree(mr);
+ }
+}
+
+static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
+ if (!cache->root)
+ return -ENOMEM;
+
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ ent = &cache->ent[i];
+ sprintf(ent->name, "%d", ent->order);
+ ent->dir = debugfs_create_dir(ent->name, cache->root);
+ if (!ent->dir)
+ return -ENOMEM;
+
+ ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
+ &size_fops);
+ if (!ent->fsize)
+ return -ENOMEM;
+
+ ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
+ &limit_fops);
+ if (!ent->flimit)
+ return -ENOMEM;
+
+ ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
+ &ent->cur);
+ if (!ent->fcur)
+ return -ENOMEM;
+
+ ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
+ &ent->miss);
+ if (!ent->fmiss)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root)
+ return;
+
+ debugfs_remove_recursive(dev->cache.root);
+}
+
+static void delay_time_func(unsigned long ctx)
+{
+ struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+
+ dev->fill_delay = 0;
+}
+
+int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int limit;
+ int err;
+ int i;
+
+ cache->wq = create_singlethread_workqueue("mkey_cache");
+ if (!cache->wq) {
+ mlx5_ib_warn(dev, "failed to create work queue\n");
+ return -ENOMEM;
+ }
+
+ setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+		ent = &cache->ent[i];
+		INIT_LIST_HEAD(&ent->head);
+		spin_lock_init(&ent->lock);
+ ent->order = i + 2;
+ ent->dev = dev;
+
+ if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
+ limit = dev->mdev.profile->mr_cache[i].limit;
+ else
+ limit = 0;
+
+ INIT_WORK(&ent->work, cache_work_func);
+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+ ent->limit = limit;
+ queue_work(cache->wq, &ent->work);
+ }
+
+ err = mlx5_mr_cache_debugfs_init(dev);
+ if (err)
+ mlx5_ib_warn(dev, "cache debugfs failure\n");
+
+ return 0;
+}
+
+int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+{
+ int i;
+
+ dev->cache.stopped = 1;
+ flush_workqueue(dev->cache.wq);
+
+ mlx5_mr_cache_debugfs_cleanup(dev);
+
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
+ clean_keys(dev, i);
+
+ destroy_workqueue(dev->cache.wq);
+ del_timer_sync(&dev->delay_timer);
+
+ return 0;
+}
+
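+/*
+ * Create a physical-address (PA) mkey that spans the whole address space
+ * (MLX5_MKEY_LEN64), for DMA MR use by kernel consumers.
+ */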
+struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_core_dev *mdev = &dev->mdev;
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_mkey_seg *seg;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ seg = &in->seg;
+ seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
+ seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->start_addr = 0;
+
+ err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+ NULL);
+ if (err)
+ goto err_in;
+
+ kfree(in);
+ mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.rkey = mr->mmr.key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_in:
+ kfree(in);
+
+err_free:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
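+/*
+ * Number of 16-byte octwords needed for the translation entries of a region
+ * of @len bytes at @addr: one 8-byte entry per page, two entries per octword.
+ */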
+static int get_octo_len(u64 addr, u64 len, int page_size)
+{
+ u64 offset;
+ int npages;
+
+ offset = addr & (page_size - 1);
+ npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
+ return (npages + 1) / 2;
+}
+
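+/*
+ * Regions of page-count order 17 or less are registered through the UMR QP
+ * and the MR cache; larger regions fall back to reg_create().
+ */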
+static int use_umr(int order)
+{
+ return order <= 17;
+}
+
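+/*
+ * Fill in a UMR work request that registers mkey @key for the range
+ * [@virt_addr, @virt_addr + @len) using the @n page entries DMA-mapped at @dma.
+ */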
+static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
+ struct ib_sge *sg, u64 dma, int n, u32 key,
+ int page_shift, u64 virt_addr, u64 len,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_mr *mr = dev->umrc.mr;
+
+ sg->addr = dma;
+ sg->length = ALIGN(sizeof(u64) * n, 64);
+ sg->lkey = mr->lkey;
+
+ wr->next = NULL;
+ wr->send_flags = 0;
+ wr->sg_list = sg;
+ if (n)
+ wr->num_sge = 1;
+ else
+ wr->num_sge = 0;
+
+ wr->opcode = MLX5_IB_WR_UMR;
+ wr->wr.fast_reg.page_list_len = n;
+ wr->wr.fast_reg.page_shift = page_shift;
+ wr->wr.fast_reg.rkey = key;
+ wr->wr.fast_reg.iova_start = virt_addr;
+ wr->wr.fast_reg.length = len;
+ wr->wr.fast_reg.access_flags = access_flags;
+ wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+}
+
+static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
+ struct ib_send_wr *wr, u32 key)
+{
+ wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+ wr->opcode = MLX5_IB_WR_UMR;
+ wr->wr.fast_reg.rkey = key;
+}
+
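+/*
+ * Reap every available completion on the UMR CQ, complete the context stored
+ * in each wr_id, then re-arm the CQ.
+ */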
+void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
+{
+ struct mlx5_ib_umr_context *context;
+ struct ib_wc wc;
+ int err;
+
+ while (1) {
+ err = ib_poll_cq(cq, 1, &wc);
+ if (err < 0) {
+ pr_warn("poll cq error %d\n", err);
+ return;
+ }
+ if (err == 0)
+ break;
+
+ context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
+ context->status = wc.status;
+ complete(&context->done);
+ }
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+}
+
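+/*
+ * Fast-path registration: take an mkey from the cache (requesting one more
+ * key and returning -EAGAIN on a miss), DMA-map the page list, post a UMR
+ * registration WQE and wait for its completion.
+ */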
+static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 virt_addr, u64 len, int npages,
+ int page_shift, int order, int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct device *ddev = dev->ib_dev.dma_device;
+ struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
+ struct ib_send_wr wr, *bad;
+ struct mlx5_ib_mr *mr;
+ struct ib_sge sg;
+ int size = sizeof(u64) * npages;
+ int err = 0;
+ int i;
+
+ for (i = 0; i < 1; i++) {
+ mr = alloc_cached_mr(dev, order);
+ if (mr)
+ break;
+
+ err = add_keys(dev, order2idx(dev, order), 1);
+ if (err && err != -EAGAIN) {
+ mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
+ break;
+ }
+ }
+
+ if (!mr)
+ return ERR_PTR(-EAGAIN);
+
+ mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+ if (!mr->pas) {
+ err = -ENOMEM;
+ goto free_mr;
+ }
+
+ mlx5_ib_populate_pas(dev, umem, page_shift,
+ mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+
+ mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->dma)) {
+ err = -ENOMEM;
+ goto free_pas;
+ }
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr_id = (u64)(unsigned long)&umr_context;
+ prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+
+ mlx5_ib_init_umr_context(&umr_context);
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &wr, &bad);
+ if (err) {
+ mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+ goto unmap_dma;
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed\n");
+ err = -EFAULT;
+ }
+ }
+
+ mr->mmr.iova = virt_addr;
+ mr->mmr.size = len;
+ mr->mmr.pd = to_mpd(pd)->pdn;
+
+unmap_dma:
+ up(&umrc->sem);
+ dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+
+free_pas:
+ kfree(mr->pas);
+
+free_mr:
+ if (err) {
+ free_cached_mr(dev, mr);
+ return ERR_PTR(err);
+ }
+
+ return mr;
+}
+
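+/*
+ * Slow-path registration: build a CREATE_MKEY mailbox holding the full page
+ * list and let the firmware create the mkey directly.
+ */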
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
+ u64 length, struct ib_umem *umem,
+ int npages, int page_shift,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_ib_mr *mr;
+ int inlen;
+ int err;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_1;
+ }
+ mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+
+ in->seg.flags = convert_access(access_flags) |
+ MLX5_ACCESS_MODE_MTT;
+ in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+ in->seg.start_addr = cpu_to_be64(virt_addr);
+ in->seg.len = cpu_to_be64(length);
+ in->seg.bsfs_octo_size = 0;
+ in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
+ in->seg.log2_page_size = page_shift;
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+ 1 << page_shift));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+ NULL, NULL);
+ if (err) {
+ mlx5_ib_warn(dev, "create mkey failed\n");
+ goto err_2;
+ }
+ mr->umem = umem;
+ mlx5_vfree(in);
+
+ mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
+
+ return mr;
+
+err_2:
+ mlx5_vfree(in);
+
+err_1:
+ kfree(mr);
+
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr = NULL;
+ struct ib_umem *umem;
+ int page_shift;
+ int npages;
+ int ncont;
+ int order;
+ int err;
+
+ mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
+ start, virt_addr, length);
+ umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
+ 0);
+ if (IS_ERR(umem)) {
+ mlx5_ib_dbg(dev, "umem get failed\n");
+ return (void *)umem;
+ }
+
+ mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
+ if (!npages) {
+ mlx5_ib_warn(dev, "avoid zero region\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
+ npages, ncont, order, page_shift);
+
+ if (use_umr(order)) {
+ mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
+ order, access_flags);
+ if (PTR_ERR(mr) == -EAGAIN) {
+ mlx5_ib_dbg(dev, "cache empty for order %d", order);
+ mr = NULL;
+ }
+ }
+
+ if (!mr)
+ mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
+ access_flags);
+
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto error;
+ }
+
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
+
+ mr->umem = umem;
+ mr->npages = npages;
+ spin_lock(&dev->mr_lock);
+ dev->mdev.priv.reg_pages += npages;
+ spin_unlock(&dev->mr_lock);
+ mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.rkey = mr->mmr.key;
+
+ return &mr->ibmr;
+
+error:
+ ib_umem_release(umem);
+ return ERR_PTR(err);
+}
+
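+/*
+ * Invalidate a cached mkey by posting a UMR unregister WQE and waiting for it
+ * to complete.
+ */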
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
+ struct ib_send_wr wr, *bad;
+ int err;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr_id = (u64)(unsigned long)&umr_context;
+ prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+
+ mlx5_ib_init_umr_context(&umr_context);
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &wr, &bad);
+ if (err) {
+ up(&umrc->sem);
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ goto error;
+ } else {
+ wait_for_completion(&umr_context.done);
+ up(&umrc->sem);
+ }
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "unreg umr failed\n");
+ err = -EFAULT;
+ goto error;
+ }
+ return 0;
+
+error:
+ return err;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct ib_umem *umem = mr->umem;
+ int npages = mr->npages;
+ int umred = mr->umred;
+ int err;
+
+ if (!umred) {
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+ mr->mmr.key, err);
+ return err;
+ }
+ } else {
+ err = unreg_umr(dev, mr);
+ if (err) {
+ mlx5_ib_warn(dev, "failed unregister\n");
+ return err;
+ }
+ free_cached_mr(dev, mr);
+ }
+
+ if (umem) {
+ ib_umem_release(umem);
+ spin_lock(&dev->mr_lock);
+ dev->mdev.priv.reg_pages -= npages;
+ spin_unlock(&dev->mr_lock);
+ }
+
+ if (!umred)
+ kfree(mr);
+
+ return 0;
+}
+
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_ib_mr *mr;
+ int access_mode, err;
+ int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ in->seg.status = 1 << 6; /* free */
+ in->seg.xlt_oct_size = cpu_to_be32(ndescs);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+ access_mode = MLX5_ACCESS_MODE_MTT;
+
+ if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
+ u32 psv_index[2];
+
+ in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
+ MLX5_MKEY_BSF_EN);
+ in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig) {
+ err = -ENOMEM;
+ goto err_free_in;
+ }
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn,
+ 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ }
+
+ in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in),
+ NULL, NULL, NULL);
+ if (err)
+ goto err_destroy_psv;
+
+ mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.rkey = mr->mmr.key;
+ mr->umem = NULL;
+ kfree(in);
+
+ return &mr->ibmr;
+
+err_destroy_psv:
+ if (mr->sig) {
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+ }
+err_free_sig:
+ kfree(mr->sig);
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ int err;
+
+ if (mr->sig) {
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+ kfree(mr->sig);
+ }
+
+ err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+ mr->mmr.key, err);
+ return err;
+ }
+
+ kfree(mr);
+
+ return err;
+}
+
+struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
+ int max_page_list_len)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ in->seg.status = 1 << 6; /* free */
+ in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+ in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+	/* TBD not needed - issue 197292 */
+ in->seg.log2_page_size = PAGE_SHIFT;
+
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+ NULL, NULL);
+ kfree(in);
+ if (err)
+ goto err_free;
+
+ mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.rkey = mr->mmr.key;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
+ int page_list_len)
+{
+ struct mlx5_ib_fast_reg_page_list *mfrpl;
+ int size = page_list_len * sizeof(u64);
+
+ mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
+ if (!mfrpl)
+ return ERR_PTR(-ENOMEM);
+
+ mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
+ if (!mfrpl->ibfrpl.page_list)
+ goto err_free;
+
+ mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
+ size, &mfrpl->map,
+ GFP_KERNEL);
+ if (!mfrpl->mapped_page_list)
+ goto err_free;
+
+ WARN_ON(mfrpl->map & 0x3f);
+
+ return &mfrpl->ibfrpl;
+
+err_free:
+ kfree(mfrpl->ibfrpl.page_list);
+ kfree(mfrpl);
+ return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
+ struct mlx5_ib_dev *dev = to_mdev(page_list->device);
+ int size = page_list->max_page_list_len * sizeof(u64);
+
+ dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
+ mfrpl->map);
+ kfree(mfrpl->ibfrpl.page_list);
+ kfree(mfrpl);
+}
+
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+ struct ib_mr_status *mr_status)
+{
+ struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+ int ret = 0;
+
+ if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
+ pr_err("Invalid status check mask\n");
+ ret = -EINVAL;
+ goto done;
+ }
+
+ mr_status->fail_status = 0;
+ if (check_mask & IB_MR_CHECK_SIG_STATUS) {
+ if (!mmr->sig) {
+ ret = -EINVAL;
+ pr_err("signature status check requested on a non-signature enabled MR\n");
+ goto done;
+ }
+
+ mmr->sig->sig_status_checked = true;
+ if (!mmr->sig->sig_err_exists)
+ goto done;
+
+ if (ibmr->lkey == mmr->sig->err_item.key)
+ memcpy(&mr_status->sig_err, &mmr->sig->err_item,
+ sizeof(mr_status->sig_err));
+ else {
+ mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
+ mr_status->sig_err.sig_err_offset = 0;
+ mr_status->sig_err.key = mmr->sig->err_item.key;
+ }
+
+ mmr->sig->sig_err_exists = false;
+ mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
+ }
+
+done:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
new file mode 100644
index 00000000000..bbbcf389272
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -0,0 +1,3048 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <rdma/ib_umem.h>
+#include "mlx5_ib.h"
+#include "user.h"
+
+/* not supported currently */
+static int wq_signature;
+
+enum {
+ MLX5_IB_ACK_REQ_FREQ = 8,
+};
+
+enum {
+ MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83,
+ MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
+ MLX5_IB_LINK_TYPE_IB = 0,
+ MLX5_IB_LINK_TYPE_ETH = 1
+};
+
+enum {
+ MLX5_IB_SQ_STRIDE = 6,
+ MLX5_IB_CACHE_LINE_SIZE = 64,
+};
+
+static const u32 mlx5_ib_opcode[] = {
+ [IB_WR_SEND] = MLX5_OPCODE_SEND,
+ [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
+ [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
+ [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
+ [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
+ [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
+ [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
+ [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
+ [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
+ [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
+};
+
+struct umr_wr {
+ u64 virt_addr;
+ struct ib_pd *pd;
+ unsigned int page_shift;
+ unsigned int npages;
+ u32 length;
+ int access_flags;
+ u32 mkey;
+};
+
+static int is_qp0(enum ib_qp_type qp_type)
+{
+ return qp_type == IB_QPT_SMI;
+}
+
+static int is_qp1(enum ib_qp_type qp_type)
+{
+ return qp_type == IB_QPT_GSI;
+}
+
+static int is_sqp(enum ib_qp_type qp_type)
+{
+ return is_qp0(qp_type) || is_qp1(qp_type);
+}
+
+static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
+{
+ return mlx5_buf_offset(&qp->buf, offset);
+}
+
+static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
+{
+ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
+}
+
+void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
+{
+ return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
+}
+
+static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
+{
+ struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+ struct ib_event event;
+
+ if (type == MLX5_EVENT_TYPE_PATH_MIG)
+ to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+
+ if (ibqp->event_handler) {
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (type) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX5_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
+ return;
+ }
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
+static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
+ int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
+{
+ int wqe_size;
+ int wq_size;
+
+ /* Sanity check RQ size before proceeding */
+ if (cap->max_recv_wr > dev->mdev.caps.max_wqes)
+ return -EINVAL;
+
+ if (!has_rq) {
+ qp->rq.max_gs = 0;
+ qp->rq.wqe_cnt = 0;
+ qp->rq.wqe_shift = 0;
+ } else {
+ if (ucmd) {
+ qp->rq.wqe_cnt = ucmd->rq_wqe_count;
+ qp->rq.wqe_shift = ucmd->rq_wqe_shift;
+ qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_post = qp->rq.wqe_cnt;
+ } else {
+ wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
+ wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
+ wqe_size = roundup_pow_of_two(wqe_size);
+ wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
+ wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
+ qp->rq.wqe_cnt = wq_size / wqe_size;
+ if (wqe_size > dev->mdev.caps.max_rq_desc_sz) {
+ mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
+ wqe_size,
+ dev->mdev.caps.max_rq_desc_sz);
+ return -EINVAL;
+ }
+ qp->rq.wqe_shift = ilog2(wqe_size);
+ qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_post = qp->rq.wqe_cnt;
+ }
+ }
+
+ return 0;
+}
+
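+/*
+ * Fixed per-WQE overhead, in bytes, contributed by the mandatory segments of
+ * each QP transport type.
+ */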
+static int sq_overhead(enum ib_qp_type qp_type)
+{
+ int size = 0;
+
+ switch (qp_type) {
+ case IB_QPT_XRC_INI:
+ size += sizeof(struct mlx5_wqe_xrc_seg);
+ /* fall through */
+ case IB_QPT_RC:
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_atomic_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg);
+ break;
+
+ case IB_QPT_XRC_TGT:
+ return 0;
+
+ case IB_QPT_UC:
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg) +
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg);
+ break;
+
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_datagram_seg);
+ break;
+
+ case MLX5_IB_QPT_REG_UMR:
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return size;
+}
+
+static int calc_send_wqe(struct ib_qp_init_attr *attr)
+{
+ int inl_size = 0;
+ int size;
+
+ size = sq_overhead(attr->qp_type);
+ if (size < 0)
+ return size;
+
+ if (attr->cap.max_inline_data) {
+ inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
+ attr->cap.max_inline_data;
+ }
+
+ size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
+ return MLX5_SIG_WQE_SIZE;
+ else
+ return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
+}
+
+static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
+ struct mlx5_ib_qp *qp)
+{
+ int wqe_size;
+ int wq_size;
+
+ if (!attr->cap.max_send_wr)
+ return 0;
+
+ wqe_size = calc_send_wqe(attr);
+ mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
+ if (wqe_size < 0)
+ return wqe_size;
+
+ if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
+ mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
+ wqe_size, dev->mdev.caps.max_sq_desc_sz);
+ return -EINVAL;
+ }
+
+ qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
+ sizeof(struct mlx5_wqe_inline_seg);
+ attr->cap.max_inline_data = qp->max_inline_data;
+
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
+ qp->signature_en = true;
+
+ wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
+ qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
+ if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+ mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+ qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+ return -ENOMEM;
+ }
+ qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+ qp->sq.max_gs = attr->cap.max_send_sge;
+ qp->sq.max_post = wq_size / wqe_size;
+ attr->cap.max_send_wr = qp->sq.max_post;
+
+ return wq_size;
+}
+
+static int set_user_buf_size(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_ib_create_qp *ucmd)
+{
+ int desc_sz = 1 << qp->sq.wqe_shift;
+
+ if (desc_sz > dev->mdev.caps.max_sq_desc_sz) {
+ mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
+ desc_sz, dev->mdev.caps.max_sq_desc_sz);
+ return -EINVAL;
+ }
+
+ if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
+		mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
+			     ucmd->sq_wqe_count);
+ return -EINVAL;
+ }
+
+ qp->sq.wqe_cnt = ucmd->sq_wqe_count;
+
+ if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+ mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
+ qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+ return -EINVAL;
+ }
+
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << 6);
+
+ return 0;
+}
+
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI ||
+ attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
+ attr->qp_type == MLX5_IB_QPT_REG_UMR ||
+ !attr->cap.max_recv_wr)
+ return 0;
+
+ return 1;
+}
+
+static int first_med_uuar(void)
+{
+ return 1;
+}
+
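+/*
+ * Next usable UUAR index after @n, skipping indices 2 and 3 within each
+ * group of four.
+ */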
+static int next_uuar(int n)
+{
+ n++;
+
+ while (((n % 4) & 2))
+ n++;
+
+ return n;
+}
+
+static int num_med_uuar(struct mlx5_uuar_info *uuari)
+{
+ int n;
+
+ n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
+ uuari->num_low_latency_uuars - 1;
+
+ return n >= 0 ? n : 0;
+}
+
+static int max_uuari(struct mlx5_uuar_info *uuari)
+{
+ return uuari->num_uars * 4;
+}
+
+static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+{
+ int med;
+ int i;
+ int t;
+
+ med = num_med_uuar(uuari);
+ for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
+ t++;
+ if (t == med)
+ return next_uuar(i);
+ }
+
+ return 0;
+}
+
+static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+{
+ int i;
+
+ for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
+ if (!test_bit(i, uuari->bitmap)) {
+ set_bit(i, uuari->bitmap);
+ uuari->count[i]++;
+ return i;
+ }
+ }
+
+ return -ENOMEM;
+}
+
+static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+{
+ int minidx = first_med_uuar();
+ int i;
+
+ for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
+ if (uuari->count[i] < uuari->count[minidx])
+ minidx = i;
+ }
+
+ uuari->count[minidx]++;
+ return minidx;
+}
+
+static int alloc_uuar(struct mlx5_uuar_info *uuari,
+ enum mlx5_ib_latency_class lat)
+{
+ int uuarn = -EINVAL;
+
+ mutex_lock(&uuari->lock);
+ switch (lat) {
+ case MLX5_IB_LATENCY_CLASS_LOW:
+ uuarn = 0;
+ uuari->count[uuarn]++;
+ break;
+
+ case MLX5_IB_LATENCY_CLASS_MEDIUM:
+ if (uuari->ver < 2)
+ uuarn = -ENOMEM;
+ else
+ uuarn = alloc_med_class_uuar(uuari);
+ break;
+
+ case MLX5_IB_LATENCY_CLASS_HIGH:
+ if (uuari->ver < 2)
+ uuarn = -ENOMEM;
+ else
+ uuarn = alloc_high_class_uuar(uuari);
+ break;
+
+ case MLX5_IB_LATENCY_CLASS_FAST_PATH:
+ uuarn = 2;
+ break;
+ }
+ mutex_unlock(&uuari->lock);
+
+ return uuarn;
+}
+
+static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+ clear_bit(uuarn, uuari->bitmap);
+ --uuari->count[uuarn];
+}
+
+static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+ clear_bit(uuarn, uuari->bitmap);
+ --uuari->count[uuarn];
+}
+
+static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+{
+ int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
+ int high_uuar = nuuars - uuari->num_low_latency_uuars;
+
+ mutex_lock(&uuari->lock);
+ if (uuarn == 0) {
+ --uuari->count[uuarn];
+ goto out;
+ }
+
+ if (uuarn < high_uuar) {
+ free_med_class_uuar(uuari, uuarn);
+ goto out;
+ }
+
+ free_high_class_uuar(uuari, uuarn);
+
+out:
+ mutex_unlock(&uuari->lock);
+}
+
+static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET: return MLX5_QP_STATE_RST;
+ case IB_QPS_INIT: return MLX5_QP_STATE_INIT;
+ case IB_QPS_RTR: return MLX5_QP_STATE_RTR;
+ case IB_QPS_RTS: return MLX5_QP_STATE_RTS;
+ case IB_QPS_SQD: return MLX5_QP_STATE_SQD;
+ case IB_QPS_SQE: return MLX5_QP_STATE_SQER;
+ case IB_QPS_ERR: return MLX5_QP_STATE_ERR;
+ default: return -1;
+ }
+}
+
+static int to_mlx5_st(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_RC: return MLX5_QP_ST_RC;
+ case IB_QPT_UC: return MLX5_QP_ST_UC;
+ case IB_QPT_UD: return MLX5_QP_ST_UD;
+ case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR;
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
+ case IB_QPT_SMI: return MLX5_QP_ST_QP0;
+ case IB_QPT_GSI: return MLX5_QP_ST_QP1;
+ case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
+ case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
+ case IB_QPT_RAW_PACKET:
+ case IB_QPT_MAX:
+ default: return -EINVAL;
+ }
+}
+
+static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+{
+ return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
+}
+
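+/*
+ * Build the user-space part of a QP: allocate a UUAR, pin the buffer supplied
+ * by the user, fill the CREATE_QP mailbox with its page list and map the
+ * doorbell record.
+ */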
+static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct mlx5_create_qp_mbox_in **in,
+ struct mlx5_ib_create_qp_resp *resp, int *inlen)
+{
+ struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_create_qp ucmd;
+ int page_shift = 0;
+ int uar_index;
+ int npages;
+ u32 offset = 0;
+ int uuarn;
+ int ncont = 0;
+ int err;
+
+ err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+ if (err) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return err;
+ }
+
+ context = to_mucontext(pd->uobject->context);
+ /*
+ * TBD: should come from the verbs when we have the API
+ */
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
+ if (uuarn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (uuarn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
+ }
+ }
+
+ uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
+ mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+
+ qp->rq.offset = 0;
+ qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
+ err = set_user_buf_size(dev, qp, &ucmd);
+ if (err)
+ goto err_uuar;
+
+ if (ucmd.buf_addr && qp->buf_size) {
+ qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ qp->buf_size, 0, 0);
+ if (IS_ERR(qp->umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ err = PTR_ERR(qp->umem);
+ goto err_uuar;
+ }
+ } else {
+ qp->umem = NULL;
+ }
+
+ if (qp->umem) {
+ mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+ &ncont, NULL);
+ err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+ mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+ ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ }
+
+ *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+ *in = mlx5_vzalloc(*inlen);
+ if (!*in) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+ if (qp->umem)
+ mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+ cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+ (*in)->ctx.params2 = cpu_to_be32(offset << 6);
+
+ (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+ resp->uuar_index = uuarn;
+ qp->uuarn = uuarn;
+
+ err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+ if (err) {
+ mlx5_ib_dbg(dev, "map failed\n");
+ goto err_free;
+ }
+
+ err = ib_copy_to_udata(udata, resp, sizeof(*resp));
+ if (err) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ goto err_unmap;
+ }
+ qp->create_type = MLX5_QP_USER;
+
+ return 0;
+
+err_unmap:
+ mlx5_ib_db_unmap_user(context, &qp->db);
+
+err_free:
+ mlx5_vfree(*in);
+
+err_umem:
+ if (qp->umem)
+ ib_umem_release(qp->umem);
+
+err_uuar:
+ free_uuar(&context->uuari, uuarn);
+ return err;
+}
+
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_ucontext *context;
+
+ context = to_mucontext(pd->uobject->context);
+ mlx5_ib_db_unmap_user(context, &qp->db);
+ if (qp->umem)
+ ib_umem_release(qp->umem);
+ free_uuar(&context->uuari, qp->uuarn);
+}
+
+static int create_kernel_qp(struct mlx5_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_mbox_in **in, int *inlen)
+{
+ enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
+ struct mlx5_uuar_info *uuari;
+ int uar_index;
+ int uuarn;
+ int err;
+
+ uuari = &dev->mdev.priv.uuari;
+ if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ return -EINVAL;
+
+ if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
+ lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
+
+ uuarn = alloc_uuar(uuari, lc);
+ if (uuarn < 0) {
+		mlx5_ib_dbg(dev, "failed to allocate uuar\n");
+ return -ENOMEM;
+ }
+
+ qp->bf = &uuari->bfs[uuarn];
+ uar_index = qp->bf->uar->index;
+
+ err = calc_sq_size(dev, init_attr, qp);
+ if (err < 0) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ goto err_uuar;
+ }
+
+ qp->rq.offset = 0;
+ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+
+ err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ goto err_uuar;
+ }
+
+ qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+ *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+ *in = mlx5_vzalloc(*inlen);
+ if (!*in) {
+ err = -ENOMEM;
+ goto err_buf;
+ }
+ (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+ cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+ /* Set "fast registration enabled" for all kernel QPs */
+ (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
+ (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+
+ mlx5_fill_page_array(&qp->buf, (*in)->pas);
+
+ err = mlx5_db_alloc(&dev->mdev, &qp->db);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ goto err_free;
+ }
+
+ qp->db.db[0] = 0;
+ qp->db.db[1] = 0;
+
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
+ qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
+ qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
+ qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
+
+ if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
+ !qp->sq.w_list || !qp->sq.wqe_head) {
+ err = -ENOMEM;
+ goto err_wrid;
+ }
+ qp->create_type = MLX5_QP_KERNEL;
+
+ return 0;
+
+err_wrid:
+ mlx5_db_free(&dev->mdev, &qp->db);
+ kfree(qp->sq.wqe_head);
+ kfree(qp->sq.w_list);
+ kfree(qp->sq.wrid);
+ kfree(qp->sq.wr_data);
+ kfree(qp->rq.wrid);
+
+err_free:
+ mlx5_vfree(*in);
+
+err_buf:
+ mlx5_buf_free(&dev->mdev, &qp->buf);
+
+err_uuar:
+ free_uuar(&dev->mdev.priv.uuari, uuarn);
+ return err;
+}
+
+static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+ mlx5_db_free(&dev->mdev, &qp->db);
+ kfree(qp->sq.wqe_head);
+ kfree(qp->sq.w_list);
+ kfree(qp->sq.wrid);
+ kfree(qp->sq.wr_data);
+ kfree(qp->rq.wrid);
+ mlx5_buf_free(&dev->mdev, &qp->buf);
+ free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn);
+}
+
+static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
+{
+ if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
+ (attr->qp_type == IB_QPT_XRC_INI))
+ return cpu_to_be32(MLX5_SRQ_RQ);
+ else if (!qp->has_rq)
+ return cpu_to_be32(MLX5_ZERO_LEN_RQ);
+ else
+ return cpu_to_be32(MLX5_NON_ZERO_RQ);
+}
+
+static int is_connected(enum ib_qp_type qp_type)
+{
+ if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
+ return 1;
+
+ return 0;
+}
+
+static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ struct mlx5_ib_create_qp_resp resp;
+ struct mlx5_create_qp_mbox_in *in;
+ struct mlx5_ib_create_qp ucmd;
+ int inlen = sizeof(*in);
+ int err;
+
+ mutex_init(&qp->mutex);
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
+ mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
+ return -EINVAL;
+ } else {
+ qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+ }
+ }
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ if (pd && pd->uobject) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ mlx5_ib_dbg(dev, "copy failed\n");
+ return -EFAULT;
+ }
+
+ qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
+ qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+ } else {
+ qp->wq_sig = !!wq_signature;
+ }
+
+ qp->has_rq = qp_has_rq(init_attr);
+ err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
+ qp, (pd && pd->uobject) ? &ucmd : NULL);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ return err;
+ }
+
+ if (pd) {
+ if (pd->uobject) {
+ mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
+ if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
+ ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
+ mlx5_ib_dbg(dev, "invalid rq params\n");
+ return -EINVAL;
+ }
+ if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) {
+ mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
+ ucmd.sq_wqe_count, dev->mdev.caps.max_wqes);
+ return -EINVAL;
+ }
+ err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+ if (err)
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ } else {
+ err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+ if (err)
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ else
+ qp->pa_lkey = to_mpd(pd)->pa_lkey;
+ }
+
+ if (err)
+ return err;
+ } else {
+ in = mlx5_vzalloc(sizeof(*in));
+ if (!in)
+ return -ENOMEM;
+
+ qp->create_type = MLX5_QP_EMPTY;
+ }
+
+ if (is_sqp(init_attr->qp_type))
+ qp->port = init_attr->port_num;
+
+ in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
+ MLX5_QP_PM_MIGRATED << 11);
+
+ if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
+ in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
+ else
+ in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
+
+ if (qp->wq_sig)
+ in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+
+ if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+
+ if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
+ int rcqe_sz;
+ int scqe_sz;
+
+ rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
+ scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
+
+ if (rcqe_sz == 128)
+ in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
+ else
+ in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
+ if (scqe_sz == 128)
+ in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
+ else
+ in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
+ }
+ }
+
+ if (qp->rq.wqe_cnt) {
+ in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
+ in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
+ }
+
+ in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
+
+ if (qp->sq.wqe_cnt)
+ in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
+ else
+ in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
+
+ /* Set default resources */
+ switch (init_attr->qp_type) {
+ case IB_QPT_XRC_TGT:
+ in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+ in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+ in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+ in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
+ break;
+ case IB_QPT_XRC_INI:
+ in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
+ in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
+ in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+ break;
+ default:
+ if (init_attr->srq) {
+ in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
+ in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
+ } else {
+ in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
+ in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+ }
+ }
+
+ if (init_attr->send_cq)
+ in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
+
+ if (init_attr->recv_cq)
+ in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
+
+ in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
+
+ err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen);
+ if (err) {
+ mlx5_ib_dbg(dev, "create qp failed\n");
+ goto err_create;
+ }
+
+ mlx5_vfree(in);
+ /* Hardware wants QPN written in big-endian order (after
+ * shifting) for send doorbell. Precompute this value to save
+ * a little bit when posting sends.
+ */
+ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+ qp->mqp.event = mlx5_ib_qp_event;
+
+ return 0;
+
+err_create:
+ if (qp->create_type == MLX5_QP_USER)
+ destroy_qp_user(pd, qp);
+ else if (qp->create_type == MLX5_QP_KERNEL)
+ destroy_qp_kernel(dev, qp);
+
+ mlx5_vfree(in);
+ return err;
+}
+
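+/*
+ * Take the send and receive CQ locks in a fixed order (lower CQN first) so
+ * that concurrent callers cannot deadlock; missing or shared CQs are handled.
+ */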
+static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
+{
+ if (send_cq) {
+ if (recv_cq) {
+ if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ spin_lock_irq(&send_cq->lock);
+ spin_lock_nested(&recv_cq->lock,
+ SINGLE_DEPTH_NESTING);
+ } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
+ spin_lock_irq(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else {
+ spin_lock_irq(&recv_cq->lock);
+ spin_lock_nested(&send_cq->lock,
+ SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ spin_lock_irq(&send_cq->lock);
+ }
+ } else if (recv_cq) {
+ spin_lock_irq(&recv_cq->lock);
+ }
+}
+
+static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
+{
+ if (send_cq) {
+ if (recv_cq) {
+ if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ spin_unlock(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
+ __release(&recv_cq->lock);
+ spin_unlock_irq(&send_cq->lock);
+ } else {
+ spin_unlock(&send_cq->lock);
+ spin_unlock_irq(&recv_cq->lock);
+ }
+ } else {
+ spin_unlock_irq(&send_cq->lock);
+ }
+ } else if (recv_cq) {
+ spin_unlock_irq(&recv_cq->lock);
+ }
+}
+
+static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
+{
+ return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_XRC_TGT:
+ *send_cq = NULL;
+ *recv_cq = NULL;
+ break;
+ case MLX5_IB_QPT_REG_UMR:
+ case IB_QPT_XRC_INI:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = NULL;
+ break;
+
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_RAW_IPV6:
+ case IB_QPT_RAW_ETHERTYPE:
+ *send_cq = to_mcq(qp->ibqp.send_cq);
+ *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ break;
+
+ case IB_QPT_RAW_PACKET:
+ case IB_QPT_MAX:
+ default:
+ *send_cq = NULL;
+ *recv_cq = NULL;
+ break;
+ }
+}
+
+static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_modify_qp_mbox_in *in;
+ int err;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return;
+ if (qp->state != IB_QPS_RESET)
+ if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state),
+ MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
+ mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
+ qp->mqp.qpn);
+
+ get_cqs(qp, &send_cq, &recv_cq);
+
+ if (qp->create_type == MLX5_QP_KERNEL) {
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
+ if (send_cq != recv_cq)
+ __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ }
+
+ err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
+ kfree(in);
+
+ if (qp->create_type == MLX5_QP_KERNEL)
+ destroy_qp_kernel(dev, qp);
+ else if (qp->create_type == MLX5_QP_USER)
+ destroy_qp_user(&get_pd(qp)->ibpd, qp);
+}
+
+static const char *ib_qp_type_str(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_SMI:
+ return "IB_QPT_SMI";
+ case IB_QPT_GSI:
+ return "IB_QPT_GSI";
+ case IB_QPT_RC:
+ return "IB_QPT_RC";
+ case IB_QPT_UC:
+ return "IB_QPT_UC";
+ case IB_QPT_UD:
+ return "IB_QPT_UD";
+ case IB_QPT_RAW_IPV6:
+ return "IB_QPT_RAW_IPV6";
+ case IB_QPT_RAW_ETHERTYPE:
+ return "IB_QPT_RAW_ETHERTYPE";
+ case IB_QPT_XRC_INI:
+ return "IB_QPT_XRC_INI";
+ case IB_QPT_XRC_TGT:
+ return "IB_QPT_XRC_TGT";
+ case IB_QPT_RAW_PACKET:
+ return "IB_QPT_RAW_PACKET";
+ case MLX5_IB_QPT_REG_UMR:
+ return "MLX5_IB_QPT_REG_UMR";
+ case IB_QPT_MAX:
+ default:
+ return "Invalid QP type";
+ }
+}
+
+struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_qp *qp;
+ u16 xrcdn = 0;
+ int err;
+
+ if (pd) {
+ dev = to_mdev(pd->device);
+ } else {
+ /* being cautious here */
+ if (init_attr->qp_type != IB_QPT_XRC_TGT &&
+ init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
+ pr_warn("%s: no PD for transport %s\n", __func__,
+ ib_qp_type_str(init_attr->qp_type));
+ return ERR_PTR(-EINVAL);
+ }
+ dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+ }
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_XRC_INI:
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
+ mlx5_ib_dbg(dev, "XRC not supported\n");
+ return ERR_PTR(-ENOSYS);
+ }
+ init_attr->recv_cq = NULL;
+ if (init_attr->qp_type == IB_QPT_XRC_TGT) {
+ xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+ init_attr->send_cq = NULL;
+ }
+
+ /* fall through */
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case MLX5_IB_QPT_REG_UMR:
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ err = create_qp_common(dev, pd, init_attr, udata, qp);
+ if (err) {
+ mlx5_ib_dbg(dev, "create_qp_common failed\n");
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ if (is_qp0(init_attr->qp_type))
+ qp->ibqp.qp_num = 0;
+ else if (is_qp1(init_attr->qp_type))
+ qp->ibqp.qp_num = 1;
+ else
+ qp->ibqp.qp_num = qp->mqp.qpn;
+
+ mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
+ qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+ to_mcq(init_attr->send_cq)->mcq.cqn);
+
+ qp->xrcdn = xrcdn;
+
+ break;
+
+ case IB_QPT_RAW_IPV6:
+ case IB_QPT_RAW_ETHERTYPE:
+ case IB_QPT_RAW_PACKET:
+ case IB_QPT_MAX:
+ default:
+ mlx5_ib_dbg(dev, "unsupported qp type %d\n",
+ init_attr->qp_type);
+ /* Don't support raw QPs */
+ return ERR_PTR(-EINVAL);
+ }
+
+ return &qp->ibqp;
+}
+
+int mlx5_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+
+ destroy_qp_common(dev, mqp);
+
+ kfree(mqp);
+
+ return 0;
+}
+
+static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ u32 hw_access_flags = 0;
+ u8 dest_rd_atomic;
+ u32 access_flags;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ dest_rd_atomic = attr->max_dest_rd_atomic;
+ else
+ dest_rd_atomic = qp->resp_depth;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ access_flags = attr->qp_access_flags;
+ else
+ access_flags = qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ hw_access_flags |= MLX5_QP_BIT_RRE;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ hw_access_flags |= MLX5_QP_BIT_RWE;
+
+ return cpu_to_be32(hw_access_flags);
+}
+
+enum {
+ MLX5_PATH_FLAG_FL = 1 << 0,
+ MLX5_PATH_FLAG_FREE_AR = 1 << 1,
+ MLX5_PATH_FLAG_COUNTER = 1 << 2,
+};
+
+static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+{
+ if (rate == IB_RATE_PORT_CURRENT) {
+ return 0;
+ } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
+ return -EINVAL;
+ } else {
+ while (rate != IB_RATE_2_5_GBPS &&
+ !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
+ dev->mdev.caps.stat_rate_support))
+ --rate;
+ }
+
+ return rate + MLX5_STAT_RATE_OFFSET;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+ struct mlx5_qp_path *path, u8 port, int attr_mask,
+ u32 path_flags, const struct ib_qp_attr *attr)
+{
+ int err;
+
+ path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+ path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ path->pkey_index = attr->pkey_index;
+
+ path->grh_mlid = ah->src_path_bits & 0x7f;
+ path->rlid = cpu_to_be16(ah->dlid);
+
+ if (ah->ah_flags & IB_AH_GRH) {
+ path->grh_mlid |= 1 << 7;
+ path->mgid_index = ah->grh.sgid_index;
+ path->hop_limit = ah->grh.hop_limit;
+ path->tclass_flowlabel =
+ cpu_to_be32((ah->grh.traffic_class << 20) |
+ (ah->grh.flow_label));
+ memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ }
+
+ err = ib_rate_to_mlx5(dev, ah->static_rate);
+ if (err < 0)
+ return err;
+ path->static_rate = err;
+ path->port = port;
+
+ if (ah->ah_flags & IB_AH_GRH) {
+ if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) {
+			pr_err("sgid_index (%u) too large. max is %d\n",
+			       ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len);
+ return -EINVAL;
+ }
+
+ path->grh_mlid |= 1 << 7;
+ path->mgid_index = ah->grh.sgid_index;
+ path->hop_limit = ah->grh.hop_limit;
+ path->tclass_flowlabel =
+ cpu_to_be32((ah->grh.traffic_class << 20) |
+ (ah->grh.flow_label));
+ memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT)
+ path->ackto_lt = attr->timeout << 3;
+
+ path->sl = ah->sl & 0xf;
+
+ return 0;
+}
+
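+/*
+ * Optional parameters the device accepts for each (current state, new state,
+ * transport) transition; ib_mask_to_mlx5_opt() output is masked against this
+ * table before the modify command is issued.
+ */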
+static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_PRI_PORT,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_PRI_PORT,
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_Q_KEY |
+ MLX5_QP_OPTPAR_PRI_PORT,
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX,
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX,
+ },
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_RNR_TIMEOUT,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PM_STATE,
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+ },
+ },
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
+ MLX5_QP_OPTPAR_SRQN |
+ MLX5_QP_OPTPAR_CQN_RCV,
+ },
+ },
+ [MLX5_QP_STATE_SQER] = {
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RRE,
+ },
+ },
+};
+
+static int ib_nr_to_mlx5_nr(int ib_mask)
+{
+ switch (ib_mask) {
+ case IB_QP_STATE:
+ return 0;
+ case IB_QP_CUR_STATE:
+ return 0;
+ case IB_QP_EN_SQD_ASYNC_NOTIFY:
+ return 0;
+ case IB_QP_ACCESS_FLAGS:
+ return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE;
+ case IB_QP_PKEY_INDEX:
+ return MLX5_QP_OPTPAR_PKEY_INDEX;
+ case IB_QP_PORT:
+ return MLX5_QP_OPTPAR_PRI_PORT;
+ case IB_QP_QKEY:
+ return MLX5_QP_OPTPAR_Q_KEY;
+ case IB_QP_AV:
+ return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
+ MLX5_QP_OPTPAR_PRI_PORT;
+ case IB_QP_PATH_MTU:
+ return 0;
+ case IB_QP_TIMEOUT:
+ return MLX5_QP_OPTPAR_ACK_TIMEOUT;
+ case IB_QP_RETRY_CNT:
+ return MLX5_QP_OPTPAR_RETRY_COUNT;
+ case IB_QP_RNR_RETRY:
+ return MLX5_QP_OPTPAR_RNR_RETRY;
+ case IB_QP_RQ_PSN:
+ return 0;
+ case IB_QP_MAX_QP_RD_ATOMIC:
+ return MLX5_QP_OPTPAR_SRA_MAX;
+ case IB_QP_ALT_PATH:
+ return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
+ case IB_QP_MIN_RNR_TIMER:
+ return MLX5_QP_OPTPAR_RNR_TIMEOUT;
+ case IB_QP_SQ_PSN:
+ return 0;
+ case IB_QP_MAX_DEST_RD_ATOMIC:
+ return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
+ case IB_QP_PATH_MIG_STATE:
+ return MLX5_QP_OPTPAR_PM_STATE;
+ case IB_QP_CAP:
+ return 0;
+ case IB_QP_DEST_QPN:
+ return 0;
+ }
+ return 0;
+}
+
+static int ib_mask_to_mlx5_opt(int ib_mask)
+{
+ int result = 0;
+ int i;
+
+ for (i = 0; i < 8 * sizeof(int); i++) {
+ if ((1 << i) & ib_mask)
+ result |= ib_nr_to_mlx5_nr(1 << i);
+ }
+
+ return result;
+}
+
+static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ enum ib_qp_state cur_state, enum ib_qp_state new_state)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_qp_context *context;
+ struct mlx5_modify_qp_mbox_in *in;
+ struct mlx5_ib_pd *pd;
+ enum mlx5_qp_state mlx5_cur, mlx5_new;
+ enum mlx5_qp_optpar optpar;
+ int sqd_event;
+ int mlx5_st;
+ int err;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ context = &in->ctx;
+ err = to_mlx5_st(ibqp->qp_type);
+ if (err < 0)
+ goto out;
+
+ context->flags = cpu_to_be32(err << 16);
+
+ if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
+ context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ } else {
+ switch (attr->path_mig_state) {
+ case IB_MIG_MIGRATED:
+ context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ break;
+ case IB_MIG_REARM:
+ context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
+ break;
+ case IB_MIG_ARMED:
+ context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
+ break;
+ }
+ }
+
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+ context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
+ } else if (ibqp->qp_type == IB_QPT_UD ||
+ ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ } else if (attr_mask & IB_QP_PATH_MTU) {
+ if (attr->path_mtu < IB_MTU_256 ||
+ attr->path_mtu > IB_MTU_4096) {
+ mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
+ err = -EINVAL;
+ goto out;
+ }
+ context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg;
+ }
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ context->pri_path.pkey_index = attr->pkey_index;
+
+ /* todo implement counter_index functionality */
+
+ if (is_sqp(ibqp->qp_type))
+ context->pri_path.port = qp->port;
+
+ if (attr_mask & IB_QP_PORT)
+ context->pri_path.port = attr->port_num;
+
+ if (attr_mask & IB_QP_AV) {
+ err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
+ attr_mask, 0, attr);
+ if (err)
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT)
+ context->pri_path.ackto_lt |= attr->timeout << 3;
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ attr->alt_port_num, attr_mask, 0, attr);
+ if (err)
+ goto out;
+ }
+
+ pd = get_pd(qp);
+ get_cqs(qp, &send_cq, &recv_cq);
+
+ context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
+ context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
+ context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
+ context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
+
+ if (attr_mask & IB_QP_RNR_RETRY)
+ context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic)
+ context->params1 |=
+ cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN)
+ context->next_send_psn = cpu_to_be32(attr->sq_psn);
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic)
+ context->params2 |=
+ cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
+ context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+
+ if (attr_mask & IB_QP_RQ_PSN)
+ context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+
+ if (attr_mask & IB_QP_QKEY)
+ context->qkey = cpu_to_be32(attr->qkey);
+
+ if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->db_rec_addr = cpu_to_be64(qp->db.dma);
+
+ if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
+ attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
+ sqd_event = 1;
+ else
+ sqd_event = 0;
+
+ if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ context->sq_crq_size |= cpu_to_be16(1 << 4);
+
+
+ mlx5_cur = to_mlx5_state(cur_state);
+ mlx5_new = to_mlx5_state(new_state);
+ mlx5_st = to_mlx5_st(ibqp->qp_type);
+ if (mlx5_st < 0)
+ goto out;
+
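+ /* Pass only the optional parameters that are valid for this transition. */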
+ optpar = ib_mask_to_mlx5_opt(attr_mask);
+ optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
+ in->optparam = cpu_to_be32(optpar);
+ err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state),
+ to_mlx5_state(new_state), in, sqd_event,
+ &qp->mqp);
+ if (err)
+ goto out;
+
+ qp->state = new_state;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->atomic_rd_en = attr->qp_access_flags;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_port = attr->alt_port_num;
+
+ /*
+ * If we moved a kernel QP to RESET, clean up all old CQ
+ * entries and reinitialize the QP.
+ */
+ if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (send_cq != recv_cq)
+ mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq.cur_post = 0;
+ qp->sq.last_poll = 0;
+ qp->db.db[MLX5_RCV_DBR] = 0;
+ qp->db.db[MLX5_SND_DBR] = 0;
+ }
+
+out:
+ kfree(in);
+ return err;
+}
+
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
+ int port;
+
+ mutex_lock(&qp->mutex);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
+ !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED))
+ goto out;
+
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports))
+ goto out;
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len)
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp)
+ goto out;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp)
+ goto out;
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
+ }
+
+ err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
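+/*
+ * Check whether posting nreq more WQEs would overflow the work queue; on the
+ * slow path, re-read head and tail under the CQ lock for a consistent view.
+ */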
+static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+{
+ struct mlx5_ib_cq *cq;
+ unsigned cur;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max_post))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max_post;
+}
+
+static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
+ struct ib_send_wr *wr)
+{
+ memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
+static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static __be16 get_klm_octo(int npages)
+{
+ return cpu_to_be16(ALIGN(npages, 8) / 2);
+}
+
+static __be64 frwr_mkey_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_A |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 sig_mkey_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_SIGERR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE |
+ MLX5_MKEY_MASK_BSF_EN;
+
+ return cpu_to_be64(result);
+}
+
+static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr, int li)
+{
+ memset(umr, 0, sizeof(*umr));
+
+ if (li) {
+ umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+ umr->flags = 1 << 7;
+ return;
+ }
+
+ umr->flags = (1 << 5); /* fail if not free */
+ umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
+ umr->mkey_mask = frwr_mkey_mask();
+}
+
+static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr)
+{
+ struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
+ u64 mask;
+
+ memset(umr, 0, sizeof(*umr));
+
+ if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
+ umr->flags = 1 << 5; /* fail if not free */
+ umr->klm_octowords = get_klm_octo(umrwr->npages);
+ mask = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_PD |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_A |
+ MLX5_MKEY_MASK_FREE;
+ umr->mkey_mask = cpu_to_be64(mask);
+ } else {
+ umr->flags = 2 << 5; /* fail if free */
+ mask = MLX5_MKEY_MASK_FREE;
+ umr->mkey_mask = cpu_to_be64(mask);
+ }
+
+ if (!wr->num_sge)
+ umr->flags |= (1 << 7); /* inline */
+}
+
+static u8 get_umr_flags(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
+ MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
+}
+
+static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
+ int li, int *writ)
+{
+ memset(seg, 0, sizeof(*seg));
+ if (li) {
+ seg->status = 1 << 6;
+ return;
+ }
+
+ seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
+ MLX5_ACCESS_MODE_MTT;
+ *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
+ seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+ seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+ seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+ seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
+ seg->log2_page_size = wr->wr.fast_reg.page_shift;
+}
+
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
+{
+ memset(seg, 0, sizeof(*seg));
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
+ seg->status = 1 << 6;
+ return;
+ }
+
+ seg->flags = convert_access(wr->wr.fast_reg.access_flags);
+ seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
+ seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
+ seg->len = cpu_to_be64(wr->wr.fast_reg.length);
+ seg->log2_page_size = wr->wr.fast_reg.page_shift;
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+ mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+}
+
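+/*
+ * Build the device-format page list for a fast-register work request, tagging
+ * each page with its read/write permission bits.
+ */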
+static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
+ struct ib_send_wr *wr,
+ struct mlx5_core_dev *mdev,
+ struct mlx5_ib_pd *pd,
+ int writ)
+{
+ struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
+ u64 *page_list = wr->wr.fast_reg.page_list->page_list;
+ u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
+ int i;
+
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
+ mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
+ dseg->addr = cpu_to_be64(mfrpl->map);
+ dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
+ dseg->lkey = cpu_to_be32(pd->pa_lkey);
+}
+
+static __be32 send_ieth(struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return wr->ex.imm_data;
+
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_be32(wr->ex.invalidate_rkey);
+
+ default:
+ return 0;
+ }
+}
+
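+/* XOR all bytes of the WQE and invert the result to form the signature. */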
+static u8 calc_sig(void *wqe, int size)
+{
+ u8 *p = wqe;
+ u8 res = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ res ^= p[i];
+
+ return ~res;
+}
+
+static u8 wq_sig(void *wqe)
+{
+ return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
+}
+
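+/*
+ * Copy the scatter/gather list into the WQE as inline data, wrapping at the
+ * end of the send queue buffer; *sz returns the segment size in 16-byte units.
+ */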
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+ void *wqe, int *sz)
+{
+ struct mlx5_wqe_inline_seg *seg;
+ void *qend = qp->sq.qend;
+ void *addr;
+ int inl = 0;
+ int copy;
+ int len;
+ int i;
+
+ seg = wqe;
+ wqe += sizeof(*seg);
+ for (i = 0; i < wr->num_sge; i++) {
+ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+ len = wr->sg_list[i].length;
+ inl += len;
+
+ if (unlikely(inl > qp->max_inline_data))
+ return -ENOMEM;
+
+ if (unlikely(wqe + len > qend)) {
+ copy = qend - wqe;
+ memcpy(wqe, addr, copy);
+ addr += copy;
+ len -= copy;
+ wqe = mlx5_get_send_wqe(qp, 0);
+ }
+ memcpy(wqe, addr, len);
+ wqe += len;
+ }
+
+ seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
+
+ *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+
+ return 0;
+}
+
+static u16 prot_field_size(enum ib_signature_type type)
+{
+ switch (type) {
+ case IB_SIG_TYPE_T10_DIF:
+ return MLX5_DIF_SIZE;
+ default:
+ return 0;
+ }
+}
+
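+/* Map a protection-information block size to the device block-size selector. */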
+static u8 bs_selector(int block_size)
+{
+ switch (block_size) {
+ case 512: return 0x1;
+ case 520: return 0x2;
+ case 4096: return 0x3;
+ case 4160: return 0x4;
+ case 1073741824: return 0x5;
+ default: return 0;
+ }
+}
+
+static int format_selector(struct ib_sig_attrs *attr,
+ struct ib_sig_domain *domain,
+ int *selector)
+{
+
+#define FORMAT_DIF_NONE 0
+#define FORMAT_DIF_CRC_INC 8
+#define FORMAT_DIF_CRC_NO_INC 12
+#define FORMAT_DIF_CSUM_INC 13
+#define FORMAT_DIF_CSUM_NO_INC 14
+
+ switch (domain->sig.dif.type) {
+ case IB_T10DIF_NONE:
+ /* No DIF */
+ *selector = FORMAT_DIF_NONE;
+ break;
+ case IB_T10DIF_TYPE1: /* Fall through */
+ case IB_T10DIF_TYPE2:
+ switch (domain->sig.dif.bg_type) {
+ case IB_T10DIF_CRC:
+ *selector = FORMAT_DIF_CRC_INC;
+ break;
+ case IB_T10DIF_CSUM:
+ *selector = FORMAT_DIF_CSUM_INC;
+ break;
+ default:
+ return 1;
+ }
+ break;
+ case IB_T10DIF_TYPE3:
+ switch (domain->sig.dif.bg_type) {
+ case IB_T10DIF_CRC:
+ *selector = domain->sig.dif.type3_inc_reftag ?
+ FORMAT_DIF_CRC_INC :
+ FORMAT_DIF_CRC_NO_INC;
+ break;
+ case IB_T10DIF_CSUM:
+ *selector = domain->sig.dif.type3_inc_reftag ?
+ FORMAT_DIF_CSUM_INC :
+ FORMAT_DIF_CSUM_NO_INC;
+ break;
+ default:
+ return 1;
+ }
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int mlx5_set_bsf(struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_bsf *bsf, u32 data_size)
+{
+ struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
+ struct mlx5_bsf_basic *basic = &bsf->basic;
+ struct ib_sig_domain *mem = &sig_attrs->mem;
+ struct ib_sig_domain *wire = &sig_attrs->wire;
+ int ret, selector;
+
+ memset(bsf, 0, sizeof(*bsf));
+ switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_T10_DIF:
+ if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
+ return -EINVAL;
+
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
+ mem->sig.dif.type == wire->sig.dif.type) {
+ /* Same block structure */
+ basic->bsf_size_sbs = 1 << 4;
+ if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
+ basic->wire.copy_byte_mask |= 0xc0;
+ if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+ basic->wire.copy_byte_mask |= 0x30;
+ if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+ basic->wire.copy_byte_mask |= 0x0f;
+ } else
+ basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
+
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ ret = format_selector(sig_attrs, mem, &selector);
+ if (ret)
+ return -EINVAL;
+ basic->m_bfs_psv = cpu_to_be32(selector << 24 |
+ msig->psv_memory.psv_idx);
+
+ ret = format_selector(sig_attrs, wire, &selector);
+ if (ret)
+ return -EINVAL;
+ basic->w_bfs_psv = cpu_to_be32(selector << 24 |
+ msig->psv_wire.psv_idx);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size)
+{
+ struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
+ struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct mlx5_bsf *bsf;
+ u32 data_len = wr->sg_list->length;
+ u32 data_key = wr->sg_list->lkey;
+ u64 data_va = wr->sg_list->addr;
+ int ret;
+ int wqe_size;
+
+ if (!wr->wr.sig_handover.prot ||
+ (data_key == wr->wr.sig_handover.prot->lkey &&
+ data_va == wr->wr.sig_handover.prot->addr &&
+ data_len == wr->wr.sig_handover.prot->length)) {
+ /**
+ * The source domain does not contain signature information,
+ * or data and protection are interleaved in memory, so we
+ * only need to construct:
+ * ------------------
+ * | data_klm |
+ * ------------------
+ * | BSF |
+ * ------------------
+ **/
+ struct mlx5_klm *data_klm = *seg;
+
+ data_klm->bcount = cpu_to_be32(data_len);
+ data_klm->key = cpu_to_be32(data_key);
+ data_klm->va = cpu_to_be64(data_va);
+ wqe_size = ALIGN(sizeof(*data_klm), 64);
+ } else {
+ /**
+ * The source domain contains signature information, so we
+ * need to construct a strided block format:
+ * ---------------------------
+ * | stride_block_ctrl |
+ * ---------------------------
+ * | data_klm |
+ * ---------------------------
+ * | prot_klm |
+ * ---------------------------
+ * | BSF |
+ * ---------------------------
+ **/
+ struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
+ struct mlx5_stride_block_entry *data_sentry;
+ struct mlx5_stride_block_entry *prot_sentry;
+ u32 prot_key = wr->wr.sig_handover.prot->lkey;
+ u64 prot_va = wr->wr.sig_handover.prot->addr;
+ u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
+ int prot_size;
+
+ sblock_ctrl = *seg;
+ data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
+ prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
+
+ prot_size = prot_field_size(sig_attrs->mem.sig_type);
+ if (!prot_size) {
+ pr_err("Bad block size given: %u\n", block_size);
+ return -EINVAL;
+ }
+ sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
+ prot_size);
+ sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
+ sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
+ sblock_ctrl->num_entries = cpu_to_be16(2);
+
+ data_sentry->bcount = cpu_to_be16(block_size);
+ data_sentry->key = cpu_to_be32(data_key);
+ data_sentry->va = cpu_to_be64(data_va);
+ data_sentry->stride = cpu_to_be16(block_size);
+
+ prot_sentry->bcount = cpu_to_be16(prot_size);
+ prot_sentry->key = cpu_to_be32(prot_key);
+ prot_sentry->va = cpu_to_be64(prot_va);
+ prot_sentry->stride = cpu_to_be16(prot_size);
+
+ wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
+ sizeof(*prot_sentry), 64);
+ }
+
+ *seg += wqe_size;
+ *size += wqe_size / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ bsf = *seg;
+ ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
+ if (ret)
+ return -EINVAL;
+
+ *seg += sizeof(*bsf);
+ *size += sizeof(*bsf) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ return 0;
+}
+
+static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
+ struct ib_send_wr *wr, u32 nelements,
+ u32 length, u32 pdn)
+{
+ struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ u32 sig_key = sig_mr->rkey;
+ u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+ MLX5_ACCESS_MODE_KLM;
+ seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
+ MLX5_MKEY_BSF_EN | pdn);
+ seg->len = cpu_to_be64(length);
+ seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
+ seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+}
+
+static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr, u32 nelements)
+{
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
+ umr->klm_octowords = get_klm_octo(nelements);
+ umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
+ umr->mkey_mask = sig_mkey_mask();
+}
+
+
+static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size)
+{
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ u32 pdn = get_pd(qp)->pdn;
+ u32 klm_oct_size;
+ int region_len, ret;
+
+ if (unlikely(wr->num_sge != 1) ||
+ unlikely(wr->wr.sig_handover.access_flags &
+ IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+ unlikely(!sig_mr->sig->sig_status_checked))
+ return -EINVAL;
+
+ /* length of the protected region, data + protection */
+ region_len = wr->sg_list->length;
+ if (wr->wr.sig_handover.prot &&
+ (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey ||
+ wr->wr.sig_handover.prot->addr != wr->sg_list->addr ||
+ wr->wr.sig_handover.prot->length != wr->sg_list->length))
+ region_len += wr->wr.sig_handover.prot->length;
+
+ /**
+ * KLM octoword size: if protection was provided we use the
+ * strided block format (3 octowords), otherwise a single
+ * KLM (1 octoword).
+ **/
+ klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+
+ set_sig_umr_segment(*seg, wr, klm_oct_size);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ ret = set_sig_data_segment(wr, qp, seg, size);
+ if (ret)
+ return ret;
+
+ sig_mr->sig->sig_status_checked = false;
+ return 0;
+}
+
+static int set_psv_wr(struct ib_sig_domain *domain,
+ u32 psv_idx, void **seg, int *size)
+{
+ struct mlx5_seg_set_psv *psv_seg = *seg;
+
+ memset(psv_seg, 0, sizeof(*psv_seg));
+ psv_seg->psv_num = cpu_to_be32(psv_idx);
+ switch (domain->sig_type) {
+ case IB_SIG_TYPE_T10_DIF:
+ psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
+ domain->sig.dif.app_tag);
+ psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
+
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+ break;
+
+ default:
+ pr_err("Bad signature type given.\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
+ struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
+{
+ int writ = 0;
+ int li;
+
+ li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
+ if (unlikely(wr->send_flags & IB_SEND_INLINE))
+ return -EINVAL;
+
+ set_frwr_umr_segment(*seg, wr, li);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+ set_mkey_segment(*seg, wr, li, &writ);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+ if (!li) {
+ if (unlikely(wr->wr.fast_reg.page_list_len >
+ wr->wr.fast_reg.page_list->max_page_list_len))
+ return -ENOMEM;
+
+ set_frwr_pages(*seg, wr, mdev, pd, writ);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+ }
+ return 0;
+}
+
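+/* Debug helper: hex-dump a send WQE, following wrap-around in the send queue. */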
+static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+{
+ __be32 *p = NULL;
+ int tidx = idx;
+ int i, j;
+
+ pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
+ for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
+ if ((i & 0xf) == 0) {
+ void *buf = mlx5_get_send_wqe(qp, tidx);
+ tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
+ p = buf;
+ j = 0;
+ }
+ pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
+ be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
+ be32_to_cpu(p[j + 3]));
+ }
+}
+
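+/*
+ * Copy a WQE to the BlueFlame register 64 bytes at a time, wrapping at the
+ * end of the send queue buffer.
+ */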
+static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
+ unsigned bytecnt, struct mlx5_ib_qp *qp)
+{
+ while (bytecnt > 0) {
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ __iowrite64_copy(dst++, src++, 8);
+ bytecnt -= 64;
+ if (unlikely(src == qp->sq.qend))
+ src = mlx5_get_send_wqe(qp, 0);
+ }
+}
+
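+/*
+ * Select the fence mode for a WQE: a fenced local invalidate requires strong
+ * ordering, otherwise a fence inherited from an earlier WQE is honoured.
+ */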
+static u8 get_fence(u8 fence, struct ib_send_wr *wr)
+{
+ if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
+ wr->send_flags & IB_SEND_FENCE))
+ return MLX5_FENCE_MODE_STRONG_ORDERING;
+
+ if (unlikely(fence)) {
+ if (wr->send_flags & IB_SEND_FENCE)
+ return MLX5_FENCE_MODE_SMALL_AND_FENCE;
+ else
+ return fence;
+
+ } else {
+ return 0;
+ }
+}
+
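+/* Reserve the next slot in the send queue and initialize its control segment. */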
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ struct ib_send_wr *wr, int *idx,
+ int *size, int nreq)
+{
+ int err = 0;
+
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ *seg = mlx5_get_send_wqe(qp, *idx);
+ *ctrl = *seg;
+ *(uint32_t *)(*seg + 8) = 0;
+ (*ctrl)->imm = send_ieth(wr);
+ (*ctrl)->fm_ce_se = qp->sq_signal_bits |
+ (wr->send_flags & IB_SEND_SIGNALED ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ MLX5_WQE_CTRL_SOLICITED : 0);
+
+ *seg += sizeof(**ctrl);
+ *size = sizeof(**ctrl) / 16;
+
+ return err;
+}
+
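+/*
+ * Complete the control segment (opcode, size, fence, signature) and record
+ * the WQE in the software bookkeeping arrays.
+ */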
+static void finish_wqe(struct mlx5_ib_qp *qp,
+ struct mlx5_wqe_ctrl_seg *ctrl,
+ u8 size, unsigned idx, u64 wr_id,
+ int nreq, u8 fence, u8 next_fence,
+ u32 mlx5_opcode)
+{
+ u8 opmod = 0;
+
+ ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
+ mlx5_opcode | ((u32)opmod << 24));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->fm_ce_se |= fence;
+ qp->fm_cache = next_fence;
+ if (unlikely(qp->wq_sig))
+ ctrl->signature = wq_sig(ctrl);
+
+ qp->sq.wrid[idx] = wr_id;
+ qp->sq.w_list[idx].opcode = mlx5_opcode;
+ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+ qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ qp->sq.w_list[idx].next = qp->sq.cur_post;
+}
+
+
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = &dev->mdev;
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_mr *mr;
+ struct mlx5_wqe_data_seg *dpseg;
+ struct mlx5_wqe_xrc_seg *xrc;
+ struct mlx5_bf *bf = qp->bf;
+ int uninitialized_var(size);
+ void *qend = qp->sq.qend;
+ unsigned long flags;
+ unsigned idx;
+ int err = 0;
+ int inl = 0;
+ int num_sge;
+ void *seg;
+ int nreq;
+ int i;
+ u8 next_fence = 0;
+ u8 fence;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+ mlx5_ib_warn(dev, "\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ fence = qp->fm_cache;
+ num_sge = wr->num_sge;
+ if (unlikely(num_sge > qp->sq.max_gs)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ switch (ibqp->qp_type) {
+ case IB_QPT_XRC_INI:
+ xrc = seg;
+ xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
+ seg += sizeof(*xrc);
+ size += sizeof(*xrc) / 16;
+ /* fall through */
+ case IB_QPT_RC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(seg, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ seg += sizeof(struct mlx5_wqe_raddr_seg);
+ size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+ mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
+
+ case IB_WR_LOCAL_INV:
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
+ ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
+ err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ num_sge = 0;
+ break;
+
+ case IB_WR_FAST_REG_MR:
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
+ ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+ err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ num_sge = 0;
+ break;
+
+ case IB_WR_REG_SIG_MR:
+ qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
+ mr = to_mmr(wr->wr.sig_handover.sig_mr);
+
+ ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
+ err = set_sig_umr_wr(wr, qp, &seg, &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_UMR);
+ /*
+ * SET_PSV WQEs are posted unsignaled and solicited, so a
+ * completion is only reported on error
+ */
+ wr->send_flags &= ~IB_SEND_SIGNALED;
+ wr->send_flags |= IB_SEND_SOLICITED;
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+ mr->sig->psv_memory.psv_idx, &seg,
+ &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_SET_PSV);
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+ mr->sig->psv_wire.psv_idx, &seg,
+ &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_SET_PSV);
+ num_sge = 0;
+ goto skip_psv;
+
+ default:
+ break;
+ }
+ break;
+
+ case IB_QPT_UC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ set_raddr_seg(seg, wr->wr.rdma.remote_addr,
+ wr->wr.rdma.rkey);
+ seg += sizeof(struct mlx5_wqe_raddr_seg);
+ size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case IB_QPT_UD:
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ set_datagram_seg(seg, wr);
+ seg += sizeof(struct mlx5_wqe_datagram_seg);
+ size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+ break;
+
+ case MLX5_IB_QPT_REG_UMR:
+ if (wr->opcode != MLX5_IB_WR_UMR) {
+ err = -EINVAL;
+ mlx5_ib_warn(dev, "bad opcode\n");
+ goto out;
+ }
+ qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+ ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+ set_reg_umr_segment(seg, wr);
+ seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+ set_reg_mkey_segment(seg, wr);
+ seg += sizeof(struct mlx5_mkey_seg);
+ size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+ break;
+
+ default:
+ break;
+ }
+
+ if (wr->send_flags & IB_SEND_INLINE && num_sge) {
+ int uninitialized_var(sz);
+
+ err = set_data_inl_seg(qp, wr, seg, &sz);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ inl = 1;
+ size += sz;
+ } else {
+ dpseg = seg;
+ for (i = 0; i < num_sge; i++) {
+ if (unlikely(dpseg == qend)) {
+ seg = mlx5_get_send_wqe(qp, 0);
+ dpseg = seg;
+ }
+ if (likely(wr->sg_list[i].length)) {
+ set_data_ptr_seg(dpseg, wr->sg_list + i);
+ size += sizeof(struct mlx5_wqe_data_seg) / 16;
+ dpseg++;
+ }
+ }
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ get_fence(fence, wr), next_fence,
+ mlx5_ib_opcode[wr->opcode]);
+skip_psv:
+ if (0)
+ dump_wqe(qp, idx, size);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+
+ /* Make sure that descriptors are written before
+ * updating doorbell record and ringing the doorbell
+ */
+ wmb();
+
+ qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+ /* Make sure the doorbell record is visible to the HCA
+ * before we ring the doorbell.
+ */
+ wmb();
+
+ if (bf->need_lock)
+ spin_lock(&bf->lock);
+
+ /* TBD enable WC */
+ if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
+ mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
+ /* wc_wmb(); */
+ } else {
+ mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
+ MLX5_GET_DOORBELL_LOCK(&bf->lock32));
+ /* Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ mmiowb();
+ }
+ bf->offset ^= bf->buf_size;
+ if (bf->need_lock)
+ spin_unlock(&bf->lock);
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return err;
+}
+
+static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
+{
+ sig->signature = calc_sig(sig, size);
+}
+
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_data_seg *scat;
+ struct mlx5_rwqe_sig *sig;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = get_recv_wqe(qp, ind);
+ if (qp->wq_sig)
+ scat++;
+
+ for (i = 0; i < wr->num_sge; i++)
+ set_data_ptr_seg(scat + i, wr->sg_list + i);
+
+ if (i < qp->rq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+
+ if (qp->wq_sig) {
+ sig = (struct mlx5_rwqe_sig *)scat;
+ set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
+ }
+
+ qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /* Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return err;
+}
+
+static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
+{
+ switch (mlx5_state) {
+ case MLX5_QP_STATE_RST: return IB_QPS_RESET;
+ case MLX5_QP_STATE_INIT: return IB_QPS_INIT;
+ case MLX5_QP_STATE_RTR: return IB_QPS_RTR;
+ case MLX5_QP_STATE_RTS: return IB_QPS_RTS;
+ case MLX5_QP_STATE_SQ_DRAINING:
+ case MLX5_QP_STATE_SQD: return IB_QPS_SQD;
+ case MLX5_QP_STATE_SQER: return IB_QPS_SQE;
+ case MLX5_QP_STATE_ERR: return IB_QPS_ERR;
+ default: return -1;
+ }
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
+{
+ switch (mlx5_mig_state) {
+ case MLX5_QP_PM_ARMED: return IB_MIG_ARMED;
+ case MLX5_QP_PM_REARM: return IB_MIG_REARM;
+ case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
+ default: return -1;
+ }
+}
+
+static int to_ib_qp_access_flags(int mlx5_flags)
+{
+ int ib_flags = 0;
+
+ if (mlx5_flags & MLX5_QP_BIT_RRE)
+ ib_flags |= IB_ACCESS_REMOTE_READ;
+ if (mlx5_flags & MLX5_QP_BIT_RWE)
+ ib_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (mlx5_flags & MLX5_QP_BIT_RAE)
+ ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
+ struct mlx5_qp_path *path)
+{
+ struct mlx5_core_dev *dev = &ibdev->mdev;
+
+ memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
+ ib_ah_attr->port_num = path->port;
+
+ if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ return;
+
+ ib_ah_attr->sl = path->sl & 0xf;
+
+ ib_ah_attr->dlid = be16_to_cpu(path->rlid);
+ ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
+ ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
+ ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
+ if (ib_ah_attr->ah_flags) {
+ ib_ah_attr->grh.sgid_index = path->mgid_index;
+ ib_ah_attr->grh.hop_limit = path->hop_limit;
+ ib_ah_attr->grh.traffic_class =
+ (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+ ib_ah_attr->grh.flow_label =
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
+ memcpy(ib_ah_attr->grh.dgid.raw,
+ path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
+ }
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_query_qp_mbox_out *outb;
+ struct mlx5_qp_context *context;
+ int mlx5_state;
+ int err = 0;
+
+ mutex_lock(&qp->mutex);
+ outb = kzalloc(sizeof(*outb), GFP_KERNEL);
+ if (!outb) {
+ err = -ENOMEM;
+ goto out;
+ }
+ context = &outb->ctx;
+ err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb));
+ if (err)
+ goto out_free;
+
+ mlx5_state = be32_to_cpu(context->flags) >> 28;
+
+ qp->state = to_ib_qp_state(mlx5_state);
+ qp_attr->qp_state = qp->state;
+ qp_attr->path_mtu = context->mtu_msgmax >> 5;
+ qp_attr->path_mig_state =
+ to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
+ qp_attr->qkey = be32_to_cpu(context->qkey);
+ qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
+ qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
+ qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
+ qp_attr->qp_access_flags =
+ to_ib_qp_access_flags(be32_to_cpu(context->params2));
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
+ qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ }
+
+ qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
+ qp_attr->port_num = context->pri_path.port;
+
+ /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+ qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
+
+ qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+
+ qp_attr->max_dest_rd_atomic =
+ 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
+ qp_attr->min_rnr_timer =
+ (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
+ qp_attr->timeout = context->pri_path.ackto_lt >> 3;
+ qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
+ qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
+ qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = qp->rq.max_gs;
+
+ if (!ibqp->uobject) {
+ qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = qp->sq.max_gs;
+ } else {
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
+ }
+
+ /* We don't support inline sends for kernel QPs (yet), and we
+ * don't know what userspace's value should be.
+ */
+ qp_attr->cap.max_inline_data = 0;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ qp_init_attr->create_flags = 0;
+ if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+ qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+
+out_free:
+ kfree(outb);
+
+out:
+ mutex_unlock(&qp->mutex);
+ return err;
+}
+
+struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_xrcd *xrcd;
+ int err;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC))
+ return ERR_PTR(-ENOSYS);
+
+ xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn);
+ if (err) {
+ kfree(xrcd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return &xrcd->ibxrcd;
+}
+
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+ struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
+ u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
+ int err;
+
+ err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
+ return err;
+ }
+
+ kfree(xrcd);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
new file mode 100644
index 00000000000..384af6dec5e
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/srq.h>
+#include <linux/slab.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "mlx5_ib.h"
+#include "user.h"
+
+/* not supported currently */
+static int srq_signature;
+
+static void *get_wqe(struct mlx5_ib_srq *srq, int n)
+{
+ return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+}
+
+static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
+{
+ struct ib_event event;
+ struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (type) {
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
+ type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+ struct mlx5_create_srq_mbox_in **in,
+ struct ib_udata *udata, int buf_size, int *inlen)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_create_srq ucmd;
+ size_t ucmdlen;
+ int err;
+ int npages;
+ int page_shift;
+ int ncont;
+ u32 offset;
+
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
+ mlx5_ib_dbg(dev, "failed copy udata\n");
+ return -EFAULT;
+ }
+
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
+ srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
+
+ srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
+ 0, 0);
+ if (IS_ERR(srq->umem)) {
+ mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
+ err = PTR_ERR(srq->umem);
+ return err;
+ }
+
+ mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
+ &page_shift, &ncont, NULL);
+ err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
+ &offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+ *in = mlx5_vzalloc(*inlen);
+ if (!(*in)) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+
+ mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
+
+ err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
+ ucmd.db_addr, &srq->db);
+ if (err) {
+ mlx5_ib_dbg(dev, "map doorbell failed\n");
+ goto err_in;
+ }
+
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+
+ return 0;
+
+err_in:
+ mlx5_vfree(*in);
+
+err_umem:
+ ib_umem_release(srq->umem);
+
+ return err;
+}
+
+static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
+ struct mlx5_create_srq_mbox_in **in, int buf_size,
+ int *inlen)
+{
+ int err;
+ int i;
+ struct mlx5_wqe_srq_next_seg *next;
+ int page_shift;
+ int npages;
+
+ err = mlx5_db_alloc(&dev->mdev, &srq->db);
+ if (err) {
+ mlx5_ib_warn(dev, "alloc dbell rec failed\n");
+ return err;
+ }
+
+ *srq->db.db = 0;
+
+ if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ mlx5_ib_dbg(dev, "buf alloc failed\n");
+ err = -ENOMEM;
+ goto err_db;
+ }
+ page_shift = srq->buf.page_shift;
+
+ srq->head = 0;
+ srq->tail = srq->msrq.max - 1;
+ srq->wqe_ctr = 0;
+
+ for (i = 0; i < srq->msrq.max; i++) {
+ next = get_wqe(srq, i);
+ next->next_wqe_index =
+ cpu_to_be16((i + 1) & (srq->msrq.max - 1));
+ }
+
+ npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
+ mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
+ buf_size, page_shift, srq->buf.npages, npages);
+ *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
+ *in = mlx5_vzalloc(*inlen);
+ if (!*in) {
+ err = -ENOMEM;
+ goto err_buf;
+ }
+ mlx5_fill_page_array(&srq->buf, (*in)->pas);
+
+ srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
+ (unsigned long)(srq->msrq.max * sizeof(u64)));
+ err = -ENOMEM;
+ goto err_in;
+ }
+ srq->wq_sig = !!srq_signature;
+
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+ return 0;
+
+err_in:
+ mlx5_vfree(*in);
+
+err_buf:
+ mlx5_buf_free(&dev->mdev, &srq->buf);
+
+err_db:
+ mlx5_db_free(&dev->mdev, &srq->db);
+ return err;
+}
+
+static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
+{
+ mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ ib_umem_release(srq->umem);
+}
+
+
+static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
+{
+ kfree(srq->wrid);
+ mlx5_buf_free(&dev->mdev, &srq->buf);
+ mlx5_db_free(&dev->mdev, &srq->db);
+}
+
+struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_srq *srq;
+ int desc_size;
+ int buf_size;
+ int err;
+ struct mlx5_create_srq_mbox_in *uninitialized_var(in);
+ int uninitialized_var(inlen);
+ int is_xrc;
+ u32 flgs, xrcdn;
+
+ /* Sanity check SRQ size before proceeding */
+ if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) {
+ mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
+ init_attr->attr.max_wr,
+ dev->mdev.caps.max_srq_wqes);
+ return ERR_PTR(-EINVAL);
+ }
+
+ srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+ srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+ srq->msrq.max_gs = init_attr->attr.max_sge;
+
+ desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
+ srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
+ desc_size = roundup_pow_of_two(desc_size);
+ desc_size = max_t(int, 32, desc_size);
+ srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
+ srq->msrq.wqe_shift = ilog2(desc_size);
+ buf_size = srq->msrq.max * desc_size;
+ mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
+ desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
+ srq->msrq.max_avail_gather);
+
+ if (pd->uobject)
+ err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+ else
+ err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+
+ if (err) {
+ mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
+ pd->uobject ? "user" : "kernel", err);
+ goto err_srq;
+ }
+
+ is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+ in->ctx.state_log_sz = ilog2(srq->msrq.max);
+ flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
+ xrcdn = 0;
+ if (is_xrc) {
+ xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
+ in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
+ } else if (init_attr->srq_type == IB_SRQT_BASIC) {
+ xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
+ in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
+ }
+
+ in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
+
+ in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
+ in->ctx.db_record = cpu_to_be64(srq->db.dma);
+ err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen);
+ mlx5_vfree(in);
+ if (err) {
+ mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
+ goto err_usr_kern_srq;
+ }
+
+ mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
+
+ srq->msrq.event = mlx5_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
+
+ if (pd->uobject)
+ if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+ mlx5_ib_dbg(dev, "copy to user failed\n");
+ err = -EFAULT;
+ goto err_core;
+ }
+
+ init_attr->attr.max_wr = srq->msrq.max - 1;
+
+ return &srq->ibsrq;
+
+err_core:
+ mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
+
+err_usr_kern_srq:
+ if (pd->uobject)
+ destroy_srq_user(pd, srq);
+ else
+ destroy_srq_kernel(dev, srq);
+
+err_srq:
+ kfree(srq);
+
+ return ERR_PTR(err);
+}
+
+int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+ int ret;
+
+ /* We don't support resizing SRQs yet */
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit >= srq->msrq.max)
+ return -EINVAL;
+
+ mutex_lock(&srq->mutex);
+ ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1);
+ mutex_unlock(&srq->mutex);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
+ struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+ int ret;
+ struct mlx5_query_srq_mbox_out *out;
+
+ out = kzalloc(sizeof(*out), GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out);
+ if (ret)
+ goto out_box;
+
+ srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
+ srq_attr->max_wr = srq->msrq.max - 1;
+ srq_attr->max_sge = srq->msrq.max_gs;
+
+out_box:
+ kfree(out);
+ return ret;
+}
+
+int mlx5_ib_destroy_srq(struct ib_srq *srq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(srq->device);
+ struct mlx5_ib_srq *msrq = to_msrq(srq);
+
+ mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq);
+
+ if (srq->uobject) {
+ mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
+ ib_umem_release(msrq->umem);
+ } else {
+ destroy_srq_kernel(dev, msrq);
+ }
+
+ kfree(srq);
+ return 0;
+}
+
+void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
+{
+ struct mlx5_wqe_srq_next_seg *next;
+
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ next = get_wqe(srq, srq->tail);
+ next->next_wqe_index = cpu_to_be16(wqe_index);
+ srq->tail = wqe_index;
+
+ spin_unlock(&srq->lock);
+}
+
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct mlx5_ib_srq *srq = to_msrq(ibsrq);
+ struct mlx5_wqe_srq_next_seg *next;
+ struct mlx5_wqe_data_seg *scat;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) { /* SRQ is full */
+ err = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ srq->wrid[srq->head] = wr->wr_id;
+
+ next = get_wqe(srq, srq->head);
+ srq->head = be16_to_cpu(next->next_wqe_index);
+ scat = (struct mlx5_wqe_data_seg *)(next + 1);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
+ scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
+ scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->msrq.max_avail_gather) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].addr = 0;
+ }
+ }
+
+ if (likely(nreq)) {
+ srq->wqe_ctr += nreq;
+
+ /* Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *srq->db.db = cpu_to_be32(srq->wqe_ctr);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
new file mode 100644
index 00000000000..d0ba264ac1e
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_USER_H
+#define MLX5_IB_USER_H
+
+#include <linux/types.h>
+
+enum {
+ MLX5_QP_FLAG_SIGNATURE = 1 << 0,
+ MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
+};
+
+enum {
+ MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
+};
+
+
+/* Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MLX5_IB_UVERBS_ABI_VERSION 1
+
+/* Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mlx5_ib_alloc_ucontext_req {
+ __u32 total_num_uuars;
+ __u32 num_low_latency_uuars;
+};
+
+struct mlx5_ib_alloc_ucontext_req_v2 {
+ __u32 total_num_uuars;
+ __u32 num_low_latency_uuars;
+ __u32 flags;
+ __u32 reserved;
+};
+
+struct mlx5_ib_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 bf_reg_size;
+ __u32 tot_uuars;
+ __u32 cache_line_size;
+ __u16 max_sq_desc_sz;
+ __u16 max_rq_desc_sz;
+ __u32 max_send_wqebb;
+ __u32 max_recv_wr;
+ __u32 max_srq_recv_wr;
+ __u16 num_ports;
+ __u16 reserved;
+};
+
+struct mlx5_ib_alloc_pd_resp {
+ __u32 pdn;
+};
+
+struct mlx5_ib_create_cq {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u32 cqe_size;
+ __u32 reserved; /* explicit padding (optional on i386) */
+};
+
+struct mlx5_ib_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mlx5_ib_resize_cq {
+ __u64 buf_addr;
+ __u16 cqe_size;
+ __u16 reserved0;
+ __u32 reserved1;
+};
+
+struct mlx5_ib_create_srq {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u32 flags;
+ __u32 reserved; /* explicit padding (optional on i386) */
+};
+
+struct mlx5_ib_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct mlx5_ib_create_qp {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u32 sq_wqe_count;
+ __u32 rq_wqe_count;
+ __u32 rq_wqe_shift;
+ __u32 flags;
+};
+
+struct mlx5_ib_create_qp_resp {
+ __u32 uuar_index;
+};
+#endif /* MLX5_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index 03efc074967..da314c3fec2 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -7,7 +7,7 @@ config INFINIBAND_MTHCA
("Tavor") and the MT25208 PCI Express HCA ("Arbel").
config INFINIBAND_MTHCA_DEBUG
- bool "Verbose debugging output" if EMBEDDED
+ bool "Verbose debugging output" if EXPERT
depends on INFINIBAND_MTHCA
default y
---help---
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index c5ccc2daab6..b4e0cf4e95c 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -211,7 +211,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
if (!buf->direct.buf)
return -ENOMEM;
- pci_unmap_addr_set(&buf->direct, mapping, t);
+ dma_unmap_addr_set(&buf->direct, mapping, t);
memset(buf->direct.buf, 0, size);
@@ -251,7 +251,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
goto err_free;
dma_list[i] = t;
- pci_unmap_addr_set(&buf->page_list[i], mapping, t);
+ dma_unmap_addr_set(&buf->page_list[i], mapping, t);
clear_page(buf->page_list[i].buf);
}
@@ -289,12 +289,12 @@ void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf,
if (is_direct)
dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
- pci_unmap_addr(&buf->direct, mapping));
+ dma_unmap_addr(&buf->direct, mapping));
else {
for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
buf->page_list[i].buf,
- pci_unmap_addr(&buf->page_list[i],
+ dma_unmap_addr(&buf->page_list[i],
mapping));
kfree(buf->page_list);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index cc440f90000..712d2a30fbe 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -31,6 +31,7 @@
*/
#include <linux/jiffies.h>
+#include <linux/module.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
@@ -68,11 +69,16 @@ static void catas_reset(struct work_struct *work)
spin_unlock_irq(&catas_lock);
list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+ struct pci_dev *pdev = dev->pdev;
ret = __mthca_restart_one(dev->pdev);
+ /* 'dev' is no longer valid after the restart and must not be used */
if (ret)
- mthca_err(dev, "Reset failed (%d)\n", ret);
- else
- mthca_dbg(dev, "Reset succeeded\n");
+ printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
+ pci_name(pdev), ret);
+ else {
+ struct mthca_dev *d = pci_get_drvdata(pdev);
+ mthca_dbg(d, "Reset succeeded\n");
+ }
}
mutex_unlock(&mthca_device_mutex);
@@ -88,6 +94,7 @@ static void handle_catas(struct mthca_dev *dev)
event.device = &dev->ib_dev;
event.event = IB_EVENT_DEVICE_FATAL;
event.element.port_num = 0;
+ dev->active = false;
ib_dispatch_event(&event);
@@ -140,7 +147,7 @@ static void poll_catas(unsigned long dev_ptr)
void mthca_start_catas_poll(struct mthca_dev *dev)
{
- unsigned long addr;
+ phys_addr_t addr;
init_timer(&dev->catas_err.timer);
dev->catas_err.map = NULL;
@@ -149,18 +156,11 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
((pci_resource_len(dev->pdev, 0) - 1) &
dev->catas_err.addr);
- if (!request_mem_region(addr, dev->catas_err.size * 4,
- DRV_NAME)) {
- mthca_warn(dev, "couldn't request catastrophic error region "
- "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
- return;
- }
-
dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
if (!dev->catas_err.map) {
mthca_warn(dev, "couldn't map catastrophic error region "
- "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
- release_mem_region(addr, dev->catas_err.size * 4);
+ "at 0x%llx/0x%x\n", (unsigned long long) addr,
+ dev->catas_err.size * 4);
return;
}
@@ -175,13 +175,8 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
{
del_timer_sync(&dev->catas_err.timer);
- if (dev->catas_err.map) {
+ if (dev->catas_err.map)
iounmap(dev->catas_err.map);
- release_mem_region(pci_resource_start(dev->pdev, 0) +
- ((pci_resource_len(dev->pdev, 0) - 1) &
- dev->catas_err.addr),
- dev->catas_err.size * 4);
- }
spin_lock_irq(&catas_lock);
list_del(&dev->catas_err.list);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index c33e1c53c79..9d3e5c1ac60 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -36,6 +36,8 @@
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/io.h>
#include <rdma/ib_mad.h>
@@ -157,13 +159,15 @@ enum {
enum {
CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1,
CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1,
- CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1
+ CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1,
+ CMD_TIME_CLASS_D = 60 * HZ
};
#else
enum {
CMD_TIME_CLASS_A = 60 * HZ,
CMD_TIME_CLASS_B = 60 * HZ,
- CMD_TIME_CLASS_C = 60 * HZ
+ CMD_TIME_CLASS_C = 60 * HZ,
+ CMD_TIME_CLASS_D = 60 * HZ
};
#endif
@@ -298,6 +302,38 @@ static int mthca_cmd_post(struct mthca_dev *dev,
return err;
}
+
+static int mthca_status_to_errno(u8 status)
+{
+ static const int trans_table[] = {
+ [MTHCA_CMD_STAT_INTERNAL_ERR] = -EIO,
+ [MTHCA_CMD_STAT_BAD_OP] = -EPERM,
+ [MTHCA_CMD_STAT_BAD_PARAM] = -EINVAL,
+ [MTHCA_CMD_STAT_BAD_SYS_STATE] = -ENXIO,
+ [MTHCA_CMD_STAT_BAD_RESOURCE] = -EBADF,
+ [MTHCA_CMD_STAT_RESOURCE_BUSY] = -EBUSY,
+ [MTHCA_CMD_STAT_DDR_MEM_ERR] = -ENOMEM,
+ [MTHCA_CMD_STAT_EXCEED_LIM] = -ENOMEM,
+ [MTHCA_CMD_STAT_BAD_RES_STATE] = -EBADF,
+ [MTHCA_CMD_STAT_BAD_INDEX] = -EBADF,
+ [MTHCA_CMD_STAT_BAD_NVMEM] = -EFAULT,
+ [MTHCA_CMD_STAT_BAD_QPEE_STATE] = -EINVAL,
+ [MTHCA_CMD_STAT_BAD_SEG_PARAM] = -EFAULT,
+ [MTHCA_CMD_STAT_REG_BOUND] = -EBUSY,
+ [MTHCA_CMD_STAT_LAM_NOT_PRE] = -EAGAIN,
+ [MTHCA_CMD_STAT_BAD_PKT] = -EBADMSG,
+ [MTHCA_CMD_STAT_BAD_SIZE] = -ENOMEM,
+ };
+
+ if (status >= ARRAY_SIZE(trans_table) ||
+ (status != MTHCA_CMD_STAT_OK
+ && trans_table[status] == 0))
+ return -EINVAL;
+
+ return trans_table[status];
+}
+
+
static int mthca_cmd_poll(struct mthca_dev *dev,
u64 in_param,
u64 *out_param,
@@ -305,11 +341,11 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
int err = 0;
unsigned long end;
+ u8 status;
down(&dev->cmd.poll_sem);
@@ -338,7 +374,12 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
- *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
+ status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
+ if (status) {
+ mthca_dbg(dev, "Command %02x completed with status %02x\n",
+ op, status);
+ err = mthca_status_to_errno(status);
+ }
out:
up(&dev->cmd.poll_sem);
@@ -371,8 +412,7 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
int err = 0;
struct mthca_cmd_context *context;
@@ -404,10 +444,11 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
if (err)
goto out;
- *status = context->status;
- if (*status)
+ if (context->status) {
mthca_dbg(dev, "Command %02x completed with status %02x\n",
- op, *status);
+ op, context->status);
+ err = mthca_status_to_errno(context->status);
+ }
if (out_is_imm)
*out_param = context->out_param;
@@ -429,17 +470,16 @@ static int mthca_cmd_box(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
return mthca_cmd_wait(dev, in_param, &out_param, 0,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
else
return mthca_cmd_poll(dev, in_param, &out_param, 0,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
}
/* Invoke a command with no output parameter */
@@ -448,11 +488,10 @@ static int mthca_cmd(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
return mthca_cmd_box(dev, in_param, 0, in_modifier,
- op_modifier, op, timeout, status);
+ op_modifier, op, timeout);
}
/*
@@ -466,17 +505,16 @@ static int mthca_cmd_imm(struct mthca_dev *dev,
u32 in_modifier,
u8 op_modifier,
u16 op,
- unsigned long timeout,
- u8 *status)
+ unsigned long timeout)
{
if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS)
return mthca_cmd_wait(dev, in_param, out_param, 1,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
else
return mthca_cmd_poll(dev, in_param, out_param, 1,
in_modifier, op_modifier, op,
- timeout, status);
+ timeout);
}
int mthca_cmd_init(struct mthca_dev *dev)
@@ -593,14 +631,14 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
kfree(mailbox);
}
-int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
+int mthca_SYS_EN(struct mthca_dev *dev)
{
u64 out;
int ret;
- ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status);
+ ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D);
- if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR)
+ if (ret == -ENOMEM)
mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, "
"sladdr=%d, SPD source=%s\n",
(int) (out >> 6) & 0xf, (int) (out >> 4) & 3,
@@ -609,13 +647,13 @@ int mthca_SYS_EN(struct mthca_dev *dev, u8 *status)
return ret;
}
-int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status)
+int mthca_SYS_DIS(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C);
}
static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
- u64 virt, u8 *status)
+ u64 virt)
{
struct mthca_mailbox *mailbox;
struct mthca_icm_iter iter;
@@ -663,8 +701,8 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
if (++nent == MTHCA_MAILBOX_SIZE / 16) {
err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
- CMD_TIME_CLASS_B, status);
- if (err || *status)
+ CMD_TIME_CLASS_B);
+ if (err)
goto out;
nent = 0;
}
@@ -673,7 +711,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
if (nent)
err = mthca_cmd(dev, mailbox->dma, nent, 0, op,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
switch (op) {
case CMD_MAP_FA:
@@ -693,24 +731,24 @@ out:
return err;
}
-int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm)
{
- return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status);
+ return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1);
}
-int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status)
+int mthca_UNMAP_FA(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B);
}
-int mthca_RUN_FW(struct mthca_dev *dev, u8 *status)
+int mthca_RUN_FW(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A);
}
static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
{
- unsigned long addr;
+ phys_addr_t addr;
u16 max_off = 0;
int i;
@@ -734,7 +772,7 @@ static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base)
mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n");
}
-int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
+int mthca_QUERY_FW(struct mthca_dev *dev)
{
struct mthca_mailbox *mailbox;
u32 *outbox;
@@ -768,7 +806,7 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status)
outbox = mailbox->buf;
err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
if (err)
goto out;
@@ -840,7 +878,7 @@ out:
return err;
}
-int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
+int mthca_ENABLE_LAM(struct mthca_dev *dev)
{
struct mthca_mailbox *mailbox;
u8 info;
@@ -861,14 +899,11 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status)
outbox = mailbox->buf;
err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM,
- CMD_TIME_CLASS_C, status);
+ CMD_TIME_CLASS_C);
if (err)
goto out;
- if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE)
- goto out;
-
MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET);
MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET);
MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET);
@@ -893,12 +928,12 @@ out:
return err;
}
-int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status)
+int mthca_DISABLE_LAM(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C);
}
-int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
+int mthca_QUERY_DDR(struct mthca_dev *dev)
{
struct mthca_mailbox *mailbox;
u8 info;
@@ -919,7 +954,7 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status)
outbox = mailbox->buf;
err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
if (err)
goto out;
@@ -949,7 +984,7 @@ out:
}
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
- struct mthca_dev_lim *dev_lim, u8 *status)
+ struct mthca_dev_lim *dev_lim)
{
struct mthca_mailbox *mailbox;
u32 *outbox;
@@ -1025,7 +1060,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
outbox = mailbox->buf;
err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
if (err)
goto out;
@@ -1057,7 +1092,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
if (mthca_is_memfree(dev))
dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
- MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
+ dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size;
else
dev_lim->reserved_mtts = 1 << (field >> 4);
MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
@@ -1229,7 +1264,7 @@ static void get_board_id(void *vsd, char *board_id)
}
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
- struct mthca_adapter *adapter, u8 *status)
+ struct mthca_adapter *adapter)
{
struct mthca_mailbox *mailbox;
u32 *outbox;
@@ -1248,7 +1283,7 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
outbox = mailbox->buf;
err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
if (err)
goto out;
@@ -1272,8 +1307,7 @@ out:
}
int mthca_INIT_HCA(struct mthca_dev *dev,
- struct mthca_init_hca_param *param,
- u8 *status)
+ struct mthca_init_hca_param *param)
{
struct mthca_mailbox *mailbox;
__be32 *inbox;
@@ -1390,7 +1424,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET);
}
- err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status);
+ err = mthca_cmd(dev, mailbox->dma, 0, 0,
+ CMD_INIT_HCA, CMD_TIME_CLASS_D);
mthca_free_mailbox(dev, mailbox);
return err;
@@ -1398,7 +1433,7 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
- int port, u8 *status)
+ int port)
{
struct mthca_mailbox *mailbox;
u32 *inbox;
@@ -1442,24 +1477,24 @@ int mthca_INIT_IB(struct mthca_dev *dev,
MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET);
err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status)
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port)
{
- return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status);
+ return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, CMD_TIME_CLASS_A);
}
-int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status)
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic)
{
- return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status);
+ return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, CMD_TIME_CLASS_C);
}
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
- int port, u8 *status)
+ int port)
{
struct mthca_mailbox *mailbox;
u32 *inbox;
@@ -1488,18 +1523,18 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET);
err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
mthca_free_mailbox(dev, mailbox);
return err;
}
-int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status)
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt)
{
- return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status);
+ return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt);
}
-int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status)
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt)
{
struct mthca_mailbox *mailbox;
__be64 *inbox;
@@ -1514,7 +1549,7 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status
inbox[1] = cpu_to_be64(dma_addr);
err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
mthca_free_mailbox(dev, mailbox);
@@ -1525,31 +1560,31 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status
return err;
}
-int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status)
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count)
{
mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n",
page_count, (unsigned long long) virt);
- return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, virt, page_count, 0,
+ CMD_UNMAP_ICM, CMD_TIME_CLASS_B);
}
-int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status)
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm)
{
- return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status);
+ return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1);
}
-int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status)
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B);
}
-int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
- u8 *status)
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages)
{
- int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE,
- CMD_TIME_CLASS_A, status);
+ int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0,
+ 0, CMD_SET_ICM_SIZE, CMD_TIME_CLASS_A);
- if (ret || status)
+ if (ret)
return ret;
/*
@@ -1563,74 +1598,73 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
}
int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int mpt_index, u8 *status)
+ int mpt_index)
{
return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
}
int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int mpt_index, u8 *status)
+ int mpt_index)
{
return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
!mailbox, CMD_HW2SW_MPT,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
}
int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int num_mtt, u8 *status)
+ int num_mtt)
{
return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
}
-int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status)
+int mthca_SYNC_TPT(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status);
+ return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B);
}
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
- int eq_num, u8 *status)
+ int eq_num)
{
mthca_dbg(dev, "%s mask %016llx for eqn %d\n",
unmap ? "Clearing" : "Setting",
(unsigned long long) event_mask, eq_num);
return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num,
- 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status);
+ 0, CMD_MAP_EQ, CMD_TIME_CLASS_B);
}
int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int eq_num, u8 *status)
+ int eq_num)
{
return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int eq_num, u8 *status)
+ int eq_num)
{
return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0,
CMD_HW2SW_EQ,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int cq_num, u8 *status)
+ int cq_num)
{
return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int cq_num, u8 *status)
+ int cq_num)
{
return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0,
CMD_HW2SW_CQ,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
-int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
- u8 *status)
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size)
{
struct mthca_mailbox *mailbox;
__be32 *inbox;
@@ -1654,44 +1688,43 @@ int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET);
err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int srq_num, u8 *status)
+ int srq_num)
{
return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int srq_num, u8 *status)
+ int srq_num)
{
return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0,
CMD_HW2SW_SRQ,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
- struct mthca_mailbox *mailbox, u8 *status)
+ struct mthca_mailbox *mailbox)
{
return mthca_cmd_box(dev, 0, mailbox->dma, num, 0,
- CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status);
+ CMD_QUERY_SRQ, CMD_TIME_CLASS_A);
}
-int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status)
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit)
{
return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
}
int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
enum ib_qp_state next, u32 num, int is_ee,
- struct mthca_mailbox *mailbox, u32 optmask,
- u8 *status)
+ struct mthca_mailbox *mailbox, u32 optmask)
{
static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
@@ -1752,7 +1785,7 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
(!!is_ee << 24) | num, op_mod,
- op[cur][next], CMD_TIME_CLASS_C, status);
+ op[cur][next], CMD_TIME_CLASS_C);
if (0 && mailbox) {
int i;
@@ -1786,21 +1819,20 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
}
err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num,
- op_mod, op[cur][next], CMD_TIME_CLASS_C, status);
+ op_mod, op[cur][next], CMD_TIME_CLASS_C);
}
return err;
}
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
- struct mthca_mailbox *mailbox, u8 *status)
+ struct mthca_mailbox *mailbox)
{
return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0,
- CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status);
+ CMD_QUERY_QPEE, CMD_TIME_CLASS_A);
}
-int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
- u8 *status)
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn)
{
u8 op_mod;
@@ -1814,7 +1846,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
case IB_QPT_RAW_IPV6:
op_mod = 2;
break;
- case IB_QPT_RAW_ETY:
+ case IB_QPT_RAW_ETHERTYPE:
op_mod = 3;
break;
default:
@@ -1822,12 +1854,12 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
}
return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP,
- CMD_TIME_CLASS_B, status);
+ CMD_TIME_CLASS_B);
}
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
- void *in_mad, void *response_mad, u8 *status)
+ void *in_mad, void *response_mad)
{
struct mthca_mailbox *inmailbox, *outmailbox;
void *inbox;
@@ -1894,9 +1926,9 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
in_modifier, op_modifier,
- CMD_MAD_IFC, CMD_TIME_CLASS_C, status);
+ CMD_MAD_IFC, CMD_TIME_CLASS_C);
- if (!err && !*status)
+ if (!err)
memcpy(response_mad, outmailbox->buf, 256);
mthca_free_mailbox(dev, inmailbox);
@@ -1905,33 +1937,33 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
}
int mthca_READ_MGM(struct mthca_dev *dev, int index,
- struct mthca_mailbox *mailbox, u8 *status)
+ struct mthca_mailbox *mailbox)
{
return mthca_cmd_box(dev, 0, mailbox->dma, index, 0,
- CMD_READ_MGM, CMD_TIME_CLASS_A, status);
+ CMD_READ_MGM, CMD_TIME_CLASS_A);
}
int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
- struct mthca_mailbox *mailbox, u8 *status)
+ struct mthca_mailbox *mailbox)
{
return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
}
int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- u16 *hash, u8 *status)
+ u16 *hash)
{
u64 imm;
int err;
err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
- CMD_TIME_CLASS_A, status);
+ CMD_TIME_CLASS_A);
*hash = imm;
return err;
}
-int mthca_NOP(struct mthca_dev *dev, u8 *status)
+int mthca_NOP(struct mthca_dev *dev)
{
- return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status);
+ return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100));
}
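With mthca_status_to_errno() folding firmware status codes into the command's return
value, every caller updated in the files below collapses from a two-result check
(err plus *status) to a single errno test. A minimal sketch of the new pattern
(assumes the mthca_dev handle and the driver headers; illustrative only, not part of
this patch):

	/* Old convention, kept as a comment for contrast:
	 *	u8 status;
	 *	err = mthca_SYNC_TPT(dev, &status);
	 *	if (err || status)
	 *		return err ? err : -EINVAL;
	 */
	static int example_sync_tpt(struct mthca_dev *dev)
	{
		int err = mthca_SYNC_TPT(dev);	/* new signature: no status argument */

		if (err)
			mthca_warn(dev, "SYNC_TPT returned %d\n", err);
		return err;
	}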
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h
index 6efd3265f24..f952244c54d 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.h
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.h
@@ -252,79 +252,74 @@ struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
gfp_t gfp_mask);
void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
-int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);
-int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status);
-int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
-int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status);
-int mthca_RUN_FW(struct mthca_dev *dev, u8 *status);
-int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status);
-int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status);
-int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status);
-int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status);
+int mthca_SYS_EN(struct mthca_dev *dev);
+int mthca_SYS_DIS(struct mthca_dev *dev);
+int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm);
+int mthca_UNMAP_FA(struct mthca_dev *dev);
+int mthca_RUN_FW(struct mthca_dev *dev);
+int mthca_QUERY_FW(struct mthca_dev *dev);
+int mthca_ENABLE_LAM(struct mthca_dev *dev);
+int mthca_DISABLE_LAM(struct mthca_dev *dev);
+int mthca_QUERY_DDR(struct mthca_dev *dev);
int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
- struct mthca_dev_lim *dev_lim, u8 *status);
+ struct mthca_dev_lim *dev_lim);
int mthca_QUERY_ADAPTER(struct mthca_dev *dev,
- struct mthca_adapter *adapter, u8 *status);
+ struct mthca_adapter *adapter);
int mthca_INIT_HCA(struct mthca_dev *dev,
- struct mthca_init_hca_param *param,
- u8 *status);
+ struct mthca_init_hca_param *param);
int mthca_INIT_IB(struct mthca_dev *dev,
struct mthca_init_ib_param *param,
- int port, u8 *status);
-int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status);
-int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status);
+ int port);
+int mthca_CLOSE_IB(struct mthca_dev *dev, int port);
+int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic);
int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param,
- int port, u8 *status);
-int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status);
-int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status);
-int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status);
-int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status);
-int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status);
-int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages,
- u8 *status);
+ int port);
+int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt);
+int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt);
+int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count);
+int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm);
+int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev);
+int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages);
int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int mpt_index, u8 *status);
+ int mpt_index);
int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int mpt_index, u8 *status);
+ int mpt_index);
int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int num_mtt, u8 *status);
-int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status);
+ int num_mtt);
+int mthca_SYNC_TPT(struct mthca_dev *dev);
int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap,
- int eq_num, u8 *status);
+ int eq_num);
int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int eq_num, u8 *status);
+ int eq_num);
int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int eq_num, u8 *status);
+ int eq_num);
int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int cq_num, u8 *status);
+ int cq_num);
int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int cq_num, u8 *status);
-int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
- u8 *status);
+ int cq_num);
+int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size);
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int srq_num, u8 *status);
+ int srq_num);
int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- int srq_num, u8 *status);
+ int srq_num);
int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num,
- struct mthca_mailbox *mailbox, u8 *status);
-int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status);
+ struct mthca_mailbox *mailbox);
+int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit);
int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur,
enum ib_qp_state next, u32 num, int is_ee,
- struct mthca_mailbox *mailbox, u32 optmask,
- u8 *status);
+ struct mthca_mailbox *mailbox, u32 optmask);
int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee,
- struct mthca_mailbox *mailbox, u8 *status);
-int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
- u8 *status);
+ struct mthca_mailbox *mailbox);
+int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn);
int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
- void *in_mad, void *response_mad, u8 *status);
+ void *in_mad, void *response_mad);
int mthca_READ_MGM(struct mthca_dev *dev, int index,
- struct mthca_mailbox *mailbox, u8 *status);
+ struct mthca_mailbox *mailbox);
int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
- struct mthca_mailbox *mailbox, u8 *status);
+ struct mthca_mailbox *mailbox);
int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
- u16 *hash, u8 *status);
-int mthca_NOP(struct mthca_dev *dev, u8 *status);
+ u16 *hash);
+int mthca_NOP(struct mthca_dev *dev);
#endif /* MTHCA_CMD_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h
index 75671f75cac..155bc66395b 100644
--- a/drivers/infiniband/hw/mthca/mthca_config_reg.h
+++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h
@@ -34,8 +34,6 @@
#ifndef MTHCA_CONFIG_REG_H
#define MTHCA_CONFIG_REG_H
-#include <asm/page.h>
-
#define MTHCA_HCR_BASE 0x80680
#define MTHCA_HCR_SIZE 0x0001c
#define MTHCA_ECR_BASE 0x80700
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index d9f4735c2b3..40ba8333815 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -34,6 +34,7 @@
* SOFTWARE.
*/
+#include <linux/gfp.h>
#include <linux/hardirq.h>
#include <linux/sched.h>
@@ -642,7 +643,8 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
- entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff);
+ entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ?
+ IB_WC_IP_CSUM_OK : 0;
}
entry->status = IB_WC_SUCCESS;
@@ -778,7 +780,6 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_mailbox *mailbox;
struct mthca_cq_context *cq_context;
int err = -ENOMEM;
- u8 status;
cq->ibcq.cqe = nent - 1;
cq->is_kernel = !ctx;
@@ -846,19 +847,12 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context->state_db = cpu_to_be32(cq->arm_db_index);
}
- err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status);
+ err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn);
if (err) {
mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err);
goto err_out_free_mr;
}
- if (status) {
- mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_free_mr;
- }
-
spin_lock_irq(&dev->cq_table.lock);
if (mthca_array_set(&dev->cq_table.cq,
cq->cqn & (dev->limits.num_cqs - 1),
@@ -914,7 +908,6 @@ void mthca_free_cq(struct mthca_dev *dev,
{
struct mthca_mailbox *mailbox;
int err;
- u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
@@ -922,11 +915,9 @@ void mthca_free_cq(struct mthca_dev *dev,
return;
}
- err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status);
+ err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn);
if (err)
mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err);
- else if (status)
- mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status);
if (0) {
__be32 *ctx = mailbox->buf;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ee4d073c889..7e6a6d64ad4 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -159,6 +159,7 @@ struct mthca_limits {
int reserved_eqs;
int num_mpts;
int num_mtt_segs;
+ int mtt_seg_size;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
@@ -202,6 +203,7 @@ struct mthca_pd_table {
struct mthca_buddy {
unsigned long **bits;
+ int *num_free;
int max_order;
spinlock_t lock;
};
@@ -355,6 +357,7 @@ struct mthca_dev {
struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
spinlock_t sm_lock;
u8 rate[MTHCA_MAX_PORTS];
+ bool active;
};
#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 4e36aa7cb3d..69020173899 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -34,6 +34,7 @@
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
+#include <linux/slab.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -356,7 +357,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
eqe->type, eqe->subtype, eq->eqn);
break;
- };
+ }
set_eqe_hw(eqe);
++eq->cons_index;
@@ -473,7 +474,6 @@ static int mthca_create_eq(struct mthca_dev *dev,
struct mthca_eq_context *eq_context;
int err = -ENOMEM;
int i;
- u8 status;
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
@@ -503,7 +503,7 @@ static int mthca_create_eq(struct mthca_dev *dev,
goto err_out_free_pages;
dma_list[i] = t;
- pci_unmap_addr_set(&eq->page_list[i], mapping, t);
+ dma_unmap_addr_set(&eq->page_list[i], mapping, t);
clear_page(eq->page_list[i].buf);
}
@@ -542,15 +542,9 @@ static int mthca_create_eq(struct mthca_dev *dev,
eq_context->intr = intr;
eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey);
- err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status);
+ err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn);
if (err) {
- mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err);
- goto err_out_free_mr;
- }
- if (status) {
- mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n",
- status);
- err = -EINVAL;
+ mthca_warn(dev, "SW2HW_EQ returned %d\n", err);
goto err_out_free_mr;
}
@@ -578,7 +572,7 @@ static int mthca_create_eq(struct mthca_dev *dev,
if (eq->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
- pci_unmap_addr(&eq->page_list[i],
+ dma_unmap_addr(&eq->page_list[i],
mapping));
mthca_free_mailbox(dev, mailbox);
@@ -596,7 +590,6 @@ static void mthca_free_eq(struct mthca_dev *dev,
{
struct mthca_mailbox *mailbox;
int err;
- u8 status;
int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
PAGE_SIZE;
int i;
@@ -605,11 +598,9 @@ static void mthca_free_eq(struct mthca_dev *dev,
if (IS_ERR(mailbox))
return;
- err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status);
+ err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn);
if (err)
- mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err);
- if (status)
- mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status);
+ mthca_warn(dev, "HW2SW_EQ returned %d\n", err);
dev->eq_table.arm_mask &= ~eq->eqn_mask;
@@ -628,7 +619,7 @@ static void mthca_free_eq(struct mthca_dev *dev,
for (i = 0; i < npages; ++i)
pci_free_consistent(dev->pdev, PAGE_SIZE,
eq->page_list[i].buf,
- pci_unmap_addr(&eq->page_list[i], mapping));
+ dma_unmap_addr(&eq->page_list[i], mapping));
kfree(eq->page_list);
mthca_free_mailbox(dev, mailbox);
@@ -641,38 +632,26 @@ static void mthca_free_irqs(struct mthca_dev *dev)
if (dev->eq_table.have_irq)
free_irq(dev->pdev->irq, dev);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
- if (dev->eq_table.eq[i].have_irq)
+ if (dev->eq_table.eq[i].have_irq) {
free_irq(dev->eq_table.eq[i].msi_x_vector,
dev->eq_table.eq + i);
+ dev->eq_table.eq[i].have_irq = 0;
+ }
}
static int mthca_map_reg(struct mthca_dev *dev,
unsigned long offset, unsigned long size,
void __iomem **map)
{
- unsigned long base = pci_resource_start(dev->pdev, 0);
-
- if (!request_mem_region(base + offset, size, DRV_NAME))
- return -EBUSY;
+ phys_addr_t base = pci_resource_start(dev->pdev, 0);
*map = ioremap(base + offset, size);
- if (!*map) {
- release_mem_region(base + offset, size);
+ if (!*map)
return -ENOMEM;
- }
return 0;
}
-static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset,
- unsigned long size, void __iomem *map)
-{
- unsigned long base = pci_resource_start(dev->pdev, 0);
-
- release_mem_region(base + offset, size);
- iounmap(map);
-}
-
static int mthca_map_eq_regs(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev)) {
@@ -699,9 +678,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
dev->fw.arbel.eq_arm_base) + 4, 4,
&dev->eq_regs.arbel.eq_arm)) {
mthca_err(dev, "Couldn't map EQ arm register, aborting.\n");
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->clr_base);
return -ENOMEM;
}
@@ -710,12 +687,8 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
MTHCA_EQ_SET_CI_SIZE,
&dev->eq_regs.arbel.eq_set_ci_base)) {
mthca_err(dev, "Couldn't map EQ CI register, aborting.\n");
- mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.eq_arm_base) + 4, 4,
- dev->eq_regs.arbel.eq_arm);
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->eq_regs.arbel.eq_arm);
+ iounmap(dev->clr_base);
return -ENOMEM;
}
} else {
@@ -731,8 +704,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
&dev->eq_regs.tavor.ecr_base)) {
mthca_err(dev, "Couldn't map ecr register, "
"aborting.\n");
- mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->clr_base);
return -ENOMEM;
}
}
@@ -744,29 +716,18 @@ static int mthca_map_eq_regs(struct mthca_dev *dev)
static void mthca_unmap_eq_regs(struct mthca_dev *dev)
{
if (mthca_is_memfree(dev)) {
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.eq_set_ci_base,
- MTHCA_EQ_SET_CI_SIZE,
- dev->eq_regs.arbel.eq_set_ci_base);
- mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.eq_arm_base) + 4, 4,
- dev->eq_regs.arbel.eq_arm);
- mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) &
- dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->eq_regs.arbel.eq_set_ci_base);
+ iounmap(dev->eq_regs.arbel.eq_arm);
+ iounmap(dev->clr_base);
} else {
- mthca_unmap_reg(dev, MTHCA_ECR_BASE,
- MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE,
- dev->eq_regs.tavor.ecr_base);
- mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE,
- dev->clr_base);
+ iounmap(dev->eq_regs.tavor.ecr_base);
+ iounmap(dev->clr_base);
}
}
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
{
int ret;
- u8 status;
/*
* We assume that mapping one page is enough for the whole EQ
@@ -780,14 +741,12 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
return -ENOMEM;
dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->eq_table.icm_dma)) {
+ if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
__free_page(dev->eq_table.icm_page);
return -ENOMEM;
}
- ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status);
- if (!ret && status)
- ret = -EINVAL;
+ ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt);
if (ret) {
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
@@ -799,9 +758,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
void mthca_unmap_eq_icm(struct mthca_dev *dev)
{
- u8 status;
-
- mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status);
+ mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1);
pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
@@ -810,7 +767,6 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev)
int mthca_init_eq_table(struct mthca_dev *dev)
{
int err;
- u8 status;
u8 intr;
int i;
@@ -858,49 +814,50 @@ int mthca_init_eq_table(struct mthca_dev *dev)
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
static const char *eq_name[] = {
- [MTHCA_EQ_COMP] = DRV_NAME " (comp)",
- [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
- [MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
+ [MTHCA_EQ_COMP] = DRV_NAME "-comp",
+ [MTHCA_EQ_ASYNC] = DRV_NAME "-async",
+ [MTHCA_EQ_CMD] = DRV_NAME "-cmd"
};
for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+ snprintf(dev->eq_table.eq[i].irq_name,
+ IB_DEVICE_NAME_MAX,
+ "%s@pci:%s", eq_name[i],
+ pci_name(dev->pdev));
err = request_irq(dev->eq_table.eq[i].msi_x_vector,
mthca_is_memfree(dev) ?
mthca_arbel_msi_x_interrupt :
mthca_tavor_msi_x_interrupt,
- 0, eq_name[i], dev->eq_table.eq + i);
+ 0, dev->eq_table.eq[i].irq_name,
+ dev->eq_table.eq + i);
if (err)
goto err_out_cmd;
dev->eq_table.eq[i].have_irq = 1;
}
} else {
+ snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
+ DRV_NAME "@pci:%s", pci_name(dev->pdev));
err = request_irq(dev->pdev->irq,
mthca_is_memfree(dev) ?
mthca_arbel_interrupt :
mthca_tavor_interrupt,
- IRQF_SHARED, DRV_NAME, dev);
+ IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
if (err)
goto err_out_cmd;
dev->eq_table.have_irq = 1;
}
err = mthca_MAP_EQ(dev, async_mask(dev),
- 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
if (err)
mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err);
- if (status)
- mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n",
- dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status);
err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
- 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+ 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);
if (err)
mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n",
dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err);
- if (status)
- mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n",
- dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
if (mthca_is_memfree(dev))
@@ -930,15 +887,14 @@ err_out_free:
void mthca_cleanup_eq_table(struct mthca_dev *dev)
{
- u8 status;
int i;
mthca_free_irqs(dev);
mthca_MAP_EQ(dev, async_mask(dev),
- 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status);
+ 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK,
- 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status);
+ 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn);
for (i = 0; i < MTHCA_NUM_EQ; ++i)
mthca_free_eq(dev, &dev->eq_table.eq[i]);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 640449582ab..b6f7f457fc5 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -104,7 +104,8 @@ static void update_sm_ah(struct mthca_dev *dev,
*/
static void smp_snoop(struct ib_device *ibdev,
u8 port_num,
- struct ib_mad *mad)
+ struct ib_mad *mad,
+ u16 prev_lid)
{
struct ib_event event;
@@ -114,6 +115,7 @@ static void smp_snoop(struct ib_device *ibdev,
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
struct ib_port_info *pinfo =
(struct ib_port_info *) ((struct ib_smp *) mad)->data;
+ u16 lid = be16_to_cpu(pinfo->lid);
mthca_update_rate(to_mdev(ibdev), port_num);
update_sm_ah(to_mdev(ibdev), port_num,
@@ -123,12 +125,15 @@ static void smp_snoop(struct ib_device *ibdev,
event.device = ibdev;
event.element.port_num = port_num;
- if (pinfo->clientrereg_resv_subnetto & 0x80)
+ if (pinfo->clientrereg_resv_subnetto & 0x80) {
event.event = IB_EVENT_CLIENT_REREGISTER;
- else
- event.event = IB_EVENT_LID_CHANGE;
+ ib_dispatch_event(&event);
+ }
- ib_dispatch_event(&event);
+ if (prev_lid != lid) {
+ event.event = IB_EVENT_LID_CHANGE;
+ ib_dispatch_event(&event);
+ }
}
if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
@@ -166,6 +171,8 @@ static void forward_trap(struct mthca_dev *dev,
if (agent) {
send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
/*
* We rely here on the fact that MLX QPs don't use the
* address handle after the send is posted (this is
@@ -194,8 +201,9 @@ int mthca_process_mad(struct ib_device *ibdev,
struct ib_mad *out_mad)
{
int err;
- u8 status;
u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+ u16 prev_lid = 0;
+ struct ib_port_attr pattr;
/* Forward locally generated traps to the SM */
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
@@ -233,26 +241,26 @@ int mthca_process_mad(struct ib_device *ibdev,
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
+ if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
+ in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+ !ib_query_port(ibdev, port_num, &pattr))
+ prev_lid = pattr.lid;
err = mthca_MAD_IFC(to_mdev(ibdev),
mad_flags & IB_MAD_IGNORE_MKEY,
mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad,
- &status);
- if (err) {
- mthca_err(to_mdev(ibdev), "MAD_IFC failed\n");
- return IB_MAD_RESULT_FAILURE;
- }
- if (status == MTHCA_CMD_STAT_BAD_PKT)
+ port_num, in_wc, in_grh, in_mad, out_mad);
+ if (err == -EBADMSG)
return IB_MAD_RESULT_SUCCESS;
- if (status) {
- mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n",
- status);
+ else if (err) {
+ mthca_err(to_mdev(ibdev), "MAD_IFC returned %d\n", err);
return IB_MAD_RESULT_FAILURE;
}
if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad);
+ smp_snoop(ibdev, port_num, in_mad, prev_lid);
node_desc_override(ibdev, out_mad);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index fb9f91b60f3..ded76c101dd 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -37,6 +37,7 @@
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
+#include <linux/gfp.h>
#include "mthca_dev.h"
#include "mthca_config_reg.h"
@@ -125,7 +126,11 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444);
MODULE_PARM_DESC(fmr_reserved_mtts,
"number of memory translation table segments reserved for FMR");
-static char mthca_version[] __devinitdata =
+static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
+module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
+MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)");
+
+static char mthca_version[] =
DRV_NAME ": Mellanox InfiniBand HCA driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -144,7 +149,7 @@ static int mthca_tune_pci(struct mthca_dev *mdev)
} else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE))
mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
- if (pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP)) {
+ if (pci_is_pcie(mdev->pdev)) {
if (pcie_set_readrq(mdev->pdev, 4096)) {
mthca_err(mdev, "Couldn't write PCI Express read request, "
"aborting.\n");
@@ -160,18 +165,14 @@ static int mthca_tune_pci(struct mthca_dev *mdev)
static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
{
int err;
- u8 status;
- err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
+ mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8;
+ err = mthca_QUERY_DEV_LIM(mdev, dev_lim);
if (err) {
- mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_DEV_LIM command returned %d"
+ ", aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
if (dev_lim->min_page_sz > PAGE_SIZE) {
mthca_err(mdev, "HCA minimum page size of %d bigger than "
"kernel PAGE_SIZE of %ld, aborting.\n",
@@ -287,49 +288,32 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
static int mthca_init_tavor(struct mthca_dev *mdev)
{
s64 size;
- u8 status;
int err;
struct mthca_dev_lim dev_lim;
struct mthca_profile profile;
struct mthca_init_hca_param init_hca;
- err = mthca_SYS_EN(mdev, &status);
+ err = mthca_SYS_EN(mdev);
if (err) {
- mthca_err(mdev, "SYS_EN command failed, aborting.\n");
+ mthca_err(mdev, "SYS_EN command returned %d, aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "SYS_EN returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
- err = mthca_QUERY_FW(mdev, &status);
+ err = mthca_QUERY_FW(mdev);
if (err) {
- mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
- goto err_disable;
- }
- if (status) {
- mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "QUERY_FW command returned %d,"
+ " aborting.\n", err);
goto err_disable;
}
- err = mthca_QUERY_DDR(mdev, &status);
+ err = mthca_QUERY_DDR(mdev);
if (err) {
- mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
- goto err_disable;
- }
- if (status) {
- mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "QUERY_DDR command returned %d, aborting.\n", err);
goto err_disable;
}
err = mthca_dev_lim(mdev, &dev_lim);
if (err) {
- mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_DEV_LIM command returned %d, aborting.\n", err);
goto err_disable;
}
@@ -345,29 +329,22 @@ static int mthca_init_tavor(struct mthca_dev *mdev)
goto err_disable;
}
- err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ err = mthca_INIT_HCA(mdev, &init_hca);
if (err) {
- mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
- goto err_disable;
- }
- if (status) {
- mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err);
goto err_disable;
}
return 0;
err_disable:
- mthca_SYS_DIS(mdev, &status);
+ mthca_SYS_DIS(mdev);
return err;
}
static int mthca_load_fw(struct mthca_dev *mdev)
{
- u8 status;
int err;
/* FIXME: use HCA-attached memory for FW if present */
@@ -380,31 +357,21 @@ static int mthca_load_fw(struct mthca_dev *mdev)
return -ENOMEM;
}
- err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status);
+ err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm);
if (err) {
- mthca_err(mdev, "MAP_FA command failed, aborting.\n");
- goto err_free;
- }
- if (status) {
- mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "MAP_FA command returned %d, aborting.\n", err);
goto err_free;
}
- err = mthca_RUN_FW(mdev, &status);
+ err = mthca_RUN_FW(mdev);
if (err) {
- mthca_err(mdev, "RUN_FW command failed, aborting.\n");
- goto err_unmap_fa;
- }
- if (status) {
- mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "RUN_FW command returned %d, aborting.\n", err);
goto err_unmap_fa;
}
return 0;
err_unmap_fa:
- mthca_UNMAP_FA(mdev, &status);
+ mthca_UNMAP_FA(mdev);
err_free:
mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
@@ -417,19 +384,13 @@ static int mthca_init_icm(struct mthca_dev *mdev,
u64 icm_size)
{
u64 aux_pages;
- u8 status;
int err;
- err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status);
+ err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages);
if (err) {
- mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n");
+ mthca_err(mdev, "SET_ICM_SIZE command returned %d, aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n",
(unsigned long long) icm_size >> 10,
@@ -442,14 +403,9 @@ static int mthca_init_icm(struct mthca_dev *mdev,
return -ENOMEM;
}
- err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status);
+ err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm);
if (err) {
- mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n");
- goto err_free_aux;
- }
- if (status) {
- mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "MAP_ICM_AUX returned %d, aborting.\n", err);
goto err_free_aux;
}
@@ -460,11 +416,11 @@ static int mthca_init_icm(struct mthca_dev *mdev,
}
/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
- mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
- dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
+ mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
+ dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
- MTHCA_MTT_SEG_SIZE,
+ mdev->limits.mtt_seg_size,
mdev->limits.num_mtt_segs,
mdev->limits.reserved_mtts,
1, 0);
@@ -590,7 +546,7 @@ err_unmap_eq:
mthca_unmap_eq_icm(mdev);
err_unmap_aux:
- mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_UNMAP_ICM_AUX(mdev);
err_free_aux:
mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
@@ -600,7 +556,6 @@ err_free_aux:
static void mthca_free_icms(struct mthca_dev *mdev)
{
- u8 status;
mthca_free_icm_table(mdev, mdev->mcg_table.table);
if (mdev->mthca_flags & MTHCA_FLAG_SRQ)
@@ -613,7 +568,7 @@ static void mthca_free_icms(struct mthca_dev *mdev)
mthca_free_icm_table(mdev, mdev->mr_table.mtt_table);
mthca_unmap_eq_icm(mdev);
- mthca_UNMAP_ICM_AUX(mdev, &status);
+ mthca_UNMAP_ICM_AUX(mdev);
mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
}
@@ -623,43 +578,32 @@ static int mthca_init_arbel(struct mthca_dev *mdev)
struct mthca_profile profile;
struct mthca_init_hca_param init_hca;
s64 icm_size;
- u8 status;
int err;
- err = mthca_QUERY_FW(mdev, &status);
+ err = mthca_QUERY_FW(mdev);
if (err) {
- mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_FW command failed %d, aborting.\n", err);
return err;
}
- if (status) {
- mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
- }
- err = mthca_ENABLE_LAM(mdev, &status);
- if (err) {
- mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
- return err;
- }
- if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
+ err = mthca_ENABLE_LAM(mdev);
+ if (err == -EAGAIN) {
mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
- } else if (status) {
- mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
- "aborting.\n", status);
- return -EINVAL;
+ } else if (err) {
+ mthca_err(mdev, "ENABLE_LAM returned %d, aborting.\n", err);
+ return err;
}
err = mthca_load_fw(mdev);
if (err) {
- mthca_err(mdev, "Failed to start FW, aborting.\n");
+ mthca_err(mdev, "Loading FW returned %d, aborting.\n", err);
goto err_disable;
}
err = mthca_dev_lim(mdev, &dev_lim);
if (err) {
- mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+ mthca_err(mdev, "QUERY_DEV_LIM returned %d, aborting.\n", err);
goto err_stop_fw;
}
@@ -679,15 +623,9 @@ static int mthca_init_arbel(struct mthca_dev *mdev)
if (err)
goto err_stop_fw;
- err = mthca_INIT_HCA(mdev, &init_hca, &status);
+ err = mthca_INIT_HCA(mdev, &init_hca);
if (err) {
- mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
- goto err_free_icm;
- }
- if (status) {
- mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err);
goto err_free_icm;
}
@@ -697,37 +635,34 @@ err_free_icm:
mthca_free_icms(mdev);
err_stop_fw:
- mthca_UNMAP_FA(mdev, &status);
+ mthca_UNMAP_FA(mdev);
mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
err_disable:
if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
- mthca_DISABLE_LAM(mdev, &status);
+ mthca_DISABLE_LAM(mdev);
return err;
}
static void mthca_close_hca(struct mthca_dev *mdev)
{
- u8 status;
-
- mthca_CLOSE_HCA(mdev, 0, &status);
+ mthca_CLOSE_HCA(mdev, 0);
if (mthca_is_memfree(mdev)) {
mthca_free_icms(mdev);
- mthca_UNMAP_FA(mdev, &status);
+ mthca_UNMAP_FA(mdev);
mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
- mthca_DISABLE_LAM(mdev, &status);
+ mthca_DISABLE_LAM(mdev);
} else
- mthca_SYS_DIS(mdev, &status);
+ mthca_SYS_DIS(mdev);
}
static int mthca_init_hca(struct mthca_dev *mdev)
{
- u8 status;
int err;
struct mthca_adapter adapter;
@@ -739,15 +674,9 @@ static int mthca_init_hca(struct mthca_dev *mdev)
if (err)
return err;
- err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
+ err = mthca_QUERY_ADAPTER(mdev, &adapter);
if (err) {
- mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
- goto err_close;
- }
- if (status) {
- mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
- "aborting.\n", status);
- err = -EINVAL;
+ mthca_err(mdev, "QUERY_ADAPTER command returned %d, aborting.\n", err);
goto err_close;
}
@@ -766,7 +695,6 @@ err_close:
static int mthca_setup_hca(struct mthca_dev *dev)
{
int err;
- u8 status;
MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
@@ -784,7 +712,7 @@ static int mthca_setup_hca(struct mthca_dev *dev)
goto err_uar_table_free;
}
- dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ dev->kar = ioremap((phys_addr_t) dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
if (!dev->kar) {
mthca_err(dev, "Couldn't map kernel access region, "
"aborting.\n");
@@ -827,8 +755,8 @@ static int mthca_setup_hca(struct mthca_dev *dev)
goto err_eq_table_free;
}
- err = mthca_NOP(dev, &status);
- if (err || status) {
+ err = mthca_NOP(dev);
+ if (err) {
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
mthca_warn(dev, "NOP command failed to generate interrupt "
"(IRQ %d).\n",
@@ -921,58 +849,6 @@ err_uar_table_free:
return err;
}
-static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden)
-{
- int err;
-
- /*
- * We can't just use pci_request_regions() because the MSI-X
- * table is right in the middle of the first BAR. If we did
- * pci_request_region and grab all of the first BAR, then
- * setting up MSI-X would fail, since the PCI core wants to do
- * request_mem_region on the MSI-X vector table.
- *
- * So just request what we need right now, and request any
- * other regions we need when setting up EQs.
- */
- if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
- MTHCA_HCR_SIZE, DRV_NAME))
- return -EBUSY;
-
- err = pci_request_region(pdev, 2, DRV_NAME);
- if (err)
- goto err_bar2_failed;
-
- if (!ddr_hidden) {
- err = pci_request_region(pdev, 4, DRV_NAME);
- if (err)
- goto err_bar4_failed;
- }
-
- return 0;
-
-err_bar4_failed:
- pci_release_region(pdev, 2);
-
-err_bar2_failed:
- release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
- MTHCA_HCR_SIZE);
-
- return err;
-}
-
-static void mthca_release_regions(struct pci_dev *pdev,
- int ddr_hidden)
-{
- if (!ddr_hidden)
- pci_release_region(pdev, 4);
-
- pci_release_region(pdev, 2);
-
- release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE,
- MTHCA_HCR_SIZE);
-}
-
static int mthca_enable_msi_x(struct mthca_dev *mdev)
{
struct msix_entry entries[3];
@@ -982,13 +858,9 @@ static int mthca_enable_msi_x(struct mthca_dev *mdev)
entries[1].entry = 1;
entries[2].entry = 2;
- err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err) {
- if (err > 0)
- mthca_info(mdev, "Only %d MSI-X vectors available, "
- "not using MSI-X\n", err);
+ err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err)
return err;
- }
mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
@@ -1059,7 +931,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
ddr_hidden = 1;
- err = mthca_request_regions(pdev, ddr_hidden);
+ err = pci_request_regions(pdev, DRV_NAME);
if (err) {
dev_err(&pdev->dev, "Cannot obtain PCI resources, "
"aborting.\n");
@@ -1068,20 +940,20 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
- err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
goto err_free_res;
}
}
- err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
"consistent PCI DMA mask.\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
@@ -1089,6 +961,9 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
}
}
+ /* We can handle large RDMA requests, so allow larger segments. */
+ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
+
mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
if (!mdev) {
dev_err(&pdev->dev, "Device struct alloc failed, "
@@ -1163,6 +1038,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
pci_set_drvdata(pdev, mdev);
mdev->hca_type = hca_type;
+ mdev->active = true;
+
return 0;
err_unregister:
@@ -1196,7 +1073,7 @@ err_free_dev:
ib_dealloc_device(&mdev->ib_dev);
err_free_res:
- mthca_release_regions(pdev, ddr_hidden);
+ pci_release_regions(pdev);
err_disable_pdev:
pci_disable_device(pdev);
@@ -1207,7 +1084,6 @@ err_disable_pdev:
static void __mthca_remove_one(struct pci_dev *pdev)
{
struct mthca_dev *mdev = pci_get_drvdata(pdev);
- u8 status;
int p;
if (mdev) {
@@ -1215,7 +1091,7 @@ static void __mthca_remove_one(struct pci_dev *pdev)
mthca_unregister_device(mdev);
for (p = 1; p <= mdev->limits.num_ports; ++p)
- mthca_CLOSE_IB(mdev, p, &status);
+ mthca_CLOSE_IB(mdev, p);
mthca_cleanup_mcg_table(mdev);
mthca_cleanup_av_table(mdev);
@@ -1240,8 +1116,7 @@ static void __mthca_remove_one(struct pci_dev *pdev)
pci_disable_msix(pdev);
ib_dealloc_device(&mdev->ib_dev);
- mthca_release_regions(pdev, mdev->mthca_flags &
- MTHCA_FLAG_DDR_HIDDEN);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@@ -1260,18 +1135,13 @@ int __mthca_restart_one(struct pci_dev *pdev)
return __mthca_init_one(pdev, hca_type);
}
-static int __devinit mthca_init_one(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int mthca_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
- static int mthca_version_printed = 0;
int ret;
mutex_lock(&mthca_device_mutex);
- if (!mthca_version_printed) {
- printk(KERN_INFO "%s", mthca_version);
- ++mthca_version_printed;
- }
+ printk_once(KERN_INFO "%s", mthca_version);
if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
@@ -1287,7 +1157,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
return ret;
}
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void mthca_remove_one(struct pci_dev *pdev)
{
mutex_lock(&mthca_device_mutex);
__mthca_remove_one(pdev);
@@ -1324,7 +1194,7 @@ static struct pci_driver mthca_driver = {
.name = DRV_NAME,
.id_table = mthca_pci_table,
.probe = mthca_init_one,
- .remove = __devexit_p(mthca_remove_one)
+ .remove = mthca_remove_one,
};
static void __init __mthca_check_profile_val(const char *name, int *pval,
@@ -1368,6 +1238,12 @@ static void __init mthca_validate_profile(void)
printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n",
hca_profile.fmr_reserved_mtts);
}
+
+ if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
+ printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n",
+ log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8));
+ log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
+ }
}
static int __init mthca_init(void)
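The mthca_main.c hunks above drop the separate firmware-status out-parameter from every command wrapper, so callers test a single errno. A minimal sketch of the convention they move to, assuming the command layer translates the mailbox status byte once; the status values and names below are illustrative, not the driver's actual table:

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/types.h>

/* Illustrative translation of a firmware status byte into an errno. */
static int example_status_to_errno(u8 status)
{
        switch (status) {
        case 0x00:              /* success */
                return 0;
        case 0x22:              /* assumed "LAM not pre-loaded" code */
                return -EAGAIN; /* lets callers test err == -EAGAIN */
        default:
                return -EINVAL;
        }
}

/* Callers now check one return value instead of an (err, status) pair. */
static int example_run_cmd(u8 fw_status)
{
        int err = example_status_to_errno(fw_status);

        if (err && err != -EAGAIN)
                pr_err("command returned %d, aborting\n", err);
        return err;
}

With this shape, the removed "if (status) { ... err = -EINVAL; }" blocks become redundant, which is what the hunks above delete.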
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index 3f5f9487920..6304ae8f4a6 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -31,7 +31,7 @@
*/
#include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/gfp.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -68,7 +68,6 @@ static int find_mgm(struct mthca_dev *dev,
struct mthca_mgm *mgm = mgm_mailbox->buf;
u8 *mgid;
int err;
- u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
@@ -77,38 +76,22 @@ static int find_mgm(struct mthca_dev *dev,
memcpy(mgid, gid, 16);
- err = mthca_MGID_HASH(dev, mailbox, hash, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "MGID_HASH returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_MGID_HASH(dev, mailbox, hash);
+ if (err) {
+ mthca_err(dev, "MGID_HASH failed (%d)\n", err);
goto out;
}
if (0)
- mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:"
- "%04x:%04x:%04x:%04x is %04x\n",
- be16_to_cpu(((__be16 *) gid)[0]),
- be16_to_cpu(((__be16 *) gid)[1]),
- be16_to_cpu(((__be16 *) gid)[2]),
- be16_to_cpu(((__be16 *) gid)[3]),
- be16_to_cpu(((__be16 *) gid)[4]),
- be16_to_cpu(((__be16 *) gid)[5]),
- be16_to_cpu(((__be16 *) gid)[6]),
- be16_to_cpu(((__be16 *) gid)[7]),
- *hash);
+ mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
*index = *hash;
*prev = -1;
do {
- err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_READ_MGM(dev, *index, mgm_mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM failed (%d)\n", err);
goto out;
}
@@ -144,7 +127,6 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int link = 0;
int i;
int err;
- u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
@@ -170,12 +152,9 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
}
- err = mthca_READ_MGM(dev, index, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_READ_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM failed (%d)\n", err);
goto out;
}
memset(mgm, 0, sizeof *mgm);
@@ -199,11 +178,9 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
}
- err = mthca_WRITE_MGM(dev, index, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
+ err = mthca_WRITE_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM failed %d\n", err);
err = -EINVAL;
goto out;
}
@@ -211,24 +188,17 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (!link)
goto out;
- err = mthca_READ_MGM(dev, prev, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_READ_MGM(dev, prev, mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM failed %d\n", err);
goto out;
}
mgm->next_gid_index = cpu_to_be32(index << 6);
- err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
+ err = mthca_WRITE_MGM(dev, prev, mailbox);
if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
- }
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
out:
if (err && link && index != -1) {
@@ -250,7 +220,6 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int prev, index;
int i, loc;
int err;
- u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
@@ -264,16 +233,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
goto out;
if (index == -1) {
- mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
- "not found\n",
- be16_to_cpu(((__be16 *) gid->raw)[0]),
- be16_to_cpu(((__be16 *) gid->raw)[1]),
- be16_to_cpu(((__be16 *) gid->raw)[2]),
- be16_to_cpu(((__be16 *) gid->raw)[3]),
- be16_to_cpu(((__be16 *) gid->raw)[4]),
- be16_to_cpu(((__be16 *) gid->raw)[5]),
- be16_to_cpu(((__be16 *) gid->raw)[6]),
- be16_to_cpu(((__be16 *) gid->raw)[7]));
+ mthca_err(dev, "MGID %pI6 not found\n", gid->raw);
err = -EINVAL;
goto out;
}
@@ -294,12 +254,9 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mgm->qp[loc] = mgm->qp[i - 1];
mgm->qp[i - 1] = 0;
- err = mthca_WRITE_MGM(dev, index, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_WRITE_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
@@ -311,24 +268,17 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
if (amgm_index_to_free) {
err = mthca_READ_MGM(dev, amgm_index_to_free,
- mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n",
- status);
- err = -EINVAL;
+ mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM returned %d\n", err);
goto out;
}
} else
memset(mgm->gid, 0, 16);
- err = mthca_WRITE_MGM(dev, index, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_WRITE_MGM(dev, index, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
if (amgm_index_to_free) {
@@ -338,23 +288,17 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
} else {
/* Remove entry from AMGM */
int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
- err = mthca_READ_MGM(dev, prev, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "READ_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_READ_MGM(dev, prev, mailbox);
+ if (err) {
+ mthca_err(dev, "READ_MGM returned %d\n", err);
goto out;
}
mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
- err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
- if (err)
- goto out;
- if (status) {
- mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_WRITE_MGM(dev, prev, mailbox);
+ if (err) {
+ mthca_err(dev, "WRITE_MGM returned %d\n", err);
goto out;
}
BUG_ON(index < dev->limits.num_mgms);
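The multicast hunks also swap the hand-rolled eight-group GID formatting for the %pI6 printk extension; a minimal usage sketch (the helper name is illustrative):

#include <linux/printk.h>
#include <linux/types.h>

/* One specifier prints the 16-byte GID instead of eight be16_to_cpu() calls. */
static void example_print_gid(const u8 gid[16])
{
        pr_debug("MGID %pI6 not found\n", gid);
}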
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 1f7d1a29d2a..7d2e42dd692 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -35,6 +35,7 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <asm/page.h>
@@ -222,7 +223,6 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
{
int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
int ret = 0;
- u8 status;
mutex_lock(&table->mutex);
@@ -239,8 +239,8 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
goto out;
}
- if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- &status) || status) {
+ if (mthca_MAP_ICM(dev, table->icm[i],
+ table->virt + i * MTHCA_TABLE_CHUNK_SIZE)) {
mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
ret = -ENOMEM;
@@ -257,7 +257,6 @@ out:
void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
int i;
- u8 status;
if (!mthca_is_memfree(dev))
return;
@@ -268,8 +267,7 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o
if (--table->icm[i]->refcount == 0) {
mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
- &status);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
}
@@ -365,7 +363,6 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
int num_icm;
unsigned chunk_size;
int i;
- u8 status;
obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
@@ -395,8 +392,8 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
__GFP_NOWARN, use_coherent);
if (!table->icm[i])
goto err;
- if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
- &status) || status) {
+ if (mthca_MAP_ICM(dev, table->icm[i],
+ virt + i * MTHCA_TABLE_CHUNK_SIZE)) {
mthca_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
goto err;
@@ -415,8 +412,7 @@ err:
for (i = 0; i < num_icm; ++i)
if (table->icm[i]) {
mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
- &status);
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
mthca_free_icm(dev, table->icm[i], table->coherent);
}
@@ -428,13 +424,12 @@ err:
void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
{
int i;
- u8 status;
for (i = 0; i < table->num_icm; ++i)
if (table->icm[i]) {
- mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
- MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
- &status);
+ mthca_UNMAP_ICM(dev,
+ table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
+ MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE);
mthca_free_icm(dev, table->icm[i], table->coherent);
}
@@ -453,7 +448,6 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
{
struct page *pages[1];
int ret = 0;
- u8 status;
int i;
if (!mthca_is_memfree(dev))
@@ -493,9 +487,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
}
ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
- mthca_uarc_virt(dev, uar, i), &status);
- if (!ret && status)
- ret = -EINVAL;
+ mthca_uarc_virt(dev, uar, i));
if (ret) {
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem));
@@ -556,14 +548,13 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
struct mthca_user_db_table *db_tab)
{
int i;
- u8 status;
if (!mthca_is_memfree(dev))
return;
for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
if (db_tab->page[i].uvirt) {
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
put_page(sg_page(&db_tab->page[i].mem));
}
@@ -580,7 +571,6 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
int i, j;
struct mthca_db_page *page;
int ret = 0;
- u8 status;
mutex_lock(&dev->db_tab->mutex);
@@ -643,9 +633,7 @@ alloc:
memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
ret = mthca_MAP_ICM_page(dev, page->mapping,
- mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
- if (!ret && status)
- ret = -EINVAL;
+ mthca_uarc_virt(dev, &dev->driver_uar, i));
if (ret) {
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
@@ -677,7 +665,6 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
{
int i, j;
struct mthca_db_page *page;
- u8 status;
i = db_index / MTHCA_DB_REC_PER_PAGE;
j = db_index % MTHCA_DB_REC_PER_PAGE;
@@ -693,7 +680,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1);
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
page->db_rec, page->mapping);
@@ -744,7 +731,6 @@ int mthca_init_db_tab(struct mthca_dev *dev)
void mthca_cleanup_db_tab(struct mthca_dev *dev)
{
int i;
- u8 status;
if (!mthca_is_memfree(dev))
return;
@@ -762,7 +748,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev)
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1);
dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
dev->db_tab->page[i].db_rec,
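The mthca_memfree.c hunks keep the same refcounted, map-on-first-use discipline for ICM chunks while dropping the status argument; a hedged sketch of that get/put pattern, using purely illustrative names and a single chunk:

#include <linux/mutex.h>

struct example_chunk {
        void *mem;
        int   refcount;
};

struct example_table {
        struct mutex         mutex;
        struct example_chunk chunk;     /* one chunk for brevity */
};

/* Map the chunk on first use, just bump the refcount otherwise. */
static int example_table_get(struct example_table *t,
                             int (*map)(struct example_chunk *))
{
        int ret = 0;

        mutex_lock(&t->mutex);
        if (t->chunk.refcount++ == 0) {
                ret = map(&t->chunk);
                if (ret)
                        t->chunk.refcount--;
        }
        mutex_unlock(&t->mutex);
        return ret;
}

/* Unmap only when the last user goes away. */
static void example_table_put(struct example_table *t,
                              void (*unmap)(struct example_chunk *))
{
        mutex_lock(&t->mutex);
        if (--t->chunk.refcount == 0)
                unmap(&t->chunk);
        mutex_unlock(&t->mutex);
}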
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 8489b1e81c0..ed9a989e501 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
spin_lock(&buddy->lock);
- for (o = order; o <= buddy->max_order; ++o) {
- m = 1 << (buddy->max_order - o);
- seg = find_first_bit(buddy->bits[o], m);
- if (seg < m)
- goto found;
- }
+ for (o = order; o <= buddy->max_order; ++o)
+ if (buddy->num_free[o]) {
+ m = 1 << (buddy->max_order - o);
+ seg = find_first_bit(buddy->bits[o], m);
+ if (seg < m)
+ goto found;
+ }
spin_unlock(&buddy->lock);
return -1;
found:
clear_bit(seg, buddy->bits[o]);
+ --buddy->num_free[o];
while (o > order) {
--o;
seg <<= 1;
set_bit(seg ^ 1, buddy->bits[o]);
+ ++buddy->num_free[o];
}
spin_unlock(&buddy->lock);
@@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
while (test_bit(seg ^ 1, buddy->bits[order])) {
clear_bit(seg ^ 1, buddy->bits[order]);
+ --buddy->num_free[order];
seg >>= 1;
++order;
}
set_bit(seg, buddy->bits[order]);
+ ++buddy->num_free[order];
spin_unlock(&buddy->lock);
}
@@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
GFP_KERNEL);
- if (!buddy->bits)
+ buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
+ GFP_KERNEL);
+ if (!buddy->bits || !buddy->num_free)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
@@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
}
set_bit(0, buddy->bits[buddy->max_order]);
+ buddy->num_free[buddy->max_order] = 1;
return 0;
@@ -161,9 +169,10 @@ err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
+err_out:
kfree(buddy->bits);
+ kfree(buddy->num_free);
-err_out:
return -ENOMEM;
}
@@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
kfree(buddy->bits[i]);
kfree(buddy->bits);
+ kfree(buddy->num_free);
}
static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
@@ -210,7 +220,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
mtt->buddy = buddy;
mtt->order = 0;
- for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+ for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
++mtt->order;
mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
@@ -247,7 +257,6 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
struct mthca_mailbox *mailbox;
__be64 *mtt_entry;
int err = 0;
- u8 status;
int i;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
@@ -257,7 +266,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
while (list_len > 0) {
mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
- mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ mtt->first_seg * dev->limits.mtt_seg_size +
start_index * 8);
mtt_entry[1] = 0;
for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
@@ -271,17 +280,11 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
if (i & 1)
mtt_entry[i + 2] = 0;
- err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
+ err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
if (err) {
mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
goto out;
}
- if (status) {
- mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto out;
- }
list_len -= i;
start_index += i;
@@ -316,7 +319,7 @@ static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
u64 __iomem *mtts;
int i;
- mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+ mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
start_index * sizeof (u64);
for (i = 0; i < list_len; ++i)
mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
@@ -335,17 +338,21 @@ static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
/* For Arbel, all MTTs must fit in the same page. */
BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
/* Require full segments */
- BUG_ON(s % MTHCA_MTT_SEG_SIZE);
+ BUG_ON(s % dev->limits.mtt_seg_size);
mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
- s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+ s / dev->limits.mtt_seg_size, &dma_handle);
BUG_ON(!mtts);
+ dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
+ list_len * sizeof (u64), DMA_TO_DEVICE);
+
for (i = 0; i < list_len; ++i)
mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
- dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE);
+ dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
+ list_len * sizeof (u64), DMA_TO_DEVICE);
}
int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
@@ -427,7 +434,6 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
u32 key;
int i;
int err;
- u8 status;
WARN_ON(buffer_size_shift >= 32);
@@ -469,7 +475,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
if (mr->mtt)
mpt_entry->mtt_seg =
cpu_to_be64(dev->mr_table.mtt_base +
- mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
+ mr->mtt->first_seg * dev->limits.mtt_seg_size);
if (0) {
mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -483,16 +489,10 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
}
err = mthca_SW2HW_MPT(dev, mailbox,
- key & (dev->limits.num_mpts - 1),
- &status);
+ key & (dev->limits.num_mpts - 1));
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox;
- } else if (status) {
- mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_mailbox;
}
mthca_free_mailbox(dev, mailbox);
@@ -553,17 +553,12 @@ static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
int err;
- u8 status;
err = mthca_HW2SW_MPT(dev, NULL,
key_to_hw_index(dev, mr->ibmr.lkey) &
- (dev->limits.num_mpts - 1),
- &status);
+ (dev->limits.num_mpts - 1));
if (err)
mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
- else if (status)
- mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
- status);
mthca_free_region(dev, mr->ibmr.lkey);
mthca_free_mtt(dev, mr->mtt);
@@ -576,7 +571,6 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
struct mthca_mailbox *mailbox;
u64 mtt_seg;
u32 key, idx;
- u8 status;
int list_len = mr->attr.max_pages;
int err = -ENOMEM;
int i;
@@ -616,7 +610,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
goto err_out_table;
}
- mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+ mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
if (mthca_is_memfree(dev)) {
mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
@@ -658,18 +652,11 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
}
err = mthca_SW2HW_MPT(dev, mailbox,
- key & (dev->limits.num_mpts - 1),
- &status);
+ key & (dev->limits.num_mpts - 1));
if (err) {
mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_out_mailbox_free;
}
- if (status) {
- mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_mailbox_free;
- }
mthca_free_mailbox(dev, mailbox);
return 0;
@@ -793,12 +780,15 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
wmb();
+ dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+ list_len * sizeof(u64), DMA_TO_DEVICE);
+
for (i = 0; i < list_len; ++i)
fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
MTHCA_MTT_FLAG_PRESENT);
- dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
- list_len * sizeof(u64), DMA_TO_DEVICE);
+ dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
+ list_len * sizeof(u64), DMA_TO_DEVICE);
fmr->mem.arbel.mpt->key = cpu_to_be32(key);
fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
@@ -836,7 +826,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
int mthca_init_mr_table(struct mthca_dev *dev)
{
- unsigned long addr;
+ phys_addr_t addr;
int mpts, mtts, err, i;
err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
@@ -898,7 +888,7 @@ int mthca_init_mr_table(struct mthca_dev *dev)
dev->mr_table.mtt_base);
dev->mr_table.tavor_fmr.mtt_base =
- ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
+ ioremap(addr, mtts * dev->limits.mtt_seg_size);
if (!dev->mr_table.tavor_fmr.mtt_base) {
mthca_warn(dev, "MTT ioremap for FMR failed.\n");
err = -ENOMEM;
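Two patterns recur in the mthca_mr.c hunks: the buddy allocator now keeps a per-order free count so empty orders are skipped, and CPU updates of DMA-mapped MTT entries are bracketed by the directional sync calls rather than a single dma_sync_single(). A minimal sketch of the sync bracket, with illustrative names:

#include <linux/dma-mapping.h>
#include <linux/types.h>
#include <asm/byteorder.h>

/* Claim the buffer for the CPU, write it, then hand it back to the device. */
static void example_update_mtts(struct device *dev, dma_addr_t handle,
                                __be64 *mtts, const u64 *pages, int n)
{
        int i;

        dma_sync_single_for_cpu(dev, handle, n * sizeof(u64), DMA_TO_DEVICE);

        for (i = 0; i < n; ++i)
                mtts[i] = cpu_to_be64(pages[i]);

        dma_sync_single_for_device(dev, handle, n * sizeof(u64), DMA_TO_DEVICE);
}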
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index d168c254061..8edb28a9a0e 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -94,7 +94,7 @@ s64 mthca_make_profile(struct mthca_dev *dev,
profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE;
profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE;
profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz;
- profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE;
+ profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size;
profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz;
profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
profile[MTHCA_RES_UARC].size = request->uarc_size;
@@ -232,7 +232,7 @@ s64 mthca_make_profile(struct mthca_dev *dev,
dev->limits.num_mtt_segs = profile[i].num;
dev->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
- init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+ init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7;
break;
case MTHCA_RES_UAR:
dev->limits.num_uars = profile[i].num;
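For context on the mtt_seg_sz encoding above, a small worked sketch; the relation between log_mtts_per_seg and the segment size is an assumption inferred from the validation hunk in mthca_main.c (8-byte MTT entries, historically 64-byte segments), not quoted from this diff:

#include <linux/bitops.h>

/* Assumed derivation: 2^log_mtts_per_seg entries of 8 bytes per segment. */
static int example_mtt_seg_size(int log_mtts_per_seg)
{
        return (1 << log_mtts_per_seg) * 8;
}

/* INIT_HCA stores ffs(seg_size) - 7, i.e. log2(seg_size) - 6. */
static int example_mtt_seg_sz_field(int seg_size)
{
        return ffs(seg_size) - 7;       /* 64-byte segments -> 0 */
}

/* Default log_mtts_per_seg = 3 -> 8 MTTs -> 64-byte segments -> field value 0. */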
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 87ad889e367..415f8e1a54d 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -39,7 +39,10 @@
#include <rdma/ib_user_verbs.h>
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
#include <linux/mm.h>
+#include <linux/export.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -62,8 +65,6 @@ static int mthca_query_device(struct ib_device *ibdev,
int err = -ENOMEM;
struct mthca_dev *mdev = to_mdev(ibdev);
- u8 status;
-
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad)
@@ -77,14 +78,9 @@ static int mthca_query_device(struct ib_device *ibdev,
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(mdev, 1, 1,
- 1, NULL, NULL, in_mad, out_mad,
- &status);
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
props->device_cap_flags = mdev->device_cap_flags;
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
@@ -140,7 +136,6 @@ static int mthca_query_port(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -154,14 +149,9 @@ static int mthca_query_port(struct ib_device *ibdev,
in_mad->attr_mod = cpu_to_be32(port);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
props->lmc = out_mad->data[34] & 0x7;
@@ -213,7 +203,6 @@ static int mthca_modify_port(struct ib_device *ibdev,
struct mthca_set_ib_param set_ib;
struct ib_port_attr attr;
int err;
- u8 status;
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
@@ -228,14 +217,9 @@ static int mthca_modify_port(struct ib_device *ibdev,
set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
- err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
+ err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
-
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
return err;
@@ -247,7 +231,6 @@ static int mthca_query_pkey(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -259,14 +242,9 @@ static int mthca_query_pkey(struct ib_device *ibdev,
in_mad->attr_mod = cpu_to_be32(index / 32);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
@@ -282,7 +260,6 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -294,14 +271,9 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
in_mad->attr_mod = cpu_to_be32(port);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
memcpy(gid->raw, out_mad->data + 8, 8);
@@ -310,14 +282,9 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
in_mad->attr_mod = cpu_to_be32(index / 8);
err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
- port, NULL, NULL, in_mad, out_mad,
- &status);
+ port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
@@ -334,6 +301,9 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
struct mthca_ucontext *context;
int err;
+ if (!(to_mdev(ibdev)->active))
+ return ERR_PTR(-EAGAIN);
+
memset(&uresp, 0, sizeof uresp);
uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
@@ -470,6 +440,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
struct mthca_srq *srq;
int err;
+ if (init_attr->srq_type != IB_SRQT_BASIC)
+ return ERR_PTR(-ENOSYS);
+
srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -722,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
+ err = -EFAULT;
goto err_free;
}
@@ -796,7 +770,6 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
struct mthca_cq *cq = to_mcq(ibcq);
struct mthca_resize_cq ucmd;
u32 lkey;
- u8 status;
int ret;
if (entries < 1 || entries > dev->limits.max_cqes)
@@ -823,9 +796,7 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda
lkey = ucmd.lkey;
}
- ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status);
- if (status)
- ret = -EINVAL;
+ ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries));
if (ret) {
if (cq->resize_buf) {
@@ -1006,12 +977,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
int write_mtt_size;
@@ -1039,10 +1010,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
@@ -1060,25 +1028,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- mr->umem->page_size * k;
- /*
- * Be friendly to write_mtt and pass it chunks
- * of appropriate size.
- */
- if (i == write_mtt_size) {
- err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
- if (err)
- goto mtt_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ mr->umem->page_size * k;
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
@@ -1157,7 +1124,6 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
{
struct ib_fmr *fmr;
int err;
- u8 status;
struct mthca_dev *mdev = NULL;
list_for_each_entry(fmr, fmr_list, list) {
@@ -1178,12 +1144,8 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
list_for_each_entry(fmr, fmr_list, list)
mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
- err = mthca_SYNC_TPT(mdev, &status);
- if (err)
- return err;
- if (status)
- return -EINVAL;
- return 0;
+ err = mthca_SYNC_TPT(mdev);
+ return err;
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
@@ -1249,7 +1211,6 @@ static int mthca_init_node_data(struct mthca_dev *dev)
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
- u8 status;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -1260,28 +1221,18 @@ static int mthca_init_node_data(struct mthca_dev *dev)
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mthca_MAD_IFC(dev, 1, 1,
- 1, NULL, NULL, in_mad, out_mad,
- &status);
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(dev, 1, 1,
- 1, NULL, NULL, in_mad, out_mad,
- &status);
+ 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
- if (status) {
- err = -EINVAL;
- goto out;
- }
if (mthca_is_memfree(dev))
dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
@@ -1399,7 +1350,7 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
- ret = ib_register_device(&dev->ib_dev);
+ ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
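The mthca_reg_user_mr() hunk above replaces the per-chunk page_list walk with a flat scatterlist iteration over the umem; a hedged sketch of that loop shape, with illustrative names (the real code additionally batches mthca_write_mtt() calls):

#include <linux/scatterlist.h>
#include <linux/types.h>

/* Expand each mapped sg entry into page-sized DMA addresses. */
static int example_collect_pages(struct scatterlist *sgl, int nmap,
                                 unsigned int page_size, u64 *pages,
                                 int max_pages)
{
        struct scatterlist *sg;
        int entry, k, len, n = 0;

        for_each_sg(sgl, sg, nmap, entry) {
                len = sg_dma_len(sg) / page_size;

                for (k = 0; k < len && n < max_pages; ++k)
                        pages[n++] = sg_dma_address(sg) + (u64)page_size * k;
        }

        return n;       /* number of page addresses collected */
}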
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index c621f8794b8..596acc45569 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -46,7 +46,7 @@
struct mthca_buf_list {
void *buf;
- DECLARE_PCI_UNMAP_ADDR(mapping)
+ DEFINE_DMA_UNMAP_ADDR(mapping);
};
union mthca_buf {
@@ -113,6 +113,7 @@ struct mthca_eq {
int nent;
struct mthca_buf_list *page_list;
struct mthca_mr mr;
+ char irq_name[IB_DEVICE_NAME_MAX];
};
struct mthca_av;
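The header hunk above moves from the PCI-specific unmap-address macro to the generic DMA one; a minimal sketch of how the paired helpers are used (struct and field names are illustrative):

#include <linux/dma-mapping.h>

struct example_buf_list {
        void *buf;
        DEFINE_DMA_UNMAP_ADDR(mapping);
};

static void example_store_mapping(struct example_buf_list *b, dma_addr_t addr)
{
        dma_unmap_addr_set(b, mapping, addr);   /* remember it for unmap */
}

static dma_addr_t example_fetch_mapping(struct example_buf_list *b)
{
        /* Compiles away to 0 on arches that don't need unmap state. */
        return dma_unmap_addr(b, mapping);
}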
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index f5081bfde6d..e354b2f04ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -247,7 +247,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
spin_unlock(&dev->qp_table.lock);
if (!qp) {
- mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ mthca_warn(dev, "Async event %d for bogus QP %08x\n",
+ event_type, qpn);
return;
}
@@ -308,7 +309,6 @@ static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
static void init_port(struct mthca_dev *dev, int port)
{
int err;
- u8 status;
struct mthca_init_ib_param param;
memset(&param, 0, sizeof param);
@@ -319,11 +319,9 @@ static void init_port(struct mthca_dev *dev, int port)
param.gid_cap = dev->limits.gid_table_len;
param.pkey_cap = dev->limits.pkey_table_len;
- err = mthca_INIT_IB(dev, &param, port, &status);
+ err = mthca_INIT_IB(dev, &param, port);
if (err)
mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
- if (status)
- mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
}
static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
@@ -433,7 +431,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
struct mthca_qp_param *qp_param;
struct mthca_qp_context *context;
int mthca_state;
- u8 status;
mutex_lock(&qp->mutex);
@@ -448,12 +445,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
goto out;
}
- err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
- if (err)
- goto out_mailbox;
- if (status) {
- mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
- err = -EINVAL;
+ err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox);
+ if (err) {
+ mthca_warn(dev, "QUERY_QP failed (%d)\n", err);
goto out_mailbox;
}
@@ -508,6 +502,7 @@ done:
qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = qp->sq_policy;
out_mailbox:
mthca_free_mailbox(dev, mailbox);
@@ -555,7 +550,6 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
u32 sqd_event = 0;
- u8 status;
int err = -EINVAL;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
@@ -781,13 +775,10 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
sqd_event = 1 << 31;
err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
- mailbox, sqd_event, &status);
- if (err)
- goto out_mailbox;
- if (status) {
- mthca_warn(dev, "modify QP %d->%d returned status %02x.\n",
- cur_state, new_state, status);
- err = -EINVAL;
+ mailbox, sqd_event);
+ if (err) {
+ mthca_warn(dev, "modify QP %d->%d returned %d.\n",
+ cur_state, new_state, err);
goto out_mailbox;
}
@@ -817,7 +808,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
cur_state != IB_QPS_ERR &&
(new_state == IB_QPS_RESET ||
new_state == IB_QPS_ERR))
- mthca_CLOSE_IB(dev, qp->port, &status);
+ mthca_CLOSE_IB(dev, qp->port);
}
/*
@@ -869,7 +860,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
@@ -1319,10 +1311,12 @@ int mthca_alloc_qp(struct mthca_dev *dev,
}
static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->cqn < recv_cq->cqn) {
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
@@ -1332,10 +1326,12 @@ static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
}
static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->cqn < recv_cq->cqn) {
+ } else if (send_cq->cqn < recv_cq->cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
@@ -1425,7 +1421,6 @@ static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
void mthca_free_qp(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- u8 status;
struct mthca_cq *send_cq;
struct mthca_cq *recv_cq;
@@ -1450,7 +1445,7 @@ void mthca_free_qp(struct mthca_dev *dev,
if (qp->state != IB_QPS_RESET)
mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
- NULL, 0, &status);
+ NULL, 0);
/*
* If this is a userspace QP, the buffers, MR, CQs and so on
@@ -1489,8 +1484,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
int err;
u16 pkey;
- ib_ud_header_init(256, /* assume a MAD */
- mthca_ah_grh_present(to_mah(wr->wr.ud.ah)),
+ ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
+ mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
&sqp->ud_header);
err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
@@ -2259,7 +2254,6 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
int mthca_init_qp_table(struct mthca_dev *dev)
{
int err;
- u8 status;
int i;
spin_lock_init(&dev->qp_table.lock);
@@ -2286,15 +2280,10 @@ int mthca_init_qp_table(struct mthca_dev *dev)
for (i = 0; i < 2; ++i) {
err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
- dev->qp_table.sqp_start + i * 2,
- &status);
- if (err)
- goto err_out;
- if (status) {
+ dev->qp_table.sqp_start + i * 2);
+ if (err) {
mthca_warn(dev, "CONF_SPECIAL_QP returned "
- "status %02x, aborting.\n",
- status);
- err = -EINVAL;
+ "%d, aborting.\n", err);
goto err_out;
}
}
@@ -2302,7 +2291,7 @@ int mthca_init_qp_table(struct mthca_dev *dev)
err_out:
for (i = 0; i < 2; ++i)
- mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+ mthca_CONF_SPECIAL_QP(dev, i, 0);
mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
mthca_alloc_cleanup(&dev->qp_table.alloc);
@@ -2313,10 +2302,9 @@ int mthca_init_qp_table(struct mthca_dev *dev)
void mthca_cleanup_qp_table(struct mthca_dev *dev)
{
int i;
- u8 status;
for (i = 0; i < 2; ++i)
- mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
+ mthca_CONF_SPECIAL_QP(dev, i, 0);
mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
mthca_alloc_cleanup(&dev->qp_table.alloc);
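The CQ lock helpers in the mthca_qp.c hunks gain sparse annotations so the checker sees balanced lock counts even when the send and receive CQs share one lock; a minimal sketch of that pattern, assuming two possibly-identical spinlocks (names are illustrative):

#include <linux/spinlock.h>

static void example_lock_pair(spinlock_t *a, spinlock_t *b)
        __acquires(a) __acquires(b)
{
        if (a == b) {
                spin_lock(a);
                __acquire(b);   /* annotation only: no second lock taken */
        } else {
                spin_lock(a);
                spin_lock_nested(b, SINGLE_DEPTH_NESTING);
        }
}

static void example_unlock_pair(spinlock_t *a, spinlock_t *b)
        __releases(a) __releases(b)
{
        if (a == b) {
                __release(b);   /* balance the dummy acquire above */
                spin_unlock(a);
        } else {
                spin_unlock(b);
                spin_unlock(a);
        }
}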
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index acb6817f606..74c6a942604 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
@@ -114,7 +113,7 @@ int mthca_reset(struct mthca_dev *mdev)
}
hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
- hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
+ hca_pcie_cap = pci_pcie_cap(mdev->pdev);
if (bridge) {
bridge_header = kmalloc(256, GFP_KERNEL);
@@ -242,16 +241,16 @@ good:
if (hca_pcie_cap) {
devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
- if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
- devctl)) {
+ if (pcie_capability_write_word(mdev->pdev, PCI_EXP_DEVCTL,
+ devctl)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
goto out;
}
linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
- if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
- linkctl)) {
+ if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL,
+ linkctl)) {
err = -ENODEV;
mthca_err(mdev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 4fabe62aab8..d22f970480c 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -200,7 +200,6 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq)
{
struct mthca_mailbox *mailbox;
- u8 status;
int ds;
int err;
@@ -266,18 +265,12 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
else
mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
- err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status);
+ err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
if (err) {
mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
goto err_out_free_buf;
}
- if (status) {
- mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n",
- status);
- err = -EINVAL;
- goto err_out_free_buf;
- }
spin_lock_irq(&dev->srq_table.lock);
if (mthca_array_set(&dev->srq_table.srq,
@@ -299,11 +292,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
return 0;
err_out_free_srq:
- err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
+ err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
if (err)
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
- else if (status)
- mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
err_out_free_buf:
if (!pd->ibpd.uobject)
@@ -340,7 +331,6 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
{
struct mthca_mailbox *mailbox;
int err;
- u8 status;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox)) {
@@ -348,11 +338,9 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
return;
}
- err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status);
+ err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
if (err)
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
- else if (status)
- mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status);
spin_lock_irq(&dev->srq_table.lock);
mthca_array_clear(&dev->srq_table.srq,
@@ -378,8 +366,7 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
- int ret;
- u8 status;
+ int ret = 0;
/* We don't support resizing SRQs (yet?) */
if (attr_mask & IB_SRQ_MAX_WR)
@@ -391,16 +378,11 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return -EINVAL;
mutex_lock(&srq->mutex);
- ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+ ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit);
mutex_unlock(&srq->mutex);
-
- if (ret)
- return ret;
- if (status)
- return -EINVAL;
}
- return 0;
+ return ret;
}
int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
@@ -410,14 +392,13 @@ int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
struct mthca_mailbox *mailbox;
struct mthca_arbel_srq_context *arbel_ctx;
struct mthca_tavor_srq_context *tavor_ctx;
- u8 status;
int err;
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status);
+ err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox);
if (err)
goto out;
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index d449eb6ec78..846dc97cf26 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -4,14 +4,13 @@ config INFINIBAND_NES
select LIBCRC32C
select INET_LRO
---help---
- This is a low-level driver for NetEffect RDMA enabled
- Network Interface Cards (RNIC).
+ This is the RDMA Network Interface Card (RNIC) driver for
+ NetEffect Ethernet Cluster Server Adapters.
config INFINIBAND_NES_DEBUG
bool "Verbose debugging output"
depends on INFINIBAND_NES
default n
---help---
- This option causes the NetEffect RNIC driver to produce debug
- messages. Select this if you are developing the driver
- or trying to diagnose a problem.
+ This option enables debug messages from the NetEffect RNIC
+ driver. Select this if you are diagnosing a problem.
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
index 35148513c47..97820c23ece 100644
--- a/drivers/infiniband/hw/nes/Makefile
+++ b/drivers/infiniband/hw/nes/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
-iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o
+iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index d2884e77809..3b2a6dc8ea9 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -44,6 +44,7 @@
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/highmem.h>
+#include <linux/slab.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/byteorder.h>
@@ -67,30 +68,32 @@ MODULE_VERSION(DRV_VERSION);
int max_mtu = 9000;
int interrupt_mod_interval = 0;
-
/* Interoperability */
int mpa_version = 1;
-module_param(mpa_version, int, 0);
+module_param(mpa_version, int, 0644);
MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp (0 or 1)");
/* Interoperability */
int disable_mpa_crc = 0;
-module_param(disable_mpa_crc, int, 0);
+module_param(disable_mpa_crc, int, 0644);
MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
-unsigned int send_first = 0;
-module_param(send_first, int, 0);
-MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
-
-
-unsigned int nes_drv_opt = 0;
-module_param(nes_drv_opt, int, 0);
+unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU;
+module_param(nes_drv_opt, int, 0644);
MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
unsigned int nes_debug_level = 0;
module_param_named(debug_level, nes_debug_level, uint, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug output level");
+unsigned int wqm_quanta = 0x10000;
+module_param(wqm_quanta, int, 0644);
+MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
+
+static bool limit_maxrdreqsz;
+module_param(limit_maxrdreqsz, bool, 0644);
+MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
+
LIST_HEAD(nes_adapter_list);
static LIST_HEAD(nes_dev_list);
@@ -101,12 +104,23 @@ static unsigned int sysfs_nonidx_addr;
static unsigned int sysfs_idx_addr;
static struct pci_device_id nes_pci_table[] = {
- {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
+ { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), },
+ { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), },
{0}
};
MODULE_DEVICE_TABLE(pci, nes_pci_table);
+/* registered nes netlink callbacks */
+static struct ibnl_client_cbs nes_nl_cb_table[] = {
+ [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+ [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+ [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+ [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+ [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
static int nes_net_event(struct notifier_block *, unsigned long, void *);
static int nes_notifiers_registered;
@@ -120,9 +134,6 @@ static struct notifier_block nes_net_notifier = {
.notifier_call = nes_net_event
};
-
-
-
/**
* nes_inetaddr_event
*/
@@ -133,21 +144,21 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
struct net_device *event_netdev = ifa->ifa_dev->dev;
struct nes_device *nesdev;
struct net_device *netdev;
+ struct net_device *upper_dev;
struct nes_vnic *nesvnic;
- unsigned int addr;
- unsigned int mask;
-
- addr = ntohl(ifa->ifa_address);
- mask = ntohl(ifa->ifa_mask);
- nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address " NIPQUAD_FMT
- ", netmask " NIPQUAD_FMT ".\n",
- HIPQUAD(addr), HIPQUAD(mask));
+ unsigned int is_bonded;
+
+ nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n",
+ &ifa->ifa_address, &ifa->ifa_mask);
list_for_each_entry(nesdev, &nes_dev_list, list) {
nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n",
nesdev, nesdev->netdev[0]->name);
netdev = nesdev->netdev[0];
nesvnic = netdev_priv(netdev);
- if (netdev == event_netdev) {
+ upper_dev = netdev_master_upper_dev_get(netdev);
+ is_bonded = netif_is_bond_slave(netdev) &&
+ (upper_dev == event_netdev);
+ if ((netdev == event_netdev) || is_bonded) {
if (nesvnic->rdma_enabled == 0) {
nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
" RDMA is not enabled.\n",
@@ -164,7 +175,10 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
nes_manage_arp_cache(netdev, netdev->dev_addr,
ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE);
nesvnic->local_ipaddr = 0;
- return NOTIFY_OK;
+ if (is_bonded)
+ continue;
+ else
+ return NOTIFY_OK;
break;
case NETDEV_UP:
nes_debug(NES_DBG_NETDEV, "event:UP\n");
@@ -173,15 +187,24 @@ static int nes_inetaddr_event(struct notifier_block *notifier,
nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n");
return NOTIFY_OK;
}
+ /* fall through */
+ case NETDEV_CHANGEADDR:
/* Add the address to the IP table */
- nesvnic->local_ipaddr = ifa->ifa_address;
+ if (upper_dev)
+ nesvnic->local_ipaddr =
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+ else
+ nesvnic->local_ipaddr = ifa->ifa_address;
nes_write_indexed(nesdev,
NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
- ntohl(ifa->ifa_address));
+ ntohl(nesvnic->local_ipaddr));
nes_manage_arp_cache(netdev, netdev->dev_addr,
ntohl(nesvnic->local_ipaddr), NES_ARP_ADD);
- return NOTIFY_OK;
+ if (is_bonded)
+ continue;
+ else
+ return NOTIFY_OK;
break;
default:
break;
@@ -254,13 +277,11 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r
unsigned long flags;
struct nes_qp *nesqp = cqp_request->cqp_callback_pointer;
struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 qp_id;
atomic_inc(&qps_destroyed);
/* Free the control structures */
- qp_id = nesqp->hwqp.qp_id;
if (nesqp->pbl_vbase) {
pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
@@ -276,6 +297,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r
}
nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);
+ nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
kfree(nesqp->allocated_buffer);
}
@@ -289,7 +311,6 @@ void nes_rem_ref(struct ib_qp *ibqp)
struct nes_qp *nesqp;
struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
struct nes_hw_cqp_wqe *cqp_wqe;
struct nes_cqp_request *cqp_request;
u32 opcode;
@@ -303,7 +324,8 @@ void nes_rem_ref(struct ib_qp *ibqp)
}
if (atomic_dec_and_test(&nesqp->refcount)) {
- nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
+ if (nesqp->pau_mode)
+ nes_destroy_pau_qp(nesdev, nesqp);
/* Destroy the QP */
cqp_request = nes_get_cqp_request(nesdev);
@@ -354,10 +376,8 @@ struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
*/
static void nes_print_macaddr(struct net_device *netdev)
{
- DECLARE_MAC_BUF(mac);
-
- nes_debug(NES_DBG_INIT, "%s: %s, IRQ %u\n",
- netdev->name, print_mac(mac, netdev->dev_addr), netdev->irq);
+ nes_debug(NES_DBG_INIT, "%s: %pM, IRQ %u\n",
+ netdev->name, netdev->dev_addr, netdev->irq);
}
/**
@@ -435,12 +455,11 @@ static irqreturn_t nes_interrupt(int irq, void *dev_id)
/**
* nes_probe - Device initialization
*/
-static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
+static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
{
struct net_device *netdev = NULL;
struct nes_device *nesdev = NULL;
int ret = 0;
- struct nes_vnic *nesvnic = NULL;
void __iomem *mmio_regs = NULL;
u8 hw_rev;
@@ -479,23 +498,23 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
}
if ((sizeof(dma_addr_t) > 4)) {
- ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+ ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
if (ret < 0) {
printk(KERN_ERR PFX "64b DMA mask configuration failed\n");
goto bail2;
}
- ret = pci_set_consistent_dma_mask(pcidev, DMA_64BIT_MASK);
+ ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64));
if (ret) {
printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n");
goto bail2;
}
} else {
- ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+ ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
if (ret < 0) {
printk(KERN_ERR PFX "32b DMA mask configuration failed\n");
goto bail2;
}
- ret = pci_set_consistent_dma_mask(pcidev, DMA_32BIT_MASK);
+ ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32));
if (ret) {
printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n");
goto bail2;
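
The hard-coded DMA_64BIT_MASK/DMA_32BIT_MASK constants give way to DMA_BIT_MASK(n). A self-contained sketch of what the macro evaluates to (mirroring the kernel definition, with 64 special-cased so the shift stays defined):

#include <stdint.h>
#include <stdio.h>

/* Equivalent of the kernel's DMA_BIT_MASK(n): the lowest n bits set. */
#define EXAMPLE_DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

int main(void)
{
	printf("32-bit mask: 0x%llx\n",
	       (unsigned long long)EXAMPLE_DMA_BIT_MASK(32)); /* 0xffffffff         */
	printf("64-bit mask: 0x%llx\n",
	       (unsigned long long)EXAMPLE_DMA_BIT_MASK(64)); /* 0xffffffffffffffff */
	return 0;
}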
@@ -522,7 +541,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
spin_lock_init(&nesdev->indexed_regs_lock);
/* Remap the PCI registers in adapter BAR0 to kernel VA space */
- mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
+ mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
+ pci_resource_len(pcidev, BAR_0));
if (mmio_regs == NULL) {
printk(KERN_ERR PFX "Unable to remap BAR0\n");
ret = -EIO;
@@ -559,12 +579,44 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
goto bail5;
}
nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+ nesdev->nesadapter->wqm_quanta = wqm_quanta;
/* nesdev->base_doorbell_index =
nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */
nesdev->base_doorbell_index = 1;
nesdev->doorbell_start = nesdev->nesadapter->doorbell_start;
- nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count;
+ if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ switch (PCI_FUNC(nesdev->pcidev->devfn) %
+ nesdev->nesadapter->port_count) {
+ case 1:
+ nesdev->mac_index = 2;
+ break;
+ case 2:
+ nesdev->mac_index = 1;
+ break;
+ case 3:
+ nesdev->mac_index = 3;
+ break;
+ case 0:
+ default:
+ nesdev->mac_index = 0;
+ }
+ } else {
+ nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) %
+ nesdev->nesadapter->port_count;
+ }
+
+ if ((limit_maxrdreqsz ||
+ ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) &&
+ (hw_rev == NE020_REV1))) &&
+ (pcie_get_readrq(pcidev) > 256)) {
+ if (pcie_set_readrq(pcidev, 256))
+ printk(KERN_ERR PFX "Unable to set max read request"
+ " to 256 bytes\n");
+ else
+ nes_debug(NES_DBG_INIT, "Max read request size set"
+ " to 256 bytes\n");
+ }
tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
@@ -583,7 +635,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
(1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
- nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24));
+ nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24));
}
/* TODO: This really should be the first driver to load, not function 0 */
@@ -629,28 +681,40 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
}
nes_notifiers_registered++;
- /* Initialize network devices */
- if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) {
- goto bail7;
- }
+ if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
+ printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
+ __func__, __LINE__);
- /* Register network device */
- ret = register_netdev(netdev);
- if (ret) {
- printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
- nes_netdev_destroy(netdev);
- goto bail7;
- }
+ ret = iwpm_init(RDMA_NL_NES);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
+ pci_name(pcidev));
+ goto bail7;
+ }
- nes_print_macaddr(netdev);
- /* create a CM core for this netdev */
- nesvnic = netdev_priv(netdev);
+ INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
+
+ /* Initialize network devices */
+ netdev = nes_netdev_init(nesdev, mmio_regs);
+ if (netdev == NULL) {
+ ret = -ENOMEM;
+ goto bail7;
+ }
+
+ /* Register network device */
+ ret = register_netdev(netdev);
+ if (ret) {
+ printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
+ nes_netdev_destroy(netdev);
+ goto bail7;
+ }
- nesdev->netdev_count++;
- nesdev->nesadapter->netdev_count++;
+ nes_print_macaddr(netdev);
+ nesdev->netdev_count++;
+ nesdev->nesadapter->netdev_count++;
- printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n",
+ printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n",
pci_name(pcidev));
return 0;
@@ -666,6 +730,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+ ibnl_remove_client(RDMA_NL_NES);
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
@@ -710,11 +775,12 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i
/**
* nes_remove - unload from kernel
*/
-static void __devexit nes_remove(struct pci_dev *pcidev)
+static void nes_remove(struct pci_dev *pcidev)
{
struct nes_device *nesdev = pci_get_drvdata(pcidev);
struct net_device *netdev;
int netdev_index = 0;
+ unsigned long flags;
if (nesdev->netdev_count) {
netdev = nesdev->netdev[netdev_index];
@@ -728,6 +794,8 @@ static void __devexit nes_remove(struct pci_dev *pcidev)
nesdev->nesadapter->netdev_count--;
}
}
+ ibnl_remove_client(RDMA_NL_NES);
+ iwpm_exit(RDMA_NL_NES);
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
@@ -741,6 +809,14 @@ static void __devexit nes_remove(struct pci_dev *pcidev)
free_irq(pcidev->irq, nesdev);
tasklet_kill(&nesdev->dpc_tasklet);
+ spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
+ if (nesdev->link_recheck) {
+ spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+ cancel_delayed_work_sync(&nesdev->work);
+ } else {
+ spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+ }
+
/* Deallocate the Adapter Structure */
nes_destroy_adapter(nesdev->nesadapter);
@@ -762,7 +838,7 @@ static struct pci_driver nes_pci_driver = {
.name = DRV_NAME,
.id_table = nes_pci_table,
.probe = nes_probe,
- .remove = __devexit_p(nes_remove),
+ .remove = nes_remove,
};
static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
@@ -774,14 +850,14 @@ static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
list_for_each_entry(nesdev, &nes_dev_list, list) {
if (i == ee_flsh_adapter) {
- devfn = nesdev->nesadapter->devfn;
- bus_number = nesdev->nesadapter->bus_number;
+ devfn = nesdev->pcidev->devfn;
+ bus_number = nesdev->pcidev->bus->number;
break;
}
i++;
}
- return snprintf(buf, PAGE_SIZE, "%x:%x", bus_number, devfn);
+ return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn);
}
static ssize_t nes_store_adapter(struct device_driver *ddp,
@@ -1052,6 +1128,57 @@ static ssize_t nes_store_idx_data(struct device_driver *ddp,
return strnlen(buf, count);
}
+
+/**
+ * nes_show_wqm_quanta
+ */
+static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf)
+{
+ u32 wqm_quanta_value = 0xdead;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ wqm_quanta_value = nesdev->nesadapter->wqm_quanta;
+ break;
+ }
+ i++;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value);
+}
+
+
+/**
+ * nes_store_wqm_quanta
+ */
+static ssize_t nes_store_wqm_quanta(struct device_driver *ddp,
+ const char *buf, size_t count)
+{
+ unsigned long wqm_quanta_value;
+ u32 wqm_config1;
+ u32 i = 0;
+ struct nes_device *nesdev;
+
+ if (kstrtoul(buf, 0, &wqm_quanta_value) < 0)
+ return -EINVAL;
+
+ list_for_each_entry(nesdev, &nes_dev_list, list) {
+ if (i == ee_flsh_adapter) {
+ nesdev->nesadapter->wqm_quanta = wqm_quanta_value;
+ wqm_config1 = nes_read_indexed(nesdev,
+ NES_IDX_WQM_CONFIG1);
+ nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1,
+ ((wqm_quanta_value << 1) |
+ (wqm_config1 & 0x00000001)));
+ break;
+ }
+ i++;
+ }
+ return strnlen(buf, count);
+}
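
nes_store_wqm_quanta() rewrites NES_IDX_WQM_CONFIG1 with the new quanta shifted up one bit while preserving bit 0 of the previous register value. A tiny self-contained sketch of that read-modify-write (bit 0 is assumed here to be an enable/valid bit the driver must not disturb):

#include <stdint.h>
#include <assert.h>

/* Pack the WQM quanta into bits [31:1], keeping bit 0 of the old value. */
static uint32_t example_pack_wqm_config1(uint32_t old_config1, uint32_t quanta)
{
	return (quanta << 1) | (old_config1 & 0x00000001);
}

int main(void)
{
	assert(example_pack_wqm_config1(0x00000001, 0x10000) == 0x00020001);
	assert(example_pack_wqm_config1(0x00000000, 0x10000) == 0x00020000);
	return 0;
}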
+
static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR,
nes_show_adapter, nes_store_adapter);
static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR,
@@ -1070,6 +1197,8 @@ static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR,
nes_show_idx_addr, nes_store_idx_addr);
static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR,
nes_show_idx_data, nes_store_idx_data);
+static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR,
+ nes_show_wqm_quanta, nes_store_wqm_quanta);
static int nes_create_driver_sysfs(struct pci_driver *drv)
{
@@ -1083,6 +1212,7 @@ static int nes_create_driver_sysfs(struct pci_driver *drv)
error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data);
error |= driver_create_file(&drv->driver, &driver_attr_idx_addr);
error |= driver_create_file(&drv->driver, &driver_attr_idx_data);
+ error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta);
return error;
}
@@ -1097,6 +1227,7 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv)
driver_remove_file(&drv->driver, &driver_attr_nonidx_data);
driver_remove_file(&drv->driver, &driver_attr_idx_addr);
driver_remove_file(&drv->driver, &driver_attr_idx_data);
+ driver_remove_file(&drv->driver, &driver_attr_wqm_quanta);
}
/**
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 39bd897b40c..bd9d132f11c 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -43,7 +43,6 @@
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
-#include <linux/version.h>
#include <asm/io.h>
#include <linux/crc32c.h>
@@ -52,23 +51,24 @@
#include <rdma/ib_pack.h>
#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
#define NES_SEND_FIRST_WRITE
#define QUEUE_DISCONNECTS
-#define DRV_BUILD "1"
-
#define DRV_NAME "iw_nes"
-#define DRV_VERSION "1.0 KO Build " DRV_BUILD
+#define DRV_VERSION "1.5.0.1"
#define PFX DRV_NAME ": "
/*
* NetEffect PCI vendor id and NE010 PCI device id.
*/
#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
-#define PCI_VENDOR_ID_NETEFFECT 0x1678
-#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
+#define PCI_VENDOR_ID_NETEFFECT 0x1678
+#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
+#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110
#endif
#define NE020_REV 4
@@ -104,6 +104,7 @@
#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080
#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100
#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
+#define NES_DRV_OPT_ENABLE_PAU 0x00000400
#define NES_AEQ_EVENT_TIMEOUT 2500
#define NES_DISCONNECT_EVENT_TIMEOUT 2000
@@ -130,6 +131,8 @@
#define NES_DBG_IW_RX 0x00020000
#define NES_DBG_IW_TX 0x00040000
#define NES_DBG_SHUTDOWN 0x00080000
+#define NES_DBG_PAU 0x00100000
+#define NES_DBG_NLMSG 0x00200000
#define NES_DBG_RSVD1 0x10000000
#define NES_DBG_RSVD2 0x20000000
#define NES_DBG_RSVD3 0x40000000
@@ -138,14 +141,18 @@
#ifdef CONFIG_INFINIBAND_NES_DEBUG
#define nes_debug(level, fmt, args...) \
+do { \
if (level & nes_debug_level) \
- printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args)
+ printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
+} while (0)
-#define assert(expr) \
-if (!(expr)) { \
- printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
- #expr, __FILE__, __func__, __LINE__); \
-}
+#define assert(expr) \
+do { \
+ if (!(expr)) { \
+ printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+} while (0)
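
Both nes_debug() and assert() are now wrapped in do { ... } while (0), the standard idiom for making a multi-statement (or if-based) macro behave as a single statement. A minimal self-contained illustration of the pitfall it avoids (the LOG_* macros are hypothetical, not from the driver):

#include <stdio.h>

/* Expands to a bare if: writing "LOG_UNSAFE(x);" inside an if/else either
 * mis-binds the else or fails to compile because of the stray ';'. */
#define LOG_UNSAFE(cond) \
	if (cond) printf("unsafe log\n")

/* The do { } while (0) wrapper turns the body into one statement that
 * requires a trailing ';', so it nests cleanly in if/else. */
#define LOG_SAFE(cond) \
	do { if (cond) printf("safe log\n"); } while (0)

int main(void)
{
	int enabled = 0;

	if (enabled)
		LOG_SAFE(1);
	else
		printf("disabled\n");	/* binds to the outer if, as intended */
	return 0;
}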
#define NES_EVENT_TIMEOUT 1200000
#else
@@ -160,6 +167,7 @@ if (!(expr)) { \
#include "nes_context.h"
#include "nes_user.h"
#include "nes_cm.h"
+#include "nes_mgt.h"
extern int max_mtu;
#define max_frame_len (max_mtu+ETH_HLEN)
@@ -167,10 +175,9 @@ extern int interrupt_mod_interval;
extern int nes_if_count;
extern int mpa_version;
extern int disable_mpa_crc;
-extern unsigned int send_first;
extern unsigned int nes_drv_opt;
extern unsigned int nes_debug_level;
-
+extern unsigned int wqm_quanta;
extern struct list_head nes_adapter_list;
extern atomic_t cm_connects;
@@ -192,14 +199,16 @@ extern u32 cm_packets_created;
extern u32 cm_packets_received;
extern u32 cm_packets_dropped;
extern u32 cm_packets_retrans;
-extern u32 cm_listens_created;
-extern u32 cm_listens_destroyed;
+extern atomic_t cm_listens_created;
+extern atomic_t cm_listens_destroyed;
extern u32 cm_backlog_drops;
extern atomic_t cm_loopbacks;
extern atomic_t cm_nodes_created;
extern atomic_t cm_nodes_destroyed;
extern atomic_t cm_accel_dropped_pkts;
extern atomic_t cm_resets_recvd;
+extern atomic_t pau_qps_created;
+extern atomic_t pau_qps_destroyed;
extern u32 int_mod_timer_init;
extern u32 int_mod_cq_depth_256;
@@ -260,13 +269,25 @@ struct nes_device {
u16 base_doorbell_index;
u16 currcq_count;
u16 deepcq_count;
+ u8 iw_status;
u8 msi_enabled;
u8 netdev_count;
u8 napi_isr_ran;
u8 disable_rx_flow_control;
u8 disable_tx_flow_control;
+
+ struct delayed_work work;
+ u8 link_recheck;
};
+/* Receive skb private area - must fit in skb->cb area */
+struct nes_rskb_cb {
+ u64 busaddr;
+ u32 maplen;
+ u32 seqnum;
+ u8 *data_start;
+ struct nes_qp *nesqp;
+};
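
struct nes_rskb_cb keeps per-packet receive state in the skb control buffer, which is only 48 bytes; a compile-time size check is the usual way to guarantee the structure fits. A minimal sketch (the helper name is hypothetical):

/* Sketch: accessor that also enforces the skb->cb size limit at build time. */
static inline struct nes_rskb_cb *example_rskb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct nes_rskb_cb) > sizeof(skb->cb));
	return (struct nes_rskb_cb *)skb->cb;
}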
static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
{
@@ -286,8 +307,8 @@ static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
static inline void
set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value)
{
- wqe_words[index] = cpu_to_le32((u32) ((unsigned long)value));
- wqe_words[index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)value)));
+ wqe_words[index] = cpu_to_le32((u32) value);
+ wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value));
}
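
set_wqe_64bit_value() now splits the full 64-bit value instead of first casting it through unsigned long, which would have discarded the upper half on 32-bit kernels. A self-contained sketch of the split (byte-order conversion omitted):

#include <stdint.h>
#include <assert.h>

/* Store a 64-bit value as two consecutive 32-bit words, low word first. */
static void example_split64(uint32_t words[2], uint64_t value)
{
	words[0] = (uint32_t)value;		/* low 32 bits  */
	words[1] = (uint32_t)(value >> 32);	/* high 32 bits */
}

int main(void)
{
	uint32_t w[2];

	example_split64(w, 0x1122334455667788ULL);
	assert(w[0] == 0x55667788 && w[1] == 0x11223344);
	return 0;
}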
static inline void
@@ -299,8 +320,8 @@ set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value)
static inline void
nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
{
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX,
- (u64)((unsigned long) &nesdev->cqp));
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0;
+ cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
@@ -380,11 +401,20 @@ static inline void nes_write8(void __iomem *addr, u8 val)
writeb(val, addr);
}
-
+enum nes_resource {
+ NES_RESOURCE_MW = 1,
+ NES_RESOURCE_FAST_MR,
+ NES_RESOURCE_PHYS_MR,
+ NES_RESOURCE_USER_MR,
+ NES_RESOURCE_PD,
+ NES_RESOURCE_QP,
+ NES_RESOURCE_CQ,
+ NES_RESOURCE_ARP
+};
static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
unsigned long *resource_array, u32 max_resources,
- u32 *req_resource_num, u32 *next)
+ u32 *req_resource_num, u32 *next, enum nes_resource resource_type)
{
unsigned long flags;
u32 resource_num;
@@ -395,7 +425,7 @@ static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
if (resource_num >= max_resources) {
resource_num = find_first_zero_bit(resource_array, max_resources);
if (resource_num >= max_resources) {
- printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__);
+ printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type);
spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
return -EMFILE;
}
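
nes_alloc_resource() hands out IDs from a bitmap, scanning forward from the previous allocation and retrying once from the start before giving up, and it now reports which resource type ran out. A self-contained sketch of that round-robin scheme, using plain C bit operations in place of find_next_zero_bit()/set_bit():

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAX_RES 64	/* small enough for a single 64-bit word */

/* Round-robin allocator: start at *next, wrap around once, return -1 when full. */
static int example_alloc_resource(uint64_t *bitmap, uint32_t *next)
{
	uint32_t n = *next;
	uint32_t i;

	for (i = 0; i < EXAMPLE_MAX_RES; i++, n = (n + 1) % EXAMPLE_MAX_RES) {
		if (!(*bitmap & (1ULL << n))) {
			*bitmap |= 1ULL << n;		/* mark as allocated */
			*next = (n + 1) % EXAMPLE_MAX_RES;
			return (int)n;
		}
	}
	return -1;	/* nothing free, like the driver's -EMFILE */
}

int main(void)
{
	uint64_t map = 0;
	uint32_t next = 0;

	printf("first id:  %d\n", example_alloc_resource(&map, &next));	/* 0 */
	printf("second id: %d\n", example_alloc_resource(&map, &next));	/* 1 */
	return 0;
}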
@@ -504,6 +534,8 @@ void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
int nes_destroy_cqp(struct nes_device *);
int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
+void nes_recheck_link_status(struct work_struct *work);
+void nes_terminate_timeout(unsigned long context);
/* nes_nic.c */
struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
@@ -522,9 +554,10 @@ int nes_cm_disconn(struct nes_qp *);
void nes_cm_disconn_worker(void *);
/* nes_verbs.c */
-int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32);
+int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
struct nes_ib_device *nes_init_ofa_device(struct net_device *);
+void nes_port_ibevent(struct nes_vnic *nesvnic);
void nes_destroy_ofa_device(struct nes_ib_device *);
int nes_register_ofa_device(struct nes_ib_device *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6aa531d5276..6f09a72e78d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,7 +34,7 @@
#define TCPOPT_TIMESTAMP 8
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/tcp.h>
@@ -52,10 +52,14 @@
#include <linux/random.h>
#include <linux/list.h>
#include <linux/threads.h>
-
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/ip_fib.h>
+#include <net/tcp.h>
+#include <linux/fcntl.h>
#include "nes.h"
@@ -65,8 +69,8 @@ u32 cm_packets_dropped;
u32 cm_packets_retrans;
u32 cm_packets_created;
u32 cm_packets_received;
-u32 cm_listens_created;
-u32 cm_listens_destroyed;
+atomic_t cm_listens_created;
+atomic_t cm_listens_destroyed;
u32 cm_backlog_drops;
atomic_t cm_loopbacks;
atomic_t cm_nodes_created;
@@ -75,35 +79,57 @@ atomic_t cm_accel_dropped_pkts;
atomic_t cm_resets_recvd;
static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
- struct nes_vnic *, struct nes_cm_info *);
-static int add_ref_cm_node(struct nes_cm_node *);
-static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *);
static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
-static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
- void *, u32, void *, u32, u8);
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
-
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
- struct nes_vnic *,
- struct ietf_mpa_frame *,
- struct nes_cm_info *);
-static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
- struct nes_cm_node *);
-static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
- struct nes_cm_node *);
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *);
static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
- struct sk_buff *);
+static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
static int mini_cm_dealloc_core(struct nes_cm_core *);
static int mini_cm_get(struct nes_cm_core *);
static int mini_cm_set(struct nes_cm_core *, u32, u32);
+
+static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8);
+static int add_ref_cm_node(struct nes_cm_node *);
+static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
+
static int nes_cm_disconn_true(struct nes_qp *);
static int nes_cm_post_event(struct nes_cm_event *event);
static int nes_disconnect(struct nes_qp *nesqp, int abrupt);
static void nes_disconnect_worker(struct work_struct *work);
-static int send_ack(struct nes_cm_node *cm_node);
+
+static int send_mpa_request(struct nes_cm_node *, struct sk_buff *);
+static int send_mpa_reject(struct nes_cm_node *);
+static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
+static int send_reset(struct nes_cm_node *, struct sk_buff *);
+static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
+static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
+
+static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
+static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
+static void cleanup_retrans_entry(struct nes_cm_node *);
+static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *);
+static void free_retrans_entry(struct nes_cm_node *cm_node);
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive);
+
+/* CM event handler functions */
+static void cm_event_connected(struct nes_cm_event *);
+static void cm_event_connect_error(struct nes_cm_event *);
+static void cm_event_reset(struct nes_cm_event *);
+static void cm_event_mpa_req(struct nes_cm_event *);
+static void cm_event_mpa_reject(struct nes_cm_event *);
+static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node);
+
+/* MPA build functions */
+static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8);
+static void build_mpa_v2(struct nes_cm_node *, void *, u8);
+static void build_mpa_v1(struct nes_cm_node *, void *, u8);
+static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
+
+static void print_core(struct nes_cm_core *core);
+static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* External CM API Interface */
/* instance of function pointers for client API */
@@ -132,12 +158,20 @@ atomic_t cm_connecteds;
atomic_t cm_connect_reqs;
atomic_t cm_rejects;
+int nes_add_ref_cm_node(struct nes_cm_node *cm_node)
+{
+ return add_ref_cm_node(cm_node);
+}
+int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
+{
+ return rem_ref_cm_node(cm_node->cm_core, cm_node);
+}
/**
* create_event
*/
-static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
- enum nes_cm_event_type type)
+static struct nes_cm_event *create_event(struct nes_cm_node * cm_node,
+ enum nes_cm_event_type type)
{
struct nes_cm_event *event;
@@ -158,11 +192,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
event->cm_info.loc_port = cm_node->loc_port;
event->cm_info.cm_id = cm_node->cm_id;
- nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x],"
- " src_addr=%08x[%x]\n",
- event, type,
- event->cm_info.loc_addr, event->cm_info.loc_port,
- event->cm_info.rem_addr, event->cm_info.rem_port);
+ nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
+ "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
+ cm_node, event, type, event->cm_info.loc_addr,
+ event->cm_info.loc_port, event->cm_info.rem_addr,
+ event->cm_info.rem_port);
nes_cm_post_event(event);
return event;
@@ -172,27 +206,50 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
/**
* send_mpa_request
*/
-static int send_mpa_request(struct nes_cm_node *cm_node)
+static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
{
- struct sk_buff *skb;
- int ret;
+ u8 start_addr = 0;
+ u8 *start_ptr = &start_addr;
+ u8 **start_buff = &start_ptr;
+ u16 buff_len = 0;
- skb = get_free_pkt(cm_node);
if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ nes_debug(NES_DBG_CM, "skb set to NULL\n");
return -1;
}
/* send an MPA Request frame */
- form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
- cm_node->mpa_frame_size, SET_ACK);
+ cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST);
+ form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
- if (ret < 0) {
- return ret;
+ return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+}
+
+
+
+static int send_mpa_reject(struct nes_cm_node *cm_node)
+{
+ struct sk_buff *skb = NULL;
+ u8 start_addr = 0;
+ u8 *start_ptr = &start_addr;
+ u8 **start_buff = &start_ptr;
+ u16 buff_len = 0;
+ struct ietf_mpa_v1 *mpa_frame;
+
+ skb = dev_alloc_skb(MAX_CM_BUFFER);
+ if (!skb) {
+ nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
+ return -ENOMEM;
}
- return 0;
+ /* send an MPA reject frame */
+ cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY);
+ mpa_frame = (struct ietf_mpa_v1 *)*start_buff;
+ mpa_frame->flags |= IETF_MPA_FLAGS_REJECT;
+ form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN);
+
+ cm_node->state = NES_CM_STATE_FIN_WAIT1;
+ return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
}
@@ -200,65 +257,174 @@ static int send_mpa_request(struct nes_cm_node *cm_node)
* recv_mpa - process a received TCP pkt, we are expecting an
* IETF MPA frame
*/
-static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len)
+static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
+ u32 len)
{
- struct ietf_mpa_frame *mpa_frame;
+ struct ietf_mpa_v1 *mpa_frame;
+ struct ietf_mpa_v2 *mpa_v2_frame;
+ struct ietf_rtr_msg *rtr_msg;
+ int mpa_hdr_len;
+ int priv_data_len;
+
+ *type = NES_MPA_REQUEST_ACCEPT;
/* assume req frame is in tcp data payload */
- if (len < sizeof(struct ietf_mpa_frame)) {
+ if (len < sizeof(struct ietf_mpa_v1)) {
nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
- return -1;
+ return -EINVAL;
}
- mpa_frame = (struct ietf_mpa_frame *)buffer;
- cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+ /* points to the beginning of the frame, which could be MPA V1 or V2 */
+ mpa_frame = (struct ietf_mpa_v1 *)buffer;
+ mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+ priv_data_len = ntohs(mpa_frame->priv_data_len);
- if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
- " complete (%x + %x != %x)\n",
- cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len);
- return -1;
+ /* make sure mpa private data len is less than 512 bytes */
+ if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+ nes_debug(NES_DBG_CM, "The received Length of Private"
+ " Data field exceeds 512 octets\n");
+ return -EINVAL;
+ }
+ /*
+ * make sure MPA receiver interoperate with the
+ * received MPA version and MPA key information
+ *
+ */
+ if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+ nes_debug(NES_DBG_CM, "The received mpa version"
+ " is not supported\n");
+ return -EINVAL;
+ }
+ /*
+ * backwards compatibility only
+ */
+ if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+ nes_debug(NES_DBG_CM, "The received mpa version"
+ " can not be interoperated\n");
+ return -EINVAL;
+ } else {
+ cm_node->mpa_frame_rev = mpa_frame->rev;
}
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame),
- cm_node->mpa_frame_size);
+ if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+ nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+ return -EINVAL;
+ }
+ } else {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+ nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+ return -EINVAL;
+ }
+ }
- return 0;
-}
+ if (priv_data_len + mpa_hdr_len != len) {
+ nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
+ " complete (%x + %x != %x)\n",
+ priv_data_len, mpa_hdr_len, len);
+ return -EINVAL;
+ }
+ /* make sure it does not exceed the max size */
+ if (len > MAX_CM_BUFFER) {
+ nes_debug(NES_DBG_CM, "The received ietf buffer was too large"
+ " (%x + %x != %x)\n",
+ priv_data_len, mpa_hdr_len, len);
+ return -EINVAL;
+ }
+
+ cm_node->mpa_frame_size = priv_data_len;
+
+ switch (mpa_frame->rev) {
+ case IETF_MPA_V2: {
+ u16 ird_size;
+ u16 ord_size;
+ u16 rtr_ctrl_ird;
+ u16 rtr_ctrl_ord;
+
+ mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
+ mpa_hdr_len += IETF_RTR_MSG_SIZE;
+ cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE;
+ rtr_msg = &mpa_v2_frame->rtr_msg;
+
+ /* parse rtr message */
+ rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+ rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+ ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD;
+ ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD;
+
+ if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) {
+ /* send reset */
+ return -EINVAL;
+ }
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+
+ if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
+ /* responder */
+ if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ /* we are still negotiating */
+ if (ord_size > NES_MAX_IRD) {
+ cm_node->ird_size = NES_MAX_IRD;
+ } else {
+ cm_node->ird_size = ord_size;
+ if (ord_size == 0 &&
+ (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+ cm_node->ird_size = 1;
+ nes_debug(NES_DBG_CM,
+ "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
+ __func__, ord_size);
+ }
+ }
+ if (ird_size > NES_MAX_ORD)
+ cm_node->ord_size = NES_MAX_ORD;
+ else
+ cm_node->ord_size = ird_size;
+ } else { /* initiator */
+ if (ord_size > NES_MAX_IRD) {
+ nes_debug(NES_DBG_CM,
+ "%s: Unable to support the requested (ord =%u)\n",
+ __func__, ord_size);
+ return -EINVAL;
+ }
+ cm_node->ird_size = ord_size;
+ if (ird_size > NES_MAX_ORD) {
+ cm_node->ord_size = NES_MAX_ORD;
+ } else {
+ if (ird_size == 0 &&
+ (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+ nes_debug(NES_DBG_CM,
+ "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
+ __func__, ird_size);
+ return -EINVAL;
+ } else {
+ cm_node->ord_size = ird_size;
+ }
+ }
+ }
+ }
-/**
- * handle_exception_pkt - process an exception packet.
- * We have been in a TSA state, and we have now received SW
- * TCP/IP traffic should be a FIN request or IP pkt with options
- */
-static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret = 0;
- struct tcphdr *tcph = tcp_hdr(skb);
+ if (rtr_ctrl_ord & IETF_RDMA0_READ) {
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- /* first check to see if this a FIN pkt */
- if (tcph->fin) {
- /* we need to ACK the FIN request */
- send_ack(cm_node);
-
- /* check which side we are (client/server) and set next state accordingly */
- if (cm_node->tcp_cntxt.client)
- cm_node->state = NES_CM_STATE_CLOSING;
- else {
- /* we are the server side */
- cm_node->state = NES_CM_STATE_CLOSE_WAIT;
- /* since this is a self contained CM we don't wait for */
- /* an APP to close us, just send final FIN immediately */
- ret = send_fin(cm_node, NULL);
- cm_node->state = NES_CM_STATE_LAST_ACK;
+ } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
+ cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+ } else { /* Not supported RDMA0 operation */
+ return -EINVAL;
}
- } else {
- ret = -EINVAL;
+ break;
+ }
+ case IETF_MPA_V1:
+ default:
+ break;
}
- return ret;
+ /* copy entire MPA frame to our cm_node's frame */
+ memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size);
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+ *type = NES_MPA_REQUEST_REJECT;
+ return 0;
}
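
In MPA v2 the IRD/ORD exchange is carried in the RTR message: the low 14 bits of ctrl_ird/ctrl_ord hold the value (with 0x3FFF meaning "no IRD/ORD supplied") and the upper bits are control flags such as peer-to-peer mode and the RDMA0 operation. A small self-contained sketch of the encoding; the flag values are assumptions taken from the driver's header, not restated from this hunk:

#include <stdint.h>
#include <stdio.h>

/* Assumed bit layout (see nes_cm.h): low 14 bits = IRD/ORD, rest = flags. */
#define EX_IETF_NO_IRD_ORD	0x3FFF	/* also the "not supplied" marker */
#define EX_IETF_PEER_TO_PEER	0x8000	/* ctrl_ird: peer-to-peer mode    */
#define EX_IETF_RDMA0_READ	0x4000	/* ctrl_ord: RDMA0 op is a read   */

int main(void)
{
	uint16_t ctrl_ird = EX_IETF_PEER_TO_PEER | (16 & EX_IETF_NO_IRD_ORD);
	uint16_t ctrl_ord = EX_IETF_RDMA0_READ | (4 & EX_IETF_NO_IRD_ORD);

	printf("ird=%u p2p=%u\n", (unsigned)(ctrl_ird & EX_IETF_NO_IRD_ORD),
	       (unsigned)!!(ctrl_ird & EX_IETF_PEER_TO_PEER));	/* ird=16 p2p=1 */
	printf("ord=%u rdma0_read=%u\n", (unsigned)(ctrl_ord & EX_IETF_NO_IRD_ORD),
	       (unsigned)!!(ctrl_ord & EX_IETF_RDMA0_READ));	/* ord=4 read=1 */
	return 0;
}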
@@ -266,9 +432,9 @@ static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb
* form_cm_frame - get a free packet and build empty frame Use
* node info to build.
*/
-static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node,
- void *options, u32 optionsize, void *data,
- u32 datasize, u8 flags)
+static void form_cm_frame(struct sk_buff *skb,
+ struct nes_cm_node *cm_node, void *options, u32 optionsize,
+ void *data, u32 datasize, u8 flags)
{
struct tcphdr *tcph;
struct iphdr *iph;
@@ -277,14 +443,14 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm
u16 packetsize = sizeof(*iph);
packetsize += sizeof(*tcph);
- packetsize += optionsize + datasize;
+ packetsize += optionsize + datasize;
+ skb_trim(skb, 0);
memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
- skb->len = 0;
buf = skb_put(skb, packetsize + ETH_HLEN);
- ethh = (struct ethhdr *) buf;
+ ethh = (struct ethhdr *)buf;
buf += ETH_HLEN;
iph = (struct iphdr *)buf;
@@ -292,10 +458,12 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm
tcph = (struct tcphdr *)buf;
skb_reset_mac_header(skb);
skb_set_network_header(skb, ETH_HLEN);
- skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph));
+ skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph));
buf += sizeof(*tcph);
skb->ip_summed = CHECKSUM_PARTIAL;
+ if (!(cm_node->netdev->features & NETIF_F_IP_CSUM))
+ skb->ip_summed = CHECKSUM_NONE;
skb->protocol = htons(0x800);
skb->data_len = 0;
skb->mac_len = ETH_HLEN;
@@ -305,37 +473,41 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm
ethh->h_proto = htons(0x0800);
iph->version = IPVERSION;
- iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
+	iph->ihl = 5;		/* 5 * 4Byte words, IP header len */
iph->tos = 0;
iph->tot_len = htons(packetsize);
iph->id = htons(++cm_node->tcp_cntxt.loc_id);
iph->frag_off = htons(0x4000);
iph->ttl = 0x40;
- iph->protocol = 0x06; /* IPPROTO_TCP */
+ iph->protocol = 0x06; /* IPPROTO_TCP */
- iph->saddr = htonl(cm_node->loc_addr);
- iph->daddr = htonl(cm_node->rem_addr);
+ iph->saddr = htonl(cm_node->mapped_loc_addr);
+ iph->daddr = htonl(cm_node->mapped_rem_addr);
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
+ tcph->source = htons(cm_node->mapped_loc_port);
+ tcph->dest = htons(cm_node->mapped_rem_port);
tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
if (flags & SET_ACK) {
cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
tcph->ack = 1;
- } else
+ } else {
tcph->ack_seq = 0;
+ }
if (flags & SET_SYN) {
cm_node->tcp_cntxt.loc_seq_num++;
tcph->syn = 1;
- } else
- cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += datasize;
+ }
- if (flags & SET_FIN)
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
tcph->fin = 1;
+ }
if (flags & SET_RST)
tcph->rst = 1;
@@ -351,10 +523,101 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm
skb_shinfo(skb)->nr_frags = 0;
cm_packets_created++;
+}
+
+/*
+ * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct
+ */
+static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
+ struct sockaddr_storage *addr)
+{
+ struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
+ nes_sockaddr->sin_family = AF_INET;
+ memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32));
+ nes_sockaddr->sin_port = port;
+}
+
+/*
+ * nes_create_mapinfo - Create a mapinfo object in the port mapper data base
+ */
+static int nes_create_mapinfo(struct nes_cm_info *cm_info)
+{
+ struct sockaddr_storage local_sockaddr;
+ struct sockaddr_storage mapped_sockaddr;
+
+ nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
+ &local_sockaddr);
+ nes_create_sockaddr(htonl(cm_info->mapped_loc_addr),
+ htons(cm_info->mapped_loc_port), &mapped_sockaddr);
+
+ return iwpm_create_mapinfo(&local_sockaddr,
+ &mapped_sockaddr, RDMA_NL_NES);
+}
+
+/*
+ * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base
+ * and send a remove mapping op message to
+ * the userspace port mapper
+ */
+static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
+ u32 mapped_loc_addr, u16 mapped_loc_port)
+{
+ struct sockaddr_storage local_sockaddr;
+ struct sockaddr_storage mapped_sockaddr;
+
+ nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr);
+ nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port),
+ &mapped_sockaddr);
+
+ iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr);
+ return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES);
+}
+
+/*
+ * nes_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void nes_form_pm_msg(struct nes_cm_info *cm_info,
+ struct iwpm_sa_data *pm_msg)
+{
+ nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
+ &pm_msg->loc_addr);
+ nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port),
+ &pm_msg->rem_addr);
+}
- return skb;
+/*
+ * nes_form_reg_msg - Form a port mapper message with dev info
+ */
+static void nes_form_reg_msg(struct nes_vnic *nesvnic,
+ struct iwpm_dev_data *pm_msg)
+{
+ memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name,
+ IWPM_DEVNAME_SIZE);
+ memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
}
+/*
+ * nes_record_pm_msg - Save the received mapping info
+ */
+static void nes_record_pm_msg(struct nes_cm_info *cm_info,
+ struct iwpm_sa_data *pm_msg)
+{
+ struct sockaddr_in *mapped_loc_addr =
+ (struct sockaddr_in *)&pm_msg->mapped_loc_addr;
+ struct sockaddr_in *mapped_rem_addr =
+ (struct sockaddr_in *)&pm_msg->mapped_rem_addr;
+
+ if (mapped_loc_addr->sin_family == AF_INET) {
+ cm_info->mapped_loc_addr =
+ ntohl(mapped_loc_addr->sin_addr.s_addr);
+ cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port);
+ }
+ if (mapped_rem_addr->sin_family == AF_INET) {
+ cm_info->mapped_rem_addr =
+ ntohl(mapped_rem_addr->sin_addr.s_addr);
+ cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port);
+ }
+}
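
These helpers translate between the driver's host-order u32/u16 address and port fields and the sockaddr_storage structures the port-mapper API works with. A minimal standalone sketch of the conversion, using userspace socket headers in place of the kernel types:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Build a sockaddr_in from a host-order IPv4 address and port. */
static void example_create_sockaddr(uint32_t haddr, uint16_t hport,
				    struct sockaddr_storage *addr)
{
	struct sockaddr_in *s = (struct sockaddr_in *)addr;

	memset(s, 0, sizeof(*s));
	s->sin_family = AF_INET;
	s->sin_addr.s_addr = htonl(haddr);
	s->sin_port = htons(hport);
}

int main(void)
{
	struct sockaddr_storage ss;
	struct sockaddr_in *s = (struct sockaddr_in *)&ss;
	char buf[INET_ADDRSTRLEN];

	example_create_sockaddr(0xC0A80001, 4096, &ss);	/* 192.168.0.1:4096 */
	inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf));
	printf("%s:%u\n", buf, (unsigned)ntohs(s->sin_port));
	return 0;
}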
/**
* print_core - dump a cm core
@@ -367,9 +630,8 @@ static void print_core(struct nes_cm_core *core)
return;
nes_debug(NES_DBG_CM, "---------------------------------------------\n");
- nes_debug(NES_DBG_CM, "State : %u \n", core->state);
+ nes_debug(NES_DBG_CM, "State : %u \n", core->state);
- nes_debug(NES_DBG_CM, "Tx Free cnt : %u \n", skb_queue_len(&core->tx_free_list));
nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt));
nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt));
@@ -378,6 +640,162 @@ static void print_core(struct nes_cm_core *core)
nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
}
+static void record_ird_ord(struct nes_cm_node *cm_node,
+ u16 conn_ird, u16 conn_ord)
+{
+ if (conn_ird > NES_MAX_IRD)
+ conn_ird = NES_MAX_IRD;
+
+ if (conn_ord > NES_MAX_ORD)
+ conn_ord = NES_MAX_ORD;
+
+ cm_node->ird_size = conn_ird;
+ cm_node->ord_size = conn_ord;
+}
+
+/**
+ * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
+ */
+static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff,
+ u16 *buff_len, u8 *pci_mem, u8 mpa_key)
+{
+ int ret = 0;
+
+ *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0];
+
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V1:
+ *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg);
+ *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size;
+ build_mpa_v1(cm_node, *start_buff, mpa_key);
+ break;
+ case IETF_MPA_V2:
+ *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size;
+ build_mpa_v2(cm_node, *start_buff, mpa_key);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
+/**
+ * build_mpa_v2 - build a MPA V2 frame
+ */
+static void build_mpa_v2(struct nes_cm_node *cm_node,
+ void *start_addr, u8 mpa_key)
+{
+ struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
+ struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+ u16 ctrl_ird;
+ u16 ctrl_ord;
+
+ /* initialize the upper 5 bytes of the frame */
+ build_mpa_v1(cm_node, start_addr, mpa_key);
+ mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */
+ mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
+
+ /* initialize RTR msg */
+ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
+ } else {
+ ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
+ ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
+ }
+ ctrl_ird |= IETF_PEER_TO_PEER;
+ ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ case MPA_KEY_REPLY:
+ switch (cm_node->send_rdma0_op) {
+ case SEND_RDMA_WRITE_ZERO:
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ break;
+ case SEND_RDMA_READ_ZERO:
+ ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ }
+ }
+ rtr_msg->ctrl_ird = htons(ctrl_ird);
+ rtr_msg->ctrl_ord = htons(ctrl_ord);
+}
+
+/**
+ * build_mpa_v1 - build a MPA V1 frame
+ */
+static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key)
+{
+ struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+ break;
+ case MPA_KEY_REPLY:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+ break;
+ }
+ mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+ mpa_frame->rev = cm_node->mpa_frame_rev;
+ mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size);
+}
+
+static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr)
+{
+ u64 u64temp;
+ struct nes_qp *nesqp = *nesqp_addr;
+ struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
+
+ u64temp = (unsigned long)nesqp->nesuqp_addr;
+ u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
+
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
+
+ switch (cm_node->send_rdma0_op) {
+ case SEND_RDMA_WRITE_ZERO:
+ nes_debug(NES_DBG_CM, "Sending first write.\n");
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+ break;
+
+ case SEND_RDMA_READ_ZERO:
+ default:
+ if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
+ WARN(1, "Unsupported RDMA0 len operation=%u\n",
+ cm_node->send_rdma0_op);
+ nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1;
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1;
+ break;
+ }
+
+ if (nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
+ kunmap(nesqp->page);
+ }
+
+ /*use the reserved spot on the WQ for the extra first WQE*/
+ nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU |
+ NES_QPCONTEXT_ORDIRD_ALSMM));
+ nesqp->skip_lsmm = 1;
+ nesqp->hwqp.sq_tail = 0;
+}
/**
* schedule_nes_timer
@@ -385,20 +803,17 @@ static void print_core(struct nes_cm_core *core)
* rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node);
*/
int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
- enum nes_timer_type type, int send_retrans,
- int close_when_complete)
+ enum nes_timer_type type, int send_retrans,
+ int close_when_complete)
{
- unsigned long flags;
- struct nes_cm_core *cm_core;
+ unsigned long flags;
+ struct nes_cm_core *cm_core = cm_node->cm_core;
struct nes_timer_entry *new_send;
int ret = 0;
- u32 was_timer_set;
- if (!cm_node)
- return -EINVAL;
new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
if (!new_send)
- return -1;
+ return -ENOMEM;
/* new_send->timetosend = currenttime */
new_send->retrycount = NES_DEFAULT_RETRYS;
@@ -411,255 +826,262 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
new_send->close_when_complete = close_when_complete;
if (type == NES_TIMER_TYPE_CLOSE) {
- new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- list_add_tail(&new_send->list, &cm_node->recv_list);
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
+ new_send->timetosend += (HZ / 10);
+ if (cm_node->recv_entry) {
+ kfree(new_send);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ cm_node->recv_entry = new_send;
}
if (type == NES_TIMER_TYPE_SEND) {
new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
atomic_inc(&new_send->skb->users);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ add_ref_cm_node(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
if (ret != NETDEV_TX_OK) {
- nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n",
- new_send, jiffies);
- atomic_dec(&new_send->skb->users);
+ nes_debug(NES_DBG_CM, "Error sending packet %p "
+ "(jiffies = %lu)\n", new_send, jiffies);
new_send->timetosend = jiffies;
+ ret = NETDEV_TX_OK;
} else {
cm_packets_sent++;
if (!send_retrans) {
+ cleanup_retrans_entry(cm_node);
if (close_when_complete)
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- dev_kfree_skb_any(new_send->skb);
- kfree(new_send);
+ rem_ref_cm_node(cm_core, cm_node);
return ret;
}
- new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
}
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- list_add_tail(&new_send->list, &cm_node->retrans_list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
}
- if (type == NES_TIMER_TYPE_RECV) {
- new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
- new_send->timetosend = jiffies;
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- list_add_tail(&new_send->list, &cm_node->recv_list);
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
- }
- cm_core = cm_node->cm_core;
- was_timer_set = timer_pending(&cm_core->tcp_timer);
-
- if (!was_timer_set) {
- cm_core->tcp_timer.expires = new_send->timetosend;
- add_timer(&cm_core->tcp_timer);
- }
+ if (!timer_pending(&cm_core->tcp_timer))
+ mod_timer(&cm_core->tcp_timer, new_send->timetosend);
return ret;
}
+static void nes_retrans_expired(struct nes_cm_node *cm_node)
+{
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ enum nes_cm_node_state state = cm_node->state;
+ cm_node->state = NES_CM_STATE_CLOSED;
+
+ switch (state) {
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_CLOSING:
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ break;
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_FIN_WAIT1:
+ if (cm_node->cm_id)
+ cm_id->rem_ref(cm_id);
+ send_reset(cm_node, NULL);
+ break;
+ default:
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, NULL);
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+ }
+}
+
+static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
+{
+ struct nes_timer_entry *recv_entry = cm_node->recv_entry;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct nes_qp *nesqp;
+ unsigned long qplockflags;
+
+ if (!recv_entry)
+ return;
+ nesqp = (struct nes_qp *)recv_entry->skb;
+ if (nesqp) {
+ spin_lock_irqsave(&nesqp->lock, qplockflags);
+ if (nesqp->cm_id) {
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
+ "refcount = %d: HIT A "
+ "NES_TIMER_TYPE_CLOSE with something "
+ "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+ atomic_read(&nesqp->refcount));
+ nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
+ nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
+ nesqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_cm_disconn(nesqp);
+ } else {
+ spin_unlock_irqrestore(&nesqp->lock, qplockflags);
+ nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
+ "refcount = %d: HIT A "
+ "NES_TIMER_TYPE_CLOSE with nothing "
+ "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+ atomic_read(&nesqp->refcount));
+ }
+ } else if (rem_node) {
+ /* TIME_WAIT state */
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ }
+ if (cm_node->cm_id)
+ cm_id->rem_ref(cm_id);
+ kfree(recv_entry);
+ cm_node->recv_entry = NULL;
+}
/**
* nes_cm_timer_tick
*/
static void nes_cm_timer_tick(unsigned long pass)
{
- unsigned long flags, qplockflags;
+ unsigned long flags;
unsigned long nexttimeout = jiffies + NES_LONG_TIME;
- struct iw_cm_id *cm_id;
struct nes_cm_node *cm_node;
struct nes_timer_entry *send_entry, *recv_entry;
- struct list_head *list_core, *list_core_temp;
- struct list_head *list_node, *list_node_temp;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
struct nes_cm_core *cm_core = g_cm_core;
- struct nes_qp *nesqp;
- struct sk_buff *skb;
u32 settimer = 0;
+ unsigned long timetosend;
int ret = NETDEV_TX_OK;
- int node_done;
+ struct list_head timer_list;
+
+ INIT_LIST_HEAD(&timer_list);
spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ list_for_each_safe(list_node, list_core_temp,
+ &cm_core->connected_nodes) {
cm_node = container_of(list_node, struct nes_cm_node, list);
- add_ref_cm_node(cm_node);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
- recv_entry = container_of(list_core, struct nes_timer_entry, list);
- if ((time_after(recv_entry->timetosend, jiffies)) &&
- (recv_entry->type == NES_TIMER_TYPE_CLOSE)) {
- if (nexttimeout > recv_entry->timetosend || !settimer) {
+ if ((cm_node->recv_entry) || (cm_node->send_entry)) {
+ add_ref_cm_node(cm_node);
+ list_add(&cm_node->timer_entry, &timer_list);
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp, &timer_list) {
+ cm_node = container_of(list_node, struct nes_cm_node,
+ timer_entry);
+ recv_entry = cm_node->recv_entry;
+
+ if (recv_entry) {
+ if (time_after(recv_entry->timetosend, jiffies)) {
+ if (nexttimeout > recv_entry->timetosend ||
+ !settimer) {
nexttimeout = recv_entry->timetosend;
settimer = 1;
}
- continue;
- }
- list_del(&recv_entry->list);
- cm_id = cm_node->cm_id;
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
- if (recv_entry->type == NES_TIMER_TYPE_CLOSE) {
- nesqp = (struct nes_qp *)recv_entry->skb;
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: "
- "****** HIT A NES_TIMER_TYPE_CLOSE"
- " with something to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- nesqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:"
- " ****** HIT A NES_TIMER_TYPE_CLOSE"
- " with nothing to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- nes_rem_ref(&nesqp->ibqp);
- }
- if (cm_id)
- cm_id->rem_ref(cm_id);
+ } else {
+ handle_recv_entry(cm_node, 1);
}
- kfree(recv_entry);
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
}
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- node_done = 0;
- list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) {
- if (node_done) {
+ do {
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
break;
- }
- send_entry = container_of(list_core, struct nes_timer_entry, list);
if (time_after(send_entry->timetosend, jiffies)) {
if (cm_node->state != NES_CM_STATE_TSA) {
- if ((nexttimeout > send_entry->timetosend) || !settimer) {
- nexttimeout = send_entry->timetosend;
+ if ((nexttimeout >
+ send_entry->timetosend) ||
+ !settimer) {
+ nexttimeout =
+ send_entry->timetosend;
settimer = 1;
}
- node_done = 1;
- continue;
} else {
- list_del(&send_entry->list);
- skb = send_entry->skb;
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- dev_kfree_skb_any(skb);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
+ free_retrans_entry(cm_node);
}
+ break;
}
- if (send_entry->type == NES_TIMER_NODE_CLEANUP) {
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
- }
- if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) ||
- (cm_node->state == NES_CM_STATE_TSA) ||
- (cm_node->state == NES_CM_STATE_CLOSED)) {
- skb = send_entry->skb;
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- kfree(send_entry);
- dev_kfree_skb_any(skb);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
+
+ if ((cm_node->state == NES_CM_STATE_TSA) ||
+ (cm_node->state == NES_CM_STATE_CLOSED)) {
+ free_retrans_entry(cm_node);
+ break;
}
- if (!send_entry->retranscount || !send_entry->retrycount) {
+ if (!send_entry->retranscount ||
+ !send_entry->retrycount) {
cm_packets_dropped++;
- skb = send_entry->skb;
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- dev_kfree_skb_any(skb);
- kfree(send_entry);
- if (cm_node->state == NES_CM_STATE_SYN_RCVD) {
- /* this node never even generated an indication up to the cm */
- rem_ref_cm_node(cm_core, cm_node);
- } else {
- cm_node->state = NES_CM_STATE_CLOSED;
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- }
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
+ free_retrans_entry(cm_node);
+
+ spin_unlock_irqrestore(
+ &cm_node->retrans_list_lock, flags);
+ nes_retrans_expired(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ spin_lock_irqsave(&cm_node->retrans_list_lock,
+ flags);
+ break;
}
- /* this seems like the correct place, but leave send entry unprotected */
- /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */
atomic_inc(&send_entry->skb->users);
cm_packets_retrans++;
- nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p,"
- " jiffies = %lu, time to send = %lu, retranscount = %u, "
- "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n",
- send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount,
- send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
+ "for node %p, jiffies = %lu, time to send = "
+ "%lu, retranscount = %u, send_entry->seq_num = "
+ "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
+ "0x%08X\n", send_entry, cm_node, jiffies,
+ send_entry->timetosend,
+ send_entry->retranscount,
+ send_entry->seq_num,
+ cm_node->tcp_cntxt.rem_ack_num);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock,
+ flags);
ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
if (ret != NETDEV_TX_OK) {
+ nes_debug(NES_DBG_CM, "rexmit failed for "
+ "node=%p\n", cm_node);
cm_packets_bounced++;
- atomic_dec(&send_entry->skb->users);
send_entry->retrycount--;
nexttimeout = jiffies + NES_SHORT_TIME;
settimer = 1;
- node_done = 1;
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
+ break;
} else {
cm_packets_sent++;
}
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- list_del(&send_entry->list);
- nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n",
- send_entry->retranscount, send_entry->retrycount);
+ nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
+ "%u, retry count = %u.\n",
+ send_entry->retranscount,
+ send_entry->retrycount);
if (send_entry->send_retrans) {
send_entry->retranscount--;
- send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT;
- if (nexttimeout > send_entry->timetosend || !settimer) {
+ timetosend = (NES_RETRY_TIMEOUT <<
+ (NES_DEFAULT_RETRANS - send_entry->retranscount));
+
+ send_entry->timetosend = jiffies +
+ min(timetosend, NES_MAX_TIMEOUT);
+ if (nexttimeout > send_entry->timetosend ||
+ !settimer) {
nexttimeout = send_entry->timetosend;
settimer = 1;
}
- list_add(&send_entry->list, &cm_node->retrans_list);
- continue;
} else {
int close_when_complete;
- skb = send_entry->skb;
- close_when_complete = send_entry->close_when_complete;
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- if (close_when_complete) {
- BUG_ON(atomic_read(&cm_node->ref_count) == 1);
- rem_ref_cm_node(cm_core, cm_node);
- }
- dev_kfree_skb_any(skb);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
+ close_when_complete =
+ send_entry->close_when_complete;
+ nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
+ cm_node, cm_node->state);
+ free_retrans_entry(cm_node);
+ if (close_when_complete)
+ rem_ref_cm_node(cm_node->cm_core,
+ cm_node);
}
- }
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-
- rem_ref_cm_node(cm_core, cm_node);
+ } while (0);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (ret != NETDEV_TX_OK)
- break;
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
}
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
if (settimer) {
- if (!timer_pending(&cm_core->tcp_timer)) {
- cm_core->tcp_timer.expires = nexttimeout;
- add_timer(&cm_core->tcp_timer);
- }
+ if (!timer_pending(&cm_core->tcp_timer))
+ mod_timer(&cm_core->tcp_timer, nexttimeout);
}
}
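The rearmed retransmit timeout above grows exponentially with the number of retransmits already spent and is clamped to NES_MAX_TIMEOUT. A minimal sketch of that computation follows; the three macro values are placeholders, not the driver's real definitions:

#include <linux/jiffies.h>
#include <linux/kernel.h>

/* Placeholder values -- illustrative only, not the driver's definitions. */
#define SKETCH_RETRY_TIMEOUT	(HZ / 10)	/* base retransmit interval */
#define SKETCH_DEFAULT_RETRANS	8		/* retransmit budget */
#define SKETCH_MAX_TIMEOUT	(4 * HZ)	/* upper clamp */

static unsigned long sketch_next_rexmit(unsigned int retrans_left)
{
	unsigned long backoff;

	/* each retransmit already spent doubles the wait */
	backoff = SKETCH_RETRY_TIMEOUT <<
		  (SKETCH_DEFAULT_RETRANS - retrans_left);
	return jiffies + min(backoff, (unsigned long)SKETCH_MAX_TIMEOUT);
}

The tick then re-arms cm_core->tcp_timer through mod_timer() when it is not already pending, replacing the old expires/add_timer() sequence.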
@@ -667,14 +1089,14 @@ static void nes_cm_timer_tick(unsigned long pass)
/**
* send_syn
*/
-static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
+static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
+ struct sk_buff *skb)
{
int ret;
int flags = SET_SYN;
- struct sk_buff *skb;
char optionsbuffer[sizeof(struct option_mss) +
- sizeof(struct option_windowscale) +
- sizeof(struct option_base) + 1];
+ sizeof(struct option_windowscale) + sizeof(struct option_base) +
+ TCP_OPTIONS_PADDING];
int optionssize = 0;
/* Sending MSS option */
@@ -695,8 +1117,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
optionssize += sizeof(struct option_windowscale);
- if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)
- ) {
+ if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) {
options = (union all_known_options *)&optionsbuffer[optionssize];
options->as_base.optionnum = OPTION_NUMBER_WRITE0;
options->as_base.length = sizeof(struct option_base);
@@ -714,7 +1135,8 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
options->as_end = OPTION_NUMBER_END;
optionssize += 1;
- skb = get_free_pkt(cm_node);
+ if (!skb)
+ skb = dev_alloc_skb(MAX_CM_BUFFER);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
return -1;
@@ -733,18 +1155,18 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack)
/**
* send_reset
*/
-static int send_reset(struct nes_cm_node *cm_node)
+static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
{
int ret;
- struct sk_buff *skb = get_free_pkt(cm_node);
int flags = SET_RST | SET_ACK;
+ if (!skb)
+ skb = dev_alloc_skb(MAX_CM_BUFFER);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
+ return -ENOMEM;
}
- add_ref_cm_node(cm_node);
form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags);
ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1);
@@ -755,10 +1177,12 @@ static int send_reset(struct nes_cm_node *cm_node)
/**
* send_ack
*/
-static int send_ack(struct nes_cm_node *cm_node)
+static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
{
int ret;
- struct sk_buff *skb = get_free_pkt(cm_node);
+
+ if (!skb)
+ skb = dev_alloc_skb(MAX_CM_BUFFER);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -781,7 +1205,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
/* if we didn't get a frame get one */
if (!skb)
- skb = get_free_pkt(cm_node);
+ skb = dev_alloc_skb(MAX_CM_BUFFER);
if (!skb) {
nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
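With the get_free_pkt() pool removed, send_syn(), send_reset(), send_ack() and send_fin() all follow the same convention: reuse the skb the caller hands in, otherwise allocate a fresh one. A minimal sketch of that shared pattern; the helper name is illustrative and MAX_CM_BUFFER is taken as given:

#include <linux/skbuff.h>

/* Illustrative helper only -- the driver open-codes this in each send_*(). */
static struct sk_buff *sketch_get_tx_skb(struct sk_buff *skb)
{
	if (!skb)
		skb = dev_alloc_skb(MAX_CM_BUFFER);	/* may return NULL */
	return skb;	/* caller still checks for NULL and bails out */
}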
@@ -796,76 +1220,32 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
/**
- * get_free_pkt
- */
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
-{
- struct sk_buff *skb, *new_skb;
-
- /* check to see if we need to repopulate the free tx pkt queue */
- if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) {
- while (skb_queue_len(&cm_node->cm_core->tx_free_list) <
- cm_node->cm_core->free_tx_pkt_max) {
- /* replace the frame we took, we won't get it back */
- new_skb = dev_alloc_skb(cm_node->cm_core->mtu);
- BUG_ON(!new_skb);
- /* add a replacement frame to the free tx list head */
- skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb);
- }
- }
-
- skb = skb_dequeue(&cm_node->cm_core->tx_free_list);
-
- return skb;
-}
-
-
-/**
- * make_hashkey - generate hash key from node tuple
- */
-static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port,
- nes_addr_t rem_addr)
-{
- u32 hashkey = 0;
-
- hashkey = loc_addr + rem_addr + loc_port + rem_port;
- hashkey = (hashkey % NES_CM_HASHTABLE_SIZE);
-
- return hashkey;
-}
-
-
-/**
* find_node - find a cm node that matches the reference cm node
*/
static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
- u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
+ u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
{
unsigned long flags;
- u32 hashkey;
struct list_head *hte;
struct nes_cm_node *cm_node;
- /* make a hash index key for this packet */
- hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr);
-
/* get a handle on the hte */
hte = &cm_core->connected_nodes;
- nes_debug(NES_DBG_CM, "Searching for an owner node: " NIPQUAD_FMT ":%x from core %p->%p\n",
- HIPQUAD(loc_addr), loc_port, cm_core, hte);
-
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->ht_lock, flags);
list_for_each_entry(cm_node, hte, list) {
/* compare quad, return node handle if a match */
nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
- cm_node->loc_addr, cm_node->loc_port,
- loc_addr, loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- rem_addr, rem_port);
- if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
- (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+ cm_node->loc_addr, cm_node->loc_port,
+ loc_addr, loc_port,
+ cm_node->rem_addr, cm_node->rem_port,
+ rem_addr, rem_port);
+ if ((cm_node->mapped_loc_addr == loc_addr) &&
+ (cm_node->mapped_loc_port == loc_port) &&
+ (cm_node->mapped_rem_addr == rem_addr) &&
+ (cm_node->mapped_rem_port == rem_port)) {
+
add_ref_cm_node(cm_node);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
return cm_node;
@@ -882,19 +1262,29 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
* find_listener - find a cm node listening on this addr-port pair
*/
static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
- nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+ nes_addr_t dst_addr, u16 dst_port,
+ enum nes_cm_listener_state listener_state, int local)
{
unsigned long flags;
struct nes_cm_listener *listen_node;
+ nes_addr_t listen_addr;
+ u16 listen_port;
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
+ if (local) {
+ listen_addr = listen_node->loc_addr;
+ listen_port = listen_node->loc_port;
+ } else {
+ listen_addr = listen_node->mapped_loc_addr;
+ listen_port = listen_node->mapped_loc_port;
+ }
/* compare node pair, return node handle if a match */
- if (((listen_node->loc_addr == dst_addr) ||
- listen_node->loc_addr == 0x00000000) &&
- (listen_node->loc_port == dst_port) &&
- (listener_state & listen_node->listener_state)) {
+ if (((listen_addr == dst_addr) ||
+ listen_addr == 0x00000000) &&
+ (listen_port == dst_port) &&
+ (listener_state & listen_node->listener_state)) {
atomic_inc(&listen_node->ref_count);
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
return listen_node;
@@ -902,32 +1292,23 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
}
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- nes_debug(NES_DBG_CM, "Unable to find listener for " NIPQUAD_FMT ":%x\n",
- HIPQUAD(dst_addr), dst_port);
-
/* no listener */
return NULL;
}
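With the port-mapper changes, find_listener() matches either the application-visible tuple or the mapped (wire) tuple depending on the new local flag. The two call patterns visible in this patch, roughly:

/* duplicate-listen check in mini_cm_listen(): match the local tuple */
listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
			 NES_CM_LISTENER_EITHER_STATE, 1);

/* loopback lookup in mini_cm_connect(): match the mapped (wire) tuple */
listener = find_listener(cm_core, cm_node->mapped_loc_addr,
			 cm_node->mapped_rem_port,
			 NES_CM_LISTENER_ACTIVE_STATE, 0);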
-
/**
* add_hte_node - add a cm node to the hash table
*/
static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
{
unsigned long flags;
- u32 hashkey;
struct list_head *hte;
if (!cm_node || !cm_core)
return -EINVAL;
- nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n");
-
- /* first, make an index into our hash table */
- hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
- cm_node->rem_port, cm_node->rem_addr);
- cm_node->hashkey = hashkey;
+ nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
+ cm_node);
spin_lock_irqsave(&cm_core->ht_lock, flags);
@@ -946,10 +1327,86 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
* mini_cm_dec_refcnt_listen
*/
static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener, int free_hanging_nodes)
+ struct nes_cm_listener *listener, int free_hanging_nodes)
{
- int ret = 1;
+ int ret = -EINVAL;
+ int err = 0;
unsigned long flags;
+ struct list_head *list_pos = NULL;
+ struct list_head *list_temp = NULL;
+ struct nes_cm_node *cm_node = NULL;
+ struct list_head reset_list;
+
+ nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
+ "refcnt=%d\n", listener, free_hanging_nodes,
+ atomic_read(&listener->ref_count));
+ /* free non-accelerated child nodes for this listener */
+ INIT_LIST_HEAD(&reset_list);
+ if (free_hanging_nodes) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_safe(list_pos, list_temp,
+ &g_cm_core->connected_nodes) {
+ cm_node = container_of(list_pos, struct nes_cm_node,
+ list);
+ if ((cm_node->listener == listener) &&
+ (!cm_node->accelerated)) {
+ add_ref_cm_node(cm_node);
+ list_add(&cm_node->reset_entry, &reset_list);
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+
+ list_for_each_safe(list_pos, list_temp, &reset_list) {
+ cm_node = container_of(list_pos, struct nes_cm_node,
+ reset_entry);
+ {
+ struct nes_cm_node *loopback = cm_node->loopbackpartner;
+ enum nes_cm_node_state old_state;
+ if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ } else {
+ if (!loopback) {
+ cleanup_retrans_entry(cm_node);
+ err = send_reset(cm_node, NULL);
+ if (err) {
+ cm_node->state =
+ NES_CM_STATE_CLOSED;
+ WARN_ON(1);
+ } else {
+ old_state = cm_node->state;
+ cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
+ if (old_state != NES_CM_STATE_MPAREQ_RCVD)
+ rem_ref_cm_node(
+ cm_node->cm_core,
+ cm_node);
+ }
+ } else {
+ struct nes_cm_event event;
+
+ event.cm_node = loopback;
+ event.cm_info.rem_addr =
+ loopback->rem_addr;
+ event.cm_info.loc_addr =
+ loopback->loc_addr;
+ event.cm_info.rem_port =
+ loopback->rem_port;
+ event.cm_info.loc_port =
+ loopback->loc_port;
+ event.cm_info.cm_id = loopback->cm_id;
+ add_ref_cm_node(loopback);
+ loopback->state = NES_CM_STATE_CLOSED;
+ cm_event_connect_error(&event);
+ cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
+
+ rem_ref_cm_node(cm_node->cm_core,
+ cm_node);
+
+ }
+ }
+ }
+ }
+
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
if (!atomic_dec_return(&listener->ref_count)) {
list_del(&listener->list);
@@ -960,8 +1417,18 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
if (listener->nesvnic) {
- nes_manage_apbvt(listener->nesvnic, listener->loc_port,
- PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+ nes_manage_apbvt(listener->nesvnic,
+ listener->mapped_loc_port,
+ PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+
+ nes_remove_mapinfo(listener->loc_addr,
+ listener->loc_port,
+ listener->mapped_loc_addr,
+ listener->mapped_loc_port);
+ nes_debug(NES_DBG_NLMSG,
+ "Delete APBVT mapped_loc_port = %04X\n",
+ listener->mapped_loc_port);
}
nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
@@ -969,15 +1436,15 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
kfree(listener);
listener = NULL;
ret = 0;
- cm_listens_destroyed++;
+ atomic_inc(&cm_listens_destroyed);
} else {
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
}
if (listener) {
if (atomic_read(&listener->pend_accepts_cnt) > 0)
nes_debug(NES_DBG_CM, "destroying listener (%p)"
- " with non-zero pending accepts=%u\n",
- listener, atomic_read(&listener->pend_accepts_cnt));
+ " with non-zero pending accepts=%u\n",
+ listener, atomic_read(&listener->pend_accepts_cnt));
}
return ret;
@@ -988,7 +1455,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
* mini_cm_del_listen
*/
static int mini_cm_del_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener)
+ struct nes_cm_listener *listener)
{
listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
listener->cm_id = NULL; /* going to be destroyed pretty soon */
@@ -1000,61 +1467,98 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
* mini_cm_accelerated
*/
static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
+ struct nes_cm_node *cm_node)
{
- u32 was_timer_set;
cm_node->accelerated = 1;
if (cm_node->accept_pend) {
BUG_ON(!cm_node->listener);
atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
}
- was_timer_set = timer_pending(&cm_core->tcp_timer);
- if (!was_timer_set) {
- cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
- add_timer(&cm_core->tcp_timer);
- }
+ if (!timer_pending(&cm_core->tcp_timer))
+ mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME));
return 0;
}
/**
- * nes_addr_send_arp
+ * nes_addr_resolve_neigh
*/
-static void nes_addr_send_arp(u32 dst_ip)
+static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
{
struct rtable *rt;
- struct flowi fl;
+ struct neighbour *neigh;
+ int rc = arpindex;
+ struct net_device *netdev;
+ struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
+
+ rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0);
+ if (IS_ERR(rt)) {
+ printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
+ __func__, dst_ip);
+ return rc;
+ }
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = htonl(dst_ip);
- if (ip_route_output_key(&init_net, &rt, &fl)) {
- printk("%s: ip_route_output_key failed for 0x%08X\n",
- __func__, dst_ip);
- return;
+ if (netif_is_bond_slave(nesvnic->netdev))
+ netdev = netdev_master_upper_dev_get(nesvnic->netdev);
+ else
+ netdev = nesvnic->netdev;
+
+ neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev);
+
+ rcu_read_lock();
+ if (neigh) {
+ if (neigh->nud_state & NUD_VALID) {
+ nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
+ " is %pM, Gateway is 0x%08X \n", dst_ip,
+ neigh->ha, ntohl(rt->rt_gateway));
+
+ if (arpindex >= 0) {
+ if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
+ /* Mac address same as in nes_arp_table */
+ goto out;
+ }
+
+ nes_manage_arp_cache(nesvnic->netdev,
+ nesadapter->arp_table[arpindex].mac_addr,
+ dst_ip, NES_ARP_DELETE);
+ }
+
+ nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
+ dst_ip, NES_ARP_ADD);
+ rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
+ NES_ARP_RESOLVE);
+ } else {
+ neigh_event_send(neigh, NULL);
+ }
}
+out:
+ rcu_read_unlock();
+
+ if (neigh)
+ neigh_release(neigh);
- neigh_event_send(rt->u.dst.neighbour, NULL);
ip_rt_put(rt);
+ return rc;
}
-
/**
* make_cm_node - create a new instance of a cm node
*/
static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
- struct nes_cm_listener *listener)
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
+ struct nes_cm_listener *listener)
{
struct nes_cm_node *cm_node;
struct timespec ts;
+ int oldarpindex = 0;
int arpindex = 0;
struct nes_device *nesdev;
struct nes_adapter *nesadapter;
- DECLARE_MAC_BUF(mac);
/* create an hte and cm_node for this instance */
cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
@@ -1066,22 +1570,30 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->rem_addr = cm_info->rem_addr;
cm_node->loc_port = cm_info->loc_port;
cm_node->rem_port = cm_info->rem_port;
- cm_node->send_write0 = send_first;
- nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n",
- HIPQUAD(cm_node->loc_addr), cm_node->loc_port,
- HIPQUAD(cm_node->rem_addr), cm_node->rem_port);
+
+ cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
+ cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
+ cm_node->mapped_loc_port = cm_info->mapped_loc_port;
+ cm_node->mapped_rem_port = cm_info->mapped_rem_port;
+
+ cm_node->mpa_frame_rev = mpa_version;
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ cm_node->mpav2_ird_ord = 0;
+ cm_node->ird_size = 0;
+ cm_node->ord_size = 0;
+
+ nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
+ &cm_node->loc_addr, cm_node->loc_port,
+ &cm_node->rem_addr, cm_node->rem_port);
cm_node->listener = listener;
cm_node->netdev = nesvnic->netdev;
cm_node->cm_id = cm_info->cm_id;
memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
- nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n",
- cm_node->listener, cm_node->cm_id);
+ nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
+ cm_node->cm_id);
- INIT_LIST_HEAD(&cm_node->retrans_list);
spin_lock_init(&cm_node->retrans_list_lock);
- INIT_LIST_HEAD(&cm_node->recv_list);
- spin_lock_init(&cm_node->recv_list_lock);
cm_node->loopbackpartner = NULL;
atomic_set(&cm_node->ref_count, 1);
@@ -1090,11 +1602,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
- NES_CM_DEFAULT_RCV_WND_SCALE;
+ NES_CM_DEFAULT_RCV_WND_SCALE;
ts = current_kernel_time();
cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
- sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
+ sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
cm_node->tcp_cntxt.rcv_nxt = 0;
/* get a unique session ID , add thread_id to an upcounter to handle race */
atomic_inc(&cm_core->node_cnt);
@@ -1108,18 +1620,21 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
nesadapter = nesdev->nesadapter;
cm_node->loopbackpartner = NULL;
+
/* get the mac addr for the remote node */
- arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+ oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr,
+ NULL, NES_ARP_RESOLVE);
+ arpindex = nes_addr_resolve_neigh(nesvnic,
+ cm_node->mapped_rem_addr, oldarpindex);
if (arpindex < 0) {
kfree(cm_node);
- nes_addr_send_arp(cm_info->rem_addr);
return NULL;
}
/* copy the mac addr to node context */
memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN);
- nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %s\n",
- print_mac(mac, cm_node->rem_mac));
+ nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %pM\n",
+ cm_node->rem_mac);
add_hte_node(cm_core, cm_node);
atomic_inc(&cm_nodes_created);
@@ -1142,13 +1657,9 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node)
* rem_ref_cm_node - destroy an instance of a cm node
*/
static int rem_ref_cm_node(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
+ struct nes_cm_node *cm_node)
{
- unsigned long flags, qplockflags;
- struct nes_timer_entry *send_entry;
- struct nes_timer_entry *recv_entry;
- struct iw_cm_id *cm_id;
- struct list_head *list_core, *list_node_temp;
+ unsigned long flags;
struct nes_qp *nesqp;
if (!cm_node)
@@ -1169,75 +1680,41 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
atomic_dec(&cm_node->listener->pend_accepts_cnt);
BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
}
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) {
- send_entry = container_of(list_core, struct nes_timer_entry, list);
- list_del(&send_entry->list);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- dev_kfree_skb_any(send_entry->skb);
- kfree(send_entry);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- continue;
- }
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) {
- recv_entry = container_of(list_core, struct nes_timer_entry, list);
- list_del(&recv_entry->list);
- cm_id = cm_node->cm_id;
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
- if (recv_entry->type == NES_TIMER_TYPE_CLOSE) {
- nesqp = (struct nes_qp *)recv_entry->skb;
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
- " with something to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id);
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- nesqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
- " with nothing to do!!! ******\n",
- nesqp->hwqp.qp_id, cm_id);
- nes_rem_ref(&nesqp->ibqp);
- }
- cm_id->rem_ref(cm_id);
- } else if (recv_entry->type == NES_TIMER_TYPE_RECV) {
- dev_kfree_skb_any(recv_entry->skb);
- }
- kfree(recv_entry);
- spin_lock_irqsave(&cm_node->recv_list_lock, flags);
- }
- spin_unlock_irqrestore(&cm_node->recv_list_lock, flags);
-
+ WARN_ON(cm_node->send_entry);
+ if (cm_node->recv_entry)
+ handle_recv_entry(cm_node, 0);
if (cm_node->listener) {
mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
} else {
if (cm_node->apbvt_set && cm_node->nesvnic) {
- nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
- PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
+ nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port,
+ PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
}
+ nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n",
+ cm_node->mapped_loc_port);
+ nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
+ cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
}
- kfree(cm_node);
atomic_dec(&cm_core->node_cnt);
atomic_inc(&cm_nodes_destroyed);
+ nesqp = cm_node->nesqp;
+ if (nesqp) {
+ nesqp->cm_node = NULL;
+ nes_rem_ref(&nesqp->ibqp);
+ cm_node->nesqp = NULL;
+ }
+ kfree(cm_node);
return 0;
}
-
/**
* process_options
*/
-static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet)
+static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
+ u32 optionsize, u32 syn_packet)
{
u32 tmp;
u32 offset = 0;
@@ -1247,35 +1724,34 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti
while (offset < optionsize) {
all_options = (union all_known_options *)(optionsloc + offset);
switch (all_options->as_base.optionnum) {
- case OPTION_NUMBER_END:
- offset = optionsize;
- break;
- case OPTION_NUMBER_NONE:
- offset += 1;
- continue;
- case OPTION_NUMBER_MSS:
- nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n",
- __func__,
- all_options->as_mss.length, offset, optionsize);
- got_mss_option = 1;
- if (all_options->as_mss.length != 4) {
- return 1;
- } else {
- tmp = ntohs(all_options->as_mss.mss);
- if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
- cm_node->tcp_cntxt.mss = tmp;
- }
- break;
- case OPTION_NUMBER_WINDOW_SCALE:
- cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount;
- break;
- case OPTION_NUMBER_WRITE0:
- cm_node->send_write0 = 1;
- break;
- default:
- nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
- all_options->as_base.optionnum);
- break;
+ case OPTION_NUMBER_END:
+ offset = optionsize;
+ break;
+ case OPTION_NUMBER_NONE:
+ offset += 1;
+ continue;
+ case OPTION_NUMBER_MSS:
+ nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
+ "Size: %d\n", __func__,
+ all_options->as_mss.length, offset, optionsize);
+ got_mss_option = 1;
+ if (all_options->as_mss.length != 4) {
+ return 1;
+ } else {
+ tmp = ntohs(all_options->as_mss.mss);
+ if (tmp > 0 && tmp <
+ cm_node->tcp_cntxt.mss)
+ cm_node->tcp_cntxt.mss = tmp;
+ }
+ break;
+ case OPTION_NUMBER_WINDOW_SCALE:
+ cm_node->tcp_cntxt.snd_wscale =
+ all_options->as_windowscale.shiftcount;
+ break;
+ default:
+ nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
+ all_options->as_base.optionnum);
+ break;
}
offset += all_options->as_base.length;
}
@@ -1284,310 +1760,623 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti
return 0;
}
+static void drop_packet(struct sk_buff *skb)
+{
+ atomic_inc(&cm_accel_dropped_pkts);
+ dev_kfree_skb_any(skb);
+}
-/**
- * process_packet
- */
-static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct nes_cm_core *cm_core)
+static void handle_fin_pkt(struct nes_cm_node *cm_node)
{
- int optionsize;
- int datasize;
- int ret = 0;
- struct tcphdr *tcph = tcp_hdr(skb);
- u32 inc_sequence;
- if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) {
- inc_sequence = ntohl(tcph->seq);
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence;
+ nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
+ "refcnt=%d\n", cm_node, cm_node->state,
+ atomic_read(&cm_node->ref_count));
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_MPAREJ_RCVD:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_LAST_ACK;
+ send_fin(cm_node, NULL);
+ break;
+ case NES_CM_STATE_MPAREQ_SENT:
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, NULL);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSING;
+ send_ack(cm_node, NULL);
+ /* Wait for the ACK: this is a simultaneous close.
+ * After the ACK is received, do not send anything else;
+ * just remove the node. */
+ break;
+ case NES_CM_STATE_FIN_WAIT2:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_TIME_WAIT;
+ send_ack(cm_node, NULL);
+ schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0);
+ break;
+ case NES_CM_STATE_TIME_WAIT:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n",
+ cm_node, cm_node->state);
+ break;
}
+}
- if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) {
- BUG_ON(!tcph);
- atomic_inc(&cm_accel_dropped_pkts);
- return -1;
+
+static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+
+ int reset = 0; /* whether to send reset in case of err.. */
+ atomic_inc(&cm_resets_recvd);
+ nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
+ " refcnt=%d\n", cm_node, cm_node->state,
+ atomic_read(&cm_node->ref_count));
+ cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V2:
+ cm_node->mpa_frame_rev = IETF_MPA_V1;
+ /* send a syn and goto syn sent state */
+ cm_node->state = NES_CM_STATE_SYN_SENT;
+ if (send_syn(cm_node, 0, NULL)) {
+ active_open_err(cm_node, skb, reset);
+ }
+ break;
+ case IETF_MPA_V1:
+ default:
+ active_open_err(cm_node, skb, reset);
+ break;
+ }
+ break;
+ case NES_CM_STATE_MPAREQ_RCVD:
+ atomic_inc(&cm_node->passive_state);
+ dev_kfree_skb_any(skb);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_LISTENING:
+ nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
+ passive_open_err(cm_node, skb, reset);
+ break;
+ case NES_CM_STATE_TSA:
+ active_open_err(cm_node, skb, reset);
+ break;
+ case NES_CM_STATE_CLOSED:
+ drop_packet(skb);
+ break;
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_LAST_ACK:
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ case NES_CM_STATE_TIME_WAIT:
+ cm_node->state = NES_CM_STATE_CLOSED;
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ drop_packet(skb);
+ break;
+ default:
+ drop_packet(skb);
+ break;
}
+}
- if (tcph->rst) {
- atomic_inc(&cm_resets_recvd);
- nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n",
- cm_node, cm_node->state, atomic_read(&cm_node->ref_count));
- switch (cm_node->state) {
- case NES_CM_STATE_LISTENING:
- rem_ref_cm_node(cm_core, cm_node);
- break;
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_CLOSED:
- break;
- case NES_CM_STATE_SYN_RCVD:
- nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X,"
- " remote 0x%08X:%04X, node state = %u\n",
- cm_node->loc_addr, cm_node->loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- cm_node->state);
- rem_ref_cm_node(cm_core, cm_node);
- break;
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREQ_SENT:
- default:
- nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X,"
- " remote 0x%08X:%04X, node state = %u refcnt=%d\n",
- cm_node->loc_addr, cm_node->loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- cm_node->state, atomic_read(&cm_node->ref_count));
- /* create event */
- cm_node->state = NES_CM_STATE_CLOSED;
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- break;
+static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ int ret = 0;
+ int datasize = skb->len;
+ u8 *dataloc = skb->data;
+
+ enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN;
+ u32 res_type;
+
+ ret = parse_mpa(cm_node, dataloc, &res_type, datasize);
+ if (ret) {
+ nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
+ if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) {
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for "
+ "cm_node=%p listener=%p state=%d\n", __func__,
+ __LINE__, cm_node, cm_node->listener,
+ cm_node->state);
+ active_open_err(cm_node, skb, 1);
+ } else {
+ passive_open_err(cm_node, skb, 1);
+ }
+ return;
+ }
+ switch (cm_node->state) {
+ case NES_CM_STATE_ESTABLISHED:
+ if (res_type == NES_MPA_REQUEST_REJECT)
+ /* A reject here would be a bug: we are the passive
+ * side receiving the MPA request, and an MPA reject
+ * can only arrive on an active open. */
+ WARN_ON(1);
+ cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
+ type = NES_CM_EVENT_MPA_REQ;
+ atomic_set(&cm_node->passive_state,
+ NES_PASSIVE_STATE_INDICATED);
+ break;
+ case NES_CM_STATE_MPAREQ_SENT:
+ cleanup_retrans_entry(cm_node);
+ if (res_type == NES_MPA_REQUEST_REJECT) {
+ type = NES_CM_EVENT_MPA_REJECT;
+ cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
+ } else {
+ type = NES_CM_EVENT_CONNECTED;
+ cm_node->state = NES_CM_STATE_TSA;
}
- return -1;
+
+ break;
+ default:
+ WARN_ON(1);
+ break;
}
+ dev_kfree_skb_any(skb);
+ create_event(cm_node, type);
+}
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
+{
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ active_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_SYN_RCVD:
+ passive_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_TSA:
+ default:
+ drop_packet(skb);
+ }
+}
- skb_pull(skb, ip_hdr(skb)->ihl << 2);
- skb_pull(skb, tcph->doff << 2);
+static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb)
+{
+ int err;
- datasize = skb->len;
- inc_sequence = ntohl(tcph->seq);
- nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X,"
- " rcv_nxt = 0x%08X Flags: %s %s.\n",
- datasize, inc_sequence, ntohl(tcph->ack_seq),
- cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""),
- (tcph->ack ? "ACK":""));
-
- if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt)
- ) {
- nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X,"
- " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n",
- datasize, inc_sequence, ntohl(tcph->ack_seq),
- cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":""));
- if (cm_node->state == NES_CM_STATE_LISTENING) {
- rem_ref_cm_node(cm_core, cm_node);
- }
- return -1;
+ err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1;
+ if (err)
+ active_open_err(cm_node, skb, 1);
+
+ return err;
+}
+
+static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb)
+{
+ int err = 0;
+ u32 seq;
+ u32 ack_seq;
+ u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+ u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ u32 rcv_wnd;
+
+ seq = ntohl(tcph->seq);
+ ack_seq = ntohl(tcph->ack_seq);
+ rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ if (ack_seq != loc_seq_num)
+ err = 1;
+ else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
+ err = 1;
+ if (err) {
+ nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
+ "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+ cm_node->listener, cm_node->state);
+ indicate_pkt_err(cm_node, skb);
+ nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
+ "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
+ rcv_wnd);
}
+ return err;
+}
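check_seq() accepts a segment only when its ACK matches our last sent sequence number and its sequence number falls inside the receive window. Assuming between() behaves like the usual modular TCP window helper, the window test is equivalent to this sketch:

#include <linux/types.h>

/* sketch of the acceptance test, relying on unsigned 32-bit wraparound */
static bool sketch_seq_in_window(u32 seq, u32 rcv_nxt, u32 rcv_wnd)
{
	return (u32)(seq - rcv_nxt) <= rcv_wnd;
}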
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+/*
+ * handle_syn_pkt() handles the passive side. The SYN arrives either for a
+ * node just created from a listener, or as a retransmitted packet, in which
+ * case it is simply dropped.
+ */
+static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ skb_trim(skb, 0);
+ inc_sequence = ntohl(tcph->seq);
- if (optionsize) {
- u8 *optionsloc = (u8 *)&tcph[1];
- if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) {
- nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node);
- send_reset(cm_node);
- if (cm_node->state != NES_CM_STATE_SYN_SENT)
- rem_ref_cm_node(cm_core, cm_node);
- return 0;
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_MPAREQ_SENT:
+ /* Rcvd syn on active open connection*/
+ active_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_LISTENING:
+ /* Passive OPEN */
+ if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+ cm_node->listener->backlog) {
+ nes_debug(NES_DBG_CM, "drop syn due to backlog "
+ "pressure \n");
+ cm_backlog_drops++;
+ passive_open_err(cm_node, skb, 0);
+ break;
}
- } else if (tcph->syn)
- cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
+ 1);
+ if (ret) {
+ passive_open_err(cm_node, skb, 0);
+ /* drop pkt */
+ break;
+ }
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ BUG_ON(cm_node->send_entry);
+ cm_node->accept_pend = 1;
+ atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+ cm_node->state = NES_CM_STATE_SYN_RCVD;
+ send_syn(cm_node, 1, skb);
+ break;
+ case NES_CM_STATE_CLOSED:
+ cleanup_retrans_entry(cm_node);
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, skb);
+ break;
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ default:
+ drop_packet(skb);
+ break;
+ }
+}
- cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
- cm_node->tcp_cntxt.snd_wscale;
+static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int ret;
+ u32 inc_sequence;
+ int optionsize;
- if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) {
- cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ skb_trim(skb, 0);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_SENT:
+ cleanup_retrans_entry(cm_node);
+ /* active open */
+ if (check_syn(cm_node, tcph, skb))
+ return;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
+ if (ret) {
+ nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ send_mpa_request(cm_node, skb);
+ cm_node->state = NES_CM_STATE_MPAREQ_SENT;
+ break;
+ case NES_CM_STATE_MPAREQ_RCVD:
+ /* passive open, so should not be here */
+ passive_open_err(cm_node, skb, 1);
+ break;
+ case NES_CM_STATE_LISTENING:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ send_reset(cm_node, skb);
+ break;
+ case NES_CM_STATE_CLOSED:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ cleanup_retrans_entry(cm_node);
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, skb);
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_CLOSING:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_MPAREQ_SENT:
+ default:
+ drop_packet(skb);
+ break;
}
+}
- if (tcph->ack) {
+static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct tcphdr *tcph)
+{
+ int datasize = 0;
+ u32 inc_sequence;
+ int ret = 0;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+ if (check_seq(cm_node, tcph, skb))
+ return -EINVAL;
+
+ skb_pull(skb, tcph->doff << 2);
+ inc_sequence = ntohl(tcph->seq);
+ datasize = skb->len;
+ switch (cm_node->state) {
+ case NES_CM_STATE_SYN_RCVD:
+ /* Passive OPEN */
+ cleanup_retrans_entry(cm_node);
+ ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
+ if (ret)
+ break;
cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- /* read and stash current sequence number */
- if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) {
- nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !="
- " cm_node->tcp_cntxt.loc_seq_num\n");
- send_reset(cm_node);
- return 0;
- }
- if (cm_node->state == NES_CM_STATE_SYN_SENT)
- cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED;
- else {
- cm_node->state = NES_CM_STATE_ESTABLISHED;
- }
- break;
- case NES_CM_STATE_LAST_ACK:
- cm_node->state = NES_CM_STATE_CLOSED;
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cm_node->state = NES_CM_STATE_FIN_WAIT2;
- break;
- case NES_CM_STATE_CLOSING:
- cm_node->state = NES_CM_STATE_TIME_WAIT;
- /* need to schedule this to happen in 2MSL timeouts */
- cm_node->state = NES_CM_STATE_CLOSED;
- break;
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREQ_SENT:
- case NES_CM_STATE_CLOSE_WAIT:
- case NES_CM_STATE_TIME_WAIT:
- case NES_CM_STATE_CLOSED:
- break;
- case NES_CM_STATE_LISTENING:
- nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn);
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- send_reset(cm_node);
- /* send_reset bumps refcount, this should have been a new node */
- rem_ref_cm_node(cm_core, cm_node);
- return -1;
- break;
- case NES_CM_STATE_TSA:
- nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n");
- break;
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_FIN_WAIT2:
- default:
- nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n",
- cm_node->state);
- send_reset(cm_node);
- break;
+ cm_node->state = NES_CM_STATE_ESTABLISHED;
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ handle_rcv_mpa(cm_node, skb);
+ } else { /* rcvd ACK only */
+ dev_kfree_skb_any(skb);
+ }
+ break;
+ case NES_CM_STATE_ESTABLISHED:
+ /* Passive OPEN */
+ cleanup_retrans_entry(cm_node);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ handle_rcv_mpa(cm_node, skb);
+ } else {
+ drop_packet(skb);
}
+ break;
+ case NES_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ handle_rcv_mpa(cm_node, skb);
+ } else { /* Could be just an ack pkt.. */
+ dev_kfree_skb_any(skb);
+ }
+ break;
+ case NES_CM_STATE_LISTENING:
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ send_reset(cm_node, skb);
+ break;
+ case NES_CM_STATE_CLOSED:
+ cleanup_retrans_entry(cm_node);
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, skb);
+ break;
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_CLOSING:
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ drop_packet(skb);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ cleanup_retrans_entry(cm_node);
+ drop_packet(skb);
+ cm_node->state = NES_CM_STATE_FIN_WAIT2;
+ break;
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_TSA:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ case NES_CM_STATE_UNKNOWN:
+ default:
+ cleanup_retrans_entry(cm_node);
+ drop_packet(skb);
+ break;
}
+ return ret;
+}
- if (tcph->syn) {
- if (cm_node->state == NES_CM_STATE_LISTENING) {
- /* do not exceed backlog */
- atomic_inc(&cm_node->listener->pend_accepts_cnt);
- if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
- cm_node->listener->backlog) {
- nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n");
- cm_backlog_drops++;
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- rem_ref_cm_node(cm_core, cm_node);
- return 0;
- }
- cm_node->accept_pend = 1;
- }
- if (datasize == 0)
- cm_node->tcp_cntxt.rcv_nxt ++;
- if (cm_node->state == NES_CM_STATE_LISTENING) {
- cm_node->state = NES_CM_STATE_SYN_RCVD;
- send_syn(cm_node, 1);
- }
- if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) {
- cm_node->state = NES_CM_STATE_ESTABLISHED;
- /* send final handshake ACK */
- ret = send_ack(cm_node);
- if (ret < 0)
- return ret;
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
+ struct sk_buff *skb, int optionsize, int passive)
+{
+ u8 *optionsloc = (u8 *)&tcph[1];
- cm_node->state = NES_CM_STATE_MPAREQ_SENT;
- ret = send_mpa_request(cm_node);
- if (ret < 0)
- return ret;
+ if (optionsize) {
+ if (process_options(cm_node, optionsloc, optionsize,
+ (u32)tcph->syn)) {
+ nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
+ __func__, cm_node);
+ if (passive)
+ passive_open_err(cm_node, skb, 1);
+ else
+ active_open_err(cm_node, skb, 1);
+ return 1;
}
}
- if (tcph->fin) {
- cm_node->tcp_cntxt.rcv_nxt++;
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_MPAREQ_SENT:
- cm_node->state = NES_CM_STATE_CLOSE_WAIT;
- cm_node->state = NES_CM_STATE_LAST_ACK;
- ret = send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cm_node->state = NES_CM_STATE_CLOSING;
- ret = send_ack(cm_node);
- break;
- case NES_CM_STATE_FIN_WAIT2:
- cm_node->state = NES_CM_STATE_TIME_WAIT;
- cm_node->tcp_cntxt.loc_seq_num ++;
- ret = send_ack(cm_node);
- /* need to schedule this to happen in 2MSL timeouts */
- cm_node->state = NES_CM_STATE_CLOSED;
- break;
- case NES_CM_STATE_CLOSE_WAIT:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- case NES_CM_STATE_TSA:
- default:
- nes_debug(NES_DBG_CM, "Received a fin while in %x state\n",
- cm_node->state);
- ret = -EINVAL;
- break;
- }
+ cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+ cm_node->tcp_cntxt.snd_wscale;
+
+ if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+ cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+ return 0;
+}
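The send window derived in handle_tcp_options() is the raw 16-bit TCP window shifted by the peer's window-scale option. A small worked example, with illustrative values:

#include <linux/types.h>

/* mirrors the computation above: raw window 4096 with shift 7 -> 524288 */
static u32 sketch_scaled_snd_wnd(u16 raw_window, u8 snd_wscale)
{
	return (u32)raw_window << snd_wscale;
}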
+
+/*
+ * active_open_err() sends a reset if the reset flag is set and always
+ * generates an ABORTED event.
+ */
+static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ int reset)
+{
+ cleanup_retrans_entry(cm_node);
+ if (reset) {
+ nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
+ "state=%d\n", cm_node, cm_node->state);
+ add_ref_cm_node(cm_node);
+ send_reset(cm_node, skb);
+ } else {
+ dev_kfree_skb_any(skb);
}
- if (datasize) {
- u8 *dataloc = skb->data;
- /* figure out what state we are in and handle transition to next state */
- switch (cm_node->state) {
- case NES_CM_STATE_LISTENING:
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_CLOSE_WAIT:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- /* recv the mpa res frame, ret=frame len (incl priv data) */
- ret = parse_mpa(cm_node, dataloc, datasize);
- if (ret < 0)
- break;
- /* set the req frame payload len in skb */
- /* we are done handling this state, set node to a TSA state */
- cm_node->state = NES_CM_STATE_TSA;
- send_ack(cm_node);
- create_event(cm_node, NES_CM_EVENT_CONNECTED);
- break;
+ cm_node->state = NES_CM_STATE_CLOSED;
+ create_event(cm_node, NES_CM_EVENT_ABORTED);
+}
- case NES_CM_STATE_ESTABLISHED:
- /* we are expecting an MPA req frame */
- ret = parse_mpa(cm_node, dataloc, datasize);
- if (ret < 0) {
- break;
- }
- cm_node->state = NES_CM_STATE_TSA;
- send_ack(cm_node);
- /* we got a valid MPA request, create an event */
- create_event(cm_node, NES_CM_EVENT_MPA_REQ);
- break;
- case NES_CM_STATE_TSA:
- handle_exception_pkt(cm_node, skb);
- break;
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- default:
- ret = -1;
- }
+/*
+ * passive_open_err() either sends a reset, or frees the skb and removes
+ * the cm_node.
+ */
+static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ int reset)
+{
+ cleanup_retrans_entry(cm_node);
+ cm_node->state = NES_CM_STATE_CLOSED;
+ if (reset) {
+ nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
+ "cm_node=%p state =%d\n", cm_node, cm_node->state);
+ send_reset(cm_node, skb);
+ } else {
+ dev_kfree_skb_any(skb);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
}
+}
- return ret;
+/*
+ * free_retrans_entry() assumes that the retrans_list_lock has been
+ * acquired by the caller.
+ */
+static void free_retrans_entry(struct nes_cm_node *cm_node)
+{
+ struct nes_timer_entry *send_entry;
+
+ send_entry = cm_node->send_entry;
+ if (send_entry) {
+ cm_node->send_entry = NULL;
+ dev_kfree_skb_any(send_entry->skb);
+ kfree(send_entry);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ }
+}
+
+static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
}
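The split between these two helpers encodes a simple locking contract: free_retrans_entry() must be called with retrans_list_lock held, as nes_cm_timer_tick() does inside its locked region, while cleanup_retrans_entry() is the wrapper for callers that hold no lock. A sketch of the two call patterns:

unsigned long flags;

/* caller already inside the locked region (e.g. the timer tick) */
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
free_retrans_entry(cm_node);
spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);

/* caller holding no lock (e.g. the packet handlers) */
cleanup_retrans_entry(cm_node);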
+/**
+ * process_packet
+ * Consumes the skb: each handler either frees it or reuses it for a
+ * reply, so nothing is returned.
+ */
+static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
+ struct nes_cm_core *cm_core)
+{
+ enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
+ struct tcphdr *tcph = tcp_hdr(skb);
+ u32 fin_set = 0;
+ int ret = 0;
+
+ skb_pull(skb, ip_hdr(skb)->ihl << 2);
+
+ nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
+ "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
+ tcph->ack, tcph->rst, tcph->fin);
+
+ if (tcph->rst) {
+ pkt_type = NES_PKT_TYPE_RST;
+ } else if (tcph->syn) {
+ pkt_type = NES_PKT_TYPE_SYN;
+ if (tcph->ack)
+ pkt_type = NES_PKT_TYPE_SYNACK;
+ } else if (tcph->ack) {
+ pkt_type = NES_PKT_TYPE_ACK;
+ }
+ if (tcph->fin)
+ fin_set = 1;
+
+ switch (pkt_type) {
+ case NES_PKT_TYPE_SYN:
+ handle_syn_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_SYNACK:
+ handle_synack_pkt(cm_node, skb, tcph);
+ break;
+ case NES_PKT_TYPE_ACK:
+ ret = handle_ack_pkt(cm_node, skb, tcph);
+ if (fin_set && !ret)
+ handle_fin_pkt(cm_node);
+ break;
+ case NES_PKT_TYPE_RST:
+ handle_rst_pkt(cm_node, skb, tcph);
+ break;
+ default:
+ if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
+ handle_fin_pkt(cm_node);
+ drop_packet(skb);
+ break;
+ }
+}
/**
* mini_cm_listen - create a listen node with params
*/
static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
{
struct nes_cm_listener *listener;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
unsigned long flags;
+ int iwpm_err = 0;
nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
- cm_info->loc_addr, cm_info->loc_port);
+ cm_info->loc_addr, cm_info->loc_port);
/* cannot have multiple matching listeners */
- listener = find_listener(cm_core, htonl(cm_info->loc_addr),
- htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+ listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
+ NES_CM_LISTENER_EITHER_STATE, 1);
+
if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
/* find automatically incs ref count ??? */
atomic_dec(&listener->ref_count);
@@ -1596,6 +2385,22 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
}
if (!listener) {
+ nes_form_reg_msg(nesvnic, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+ if (iwpm_err) {
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ nes_form_pm_msg(cm_info, &pm_msg);
+ iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
+ if (iwpm_err)
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper query fail (err = %d).\n", iwpm_err);
+ else
+ nes_record_pm_msg(cm_info, &pm_msg);
+ }
+
/* create a CM listen node (1/2 node to compare incoming traffic to) */
listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
if (!listener) {
@@ -1603,8 +2408,10 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
return NULL;
}
- listener->loc_addr = htonl(cm_info->loc_addr);
- listener->loc_port = htons(cm_info->loc_port);
+ listener->loc_addr = cm_info->loc_addr;
+ listener->loc_port = cm_info->loc_port;
+ listener->mapped_loc_addr = cm_info->mapped_loc_addr;
+ listener->mapped_loc_port = cm_info->mapped_loc_port;
listener->reused_node = 0;
atomic_set(&listener->ref_count, 1);
@@ -1633,9 +2440,9 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
}
nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
- " listener = %p, backlog = %d, cm_id = %p.\n",
- cm_info->loc_addr, cm_info->loc_port,
- listener, listener->backlog, listener->cm_id);
+ " listener = %p, backlog = %d, cm_id = %p.\n",
+ cm_info->loc_addr, cm_info->loc_port,
+ listener, listener->backlog, listener->cm_id);
return listener;
}
@@ -1645,23 +2452,15 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
* mini_cm_connect - make a connection node with params
*/
static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic,
- struct ietf_mpa_frame *mpa_frame,
- struct nes_cm_info *cm_info)
+ struct nes_vnic *nesvnic, u16 private_data_len,
+ void *private_data, struct nes_cm_info *cm_info)
{
int ret = 0;
struct nes_cm_node *cm_node;
struct nes_cm_listener *loopbackremotelistener;
struct nes_cm_node *loopbackremotenode;
struct nes_cm_info loopback_cm_info;
-
- u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) +
- ntohs(mpa_frame->priv_data_len);
-
- cm_info->loc_addr = htonl(cm_info->loc_addr);
- cm_info->rem_addr = htonl(cm_info->rem_addr);
- cm_info->loc_port = htons(cm_info->loc_port);
- cm_info->rem_port = htons(cm_info->rem_port);
+ u8 *start_buff;
/* create a CM connection node */
cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
@@ -1673,55 +2472,85 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
if (cm_info->loc_addr == cm_info->rem_addr) {
- loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr,
- cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE);
+ loopbackremotelistener = find_listener(cm_core,
+ cm_node->mapped_loc_addr, cm_node->mapped_rem_port,
+ NES_CM_LISTENER_ACTIVE_STATE, 0);
if (loopbackremotelistener == NULL) {
create_event(cm_node, NES_CM_EVENT_ABORTED);
} else {
- atomic_inc(&cm_loopbacks);
loopback_cm_info = *cm_info;
loopback_cm_info.loc_port = cm_info->rem_port;
loopback_cm_info.rem_port = cm_info->loc_port;
+ loopback_cm_info.mapped_loc_port =
+ cm_info->mapped_rem_port;
+ loopback_cm_info.mapped_rem_port =
+ cm_info->mapped_loc_port;
loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
- loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info,
- loopbackremotelistener);
+ loopbackremotenode = make_cm_node(cm_core, nesvnic,
+ &loopback_cm_info, loopbackremotelistener);
+ if (!loopbackremotenode) {
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ return NULL;
+ }
+ atomic_inc(&cm_loopbacks);
loopbackremotenode->loopbackpartner = cm_node;
- loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
+ loopbackremotenode->tcp_cntxt.rcv_wscale =
+ NES_CM_DEFAULT_RCV_WND_SCALE;
cm_node->loopbackpartner = loopbackremotenode;
- memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data,
- mpa_frame_size);
- loopbackremotenode->mpa_frame_size = mpa_frame_size -
- sizeof(struct ietf_mpa_frame);
+ memcpy(loopbackremotenode->mpa_frame_buf, private_data,
+ private_data_len);
+ loopbackremotenode->mpa_frame_size = private_data_len;
- /* we are done handling this state, set node to a TSA state */
+ /* we are done handling this state. */
+ /* set node to a TSA state */
cm_node->state = NES_CM_STATE_TSA;
- cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num;
- loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num;
- cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale;
- loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
-
+ cm_node->tcp_cntxt.rcv_nxt =
+ loopbackremotenode->tcp_cntxt.loc_seq_num;
+ loopbackremotenode->tcp_cntxt.rcv_nxt =
+ cm_node->tcp_cntxt.loc_seq_num;
+ cm_node->tcp_cntxt.max_snd_wnd =
+ loopbackremotenode->tcp_cntxt.rcv_wnd;
+ loopbackremotenode->tcp_cntxt.max_snd_wnd =
+ cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wnd =
+ loopbackremotenode->tcp_cntxt.rcv_wnd;
+ loopbackremotenode->tcp_cntxt.snd_wnd =
+ cm_node->tcp_cntxt.rcv_wnd;
+ cm_node->tcp_cntxt.snd_wscale =
+ loopbackremotenode->tcp_cntxt.rcv_wscale;
+ loopbackremotenode->tcp_cntxt.snd_wscale =
+ cm_node->tcp_cntxt.rcv_wscale;
+ loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD;
create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ);
}
return cm_node;
}
- /* set our node side to client (active) side */
- cm_node->tcp_cntxt.client = 1;
- /* init our MPA frame ptr */
- memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size);
- cm_node->mpa_frame_size = mpa_frame_size;
+ start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
+ cm_node->mpa_frame_size = private_data_len;
+
+ memcpy(start_buff, private_data, private_data_len);
/* send a syn and goto syn sent state */
cm_node->state = NES_CM_STATE_SYN_SENT;
- ret = send_syn(cm_node, 0);
+ ret = send_syn(cm_node, 0, NULL);
+
+ if (ret) {
+ /* error sending the SYN; free up the cm_node struct */
+ nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
+ "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_addr, cm_node->rem_port, cm_node,
+ cm_node->cm_id);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ cm_node = NULL;
+ }
- nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x,"
- " cm_node=%p, cm_id = %p.\n",
- cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id);
+ if (cm_node) {
+ nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
+ "port=0x%04x, cm_node=%p, cm_id = %p.\n",
+ cm_node->rem_addr, cm_node->rem_port, cm_node,
+ cm_node->cm_id);
+ }
return cm_node;
}
@@ -1731,8 +2560,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
* mini_cm_accept - accept a connection
* This function is never called
*/
-static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame,
- struct nes_cm_node *cm_node)
+static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
{
return 0;
}
@@ -1741,31 +2569,62 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mp
/**
* mini_cm_reject - reject and teardown a connection
*/
-static int mini_cm_reject(struct nes_cm_core *cm_core,
- struct ietf_mpa_frame *mpa_frame,
- struct nes_cm_node *cm_node)
+static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
{
int ret = 0;
- struct sk_buff *skb;
- u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) +
- ntohs(mpa_frame->priv_data_len);
+ int err = 0;
+ int passive_state;
+ struct nes_cm_event event;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct nes_cm_node *loopback = cm_node->loopbackpartner;
- skb = get_free_pkt(cm_node);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
+ nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
+ __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
- /* send an MPA Request frame */
- form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
+ if (cm_node->tcp_cntxt.client)
+ return ret;
+ cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- ret = send_fin(cm_node, NULL);
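+ /* non-loopback: send an MPA reject unless a reset is already pending; */
+ /* loopback: deliver an MPA-reject event straight to the partner node */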
+ if (!loopback) {
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == NES_SEND_RESET_EVENT) {
+ cm_node->state = NES_CM_STATE_CLOSED;
+ rem_ref_cm_node(cm_core, cm_node);
+ } else {
+ if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+ rem_ref_cm_node(cm_core, cm_node);
+ } else {
+ ret = send_mpa_reject(cm_node);
+ if (ret) {
+ cm_node->state = NES_CM_STATE_CLOSED;
+ err = send_reset(cm_node, NULL);
+ if (err)
+ WARN_ON(1);
+ } else {
+ cm_id->add_ref(cm_id);
+ }
+ }
+ }
+ } else {
+ cm_node->cm_id = NULL;
+ if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+ rem_ref_cm_node(cm_core, cm_node);
+ rem_ref_cm_node(cm_core, loopback);
+ } else {
+ event.cm_node = loopback;
+ event.cm_info.rem_addr = loopback->rem_addr;
+ event.cm_info.loc_addr = loopback->loc_addr;
+ event.cm_info.rem_port = loopback->rem_port;
+ event.cm_info.loc_port = loopback->loc_port;
+ event.cm_info.cm_id = loopback->cm_id;
+ cm_event_mpa_reject(&event);
+ rem_ref_cm_node(cm_core, cm_node);
+ loopback->state = NES_CM_STATE_CLOSING;
- if (ret < 0) {
- printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n");
- return ret;
+ cm_id = loopback->cm_id;
+ rem_ref_cm_node(cm_core, loopback);
+ cm_id->rem_ref(cm_id);
+ }
}
return ret;
@@ -1783,37 +2642,45 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
return -EINVAL;
switch (cm_node->state) {
- /* if passed in node is null, create a reference key node for node search */
- /* check if we found an owner node for this pkt */
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_MPAREQ_SENT:
- cm_node->state = NES_CM_STATE_FIN_WAIT1;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_CLOSE_WAIT:
- cm_node->state = NES_CM_STATE_LAST_ACK;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_TIME_WAIT:
- case NES_CM_STATE_CLOSING:
- ret = -1;
- break;
- case NES_CM_STATE_LISTENING:
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- case NES_CM_STATE_CLOSED:
- case NES_CM_STATE_TSA:
- ret = rem_ref_cm_node(cm_core, cm_node);
- break;
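+ /* most active states are now torn down with a reset instead of a FIN; */
+ /* closed, listener-destroyed and TSA states just drop the node reference */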
+ case NES_CM_STATE_SYN_RCVD:
+ case NES_CM_STATE_SYN_SENT:
+ case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
+ case NES_CM_STATE_ESTABLISHED:
+ case NES_CM_STATE_ACCEPTING:
+ case NES_CM_STATE_MPAREQ_SENT:
+ case NES_CM_STATE_MPAREQ_RCVD:
+ cleanup_retrans_entry(cm_node);
+ send_reset(cm_node, NULL);
+ break;
+ case NES_CM_STATE_CLOSE_WAIT:
+ cm_node->state = NES_CM_STATE_LAST_ACK;
+ send_fin(cm_node, NULL);
+ break;
+ case NES_CM_STATE_FIN_WAIT1:
+ case NES_CM_STATE_FIN_WAIT2:
+ case NES_CM_STATE_LAST_ACK:
+ case NES_CM_STATE_TIME_WAIT:
+ case NES_CM_STATE_CLOSING:
+ ret = -1;
+ break;
+ case NES_CM_STATE_LISTENING:
+ cleanup_retrans_entry(cm_node);
+ send_reset(cm_node, NULL);
+ break;
+ case NES_CM_STATE_MPAREJ_RCVD:
+ case NES_CM_STATE_UNKNOWN:
+ case NES_CM_STATE_INITED:
+ case NES_CM_STATE_CLOSED:
+ case NES_CM_STATE_LISTENER_DESTROYED:
+ ret = rem_ref_cm_node(cm_core, cm_node);
+ break;
+ case NES_CM_STATE_TSA:
+ if (cm_node->send_entry)
+ printk(KERN_ERR "ERROR Close got called from STATE_TSA "
+ "send_entry=%p\n", cm_node->send_entry);
+ ret = rem_ref_cm_node(cm_core, cm_node);
+ break;
}
- cm_node->cm_id = NULL;
return ret;
}
@@ -1822,92 +2689,102 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
* recv_pkt - recv an ETHERNET packet, and process it through CM
* node state machine
*/
-static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic,
- struct sk_buff *skb)
+static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
+ struct nes_vnic *nesvnic, struct sk_buff *skb)
{
struct nes_cm_node *cm_node = NULL;
struct nes_cm_listener *listener = NULL;
struct iphdr *iph;
struct tcphdr *tcph;
struct nes_cm_info nfo;
- int ret = 0;
+ int skb_handled = 1;
+ __be32 tmp_daddr, tmp_saddr;
- if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
- ret = -EINVAL;
- goto out;
- }
+ if (!skb)
+ return 0;
+ if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr))
+ return 0;
iph = (struct iphdr *)skb->data;
tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(*tcph));
- skb->len = ntohs(iph->tot_len);
nfo.loc_addr = ntohl(iph->daddr);
nfo.loc_port = ntohs(tcph->dest);
nfo.rem_addr = ntohl(iph->saddr);
nfo.rem_port = ntohs(tcph->source);
- nes_debug(NES_DBG_CM, "Received packet: dest=" NIPQUAD_FMT
- ":0x%04X src=" NIPQUAD_FMT ":0x%04X\n",
- NIPQUAD(iph->daddr), tcph->dest,
- NIPQUAD(iph->saddr), tcph->source);
+ /* If port mapper is available these should be mapped address info */
+ nfo.mapped_loc_addr = ntohl(iph->daddr);
+ nfo.mapped_loc_port = ntohs(tcph->dest);
+ nfo.mapped_rem_addr = ntohl(iph->saddr);
+ nfo.mapped_rem_port = ntohs(tcph->source);
- /* note: this call is going to increment cm_node ref count */
- cm_node = find_node(cm_core,
- nfo.rem_port, nfo.rem_addr,
- nfo.loc_port, nfo.loc_addr);
+ tmp_daddr = cpu_to_be32(iph->daddr);
+ tmp_saddr = cpu_to_be32(iph->saddr);
- if (!cm_node) {
- listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port,
- NES_CM_LISTENER_ACTIVE_STATE);
- if (listener) {
- nfo.cm_id = listener->cm_id;
- nfo.conn_type = listener->conn_type;
- } else {
- nfo.cm_id = NULL;
- nfo.conn_type = 0;
- }
+ nes_debug(NES_DBG_CM, "Received packet: dest=%pI4:0x%04X src=%pI4:0x%04X\n",
+ &tmp_daddr, tcph->dest, &tmp_saddr, tcph->source);
+
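+ /* single-pass loop: unmatched packets are accepted only if they are SYNs */
+ /* for an active listener; TSA-state nodes forward to the mgt QP in pau mode */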
+ do {
+ cm_node = find_node(cm_core,
+ nfo.mapped_rem_port, nfo.mapped_rem_addr,
+ nfo.mapped_loc_port, nfo.mapped_loc_addr);
- cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener);
if (!cm_node) {
- nes_debug(NES_DBG_CM, "Unable to allocate node\n");
- if (listener) {
- nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n");
+ /* The only type of packet accepted is for */
+ /* the PASSIVE open (SYN only) */
+ if ((!tcph->syn) || (tcph->ack)) {
+ skb_handled = 0;
+ break;
+ }
+ listener = find_listener(cm_core, nfo.mapped_loc_addr,
+ nfo.mapped_loc_port,
+ NES_CM_LISTENER_ACTIVE_STATE, 0);
+ if (!listener) {
+ nfo.cm_id = NULL;
+ nfo.conn_type = 0;
+ nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n");
+ skb_handled = 0;
+ break;
+ }
+ nfo.cm_id = listener->cm_id;
+ nfo.conn_type = listener->conn_type;
+ cm_node = make_cm_node(cm_core, nesvnic, &nfo,
+ listener);
+ if (!cm_node) {
+ nes_debug(NES_DBG_CM, "Unable to allocate "
+ "node\n");
+ cm_packets_dropped++;
atomic_dec(&listener->ref_count);
+ dev_kfree_skb_any(skb);
+ break;
}
- ret = -1;
- goto out;
- }
- if (!listener) {
- nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n",
- nfo.loc_port, atomic_read(&cm_node->ref_count));
- if (!tcph->rst) {
- nes_debug(NES_DBG_CM, "Packet found for unknown port=%d"
- " rem_port=%d refcnt=%d\n",
- nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count));
-
- cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq);
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- send_reset(cm_node);
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = NES_CM_STATE_LISTENING;
+ } else {
+ cm_packets_dropped++;
+ rem_ref_cm_node(cm_core, cm_node);
+ dev_kfree_skb_any(skb);
+ break;
}
- rem_ref_cm_node(cm_core, cm_node);
- ret = -1;
- goto out;
+ add_ref_cm_node(cm_node);
+ } else if (cm_node->state == NES_CM_STATE_TSA) {
+ if (cm_node->nesqp->pau_mode)
+ nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp);
+ else {
+ rem_ref_cm_node(cm_core, cm_node);
+ atomic_inc(&cm_accel_dropped_pkts);
+ dev_kfree_skb_any(skb);
+ }
+ break;
}
- add_ref_cm_node(cm_node);
- cm_node->state = NES_CM_STATE_LISTENING;
- }
-
- nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n",
- cm_node, skb->data);
- process_packet(cm_node, skb, cm_core);
-
- rem_ref_cm_node(cm_core, cm_node);
- out:
- if (skb)
- dev_kfree_skb_any(skb);
- return ret;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, sizeof(*tcph));
+ skb->len = ntohs(iph->tot_len);
+ process_packet(cm_node, skb, cm_core);
+ rem_ref_cm_node(cm_core, cm_node);
+ } while (0);
+ return skb_handled;
}
@@ -1916,10 +2793,7 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni
*/
static struct nes_cm_core *nes_cm_alloc_core(void)
{
- int i;
-
struct nes_cm_core *cm_core;
- struct sk_buff *skb = NULL;
/* setup the CM core */
/* alloc top level core control structure */
@@ -1931,25 +2805,12 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
init_timer(&cm_core->tcp_timer);
cm_core->tcp_timer.function = nes_cm_timer_tick;
- cm_core->mtu = NES_CM_DEFAULT_MTU;
+ cm_core->mtu = NES_CM_DEFAULT_MTU;
cm_core->state = NES_CM_STATE_INITED;
cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
atomic_set(&cm_core->events_posted, 0);
- /* init the packet lists */
- skb_queue_head_init(&cm_core->tx_free_list);
-
- for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) {
- skb = dev_alloc_skb(cm_core->mtu);
- if (!skb) {
- kfree(cm_core);
- return NULL;
- }
- /* add 'raw' skb to free frame list */
- skb_queue_head(&cm_core->tx_free_list, skb);
- }
-
cm_core->api = &nes_cm_api;
spin_lock_init(&cm_core->ht_lock);
@@ -1982,9 +2843,8 @@ static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
barrier();
- if (timer_pending(&cm_core->tcp_timer)) {
+ if (timer_pending(&cm_core->tcp_timer))
del_timer(&cm_core->tcp_timer);
- }
destroy_workqueue(cm_core->event_wq);
destroy_workqueue(cm_core->disconn_wq);
@@ -2012,15 +2872,15 @@ static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
int ret = 0;
switch (type) {
- case NES_CM_SET_PKT_SIZE:
- cm_core->mtu = value;
- break;
- case NES_CM_SET_FREE_PKT_Q_SIZE:
- cm_core->free_tx_pkt_max = value;
- break;
- default:
- /* unknown set option */
- ret = -EINVAL;
+ case NES_CM_SET_PKT_SIZE:
+ cm_core->mtu = value;
+ break;
+ case NES_CM_SET_FREE_PKT_Q_SIZE:
+ cm_core->free_tx_pkt_max = value;
+ break;
+ default:
+ /* unknown set option */
+ ret = -EINVAL;
}
return ret;
@@ -2039,8 +2899,8 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
return -EINVAL;
nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
- NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
- NES_QPCONTEXT_MISC_DROS);
+ NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
+ NES_QPCONTEXT_MISC_DROS);
if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
@@ -2050,15 +2910,15 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
- (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
+ (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
- (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
- NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
+ (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
+ NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
- (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
- NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
+ (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
+ NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
nesqp->nesqp_context->ts_recent = 0;
@@ -2067,24 +2927,24 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
- cm_node->tcp_cntxt.rcv_wscale);
+ cm_node->tcp_cntxt.rcv_wscale);
nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
nesqp->nesqp_context->srtt = 0;
nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
- nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss);
+ nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
- " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
- nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
- le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
- le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
- le32_to_cpu(nesqp->nesqp_context->misc));
+ " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
+ nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+ le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+ cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
+ le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
+ le32_to_cpu(nesqp->nesqp_context->misc));
nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
@@ -2101,22 +2961,16 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
*/
int nes_cm_disconn(struct nes_qp *nesqp)
{
- unsigned long flags;
+ struct disconn_work *work;
- spin_lock_irqsave(&nesqp->lock, flags);
- if (nesqp->disconn_pending == 0) {
- nesqp->disconn_pending++;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- /* nes_add_ref(&nesqp->ibqp); */
- /* init our disconnect work element, to */
- INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
-
- queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_rem_ref(&nesqp->ibqp);
- }
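+ /* defer the disconnect to a per-call work item; the QP reference taken */
+ /* here is released by nes_disconnect_worker() once the work completes */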
+ work = kzalloc(sizeof *work, GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM; /* Timer will clean up */
+ nes_add_ref(&nesqp->ibqp);
+ work->nesqp = nesqp;
+ INIT_WORK(&work->work, nes_disconnect_worker);
+ queue_work(g_cm_core->disconn_wq, &work->work);
return 0;
}
@@ -2126,11 +2980,14 @@ int nes_cm_disconn(struct nes_qp *nesqp)
*/
static void nes_disconnect_worker(struct work_struct *work)
{
- struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
+ struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+ struct nes_qp *nesqp = dwork->nesqp;
+ kfree(dwork);
nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
- nesqp->last_aeq, nesqp->hwqp.qp_id);
+ nesqp->last_aeq, nesqp->hwqp.qp_id);
nes_cm_disconn_true(nesqp);
+ nes_rem_ref(&nesqp->ibqp);
}
@@ -2147,7 +3004,12 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
u16 last_ae;
u8 original_hw_tcp_state;
u8 original_ibqp_state;
- u8 issued_disconnect_reset = 0;
+ int disconn_status = 0;
+ int issue_disconn = 0;
+ int issue_close = 0;
+ int issue_flush = 0;
+ u32 flush_q = NES_CQP_FLUSH_RQ;
+ struct ib_event ibevent;
if (!nesqp) {
nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
@@ -2159,9 +3021,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
/* make sure we haven't already closed this connection */
if (!cm_id) {
nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
- nesqp->hwqp.qp_id);
+ nesqp->hwqp.qp_id);
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_rem_ref(&nesqp->ibqp);
return -1;
}
@@ -2169,67 +3030,86 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
original_hw_tcp_state = nesqp->hw_tcp_state;
- original_ibqp_state = nesqp->ibqp_state;
+ original_ibqp_state = nesqp->ibqp_state;
last_ae = nesqp->last_aeq;
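+ /* decide, under the QP lock, whether to issue a disconnect event, a */
+ /* close event and/or a queue flush based on the last AE and TCP state */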
+ if (nesqp->term_flags) {
+ issue_disconn = 1;
+ issue_close = 1;
+ nesqp->cm_id = NULL;
+ del_timer(&nesqp->terminate_timer);
+ if (nesqp->flush_issued == 0) {
+ nesqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ issue_disconn = 1;
+ if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
+ disconn_status = -ECONNRESET;
+ }
- nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state);
+ if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+ (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
+ (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
+ (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ issue_close = 1;
+ nesqp->cm_id = NULL;
+ if (nesqp->flush_issued == 0) {
+ nesqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ if ((issue_flush) && (nesqp->destroyed == 0)) {
+ /* Flush the queue(s) */
+ if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
+ flush_q |= NES_CQP_FLUSH_SQ;
+ flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
+
+ if (nesqp->term_flags) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.event = nesqp->terminate_eventtype;
+ ibevent.element.qp = &nesqp->ibqp;
+ if (nesqp->ibqp.event_handler)
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ }
- if ((nesqp->cm_id) && (cm_id->event_handler)) {
- if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- ((original_ibqp_state == IB_QPS_RTS) &&
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ if ((cm_id) && (cm_id->event_handler)) {
+ if (issue_disconn) {
atomic_inc(&cm_disconnects);
cm_event.event = IW_CM_EVENT_DISCONNECT;
- if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
- issued_disconnect_reset = 1;
- cm_event.status = IW_CM_EVENT_STATUS_RESET;
- nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for "
- " QP%u, cm_id = %p. \n",
- nesqp->hwqp.qp_id, cm_id);
- } else {
- cm_event.status = IW_CM_EVENT_STATUS_OK;
- }
-
+ cm_event.status = disconn_status;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for "
- " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n",
- nesqp->hwqp.qp_id,
- nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id,
- atomic_read(&nesqp->refcount));
+ nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
+ " for QP%u, SQ Head = %u, SQ Tail = %u. "
+ "cm_id = %p, refcount = %u.\n",
+ nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
+ nesqp->hwqp.sq_tail, cm_id,
+ atomic_read(&nesqp->refcount));
- spin_unlock_irqrestore(&nesqp->lock, flags);
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
- spin_lock_irqsave(&nesqp->lock, flags);
+ nes_debug(NES_DBG_CM, "OFA CM event_handler "
+ "returned, ret=%d\n", ret);
}
- nesqp->disconn_pending = 0;
- /* There might have been another AE while the lock was released */
- original_hw_tcp_state = nesqp->hw_tcp_state;
- original_ibqp_state = nesqp->ibqp_state;
- last_ae = nesqp->last_aeq;
-
- if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
- ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
- (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
- (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ if (issue_close) {
atomic_inc(&cm_closes);
- nesqp->cm_id = NULL;
- nesqp->in_disconnect = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
nes_disconnect(nesqp, 1);
cm_id->provider_data = nesqp;
/* Send up the close complete event */
cm_event.event = IW_CM_EVENT_CLOSE;
- cm_event.status = IW_CM_EVENT_STATUS_OK;
+ cm_event.status = 0;
cm_event.provider_data = cm_id->provider_data;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
@@ -2237,41 +3117,12 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
cm_event.private_data_len = 0;
ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret) {
+ if (ret)
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
- }
cm_id->rem_ref(cm_id);
-
- spin_lock_irqsave(&nesqp->lock, flags);
- if (nesqp->flush_issued == 0) {
- nesqp->flush_issued = 1;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- }
-
- /* This reference is from either ModifyQP or the AE processing,
- there is still a race here with modifyqp */
- nes_rem_ref(&nesqp->ibqp);
-
- } else {
- cm_id = nesqp->cm_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- /* check to see if the inbound reset beat the outbound reset */
- if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
- nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset"
- " beating the outbound reset.\n",
- nesqp->hwqp.qp_id);
- nes_rem_ref(&nesqp->ibqp);
- }
}
- } else {
- nesqp->disconn_pending = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
}
- nes_rem_ref(&nesqp->ibqp);
return 0;
}
@@ -2285,15 +3136,17 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
int ret = 0;
struct nes_vnic *nesvnic;
struct nes_device *nesdev;
+ struct nes_ib_device *nesibdev;
nesvnic = to_nesvnic(nesqp->ibqp.device);
if (!nesvnic)
return -EINVAL;
nesdev = nesvnic->nesdev;
+ nesibdev = nesvnic->nesibdev;
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
- atomic_read(&nesvnic->netdev->refcnt));
+ netdev_refcnt_read(nesvnic->netdev));
if (nesqp->active_conn) {
@@ -2302,9 +3155,11 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
} else {
/* Need to free the Last Streaming Mode Message */
if (nesqp->ietf_frame) {
+ if (nesqp->lsmm_mr)
+ nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
pci_free_consistent(nesdev->pcidev,
- nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
- nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+ nesqp->private_data_len + nesqp->ietf_frame_size,
+ nesqp->ietf_frame, nesqp->ietf_frame_pbase);
}
}
@@ -2313,7 +3168,6 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
nes_debug(NES_DBG_CM, "Call close API\n");
g_cm_core->api->close(g_cm_core, nesqp->cm_node);
- nesqp->cm_node = NULL;
}
return ret;
@@ -2338,6 +3192,20 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct nes_v4_quad nes_quad;
u32 crc_value;
int ret;
+ int passive_state;
+ struct nes_ib_device *nesibdev;
+ struct ib_mr *ibmr = NULL;
+ struct ib_phys_buf ibphysbuf;
+ struct nes_pd *nespd;
+ u64 tagged_offset;
+ u8 mpa_frame_offset = 0;
+ struct ietf_mpa_v2 *mpa_v2_frame;
+ u8 start_addr = 0;
+ u8 *start_ptr = &start_addr;
+ u8 **start_buff = &start_ptr;
+ u16 buff_len = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
@@ -2349,72 +3217,117 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nesdev = nesvnic->nesdev;
adapter = nesdev->nesadapter;
- nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
- nesvnic, nesvnic->netdev, nesvnic->netdev->name);
-
- /* since this is from a listen, we were able to put node handle into cm_id */
cm_node = (struct nes_cm_node *)cm_id->provider_data;
+ nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p,"
+ "%s\n", cm_node, nesvnic, nesvnic->netdev,
+ nesvnic->netdev->name);
+
+ if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
+ if (cm_node->loopbackpartner)
+ rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ return -EINVAL;
+ }
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == NES_SEND_RESET_EVENT) {
+ rem_ref_cm_node(cm_node->cm_core, cm_node);
+ return -ECONNRESET;
+ }
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
+ cm_node->nesqp = nesqp;
- nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n",
- nesqp->hwqp.qp_id, cm_node, jiffies);
+
+ nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
+ nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
atomic_inc(&cm_accepts);
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
- atomic_read(&nesvnic->netdev->refcnt));
+ netdev_refcnt_read(nesvnic->netdev));
+
+ nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2);
+ /* allocate the ietf frame and space for private data */
+ nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
+ nesqp->ietf_frame_size + conn_param->private_data_len,
+ &nesqp->ietf_frame_pbase);
+
+ if (!nesqp->ietf_frame) {
+ nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
+ return -ENOMEM;
+ }
+ mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame;
- /* allocate the ietf frame and space for private data */
- nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
- sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
- &nesqp->ietf_frame_pbase);
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1)
+ mpa_frame_offset = 4;
+
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
+ cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+ }
+
+ memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
+ conn_param->private_data_len);
+
+ cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY);
+ nesqp->private_data_len = conn_param->private_data_len;
- if (!nesqp->ietf_frame) {
- nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
+ /* setup our first outgoing iWarp send WQE (the IETF frame response) */
+ wqe = &nesqp->hwqp.sq_vbase[0];
+
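+ /* non-loopback accept: register the MPA reply frame as a physical MR so */
+ /* the first SQ WQE can stream it as the LSMM (its lkey goes into STAG0) */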
+ if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) {
+ u64temp = (unsigned long)nesqp;
+ nesibdev = nesvnic->nesibdev;
+ nespd = nesqp->nespd;
+ ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
+ ibphysbuf.size = buff_len;
+ tagged_offset = (u64)(unsigned long)*start_buff;
+ ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
+ &ibphysbuf, 1,
+ IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (!ibmr) {
+ nes_debug(NES_DBG_CM, "Unable to register memory region"
+ "for lSMM for cm_node = %p \n",
+ cm_node);
+ pci_free_consistent(nesdev->pcidev,
+ nesqp->private_data_len + nesqp->ietf_frame_size,
+ nesqp->ietf_frame, nesqp->ietf_frame_pbase);
return -ENOMEM;
}
-
- /* setup the MPA frame */
- nesqp->private_data_len = conn_param->private_data_len;
- memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
-
- memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
- conn_param->private_data_len);
-
- nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len);
- nesqp->ietf_frame->rev = mpa_version;
- nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
-
- /* setup our first outgoing iWarp send WQE (the IETF frame response) */
- wqe = &nesqp->hwqp.sq_vbase[0];
-
- if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) {
- u64temp = (unsigned long)nesqp;
- u64temp |= NES_SW_CONTEXT_ALIGN>>1;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
- u64temp);
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
- cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame));
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)nesqp->ietf_frame_pbase);
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32));
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
- cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame));
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
-
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
- NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU);
- } else {
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM));
+ ibmr->pd = &nespd->ibpd;
+ ibmr->device = nespd->ibpd.device;
+ nesqp->lsmm_mr = ibmr;
+
+ u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+ u64temp);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+ cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
+ NES_IWARP_SQ_WQE_WRPDU);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
+ cpu_to_le32(buff_len);
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+ (u64)(unsigned long)(*start_buff));
+ wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
+ cpu_to_le32(buff_len);
+ wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
+ if (nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
+ kunmap(nesqp->page);
}
- nesqp->skip_lsmm = 1;
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+ NES_QPCONTEXT_ORDIRD_WRPDU);
+ } else {
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU);
+ }
+ nesqp->skip_lsmm = 1;
/* Cache the cm_id in the qp */
nesqp->cm_id = cm_id;
@@ -2422,82 +3335,95 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
/* nesqp->cm_node = (void *)cm_id->provider_data; */
cm_id->provider_data = nesqp;
- nesqp->active_conn = 0;
+ nesqp->active_conn = 0;
+
+ if (cm_node->state == NES_CM_STATE_TSA)
+ nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
+ cm_node);
nes_cm_init_tsa_conn(nesqp, cm_node);
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(cm_node->mapped_loc_port);
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(cm_node->mapped_rem_port);
+
+ nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
- nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
- nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
- NES_ARP_RESOLVE) << 16);
+ nesqp->nesqp_context->arp_index_vlan |=
+ cpu_to_le32(nes_arp_table(nesdev,
+ le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
+ NES_ARP_RESOLVE) << 16);
nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
- jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
+ jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
- ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
+ ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)cm_node->ord_size);
memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
- nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
- nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+ nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+ nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
+ nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
nesqp->hte_index &= adapter->hte_index_mask;
nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
- nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X,"
- " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n",
- nesqp->hwqp.qp_id,
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohl(cm_id->local_addr.sin_addr.s_addr),
- ntohs(cm_id->local_addr.sin_port),
- le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
- le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- conn_param->private_data_len+sizeof(struct ietf_mpa_frame));
-
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
+ nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
+ "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
+ "private data length=%u.\n", nesqp->hwqp.qp_id,
+ ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port),
+ ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port),
+ le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+ le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+ buff_len);
- /* notify OF layer that accept event was successfull */
+ /* notify OF layer that accept event was successful */
cm_id->add_ref(cm_id);
+ nes_add_ref(&nesqp->ibqp);
cm_event.event = IW_CM_EVENT_ESTABLISHED;
- cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+ cm_event.status = 0;
cm_event.provider_data = (void *)nesqp;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
+ cm_event.ird = cm_node->ird_size;
+ cm_event.ord = cm_node->ord_size;
+
ret = cm_id->event_handler(cm_id, &cm_event);
+ attr.qp_state = IB_QPS_RTS;
+ nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len;
+ cm_node->loopbackpartner->mpa_frame_size =
+ nesqp->private_data_len;
/* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data,
- nesqp->private_data_len);
+ memcpy(cm_node->loopbackpartner->mpa_frame_buf,
+ conn_param->private_data, conn_param->private_data_len);
create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
}
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
return 0;
}
@@ -2509,23 +3435,29 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
struct nes_cm_node *cm_node;
+ struct nes_cm_node *loopback;
struct nes_cm_core *cm_core;
+ u8 *start_buff;
atomic_inc(&cm_rejects);
- cm_node = (struct nes_cm_node *) cm_id->provider_data;
+ cm_node = (struct nes_cm_node *)cm_id->provider_data;
+ loopback = cm_node->loopbackpartner;
cm_core = cm_node->cm_core;
- cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len;
-
- strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP);
- memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len);
-
- cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len);
- cm_node->mpa_frame.rev = mpa_version;
- cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT;
+ cm_node->cm_id = cm_id;
- cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node);
+ if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
+ return -EINVAL;
- return 0;
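+ /* loopback: copy the private data straight into the partner's MPA frame; */
+ /* otherwise stage it after the MPA v2 header before calling the core reject */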
+ if (loopback) {
+ memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
+ loopback->mpa_frame.priv_data_len = pdata_len;
+ loopback->mpa_frame_size = pdata_len;
+ } else {
+ start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
+ cm_node->mpa_frame_size = pdata_len;
+ memcpy(start_buff, pdata, pdata_len);
+ }
+ return cm_core->api->reject(cm_core, cm_node);
}
@@ -2541,7 +3473,15 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct nes_device *nesdev;
struct nes_cm_node *cm_node;
struct nes_cm_info cm_info;
-
+ int apbvt_set = 0;
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ int iwpm_err = 0;
+
+ if (cm_id->remote_addr.ss_family != AF_INET)
+ return -ENOSYS;
ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
return -EINVAL;
@@ -2551,78 +3491,99 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nesvnic = to_nesvnic(nesqp->ibqp.device);
if (!nesvnic)
return -EINVAL;
- nesdev = nesvnic->nesdev;
+ nesdev = nesvnic->nesdev;
if (!nesdev)
return -EINVAL;
- atomic_inc(&cm_connects);
+ if (!laddr->sin_port || !raddr->sin_port)
+ return -EINVAL;
- nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) +
- conn_param->private_data_len, GFP_KERNEL);
- if (!nesqp->ietf_frame)
- return -ENOMEM;
+ nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
+ "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
+ ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr),
+ ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr),
+ ntohs(laddr->sin_port));
- /* set qp as having an active connection */
+ atomic_inc(&cm_connects);
nesqp->active_conn = 1;
- nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n",
- nesqp->hwqp.qp_id,
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohl(cm_id->local_addr.sin_addr.s_addr),
- ntohs(cm_id->local_addr.sin_port));
-
/* cache the cm_id in the qp */
nesqp->cm_id = cm_id;
-
cm_id->provider_data = nesqp;
-
- /* copy the private data */
- if (conn_param->private_data_len) {
- memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
- conn_param->private_data_len);
- }
-
nesqp->private_data_len = conn_param->private_data_len;
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
- nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
- nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len);
-
- strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ);
- nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
- nesqp->ietf_frame->rev = IETF_MPA_VERSION;
- nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len);
- if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr)
- nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+ nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
+ nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
+ conn_param->private_data_len);
/* set up the connection params for the node */
- cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr);
- cm_info.loc_port = (cm_id->local_addr.sin_port);
- cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr);
- cm_info.rem_port = (cm_id->remote_addr.sin_port);
+ cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
+ cm_info.rem_port = ntohs(raddr->sin_port);
cm_info.cm_id = cm_id;
cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+ /* No port mapper available, go with the specified peer information */
+ cm_info.mapped_loc_addr = cm_info.loc_addr;
+ cm_info.mapped_loc_port = cm_info.loc_port;
+ cm_info.mapped_rem_addr = cm_info.rem_addr;
+ cm_info.mapped_rem_port = cm_info.rem_port;
+
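+ /* attempt port mapper registration and an add/query of the 4-tuple; on */
+ /* any failure the unmapped peer info recorded above is used as-is */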
+ nes_form_reg_msg(nesvnic, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+ if (iwpm_err) {
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ nes_form_pm_msg(&cm_info, &pm_msg);
+ iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
+ if (iwpm_err)
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper query fail (err = %d).\n", iwpm_err);
+ else
+ nes_record_pm_msg(&cm_info, &pm_msg);
+ }
+
+ if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
+ nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+ PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+ apbvt_set = 1;
+ }
+
+ if (nes_create_mapinfo(&cm_info))
+ return -ENOMEM;
+
cm_id->add_ref(cm_id);
- nes_add_ref(&nesqp->ibqp);
/* create a connect CM node connection */
- cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info);
+ cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
+ conn_param->private_data_len, (void *)conn_param->private_data,
+ &cm_info);
if (!cm_node) {
- if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr)
- nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
- nes_rem_ref(&nesqp->ibqp);
- kfree(nesqp->ietf_frame);
- nesqp->ietf_frame = NULL;
+ if (apbvt_set)
+ nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+ PCI_FUNC(nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+
+ nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n",
+ cm_info.mapped_loc_port);
+ nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
+ cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
cm_id->rem_ref(cm_id);
return -ENOMEM;
}
- cm_node->apbvt_set = 1;
+ record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+ if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+ cm_node->ord_size == 0)
+ cm_node->ord_size = 1;
+
+ cm_node->apbvt_set = apbvt_set;
nesqp->cm_node = cm_node;
+ cm_node->nesqp = nesqp;
+ nes_add_ref(&nesqp->ibqp);
return 0;
}
@@ -2636,50 +3597,59 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
struct nes_vnic *nesvnic;
struct nes_cm_listener *cm_node;
struct nes_cm_info cm_info;
- struct nes_adapter *adapter;
int err;
-
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
- cm_id, ntohs(cm_id->local_addr.sin_port));
+ cm_id, ntohs(laddr->sin_port));
+ if (cm_id->local_addr.ss_family != AF_INET)
+ return -ENOSYS;
nesvnic = to_nesvnic(cm_id->device);
if (!nesvnic)
return -EINVAL;
- adapter = nesvnic->nesdev->nesadapter;
+
nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
nesvnic, nesvnic->netdev, nesvnic->netdev->name);
nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
- nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr);
+ nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
/* setup listen params in our api call struct */
- cm_info.loc_addr = nesvnic->local_ipaddr;
- cm_info.loc_port = cm_id->local_addr.sin_port;
+ cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
cm_info.backlog = backlog;
cm_info.cm_id = cm_id;
cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+ /* No port mapper available, go with the specified info */
+ cm_info.mapped_loc_addr = cm_info.loc_addr;
+ cm_info.mapped_loc_port = cm_info.loc_port;
cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
if (!cm_node) {
- printk("%s[%u] Error returned from listen API call\n",
- __func__, __LINE__);
+ printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
+ __func__, __LINE__);
return -ENOMEM;
}
cm_id->provider_data = cm_node;
if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
- PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+ if (nes_create_mapinfo(&cm_info))
+ return -ENOMEM;
+
+ err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
+ PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_ADD);
if (err) {
- printk("nes_manage_apbvt call returned %d.\n", err);
+ printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
+ err);
g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
return err;
}
- cm_listens_created++;
+ atomic_inc(&cm_listens_created);
}
cm_id->add_ref(cm_id);
@@ -2711,15 +3681,16 @@ int nes_destroy_listen(struct iw_cm_id *cm_id)
*/
int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
{
+ int rc = 0;
+
cm_packets_received++;
- if ((g_cm_core) && (g_cm_core->api)) {
- g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
- } else {
+ if ((g_cm_core) && (g_cm_core->api))
+ rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
+ else
nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
- " cm is not setup properly.\n");
- }
+ " cm is not setup properly.\n");
- return 0;
+ return rc;
}
@@ -2732,11 +3703,10 @@ int nes_cm_start(void)
nes_debug(NES_DBG_CM, "\n");
/* create the primary CM core, pass this handle to subsequent core inits */
g_cm_core = nes_cm_alloc_core();
- if (g_cm_core) {
+ if (g_cm_core)
return 0;
- } else {
+ else
return -ENOMEM;
- }
}
@@ -2757,7 +3727,6 @@ int nes_cm_stop(void)
*/
static void cm_event_connected(struct nes_cm_event *event)
{
- u64 u64temp;
struct nes_qp *nesqp;
struct nes_vnic *nesvnic;
struct nes_device *nesdev;
@@ -2766,10 +3735,12 @@ static void cm_event_connected(struct nes_cm_event *event)
struct ib_qp_attr attr;
struct iw_cm_id *cm_id;
struct iw_cm_event cm_event;
- struct nes_hw_qp_wqe *wqe;
struct nes_v4_quad nes_quad;
u32 crc_value;
int ret;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in *cm_event_laddr;
/* get all our handles */
cm_node = event->cm_node;
@@ -2779,111 +3750,97 @@ static void cm_event_connected(struct nes_cm_event *event)
nesvnic = to_nesvnic(nesqp->ibqp.device);
nesdev = nesvnic->nesdev;
nesadapter = nesdev->nesadapter;
+ laddr = (struct sockaddr_in *)&cm_id->local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
- if (nesqp->destroyed) {
+ if (nesqp->destroyed)
return;
- }
atomic_inc(&cm_connecteds);
nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on"
- " local port 0x%04X. jiffies = %lu.\n",
- nesqp->hwqp.qp_id,
- ntohl(cm_id->remote_addr.sin_addr.s_addr),
- ntohs(cm_id->remote_addr.sin_port),
- ntohs(cm_id->local_addr.sin_port),
- jiffies);
+ " local port 0x%04X. jiffies = %lu.\n",
+ nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr),
+ ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies);
nes_cm_init_tsa_conn(nesqp, cm_node);
/* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(cm_node->mapped_loc_port);
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(cm_node->mapped_rem_port);
+ nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+ (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+ NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
- nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0),
+ nes_arp_table(nesdev,
+ le32_to_cpu(nesqp->nesqp_context->ip0),
NULL, NES_ARP_RESOLVE) << 16);
nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+ cpu_to_le32((u32)1 <<
+ NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)cm_node->ord_size);
/* Adjust tail for not having a LSMM */
- nesqp->hwqp.sq_tail = 1;
-
-#if defined(NES_SEND_FIRST_WRITE)
- if (cm_node->send_write0) {
- nes_debug(NES_DBG_CM, "Sending first write.\n");
- wqe = &nesqp->hwqp.sq_vbase[0];
- u64temp = (unsigned long)nesqp;
- u64temp |= NES_SW_CONTEXT_ALIGN>>1;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
- u64temp);
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
-
- /* use the reserved spot on the WQ for the extra first WQE */
- nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM));
- nesqp->skip_lsmm = 1;
- nesqp->hwqp.sq_tail = 0;
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
- }
-#endif
+ /*nesqp->hwqp.sq_tail = 1;*/
+
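+ /* the old NES_SEND_FIRST_WRITE block is replaced by build_rdma0_msg(), */
+ /* which sets up the initial RDMA0 WQE before the WQE_ALLOC write below */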
+ build_rdma0_msg(cm_node, &nesqp);
+
+ nes_write32(nesdev->regs + NES_WQE_ALLOC,
+ (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
- nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
- nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+ nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+ nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+ nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
nesqp->hte_index &= nesadapter->hte_index_mask;
nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
nesqp->ietf_frame = &cm_node->mpa_frame;
- nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
+ nesqp->private_data_len = (u8)cm_node->mpa_frame_size;
cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
- /* modify QP state to rts */
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
-
/* notify OF layer we successfully created the requested connection */
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED;
+ cm_event.status = 0;
cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr.sin_family = AF_INET;
- cm_event.local_addr.sin_port = cm_id->local_addr.sin_port;
+ cm_event_laddr->sin_family = AF_INET;
+ cm_event_laddr->sin_port = laddr->sin_port;
cm_event.remote_addr = cm_id->remote_addr;
- cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
+ cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
+ cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
+ cm_event.ird = cm_node->ird_size;
+ cm_event.ord = cm_node->ord_size;
- cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
- nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n",
- nesqp->hwqp.qp_id, jiffies );
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
+ attr.qp_state = IB_QPS_RTS;
+ nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
- nes_rem_ref(&nesqp->ibqp);
+ nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
+ "%lu\n", nesqp->hwqp.qp_id, jiffies);
return;
}
@@ -2904,40 +3861,46 @@ static void cm_event_connect_error(struct nes_cm_event *event)
return;
cm_id = event->cm_node->cm_id;
- if (!cm_id) {
+ if (!cm_id)
return;
- }
nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
nesqp = cm_id->provider_data;
- if (!nesqp) {
+ if (!nesqp)
return;
- }
/* notify OF layer about this connection error event */
/* cm_id->rem_ref(cm_id); */
nesqp->cm_id = NULL;
cm_id->provider_data = NULL;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+ cm_event.status = -ECONNRESET;
cm_event.provider_data = cm_id->provider_data;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n",
- cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr);
+#ifdef CONFIG_INFINIBAND_NES_DEBUG
+ {
+ struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+ &cm_event.local_addr;
+ struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+ &cm_event.remote_addr;
+ nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n",
+ cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr);
+ }
+#endif
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
- nes_rem_ref(&nesqp->ibqp);
- cm_id->rem_ref(cm_id);
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
+ "ret=%d\n", __func__, __LINE__, ret);
+ cm_id->rem_ref(cm_id);
+ rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
return;
}
@@ -2963,18 +3926,32 @@ static void cm_event_reset(struct nes_cm_event *event)
nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
nesqp = cm_id->provider_data;
+ if (!nesqp)
+ return;
nesqp->cm_id = NULL;
/* cm_id->provider_data = NULL; */
cm_event.event = IW_CM_EVENT_DISCONNECT;
- cm_event.status = IW_CM_EVENT_STATUS_RESET;
+ cm_event.status = -ECONNRESET;
cm_event.provider_data = cm_id->provider_data;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
+ cm_id->add_ref(cm_id);
ret = cm_id->event_handler(cm_id, &cm_event);
+ atomic_inc(&cm_closes);
+ cm_event.event = IW_CM_EVENT_CLOSE;
+ cm_event.status = 0;
+ cm_event.provider_data = cm_id->provider_data;
+ cm_event.local_addr = cm_id->local_addr;
+ cm_event.remote_addr = cm_id->remote_addr;
+ cm_event.private_data = NULL;
+ cm_event.private_data_len = 0;
+ nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
+ ret = cm_id->event_handler(cm_id, &cm_event);
+
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
@@ -2990,10 +3967,14 @@ static void cm_event_reset(struct nes_cm_event *event)
*/
static void cm_event_mpa_req(struct nes_cm_event *event)
{
- struct iw_cm_id *cm_id;
+ struct iw_cm_id *cm_id;
struct iw_cm_event cm_event;
int ret;
struct nes_cm_node *cm_node;
+ struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+ &cm_event.local_addr;
+ struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+ &cm_event.remote_addr;
cm_node = event->cm_node;
if (!cm_node)
@@ -3002,27 +3983,81 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
atomic_inc(&cm_connect_reqs);
nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
- cm_node, cm_id, jiffies);
+ cm_node, cm_id, jiffies);
cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
- cm_event.status = IW_CM_EVENT_STATUS_OK;
+ cm_event.status = 0;
cm_event.provider_data = (void *)cm_node;
- cm_event.local_addr.sin_family = AF_INET;
- cm_event.local_addr.sin_port = htons(event->cm_info.loc_port);
- cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+ cm_event_laddr->sin_family = AF_INET;
+ cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+
+ cm_event_raddr->sin_family = AF_INET;
+ cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
+ cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+ cm_event.private_data = cm_node->mpa_frame_buf;
+ cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
+ cm_event.ird = NES_MAX_IRD;
+ cm_event.ord = NES_MAX_ORD;
+ } else {
+ cm_event.ird = cm_node->ird_size;
+ cm_event.ord = cm_node->ord_size;
+ }
+
+ ret = cm_id->event_handler(cm_id, &cm_event);
+ if (ret)
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
+ __func__, __LINE__, ret);
+ return;
+}
+
+
+static void cm_event_mpa_reject(struct nes_cm_event *event)
+{
+ struct iw_cm_id *cm_id;
+ struct iw_cm_event cm_event;
+ struct nes_cm_node *cm_node;
+ int ret;
+ struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
+ &cm_event.local_addr;
+ struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
+ &cm_event.remote_addr;
+
+ cm_node = event->cm_node;
+ if (!cm_node)
+ return;
+ cm_id = cm_node->cm_id;
+
+ atomic_inc(&cm_connect_reqs);
+ nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
+ cm_node, cm_id, jiffies);
+
+ cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
+ cm_event.status = -ECONNREFUSED;
+ cm_event.provider_data = cm_id->provider_data;
+
+ cm_event_laddr->sin_family = AF_INET;
+ cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
+
+ cm_event_raddr->sin_family = AF_INET;
+ cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
+ cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
- cm_event.remote_addr.sin_family = AF_INET;
- cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
- cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+ cm_event.private_data = cm_node->mpa_frame_buf;
+ cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
- cm_event.private_data = cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8) cm_node->mpa_frame_size;
+ nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
+ "remove_addr=%08x\n",
+ cm_event_laddr->sin_addr.s_addr,
+ cm_event_raddr->sin_addr.s_addr);
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
- printk("%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
+ printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
+ __func__, __LINE__, ret);
return;
}
@@ -3040,7 +4075,8 @@ static int nes_cm_post_event(struct nes_cm_event *event)
add_ref_cm_node(event->cm_node);
event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
INIT_WORK(&event->event_work, nes_cm_event_handler);
- nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event);
+ nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
+ event->cm_node, event);
queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
@@ -3056,46 +4092,56 @@ static int nes_cm_post_event(struct nes_cm_event *event)
*/
static void nes_cm_event_handler(struct work_struct *work)
{
- struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work);
+ struct nes_cm_event *event = container_of(work, struct nes_cm_event,
+ event_work);
struct nes_cm_core *cm_core;
- if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) {
+ if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
return;
- }
+
cm_core = event->cm_node->cm_core;
nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
- event, event->type, atomic_read(&cm_core->events_posted));
+ event, event->type, atomic_read(&cm_core->events_posted));
switch (event->type) {
- case NES_CM_EVENT_MPA_REQ:
- cm_event_mpa_req(event);
- nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n");
+ case NES_CM_EVENT_MPA_REQ:
+ cm_event_mpa_req(event);
+ nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
+ event->cm_node);
+ break;
+ case NES_CM_EVENT_RESET:
+ nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
+ event->cm_node);
+ cm_event_reset(event);
+ break;
+ case NES_CM_EVENT_CONNECTED:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state != NES_CM_STATE_TSA))
break;
- case NES_CM_EVENT_RESET:
- nes_debug(NES_DBG_CM, "CM Event: RESET\n");
- cm_event_reset(event);
- break;
- case NES_CM_EVENT_CONNECTED:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state != NES_CM_STATE_TSA)) {
- break;
- }
- cm_event_connected(event);
- nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
+ cm_event_connected(event);
+ nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
+ break;
+ case NES_CM_EVENT_MPA_REJECT:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state == NES_CM_STATE_TSA))
break;
- case NES_CM_EVENT_ABORTED:
- if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) {
- break;
- }
- cm_event_connect_error(event);
- nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
- break;
- case NES_CM_EVENT_DROPPED_PKT:
- nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
- break;
- default:
- nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
+ cm_event_mpa_reject(event);
+ nes_debug(NES_DBG_CM, "CM Event: REJECT\n");
+ break;
+
+ case NES_CM_EVENT_ABORTED:
+ if ((!event->cm_node->cm_id) ||
+ (event->cm_node->state == NES_CM_STATE_TSA))
break;
+ cm_event_connect_error(event);
+ nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
+ break;
+ case NES_CM_EVENT_DROPPED_PKT:
+ nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
+ break;
+ default:
+ nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
+ break;
}
atomic_dec(&cm_core->events_posted);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 7717cb2ab50..f522cf63978 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -39,11 +39,27 @@
#define NES_MANAGE_APBVT_DEL 0
#define NES_MANAGE_APBVT_ADD 1
+#define NES_MPA_REQUEST_ACCEPT 1
+#define NES_MPA_REQUEST_REJECT 2
+
/* IETF MPA -- defines, enums, structs */
#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
#define IETF_MPA_KEY_SIZE 16
#define IETF_MPA_VERSION 1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE 20
+#define IETF_RTR_MSG_SIZE 4
+#define IETF_MPA_V2_FLAG 0x10
+
+/* IETF RTR MSG Fields */
+#define IETF_PEER_TO_PEER 0x8000
+#define IETF_FLPDU_ZERO_LEN 0x4000
+#define IETF_RDMA0_WRITE 0x8000
+#define IETF_RDMA0_READ 0x4000
+#define IETF_NO_IRD_ORD 0x3FFF
+#define NES_MAX_IRD 0x40
+#define NES_MAX_ORD 0x7F
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@@ -51,7 +67,7 @@ enum ietf_mpa_flags {
IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
};
-struct ietf_mpa_frame {
+struct ietf_mpa_v1 {
u8 key[IETF_MPA_KEY_SIZE];
u8 flags;
u8 rev;
@@ -61,6 +77,20 @@ struct ietf_mpa_frame {
#define ietf_mpa_req_resp_frame ietf_mpa_frame
+struct ietf_rtr_msg {
+ __be16 ctrl_ird;
+ __be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ struct ietf_rtr_msg rtr_msg;
+ u8 priv_data[0];
+};
+
struct nes_v4_quad {
u32 rsvd0;
__le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */
@@ -76,6 +106,10 @@ enum nes_timer_type {
NES_TIMER_TYPE_CLOSE,
};
+#define NES_PASSIVE_STATE_INDICATED 0
+#define NES_DO_NOT_SEND_RESET_EVENT 1
+#define NES_SEND_RESET_EVENT 2
+
#define MAX_NES_IFS 4
#define SET_ACK 1
@@ -83,6 +117,8 @@ enum nes_timer_type {
#define SET_FIN 4
#define SET_RST 8
+#define TCP_OPTIONS_PADDING 3
+
struct option_base {
u8 optionnum;
u8 length;
@@ -140,6 +176,7 @@ struct nes_timer_entry {
#endif
#define NES_SHORT_TIME (10)
#define NES_LONG_TIME (2000*HZ/1000)
+#define NES_MAX_TIMEOUT ((unsigned long) (12*HZ))
#define NES_CM_HASHTABLE_SIZE 1024
#define NES_CM_TCP_TIMER_INTERVAL 3000
@@ -159,6 +196,7 @@ struct nes_timer_entry {
#define NES_CM_DEF_SEQ2 0x18ed5740
#define NES_CM_DEF_LOCAL_ID2 0xb807
+#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN)
typedef u32 nes_addr_t;
@@ -177,6 +215,8 @@ enum nes_cm_node_state {
NES_CM_STATE_ESTABLISHED,
NES_CM_STATE_ACCEPTING,
NES_CM_STATE_MPAREQ_SENT,
+ NES_CM_STATE_MPAREQ_RCVD,
+ NES_CM_STATE_MPAREJ_RCVD,
NES_CM_STATE_TSA,
NES_CM_STATE_FIN_WAIT1,
NES_CM_STATE_FIN_WAIT2,
@@ -184,9 +224,35 @@ enum nes_cm_node_state {
NES_CM_STATE_TIME_WAIT,
NES_CM_STATE_LAST_ACK,
NES_CM_STATE_CLOSING,
+ NES_CM_STATE_LISTENER_DESTROYED,
NES_CM_STATE_CLOSED
};
+enum mpa_frame_version {
+ IETF_MPA_V1 = 1,
+ IETF_MPA_V2 = 2
+};
+
+enum mpa_frame_key {
+ MPA_KEY_REQUEST,
+ MPA_KEY_REPLY
+};
+
+enum send_rdma0 {
+ SEND_RDMA_READ_ZERO = 1,
+ SEND_RDMA_WRITE_ZERO = 2
+};
+
+enum nes_tcpip_pkt_type {
+ NES_PKT_TYPE_UNKNOWN,
+ NES_PKT_TYPE_SYN,
+ NES_PKT_TYPE_SYNACK,
+ NES_PKT_TYPE_ACK,
+ NES_PKT_TYPE_FIN,
+ NES_PKT_TYPE_RST
+};
+
+
/* type of nes connection */
enum nes_cm_conn_type {
NES_CM_IWARP_CONN_TYPE,
@@ -218,17 +284,17 @@ struct nes_cm_tcp_context {
enum nes_cm_listener_state {
- NES_CM_LISTENER_PASSIVE_STATE=1,
- NES_CM_LISTENER_ACTIVE_STATE=2,
- NES_CM_LISTENER_EITHER_STATE=3
+ NES_CM_LISTENER_PASSIVE_STATE = 1,
+ NES_CM_LISTENER_ACTIVE_STATE = 2,
+ NES_CM_LISTENER_EITHER_STATE = 3
};
struct nes_cm_listener {
struct list_head list;
struct nes_cm_core *cm_core;
u8 loc_mac[ETH_ALEN];
- nes_addr_t loc_addr;
- u16 loc_port;
+ nes_addr_t loc_addr, mapped_loc_addr;
+ u16 loc_port, mapped_loc_port;
struct iw_cm_id *cm_id;
enum nes_cm_conn_type conn_type;
atomic_t ref_count;
@@ -241,10 +307,10 @@ struct nes_cm_listener {
/* per connection node and node state information */
struct nes_cm_node {
- u32 hashkey;
-
nes_addr_t loc_addr, rem_addr;
+ nes_addr_t mapped_loc_addr, mapped_rem_addr;
u16 loc_port, rem_port;
+ u16 mapped_loc_port, mapped_rem_port;
u8 loc_mac[ETH_ALEN];
u8 rem_mac[ETH_ALEN];
@@ -257,16 +323,22 @@ struct nes_cm_node {
struct net_device *netdev;
struct nes_cm_node *loopbackpartner;
- struct list_head retrans_list;
+
+ struct nes_timer_entry *send_entry;
+ struct nes_timer_entry *recv_entry;
spinlock_t retrans_list_lock;
- struct list_head recv_list;
- spinlock_t recv_list_lock;
+ enum send_rdma0 send_rdma0_op;
- int send_write0;
union {
- struct ietf_mpa_frame mpa_frame;
- u8 mpa_frame_buf[NES_CM_DEFAULT_MTU];
+ struct ietf_mpa_v1 mpa_frame;
+ struct ietf_mpa_v2 mpa_v2_frame;
+ u8 mpa_frame_buf[MAX_CM_BUFFER];
};
+ enum mpa_frame_version mpa_frame_rev;
+ u16 ird_size;
+ u16 ord_size;
+ u16 mpav2_ird_ord;
+
u16 mpa_frame_size;
struct iw_cm_id *cm_id;
struct list_head list;
@@ -276,6 +348,10 @@ struct nes_cm_node {
struct nes_vnic *nesvnic;
int apbvt_set;
int accept_pend;
+ struct list_head timer_entry;
+ struct list_head reset_entry;
+ struct nes_qp *nesqp;
+ atomic_t passive_state;
};
/* structure for client or CM to fill when making CM api calls. */
@@ -290,6 +366,10 @@ struct nes_cm_info {
u16 rem_port;
nes_addr_t loc_addr;
nes_addr_t rem_addr;
+ u16 mapped_loc_port;
+ u16 mapped_rem_port;
+ nes_addr_t mapped_loc_addr;
+ nes_addr_t mapped_rem_addr;
enum nes_cm_conn_type conn_type;
int backlog;
@@ -302,6 +382,7 @@ enum nes_cm_event_type {
NES_CM_EVENT_MPA_REQ,
NES_CM_EVENT_MPA_CONNECT,
NES_CM_EVENT_MPA_ACCEPT,
+ NES_CM_EVENT_MPA_REJECT,
NES_CM_EVENT_MPA_ESTABLISHED,
NES_CM_EVENT_CONNECTED,
NES_CM_EVENT_CLOSED,
@@ -333,7 +414,6 @@ struct nes_cm_core {
u32 mtu;
u32 free_tx_pkt_max;
u32 rx_pkt_posted;
- struct sk_buff_head tx_free_list;
atomic_t ht_node_cnt;
struct list_head connected_nodes;
/* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
@@ -366,13 +446,11 @@ struct nes_cm_ops {
struct nes_cm_info *);
int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
struct nes_cm_node * (*connect)(struct nes_cm_core *,
- struct nes_vnic *, struct ietf_mpa_frame *,
+ struct nes_vnic *, u16, void *,
struct nes_cm_info *);
int (*close)(struct nes_cm_core *, struct nes_cm_node *);
- int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
- struct nes_cm_node *);
- int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
- struct nes_cm_node *);
+ int (*accept)(struct nes_cm_core *, struct nes_cm_node *);
+ int (*reject)(struct nes_cm_core *, struct nes_cm_node *);
int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
struct sk_buff *);
int (*destroy_cm_core)(struct nes_cm_core *);
@@ -383,8 +461,6 @@ struct nes_cm_ops {
int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
enum nes_timer_type, int, int);
-int nes_cm_disconn(struct nes_qp *);
-
int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
int nes_reject(struct iw_cm_id *, const void *, u8);
int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
@@ -394,5 +470,7 @@ int nes_destroy_listen(struct iw_cm_id *);
int nes_cm_recv(struct sk_buff *, struct net_device *);
int nes_cm_start(void);
int nes_cm_stop(void);
+int nes_add_ref_cm_node(struct nes_cm_node *cm_node);
+int nes_rem_ref_cm_node(struct nes_cm_node *cm_node);
#endif /* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
index da9daba8e66..a69eef16d72 100644
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 85f26d19a32..90200245c5e 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -39,6 +39,7 @@
#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include <linux/inet_lro.h>
+#include <linux/slab.h>
#include "nes.h"
@@ -46,6 +47,10 @@ static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
module_param(nes_lro_max_aggr, uint, 0444);
MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
+static int wide_ppm_offset;
+module_param(wide_ppm_offset, int, 0644);
+MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm");
+
static u32 crit_err_count;
u32 int_mod_timer_init;
u32 int_mod_cq_depth_256;
@@ -55,24 +60,26 @@ u32 int_mod_cq_depth_24;
u32 int_mod_cq_depth_16;
u32 int_mod_cq_depth_4;
u32 int_mod_cq_depth_1;
-
+static const u8 nes_max_critical_error_count = 100;
#include "nes_cm.h"
static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq);
static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count);
static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
- u8 OneG_Mode);
+ struct nes_adapter *nesadapter, u8 OneG_Mode);
static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq);
static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq);
static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
struct nes_hw_aeqe *aeqe);
+static void process_critical_error(struct nes_device *nesdev);
static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
+static void nes_terminate_start_timer(struct nes_qp *nesqp);
#ifdef CONFIG_INFINIBAND_NES_DEBUG
static unsigned char *nes_iwarp_state_str[] = {
- "Non-Existant",
+ "Non-Existent",
"Idle",
"RTS",
"Closing",
@@ -83,7 +90,7 @@ static unsigned char *nes_iwarp_state_str[] = {
};
static unsigned char *nes_tcp_state_str[] = {
- "Non-Existant",
+ "Non-Existent",
"Closed",
"Listen",
"SYN Sent",
@@ -102,6 +109,14 @@ static unsigned char *nes_tcp_state_str[] = {
};
#endif
+static inline void print_ip(struct nes_cm_node *cm_node)
+{
+ unsigned char *rem_addr;
+ if (cm_node) {
+ rem_addr = (unsigned char *)&cm_node->rem_addr;
+ printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr);
+ }
+}
/**
* nes_nic_init_timer_defaults
@@ -222,11 +237,10 @@ static void nes_nic_tune_timer(struct nes_device *nesdev)
}
/* boundary checking */
- if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH)
- shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH;
- else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) {
- shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW;
- }
+ if (shared_timer->timer_in_use > shared_timer->threshold_high)
+ shared_timer->timer_in_use = shared_timer->threshold_high;
+ else if (shared_timer->timer_in_use < shared_timer->threshold_low)
+ shared_timer->timer_in_use = shared_timer->threshold_low;
nesdev->currcq_count = 0;
@@ -254,6 +268,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
u32 adapter_size;
u32 arp_table_size;
u16 vendor_id;
+ u16 device_id;
u8 OneG_Mode;
u8 func_index;
@@ -292,9 +307,6 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0)
return NULL;
- if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode))
- return NULL;
- nes_init_csr_ne020(nesdev, hw_rev, port_count);
max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp);
@@ -353,6 +365,29 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
+ if (nes_read_eeprom_values(nesdev, nesadapter)) {
+ printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
+ kfree(nesadapter);
+ return NULL;
+ }
+
+ nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) |
+ (nesadapter->mac_addr_low >> 24);
+
+ pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn,
+ PCI_DEVICE_ID, &device_id);
+ nesadapter->vendor_part_id = device_id;
+
+ if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter,
+ OneG_Mode)) {
+ kfree(nesadapter);
+ return NULL;
+ }
+ nes_init_csr_ne020(nesdev, hw_rev, port_count);
+
+ memset(nesadapter->pft_mcast_map, 255,
+ sizeof nesadapter->pft_mcast_map);
+
/* populate the new nesadapter */
nesadapter->devfn = nesdev->pcidev->devfn;
nesadapter->bus_number = nesdev->pcidev->bus->number;
@@ -397,8 +432,9 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->base_pd = 1;
- nesadapter->device_cap_flags =
- IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+ nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -409,11 +445,12 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
- /* mark the usual suspect QPs and CQs as in use */
+ /* mark the usual suspect QPs, MR and CQs as in use */
for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
set_bit(u32temp, nesadapter->allocated_qps);
set_bit(u32temp, nesadapter->allocated_cqs);
}
+ set_bit(0, nesadapter->allocated_mrs);
for (u32temp = 0; u32temp < 20; u32temp++)
set_bit(u32temp, nesadapter->allocated_pds);
@@ -454,7 +491,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
nesadapter->max_sge = 4;
- nesadapter->max_cqe = 32767;
+ nesadapter->max_cqe = 32766;
if (nes_read_eeprom_values(nesdev, nesadapter)) {
printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
@@ -468,20 +505,25 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
/* setup port configuration */
if (nesadapter->port_count == 1) {
- u32temp = 0x00000000;
+ nesadapter->log_port = 0x00000000;
if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT)
nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
else
nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
} else {
- if (nesadapter->port_count == 2)
- u32temp = 0x00000044;
- else
- u32temp = 0x000000e4;
+ if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ nesadapter->log_port = 0x000000D8;
+ } else {
+ if (nesadapter->port_count == 2)
+ nesadapter->log_port = 0x00000044;
+ else
+ nesadapter->log_port = 0x000000e4;
+ }
nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
}
- nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp);
+ nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT,
+ nesadapter->log_port);
nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n",
nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
@@ -521,7 +563,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
}
if (int_cnt > 1) {
spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
mh_detected++;
reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
reset_value |= 0x0000003d;
@@ -546,7 +588,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
if (++ext_cnt > int_cnt) {
spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1,
- 0x0000F0C8);
+ 0x0000F088);
mh_detected++;
reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
reset_value |= 0x0000003d;
@@ -637,7 +679,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
i = 0;
while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
mdelay(1);
- if (i >= 10000) {
+ if (i > 10000) {
nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
return 0;
}
@@ -645,7 +687,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
i = 0;
while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
mdelay(1);
- if (i >= 10000) {
+ if (i > 10000) {
printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
return 0;
@@ -671,7 +713,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
i = 0;
while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
mdelay(1);
- if (i >= 10000) {
+ if (i > 10000) {
nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n");
return 0;
}
@@ -681,7 +723,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
& 0x0000000f)) != 0x0000000f) && i++ < 5000)
mdelay(1);
- if (i >= 5000) {
+ if (i > 5000) {
nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp);
return 0;
}
@@ -692,7 +734,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
& 0x0000000f)) != 0x0000000f) && i++ < 5000)
mdelay(1);
- if (i >= 5000) {
+ if (i > 5000) {
nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp);
return 0;
}
@@ -706,23 +748,70 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_
* nes_init_serdes
*/
static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
- u8 OneG_Mode)
+ struct nes_adapter *nesadapter, u8 OneG_Mode)
{
int i;
u32 u32temp;
+ u32 sds;
if (hw_rev != NE020_REV) {
/* init serdes 0 */
+ switch (nesadapter->phy_type[0]) {
+ case NES_PHY_TYPE_CX4:
+ if (wide_ppm_offset)
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
+ else
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ break;
+ case NES_PHY_TYPE_KR:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
+ break;
+ case NES_PHY_TYPE_PUMA_1G:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+ sds |= 0x00000100;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
+ break;
+ default:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ break;
+ }
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
if (!OneG_Mode)
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
- if (port_count > 1) {
- /* init serdes 1 */
+
+ if (port_count < 2)
+ return 0;
+
+ /* init serdes 1 */
+ if (!(OneG_Mode && (nesadapter->phy_type[1] != NES_PHY_TYPE_PUMA_1G)))
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
- if (!OneG_Mode)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
- }
+
+ switch (nesadapter->phy_type[1]) {
+ case NES_PHY_TYPE_ARGUS:
+ case NES_PHY_TYPE_SFP_D:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
+ break;
+ case NES_PHY_TYPE_CX4:
+ if (wide_ppm_offset)
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
+ break;
+ case NES_PHY_TYPE_KR:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
+ break;
+ case NES_PHY_TYPE_PUMA_1G:
+ sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+ sds |= 0x000000100;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
+ }
+ if (!OneG_Mode) {
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
+ sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+ sds &= 0xFFFFFFBF;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
+ }
} else {
/* init serdes 0 */
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
@@ -730,7 +819,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
& 0x0000000f)) != 0x0000000f) && i++ < 5000)
mdelay(1);
- if (i >= 5000) {
+ if (i > 5000) {
nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp);
return 1;
}
@@ -753,7 +842,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
& 0x0000000f)) != 0x0000000f) && (i++ < 5000))
mdelay(1);
- if (i >= 5000) {
+ if (i > 5000) {
printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp);
/* return 1; */
}
@@ -826,7 +915,8 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou
nes_write_indexed(nesdev, 0x00005000, 0x00018000);
/* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */
- nes_write_indexed(nesdev, 0x00005004, 0x00020001);
+ nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) |
+ 0x00000001);
nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);
nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);
nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);
@@ -849,6 +939,12 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou
u32temp &= 0x7fffffff;
u32temp |= 0x7fff0010;
nes_write_indexed(nesdev, 0x000021f8, u32temp);
+ if (port_count > 1) {
+ u32temp = nes_read_indexed(nesdev, 0x000023f8);
+ u32temp &= 0x7fffffff;
+ u32temp |= 0x7fff0010;
+ nes_write_indexed(nesdev, 0x000023f8, u32temp);
+ }
}
}
@@ -1206,209 +1302,252 @@ int nes_destroy_cqp(struct nes_device *nesdev)
/**
- * nes_init_phy
+ * nes_init_1g_phy
*/
-int nes_init_phy(struct nes_device *nesdev)
+static int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 counter = 0;
- u32 sds_common_control0;
- u32 mac_index = nesdev->mac_index;
- u32 tx_config = 0;
u16 phy_data;
+ int ret = 0;
+
+ nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
+
+ /* Reset the PHY */
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
+ udelay(100);
+ counter = 0;
+ do {
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ if (counter++ > 100) {
+ ret = -1;
+ break;
+ }
+ } while (phy_data & 0x8000);
+
+ /* Setting no phy loopback */
+ phy_data &= 0xbfff;
+ phy_data |= 0x1140;
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
+ nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
+
+ /* Setting the interrupt mask */
+ nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
+ nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
+
+ /* turning on flow control */
+ nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
+ nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
+
+ /* Clear Half duplex */
+ nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
+ nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
+
+ return ret;
+}
+
+
+/**
+ * nes_init_2025_phy
+ */
+static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
+{
u32 temp_phy_data = 0;
u32 temp_phy_data2 = 0;
- u32 i = 0;
-
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
- printk(PFX "%s: Programming mdc config for 1G\n", __func__);
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- tx_config |= 0x04;
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+ u32 counter = 0;
+ u32 sds;
+ u32 mac_index = nesdev->mac_index;
+ int ret = 0;
+ unsigned int first_attempt = 1;
+
+ /* Check firmware heartbeat */
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ udelay(1500);
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+ if (temp_phy_data != temp_phy_data2) {
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((temp_phy_data & 0xff) > 0x20)
+ return 0;
+ printk(PFX "Reinitialize external PHY\n");
+ }
+
+ /* no heartbeat, configure the PHY */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+
+ switch (phy_type) {
+ case NES_PHY_TYPE_ARGUS:
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
+
+ /* setup LEDs */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
+ break;
+
+ case NES_PHY_TYPE_SFP_D:
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
+
+ /* setup LEDs */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
+ break;
+
+ case NES_PHY_TYPE_KR:
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
+
+ /* setup LEDs */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004);
+
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020);
+ break;
+ }
+
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
+
+ /* Bring PHY out of reset */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
+
+ /* Check for heartbeat */
+ counter = 0;
+ mdelay(690);
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ do {
+ if (counter++ > 150) {
+ printk(PFX "No PHY heartbeat\n");
+ break;
}
+ mdelay(1);
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ } while ((temp_phy_data2 == temp_phy_data));
- nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
- nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
-
- /* Reset the PHY */
- nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
- udelay(100);
- counter = 0;
- do {
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
- if (counter++ > 100) break;
- } while (phy_data & 0x8000);
-
- /* Setting no phy loopback */
- phy_data &= 0xbfff;
- phy_data |= 0x1140;
- nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data);
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
-
- nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data);
-
- nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data);
-
- /* Setting the interrupt mask */
- nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
- nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee);
-
- nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
-
- /* turning on flow control */
- nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
- nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
- (phy_data & ~(0x03E0)) | 0xc00);
- /* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
- phy_data | 0xc00); */
- nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
-
- nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
- /* Clear Half duplex */
- nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index],
- phy_data & ~(0x0100));
- nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
-
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
- nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
- } else {
- if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) ||
- (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
- /* setup 10G MDIO operation */
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- tx_config |= 0x14;
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+ /* wait for tracking */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if (counter++ > 300) {
+ if (((temp_phy_data & 0xff) == 0x0) && first_attempt) {
+ first_attempt = 0;
+ counter = 0;
+ /* reset AMCC PHY and try again */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
+ continue;
+ } else {
+ ret = 1;
+ break;
+ }
}
- if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
-
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- mdelay(10);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- /*
- * if firmware is already running (like from a
- * driver un-load/load, don't do anything.
- */
- if (temp_phy_data == temp_phy_data2) {
- /* configure QT2505 AMCC PHY */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000);
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
+ mdelay(10);
+ } while ((temp_phy_data & 0xff) < 0x30);
+
+ /* setup signal integrity */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
+ if (phy_type == NES_PHY_TYPE_KR) {
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C);
+ } else {
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
+ }
- /*
- * remove micro from reset; chip boots from ROM,
- * uploads EEPROM f/w image, uC executes f/w
- */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002);
+ /* reset serdes */
+ sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200);
+ sds |= 0x1;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
+ sds &= 0xfffffffe;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
- /*
- * wait for heart beat to start to
- * know loading is done
- */
- counter = 0;
- do {
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if (counter++ > 1000) {
- nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n");
- break;
- }
- mdelay(100);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- } while ((temp_phy_data2 == temp_phy_data));
+ counter = 0;
+ while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+ && (counter++ < 5000))
+ ;
- /*
- * wait for tracking to start to know
- * f/w is good to go
- */
- counter = 0;
- do {
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if (counter++ > 1000) {
- nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n");
- break;
- }
- mdelay(1000);
- /*
- * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n",
- * temp_phy_data);
- */
- } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
-
- /* set LOS Control invert RXLOSB_I_PADINV */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000);
- /* set LOS Control to mask of RXLOSB_I */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042);
- /* set LED1 to input mode (LED1 and LED2 share same LED) */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007);
- /* set LED2 to RX link_status and activity */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A);
- /* set LED3 to RX link_status */
- nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009);
+ return ret;
+}
- /*
- * reset the res-calibration on t2
- * serdes; ensures it is stable after
- * the amcc phy is stable
- */
- sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
- sds_common_control0 |= 0x1;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+/**
+ * nes_init_phy
+ */
+int nes_init_phy(struct nes_device *nesdev)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 mac_index = nesdev->mac_index;
+ u32 tx_config = 0;
+ unsigned long flags;
+ u8 phy_type = nesadapter->phy_type[mac_index];
+ u8 phy_index = nesadapter->phy_index[mac_index];
+ int ret = 0;
- /* release the res-calibration reset */
- sds_common_control0 &= 0xfffffffe;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+ if (phy_type == NES_PHY_TYPE_1G) {
+ /* setup 1G MDIO operation */
+ tx_config &= 0xFFFFFFE3;
+ tx_config |= 0x04;
+ } else {
+ /* setup 10G MDIO operation */
+ tx_config &= 0xFFFFFFE3;
+ tx_config |= 0x1D;
+ }
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
- i = 0;
- while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
- && (i++ < 5000)) {
- /* mdelay(1); */
- }
+ spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- /*
- * wait for link train done before moving on,
- * or will get an interupt storm
- */
- counter = 0;
- do {
- temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
- if (counter++ > 1000) {
- nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didnt train!!!>\n");
- break;
- }
- mdelay(1);
- } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000));
- }
- }
+ switch (phy_type) {
+ case NES_PHY_TYPE_1G:
+ ret = nes_init_1g_phy(nesdev, phy_type, phy_index);
+ break;
+ case NES_PHY_TYPE_ARGUS:
+ case NES_PHY_TYPE_SFP_D:
+ case NES_PHY_TYPE_KR:
+ ret = nes_init_2025_phy(nesdev, phy_type, phy_index);
+ break;
}
- return 0;
+
+ spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+
+ return ret;
}
@@ -1423,6 +1562,7 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
struct nes_hw_nic_rq_wqe *nic_rqe;
struct nes_hw_nic *nesnic;
struct nes_device *nesdev;
+ struct nes_rskb_cb *cb;
u32 rx_wqes_posted = 0;
nesnic = &nesvnic->nic;
@@ -1448,6 +1588,9 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
bus_address = pci_map_single(nesdev->pcidev,
skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ cb->busaddr = bus_address;
+ cb->maplen = nesvnic->max_frame_size;
nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
@@ -1537,6 +1680,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
u32 cqp_head;
u32 counter;
u32 wqe_count;
+ struct nes_rskb_cb *cb;
u8 jumbomode=0;
/* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
@@ -1595,7 +1739,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
nesvnic->post_cqp_request = nes_post_cqp_request;
nesvnic->mcrq_mcast_filter = NULL;
- spin_lock_init(&nesvnic->nic.sq_lock);
spin_lock_init(&nesvnic->nic.rq_lock);
/* setup the RQ */
@@ -1714,6 +1857,9 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
pmem = pci_map_single(nesdev->pcidev, skb->data,
nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ cb->busaddr = pmem;
+ cb->maplen = nesvnic->max_frame_size;
nic_rqe = &nesvnic->nic.rq_vbase[counter];
nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
@@ -1742,6 +1888,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
jumbomode = 1;
nes_nic_init_timer_defaults(nesdev, jumbomode);
}
+ if ((nesdev->nesadapter->allow_unaligned_fpdus) &&
+ (nes_init_mgt_qp(nesdev, netdev, nesvnic))) {
+ nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name);
+ nes_destroy_nic_qp(nesvnic);
+ return -ENOMEM;
+ }
+
nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
@@ -1759,25 +1912,91 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
*/
void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
{
+ u64 u64temp;
+ dma_addr_t bus_address;
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- u64 wqe_frag;
+ struct nes_hw_nic_sq_wqe *nic_sqe;
+ __le16 *wqe_fragment_length;
+ u16 wqe_fragment_index;
u32 cqp_head;
+ u32 wqm_cfg0;
unsigned long flags;
+ struct sk_buff *rx_skb;
+ struct nes_rskb_cb *cb;
int ret;
+ if (nesdev->nesadapter->allow_unaligned_fpdus)
+ nes_destroy_mgt(nesvnic);
+
+ /* clear wqe stall before destroying NIC QP */
+ wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0);
+ nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF);
+
/* Free remaining NIC receive buffers */
while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
- nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
- wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
- wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
- pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail];
+ cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
+ pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen,
+ PCI_DMA_FROMDEVICE);
+
dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
}
+ /* Free remaining NIC transmit buffers */
+ while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) {
+ nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail];
+ wqe_fragment_index = 1;
+ wqe_fragment_length = (__le16 *)
+ &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
+ /* bump past the vlan tag */
+ wqe_fragment_length++;
+ if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
+ u64temp = (u64)le32_to_cpu(
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
+ wqe_fragment_index*2]);
+ u64temp += ((u64)le32_to_cpu(
+ nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
+ + wqe_fragment_index*2]))<<32;
+ bus_address = (dma_addr_t)u64temp;
+ if (test_and_clear_bit(nesvnic->nic.sq_tail,
+ nesvnic->nic.first_frag_overflow)) {
+ pci_unmap_single(nesdev->pcidev,
+ bus_address,
+ le16_to_cpu(wqe_fragment_length[
+ wqe_fragment_index++]),
+ PCI_DMA_TODEVICE);
+ }
+ for (; wqe_fragment_index < 5; wqe_fragment_index++) {
+ if (wqe_fragment_length[wqe_fragment_index]) {
+ u64temp = le32_to_cpu(
+ nic_sqe->wqe_words[
+ NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
+ wqe_fragment_index*2]);
+ u64temp += ((u64)le32_to_cpu(
+ nic_sqe->wqe_words[
+ NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+
+ wqe_fragment_index*2]))<<32;
+ bus_address = (dma_addr_t)u64temp;
+ pci_unmap_page(nesdev->pcidev,
+ bus_address,
+ le16_to_cpu(
+ wqe_fragment_length[
+ wqe_fragment_index]),
+ PCI_DMA_TODEVICE);
+ } else
+ break;
+ }
+ }
+ if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail])
+ dev_kfree_skb(
+ nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]);
+
+ nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1)
+ & (nesvnic->nic.sq_size - 1);
+ }
+
spin_lock_irqsave(&nesdev->cqp.lock, flags);
/* Destroy NIC QP */
@@ -1830,6 +2049,9 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
nesvnic->nic_pbase);
+
+ /* restore old wqm_cfg0 value */
+ nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0);
}
/**
@@ -1894,7 +2116,30 @@ int nes_napi_isr(struct nes_device *nesdev)
}
}
-
+static void process_critical_error(struct nes_device *nesdev)
+{
+ u32 debug_error;
+ u32 nes_idx_debug_error_masks0 = 0;
+ u16 error_module = 0;
+
+ debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
+ printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
+ (u16)debug_error);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
+ 0x01010000 | (debug_error & 0x0000ffff));
+ if (crit_err_count++ > 10)
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
+ error_module = (u16) (debug_error & 0x1F00) >> 8;
+ if (++nesdev->nesadapter->crit_error_count[error_module-1] >=
+ nes_max_critical_error_count) {
+ printk(KERN_ERR PFX "Masking off critical error for module "
+ "0x%02X\n", (u16)error_module);
+ nes_idx_debug_error_masks0 = nes_read_indexed(nesdev,
+ NES_IDX_DEBUG_ERROR_MASKS0);
+ nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0,
+ nes_idx_debug_error_masks0 | (1 << error_module));
+ }
+}
/**
* nes_dpc
*/
@@ -1909,7 +2154,6 @@ void nes_dpc(unsigned long param)
u32 timer_stat;
u32 temp_int_stat;
u32 intf_int_stat;
- u32 debug_error;
u32 processed_intf_int = 0;
u16 processed_timer_int = 0;
u16 completion_ints = 0;
@@ -1987,14 +2231,7 @@ void nes_dpc(unsigned long param)
intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
intf_int_stat &= nesdev->intf_int_req;
if (NES_INTF_INT_CRITERR & intf_int_stat) {
- debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
- printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
- (u16)debug_error);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
- 0x01010000 | (debug_error & 0x0000ffff));
- /* BUG(); */
- if (crit_err_count++ > 10)
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
+ process_critical_error(nesdev);
}
if (NES_INTF_INT_PCIERR & intf_int_stat) {
printk(KERN_ERR PFX "PCI Error reported by device!!!\n");
@@ -2145,6 +2382,8 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
if (++head >= aeq_size)
head = 0;
+
+ nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16);
}
while (1);
aeq->aeq_head = head;
@@ -2234,6 +2473,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
u16 temp_phy_data;
u32 pcs_val = 0x0f0f0000;
u32 pcs_mask = 0x0f1f0000;
+ u32 cdr_ctrl;
spin_lock_irqsave(&nesadapter->phy_lock, flags);
if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
@@ -2241,7 +2481,6 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
return;
}
nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT;
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
/* ack the MAC interrupt */
mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200));
@@ -2252,13 +2491,12 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) {
nesdev->link_status_interrupts++;
- if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) {
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS)))
nes_reset_link(nesdev, mac_index);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- }
+
/* read the PHY interrupt status register */
- if (nesadapter->OneG_Mode) {
+ if ((nesadapter->OneG_Mode) &&
+ (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
do {
nes_read_1G_phy_reg(nesdev, 0x1a,
nesadapter->phy_index[mac_index], &phy_data);
@@ -2331,22 +2569,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
}
} else {
switch (nesadapter->phy_type[mac_index]) {
- case NES_PHY_TYPE_IRIS:
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 20;
- do {
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
- break;
-
case NES_PHY_TYPE_ARGUS:
+ case NES_PHY_TYPE_SFP_D:
+ case NES_PHY_TYPE_KR:
/* clear the alarms */
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
@@ -2357,19 +2582,18 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
/* check link status */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 100;
- do {
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+ nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+ phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
+
nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
+ __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
break;
case NES_PHY_TYPE_PUMA_1G:
@@ -2385,6 +2609,17 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
}
if (phy_data & 0x0004) {
+ if (wide_ppm_offset &&
+ (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
+ (nesadapter->hw_rev != NE020_REV)) {
+ cdr_ctrl = nes_read_indexed(nesdev,
+ NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+ mac_index * 0x200);
+ nes_write_indexed(nesdev,
+ NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+ mac_index * 0x200,
+ cdr_ctrl | 0x000F0000);
+ }
nesadapter->mac_link_down[mac_index] = 0;
list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n",
@@ -2396,9 +2631,29 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
netif_start_queue(nesvnic->netdev);
nesvnic->linkup = 1;
netif_carrier_on(nesvnic->netdev);
+
+ spin_lock(&nesvnic->port_ibevent_lock);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 0) {
+ nesdev->iw_status = 1;
+ nes_port_ibevent(nesvnic);
+ }
+ }
+ spin_unlock(&nesvnic->port_ibevent_lock);
}
}
} else {
+ if (wide_ppm_offset &&
+ (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
+ (nesadapter->hw_rev != NE020_REV)) {
+ cdr_ctrl = nes_read_indexed(nesdev,
+ NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+ mac_index * 0x200);
+ nes_write_indexed(nesdev,
+ NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+ mac_index * 0x200,
+ cdr_ctrl & 0xFFF0FFFF);
+ }
nesadapter->mac_link_down[mac_index] = 1;
list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
@@ -2410,21 +2665,115 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
netif_stop_queue(nesvnic->netdev);
nesvnic->linkup = 0;
netif_carrier_off(nesvnic->netdev);
+
+ spin_lock(&nesvnic->port_ibevent_lock);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 1) {
+ nesdev->iw_status = 0;
+ nes_port_ibevent(nesvnic);
+ }
+ }
+ spin_unlock(&nesvnic->port_ibevent_lock);
}
}
}
+ if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
+ nesdev->link_recheck = 1;
+ mod_delayed_work(system_wq, &nesdev->work,
+ NES_LINK_RECHECK_DELAY);
+ }
}
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+
nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE;
}
+void nes_recheck_link_status(struct work_struct *work)
+{
+ unsigned long flags;
+ struct nes_device *nesdev = container_of(work, struct nes_device, work.work);
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ struct nes_vnic *nesvnic;
+ u32 mac_index = nesdev->mac_index;
+ u16 phy_data;
+ u16 temp_phy_data;
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+
+ /* check link status */
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+ nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+ phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
+
+ nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+ __func__, phy_data,
+ nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+
+ if (phy_data & 0x0004) {
+ nesadapter->mac_link_down[mac_index] = 0;
+ list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+ if (nesvnic->linkup == 0) {
+ printk(PFX "The Link is now up for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
+ if (netif_queue_stopped(nesvnic->netdev))
+ netif_start_queue(nesvnic->netdev);
+ nesvnic->linkup = 1;
+ netif_carrier_on(nesvnic->netdev);
+
+ spin_lock(&nesvnic->port_ibevent_lock);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 0) {
+ nesdev->iw_status = 1;
+ nes_port_ibevent(nesvnic);
+ }
+ }
+ spin_unlock(&nesvnic->port_ibevent_lock);
+ }
+ }
+
+ } else {
+ nesadapter->mac_link_down[mac_index] = 1;
+ list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
+ if (nesvnic->linkup == 1) {
+ printk(PFX "The Link is now down for port %s, netdev %p.\n",
+ nesvnic->netdev->name, nesvnic->netdev);
+ if (!(netif_queue_stopped(nesvnic->netdev)))
+ netif_stop_queue(nesvnic->netdev);
+ nesvnic->linkup = 0;
+ netif_carrier_off(nesvnic->netdev);
+
+ spin_lock(&nesvnic->port_ibevent_lock);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 1) {
+ nesdev->iw_status = 0;
+ nes_port_ibevent(nesvnic);
+ }
+ }
+ spin_unlock(&nesvnic->port_ibevent_lock);
+ }
+ }
+ }
+ if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX)
+ schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
+ else
+ nesdev->link_recheck = 0;
+
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
+}
static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
{
struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
- netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index], &nesvnic->napi);
+ napi_schedule(&nesvnic->napi);
}
@@ -2447,6 +2796,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
struct nes_hw_nic_sq_wqe *nic_sqe;
struct sk_buff *skb;
struct sk_buff *rx_skb;
+ struct nes_rskb_cb *cb;
__le16 *wqe_fragment_length;
u32 head;
u32 cq_size;
@@ -2505,16 +2855,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
} else
break;
}
- if (skb)
- dev_kfree_skb_any(skb);
}
+ if (skb)
+ dev_kfree_skb_any(skb);
nesnic->sq_tail++;
nesnic->sq_tail &= nesnic->sq_size-1;
if (sq_cqes > 128) {
barrier();
- /* restart the queue if it had been stopped */
- if (netif_queue_stopped(nesvnic->netdev))
- netif_wake_queue(nesvnic->netdev);
+ /* restart the queue if it had been stopped */
+ if (netif_queue_stopped(nesvnic->netdev))
+ netif_wake_queue(nesvnic->netdev);
sq_cqes = 0;
}
} else {
@@ -2531,6 +2881,8 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
pci_unmap_single(nesdev->pcidev, bus_address,
nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
+ cb->busaddr = 0;
/* rx_skb->tail = rx_skb->data + rx_pkt_size; */
/* rx_skb->len = rx_pkt_size; */
rx_skb->len = 0; /* TODO: see if this is necessary */
@@ -2557,9 +2909,8 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
if ((cqe_errv &
(NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR |
NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
- if (nesvnic->rx_checksum_disabled == 0) {
+ if (nesvnic->netdev->features & NETIF_F_RXCSUM)
rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
} else
nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
" errv = 0x%X, pkt_type = 0x%X.\n",
@@ -2569,7 +2920,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
if ((cqe_errv &
(NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR |
NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
- if (nesvnic->rx_checksum_disabled == 0) {
+ if (nesvnic->netdev->features & NETIF_F_RXCSUM) {
rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
/* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n",
nesvnic->netdev->name); */
@@ -2583,28 +2934,29 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
- nes_cm_recv(rx_skb, nesvnic->netdev);
- } else {
- if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) {
- vlan_tag = (u16)(le32_to_cpu(
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
- >> 16);
- nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
- nesvnic->netdev->name, vlan_tag);
- if (nes_use_lro)
- lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
- nesvnic->vlan_grp, vlan_tag, NULL);
- else
- nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
- } else {
- if (nes_use_lro)
- lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
- else
- nes_netif_rx(rx_skb);
- }
+ if (nes_cm_recv(rx_skb, nesvnic->netdev))
+ rx_skb = NULL;
}
+ if (rx_skb == NULL)
+ goto skip_rx_indicate0;
+
+
+ if (cqe_misc & NES_NIC_CQE_TAG_VALID) {
+ vlan_tag = (u16)(le32_to_cpu(
+ cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
+ >> 16);
+ nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
+ nesvnic->netdev->name, vlan_tag);
+
+ __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag);
+ }
+ if (nes_use_lro)
+ lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+ else
+ netif_receive_skb(rx_skb);
- nesvnic->netdev->last_rx = jiffies;
+skip_rx_indicate0:
+ ;
/* nesvnic->netstats.rx_packets++; */
/* nesvnic->netstats.rx_bytes += rx_pkt_size; */
}
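In the receive path above, a CQE whose misc word has NES_NIC_CQE_TAG_VALID set carries the stripped VLAN tag in the upper 16 bits of the TAG/PKT_TYPE word; the driver shifts it out and attaches it with __vlan_hwaccel_put_tag(). The extraction itself is a single shift, shown here with an invented sample word:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* CQE TAG/PKT_TYPE word, already converted from little endian */
    uint32_t tag_pkt_type = 0x00640800;     /* VLAN 100 in the upper half */
    uint16_t vlan_tag = (uint16_t)(tag_pkt_type >> 16);

    printf("stripped VLAN tag = %u\n", (unsigned)vlan_tag);   /* 100 */
    return 0;
}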
@@ -2655,6 +3007,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
}
+
/**
* nes_cqp_ce_handler
*/
@@ -2669,6 +3022,8 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
u32 cq_size;
u32 cqe_count=0;
u32 error_code;
+ u32 opcode;
+ u32 ctx_index;
/* u32 counter; */
head = cq->cq_head;
@@ -2679,12 +3034,9 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
/* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
- if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
- u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
- ((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
- cqp = *((struct nes_hw_cqp **)&u64temp);
+ opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]);
+ if (opcode & NES_CQE_VALID) {
+ cqp = &nesdev->cqp;
error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
if (error_code) {
@@ -2693,15 +3045,14 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
(u16)(error_code >> 16),
(u16)error_code);
- nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n",
- cqp->qp_id, cqp->sq_head, cqp->sq_tail);
}
- u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
- wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
- ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
- wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
- cqp_request = *((struct nes_cqp_request **)&u64temp);
+ u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
+ cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
+ ((u64)(le32_to_cpu(cq->cq_vbase[head].
+ cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
+
+ cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;
if (cqp_request) {
if (cqp_request->waiting) {
/* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
@@ -2747,9 +3098,15 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
cqp_wqe = &nesdev->cqp.sq_vbase[head];
memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
barrier();
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] =
+
+ opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
+ if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
+ ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
+ else
+ ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
+ cqp_wqe->wqe_words[ctx_index] =
cpu_to_le32((u32)((unsigned long)cqp_request));
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] =
+ cqp_wqe->wqe_words[ctx_index + 1] =
cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
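With the reworked CQP completion handling above, the driver no longer chases the hardware scratch words: when a request is (re)posted, the nes_cqp_request pointer is written into the WQE context words (low word, then high word), and on completion the same pointer is rebuilt from the CQE's COMP_CTX words. The 64-bit split and reassembly, in a self-contained sketch:

#include <stdint.h>
#include <stdio.h>

struct cqp_request { int id; };     /* hypothetical request object */

int main(void)
{
    struct cqp_request req = { .id = 7 };

    /* post: split the pointer into two 32-bit context words */
    uint64_t addr = (uint64_t)(uintptr_t)&req;
    uint32_t ctx_low  = (uint32_t)addr;
    uint32_t ctx_high = (uint32_t)(addr >> 32);

    /* completion: rebuild the 64-bit value and cast back to the pointer */
    uint64_t u64temp = ((uint64_t)ctx_high << 32) | ctx_low;
    struct cqp_request *back = (struct cqp_request *)(uintptr_t)u64temp;

    printf("id = %d, round trip ok = %d\n", back->id, back == &req);
    return 0;
}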
@@ -2765,6 +3122,415 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
nes_read32(nesdev->regs+NES_CQE_ALLOC);
}
+static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
+{
+ if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
+ /* skip over ethernet header */
+ pkt += ETH_HLEN;
+
+ /* Skip over IP and TCP headers */
+ pkt += 4 * (pkt[0] & 0x0f);
+ pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+ }
+ return pkt;
+}
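locate_mpa() finds the MPA/DDP header inside the raw frame copied to Q2 by skipping the Ethernet header, then the IP header (IHL field, counted in 32-bit words), then the TCP header (data-offset field, also in 32-bit words). The same arithmetic applied to a hand-built minimum-size frame, as a standalone sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_HLEN 14   /* Ethernet header length, as in <linux/if_ether.h> */

int main(void)
{
    uint8_t frame[64];
    memset(frame, 0, sizeof(frame));

    uint8_t *ip = frame + ETH_HLEN;
    ip[0] = 0x45;              /* IPv4, IHL = 5 -> 20-byte IP header */
    uint8_t *tcp = ip + 4 * (ip[0] & 0x0f);
    tcp[12] = 0x50;            /* data offset = 5 -> 20-byte TCP header */
    uint8_t *payload = tcp + 4 * ((tcp[12] >> 4) & 0x0f);

    /* 14 + 20 + 20 = 54 bytes before the MPA header */
    printf("MPA header offset = %ld\n", (long)(payload - frame));
    return 0;
}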
+
+/* Determine if incoming error pkt is rdma layer */
+static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
+{
+ u8 *pkt;
+ u16 *mpa;
+ u32 opcode = 0xffffffff;
+
+ if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+ pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+ mpa = (u16 *)locate_mpa(pkt, aeq_info);
+ opcode = be16_to_cpu(mpa[1]) & 0xf;
+ }
+
+ return opcode;
+}
+
+/* Build iWARP terminate header */
+static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
+{
+ u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+ u16 ddp_seg_len;
+ int copy_len = 0;
+ u8 is_tagged = 0;
+ u8 flush_code = 0;
+ struct nes_terminate_hdr *termhdr;
+
+ termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
+ memset(termhdr, 0, 64);
+
+ if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+
+ /* Use data from offending packet to fill in ddp & rdma hdrs */
+ pkt = locate_mpa(pkt, aeq_info);
+ ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
+ if (ddp_seg_len) {
+ copy_len = 2;
+ termhdr->hdrct = DDP_LEN_FLAG;
+ if (pkt[2] & 0x80) {
+ is_tagged = 1;
+ if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+ copy_len += TERM_DDP_LEN_TAGGED;
+ termhdr->hdrct |= DDP_HDR_FLAG;
+ }
+ } else {
+ if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+ copy_len += TERM_DDP_LEN_UNTAGGED;
+ termhdr->hdrct |= DDP_HDR_FLAG;
+ }
+
+ if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+ if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+ copy_len += TERM_RDMA_LEN;
+ termhdr->hdrct |= RDMA_HDR_FLAG;
+ }
+ }
+ }
+ }
+ }
+
+ switch (async_event_id) {
+ case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+ switch (iwarp_opcode(nesqp, aeq_info)) {
+ case IWARP_OPCODE_WRITE:
+ flush_code = IB_WC_LOC_PROT_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_INV_STAG;
+ break;
+ default:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_STAG;
+ }
+ break;
+ case NES_AEQE_AEID_AMP_INVALID_STAG:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_STAG;
+ break;
+ case NES_AEQE_AEID_AMP_BAD_QP:
+ flush_code = IB_WC_LOC_QP_OP_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_QN;
+ break;
+ case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+ case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+ switch (iwarp_opcode(nesqp, aeq_info)) {
+ case IWARP_OPCODE_SEND_INV:
+ case IWARP_OPCODE_SEND_SE_INV:
+ flush_code = IB_WC_REM_OP_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_CANT_INV_STAG;
+ break;
+ default:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_STAG;
+ }
+ break;
+ case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+ if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
+ flush_code = IB_WC_LOC_PROT_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_BOUNDS;
+ } else {
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_BOUNDS;
+ }
+ break;
+ case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+ case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_ACCESS;
+ break;
+ case NES_AEQE_AEID_AMP_TO_WRAP:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_TO_WRAP;
+ break;
+ case NES_AEQE_AEID_AMP_BAD_PD:
+ switch (iwarp_opcode(nesqp, aeq_info)) {
+ case IWARP_OPCODE_WRITE:
+ flush_code = IB_WC_LOC_PROT_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
+ break;
+ case IWARP_OPCODE_SEND_INV:
+ case IWARP_OPCODE_SEND_SE_INV:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_CANT_INV_STAG;
+ break;
+ default:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_UNASSOC_STAG;
+ }
+ break;
+ case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+ flush_code = IB_WC_LOC_LEN_ERR;
+ termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+ termhdr->error_code = MPA_MARKER;
+ break;
+ case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+ termhdr->error_code = MPA_CRC;
+ break;
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+ flush_code = IB_WC_LOC_LEN_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+ termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+ break;
+ case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+ case NES_AEQE_AEID_DDP_NO_L_BIT:
+ flush_code = IB_WC_FATAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+ termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+ break;
+ case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+ case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ flush_code = IB_WC_LOC_LEN_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+ flush_code = IB_WC_GENERAL_ERR;
+ if (is_tagged) {
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
+ } else {
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
+ }
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_MO;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ flush_code = IB_WC_REM_OP_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_QN;
+ break;
+ case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_INV_RDMAP_VER;
+ break;
+ case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+ flush_code = IB_WC_LOC_QP_OP_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_UNEXPECTED_OP;
+ break;
+ default:
+ flush_code = IB_WC_FATAL_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_UNSPECIFIED;
+ break;
+ }
+
+ if (copy_len)
+ memcpy(termhdr + 1, pkt, copy_len);
+
+ if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
+ if (aeq_info & NES_AEQE_SQ)
+ nesqp->term_sq_flush_code = flush_code;
+ else
+ nesqp->term_rq_flush_code = flush_code;
+ }
+
+ return sizeof(struct nes_terminate_hdr) + copy_len;
+}
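nes_bld_terminate_hdr() packs the terminate layer into the high nibble of layer_etype and the error type into the low nibble, with the specific error code in the following byte; the LAYER_* and error-type values come from the nes_term_* enums added to nes_hw.h later in this patch. A standalone illustration of the nibble packing (constants copied from those enums):

#include <stdint.h>
#include <stdio.h>

/* values mirror the nes_term_* enums added in nes_hw.h */
#define LAYER_DDP           1
#define DDP_TAGGED_BUFFER   1
#define DDP_TAGGED_INV_STAG 0x00

int main(void)
{
    uint8_t layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
    uint8_t error_code  = DDP_TAGGED_INV_STAG;

    printf("layer %u, etype %u, error 0x%02x\n",
           (unsigned)(layer_etype >> 4), (unsigned)(layer_etype & 0x0f),
           (unsigned)error_code);
    return 0;
}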
+
+static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
+ struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
+{
+ u64 context;
+ unsigned long flags;
+ u32 aeq_info;
+ u16 async_event_id;
+ u8 tcp_state;
+ u8 iwarp_state;
+ u32 termlen = 0;
+ u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
+ NES_CQP_QP_TERM_DONT_SEND_FIN;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+ if (nesqp->term_flags & NES_TERM_SENT)
+ return; /* Sanity check */
+
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+ iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+ async_event_id = (u16)aeq_info;
+
+ context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
+ if (!context) {
+ WARN_ON(!context);
+ return;
+ }
+
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ nesqp->terminate_eventtype = eventtype;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ if (nesadapter->send_term_ok)
+ termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
+ else
+ mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
+
+ if (!nesdev->iw_status) {
+ nesqp->term_flags = NES_TERM_DONE;
+ nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0);
+ nes_cm_disconn(nesqp);
+ } else {
+ nes_terminate_start_timer(nesqp);
+ nesqp->term_flags |= NES_TERM_SENT;
+ nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
+ }
+}
+
+static void nes_terminate_send_fin(struct nes_device *nesdev,
+ struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+ u32 aeq_info;
+ u16 async_event_id;
+ u8 tcp_state;
+ u8 iwarp_state;
+ unsigned long flags;
+
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+ iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+ async_event_id = (u16)aeq_info;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ /* Send the fin only */
+ nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
+ NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
+}
+
+/* Cleanup after a terminate sent or received */
+static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
+{
+ u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+ unsigned long flags;
+ struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u8 first_time = 0;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ if (nesqp->hte_added) {
+ nesqp->hte_added = 0;
+ next_iwarp_state |= NES_CQP_QP_DEL_HTE;
+ }
+
+ first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
+ nesqp->term_flags |= NES_TERM_DONE;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ /* Make sure we go through this only once */
+ if (first_time) {
+ if (timeout_occurred == 0)
+ del_timer(&nesqp->terminate_timer);
+ else
+ next_iwarp_state |= NES_CQP_QP_RESET;
+
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+ nes_cm_disconn(nesqp);
+ }
+}
+
+static void nes_terminate_received(struct nes_device *nesdev,
+ struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+ u32 aeq_info;
+ u8 *pkt;
+ u32 *mpa;
+ u8 ddp_ctl;
+ u8 rdma_ctl;
+ u16 aeq_id = 0;
+
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+ /* Terminate is not a performance path so the silicon */
+ /* did not validate the frame - do it now */
+ pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+ mpa = (u32 *)locate_mpa(pkt, aeq_info);
+ ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
+ rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
+ if ((ddp_ctl & 0xc0) != 0x40)
+ aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
+ else if ((ddp_ctl & 0x03) != 1)
+ aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
+ else if (be32_to_cpu(mpa[2]) != 2)
+ aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
+ else if (be32_to_cpu(mpa[3]) != 1)
+ aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
+ else if (be32_to_cpu(mpa[4]) != 0)
+ aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
+ else if ((rdma_ctl & 0xc0) != 0x40)
+ aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+ if (aeq_id) {
+ /* Bad terminate recvd - send back a terminate */
+ aeq_info = (aeq_info & 0xffff0000) | aeq_id;
+ aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+ return;
+ }
+ }
+
+ nesqp->term_flags |= NES_TERM_RCVD;
+ nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
+ nes_terminate_start_timer(nesqp);
+ nes_terminate_send_fin(nesdev, nesqp, aeqe);
+}
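nes_terminate_received() re-validates the incoming terminate frame in software because the silicon skips validation on this slow path: the DDP control byte must have its tagged/last bits equal to 0x40 and version 1, the untagged queue number must be 2, the MSN 1, the MO 0, and the RDMAP control byte must carry version bits 0x40; any failure sends a terminate back with the matching AE id. The checks in isolation (the mpa[] words are assumed already converted to host order, and the sample frame is made up):

#include <stdint.h>
#include <stdio.h>

static const char *check_terminate(const uint32_t *mpa)
{
    uint8_t ddp_ctl  = (mpa[0] >> 8) & 0xff;
    uint8_t rdma_ctl = mpa[0] & 0xff;

    if ((ddp_ctl & 0xc0) != 0x40)
        return "bad DDP control bits";
    if ((ddp_ctl & 0x03) != 1)
        return "bad DDP version";
    if (mpa[2] != 2)
        return "terminate not on queue 2";
    if (mpa[3] != 1)
        return "unexpected MSN";
    if (mpa[4] != 0)
        return "unexpected MO";
    if ((rdma_ctl & 0xc0) != 0x40)
        return "bad RDMAP version";
    return "valid terminate frame";
}

int main(void)
{
    uint32_t good[5] = { 0x00004140, 0, 2, 1, 0 };
    printf("%s\n", check_terminate(good));
    return 0;
}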
+
+/* Timeout routine in case terminate fails to complete */
+void nes_terminate_timeout(unsigned long context)
+{
+ struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
+
+ nes_terminate_done(nesqp, 1);
+}
+
+/* Set a timer in case hw cannot complete the terminate sequence */
+static void nes_terminate_start_timer(struct nes_qp *nesqp)
+{
+ mod_timer(&nesqp->terminate_timer, (jiffies + HZ));
+}
/**
* nes_process_iwarp_aeqe
@@ -2773,33 +3539,33 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
struct nes_hw_aeqe *aeqe)
{
u64 context;
- u64 aeqe_context = 0;
unsigned long flags;
struct nes_qp *nesqp;
+ struct nes_hw_cq *hw_cq;
+ struct nes_cq *nescq;
int resource_allocated;
- /* struct iw_cm_id *cm_id; */
struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct ib_event ibevent;
- /* struct iw_cm_event cm_event; */
u32 aeq_info;
u32 next_iwarp_state = 0;
+ u32 aeqe_cq_id;
u16 async_event_id;
u8 tcp_state;
u8 iwarp_state;
+ struct ib_event ibevent;
nes_debug(NES_DBG_AEQ, "\n");
aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
+ if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
} else {
- aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
- aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
BUG_ON(!context);
}
+ /* context is nesqp unless async_event_id == CQ ERROR */
+ nesqp = (struct nes_qp *)(unsigned long)context;
async_event_id = (u16)aeq_info;
tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
@@ -2809,12 +3575,33 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
+ aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
+ if (aeq_info & NES_AEQE_QP) {
+ if (!nes_is_resource_allocated(nesadapter,
+ nesadapter->allocated_qps,
+ aeqe_cq_id))
+ return;
+ }
+
switch (async_event_id) {
case NES_AEQE_AEID_LLP_FIN_RECEIVED:
- nesqp = *((struct nes_qp **)&context);
+ if (nesqp->term_flags)
+ return; /* Ignore it, wait for close complete */
+
if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
+ if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) &&
+ (nesqp->ibqp_state == IB_QPS_RTS)) {
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+ nes_cm_disconn(nesqp);
+ }
nesqp->cm_id->add_ref(nesqp->cm_id);
- nes_add_ref(&nesqp->ibqp);
schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
NES_TIMER_TYPE_CLOSE, 1, 0);
nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
@@ -2823,196 +3610,115 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
async_event_id, nesqp->last_aeq, tcp_state);
}
- if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- (nesqp->ibqp_state != IB_QPS_RTS)) {
- /* FIN Received but tcp state or IB state moved on,
- should expect a close complete */
- return;
- }
- case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
- case NES_AEQE_AEID_LLP_CONNECTION_RESET:
- case NES_AEQE_AEID_TERMINATE_SENT:
- case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
- case NES_AEQE_AEID_RESET_SENT:
- nesqp = *((struct nes_qp **)&context);
- if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
- tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- }
- nes_add_ref(&nesqp->ibqp);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
-
- if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
- (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
- nesqp->hte_added = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
- nesqp->hwqp.qp_id);
- nes_hw_modify_qp(nesdev, nesqp,
- NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
- spin_lock_irqsave(&nesqp->lock, flags);
- }
-
- if ((nesqp->ibqp_state == IB_QPS_RTS) &&
- ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- switch (nesqp->hw_iwarp_state) {
- case NES_AEQE_IWARP_STATE_RTS:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
- break;
- case NES_AEQE_IWARP_STATE_TERMINATE:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
- if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
- next_iwarp_state |= 0x02000000;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- }
- break;
- default:
- next_iwarp_state = 0;
- }
- spin_unlock_irqrestore(&nesqp->lock, flags);
- if (next_iwarp_state) {
- nes_add_ref(&nesqp->ibqp);
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
- " also added another reference\n",
- nesqp->hwqp.qp_id, next_iwarp_state);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
- }
- nes_cm_disconn(nesqp);
- } else {
- if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
- /* FIN Received but ib state not RTS,
- close complete will be on its way */
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_rem_ref(&nesqp->ibqp);
- return;
- }
- spin_unlock_irqrestore(&nesqp->lock, flags);
- if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
- " also added another reference\n",
- nesqp->hwqp.qp_id, next_iwarp_state);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
- }
- nes_cm_disconn(nesqp);
- }
break;
- case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
- nesqp = *((struct nes_qp **)&context);
+ case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = iwarp_state;
nesqp->hw_tcp_state = tcp_state;
nesqp->last_aeq = async_event_id;
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED"
- " event on QP%u \n Q2 Data:\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- ((nesqp->ibqp_state == IB_QPS_RTS)&&
- (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- nes_add_ref(&nesqp->ibqp);
- nes_cm_disconn(nesqp);
- } else {
- nesqp->in_disconnect = 0;
- wake_up(&nesqp->kick_waitq);
- }
- break;
- case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
- nesqp = *((struct nes_qp **)&context);
- nes_add_ref(&nesqp->ibqp);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = async_event_id;
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
- " event on QP%u, remote IP = 0x%08X \n",
- nesqp->hwqp.qp_id,
- ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
- } else {
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
- " event on QP%u \n",
- nesqp->hwqp.qp_id);
- }
- spin_unlock_irqrestore(&nesqp->lock, flags);
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
+ nes_cm_disconn(nesqp);
break;
- case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
- if (NES_AEQE_INBOUND_RDMA&aeq_info) {
- nesqp = nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
- } else {
- /* TODO: get the actual WQE and mask off wqe index */
- context &= ~((u64)511);
- nesqp = *((struct nes_qp **)&context);
- }
+
+ case NES_AEQE_AEID_RESET_SENT:
+ tcp_state = NES_AEQE_TCP_STATE_CLOSED;
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = iwarp_state;
nesqp->hw_tcp_state = tcp_state;
nesqp->last_aeq = async_event_id;
+ nesqp->hte_added = 0;
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+ nes_cm_disconn(nesqp);
break;
- case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
- nesqp = *((struct nes_qp **)&context);
+
+ case NES_AEQE_AEID_LLP_CONNECTION_RESET:
+ if (atomic_read(&nesqp->close_timer_started))
+ return;
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = iwarp_state;
nesqp->hw_tcp_state = tcp_state;
nesqp->last_aeq = async_event_id;
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
+ nes_cm_disconn(nesqp);
+ break;
+
+ case NES_AEQE_AEID_TERMINATE_SENT:
+ nes_terminate_send_fin(nesdev, nesqp, aeqe);
break;
+
+ case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
+ nes_terminate_received(nesdev, nesqp, aeqe);
+ break;
+
+ case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+ case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+ case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+ case NES_AEQE_AEID_AMP_INVALID_STAG:
+ case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+ case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
- nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words
- [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u,"
- " nesqp = %p, AE reported %p\n",
- nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context));
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+ case NES_AEQE_AEID_AMP_TO_WRAP:
+ printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
+ nesqp->hwqp.qp_id, async_event_id);
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
+ break;
+
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+ if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
+ aeq_info &= 0xffff0000;
+ aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
+ aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
}
+
+ case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
+ case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+ case NES_AEQE_AEID_AMP_BAD_QP:
+ case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+ case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+ case NES_AEQE_AEID_DDP_NO_L_BIT:
+ case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+ case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+ case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case NES_AEQE_AEID_AMP_BAD_PD:
+ case NES_AEQE_AEID_AMP_FASTREG_SHARED:
+ case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
+ case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
+ case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
+ case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
+ case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
+ case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
+ case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
+ case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
+ case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
+ case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
+ case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
+ case NES_AEQE_AEID_BAD_CLOSE:
+ case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
+ case NES_AEQE_AEID_STAG_ZERO_INVALID:
+ case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
+ case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
+ nesqp->hwqp.qp_id, async_event_id);
+ print_ip(nesqp->cm_node);
+ if (!atomic_read(&nesqp->close_timer_started))
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
break;
+
case NES_AEQE_AEID_CQ_OPERATION_ERROR:
context <<= 1;
nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
@@ -3022,70 +3728,19 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
if (resource_allocated) {
printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
__func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+ hw_cq = (struct nes_hw_cq *)(unsigned long)context;
+ if (hw_cq) {
+ nescq = container_of(hw_cq, struct nes_cq, hw_cq);
+ if (nescq->ibcq.event_handler) {
+ ibevent.device = nescq->ibcq.device;
+ ibevent.event = IB_EVENT_CQ_ERR;
+ ibevent.element.cq = &nescq->ibcq;
+ nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
+ }
+ }
}
break;
- case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- nesqp = nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
- "_FOR_AVAILABLE_BUFFER event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- /* tell cm to disconnect, cm will queue work to thread */
- nes_add_ref(&nesqp->ibqp);
- nes_cm_disconn(nesqp);
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
- "_NO_BUFFER_AVAILABLE event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- /* tell cm to disconnect, cm will queue work to thread */
- nes_add_ref(&nesqp->ibqp);
- nes_cm_disconn(nesqp);
- break;
- case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
- " event on QP%u \n Q2 Data:\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- /* tell cm to disconnect, cm will queue work to thread */
- nes_add_ref(&nesqp->ibqp);
- nes_cm_disconn(nesqp);
- break;
- /* TODO: additional AEs need to be here */
+
default:
nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
async_event_id);
@@ -3094,7 +3749,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
}
-
/**
* nes_iwarp_ce_handler
*/
@@ -3229,6 +3883,8 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
{
struct nes_cqp_request *cqp_request;
struct nes_hw_cqp_wqe *cqp_wqe;
+ u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
+ u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
int ret;
cqp_request = nes_get_cqp_request(nesdev);
@@ -3245,6 +3901,24 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
cqp_wqe = &cqp_request->cqp_wqe;
nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ /* If wqe in error was identified, set code to be put into cqe */
+ if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
+ which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+ sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
+ nesqp->term_sq_flush_code = 0;
+ }
+
+ if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
+ which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+ rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
+ nesqp->term_rq_flush_code = 0;
+ }
+
+ if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
+ cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
+ cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
+ }
+
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
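flush_wqes() above packs a 16-bit major code and a 16-bit minor code into each 32-bit flush word: the default is the hardware FLUSH major/minor pair, but when a terminate recorded a flush code the driver substitutes the driver-defined major (CQE_MAJOR_DRV, 0x8000) with the saved ib_wc status as the minor. A small sketch of the packing (the example flush code is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define NES_IWARP_CQE_MAJOR_FLUSH 1
#define NES_IWARP_CQE_MINOR_FLUSH 1
#define CQE_MAJOR_DRV             0x8000

int main(void)
{
    /* default: hardware flush major/minor */
    uint32_t sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;

    /* terminate path: driver major with an ib_wc status as the minor */
    uint16_t term_sq_flush_code = 10;          /* hypothetical flush code */
    if (term_sq_flush_code)
        sq_code = ((uint32_t)CQE_MAJOR_DRV << 16) | term_sq_flush_code;

    printf("sq_code = 0x%08x (major 0x%04x, minor 0x%04x)\n",
           sq_code, sq_code >> 16, sq_code & 0xffff);
    return 0;
}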
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 7b81e0ae007..d748e4b31b8 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -35,13 +35,22 @@
#include <linux/inet_lro.h>
+#define NES_PHY_TYPE_CX4 1
#define NES_PHY_TYPE_1G 2
-#define NES_PHY_TYPE_IRIS 3
#define NES_PHY_TYPE_ARGUS 4
#define NES_PHY_TYPE_PUMA_1G 5
#define NES_PHY_TYPE_PUMA_10G 6
+#define NES_PHY_TYPE_GLADIUS 7
+#define NES_PHY_TYPE_SFP_D 8
+#define NES_PHY_TYPE_KR 9
#define NES_MULTICAST_PF_MAX 8
+#define NES_A0 3
+
+#define NES_ENABLE_PAU 0x07000001
+#define NES_DISABLE_PAU 0x07000000
+#define NES_PAU_COUNTER 10
+#define NES_CQP_OPCODE_MASK 0x3f
enum pci_regs {
NES_INT_STAT = 0x0000,
@@ -60,6 +69,7 @@ enum pci_regs {
NES_CQ_ACK = 0x0034,
NES_WQE_ALLOC = 0x0040,
NES_CQE_ALLOC = 0x0044,
+ NES_AEQ_ALLOC = 0x0048
};
enum indexed_regs {
@@ -68,8 +78,10 @@ enum indexed_regs {
NES_IDX_QP_CONTROL = 0x0040,
NES_IDX_FLM_CONTROL = 0x0080,
NES_IDX_INT_CPU_STATUS = 0x00a0,
+ NES_IDX_GPR_TRIGGER = 0x00bc,
NES_IDX_GPIO_CONTROL = 0x00f0,
NES_IDX_GPIO_DATA = 0x00f4,
+ NES_IDX_GPR2 = 0x010c,
NES_IDX_TCP_CONFIG0 = 0x01e4,
NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
NES_IDX_TCP_NOW = 0x01f0,
@@ -156,6 +168,8 @@ enum indexed_regs {
NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004,
NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008,
NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c,
+ NES_IDX_WQM_CONFIG0 = 0x5000,
+ NES_IDX_WQM_CONFIG1 = 0x5004,
NES_IDX_CM_CONFIG = 0x5100,
NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000,
NES_IDX_NIC_PHYPORT_TO_USW = 0x6008,
@@ -195,6 +209,7 @@ enum nes_cqp_opcodes {
NES_CQP_REGISTER_SHARED_STAG = 0x0c,
NES_CQP_DEALLOCATE_STAG = 0x0d,
NES_CQP_MANAGE_ARP_CACHE = 0x0f,
+ NES_CQP_DOWNLOAD_SEGMENT = 0x10,
NES_CQP_SUSPEND_QPS = 0x11,
NES_CQP_UPLOAD_CONTEXT = 0x13,
NES_CQP_CREATE_CEQ = 0x16,
@@ -203,7 +218,8 @@ enum nes_cqp_opcodes {
NES_CQP_DESTROY_AEQ = 0x1b,
NES_CQP_LMI_ACCESS = 0x20,
NES_CQP_FLUSH_WQES = 0x22,
- NES_CQP_MANAGE_APBVT = 0x23
+ NES_CQP_MANAGE_APBVT = 0x23,
+ NES_CQP_MANAGE_QUAD_HASH = 0x25
};
enum nes_cqp_wqe_word_idx {
@@ -215,6 +231,14 @@ enum nes_cqp_wqe_word_idx {
NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
};
+enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */
+ NES_CQP_WQE_DL_OPCODE_IDX = 0,
+ NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1,
+ NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2,
+ NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3
+ /* For index values 4-15 use NES_NIC_SQ_WQE_ values */
+};
+
enum nes_cqp_cq_wqeword_idx {
NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
@@ -235,7 +259,9 @@ enum nes_cqp_stag_wqeword_idx {
NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
};
+#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26
#define NES_CQP_OP_IWARP_STATE_SHIFT 28
+#define NES_CQP_OP_TERMLEN_SHIFT 28
enum nes_cqp_qp_bits {
NES_CQP_QP_ARP_VALID = (1<<8),
@@ -260,12 +286,16 @@ enum nes_cqp_qp_bits {
NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
+ NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
NES_CQP_QP_RESET = (1<<31),
};
enum nes_cqp_qp_wqe_word_idx {
NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
+ NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
+ NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
};
@@ -356,6 +386,7 @@ enum nes_cqp_arp_bits {
enum nes_cqp_flush_bits {
NES_CQP_FLUSH_SQ = (1<<30),
NES_CQP_FLUSH_RQ = (1<<31),
+ NES_CQP_FLUSH_MAJ_MIN = (1<<28),
};
enum nes_cqe_opcode_bits {
@@ -535,11 +566,23 @@ enum nes_iwarp_sq_fmr_wqe_word_idx {
NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
};
+enum nes_iwarp_sq_fmr_opcodes {
+ NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6),
+ NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7),
+ NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
+ NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20),
+};
+
+#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF
+
enum nes_iwarp_sq_locinv_wqe_word_idx {
NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
};
-
enum nes_iwarp_rq_wqe_word_idx {
NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
@@ -574,6 +617,7 @@ enum nes_nic_sq_wqe_bits {
enum nes_nic_cqe_word_idx {
NES_NIC_CQE_ACCQP_ID_IDX = 0,
+ NES_NIC_CQE_HASH_RCVNXT = 1,
NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,
NES_NIC_CQE_MISC_IDX = 3,
};
@@ -628,11 +672,14 @@ enum nes_aeqe_bits {
NES_AEQE_INBOUND_RDMA = (1<<19),
NES_AEQE_IWARP_STATE_MASK = (7<<20),
NES_AEQE_TCP_STATE_MASK = (0xf<<24),
+ NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
NES_AEQE_VALID = (1<<31),
};
#define NES_AEQE_IWARP_STATE_SHIFT 20
#define NES_AEQE_TCP_STATE_SHIFT 24
+#define NES_AEQE_Q2_DATA_ETHERNET (1<<28)
+#define NES_AEQE_Q2_DATA_MPA (1<<29)
enum nes_aeqe_iwarp_state {
NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
@@ -746,6 +793,15 @@ enum nes_iwarp_sq_wqe_bits {
NES_IWARP_SQ_OP_NOP = 12,
};
+enum nes_iwarp_cqe_major_code {
+ NES_IWARP_CQE_MAJOR_FLUSH = 1,
+ NES_IWARP_CQE_MAJOR_DRV = 0x8000
+};
+
+enum nes_iwarp_cqe_minor_code {
+ NES_IWARP_CQE_MINOR_FLUSH = 1
+};
+
#define NES_EEPROM_READ_REQUEST (1<<16)
#define NES_MAC_ADDR_VALID (1<<20)
@@ -873,7 +929,6 @@ struct nes_hw_nic {
u8 replenishing_rq;
u8 reserved;
- spinlock_t sq_lock;
spinlock_t rq_lock;
};
@@ -967,6 +1022,12 @@ struct nes_arp_entry {
#define DEFAULT_JUMBO_NES_QL_TARGET 40
#define DEFAULT_JUMBO_NES_QL_HIGH 128
#define NES_NIC_CQ_DOWNWARD_TREND 16
+#define NES_PFT_SIZE 48
+
+#define NES_MGT_WQ_COUNT 32
+#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32))
+#define NES_MGT_QP_OFFSET 36
+#define NES_MGT_QP_COUNT 4
struct nes_hw_tune_timer {
/* u16 cq_count; */
@@ -1064,11 +1125,12 @@ struct nes_adapter {
u32 wqm_wat;
u32 core_clock;
u32 firmware_version;
+ u32 eeprom_version;
u32 nic_rx_eth_route_err;
u32 et_rx_coalesce_usecs;
- u32 et_rx_max_coalesced_frames;
+ u32 et_rx_max_coalesced_frames;
u32 et_rx_coalesce_usecs_irq;
u32 et_rx_max_coalesced_frames_irq;
u32 et_pkt_rate_low;
@@ -1079,6 +1141,8 @@ struct nes_adapter {
u32 et_rx_max_coalesced_frames_high;
u32 et_rate_sample_interval;
u32 timer_int_limit;
+ u32 wqm_quanta;
+ u8 allow_unaligned_fpdus;
/* Adapter base MAC address */
u32 mac_addr_low;
@@ -1094,12 +1158,14 @@ struct nes_adapter {
u16 pd_config_base[4];
u16 link_interrupt_count[4];
+ u8 crit_error_count[32];
/* the phy index for each port */
u8 phy_index[4];
u8 mac_sw_state[4];
u8 mac_link_down[4];
u8 phy_type[4];
+ u8 log_port;
/* PCI information */
unsigned int devfn;
@@ -1111,8 +1177,10 @@ struct nes_adapter {
u8 netdev_max; /* from host nic address count in EEPROM */
u8 port_count;
u8 virtwq;
+ u8 send_term_ok;
u8 et_use_adaptive_rx_coalesce;
u8 adapter_fcn_count;
+ u8 pft_mcast_map[NES_PFT_SIZE];
};
struct nes_pbl {
@@ -1125,6 +1193,19 @@ struct nes_pbl {
/* TODO: need to add list for two level tables */
};
+#define NES_4K_PBL_CHUNK_SIZE 4096
+
+struct nes_fast_mr_wqe_pbl {
+ u64 *kva;
+ dma_addr_t paddr;
+};
+
+struct nes_ib_fast_reg_page_list {
+ struct ib_fast_reg_page_list ibfrpl;
+ struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
+ u64 pbl;
+};
+
struct nes_listener {
struct work_struct work;
struct workqueue_struct *wq;
@@ -1137,10 +1218,11 @@ struct nes_listener {
struct nes_ib_device;
+#define NES_EVENT_DELAY msecs_to_jiffies(100)
+
struct nes_vnic {
struct nes_ib_device *nesibdev;
u64 sq_full;
- u64 sq_locked;
u64 tso_requests;
u64 segmented_tso_requests;
u64 linearized_skbs;
@@ -1154,7 +1236,6 @@ struct nes_vnic {
/* void *mem; */
struct nes_device *nesdev;
struct net_device *netdev;
- struct vlan_group *vlan_grp;
atomic_t rx_skbs_needed;
atomic_t rx_skb_timer_running;
int budget;
@@ -1188,10 +1269,21 @@ struct nes_vnic {
u8 next_qp_nic_index;
u8 of_device_registered;
u8 rdma_enabled;
- u8 rx_checksum_disabled;
u32 lro_max_aggr;
struct net_lro_mgr lro_mgr;
struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
+ struct timer_list event_timer;
+ enum ib_event_type delayed_event;
+ enum ib_event_type last_dispatched_event;
+ spinlock_t port_ibevent_lock;
+ u32 mgt_mem_size;
+ void *mgt_vbase;
+ dma_addr_t mgt_pbase;
+ struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT];
+ struct task_struct *mgt_thread;
+ wait_queue_head_t mgt_wait_queue;
+ struct sk_buff_head mgt_skb_list;
+
};
struct nes_ib_device {
@@ -1209,7 +1301,92 @@ struct nes_ib_device {
u32 num_pd;
};
-#define nes_vlan_rx vlan_hwaccel_receive_skb
-#define nes_netif_rx netif_receive_skb
+enum nes_hdrct_flags {
+ DDP_LEN_FLAG = 0x80,
+ DDP_HDR_FLAG = 0x40,
+ RDMA_HDR_FLAG = 0x20
+};
+
+enum nes_term_layers {
+ LAYER_RDMA = 0,
+ LAYER_DDP = 1,
+ LAYER_MPA = 2
+};
+
+enum nes_term_error_types {
+ RDMAP_CATASTROPHIC = 0,
+ RDMAP_REMOTE_PROT = 1,
+ RDMAP_REMOTE_OP = 2,
+ DDP_CATASTROPHIC = 0,
+ DDP_TAGGED_BUFFER = 1,
+ DDP_UNTAGGED_BUFFER = 2,
+ DDP_LLP = 3
+};
+
+enum nes_term_rdma_errors {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_INV_BOUNDS = 0x01,
+ RDMAP_ACCESS = 0x02,
+ RDMAP_UNASSOC_STAG = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_RDMAP_VER = 0x05,
+ RDMAP_UNEXPECTED_OP = 0x06,
+ RDMAP_CATASTROPHIC_LOCAL = 0x07,
+ RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum nes_term_ddp_errors {
+ DDP_CATASTROPHIC_LOCAL = 0x00,
+ DDP_TAGGED_INV_STAG = 0x00,
+ DDP_TAGGED_BOUNDS = 0x01,
+ DDP_TAGGED_UNASSOC_STAG = 0x02,
+ DDP_TAGGED_TO_WRAP = 0x03,
+ DDP_TAGGED_INV_DDP_VER = 0x04,
+ DDP_UNTAGGED_INV_QN = 0x01,
+ DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+ DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+ DDP_UNTAGGED_INV_MO = 0x04,
+ DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+ DDP_UNTAGGED_INV_DDP_VER = 0x06
+};
+
+enum nes_term_mpa_errors {
+ MPA_CLOSED = 0x01,
+ MPA_CRC = 0x02,
+ MPA_MARKER = 0x03,
+ MPA_REQ_RSP = 0x04,
+};
+
+struct nes_terminate_hdr {
+ u8 layer_etype;
+ u8 error_code;
+ u8 hdrct;
+ u8 rsvd;
+};
+
+/* Used to determine how to fill in terminate error codes */
+#define IWARP_OPCODE_WRITE 0
+#define IWARP_OPCODE_READREQ 1
+#define IWARP_OPCODE_READRSP 2
+#define IWARP_OPCODE_SEND 3
+#define IWARP_OPCODE_SEND_INV 4
+#define IWARP_OPCODE_SEND_SE 5
+#define IWARP_OPCODE_SEND_SE_INV 6
+#define IWARP_OPCODE_TERM 7
+
+/* These values are used only during terminate processing */
+#define TERM_DDP_LEN_TAGGED 14
+#define TERM_DDP_LEN_UNTAGGED 18
+#define TERM_RDMA_LEN 28
+#define RDMA_OPCODE_MASK 0x0f
+#define RDMA_READ_REQ_OPCODE 1
+#define BAD_FRAME_OFFSET 64
+#define CQE_MAJOR_DRV 0x8000
+
+/* Used for link status recheck after interrupt processing */
+#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50)
+#define NES_LINK_RECHECK_MAX 60
#endif /* __NES_HW_H */
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
new file mode 100644
index 00000000000..416645259b0
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -0,0 +1,1160 @@
+/*
+ * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/kthread.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+#include "nes.h"
+#include "nes_mgt.h"
+
+atomic_t pau_qps_created;
+atomic_t pau_qps_destroyed;
+
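+/**
+ * nes_replenish_mgt_rq - refill the mgt receive queue with freshly mapped skbs
+ */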
+static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
+{
+ unsigned long flags;
+ dma_addr_t bus_address;
+ struct sk_buff *skb;
+ struct nes_hw_nic_rq_wqe *nic_rqe;
+ struct nes_hw_mgt *nesmgt;
+ struct nes_device *nesdev;
+ struct nes_rskb_cb *cb;
+ u32 rx_wqes_posted = 0;
+
+ nesmgt = &mgtvnic->mgt;
+ nesdev = mgtvnic->nesvnic->nesdev;
+ spin_lock_irqsave(&nesmgt->rq_lock, flags);
+ if (nesmgt->replenishing_rq != 0) {
+ if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
+ (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
+ atomic_set(&mgtvnic->rx_skb_timer_running, 1);
+ spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+ mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
+ add_timer(&mgtvnic->rq_wqes_timer);
+ } else {
+ spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+ }
+ return;
+ }
+ nesmgt->replenishing_rq = 1;
+ spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+ do {
+ skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size);
+ if (skb) {
+ skb->dev = mgtvnic->nesvnic->netdev;
+
+ bus_address = pci_map_single(nesdev->pcidev,
+ skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ cb->busaddr = bus_address;
+ cb->maplen = mgtvnic->nesvnic->max_frame_size;
+
+ nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head];
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
+ cpu_to_le32(mgtvnic->nesvnic->max_frame_size);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
+ cpu_to_le32((u32)bus_address);
+ nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
+ cpu_to_le32((u32)((u64)bus_address >> 32));
+ nesmgt->rx_skb[nesmgt->rq_head] = skb;
+ nesmgt->rq_head++;
+ nesmgt->rq_head &= nesmgt->rq_size - 1;
+ atomic_dec(&mgtvnic->rx_skbs_needed);
+ barrier();
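+ /* Post RQ WQEs in batches; the doorbell write carries the count in the top byte and the mgt QP id */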
+ if (++rx_wqes_posted == 255) {
+ nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
+ rx_wqes_posted = 0;
+ }
+ } else {
+ spin_lock_irqsave(&nesmgt->rq_lock, flags);
+ if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
+ (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
+ atomic_set(&mgtvnic->rx_skb_timer_running, 1);
+ spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+ mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
+ add_timer(&mgtvnic->rq_wqes_timer);
+ } else {
+ spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+ }
+ break;
+ }
+ } while (atomic_read(&mgtvnic->rx_skbs_needed));
+ barrier();
+ if (rx_wqes_posted)
+ nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
+ nesmgt->replenishing_rq = 0;
+}
+
+/**
+ * nes_mgt_rq_wqes_timeout - timer handler that retries replenishing the mgt RQ
+ */
+static void nes_mgt_rq_wqes_timeout(unsigned long parm)
+{
+ struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm;
+
+ atomic_set(&mgtvnic->rx_skb_timer_running, 0);
+ if (atomic_read(&mgtvnic->rx_skbs_needed))
+ nes_replenish_mgt_rq(mgtvnic);
+}
+
+/**
+ * nes_mgt_free_skb - unmap and free skb
+ */
+static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir)
+{
+ struct nes_rskb_cb *cb;
+
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir);
+ cb->busaddr = 0;
+ dev_kfree_skb_any(skb);
+}
+
+/**
+ * nes_download_callback - handle download completions
+ */
+static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
+{
+ struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer;
+ struct nes_qp *nesqp = fpdu_info->nesqp;
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < fpdu_info->frag_cnt; i++) {
+ skb = fpdu_info->frags[i].skb;
+ if (fpdu_info->frags[i].cmplt) {
+ nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
+ nes_rem_ref_cm_node(nesqp->cm_node);
+ }
+ }
+
+ if (fpdu_info->hdr_vbase)
+ pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len,
+ fpdu_info->hdr_vbase, fpdu_info->hdr_pbase);
+ kfree(fpdu_info);
+}
+
+/**
+ * nes_get_seq - Get the seq, ack_seq and window from the packet
+ */
+static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
+{
+ struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0];
+ struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
+ struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+
+ *ack = be32_to_cpu(tcph->ack_seq);
+ *wnd = be16_to_cpu(tcph->window);
+ *fin_rcvd = tcph->fin;
+ *rst_rcvd = tcph->rst;
+ return be32_to_cpu(tcph->seq);
+}
+
+/**
+ * nes_get_next_skb - Get the next skb based on where the current skb is in the queue
+ */
+static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp,
+ struct sk_buff *skb, u32 nextseq, u32 *ack,
+ u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
+{
+ u32 seq;
+ bool processacks;
+ struct sk_buff *old_skb;
+
+ if (skb) {
+ /* Continue processing fpdu */
+ if (skb->next == (struct sk_buff *)&nesqp->pau_list)
+ goto out;
+ skb = skb->next;
+ processacks = false;
+ } else {
+ /* Starting a new one */
+ if (skb_queue_empty(&nesqp->pau_list))
+ goto out;
+ skb = skb_peek(&nesqp->pau_list);
+ processacks = true;
+ }
+
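+ /* Walk the list: drop skbs that are already consumed, stop at the in-sequence skb, or give up if a gap is found */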
+ while (1) {
+ if (skb_queue_empty(&nesqp->pau_list))
+ goto out;
+
+ seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
+ if (seq == nextseq) {
+ if (skb->len || processacks)
+ break;
+ } else if (after(seq, nextseq)) {
+ goto out;
+ }
+
+ old_skb = skb;
+ skb = skb->next;
+ skb_unlink(old_skb, &nesqp->pau_list);
+ nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
+ nes_rem_ref_cm_node(nesqp->cm_node);
+ if (skb == (struct sk_buff *)&nesqp->pau_list)
+ goto out;
+ }
+ return skb;
+
+out:
+ return NULL;
+}
+
+/**
+ * get_fpdu_info - Find the next complete fpdu and return its fragments.
+ */
+static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
+ struct pau_fpdu_info **pau_fpdu_info)
+{
+ struct sk_buff *skb;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ struct nes_rskb_cb *cb;
+ struct pau_fpdu_info *fpdu_info = NULL;
+ struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
+ u32 fpdu_len = 0;
+ u32 tmp_len;
+ int frag_cnt = 0;
+ u32 tot_len;
+ u32 frag_tot;
+ u32 ack;
+ u32 fin_rcvd;
+ u32 rst_rcvd;
+ u16 wnd;
+ int i;
+ int rc = 0;
+
+ *pau_fpdu_info = NULL;
+
+ skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
+ if (!skb)
+ goto out;
+
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ if (skb->len) {
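+ /* The first two bytes of the data hold the MPA length; add the MPA framing (length + CRC) and round up to a 4-byte boundary */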
+ fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
+ fpdu_len = (fpdu_len + 3) & 0xfffffffc;
+ tmp_len = fpdu_len;
+
+ /* See if we have all of the fpdu */
+ frag_tot = 0;
+ memset(&frags, 0, sizeof frags);
+ for (i = 0; i < MAX_FPDU_FRAGS; i++) {
+ frags[i].physaddr = cb->busaddr;
+ frags[i].physaddr += skb->data - cb->data_start;
+ frags[i].frag_len = min(tmp_len, skb->len);
+ frags[i].skb = skb;
+ frags[i].cmplt = (skb->len == frags[i].frag_len);
+ frag_tot += frags[i].frag_len;
+ frag_cnt++;
+
+ tmp_len -= frags[i].frag_len;
+ if (tmp_len == 0)
+ break;
+
+ skb = nes_get_next_skb(nesdev, nesqp, skb,
+ nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
+ if (!skb)
+ goto out;
+ if (rst_rcvd) {
+ /* rst received in the middle of fpdu */
+ for (; i >= 0; i--) {
+ skb_unlink(frags[i].skb, &nesqp->pau_list);
+ nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE);
+ }
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ frags[0].physaddr = cb->busaddr;
+ frags[0].physaddr += skb->data - cb->data_start;
+ frags[0].frag_len = skb->len;
+ frags[0].skb = skb;
+ frags[0].cmplt = true;
+ frag_cnt = 1;
+ break;
+ }
+
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ }
+ } else {
+ /* no data */
+ frags[0].physaddr = cb->busaddr;
+ frags[0].frag_len = 0;
+ frags[0].skb = skb;
+ frags[0].cmplt = true;
+ frag_cnt = 1;
+ }
+
+ /* Found one */
+ fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
+ if (fpdu_info == NULL) {
+ nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ fpdu_info->cqp_request = nes_get_cqp_request(nesdev);
+ if (fpdu_info->cqp_request == NULL) {
+ nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0];
+ iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
+ tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+ fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start;
+ fpdu_info->data_len = fpdu_len;
+ tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN;
+
+ if (frags[0].cmplt) {
+ fpdu_info->hdr_pbase = cb->busaddr;
+ fpdu_info->hdr_vbase = NULL;
+ } else {
+ fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev,
+ fpdu_info->hdr_len, &fpdu_info->hdr_pbase);
+ if (!fpdu_info->hdr_vbase) {
+ nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Copy hdrs, adjusting len and seqnum */
+ memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len);
+ iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN);
+ tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+ }
+
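+ /* Rewrite the IP/TCP headers for the rebuilt segment; 0x7f000001 is 127.0.0.1, used as the loopback source address */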
+ iph->tot_len = cpu_to_be16(tot_len);
+ iph->saddr = cpu_to_be32(0x7f000001);
+
+ tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
+ tcph->ack_seq = cpu_to_be32(ack);
+ tcph->window = cpu_to_be16(wnd);
+
+ nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd;
+
+ memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags));
+ fpdu_info->frag_cnt = frag_cnt;
+ fpdu_info->nesqp = nesqp;
+ *pau_fpdu_info = fpdu_info;
+
+ /* Update skb's for next pass */
+ for (i = 0; i < frag_cnt; i++) {
+ cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0];
+ skb_pull(frags[i].skb, frags[i].frag_len);
+
+ if (frags[i].skb->len == 0) {
+ /* Pull skb off the list - it will be freed in the callback */
+ if (!skb_queue_empty(&nesqp->pau_list))
+ skb_unlink(frags[i].skb, &nesqp->pau_list);
+ } else {
+ /* Last skb still has data so update the seq */
+ iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
+ tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+ tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
+ }
+ }
+
+out:
+ if (rc) {
+ if (fpdu_info) {
+ if (fpdu_info->cqp_request)
+ nes_put_cqp_request(nesdev, fpdu_info->cqp_request);
+ kfree(fpdu_info);
+ }
+ }
+ return rc;
+}
+
+/**
+ * forward_fpdus - send complete fpdus, one at a time
+ */
+static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct pau_fpdu_info *fpdu_info;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ unsigned long flags;
+ u64 u64tmp;
+ u32 u32tmp;
+ int rc;
+
+ while (1) {
+ spin_lock_irqsave(&nesqp->pau_lock, flags);
+ rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
+ if (rc || (fpdu_info == NULL)) {
+ spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+ return rc;
+ }
+
+ cqp_request = fpdu_info->cqp_request;
+ cqp_wqe = &cqp_request->cqp_wqe;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX,
+ NES_CQP_DOWNLOAD_SEGMENT |
+ (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT));
+
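+ /* Length word: header length in the upper 16 bits, header + data length in the lower 16 bits */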
+ u32tmp = fpdu_info->hdr_len << 16;
+ u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len;
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX,
+ u32tmp);
+
+ u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len;
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX,
+ u32tmp);
+
+ u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len;
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX,
+ u32tmp);
+
+ u64tmp = (u64)fpdu_info->hdr_pbase;
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
+ lower_32_bits(u64tmp));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
+ upper_32_bits(u64tmp));
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
+ lower_32_bits(fpdu_info->frags[0].physaddr));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX,
+ upper_32_bits(fpdu_info->frags[0].physaddr));
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX,
+ lower_32_bits(fpdu_info->frags[1].physaddr));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX,
+ upper_32_bits(fpdu_info->frags[1].physaddr));
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX,
+ lower_32_bits(fpdu_info->frags[2].physaddr));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX,
+ upper_32_bits(fpdu_info->frags[2].physaddr));
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX,
+ lower_32_bits(fpdu_info->frags[3].physaddr));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX,
+ upper_32_bits(fpdu_info->frags[3].physaddr));
+
+ cqp_request->cqp_callback_pointer = fpdu_info;
+ cqp_request->callback = 1;
+ cqp_request->cqp_callback = nes_download_callback;
+
+ atomic_set(&cqp_request->refcount, 1);
+ nes_post_cqp_request(nesdev, cqp_request);
+ spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+ }
+
+ return 0;
+}
+
+static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+ int again = 1;
+ unsigned long flags;
+
+ do {
+ /* Ignore rc - if it failed, tcp retries will cause it to try again */
+ forward_fpdus(nesvnic, nesqp);
+
+ spin_lock_irqsave(&nesqp->pau_lock, flags);
+ if (nesqp->pau_pending) {
+ nesqp->pau_pending = 0;
+ } else {
+ nesqp->pau_busy = 0;
+ again = 0;
+ }
+
+ spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+ } while (again);
+}
+
+/**
+ * queue_fpdus - Handle fpdus that hw passed up to sw
+ */
+static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+ struct sk_buff *tmpskb;
+ struct nes_rskb_cb *cb;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ unsigned char *tcph_end;
+ u32 rcv_nxt;
+ u32 rcv_wnd;
+ u32 seqnum;
+ u32 len;
+ bool process_it = false;
+ unsigned long flags;
+
+ /* Move data ptr to after tcp header */
+ iph = (struct iphdr *)skb->data;
+ tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+ seqnum = be32_to_cpu(tcph->seq);
+ tcph_end = (((char *)tcph) + (4 * tcph->doff));
+
+ len = be16_to_cpu(iph->tot_len);
+ if (skb->len > len)
+ skb_trim(skb, len);
+ skb_pull(skb, tcph_end - skb->data);
+
+ /* Initialize tracking values */
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ cb->seqnum = seqnum;
+
+ /* Make sure data is in the receive window */
+ rcv_nxt = nesqp->pau_rcv_nxt;
+ rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd);
+ if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) {
+ nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE);
+ nes_rem_ref_cm_node(nesqp->cm_node);
+ return;
+ }
+
+ spin_lock_irqsave(&nesqp->pau_lock, flags);
+
+ if (nesqp->pau_busy)
+ nesqp->pau_pending = 1;
+ else
+ nesqp->pau_busy = 1;
+
+ /* Queue skb by sequence number */
+ if (skb_queue_len(&nesqp->pau_list) == 0) {
+ skb_queue_head(&nesqp->pau_list, skb);
+ } else {
+ tmpskb = nesqp->pau_list.next;
+ while (tmpskb != (struct sk_buff *)&nesqp->pau_list) {
+ cb = (struct nes_rskb_cb *)&tmpskb->cb[0];
+ if (before(seqnum, cb->seqnum))
+ break;
+ tmpskb = tmpskb->next;
+ }
+ skb_insert(tmpskb, skb, &nesqp->pau_list);
+ }
+ if (nesqp->pau_state == PAU_READY)
+ process_it = true;
+ spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+
+ if (process_it)
+ process_fpdus(nesvnic, nesqp);
+
+ return;
+}
+
+/**
+ * mgt_thread - Handle mgt skbs in a safe context
+ */
+static int mgt_thread(void *context)
+{
+ struct nes_vnic *nesvnic = context;
+ struct sk_buff *skb;
+ struct nes_rskb_cb *cb;
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(nesvnic->mgt_wait_queue,
+ skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop());
+ while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) {
+ skb = skb_dequeue(&nesvnic->mgt_skb_list);
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
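+ /* data_start points back at the Ethernet header; map the whole frame so the headers can be reused later */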
+ cb->data_start = skb->data - ETH_HLEN;
+ cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start,
+ nesvnic->max_frame_size, PCI_DMA_TODEVICE);
+ queue_fpdus(skb, nesvnic, cb->nesqp);
+ }
+ }
+
+ /* Closing down so delete any entries on the queue */
+ while (skb_queue_len(&nesvnic->mgt_skb_list)) {
+ skb = skb_dequeue(&nesvnic->mgt_skb_list);
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ nes_rem_ref_cm_node(cb->nesqp->cm_node);
+ dev_kfree_skb_any(skb);
+ }
+ return 0;
+}
+
+/**
+ * nes_queue_mgt_skbs - Queue skb so it can be handled in a thread context
+ */
+void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+ struct nes_rskb_cb *cb;
+
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ cb->nesqp = nesqp;
+ skb_queue_tail(&nesvnic->mgt_skb_list, skb);
+ wake_up_interruptible(&nesvnic->mgt_wait_queue);
+}
+
+void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp)
+{
+ struct sk_buff *skb;
+ unsigned long flags;
+ atomic_inc(&pau_qps_destroyed);
+
+ /* Free packets that have not yet been forwarded */
+ /* Lock is acquired by skb_dequeue when removing the skb */
+ spin_lock_irqsave(&nesqp->pau_lock, flags);
+ while (skb_queue_len(&nesqp->pau_list)) {
+ skb = skb_dequeue(&nesqp->pau_list);
+ nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
+ nes_rem_ref_cm_node(nesqp->cm_node);
+ }
+ spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+}
+
+static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
+{
+ struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer;
+ struct nes_cqp_request *new_request;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_adapter *nesadapter;
+ struct nes_qp *nesqp;
+ struct nes_v4_quad nes_quad;
+ u32 crc_value;
+ u64 u64temp;
+
+ nesadapter = nesdev->nesadapter;
+ nesqp = qh_chg->nesqp;
+
+ /* Should we handle the bad completion? */
+ if (cqp_request->major_code)
+ WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
+ cqp_request->major_code);
+
+ switch (nesqp->pau_state) {
+ case PAU_DEL_QH:
+ /* Old hash code deleted, now set the new one */
+ nesqp->pau_state = PAU_ADD_LB_QH;
+ new_request = nes_get_cqp_request(nesdev);
+ if (new_request == NULL) {
+ nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n");
+ WARN_ON(1);
+ return;
+ }
+
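+ /* Build the loopback quad (ports swapped, 127.0.0.1 as the source) and derive the new hash-table index from its CRC */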
+ memset(&nes_quad, 0, sizeof(nes_quad));
+ nes_quad.DstIpAdrIndex =
+ cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+ nes_quad.SrcIpadr = cpu_to_be32(0x7f000001);
+ nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]);
+ nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]);
+
+ /* Produce hash key */
+ crc_value = get_crc_value(&nes_quad);
+ nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
+ nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n",
+ nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+
+ nesqp->hte_index &= nesadapter->hte_index_mask;
+ nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
+ nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001);
+ nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt);
+
+ cqp_wqe = &new_request->cqp_wqe;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words,
+ NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH |
+ NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+ u64temp = (u64)nesqp->nesqp_context_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+ nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n");
+
+ new_request->cqp_callback_pointer = qh_chg;
+ new_request->callback = 1;
+ new_request->cqp_callback = nes_chg_qh_handler;
+ atomic_set(&new_request->refcount, 1);
+ nes_post_cqp_request(nesdev, new_request);
+ break;
+
+ case PAU_ADD_LB_QH:
+ /* Start processing the queued fpdu's */
+ nesqp->pau_state = PAU_READY;
+ process_fpdus(qh_chg->nesvnic, qh_chg->nesqp);
+ kfree(qh_chg);
+ break;
+ }
+}
+
+/**
+ * nes_change_quad_hash - delete the QP's quad hash entry and re-add it keyed to the loopback address
+ */
+static int nes_change_quad_hash(struct nes_device *nesdev,
+ struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+ struct nes_cqp_request *cqp_request = NULL;
+ struct pau_qh_chg *qh_chg = NULL;
+ u64 u64temp;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ int ret = 0;
+
+ cqp_request = nes_get_cqp_request(nesdev);
+ if (cqp_request == NULL) {
+ nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+ ret = -ENOMEM;
+ goto chg_qh_err;
+ }
+
+ qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
+ if (qh_chg == NULL) {
+ nes_debug(NES_DBG_PAU, "Failed to allocate a qh_chg.\n");
+ ret = -ENOMEM;
+ goto chg_qh_err;
+ }
+ qh_chg->nesdev = nesdev;
+ qh_chg->nesvnic = nesvnic;
+ qh_chg->nesqp = nesqp;
+ nesqp->pau_state = PAU_DEL_QH;
+
+ cqp_wqe = &cqp_request->cqp_wqe;
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words,
+ NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE |
+ NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+ u64temp = (u64)nesqp->nesqp_context_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+ nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n");
+
+ cqp_request->cqp_callback_pointer = qh_chg;
+ cqp_request->callback = 1;
+ cqp_request->cqp_callback = nes_chg_qh_handler;
+ atomic_set(&cqp_request->refcount, 1);
+ nes_post_cqp_request(nesdev, cqp_request);
+
+ return ret;
+
+chg_qh_err:
+ kfree(qh_chg);
+ if (cqp_request)
+ nes_put_cqp_request(nesdev, cqp_request);
+ return ret;
+}
+
+/**
+ * nes_mgt_ce_handler
+ * This management code deals with any packed and unaligned (pau) fpdus
+ * that the hardware cannot handle.
+ */
+static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+{
+ struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq);
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 head;
+ u32 cq_size;
+ u32 cqe_count = 0;
+ u32 cqe_misc;
+ u32 qp_id = 0;
+ u32 skbs_needed;
+ unsigned long context;
+ struct nes_qp *nesqp;
+ struct sk_buff *rx_skb;
+ struct nes_rskb_cb *cb;
+
+ head = cq->cq_head;
+ cq_size = cq->cq_size;
+
+ while (1) {
+ cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
+ if (!(cqe_misc & NES_NIC_CQE_VALID))
+ break;
+
+ nesqp = NULL;
+ if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) {
+ qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]);
+ qp_id &= 0x001fffff;
+ if (qp_id < nesadapter->max_qp) {
+ context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN];
+ nesqp = (struct nes_qp *)context;
+ }
+ }
+
+ if (nesqp) {
+ if (nesqp->pau_mode == false) {
+ nesqp->pau_mode = true; /* First time for this qp */
+ nesqp->pau_rcv_nxt = le32_to_cpu(
+ cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]);
+ skb_queue_head_init(&nesqp->pau_list);
+ spin_lock_init(&nesqp->pau_lock);
+ atomic_inc(&pau_qps_created);
+ nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp);
+ }
+
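+ /* Pass the raw receive buffer to the mgt thread; the low 16 bits of the misc CQE word hold the frame length */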
+ rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
+ rx_skb->len = 0;
+ skb_put(rx_skb, cqe_misc & 0x0000ffff);
+ rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev);
+ cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
+ pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE);
+ cb->busaddr = 0;
+ mgtvnic->mgt.rq_tail++;
+ mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1;
+
+ nes_add_ref_cm_node(nesqp->cm_node);
+ nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp);
+ } else {
+ printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id);
+ }
+
+ cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
+ cqe_count++;
+ if (++head >= cq_size)
+ head = 0;
+
+ if (cqe_count == 255) {
+ /* Replenish mgt CQ */
+ nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16));
+ nesdev->currcq_count += cqe_count;
+ cqe_count = 0;
+ }
+
+ skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed);
+ if (skbs_needed > (mgtvnic->mgt.rq_size >> 1))
+ nes_replenish_mgt_rq(mgtvnic);
+ }
+
+ cq->cq_head = head;
+ nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+ cq->cq_number | (cqe_count << 16));
+ nes_read32(nesdev->regs + NES_CQE_ALLOC);
+ nesdev->currcq_count += cqe_count;
+}
+
+/**
+ * nes_init_mgt_qp - allocate and bring up the management (PAU) QPs
+ */
+int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic)
+{
+ struct nes_vnic_mgt *mgtvnic;
+ u32 counter;
+ void *vmem;
+ dma_addr_t pmem;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ u32 cqp_head;
+ unsigned long flags;
+ struct nes_hw_nic_qp_context *mgt_context;
+ u64 u64temp;
+ struct nes_hw_nic_rq_wqe *mgt_rqe;
+ struct sk_buff *skb;
+ u32 wqe_count;
+ struct nes_rskb_cb *cb;
+ u32 mgt_mem_size;
+ void *mgt_vbase;
+ dma_addr_t mgt_pbase;
+ int i;
+ int ret;
+
+ /* Allocate space for all mgt QPs at once */
+ mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
+ if (mgtvnic == NULL) {
+ nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n");
+ return -ENOMEM;
+ }
+
+ /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
+ /* We are not sending from this NIC so sq is not allocated */
+ mgt_mem_size = 256 +
+ (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) +
+ (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) +
+ sizeof(struct nes_hw_nic_qp_context);
+ mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+ mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase);
+ if (!mgt_vbase) {
+ kfree(mgtvnic);
+ nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n");
+ return -ENOMEM;
+ }
+
+ nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size;
+ nesvnic->mgt_vbase = mgt_vbase;
+ nesvnic->mgt_pbase = mgt_pbase;
+
+ skb_queue_head_init(&nesvnic->mgt_skb_list);
+ init_waitqueue_head(&nesvnic->mgt_wait_queue);
+ nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread");
+
+ for (i = 0; i < NES_MGT_QP_COUNT; i++) {
+ mgtvnic->nesvnic = nesvnic;
+ mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i;
+ memset(mgt_vbase, 0, mgt_mem_size);
+ nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n",
+ mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size);
+
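+ /* Align the ring memory for this mgt QP to a 256-byte boundary */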
+ vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) &
+ ~(unsigned long)(256 - 1));
+ pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) &
+ ~(unsigned long long)(256 - 1));
+
+ spin_lock_init(&mgtvnic->mgt.rq_lock);
+
+ /* setup the RQ */
+ mgtvnic->mgt.rq_vbase = vmem;
+ mgtvnic->mgt.rq_pbase = pmem;
+ mgtvnic->mgt.rq_head = 0;
+ mgtvnic->mgt.rq_tail = 0;
+ mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT;
+
+ /* setup the CQ */
+ vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
+ pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
+
+ mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id;
+ mgtvnic->mgt_cq.cq_vbase = vmem;
+ mgtvnic->mgt_cq.cq_pbase = pmem;
+ mgtvnic->mgt_cq.cq_head = 0;
+ mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT;
+
+ mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler;
+
+ /* Send CreateCQ request to CQP */
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+ cqp_head = nesdev->cqp.sq_head;
+
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
+ NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+ ((u32)mgtvnic->mgt_cq.cq_size << 16));
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
+ mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16));
+ u64temp = (u64)mgtvnic->mgt_cq.cq_pbase;
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
+ u64temp = (unsigned long)&mgtvnic->mgt_cq;
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+ cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+ cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ /* Send CreateQP request to CQP */
+ mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]);
+ mgt_context->context_words[NES_NIC_CTX_MISC_IDX] =
+ cpu_to_le32((u32)NES_MGT_CTX_SIZE |
+ ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
+ nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
+ nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
+ nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
+ if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0)
+ mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
+
+ u64temp = (u64)mgtvnic->mgt.rq_pbase;
+ mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+ u64temp = (u64)mgtvnic->mgt.rq_pbase;
+ mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+ mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
+ NES_CQP_QP_TYPE_NIC);
+ cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id);
+ u64temp = (u64)mgtvnic->mgt_cq.cq_pbase +
+ (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+ nesdev->cqp.sq_head = cqp_head;
+
+ barrier();
+
+ /* Ring doorbell (2 WQEs) */
+ nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n",
+ mgtvnic->mgt.qp_id);
+
+ ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+ NES_EVENT_TIMEOUT);
+ nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n",
+ mgtvnic->mgt.qp_id, ret);
+ if (!ret) {
+ nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id);
+ if (i == 0) {
+ pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
+ nesvnic->mgt_pbase);
+ kfree(mgtvnic);
+ } else {
+ nes_destroy_mgt(nesvnic);
+ }
+ return -EIO;
+ }
+
+ /* Populate the RQ */
+ for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) {
+ skb = dev_alloc_skb(nesvnic->max_frame_size);
+ if (!skb) {
+ nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
+ return -ENOMEM;
+ }
+
+ skb->dev = netdev;
+
+ pmem = pci_map_single(nesdev->pcidev, skb->data,
+ nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+ cb = (struct nes_rskb_cb *)&skb->cb[0];
+ cb->busaddr = pmem;
+ cb->maplen = nesvnic->max_frame_size;
+
+ mgt_rqe = &mgtvnic->mgt.rq_vbase[counter];
+ mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size);
+ mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+ mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+ mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
+ mgtvnic->mgt.rx_skb[counter] = skb;
+ }
+
+ init_timer(&mgtvnic->rq_wqes_timer);
+ mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
+ mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
+
+ wqe_count = NES_MGT_WQ_COUNT - 1;
+ mgtvnic->mgt.rq_head = wqe_count;
+ barrier();
+ do {
+ counter = min(wqe_count, ((u32)255));
+ wqe_count -= counter;
+ nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id);
+ } while (wqe_count);
+
+ nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+ mgtvnic->mgt_cq.cq_number);
+ nes_read32(nesdev->regs + NES_CQE_ALLOC);
+
+ mgt_vbase += mgt_mem_size;
+ mgt_pbase += mgt_mem_size;
+ nesvnic->mgtvnic[i] = mgtvnic++;
+ }
+ return 0;
+}
+
+
+void nes_destroy_mgt(struct nes_vnic *nesvnic)
+{
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_vnic_mgt *mgtvnic;
+ struct nes_vnic_mgt *first_mgtvnic;
+ unsigned long flags;
+ struct nes_hw_cqp_wqe *cqp_wqe;
+ u32 cqp_head;
+ struct sk_buff *rx_skb;
+ int i;
+ int ret;
+
+ kthread_stop(nesvnic->mgt_thread);
+
+ /* Free remaining NIC receive buffers */
+ first_mgtvnic = nesvnic->mgtvnic[0];
+ for (i = 0; i < NES_MGT_QP_COUNT; i++) {
+ mgtvnic = nesvnic->mgtvnic[i];
+ if (mgtvnic == NULL)
+ continue;
+
+ while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) {
+ rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
+ nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE);
+ mgtvnic->mgt.rq_tail++;
+ mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1);
+ }
+
+ spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+ /* Destroy NIC QP */
+ cqp_head = nesdev->cqp.sq_head;
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ mgtvnic->mgt.qp_id);
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+
+ cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+
+ /* Destroy NIC CQ */
+ nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+ (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+ (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
+
+ if (++cqp_head >= nesdev->cqp.sq_size)
+ cqp_head = 0;
+
+ nesdev->cqp.sq_head = cqp_head;
+ barrier();
+
+ /* Ring doorbell (2 WQEs) */
+ nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+ spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+ nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
+ " cqp.sq_tail=%u, cqp.sq_size=%u\n",
+ cqp_head, nesdev->cqp.sq_head,
+ nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
+
+ ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+ NES_EVENT_TIMEOUT);
+
+ nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
+ " cqp.sq_head=%u, cqp.sq_tail=%u\n",
+ ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
+ if (!ret)
+ nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n",
+ mgtvnic->mgt.qp_id);
+
+ nesvnic->mgtvnic[i] = NULL;
+ }
+
+ if (nesvnic->mgt_vbase) {
+ pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
+ nesvnic->mgt_pbase);
+ nesvnic->mgt_vbase = NULL;
+ nesvnic->mgt_pbase = 0;
+ }
+
+ kfree(first_mgtvnic);
+}
diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h
new file mode 100644
index 00000000000..4f7f701c4a8
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_mgt.h
@@ -0,0 +1,97 @@
+/*
+* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses. You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenIB.org BSD license below:
+*
+* Redistribution and use in source and binary forms, with or
+* without modification, are permitted provided that the following
+* conditions are met:
+*
+* - Redistributions of source code must retain the above
+* copyright notice, this list of conditions and the following
+* disclaimer.
+*
+* - Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following
+* disclaimer in the documentation and/or other materials
+* provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+
+#ifndef __NES_MGT_H
+#define __NES_MGT_H
+
+#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */
+
+int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic);
+void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp);
+void nes_destroy_mgt(struct nes_vnic *nesvnic);
+void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp);
+
+struct nes_hw_mgt {
+ struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */
+ dma_addr_t rq_pbase; /* PCI memory for host rings */
+ struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
+ u16 qp_id;
+ u16 sq_head;
+ u16 rq_head;
+ u16 rq_tail;
+ u16 rq_size;
+ u8 replenishing_rq;
+ u8 reserved;
+ spinlock_t rq_lock;
+};
+
+struct nes_vnic_mgt {
+ struct nes_vnic *nesvnic;
+ struct nes_hw_mgt mgt;
+ struct nes_hw_nic_cq mgt_cq;
+ atomic_t rx_skbs_needed;
+ struct timer_list rq_wqes_timer;
+ atomic_t rx_skb_timer_running;
+};
+
+#define MAX_FPDU_FRAGS 4
+struct pau_fpdu_frag {
+ struct sk_buff *skb;
+ u64 physaddr;
+ u32 frag_len;
+ bool cmplt;
+};
+
+struct pau_fpdu_info {
+ struct nes_qp *nesqp;
+ struct nes_cqp_request *cqp_request;
+ void *hdr_vbase;
+ dma_addr_t hdr_pbase;
+ int hdr_len;
+ u16 data_len;
+ u16 frag_cnt;
+ struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
+};
+
+enum pau_qh_state {
+ PAU_DEL_QH,
+ PAU_ADD_LB_QH,
+ PAU_READY
+};
+
+struct pau_qh_chg {
+ struct nes_device *nesdev;
+ struct nes_vnic *nesvnic;
+ struct nes_qp *nesqp;
+};
+
+#endif /* __NES_MGT_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 1b0938c8777..49eb5111d2c 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -40,6 +40,7 @@
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ethtool.h>
+#include <linux/slab.h>
#include <net/tcp.h>
#include <net/inet_common.h>
@@ -91,6 +92,7 @@ static struct nic_qp_map *nic_qp_mapping_per_function[] = {
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
| NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
static int debug = -1;
+static int nics_per_function = 1;
/**
* nes_netdev_poll
@@ -98,7 +100,6 @@ static int debug = -1;
static int nes_netdev_poll(struct napi_struct *napi, int budget)
{
struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi);
- struct net_device *netdev = nesvnic->netdev;
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq;
@@ -111,7 +112,7 @@ static int nes_netdev_poll(struct napi_struct *napi, int budget)
nes_nic_ce_handler(nesdev, nescq);
if (nescq->cqes_pending == 0) {
- netif_rx_complete(netdev, napi);
+ napi_complete(napi);
/* clear out completed cqes and arm */
nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
nescq->cq_number | (nescq->cqe_allocs_pending << 16));
@@ -143,6 +144,7 @@ static int nes_netdev_open(struct net_device *netdev)
u32 nic_active_bit;
u32 nic_active;
struct list_head *list_pos, *list_temp;
+ unsigned long flags;
assert(nesdev != NULL);
@@ -201,7 +203,8 @@ static int nes_netdev_open(struct net_device *netdev)
nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW"
" (Addr:%08X) = %08X, HIGH = %08X.\n",
i, nesvnic->qp_nic_index[i],
- NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8),
+ NES_IDX_PERFECT_FILTER_LOW+
+ (nesvnic->qp_nic_index[i] * 8),
macaddr_low,
(u32)macaddr_high | NES_MAC_ADDR_VALID |
((((u32)nesvnic->nic_index) << 16)));
@@ -230,12 +233,36 @@ static int nes_netdev_open(struct net_device *netdev)
NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
first_nesvnic = nesvnic;
}
+
if (first_nesvnic->linkup) {
/* Enable network packets */
nesvnic->linkup = 1;
netif_start_queue(netdev);
netif_carrier_on(netdev);
}
+
+ spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
+ if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
+ nesdev->link_recheck = 1;
+ mod_delayed_work(system_wq, &nesdev->work,
+ NES_LINK_RECHECK_DELAY);
+ }
+ spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+
+ spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
+ if (nesvnic->of_device_registered) {
+ nesdev->nesadapter->send_term_ok = 1;
+ if (nesvnic->linkup == 1) {
+ if (nesdev->iw_status == 0) {
+ nesdev->iw_status = 1;
+ nes_port_ibevent(nesvnic);
+ }
+ } else {
+ nesdev->iw_status = 0;
+ }
+ }
+ spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
+
napi_enable(&nesvnic->napi);
nesvnic->netdev_open = 1;
@@ -254,6 +281,7 @@ static int nes_netdev_stop(struct net_device *netdev)
u32 nic_active;
struct nes_vnic *first_nesvnic = NULL;
struct list_head *list_pos, *list_temp;
+ unsigned long flags;
nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n",
nesvnic, nesdev, netdev, netdev->name);
@@ -262,6 +290,7 @@ static int nes_netdev_stop(struct net_device *netdev)
if (netif_msg_ifdown(nesvnic))
printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name);
+ netif_carrier_off(netdev);
/* Disable network packets */
napi_disable(&nesvnic->napi);
@@ -272,14 +301,18 @@ static int nes_netdev_stop(struct net_device *netdev)
break;
}
- if (first_nesvnic->netdev_open == 0)
+ if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) &&
+ (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) !=
+ PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
+ nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+
+ (0x200*nesdev->mac_index), 0xffffffff);
+ nes_write_indexed(first_nesvnic->nesdev,
+ NES_IDX_MAC_INT_MASK+
+ (0x200*first_nesvnic->nesdev->mac_index),
+ ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
+ NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
+ } else {
nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff);
- else if ((first_nesvnic != nesvnic) &&
- (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff);
- nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index),
- ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
- NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
}
nic_active_mask = ~((u32)(1 << nesvnic->nic_index));
@@ -301,12 +334,17 @@ static int nes_netdev_stop(struct net_device *netdev)
nic_active &= nic_active_mask;
nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
-
+ spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
if (nesvnic->of_device_registered) {
- nes_destroy_ofa_device(nesvnic->nesibdev);
- nesvnic->nesibdev = NULL;
- nesvnic->of_device_registered = 0;
+ nesdev->nesadapter->send_term_ok = 0;
+ nesdev->iw_status = 0;
+ if (nesvnic->linkup == 1)
+ nes_port_ibevent(nesvnic);
}
+ del_timer_sync(&nesvnic->event_timer);
+ nesvnic->event_timer.function = NULL;
+ spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
+
nes_destroy_nic_qp(nesvnic);
nesvnic->netdev_open = 0;
@@ -346,24 +384,20 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
/* bump past the vlan tag */
wqe_fragment_length++;
/* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
+ wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- tcph = tcp_hdr(skb);
- if (1) {
- if (skb_is_gso(skb)) {
- /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n",
- netdev->name, skb_is_gso(skb)); */
- wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE |
- NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
- ((u32)tcph->doff) |
- (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
- } else {
- wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
- }
+ if (skb_is_gso(skb)) {
+ tcph = tcp_hdr(skb);
+ /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n",
+ netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */
+ wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size;
+ set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
+ ((u32)tcph->doff) |
+ (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
}
} else { /* CHECKSUM_HW */
- wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION;
+ wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM;
}
set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
@@ -395,21 +429,20 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
if (skb_headlen(skb) == skb->len) {
if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) {
nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0;
- nesnic->tx_skb[nesnic->sq_head] = NULL;
- dev_kfree_skb(skb);
+ nesnic->tx_skb[nesnic->sq_head] = skb;
}
} else {
/* Deal with Fragments */
nesnic->tx_skb[nesnic->sq_head] = skb;
for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags;
skb_fragment_index++) {
- bus_address = pci_map_page( nesdev->pcidev,
- skb_shinfo(skb)->frags[skb_fragment_index].page,
- skb_shinfo(skb)->frags[skb_fragment_index].page_offset,
- skb_shinfo(skb)->frags[skb_fragment_index].size,
- PCI_DMA_TODEVICE);
+ skb_frag_t *frag =
+ &skb_shinfo(skb)->frags[skb_fragment_index];
+ bus_address = skb_frag_dma_map(&nesdev->pcidev->dev,
+ frag, 0, skb_frag_size(frag),
+ DMA_TO_DEVICE);
wqe_fragment_length[wqe_fragment_index] =
- cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size);
+ cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index]));
set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
bus_address);
wqe_fragment_index++;
@@ -437,7 +470,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
struct nes_hw_nic_sq_wqe *nic_sqe;
struct tcphdr *tcph;
/* struct udphdr *udph; */
-#define NES_MAX_TSO_FRAGS 18
+#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS
/* 64K segment plus overflow on each side */
dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS];
dma_addr_t bus_address;
@@ -448,7 +481,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
u32 wqe_count=1;
u32 send_rc;
struct iphdr *iph;
- unsigned long flags;
__le16 *wqe_fragment_length;
u32 nr_frags;
u32 original_first_length;
@@ -459,7 +491,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
u16 nhoffset;
u16 wqes_needed;
u16 wqes_available;
- u32 old_head;
u32 wqe_misc;
/*
@@ -475,13 +506,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
if (netif_queue_stopped(netdev))
return NETDEV_TX_BUSY;
- local_irq_save(flags);
- if (!spin_trylock(&nesnic->sq_lock)) {
- local_irq_restore(flags);
- nesvnic->sq_locked++;
- return NETDEV_TX_LOCKED;
- }
-
/* Check if SQ is full */
if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) {
if (!netif_queue_stopped(netdev)) {
@@ -493,7 +517,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
}
}
nesvnic->sq_full++;
- spin_unlock_irqrestore(&nesnic->sq_lock, flags);
return NETDEV_TX_BUSY;
}
@@ -507,7 +530,6 @@ sq_no_longer_full:
if (skb_is_gso(skb)) {
nesvnic->segmented_tso_requests++;
nesvnic->tso_requests++;
- old_head = nesnic->sq_head;
/* Basically 4 fragments available per WQE with extended fragments */
wqes_needed = nr_frags >> 2;
wqes_needed += (nr_frags&3)?1:0;
@@ -526,7 +548,6 @@ sq_no_longer_full:
}
}
nesvnic->sq_full++;
- spin_unlock_irqrestore(&nesnic->sq_lock, flags);
nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n",
netdev->name);
return NETDEV_TX_BUSY;
@@ -535,11 +556,12 @@ tso_sq_no_longer_full:
/* Map all the buffers */
for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags;
tso_frag_count++) {
- tso_bus_address[tso_frag_count] = pci_map_page( nesdev->pcidev,
- skb_shinfo(skb)->frags[tso_frag_count].page,
- skb_shinfo(skb)->frags[tso_frag_count].page_offset,
- skb_shinfo(skb)->frags[tso_frag_count].size,
- PCI_DMA_TODEVICE);
+ skb_frag_t *frag =
+ &skb_shinfo(skb)->frags[tso_frag_count];
+ tso_bus_address[tso_frag_count] =
+ skb_frag_dma_map(&nesdev->pcidev->dev,
+ frag, 0, skb_frag_size(frag),
+ DMA_TO_DEVICE);
}
tso_frag_index = 0;
@@ -570,10 +592,10 @@ tso_sq_no_longer_full:
nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
original_first_length, NES_FIRST_FRAG_SIZE);
nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- " (%u frags), tso_size=%u\n",
+ " (%u frags), is_gso = %u tso_size=%u\n",
netdev->name,
skb->len, skb_headlen(skb),
- skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+ skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size);
}
memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
@@ -605,14 +627,16 @@ tso_sq_no_longer_full:
wqe_fragment_length[wqe_fragment_index] = 0;
set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
bus_address);
+ tso_wqe_length += skb_headlen(skb) -
+ original_first_length;
}
while (wqe_fragment_index < 5) {
wqe_fragment_length[wqe_fragment_index] =
- cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size);
+ cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index]));
set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
(u64)tso_bus_address[tso_frag_index]);
wqe_fragment_index++;
- tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size;
+ tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]);
if (wqe_fragment_index < 5)
wqe_fragment_length[wqe_fragment_index] = 0;
if (tso_frag_index == tso_frag_count)
@@ -623,8 +647,8 @@ tso_sq_no_longer_full:
} else {
nesnic->tx_skb[nesnic->sq_head] = NULL;
}
- wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
- if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) {
+ wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size;
+ if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) {
wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
} else {
iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
@@ -649,17 +673,13 @@ tso_sq_no_longer_full:
skb_set_transport_header(skb, hoffset);
skb_set_network_header(skb, nhoffset);
send_rc = nes_nic_send(skb, netdev);
- if (send_rc != NETDEV_TX_OK) {
- spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+ if (send_rc != NETDEV_TX_OK)
return NETDEV_TX_OK;
- }
}
} else {
send_rc = nes_nic_send(skb, netdev);
- if (send_rc != NETDEV_TX_OK) {
- spin_unlock_irqrestore(&nesnic->sq_lock, flags);
+ if (send_rc != NETDEV_TX_OK)
return NETDEV_TX_OK;
- }
}
barrier();
@@ -669,7 +689,6 @@ tso_sq_no_longer_full:
(wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id);
netdev->trans_start = jiffies;
- spin_unlock_irqrestore(&nesnic->sq_lock, flags);
return NETDEV_TX_OK;
}
@@ -789,14 +808,13 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
int i;
u32 macaddr_low;
u16 macaddr_high;
- DECLARE_MAC_BUF(mac);
if (!is_valid_ether_addr(mac_addr->sa_data))
return -EADDRNOTAVAIL;
memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
- printk(PFX "%s: Address length = %d, Address = %s\n",
- __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data));
+ printk(PFX "%s: Address length = %d, Address = %pM\n",
+ __func__, netdev->addr_len, mac_addr->sa_data);
macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
macaddr_high += (u16)netdev->dev_addr[1];
macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
@@ -820,6 +838,20 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
}
+static void set_allmulti(struct nes_device *nesdev, u32 nic_active_bit)
+{
+ u32 nic_active;
+
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
+ nic_active |= nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active &= ~nic_active_bit;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+}
+
+#define get_addr(addrs, index) ((addrs) + (index) * ETH_ALEN)
+
/**
* nes_netdev_set_multicast_list
*/
@@ -827,7 +859,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
- struct dev_mc_list *multicast_addr;
+ struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
u32 nic_active_bit;
u32 nic_active;
u32 perfect_filter_register_address;
@@ -836,7 +868,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
u8 mc_all_on = 0;
u8 mc_index;
int mc_nic_index = -1;
+ u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count *
+ nics_per_function, 4);
+ u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated;
+ unsigned long flags;
+ int mc_count = netdev_mc_count(netdev);
+ spin_lock_irqsave(&nesadapter->resource_lock, flags);
nic_active_bit = 1 << nesvnic->nic_index;
if (netdev->flags & IFF_PROMISC) {
@@ -847,14 +885,9 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
nic_active |= nic_active_bit;
nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
mc_all_on = 1;
- } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) ||
+ } else if ((netdev->flags & IFF_ALLMULTI) ||
(nesvnic->nic_index > 3)) {
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
+ set_allmulti(nesdev, nic_active_bit);
mc_all_on = 1;
} else {
nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
@@ -865,30 +898,59 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
}
- nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n",
- netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0,
- (netdev->flags & IFF_ALLMULTI)?1:0);
+ nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n",
+ mc_count, !!(netdev->flags & IFF_PROMISC),
+ !!(netdev->flags & IFF_ALLMULTI));
if (!mc_all_on) {
- multicast_addr = netdev->mc_list;
- perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80;
- perfect_filter_register_address += nesvnic->nic_index*0x40;
- for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) {
- while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0))
- multicast_addr = multicast_addr->next;
+ char *addrs;
+ int i;
+ struct netdev_hw_addr *ha;
+
+ addrs = kmalloc(ETH_ALEN * mc_count, GFP_ATOMIC);
+ if (!addrs) {
+ set_allmulti(nesdev, nic_active_bit);
+ goto unlock;
+ }
+ i = 0;
+ netdev_for_each_mc_addr(ha, netdev)
+ memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN);
+
+ perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
+ pft_entries_preallocated * 0x8;
+ for (i = 0, mc_index = 0; mc_index < max_pft_entries_avaiable;
+ mc_index++) {
+ while (i < mc_count && nesvnic->mcrq_mcast_filter &&
+ ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic,
+ get_addr(addrs, i++))) == 0));
if (mc_nic_index < 0)
mc_nic_index = nesvnic->nic_index;
- if (multicast_addr) {
- DECLARE_MAC_BUF(mac);
- nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n",
- print_mac(mac, multicast_addr->dmi_addr),
+ while (nesadapter->pft_mcast_map[mc_index] < 16 &&
+ nesadapter->pft_mcast_map[mc_index] !=
+ nesvnic->nic_index &&
+ mc_index < max_pft_entries_avaiable) {
+ nes_debug(NES_DBG_NIC_RX,
+ "mc_index=%d skipping nic_index=%d, "
+ "used for=%d \n", mc_index,
+ nesvnic->nic_index,
+ nesadapter->pft_mcast_map[mc_index]);
+ mc_index++;
+ }
+ if (mc_index >= max_pft_entries_avaiable)
+ break;
+ if (i < mc_count) {
+ char *addr = get_addr(addrs, i++);
+
+ nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %pM to register 0x%04X nic_idx=%d\n",
+ addr,
perfect_filter_register_address+(mc_index * 8),
mc_nic_index);
- macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
- macaddr_high += (u16)multicast_addr->dmi_addr[1];
- macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
- macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
- macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
- macaddr_low += (u32)multicast_addr->dmi_addr[5];
+ macaddr_high = ((u8) addr[0]) << 8;
+ macaddr_high += (u8) addr[1];
+ macaddr_low = ((u8) addr[2]) << 24;
+ macaddr_low += ((u8) addr[3]) << 16;
+ macaddr_low += ((u8) addr[4]) << 8;
+ macaddr_low += (u8) addr[5];
+
nes_write_indexed(nesdev,
perfect_filter_register_address+(mc_index * 8),
macaddr_low);
@@ -896,16 +958,25 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
perfect_filter_register_address+4+(mc_index * 8),
(u32)macaddr_high | NES_MAC_ADDR_VALID |
((((u32)(1<<mc_nic_index)) << 16)));
- multicast_addr = multicast_addr->next;
+ nesadapter->pft_mcast_map[mc_index] =
+ nesvnic->nic_index;
} else {
nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n",
perfect_filter_register_address+(mc_index * 8));
nes_write_indexed(nesdev,
perfect_filter_register_address+4+(mc_index * 8),
0);
+ nesadapter->pft_mcast_map[mc_index] = 255;
}
}
+ kfree(addrs);
+ /* PFT is not large enough */
+ if (i < mc_count)
+ set_allmulti(nesdev, nic_active_bit);
}
+
+unlock:
+ spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
}
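
The rewritten multicast path above snapshots the kernel's list with netdev_for_each_mc_addr() into a flat buffer (via the get_addr() helper) before programming the perfect-filter registers, falling back to all-multicast when the allocation fails or the table runs out. A stripped-down sketch of that snapshot step, with a hypothetical handle_mc_addr() callback standing in for the register writes:

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/slab.h>
#include <linux/string.h>

static int for_each_mc_snapshot(struct net_device *netdev,
				void (*handle_mc_addr)(const u8 *addr))
{
	struct netdev_hw_addr *ha;
	int count = netdev_mc_count(netdev);
	u8 *addrs, *p;

	addrs = kmalloc_array(count, ETH_ALEN, GFP_ATOMIC);
	if (!addrs)
		return -ENOMEM;	/* nes falls back to all-multicast here */

	p = addrs;
	netdev_for_each_mc_addr(ha, netdev) {
		memcpy(p, ha->addr, ETH_ALEN);
		p += ETH_ALEN;
	}

	for (p = addrs; p < addrs + count * ETH_ALEN; p += ETH_ALEN)
		handle_mc_addr(p);

	kfree(addrs);
	return 0;
}
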
@@ -918,6 +989,10 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
struct nes_device *nesdev = nesvnic->nesdev;
int ret = 0;
u8 jumbomode = 0;
+ u32 nic_active;
+ u32 nic_active_bit;
+ u32 uc_all_active;
+ u32 mc_all_active;
if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
return -EINVAL;
@@ -931,13 +1006,30 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
nes_nic_init_timer_defaults(nesdev, jumbomode);
if (netif_running(netdev)) {
+ nic_active_bit = 1 << nesvnic->nic_index;
+ mc_all_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit;
+ uc_all_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_UNICAST_ALL) & nic_active_bit;
+
nes_netdev_stop(netdev);
nes_netdev_open(netdev);
+
+ nic_active = nes_read_indexed(nesdev,
+ NES_IDX_NIC_MULTICAST_ALL);
+ nic_active |= mc_all_active;
+ nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
+ nic_active);
+
+ nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
+ nic_active |= uc_all_active;
+ nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
}
return ret;
}
+
static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
"Link Change Interrupts",
"Linearized SKBs",
@@ -946,18 +1038,21 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
"Pause Frames Received",
"Internal Routing Errors",
"SQ SW Dropped SKBs",
- "SQ Locked",
"SQ Full",
"Segmented TSO Requests",
"Rx Symbol Errors",
"Rx Jabber Errors",
"Rx Oversized Frames",
"Rx Short Frames",
+ "Rx Length Errors",
+ "Rx CRC Errors",
+ "Rx Port Discard",
"Endnode Rx Discards",
"Endnode Rx Octets",
"Endnode Rx Frames",
"Endnode Tx Octets",
"Endnode Tx Frames",
+ "Tx Errors",
"mh detected",
"mh pauses",
"Retransmission Count",
@@ -986,56 +1081,27 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
"CM Nodes Destroyed",
"CM Accel Drops",
"CM Resets Received",
+ "Free 4Kpbls",
+ "Free 256pbls",
"Timer Inits",
- "CQ Depth 1",
- "CQ Depth 4",
- "CQ Depth 16",
- "CQ Depth 24",
- "CQ Depth 32",
- "CQ Depth 128",
- "CQ Depth 256",
"LRO aggregated",
"LRO flushed",
"LRO no_desc",
+ "PAU CreateQPs",
+ "PAU DestroyQPs",
};
-
#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
-/**
- * nes_netdev_get_rx_csum
- */
-static u32 nes_netdev_get_rx_csum (struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- if (nesvnic->rx_checksum_disabled)
- return 0;
- else
- return 1;
-}
-
/**
- * nes_netdev_set_rc_csum
+ * nes_netdev_get_sset_count
*/
-static int nes_netdev_set_rx_csum(struct net_device *netdev, u32 enable)
+static int nes_netdev_get_sset_count(struct net_device *netdev, int stringset)
{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- if (enable)
- nesvnic->rx_checksum_disabled = 0;
+ if (stringset == ETH_SS_STATS)
+ return NES_ETHTOOL_STAT_COUNT;
else
- nesvnic->rx_checksum_disabled = 1;
- return 0;
-}
-
-
-/**
- * nes_netdev_get_stats_count
- */
-static int nes_netdev_get_stats_count(struct net_device *netdev)
-{
- return NES_ETHTOOL_STAT_COUNT;
+ return -EINVAL;
}
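
The ethtool changes above drop the old get_stats_count()/get_rx_csum() style callbacks in favour of get_sset_count(), and the stats fill below switches to a running ++index so the values stay aligned with nes_ethtool_stringset. A minimal sketch of the sset/strings/stats trio for a hypothetical foo driver:

#include <linux/kernel.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/string.h>

static const char foo_stat_names[][ETH_GSTRING_LEN] = {
	"rx_packets",
	"tx_packets",
};

static int foo_get_sset_count(struct net_device *netdev, int stringset)
{
	return (stringset == ETH_SS_STATS) ? ARRAY_SIZE(foo_stat_names)
					   : -EINVAL;
}

static void foo_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
	if (stringset == ETH_SS_STATS)
		memcpy(data, foo_stat_names, sizeof(foo_stat_names));
}

static void foo_get_ethtool_stats(struct net_device *netdev,
				  struct ethtool_stats *stats, u64 *data)
{
	int index = 0;

	/* order must match foo_stat_names[] */
	data[index]   = netdev->stats.rx_packets;
	data[++index] = netdev->stats.tx_packets;
}

ethtool -S <iface> then pairs each string with the value written at the same index, which is why every string added or removed above has a matching line in the stats fill.
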
@@ -1055,24 +1121,27 @@ static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset,
/**
* nes_netdev_get_ethtool_stats
*/
+
static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values)
{
u64 u64temp;
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 nic_count;
u32 u32temp;
+ u32 index = 0;
target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT;
- target_stat_values[0] = nesvnic->nesdev->link_status_interrupts;
- target_stat_values[1] = nesvnic->linearized_skbs;
- target_stat_values[2] = nesvnic->tso_requests;
+ target_stat_values[index] = nesvnic->nesdev->link_status_interrupts;
+ target_stat_values[++index] = nesvnic->linearized_skbs;
+ target_stat_values[++index] = nesvnic->tso_requests;
u32temp = nes_read_indexed(nesdev,
NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
nesvnic->nesdev->mac_pause_frames_sent += u32temp;
- target_stat_values[3] = nesvnic->nesdev->mac_pause_frames_sent;
+ target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent;
u32temp = nes_read_indexed(nesdev,
NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
@@ -1088,6 +1157,46 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
nesvnic->nesdev->port_tx_discards += u32temp;
nesvnic->netstats.tx_dropped += u32temp;
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_short_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_dropped += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->netstats.rx_length_errors += u32temp;
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->nesdev->mac_rx_errors += u32temp;
+ nesvnic->nesdev->mac_rx_crc_errors += u32temp;
+ nesvnic->netstats.rx_crc_errors += u32temp;
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
+ nesvnic->nesdev->mac_tx_errors += u32temp;
+ nesvnic->netstats.tx_errors += u32temp;
+
for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) {
if (nesvnic->qp_nic_index[nic_count] == 0xf)
break;
@@ -1143,64 +1252,62 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
nesvnic->endnode_ipv4_tcp_retransmits += u32temp;
}
- target_stat_values[4] = nesvnic->nesdev->mac_pause_frames_received;
- target_stat_values[5] = nesdev->nesadapter->nic_rx_eth_route_err;
- target_stat_values[6] = nesvnic->tx_sw_dropped;
- target_stat_values[7] = nesvnic->sq_locked;
- target_stat_values[8] = nesvnic->sq_full;
- target_stat_values[9] = nesvnic->segmented_tso_requests;
- target_stat_values[10] = nesvnic->nesdev->mac_rx_symbol_err_frames;
- target_stat_values[11] = nesvnic->nesdev->mac_rx_jabber_frames;
- target_stat_values[12] = nesvnic->nesdev->mac_rx_oversized_frames;
- target_stat_values[13] = nesvnic->nesdev->mac_rx_short_frames;
- target_stat_values[14] = nesvnic->endnode_nstat_rx_discard;
- target_stat_values[15] = nesvnic->endnode_nstat_rx_octets;
- target_stat_values[16] = nesvnic->endnode_nstat_rx_frames;
- target_stat_values[17] = nesvnic->endnode_nstat_tx_octets;
- target_stat_values[18] = nesvnic->endnode_nstat_tx_frames;
- target_stat_values[19] = mh_detected;
- target_stat_values[20] = mh_pauses_sent;
- target_stat_values[21] = nesvnic->endnode_ipv4_tcp_retransmits;
- target_stat_values[22] = atomic_read(&cm_connects);
- target_stat_values[23] = atomic_read(&cm_accepts);
- target_stat_values[24] = atomic_read(&cm_disconnects);
- target_stat_values[25] = atomic_read(&cm_connecteds);
- target_stat_values[26] = atomic_read(&cm_connect_reqs);
- target_stat_values[27] = atomic_read(&cm_rejects);
- target_stat_values[28] = atomic_read(&mod_qp_timouts);
- target_stat_values[29] = atomic_read(&qps_created);
- target_stat_values[30] = atomic_read(&sw_qps_destroyed);
- target_stat_values[31] = atomic_read(&qps_destroyed);
- target_stat_values[32] = atomic_read(&cm_closes);
- target_stat_values[33] = cm_packets_sent;
- target_stat_values[34] = cm_packets_bounced;
- target_stat_values[35] = cm_packets_created;
- target_stat_values[36] = cm_packets_received;
- target_stat_values[37] = cm_packets_dropped;
- target_stat_values[38] = cm_packets_retrans;
- target_stat_values[39] = cm_listens_created;
- target_stat_values[40] = cm_listens_destroyed;
- target_stat_values[41] = cm_backlog_drops;
- target_stat_values[42] = atomic_read(&cm_loopbacks);
- target_stat_values[43] = atomic_read(&cm_nodes_created);
- target_stat_values[44] = atomic_read(&cm_nodes_destroyed);
- target_stat_values[45] = atomic_read(&cm_accel_dropped_pkts);
- target_stat_values[46] = atomic_read(&cm_resets_recvd);
- target_stat_values[47] = int_mod_timer_init;
- target_stat_values[48] = int_mod_cq_depth_1;
- target_stat_values[49] = int_mod_cq_depth_4;
- target_stat_values[50] = int_mod_cq_depth_16;
- target_stat_values[51] = int_mod_cq_depth_24;
- target_stat_values[52] = int_mod_cq_depth_32;
- target_stat_values[53] = int_mod_cq_depth_128;
- target_stat_values[54] = int_mod_cq_depth_256;
- target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated;
- target_stat_values[56] = nesvnic->lro_mgr.stats.flushed;
- target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc;
-
+ target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received;
+ target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err;
+ target_stat_values[++index] = nesvnic->tx_sw_dropped;
+ target_stat_values[++index] = nesvnic->sq_full;
+ target_stat_values[++index] = nesvnic->segmented_tso_requests;
+ target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames;
+ target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames;
+ target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames;
+ target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames;
+ target_stat_values[++index] = nesvnic->netstats.rx_length_errors;
+ target_stat_values[++index] = nesvnic->nesdev->mac_rx_crc_errors;
+ target_stat_values[++index] = nesvnic->nesdev->port_rx_discards;
+ target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard;
+ target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets;
+ target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames;
+ target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets;
+ target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames;
+ target_stat_values[++index] = nesvnic->nesdev->mac_tx_errors;
+ target_stat_values[++index] = mh_detected;
+ target_stat_values[++index] = mh_pauses_sent;
+ target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits;
+ target_stat_values[++index] = atomic_read(&cm_connects);
+ target_stat_values[++index] = atomic_read(&cm_accepts);
+ target_stat_values[++index] = atomic_read(&cm_disconnects);
+ target_stat_values[++index] = atomic_read(&cm_connecteds);
+ target_stat_values[++index] = atomic_read(&cm_connect_reqs);
+ target_stat_values[++index] = atomic_read(&cm_rejects);
+ target_stat_values[++index] = atomic_read(&mod_qp_timouts);
+ target_stat_values[++index] = atomic_read(&qps_created);
+ target_stat_values[++index] = atomic_read(&sw_qps_destroyed);
+ target_stat_values[++index] = atomic_read(&qps_destroyed);
+ target_stat_values[++index] = atomic_read(&cm_closes);
+ target_stat_values[++index] = cm_packets_sent;
+ target_stat_values[++index] = cm_packets_bounced;
+ target_stat_values[++index] = cm_packets_created;
+ target_stat_values[++index] = cm_packets_received;
+ target_stat_values[++index] = cm_packets_dropped;
+ target_stat_values[++index] = cm_packets_retrans;
+ target_stat_values[++index] = atomic_read(&cm_listens_created);
+ target_stat_values[++index] = atomic_read(&cm_listens_destroyed);
+ target_stat_values[++index] = cm_backlog_drops;
+ target_stat_values[++index] = atomic_read(&cm_loopbacks);
+ target_stat_values[++index] = atomic_read(&cm_nodes_created);
+ target_stat_values[++index] = atomic_read(&cm_nodes_destroyed);
+ target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts);
+ target_stat_values[++index] = atomic_read(&cm_resets_recvd);
+ target_stat_values[++index] = nesadapter->free_4kpbl;
+ target_stat_values[++index] = nesadapter->free_256pbl;
+ target_stat_values[++index] = int_mod_timer_init;
+ target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated;
+ target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed;
+ target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc;
+ target_stat_values[++index] = atomic_read(&pau_qps_created);
+ target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
}
-
/**
* nes_netdev_get_drvinfo
*/
@@ -1208,12 +1315,15 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- strcpy(drvinfo->driver, DRV_NAME);
- strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev));
- strcpy(drvinfo->fw_version, "TBD");
- strcpy(drvinfo->version, DRV_VERSION);
- drvinfo->n_stats = nes_netdev_get_stats_count(netdev);
+ struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
+
+ strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev),
+ sizeof(drvinfo->bus_info));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%u.%u", nesadapter->firmware_version >> 16,
+ nesadapter->firmware_version & 0x000000ff);
+ strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
drvinfo->testinfo_len = 0;
drvinfo->eedump_len = 0;
drvinfo->regdump_len = 0;
@@ -1337,14 +1447,14 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev,
NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
nes_write_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp);
+ NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
nesdev->disable_tx_flow_control = 0;
} else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) {
u32temp = nes_read_indexed(nesdev,
NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
nes_write_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp);
+ NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
nesdev->disable_tx_flow_control = 1;
}
if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) {
@@ -1375,49 +1485,58 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 mac_index = nesdev->mac_index;
+ u8 phy_type = nesadapter->phy_type[mac_index];
+ u8 phy_index = nesadapter->phy_index[mac_index];
u16 phy_data;
et_cmd->duplex = DUPLEX_FULL;
et_cmd->port = PORT_MII;
+ et_cmd->maxtxpkt = 511;
+ et_cmd->maxrxpkt = 511;
if (nesadapter->OneG_Mode) {
- et_cmd->speed = SPEED_1000;
- if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+ ethtool_cmd_speed_set(et_cmd, SPEED_1000);
+ if (phy_type == NES_PHY_TYPE_PUMA_1G) {
et_cmd->supported = SUPPORTED_1000baseT_Full;
et_cmd->advertising = ADVERTISED_1000baseT_Full;
et_cmd->autoneg = AUTONEG_DISABLE;
et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->phy_address = nesdev->mac_index;
+ et_cmd->phy_address = mac_index;
} else {
- et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg;
- et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg;
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data);
+ unsigned long flags;
+ et_cmd->supported = SUPPORTED_1000baseT_Full
+ | SUPPORTED_Autoneg;
+ et_cmd->advertising = ADVERTISED_1000baseT_Full
+ | ADVERTISED_Autoneg;
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
if (phy_data & 0x1000)
et_cmd->autoneg = AUTONEG_ENABLE;
else
et_cmd->autoneg = AUTONEG_DISABLE;
et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
+ et_cmd->phy_address = phy_index;
}
+ return 0;
+ }
+ if ((phy_type == NES_PHY_TYPE_ARGUS) ||
+ (phy_type == NES_PHY_TYPE_SFP_D) ||
+ (phy_type == NES_PHY_TYPE_KR)) {
+ et_cmd->transceiver = XCVR_EXTERNAL;
+ et_cmd->port = PORT_FIBRE;
+ et_cmd->supported = SUPPORTED_FIBRE;
+ et_cmd->advertising = ADVERTISED_FIBRE;
+ et_cmd->phy_address = phy_index;
} else {
- if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) ||
- (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) {
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->port = PORT_FIBRE;
- et_cmd->supported = SUPPORTED_FIBRE;
- et_cmd->advertising = ADVERTISED_FIBRE;
- et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
- } else {
- et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->supported = SUPPORTED_10000baseT_Full;
- et_cmd->advertising = ADVERTISED_10000baseT_Full;
- et_cmd->phy_address = nesdev->mac_index;
- }
- et_cmd->speed = SPEED_10000;
- et_cmd->autoneg = AUTONEG_DISABLE;
+ et_cmd->transceiver = XCVR_INTERNAL;
+ et_cmd->supported = SUPPORTED_10000baseT_Full;
+ et_cmd->advertising = ADVERTISED_10000baseT_Full;
+ et_cmd->phy_address = mac_index;
}
- et_cmd->maxtxpkt = 511;
- et_cmd->maxrxpkt = 511;
+ ethtool_cmd_speed_set(et_cmd, SPEED_10000);
+ et_cmd->autoneg = AUTONEG_DISABLE;
return 0;
}
@@ -1430,12 +1549,15 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
- u16 phy_data;
if ((nesadapter->OneG_Mode) &&
(nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
- &phy_data);
+ unsigned long flags;
+ u16 phy_data;
+ u8 phy_index = nesadapter->phy_index[nesdev->mac_index];
+
+ spin_lock_irqsave(&nesadapter->phy_lock, flags);
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
if (et_cmd->autoneg) {
/* Turn on Full duplex, Autoneg, and restart autonegotiation */
phy_data |= 0x1300;
@@ -1443,55 +1565,41 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
/* Turn off autoneg */
phy_data &= ~0x1000;
}
- nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
- phy_data);
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
+ spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
}
return 0;
}
-static struct ethtool_ops nes_ethtool_ops = {
+static const struct ethtool_ops nes_ethtool_ops = {
.get_link = ethtool_op_get_link,
.get_settings = nes_netdev_get_settings,
.set_settings = nes_netdev_set_settings,
- .get_tx_csum = ethtool_op_get_tx_csum,
- .get_rx_csum = nes_netdev_get_rx_csum,
- .get_sg = ethtool_op_get_sg,
.get_strings = nes_netdev_get_strings,
- .get_stats_count = nes_netdev_get_stats_count,
+ .get_sset_count = nes_netdev_get_sset_count,
.get_ethtool_stats = nes_netdev_get_ethtool_stats,
.get_drvinfo = nes_netdev_get_drvinfo,
.get_coalesce = nes_netdev_get_coalesce,
.set_coalesce = nes_netdev_set_coalesce,
.get_pauseparam = nes_netdev_get_pauseparam,
.set_pauseparam = nes_netdev_set_pauseparam,
- .set_tx_csum = ethtool_op_set_tx_csum,
- .set_rx_csum = nes_netdev_set_rx_csum,
- .set_sg = ethtool_op_set_sg,
- .get_tso = ethtool_op_get_tso,
- .set_tso = ethtool_op_set_tso,
- .get_flags = ethtool_op_get_flags,
- .set_flags = ethtool_op_set_flags,
};
-
-static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
+static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features)
{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 u32temp;
unsigned long flags;
spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nesvnic->vlan_grp = grp;
nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name);
/* Enable/Disable VLAN Stripping */
u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG);
- if (grp)
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
u32temp &= 0xfdffffff;
else
u32temp |= 0x02000000;
@@ -1500,6 +1608,45 @@ static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_g
spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
}
+static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features)
+{
+ /*
+ * There is no support for separate rx/tx vlan accel enable/disable,
+ * so keep the tx flag in the same state as the rx flag.
+ */
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ features |= NETIF_F_HW_VLAN_CTAG_TX;
+ else
+ features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+ return features;
+}
+
+static int nes_set_features(struct net_device *netdev, netdev_features_t features)
+{
+ struct nes_vnic *nesvnic = netdev_priv(netdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u32 changed = netdev->features ^ features;
+
+ if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+ nes_vlan_mode(netdev, nesdev, features);
+
+ return 0;
+}
+
+static const struct net_device_ops nes_netdev_ops = {
+ .ndo_open = nes_netdev_open,
+ .ndo_stop = nes_netdev_stop,
+ .ndo_start_xmit = nes_netdev_start_xmit,
+ .ndo_get_stats = nes_netdev_get_stats,
+ .ndo_tx_timeout = nes_netdev_tx_timeout,
+ .ndo_set_mac_address = nes_netdev_set_mac_address,
+ .ndo_set_rx_mode = nes_netdev_set_multicast_list,
+ .ndo_change_mtu = nes_netdev_change_mtu,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_fix_features = nes_fix_features,
+ .ndo_set_features = nes_set_features,
+};
/**
* nes_netdev_init - initialize network device
@@ -1508,47 +1655,32 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
void __iomem *mmio_addr)
{
u64 u64temp;
- struct nes_vnic *nesvnic = NULL;
+ struct nes_vnic *nesvnic;
struct net_device *netdev;
struct nic_qp_map *curr_qp_map;
- u32 u32temp;
- u16 phy_data;
- u16 temp_phy_data;
+ u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index];
netdev = alloc_etherdev(sizeof(struct nes_vnic));
if (!netdev) {
printk(KERN_ERR PFX "nesvnic etherdev alloc failed");
return NULL;
}
+ nesvnic = netdev_priv(netdev);
nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name);
SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev);
- nesvnic = netdev_priv(netdev);
- memset(nesvnic, 0, sizeof(*nesvnic));
-
- netdev->open = nes_netdev_open;
- netdev->stop = nes_netdev_stop;
- netdev->hard_start_xmit = nes_netdev_start_xmit;
- netdev->get_stats = nes_netdev_get_stats;
- netdev->tx_timeout = nes_netdev_tx_timeout;
- netdev->set_mac_address = nes_netdev_set_mac_address;
- netdev->set_multicast_list = nes_netdev_set_multicast_list;
- netdev->change_mtu = nes_netdev_change_mtu;
netdev->watchdog_timeo = NES_TX_TIMEOUT;
netdev->irq = nesdev->pcidev->irq;
netdev->mtu = ETH_DATA_LEN;
netdev->hard_header_len = ETH_HLEN;
netdev->addr_len = ETH_ALEN;
netdev->type = ARPHRD_ETHER;
- netdev->features = NETIF_F_HIGHDMA;
+ netdev->netdev_ops = &nes_netdev_ops;
netdev->ethtool_ops = &nes_ethtool_ops;
netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
- netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
- netdev->vlan_rx_register = nes_netdev_vlan_rx_register;
- netdev->features |= NETIF_F_LLTX;
/* Fill in the port structure */
nesvnic->netdev = netdev;
@@ -1573,21 +1705,22 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
netdev->dev_addr[3] = (u8)(u64temp>>16);
netdev->dev_addr[4] = (u8)(u64temp>>8);
netdev->dev_addr[5] = (u8)u64temp;
- memcpy(netdev->perm_addr, netdev->dev_addr, 6);
- if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) {
- netdev->features |= NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM;
- netdev->features |= NETIF_F_GSO | NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM;
- } else {
- netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
- }
+ netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX;
+ if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))
+ netdev->hw_features |= NETIF_F_TSO;
+
+ netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX;
+ netdev->hw_features |= NETIF_F_LRO;
nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
" nic_index = %d, logical_port = %d, mac_index = %d.\n",
nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id,
nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index);
- if (nesvnic->nesdev->nesadapter->port_count == 1) {
+ if (nesvnic->nesdev->nesadapter->port_count == 1 &&
+ nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) {
+
nesvnic->qp_nic_index[0] = nesvnic->nic_index;
nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1;
if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
@@ -1598,11 +1731,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3;
}
} else {
- if (nesvnic->nesdev->nesadapter->port_count == 2) {
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2;
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
+ if (nesvnic->nesdev->nesadapter->port_count == 2 ||
+ (nesvnic->nesdev->nesadapter->port_count == 1 &&
+ nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) {
+ nesvnic->qp_nic_index[0] = nesvnic->nic_index;
+ nesvnic->qp_nic_index[1] = nesvnic->nic_index
+ + 2;
+ nesvnic->qp_nic_index[2] = 0xf;
+ nesvnic->qp_nic_index[3] = 0xf;
} else {
nesvnic->qp_nic_index[0] = nesvnic->nic_index;
nesvnic->qp_nic_index[1] = 0xf;
@@ -1618,7 +1754,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
nesvnic->rdma_enabled = 0;
}
nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id;
+ init_timer(&nesvnic->event_timer);
+ nesvnic->event_timer.function = NULL;
spin_lock_init(&nesvnic->tx_lock);
+ spin_lock_init(&nesvnic->port_ibevent_lock);
nesdev->netdev[nesdev->netdev_count] = netdev;
nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n",
@@ -1627,68 +1766,81 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
if ((nesdev->netdev_count == 0) &&
((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
- ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) &&
+ ((phy_type == NES_PHY_TYPE_PUMA_1G) &&
(((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
- /*
- * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
- * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
- */
+ u32 u32temp;
+ u32 link_mask = 0;
+ u32 link_val = 0;
+ u16 temp_phy_data;
+ u16 phy_data = 0;
+ unsigned long flags;
+
u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
(0x200 * (nesdev->mac_index & 1)));
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) {
+ if (phy_type != NES_PHY_TYPE_PUMA_1G) {
u32temp |= 0x00200000;
nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
(0x200 * (nesdev->mac_index & 1)), u32temp);
}
- u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
-
- if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
- nes_init_phy(nesdev);
- nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
- temp_phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 20;
- do {
- nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
- phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
- if (phy_data & 4) {
- nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
- nesvnic->linkup = 1;
- } else {
- nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n");
- }
+ /* Check and set linkup here. This is for back to back */
+ /* configuration where second port won't get link interrupt */
+ switch (phy_type) {
+ case NES_PHY_TYPE_PUMA_1G:
+ if (nesdev->mac_index < 2) {
+ link_mask = 0x01010000;
+ link_val = 0x01010000;
} else {
- nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
- nesvnic->linkup = 1;
+ link_mask = 0x02020000;
+ link_val = 0x02020000;
}
- } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
- nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
- nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
- if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
- ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
- nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+ break;
+ case NES_PHY_TYPE_SFP_D:
+ spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
+ nes_read_10G_phy_reg(nesdev,
+ nesdev->nesadapter->phy_index[nesdev->mac_index],
+ 1, 0x9003);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ nes_read_10G_phy_reg(nesdev,
+ nesdev->nesadapter->phy_index[nesdev->mac_index],
+ 3, 0x0021);
+ nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ nes_read_10G_phy_reg(nesdev,
+ nesdev->nesadapter->phy_index[nesdev->mac_index],
+ 3, 0x0021);
+ phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+ phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
+ break;
+ default:
+ link_mask = 0x0f1f0000;
+ link_val = 0x0f0f0000;
+ break;
+ }
+
+ u32temp = nes_read_indexed(nesdev,
+ NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+ (0x200 * (nesdev->mac_index & 1)));
+
+ if (phy_type == NES_PHY_TYPE_SFP_D) {
+ if (phy_data & 0x0004)
+ nesvnic->linkup = 1;
+ } else {
+ if ((u32temp & link_mask) == link_val)
nesvnic->linkup = 1;
- }
}
+
/* clear the MAC interrupt status, assumes direct logical to physical mapping */
u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS)
- nes_init_phy(nesdev);
-
+ nes_init_phy(nesdev);
}
+ nes_vlan_mode(netdev, nesdev, netdev->features);
+
return netdev;
}
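
nes_netdev_init() above is converted from assigning individual netdev callbacks to a const net_device_ops table, and from hand-built feature flags to hw_features plus features. A self-contained sketch of that shape for a hypothetical foo driver (all foo_* names are illustrative):

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

struct foo_priv { int dummy; };

static int foo_open(struct net_device *netdev) { return 0; }
static int foo_stop(struct net_device *netdev) { return 0; }
static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_open		= foo_open,
	.ndo_stop		= foo_stop,
	.ndo_start_xmit		= foo_xmit,
	.ndo_validate_addr	= eth_validate_addr,
};

static struct net_device *foo_netdev_init(void)
{
	struct net_device *netdev = alloc_etherdev(sizeof(struct foo_priv));

	if (!netdev)
		return NULL;
	netdev->netdev_ops  = &foo_netdev_ops;
	/* user-toggleable features, mirrored into the active feature set */
	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
	netdev->features    = netdev->hw_features | NETIF_F_HIGHDMA;
	return netdev;
}
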
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index e64306bce80..529c421bb15 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
@@ -39,8 +39,8 @@
#include <linux/types.h>
-#define NES_ABI_USERSPACE_VER 1
-#define NES_ABI_KERNEL_VER 1
+#define NES_ABI_USERSPACE_VER 2
+#define NES_ABI_KERNEL_VER 2
/*
* Make sure that all structs defined in this file remain laid out so
@@ -78,6 +78,7 @@ struct nes_create_cq_req {
struct nes_create_qp_req {
__u64 user_wqe_buffers;
+ __u64 user_qp_buffer;
};
enum iwnes_memreg_type {
@@ -86,6 +87,7 @@ enum iwnes_memreg_type {
IWNES_MEMREG_TYPE_CQ = 0x0002,
IWNES_MEMREG_TYPE_MW = 0x0003,
IWNES_MEMREG_TYPE_FMR = 0x0004,
+ IWNES_MEMREG_TYPE_FMEM = 0x0005,
};
struct nes_mem_reg_req {
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index fb8cbd71a2e..2042c0f2975 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,6 +38,7 @@
#include <linux/ethtool.h>
#include <linux/mii.h>
#include <linux/if_vlan.h>
+#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/in.h>
#include <linux/ip.h>
@@ -50,13 +51,34 @@
#include "nes.h"
-
-
static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
u32 mh_detected;
u32 mh_pauses_sent;
+static u32 nes_set_pau(struct nes_device *nesdev)
+{
+ u32 ret = 0;
+ u32 counter;
+
+ nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU);
+ nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
+
+ for (counter = 0; counter < NES_PAU_COUNTER; counter++) {
+ udelay(30);
+ if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) {
+ printk(KERN_INFO PFX "PAU is supported.\n");
+ break;
+ }
+ nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
+ }
+ if (counter == NES_PAU_COUNTER) {
+ printk(KERN_INFO PFX "PAU is not supported.\n");
+ return -EPERM;
+ }
+ return ret;
+}
+
/**
* nes_read_eeprom_values -
*/
@@ -183,9 +205,22 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada
} else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
nesadapter->virtwq = 1;
}
+ if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
+ nesadapter->send_term_ok = 1;
+
+ if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) {
+ if (!nes_set_pau(nesdev))
+ nesadapter->allow_unaligned_fpdus = 1;
+ }
+
nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
(u32)((u8)eeprom_data);
+ eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10);
+ printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
+ nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
+ (u32)((u8)eeprom_data);
+
no_fw_rev:
/* eeprom is valid */
eeprom_offset = nesadapter->software_eeprom_offset;
@@ -377,12 +412,8 @@ static u16 nes_read16_eeprom(void __iomem *addr, u16 offset)
*/
void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data)
{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 u32temp;
u32 counter;
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
@@ -398,8 +429,6 @@ void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u1
if (!(u32temp & 1))
nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
u32temp);
-
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
}
@@ -410,14 +439,11 @@ void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u1
*/
void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data)
{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 u32temp;
u32 counter;
- unsigned long flags;
/* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n",
phy_addr, nesdev->mac_index); */
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
@@ -437,7 +463,6 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16
} else {
*data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
}
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
}
@@ -540,12 +565,15 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
if (!list_empty(&nesdev->cqp_avail_reqs)) {
spin_lock_irqsave(&nesdev->cqp.lock, flags);
- cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
+ if (!list_empty(&nesdev->cqp_avail_reqs)) {
+ cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
struct nes_cqp_request, list);
- list_del_init(&cqp_request->list);
+ list_del_init(&cqp_request->list);
+ }
spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- } else {
- cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
+ }
+ if (cqp_request == NULL) {
+ cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
if (cqp_request) {
cqp_request->dynamic = 1;
INIT_LIST_HEAD(&cqp_request->list);
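
The nes_get_cqp_request() hunk above turns the free-list take into a check/lock/re-check, with a GFP_ATOMIC fallback allocation when another context drained the list first. A generic sketch of the pattern (struct foo_req and the parameters are illustrative, not nes structures):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo_req {
	struct list_head list;
	int dynamic;		/* freed with kfree() instead of being recycled */
};

static struct foo_req *foo_get_req(struct list_head *free_list,
				   spinlock_t *lock)
{
	struct foo_req *req = NULL;
	unsigned long flags;

	if (!list_empty(free_list)) {
		spin_lock_irqsave(lock, flags);
		/* re-check under the lock: another CPU may have raced us */
		if (!list_empty(free_list)) {
			req = list_first_entry(free_list, struct foo_req, list);
			list_del_init(&req->list);
		}
		spin_unlock_irqrestore(lock, flags);
	}
	if (!req) {
		req = kzalloc(sizeof(*req), GFP_ATOMIC);
		if (req)
			req->dynamic = 1;
	}
	return req;
}
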
@@ -592,6 +620,7 @@ void nes_put_cqp_request(struct nes_device *nesdev,
nes_free_cqp_request(nesdev, cqp_request);
}
+
/**
* nes_post_cqp_request
*/
@@ -602,6 +631,8 @@ void nes_post_cqp_request(struct nes_device *nesdev,
unsigned long flags;
u32 cqp_head;
u64 u64temp;
+ u32 opcode;
+ int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
spin_lock_irqsave(&nesdev->cqp.lock, flags);
@@ -612,17 +643,20 @@ void nes_post_cqp_request(struct nes_device *nesdev,
nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
+ opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
+ if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
+ ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
barrier();
u64temp = (unsigned long)cqp_request;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX,
- u64temp);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp);
nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
- " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
- " waiting = %d, refcount = %d.\n",
- le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
- le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
- nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
- cqp_request->waiting, atomic_read(&cqp_request->refcount));
+ " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
+ " waiting = %d, refcount = %d.\n",
+ opcode & NES_CQP_OPCODE_MASK,
+ le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
+ nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
+ cqp_request->waiting, atomic_read(&cqp_request->refcount));
+
barrier();
/* Ring doorbell (1 WQEs) */
@@ -643,7 +677,6 @@ void nes_post_cqp_request(struct nes_device *nesdev,
return;
}
-
/**
* nes_arp_table
*/
@@ -652,6 +685,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
struct nes_adapter *nesadapter = nesdev->nesadapter;
int arp_index;
int err = 0;
+ __be32 tmp_addr;
for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) {
if (nesadapter->arp_table[arp_index].ip_addr == ip_addr)
@@ -665,7 +699,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
arp_index = 0;
err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
- nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index);
+ nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP);
if (err) {
nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
return err;
@@ -679,9 +713,9 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
/* DELETE or RESOLVE */
if (arp_index == nesadapter->arp_table_size) {
- nes_debug(NES_DBG_NETDEV, "MAC for " NIPQUAD_FMT " not in ARP table - cannot %s\n",
- HIPQUAD(ip_addr),
- action == NES_ARP_RESOLVE ? "resolve" : "delete");
+ tmp_addr = cpu_to_be32(ip_addr);
+ nes_debug(NES_DBG_NETDEV, "MAC for %pI4 not in ARP table - cannot %s\n",
+ &tmp_addr, action == NES_ARP_RESOLVE ? "resolve" : "delete");
return -1;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index e3939d13484..218dd357428 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -35,6 +35,7 @@
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/highmem.h>
+#include <linux/slab.h>
#include <asm/byteorder.h>
#include <rdma/ib_verbs.h>
@@ -54,7 +55,8 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
/**
* nes_alloc_mw
*/
-static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
+static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type)
+{
struct nes_pd *nespd = to_nespd(ibpd);
struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -70,6 +72,9 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
u32 driver_key = 0;
u8 stag_key = 0;
+ if (type != IB_MW_TYPE_1)
+ return ERR_PTR(-EINVAL);
+
get_random_bytes(&next_stag_index, sizeof(next_stag_index));
stag_key = (u8)next_stag_index;
@@ -79,7 +84,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
next_stag_index %= nesadapter->max_mr;
ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index);
+ nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW);
if (ret) {
return ERR_PTR(ret);
}
@@ -220,15 +225,15 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
if (nesqp->ibqp_state > IB_QPS_RTS)
return -EINVAL;
- spin_lock_irqsave(&nesqp->lock, flags);
+ spin_lock_irqsave(&nesqp->lock, flags);
head = nesqp->hwqp.sq_head;
qsize = nesqp->hwqp.sq_tail;
/* Check for SQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -EINVAL;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ return -ENOMEM;
}
wqe = &nesqp->hwqp.sq_vbase[head];
@@ -243,20 +248,19 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
- if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) {
+ if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
- }
- if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) {
+ if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
- }
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
+ ibmw_bind->bind_info.mr->lkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
- ibmw_bind->length);
+ ibmw_bind->bind_info.length);
wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
- u64temp = (u64)ibmw_bind->addr;
+ u64temp = (u64)ibmw_bind->bind_info.addr;
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
head++;
@@ -269,323 +273,242 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
nes_write32(nesdev->regs+NES_WQE_ALLOC,
(1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
- spin_unlock_irqrestore(&nesqp->lock, flags);
+ spin_unlock_irqrestore(&nesqp->lock, flags);
return 0;
}
-/**
- * nes_alloc_fmr
+/*
+ * nes_alloc_fast_mr
*/
-static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
- int ibmr_access_flags,
- struct ib_fmr_attr *ibfmr_attr)
+static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
+ u32 stag, u32 page_count)
{
- unsigned long flags;
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_fmr *nesfmr;
- struct nes_cqp_request *cqp_request;
struct nes_hw_cqp_wqe *cqp_wqe;
+ struct nes_cqp_request *cqp_request;
+ unsigned long flags;
int ret;
- u32 stag;
- u32 stag_index = 0;
- u32 next_stag_index = 0;
- u32 driver_key = 0;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
u32 opcode = 0;
- u8 stag_key = 0;
- int i=0;
- struct nes_vpbl vpbl;
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = 0;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index);
- if (ret) {
- goto failed_resource_alloc;
- }
-
- nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
- if (!nesfmr) {
- ret = -ENOMEM;
- goto failed_fmr_alloc;
- }
-
- nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
- if (ibfmr_attr->max_pages == 1) {
- /* use zero length PBL */
- nesfmr->nesmr.pbl_4k = 0;
- nesfmr->nesmr.pbls_used = 0;
- } else if (ibfmr_attr->max_pages <= 32) {
- /* use PBL 256 */
- nesfmr->nesmr.pbl_4k = 0;
- nesfmr->nesmr.pbls_used = 1;
- } else if (ibfmr_attr->max_pages <= 512) {
- /* use 4K PBLs */
- nesfmr->nesmr.pbl_4k = 1;
- nesfmr->nesmr.pbls_used = 1;
- } else {
- /* use two level 4K PBLs */
- /* add support for two level 256B PBLs */
- nesfmr->nesmr.pbl_4k = 1;
- nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
- ((ibfmr_attr->max_pages & 511) ? 1 : 0);
- }
- /* Register the region with the adapter */
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
- /* track PBL resources */
- if (nesfmr->nesmr.pbls_used != 0) {
- if (nesfmr->nesmr.pbl_4k) {
- if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_alloc;
- } else {
- nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
- }
- } else {
- if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_alloc;
- } else {
- nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
- }
- }
- }
-
- /* one level pbl */
- if (nesfmr->nesmr.pbls_used == 0) {
- nesfmr->root_vpbl.pbl_vbase = NULL;
- nes_debug(NES_DBG_MR, "zero level pbl \n");
- } else if (nesfmr->nesmr.pbls_used == 1) {
- /* can change it to kmalloc & dma_map_single */
- nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &nesfmr->root_vpbl.pbl_pbase);
- if (!nesfmr->root_vpbl.pbl_vbase) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_alloc;
- }
- nesfmr->leaf_pbl_cnt = 0;
- nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
- nesfmr->root_vpbl.pbl_vbase);
- }
- /* two level pbl */
- else {
- nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
- &nesfmr->root_vpbl.pbl_pbase);
- if (!nesfmr->root_vpbl.pbl_vbase) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_vpbl_alloc;
- }
-
- nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
- if (!nesfmr->root_vpbl.leaf_vpbl) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- ret = -ENOMEM;
- goto failed_leaf_vpbl_alloc;
- }
-
- nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;
- nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
- " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
- nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
-
- for (i=0; i<nesfmr->leaf_pbl_cnt; i++)
- nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL;
-
- for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
-
- if (!vpbl.pbl_vbase) {
- ret = -ENOMEM;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- goto failed_leaf_vpbl_pages_alloc;
- }
-
- nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
- nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
- nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
-
- nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
- nesfmr->root_vpbl.pbl_vbase[i].pa_low,
- nesfmr->root_vpbl.pbl_vbase[i].pa_high,
- &nesfmr->root_vpbl.leaf_vpbl[i]);
- }
- }
- nesfmr->ib_qp = NULL;
- nesfmr->access_rights =0;
+ u16 major_code;
+ u64 region_length = page_count * PAGE_SIZE;
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
cqp_request = nes_get_cqp_request(nesdev);
if (cqp_request == NULL) {
nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- ret = -ENOMEM;
- goto failed_leaf_vpbl_pages_alloc;
+ return -ENOMEM;
}
+ nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
+ "region_length = %llu\n",
+ page_count, region_length);
cqp_request->waiting = 1;
cqp_wqe = &cqp_request->cqp_wqe;
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
- stag, stag_index);
-
- opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
-
- if (nesfmr->nesmr.pbl_4k == 1)
- opcode |= NES_CQP_STAG_PBL_BLK_SIZE;
-
- if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) {
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE |
- NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
- nesfmr->access_rights |=
- NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
- NES_CQP_STAG_REM_ACC_EN;
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (nesadapter->free_4kpbl > 0) {
+ nesadapter->free_4kpbl--;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ } else {
+ /* No 4kpbl's available: */
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ nes_debug(NES_DBG_MR, "Out of Pbls\n");
+ nes_free_cqp_request(nesdev, cqp_request);
+ return -ENOMEM;
}
- if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) {
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ |
- NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN;
- nesfmr->access_rights |=
- NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ |
- NES_CQP_STAG_REM_ACC_EN;
- }
+ opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
+ NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
+ NES_CQP_STAG_REM_ACC_EN;
+ /*
+ * The current OFED API does not support the zero-based TO option.  If
+ * that is added, the NES_CQP_STAG_VA* option will need to change.  The
+ * API also does not provide a way to create the MR with local access
+ * only and disallow the SQ op from overriding it, so the remote enable
+ * must be set here.
+ */
nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] =
- cpu_to_le32((nesfmr->nesmr.pbls_used>1) ?
- (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
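+ /* Bits 39:32 of the region length share this word with the 15-bit PD id. */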
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
+ cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
+ cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
+ cpu_to_le32(nespd->pd_id & 0x00007fff);
+
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
+ set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
+ cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
+ barrier();
atomic_set(&cqp_request->refcount, 2);
nes_post_cqp_request(nesdev, cqp_request);
/* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- stag, ret, cqp_request->major_code, cqp_request->minor_code);
-
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- ret = (!ret) ? -ETIME : -EIO;
- goto failed_leaf_vpbl_pages_alloc;
- }
+ ret = wait_event_timeout(cqp_request->waitq,
+ (0 != cqp_request->request_done),
+ NES_EVENT_TIMEOUT);
+
+ nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
+ "wait_event_timeout ret = %u, CQP Major:Minor codes = "
+ "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
+ cqp_request->minor_code);
+ major_code = cqp_request->major_code;
nes_put_cqp_request(nesdev, cqp_request);
- nesfmr->nesmr.ibfmr.lkey = stag;
- nesfmr->nesmr.ibfmr.rkey = stag;
- nesfmr->attr = *ibfmr_attr;
-
- return &nesfmr->nesmr.ibfmr;
-
- failed_leaf_vpbl_pages_alloc:
- /* unroll all allocated pages */
- for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
- if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
- nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- }
- if (nesfmr->root_vpbl.leaf_vpbl)
- kfree(nesfmr->root_vpbl.leaf_vpbl);
-
- failed_leaf_vpbl_alloc:
- if (nesfmr->leaf_pbl_cnt == 0) {
- if (nesfmr->root_vpbl.pbl_vbase)
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
- } else
- pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
-
- failed_vpbl_alloc:
- kfree(nesfmr);
- failed_fmr_alloc:
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
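+ /* On timeout or CQP error, return the 4 KB PBL reserved above to the free pool. */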
+ if (!ret || major_code) {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ nesadapter->free_4kpbl++;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
- failed_resource_alloc:
- return ERR_PTR(ret);
+ if (!ret)
+ return -ETIME;
+ else if (major_code)
+ return -EIO;
+ return 0;
}
-
-/**
- * nes_dealloc_fmr
+/*
+ * nes_alloc_fast_reg_mr
*/
-static int nes_dealloc_fmr(struct ib_fmr *ibfmr)
+static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len)
{
- struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr);
- struct nes_fmr *nesfmr = to_nesfmr(nesmr);
- struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
+ struct nes_pd *nespd = to_nespd(ibpd);
+ struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_mr temp_nesmr = *nesmr;
- int i = 0;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
- temp_nesmr.ibmw.device = ibfmr->device;
- temp_nesmr.ibmw.pd = ibfmr->pd;
- temp_nesmr.ibmw.rkey = ibfmr->rkey;
- temp_nesmr.ibmw.uobject = NULL;
+ u32 next_stag_index;
+ u8 stag_key = 0;
+ u32 driver_key = 0;
+ int err = 0;
+ u32 stag_index = 0;
+ struct nes_mr *nesmr;
+ u32 stag;
+ int ret;
+ struct ib_mr *ibmr;
+/*
+ * Note: a fixed-length, single-page PBL is always used so that the
+ * fast_reg_mr operation always knows the size of the PBL.
+ */
+ if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+ return ERR_PTR(-E2BIG);
- /* free the resources */
- if (nesfmr->leaf_pbl_cnt == 0) {
- /* single PBL case */
- if (nesfmr->root_vpbl.pbl_vbase)
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
- } else {
- for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) {
- pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
- nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- kfree(nesfmr->root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
- nesfmr->root_vpbl.pbl_pbase);
+ get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+ stag_key = (u8)next_stag_index;
+ next_stag_index >>= 8;
+ next_stag_index %= nesadapter->max_mr;
+
+ err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+ nesadapter->max_mr, &stag_index,
+ &next_stag_index, NES_RESOURCE_FAST_MR);
+ if (err)
+ return ERR_PTR(err);
+
+ nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+ if (!nesmr) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ return ERR_PTR(-ENOMEM);
}
- return nes_dealloc_mw(&temp_nesmr.ibmw);
-}
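+ /* Build the STag: resource index in bits 31:8, random 8-bit key in the low byte. */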
+ stag = stag_index << 8;
+ stag |= driver_key;
+ stag += (u32)stag_key;
+ nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
+ stag, stag_index);
-/**
- * nes_map_phys_fmr
+ ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
+
+ if (ret == 0) {
+ nesmr->ibmr.rkey = stag;
+ nesmr->ibmr.lkey = stag;
+ nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
+ ibmr = &nesmr->ibmr;
+ } else {
+ kfree(nesmr);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ }
+ return ibmr;
+}
+
+/*
+ * nes_alloc_fast_reg_page_list
*/
-static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova)
+static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
+ struct ib_device *ibdev,
+ int page_list_len)
{
- return 0;
-}
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct ib_fast_reg_page_list *pifrpl;
+ struct nes_ib_fast_reg_page_list *pnesfrpl;
+ if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+ return ERR_PTR(-E2BIG);
+ /*
+ * Allocate the ib_fast_reg_page_list structure, the
+ * nes_ib_fast_reg_page_list structure, and the PBL table.
+ */
+ pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
+ page_list_len * sizeof(u64), GFP_KERNEL);
+
+ if (!pnesfrpl)
+ return ERR_PTR(-ENOMEM);
-/**
- * nes_unmap_frm
+ pifrpl = &pnesfrpl->ibfrpl;
+ pifrpl->page_list = &pnesfrpl->pbl;
+ pifrpl->max_page_list_len = page_list_len;
+ /*
+ * Allocate the WQE PBL
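+ * pci_alloc_consistent() gives both a kernel virtual address and the
+ * bus address used by the adapter.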
+ */
+ pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
+ page_list_len * sizeof(u64),
+ &pnesfrpl->nes_wqe_pbl.paddr);
+
+ if (!pnesfrpl->nes_wqe_pbl.kva) {
+ kfree(pnesfrpl);
+ return ERR_PTR(-ENOMEM);
+ }
+ nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
+ "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
+ "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl,
+ pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
+ (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr);
+
+ return pifrpl;
+}
+
+/*
+ * nes_free_fast_reg_page_list
*/
-static int nes_unmap_fmr(struct list_head *ibfmr_list)
+static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
{
- return 0;
+ struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct nes_ib_fast_reg_page_list *pnesfrpl;
+
+ pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
+ /*
+ * Free the WQE PBL.
+ */
+ pci_free_consistent(nesdev->pcidev,
+ pifrpl->max_page_list_len * sizeof(u64),
+ pnesfrpl->nes_wqe_pbl.kva,
+ pnesfrpl->nes_wqe_pbl.paddr);
+ /*
+ * Free the PBL structure
+ */
+ kfree(pnesfrpl);
}
-
-
/**
* nes_query_device
*/
@@ -598,7 +521,7 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
memset(props, 0, sizeof(*props));
memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6);
- props->fw_ver = nesdev->nesadapter->fw_ver;
+ props->fw_ver = nesdev->nesadapter->firmware_version;
props->device_cap_flags = nesdev->nesadapter->device_cap_flags;
props->vendor_id = nesdev->nesadapter->vendor_id;
props->vendor_part_id = nesdev->nesadapter->vendor_part_id;
@@ -608,28 +531,28 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
props->max_sge = nesdev->nesadapter->max_sge;
props->max_cq = nesibdev->max_cq;
- props->max_cqe = nesdev->nesadapter->max_cqe - 1;
+ props->max_cqe = nesdev->nesadapter->max_cqe;
props->max_mr = nesibdev->max_mr;
props->max_mw = nesibdev->max_mr;
props->max_pd = nesibdev->max_pd;
props->max_sge_rd = 1;
switch (nesdev->nesadapter->max_irrq_wr) {
case 0:
- props->max_qp_rd_atom = 1;
+ props->max_qp_rd_atom = 2;
break;
case 1:
- props->max_qp_rd_atom = 4;
+ props->max_qp_rd_atom = 8;
break;
case 2:
- props->max_qp_rd_atom = 16;
+ props->max_qp_rd_atom = 32;
break;
case 3:
- props->max_qp_rd_atom = 32;
+ props->max_qp_rd_atom = 64;
break;
default:
props->max_qp_rd_atom = 0;
}
- props->max_qp_init_rd_atom = props->max_qp_wr;
+ props->max_qp_init_rd_atom = props->max_qp_rd_atom;
props->atomic_cap = IB_ATOMIC_NONE;
props->max_map_per_fmr = 1;
@@ -642,15 +565,34 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
*/
static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
{
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct net_device *netdev = nesvnic->netdev;
+
memset(props, 0, sizeof(*props));
- props->max_mtu = IB_MTU_2048;
- props->active_mtu = IB_MTU_2048;
+ props->max_mtu = IB_MTU_4096;
+
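+ /* Report the largest IB MTU value that fits the current netdev MTU. */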
+ if (netdev->mtu >= 4096)
+ props->active_mtu = IB_MTU_4096;
+ else if (netdev->mtu >= 2048)
+ props->active_mtu = IB_MTU_2048;
+ else if (netdev->mtu >= 1024)
+ props->active_mtu = IB_MTU_1024;
+ else if (netdev->mtu >= 512)
+ props->active_mtu = IB_MTU_512;
+ else
+ props->active_mtu = IB_MTU_256;
+
props->lid = 1;
props->lmc = 0;
props->sm_lid = 0;
props->sm_sl = 0;
- props->state = IB_PORT_ACTIVE;
+ if (netif_queue_stopped(netdev))
+ props->state = IB_PORT_DOWN;
+ else if (nesvnic->linkup)
+ props->state = IB_PORT_ACTIVE;
+ else
+ props->state = IB_PORT_DOWN;
props->phys_state = 0;
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
@@ -658,7 +600,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
props->pkey_tbl_len = 1;
props->qkey_viol_cntr = 0;
props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
+ props->active_speed = IB_SPEED_SDR;
props->max_msg_sz = 0x80000000;
return 0;
@@ -666,16 +608,6 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
/**
- * nes_modify_port
- */
-static int nes_modify_port(struct ib_device *ibdev, u8 port,
- int port_modify_mask, struct ib_port_modify *props)
-{
- return 0;
-}
-
-
-/**
* nes_query_pkey
*/
static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
@@ -848,10 +780,10 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
- atomic_read(&nesvnic->netdev->refcnt));
+ netdev_refcnt_read(nesvnic->netdev));
err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
- nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
+ nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD);
if (err) {
return ERR_PTR(err);
}
@@ -1079,6 +1011,7 @@ static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
kunmap(nesqp->page);
return -ENOMEM;
}
+ nesqp->sq_kmapped = 1;
nesqp->hwqp.q2_vbase = mem;
mem += 256;
memset(nesqp->hwqp.q2_vbase, 0, 256);
@@ -1156,7 +1089,10 @@ static inline void nes_free_qp_mem(struct nes_device *nesdev,
pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
nesqp->pbl_vbase = NULL;
- kunmap(nesqp->page);
+ if (nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
+ kunmap(nesqp->page);
+ }
}
}
@@ -1224,7 +1160,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
- nesadapter->max_qp, &qp_num, &nesadapter->next_qp);
+ nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP);
if (ret) {
return ERR_PTR(ret);
}
@@ -1250,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
kfree(nesqp->allocated_buffer);
nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
- return NULL;
+ return ERR_PTR(-EFAULT);
}
if (req.user_wqe_buffers) {
virt_wqs = 1;
}
+ if (req.user_qp_buffer)
+ nesqp->nesuqp_addr = req.user_qp_buffer;
if ((ibpd->uobject) && (ibpd->uobject->context)) {
nesqp->user_mode = 1;
nes_ucontext = to_nesucontext(ibpd->uobject->context);
@@ -1338,8 +1276,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT);
nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size <<
NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT);
+ if (!udata) {
nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN);
nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN);
+ }
nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number +
((u32)nesqp->nesrcq->hw_cq.cq_number << 16));
u64temp = (u64)nesqp->hwqp.sq_pbase;
@@ -1381,6 +1321,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp)));
nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp))));
nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM |
+ NES_QPCONTEXT_ORDIRD_AAH |
((((u32)nesadapter->max_irrq_wr) <<
NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK));
if (disable_mpa_crc) {
@@ -1445,6 +1386,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
if (ibpd->uobject) {
uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
+ uresp.mmap_rq_db_index = 0;
uresp.actual_sq_size = sq_size;
uresp.actual_rq_size = rq_size;
uresp.qp_id = nesqp->hwqp.qp_id;
@@ -1460,24 +1402,59 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
spin_lock_init(&nesqp->lock);
- init_waitqueue_head(&nesqp->state_waitq);
- init_waitqueue_head(&nesqp->kick_waitq);
nes_add_ref(&nesqp->ibqp);
break;
default:
nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
return ERR_PTR(-EINVAL);
- break;
}
+ nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
+ init_timer(&nesqp->terminate_timer);
+ nesqp->terminate_timer.function = nes_terminate_timeout;
+ nesqp->terminate_timer.data = (unsigned long)nesqp;
+
/* update the QP table */
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
- atomic_read(&nesvnic->netdev->refcnt));
+ netdev_refcnt_read(nesvnic->netdev));
return &nesqp->ibqp;
}
+/**
+ * nes_clean_cq
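+ *
+ * Walk the CQ and zero the completion-context of any valid CQE that still
+ * points at this QP so the entry is ignored when the CQ is polled.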
+ */
+static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
+{
+ u32 cq_head;
+ u32 lo;
+ u32 hi;
+ u64 u64temp;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&nescq->lock, flags);
+
+ cq_head = nescq->hw_cq.cq_head;
+ while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
+ rmb();
+ lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
+ u64temp = (((u64)hi) << 32) | ((u64)lo);
+ u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+ if (u64temp == (u64)(unsigned long)nesqp) {
+ /* Zero the context value so cqe will be ignored */
+ nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
+ nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
+ }
+
+ if (++cq_head >= nescq->hw_cq.cq_size)
+ cq_head = 0;
+ }
+
+ spin_unlock_irqrestore(&nescq->lock, flags);
+}
+
/**
* nes_destroy_qp
@@ -1485,12 +1462,11 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
static int nes_destroy_qp(struct ib_qp *ibqp)
{
struct nes_qp *nesqp = to_nesqp(ibqp);
- /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
struct nes_ucontext *nes_ucontext;
struct ib_qp_attr attr;
struct iw_cm_id *cm_id;
struct iw_cm_event cm_event;
- int ret;
+ int ret = 0;
atomic_inc(&sw_qps_destroyed);
nesqp->destroyed = 1;
@@ -1506,7 +1482,7 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
(nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) {
cm_id = nesqp->cm_id;
cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = IW_CM_EVENT_STATUS_TIMEOUT;
+ cm_event.status = -ETIMEDOUT;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
@@ -1522,7 +1498,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
}
-
if (nesqp->user_mode) {
if ((ibqp->uobject)&&(ibqp->uobject->context)) {
nes_ucontext = to_nesucontext(ibqp->uobject->context);
@@ -1532,10 +1507,18 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
}
}
- if (nesqp->pbl_pbase)
+ if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
+ nesqp->sq_kmapped = 0;
kunmap(nesqp->page);
- }
+ }
+ } else {
+ /* Clean any pending completions from the cq(s) */
+ if (nesqp->nesscq)
+ nes_clean_cq(nesqp, nesqp->nesscq);
+ if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
+ nes_clean_cq(nesqp, nesqp->nesrcq);
+ }
nes_rem_ref(&nesqp->ibqp);
return 0;
}
@@ -1567,8 +1550,11 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
unsigned long flags;
int ret;
+ if (entries > nesadapter->max_cqe)
+ return ERR_PTR(-EINVAL);
+
err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
- nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
+ nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ);
if (err) {
return ERR_PTR(err);
}
@@ -1596,11 +1582,12 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
nes_ucontext->mcrqf = req.mcrqf;
if (nes_ucontext->mcrqf) {
if (nes_ucontext->mcrqf & 0x80000000)
- nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 12 + (nes_ucontext->mcrqf & 0xf) - 1;
+ nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1);
else if (nes_ucontext->mcrqf & 0x40000000)
nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff;
else
nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1;
+ nescq->mcrqf = nes_ucontext->mcrqf;
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
}
nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n",
@@ -1656,6 +1643,12 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
if (!context)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
nescq->hw_cq.cq_pbase);
+ else {
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
+ nespbl->pbl_vbase, nespbl->pbl_pbase);
+ kfree(nespbl);
+ }
+
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
kfree(nescq);
return ERR_PTR(-ENOMEM);
@@ -1674,16 +1667,16 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
/* use 4k pbl */
nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
if (nesadapter->free_4kpbl == 0) {
- if (cqp_request->dynamic) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kfree(cqp_request);
- } else {
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ nes_free_cqp_request(nesdev, cqp_request);
if (!context)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
nescq->hw_cq.cq_pbase);
+ else {
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
+ nespbl->pbl_vbase, nespbl->pbl_pbase);
+ kfree(nespbl);
+ }
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
kfree(nescq);
return ERR_PTR(-ENOMEM);
@@ -1696,16 +1689,16 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
/* use 256 byte pbl */
nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
if (nesadapter->free_256pbl == 0) {
- if (cqp_request->dynamic) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kfree(cqp_request);
- } else {
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ nes_free_cqp_request(nesdev, cqp_request);
if (!context)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
nescq->hw_cq.cq_pbase);
+ else {
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
+ nespbl->pbl_vbase, nespbl->pbl_pbase);
+ kfree(nespbl);
+ }
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
kfree(nescq);
return ERR_PTR(-ENOMEM);
@@ -1758,6 +1751,11 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
if (!context)
pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
nescq->hw_cq.cq_pbase);
+ else {
+ pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
+ nespbl->pbl_vbase, nespbl->pbl_pbase);
+ kfree(nespbl);
+ }
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
kfree(nescq);
return ERR_PTR(-EIO);
@@ -1772,7 +1770,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
resp.cq_id = nescq->hw_cq.cq_number;
resp.cq_size = nescq->hw_cq.cq_size;
resp.mmap_db_index = 0;
- if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ if (ib_copy_to_udata(udata, &resp, sizeof resp - sizeof resp.reserved)) {
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
kfree(nescq);
return ERR_PTR(-EFAULT);
@@ -1839,7 +1837,9 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
(nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
+ if (!nescq->mcrqf)
+ nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
+
atomic_set(&cqp_request->refcount, 2);
nes_post_cqp_request(nesdev, cqp_request);
@@ -1873,21 +1873,74 @@ static int nes_destroy_cq(struct ib_cq *ib_cq)
return ret;
}
+/**
+ * root_256
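+ *
+ * Re-express a root PBL built from 4 KB leaves at 256-byte granularity: a
+ * single 4 KB leaf gets a new 16-entry root pointing at its 256-byte
+ * chunks, while a multi-leaf root is fanned out in place, sixteen entries
+ * per original leaf.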
+ */
+static u32 root_256(struct nes_device *nesdev,
+ struct nes_root_vpbl *root_vpbl,
+ struct nes_root_vpbl *new_root,
+ u16 pbl_count_4k)
+{
+ u64 leaf_pbl;
+ int i, j, k;
+
+ if (pbl_count_4k == 1) {
+ new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+ 512, &new_root->pbl_pbase);
+
+ if (new_root->pbl_vbase == NULL)
+ return 0;
+
+ leaf_pbl = (u64)root_vpbl->pbl_pbase;
+ for (i = 0; i < 16; i++) {
+ new_root->pbl_vbase[i].pa_low =
+ cpu_to_le32((u32)leaf_pbl);
+ new_root->pbl_vbase[i].pa_high =
+ cpu_to_le32((u32)((((u64)leaf_pbl) >> 32)));
+ leaf_pbl += 256;
+ }
+ } else {
+ for (i = 3; i >= 0; i--) {
+ j = i * 16;
+ root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i];
+ leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) +
+ (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high))
+ << 32);
+ for (k = 1; k < 16; k++) {
+ leaf_pbl += 256;
+ root_vpbl->pbl_vbase[j + k].pa_low =
+ cpu_to_le32((u32)leaf_pbl);
+ root_vpbl->pbl_vbase[j + k].pa_high =
+ cpu_to_le32((u32)((((u64)leaf_pbl) >> 32)));
+ }
+ }
+ }
+
+ return 1;
+}
+
/**
* nes_reg_mr
*/
static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl,
- dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count,
- int acc, u64 *iova_start)
+ dma_addr_t single_buffer, u16 pbl_count_4k,
+ u16 residual_page_count_4k, int acc, u64 *iova_start,
+ u16 *actual_pbl_cnt, u8 *used_4k_pbls)
{
struct nes_hw_cqp_wqe *cqp_wqe;
struct nes_cqp_request *cqp_request;
unsigned long flags;
int ret;
struct nes_adapter *nesadapter = nesdev->nesadapter;
- /* int count; */
+ uint pg_cnt = 0;
+ u16 pbl_count_256 = 0;
+ u16 pbl_count = 0;
+ u8 use_256_pbls = 0;
+ u8 use_4k_pbls = 0;
+ u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0;
+ struct nes_root_vpbl new_root = { 0, NULL, NULL };
u32 opcode = 0;
u16 major_code;
@@ -1900,56 +1953,70 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
cqp_request->waiting = 1;
cqp_wqe = &cqp_request->cqp_wqe;
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- /* track PBL resources */
- if (pbl_count != 0) {
- if (pbl_count > 1) {
- /* Two level PBL */
- if ((pbl_count+1) > nesadapter->free_4kpbl) {
- nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
- if (cqp_request->dynamic) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kfree(cqp_request);
- } else {
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- return -ENOMEM;
- } else {
- nesadapter->free_4kpbl -= pbl_count+1;
- }
- } else if (residual_page_count > 32) {
- if (pbl_count > nesadapter->free_4kpbl) {
- nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
- if (cqp_request->dynamic) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kfree(cqp_request);
- } else {
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- return -ENOMEM;
- } else {
- nesadapter->free_4kpbl -= pbl_count;
+ if (pbl_count_4k) {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+
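+ /* A 4 KB PBL holds 512 8-byte entries and a 256-byte PBL holds 32, so
+ * compute the total page count and its equivalent in 256-byte blocks. */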
+ pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k;
+ pbl_count_256 = (pg_cnt + 31) / 32;
+ if (pg_cnt <= 32) {
+ if (pbl_count_256 <= nesadapter->free_256pbl)
+ use_256_pbls = 1;
+ else if (pbl_count_4k <= nesadapter->free_4kpbl)
+ use_4k_pbls = 1;
+ } else if (pg_cnt <= 2048) {
+ if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) &&
+ (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) {
+ use_4k_pbls = 1;
+ } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) {
+ use_256_pbls = 1;
+ use_two_level = 1;
+ } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
+ use_4k_pbls = 1;
}
} else {
- if (pbl_count > nesadapter->free_256pbl) {
- nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
- if (cqp_request->dynamic) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kfree(cqp_request);
- } else {
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- return -ENOMEM;
- } else {
- nesadapter->free_256pbl -= pbl_count;
- }
+ if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl)
+ use_4k_pbls = 1;
+ }
+
+ if (use_256_pbls) {
+ pbl_count = pbl_count_256;
+ nesadapter->free_256pbl -= pbl_count + use_two_level;
+ } else if (use_4k_pbls) {
+ pbl_count = pbl_count_4k;
+ nesadapter->free_4kpbl -= pbl_count + use_two_level;
+ } else {
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ nes_debug(NES_DBG_MR, "Out of Pbls\n");
+ nes_free_cqp_request(nesdev, cqp_request);
+ return -ENOMEM;
}
+
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
}
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ if (use_256_pbls && use_two_level) {
+ if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k) == 1) {
+ if (new_root.pbl_pbase != 0)
+ root_vpbl = &new_root;
+ } else {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ nesadapter->free_256pbl += pbl_count_256 + use_two_level;
+ use_256_pbls = 0;
+
+ if (pbl_count_4k == 1)
+ use_two_level = 0;
+ pbl_count = pbl_count_4k;
+
+ if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
+ nesadapter->free_4kpbl -= pbl_count + use_two_level;
+ use_4k_pbls = 1;
+ }
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+
+ if (use_4k_pbls == 0)
+ return -ENOMEM;
+ }
+ }
opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ |
NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
@@ -1978,10 +2045,9 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
} else {
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX,
- (((pbl_count - 1) * 4096) + (residual_page_count*8)));
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8));
- if ((pbl_count > 1) || (residual_page_count > 32))
+ if (use_4k_pbls)
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
}
barrier();
@@ -1998,13 +2064,25 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
major_code = cqp_request->major_code;
nes_put_cqp_request(nesdev, cqp_request);
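+ /* Give any reserved PBL blocks back if the request timed out or the CQP reported an error. */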
+ if ((!ret || major_code) && pbl_count != 0) {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (use_256_pbls)
+ nesadapter->free_256pbl += pbl_count + use_two_level;
+ else if (use_4k_pbls)
+ nesadapter->free_4kpbl += pbl_count + use_two_level;
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ if (new_root.pbl_pbase)
+ pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase,
+ new_root.pbl_pbase);
+
if (!ret)
return -ETIME;
else if (major_code)
return -EIO;
- else
- return 0;
+ *actual_pbl_cnt = pbl_count + use_two_level;
+ *used_4k_pbls = use_4k_pbls;
return 0;
}
@@ -2027,18 +2105,18 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
struct nes_root_vpbl root_vpbl;
u32 stag;
u32 i;
+ unsigned long mask;
u32 stag_index = 0;
u32 next_stag_index = 0;
u32 driver_key = 0;
u32 root_pbl_index = 0;
u32 cur_pbl_index = 0;
- int err = 0, pbl_depth = 0;
+ int err = 0;
int ret = 0;
u16 pbl_count = 0;
u8 single_page = 1;
u8 stag_key = 0;
- pbl_depth = 0;
region_length = 0;
vpbl.pbl_vbase = NULL;
root_vpbl.pbl_vbase = NULL;
@@ -2055,8 +2133,11 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-E2BIG);
}
+ if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+ return ERR_PTR(-EINVAL);
+
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
- &stag_index, &next_stag_index);
+ &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR);
if (err) {
return ERR_PTR(err);
}
@@ -2120,19 +2201,16 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
root_pbl_index++;
cur_pbl_index = 0;
}
- if (buffer_list[i].addr & ~PAGE_MASK) {
- /* TODO: Unwind allocated buffers */
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) buffer_list[i].addr);
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
- if (!buffer_list[i].size) {
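+ /* Fold the checks into one mask: a zero size, an unaligned start on any
+ * buffer but the first, or an unaligned end on any buffer but the last
+ * all leave low-order bits set. */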
+ mask = !buffer_list[i].size;
+ if (i != 0)
+ mask |= buffer_list[i].addr;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
+
+ if (mask & ~PAGE_MASK) {
nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+ nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
ibmr = ERR_PTR(-EINVAL);
kfree(nesmr);
goto reg_phys_err;
@@ -2143,7 +2221,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
single_page = 0;
}
- vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr);
+ vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
vpbl.pbl_vbase[cur_pbl_index++].pa_high =
cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
}
@@ -2156,8 +2234,6 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
" length = 0x%016lX, index = 0x%08X\n",
stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
- region_length -= (*iova_start)&PAGE_MASK;
-
/* Make the leaf PBL the root if only one PBL */
if (root_pbl_index == 1) {
root_vpbl.pbl_pbase = vpbl.pbl_pbase;
@@ -2169,18 +2245,14 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
pbl_count = root_pbl_index;
}
ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start);
+ buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+ &nesmr->pbls_used, &nesmr->pbl_4k);
if (ret == 0) {
nesmr->ibmr.rkey = stag;
nesmr->ibmr.lkey = stag;
nesmr->mode = IWNES_MEMREG_TYPE_MEM;
ibmr = &nesmr->ibmr;
- nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
- nesmr->pbls_used = pbl_count;
- if (pbl_count > 1) {
- nesmr->pbls_used++;
- }
} else {
kfree(nesmr);
ibmr = ERR_PTR(-ENOMEM);
@@ -2237,7 +2309,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct ib_mr *ibmr = ERR_PTR(-EINVAL);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct nes_ucontext *nes_ucontext;
struct nes_pbl *nespbl;
struct nes_mr *nesmr;
@@ -2245,7 +2317,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_mem_reg_req req;
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
- int nmap_index, page_index;
+ int entry, page_index;
int page_count = 0;
int err, pbl_depth = 0;
int chunk_pages;
@@ -2260,6 +2332,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u16 pbl_count;
u8 single_page = 1;
u8 stag_key;
+ int first_page = 1;
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region)) {
@@ -2273,8 +2346,10 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
skip_pages = ((u32)region->offset) >> 12;
- if (ib_copy_from_udata(&req, udata, sizeof(req)))
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ ib_umem_release(region);
return ERR_PTR(-EFAULT);
+ }
nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
switch (req.reg_type) {
@@ -2294,7 +2369,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
next_stag_index %= nesadapter->max_mr;
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index);
+ nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR);
if (err) {
ib_umem_release(region);
return ERR_PTR(err);
@@ -2308,128 +2383,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nesmr->region = region;
- list_for_each_entry(chunk, &region->chunk_list, list) {
- nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
- chunk->nents, chunk->nmap);
- for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
- if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ if (sg_dma_address(sg) & ~PAGE_MASK) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+ (unsigned int) sg_dma_address(sg));
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
- if (!sg_dma_len(&chunk->page_list[nmap_index])) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
+ if (!sg_dma_len(sg)) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
+ nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
- region_length += sg_dma_len(&chunk->page_list[nmap_index]);
- chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
- region_length -= skip_pages << 12;
- for (page_index=skip_pages; page_index < chunk_pages; page_index++) {
- skip_pages = 0;
- if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
- goto enough_pages;
- if ((page_count&0x01FF) == 0) {
- if (page_count >= 1024 * 512) {
+ region_length += sg_dma_len(sg);
+ chunk_pages = sg_dma_len(sg) >> 12;
+ region_length -= skip_pages << 12;
+ for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
+ skip_pages = 0;
+ if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+ goto enough_pages;
+ if ((page_count&0x01FF) == 0) {
+ if (page_count >= 1024 * 512) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter,
+ nesadapter->allocated_mrs, stag_index);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-E2BIG);
+ goto reg_user_mr_err;
+ }
+ if (root_pbl_index == 1) {
+ root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+ 8192, &root_vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+ root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+ if (!root_vpbl.pbl_vbase) {
ib_umem_release(region);
- nes_free_resource(nesadapter,
- nesadapter->allocated_mrs, stag_index);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
kfree(nesmr);
- ibmr = ERR_PTR(-E2BIG);
+ ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
- if (root_pbl_index == 1) {
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 8192, &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
- GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.pbl_vbase[0].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
- vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
+ root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
+ GFP_KERNEL);
+ if (!root_vpbl.leaf_vpbl) {
ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
+ root_vpbl.pbl_vbase[0].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[0].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+ root_vpbl.leaf_vpbl[0] = vpbl;
}
- if (single_page) {
- if (page_count != 0) {
- if ((last_dma_addr+4096) !=
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)))
- single_page = 0;
- last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096);
- } else {
- first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096);
- last_dma_addr = first_dma_addr;
- }
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
+ vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_user_mr_err;
}
-
- vpbl.pbl_vbase[cur_pbl_index].pa_low =
- cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)));
- vpbl.pbl_vbase[cur_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096))) >> 32)));
- cur_pbl_index++;
- page_count++;
+ if (1 <= root_pbl_index) {
+ root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+ root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+ }
+ root_pbl_index++;
+ cur_pbl_index = 0;
}
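+ /* Track whether every page so far is physically contiguous with the
+ * previous one; single_page stays set only for a fully contiguous region. */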
+ if (single_page) {
+ if (page_count != 0) {
+ if ((last_dma_addr+4096) !=
+ (sg_dma_address(sg)+
+ (page_index*4096)))
+ single_page = 0;
+ last_dma_addr = sg_dma_address(sg)+
+ (page_index*4096);
+ } else {
+ first_dma_addr = sg_dma_address(sg)+
+ (page_index*4096);
+ last_dma_addr = first_dma_addr;
+ }
+ }
+
+ vpbl.pbl_vbase[cur_pbl_index].pa_low =
+ cpu_to_le32((u32)(sg_dma_address(sg)+
+ (page_index*4096)));
+ vpbl.pbl_vbase[cur_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+
+ (page_index*4096))) >> 32)));
+ cur_pbl_index++;
+ page_count++;
}
}
+
enough_pages:
nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
" stag_key=0x%08x\n",
@@ -2437,9 +2509,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
stag = stag_index << 8;
stag |= driver_key;
stag += (u32)stag_key;
- if (stag == 0) {
- stag = 1;
- }
iova_start = virt;
/* Make the leaf PBL the root if only one PBL */
@@ -2458,8 +2527,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
stag, (unsigned int)iova_start,
(unsigned int)region_length, stag_index,
(unsigned long long)region->length, pbl_count);
- ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl,
- first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start);
+ ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl,
+ first_dma_addr, pbl_count, (u16)cur_pbl_index, acc,
+ &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k);
nes_debug(NES_DBG_MR, "ret=%d\n", ret);
@@ -2468,11 +2538,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nesmr->ibmr.lkey = stag;
nesmr->mode = IWNES_MEMREG_TYPE_MEM;
ibmr = &nesmr->ibmr;
- nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0;
- nesmr->pbls_used = pbl_count;
- if (pbl_count > 1) {
- nesmr->pbls_used++;
- }
} else {
ib_umem_release(region);
kfree(nesmr);
@@ -2498,9 +2563,13 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr);
return ibmr;
- break;
case IWNES_MEMREG_TYPE_QP:
case IWNES_MEMREG_TYPE_CQ:
+ if (!region->length) {
+ nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
+ ib_umem_release(region);
+ return ERR_PTR(-EINVAL);
+ }
nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
if (!nespbl) {
nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
@@ -2544,25 +2613,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
(void *) nespbl->pbl_vbase, nespbl->user_base);
- list_for_each_entry(chunk, &region->chunk_list, list) {
- for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
- chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
- chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
- nespbl->page = sg_page(&chunk->page_list[0]);
- for (page_index=0; page_index<chunk_pages; page_index++) {
- ((__le32 *)pbl)[0] = cpu_to_le32((u32)
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)));
- ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)))>>32);
- nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
- (unsigned long long)*pbl,
- le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
- pbl++;
- }
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ chunk_pages = sg_dma_len(sg) >> 12;
+ chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0;
+ if (first_page) {
+ nespbl->page = sg_page(sg);
+ first_page = 0;
+ }
+
+ for (page_index = 0; page_index < chunk_pages; page_index++) {
+ ((__le32 *)pbl)[0] = cpu_to_le32((u32)
+ (sg_dma_address(sg)+
+ (page_index*4096)));
+ ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
+ (sg_dma_address(sg)+
+ (page_index*4096)))>>32);
+ nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
+ (unsigned long long)*pbl,
+ le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
+ pbl++;
}
}
+
if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
} else {
@@ -2572,9 +2644,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nesmr->ibmr.lkey = -1;
nesmr->mode = req.reg_type;
return &nesmr->ibmr;
- break;
}
+ ib_umem_release(region);
return ERR_PTR(-ENOSYS);
}
@@ -2613,24 +2685,6 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
cqp_request->waiting = 1;
cqp_wqe = &cqp_request->cqp_wqe;
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesmr->pbls_used != 0) {
- if (nesmr->pbl_4k) {
- nesadapter->free_4kpbl += nesmr->pbls_used;
- if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
- printk(KERN_ERR PFX "free 4KB PBLs(%u) has exceeded the max(%u)\n",
- nesadapter->free_4kpbl, nesadapter->max_4kpbl);
- }
- } else {
- nesadapter->free_256pbl += nesmr->pbls_used;
- if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
- printk(KERN_ERR PFX "free 256B PBLs(%u) has exceeded the max(%u)\n",
- nesadapter->free_256pbl, nesadapter->max_256pbl);
- }
- }
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO |
@@ -2648,11 +2702,6 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
" CQP Major:Minor codes = 0x%04X:0x%04X\n",
ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- (ib_mr->rkey & 0x0fffff00) >> 8);
-
- kfree(nesmr);
-
major_code = cqp_request->major_code;
minor_code = cqp_request->minor_code;
@@ -2668,8 +2717,33 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
" to destroy STag, ib_mr=%p, rkey = 0x%08X\n",
major_code, minor_code, ib_mr, ib_mr->rkey);
return -EIO;
- } else
- return 0;
+ }
+
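+ /* Only after the STag destroy succeeds are the MR's PBL blocks returned to the adapter's free pools. */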
+ if (nesmr->pbls_used != 0) {
+ spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+ if (nesmr->pbl_4k) {
+ nesadapter->free_4kpbl += nesmr->pbls_used;
+ if (nesadapter->free_4kpbl > nesadapter->max_4kpbl)
+ printk(KERN_ERR PFX "free 4KB PBLs(%u) has "
+ "exceeded the max(%u)\n",
+ nesadapter->free_4kpbl,
+ nesadapter->max_4kpbl);
+ } else {
+ nesadapter->free_256pbl += nesmr->pbls_used;
+ if (nesadapter->free_256pbl > nesadapter->max_256pbl)
+ printk(KERN_ERR PFX "free 256B PBLs(%u) has "
+ "exceeded the max(%u)\n",
+ nesadapter->free_256pbl,
+ nesadapter->max_256pbl);
+ }
+ spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+ }
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ (ib_mr->rkey & 0x0fffff00) >> 8);
+
+ kfree(nesmr);
+
+ return 0;
}
@@ -2699,10 +2773,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
struct nes_vnic *nesvnic = nesibdev->nesvnic;
nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%x.%x.%x\n",
- (int)(nesvnic->nesdev->nesadapter->fw_ver >> 32),
- (int)(nesvnic->nesdev->nesadapter->fw_ver >> 16) & 0xffff,
- (int)(nesvnic->nesdev->nesadapter->fw_ver & 0xffff));
+ return sprintf(buf, "%u.%u\n",
+ (nesvnic->nesdev->nesadapter->firmware_version >> 16),
+ (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
}
@@ -2755,17 +2828,16 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->cap.max_send_wr = nesqp->hwqp.sq_size;
attr->cap.max_recv_wr = nesqp->hwqp.rq_size;
attr->cap.max_recv_sge = 1;
- if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) {
- init_attr->cap.max_inline_data = 0;
- } else {
- init_attr->cap.max_inline_data = 64;
- }
+ if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA)
+ attr->cap.max_inline_data = 0;
+ else
+ attr->cap.max_inline_data = 64;
init_attr->event_handler = nesqp->ibqp.event_handler;
init_attr->qp_context = nesqp->ibqp.qp_context;
init_attr->send_cq = nesqp->ibqp.send_cq;
init_attr->recv_cq = nesqp->ibqp.recv_cq;
- init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq;
+ init_attr->srq = nesqp->ibqp.srq;
init_attr->cap = attr->cap;
return 0;
@@ -2776,7 +2848,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
* nes_hw_modify_qp
*/
int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
- u32 next_iwarp_state, u32 wait_completion)
+ u32 next_iwarp_state, u32 termlen, u32 wait_completion)
{
struct nes_hw_cqp_wqe *cqp_wqe;
/* struct iw_cm_id *cm_id = nesqp->cm_id; */
@@ -2808,6 +2880,13 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
+ /* If sending a terminate message, fill in the length (in words) */
+ if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
+ !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
+ termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
+ }
+
atomic_set(&cqp_request->refcount, 2);
nes_post_cqp_request(nesdev, cqp_request);
@@ -2859,7 +2938,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int ret;
u16 original_last_aeq;
u8 issue_modify_qp = 0;
- u8 issue_disconnect = 0;
u8 dont_wait = 0;
nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u,"
@@ -2867,7 +2945,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
nesqp->iwarp_state, atomic_read(&nesqp->refcount));
- nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, qplockflags);
nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
@@ -2882,7 +2959,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
@@ -2893,7 +2969,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
@@ -2904,14 +2979,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
if (nesqp->cm_id == NULL) {
nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
nesqp->hwqp.qp_id );
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
@@ -2929,7 +3002,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return 0;
} else {
if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
@@ -2937,7 +3009,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" ignored due to current iWARP state\n",
nesqp->hwqp.qp_id);
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
@@ -2945,11 +3016,11 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" already done based on hw state.\n",
nesqp->hwqp.qp_id);
issue_modify_qp = 0;
- nesqp->in_disconnect = 0;
}
switch (nesqp->hw_iwarp_state) {
case NES_AEQE_IWARP_STATE_CLOSING:
next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
+ break;
case NES_AEQE_IWARP_STATE_TERMINATE:
next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
break;
@@ -2958,7 +3029,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
break;
default:
next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->in_disconnect = 1;
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
break;
}
@@ -2969,24 +3039,24 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id);
if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
/* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
issue_modify_qp = 1;
- nesqp->in_disconnect = 1;
break;
case IB_QPS_ERR:
case IB_QPS_RESET:
if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
}
nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
nesqp->hwqp.qp_id);
+ if (nesqp->term_flags)
+ del_timer(&nesqp->terminate_timer);
+
next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
/* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
if (nesqp->hte_added) {
@@ -2995,9 +3065,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hte_added = 0;
}
if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
+ (nesdev->iw_status) &&
(nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
next_iwarp_state |= NES_CQP_QP_RESET;
- nesqp->in_disconnect = 1;
} else {
nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
@@ -3008,25 +3078,14 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
break;
default:
spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_rem_ref(&nesqp->ibqp);
return -EINVAL;
break;
}
nesqp->ibqp_state = attr->qp_state;
- if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) ==
- (u32)NES_CQP_QP_IWARP_STATE_RTS) &&
- ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) >
- (u32)NES_CQP_QP_IWARP_STATE_RTS)) {
- nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
- nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
- nesqp->iwarp_state);
- issue_disconnect = 1;
- } else {
- nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
- nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
- nesqp->iwarp_state);
- }
+ nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
+ nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
+ nesqp->iwarp_state);
}
if (attr_mask & IB_QP_ACCESS_FLAGS) {
@@ -3065,7 +3124,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (issue_modify_qp) {
nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
- ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1);
+ ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
if (ret)
nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
" failed for QP%u.\n",
@@ -3078,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- if ((!ret) ||
- ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
- (ret))) {
+ if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
if (dont_wait) {
if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
@@ -3088,7 +3145,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
/* this one is for the cm_disconnect thread */
- nes_add_ref(&nesqp->ibqp);
spin_lock_irqsave(&nesqp->lock, qplockflags);
nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
@@ -3097,14 +3153,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
} else {
nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
- nes_rem_ref(&nesqp->ibqp);
}
} else {
spin_lock_irqsave(&nesqp->lock, qplockflags);
if (nesqp->cm_id) {
/* These two are for the timer thread */
if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
- nes_add_ref(&nesqp->ibqp);
nesqp->cm_id->add_ref(nesqp->cm_id);
nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
" need ae to finish up, original_last_aeq = 0x%04X."
@@ -3128,14 +3182,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- nes_rem_ref(&nesqp->ibqp);
}
} else {
nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- nes_rem_ref(&nesqp->ibqp);
}
err = 0;
@@ -3214,30 +3266,32 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_qp *nesqp = to_nesqp(ibqp);
struct nes_hw_qp_wqe *wqe;
- int err;
+ int err = 0;
u32 qsize = nesqp->hwqp.sq_size;
u32 head;
- u32 wqe_misc;
- u32 wqe_count;
+ u32 wqe_misc = 0;
+ u32 wqe_count = 0;
u32 counter;
- u32 total_payload_length;
-
- err = 0;
- wqe_misc = 0;
- wqe_count = 0;
- total_payload_length = 0;
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
+ if (nesqp->ibqp_state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
- spin_lock_irqsave(&nesqp->lock, flags);
+ spin_lock_irqsave(&nesqp->lock, flags);
head = nesqp->hwqp.sq_head;
while (ib_wr) {
+ /* Check for QP error */
+ if (nesqp->term_flags) {
+ err = -EINVAL;
+ break;
+ }
+
/* Check for SQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- err = -EINVAL;
+ err = -ENOMEM;
break;
}
@@ -3248,94 +3302,210 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
u64temp = (u64)(ib_wr->wr_id);
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
u64temp);
- switch (ib_wr->opcode) {
- case IB_WR_SEND:
- if (ib_wr->send_flags & IB_SEND_SOLICITED) {
- wqe_misc = NES_IWARP_SQ_OP_SENDSE;
- } else {
- wqe_misc = NES_IWARP_SQ_OP_SEND;
- }
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- err = -EINVAL;
- break;
- }
- if (ib_wr->send_flags & IB_SEND_FENCE) {
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
- }
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
+ switch (ib_wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (IB_WR_SEND == ib_wr->opcode) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ wqe_misc = NES_IWARP_SQ_OP_SENDSE;
+ else
+ wqe_misc = NES_IWARP_SQ_OP_SEND;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
+ else
+ wqe_misc = NES_IWARP_SQ_OP_SENDINV;
- break;
- case IB_WR_RDMA_WRITE:
- wqe_misc = NES_IWARP_SQ_OP_RDMAW;
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
- ib_wr->num_sge,
- nesdev->nesadapter->max_sge);
- err = -EINVAL;
- break;
- }
- if (ib_wr->send_flags & IB_SEND_FENCE) {
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
- }
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+ ib_wr->ex.invalidate_rkey);
+ }
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
-
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
- break;
- case IB_WR_RDMA_READ:
- /* iWARP only supports 1 sge for RDMA reads */
- if (ib_wr->num_sge > 1) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
- ib_wr->num_sge);
- err = -EINVAL;
- break;
- }
- wqe_misc = NES_IWARP_SQ_OP_RDMAR;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- ib_wr->wr.rdma.remote_addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- ib_wr->wr.rdma.rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
- ib_wr->sg_list->length);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
- ib_wr->sg_list->addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
- ib_wr->sg_list->lkey);
- break;
- default:
- /* error */
- err = -EINVAL;
- break;
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ err = -EINVAL;
+ break;
}
- if (ib_wr->send_flags & IB_SEND_SIGNALED) {
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+ if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+ ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+ (ib_wr->sg_list[0].length <= 64)) {
+ memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+ (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ ib_wr->sg_list[0].length);
+ wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+ } else {
+ fill_wqe_sg_send(wqe, ib_wr, 1);
+ }
+
+ break;
+ case IB_WR_RDMA_WRITE:
+ wqe_misc = NES_IWARP_SQ_OP_RDMAW;
+ if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+ nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
+ ib_wr->num_sge, nesdev->nesadapter->max_sge);
+ err = -EINVAL;
+ break;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+ ib_wr->wr.rdma.rkey);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+ ib_wr->wr.rdma.remote_addr);
+
+ if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+ ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+ (ib_wr->sg_list[0].length <= 64)) {
+ memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+ (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+ ib_wr->sg_list[0].length);
+ wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+ } else {
+ fill_wqe_sg_send(wqe, ib_wr, 1);
+ }
+
+ wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
+ wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ /* iWARP only supports 1 sge for RDMA reads */
+ if (ib_wr->num_sge > 1) {
+ nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
+ ib_wr->num_sge);
+ err = -EINVAL;
+ break;
+ }
+ if (ib_wr->opcode == IB_WR_RDMA_READ) {
+ wqe_misc = NES_IWARP_SQ_OP_RDMAR;
+ } else {
+ wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+ ib_wr->ex.invalidate_rkey);
+ }
+
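+ /* Single-SGE read: program the remote address/rkey and the one local buffer */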
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+ ib_wr->wr.rdma.remote_addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+ ib_wr->wr.rdma.rkey);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
+ ib_wr->sg_list->length);
+ set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+ ib_wr->sg_list->addr);
+ set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
+ ib_wr->sg_list->lkey);
+ break;
+ case IB_WR_LOCAL_INV:
+ wqe_misc = NES_IWARP_SQ_OP_LOCINV;
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
+ ib_wr->ex.invalidate_rkey);
+ break;
+ case IB_WR_FAST_REG_MR:
+ {
+ int i;
+ int flags = ib_wr->wr.fast_reg.access_flags;
+ struct nes_ib_fast_reg_page_list *pnesfrpl =
+ container_of(ib_wr->wr.fast_reg.page_list,
+ struct nes_ib_fast_reg_page_list,
+ ibfrpl);
+ u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
+ u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
+
+ if (ib_wr->wr.fast_reg.page_list_len >
+ (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+ nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
+ err = -EINVAL;
+ break;
+ }
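+ /* Program the fast-register WQE: base VA, length, STag and page size */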
+ wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
+ ib_wr->wr.fast_reg.iova_start);
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
+ ib_wr->wr.fast_reg.length);
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
+ ib_wr->wr.fast_reg.rkey);
+ /* Set page size: */
+ if (ib_wr->wr.fast_reg.page_shift == 12) {
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
+ } else if (ib_wr->wr.fast_reg.page_shift == 21) {
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
+ } else {
+ nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
+ " ib_wr=%u, max=1\n", ib_wr->num_sge);
+ err = -EINVAL;
+ break;
+ }
+ /* Set access_flags */
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
+ if (flags & IB_ACCESS_LOCAL_WRITE)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
+
+ if (flags & IB_ACCESS_REMOTE_WRITE)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
+
+ if (flags & IB_ACCESS_REMOTE_READ)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
+
+ if (flags & IB_ACCESS_MW_BIND)
+ wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
+
+ /* Fill in PBL info: */
+ if (ib_wr->wr.fast_reg.page_list_len >
+ pnesfrpl->ibfrpl.max_page_list_len) {
+ nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
+ " ib_wr=%p, value=%u, max=%u\n",
+ ib_wr, ib_wr->wr.fast_reg.page_list_len,
+ pnesfrpl->ibfrpl.max_page_list_len);
+ err = -EINVAL;
+ break;
+ }
+
+ set_wqe_64bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
+ pnesfrpl->nes_wqe_pbl.paddr);
+
+ set_wqe_32bit_value(wqe->wqe_words,
+ NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
+ ib_wr->wr.fast_reg.page_list_len * 8);
+
+ for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
+ dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+
+ nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, "
+ "length: %d, rkey: %0x, pgl_paddr: %llx, "
+ "page_list_len: %u, wqe_misc: %x\n",
+ (unsigned long long) ib_wr->wr.fast_reg.iova_start,
+ ib_wr->wr.fast_reg.length,
+ ib_wr->wr.fast_reg.rkey,
+ (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr,
+ ib_wr->wr.fast_reg.page_list_len,
+ wqe_misc);
+ break;
+ }
+ default:
+ /* error */
+ err = -EINVAL;
+ break;
}
+
+ if (err)
+ break;
+
+ if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
+ wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+
wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
ib_wr = ib_wr->next;
@@ -3355,8 +3525,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
(counter << 24) | 0x00800000 | nesqp->hwqp.qp_id);
}
- spin_unlock_irqrestore(&nesqp->lock, flags);
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -3383,21 +3554,29 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
u32 counter;
u32 total_payload_length;
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
+ if (nesqp->ibqp_state > IB_QPS_RTS) {
+ err = -EINVAL;
+ goto out;
+ }
- spin_lock_irqsave(&nesqp->lock, flags);
+ spin_lock_irqsave(&nesqp->lock, flags);
head = nesqp->hwqp.rq_head;
while (ib_wr) {
+ /* Check for QP error */
+ if (nesqp->term_flags) {
+ err = -EINVAL;
+ break;
+ }
+
if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
err = -EINVAL;
break;
}
/* Check for RQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
- err = -EINVAL;
+ err = -ENOMEM;
break;
}
@@ -3439,8 +3618,9 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id);
}
- spin_unlock_irqrestore(&nesqp->lock, flags);
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+out:
if (err)
*bad_wr = ib_wr;
return err;
@@ -3454,7 +3634,6 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
u64 u64temp;
u64 wrid;
- /* u64 u64temp; */
unsigned long flags = 0;
struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -3462,44 +3641,56 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
struct nes_qp *nesqp;
struct nes_hw_cqe cqe;
u32 head;
- u32 wq_tail;
+ u32 wq_tail = 0;
u32 cq_size;
u32 cqe_count = 0;
u32 wqe_index;
u32 u32temp;
- /* u32 counter; */
+ u32 move_cq_head = 1;
+ u32 err_code;
nes_debug(NES_DBG_CQ, "\n");
- spin_lock_irqsave(&nescq->lock, flags);
+ spin_lock_irqsave(&nescq->lock, flags);
head = nescq->hw_cq.cq_head;
cq_size = nescq->hw_cq.cq_size;
while (cqe_count < num_entries) {
- if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
- NES_CQE_VALID) {
- /*
- * Make sure we read CQ entry contents *after*
- * we've checked the valid bit.
- */
- rmb();
-
- cqe = nescq->hw_cq.cq_vbase[head];
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- wqe_index = u32temp &
- (nesdev->nesadapter->max_qp_wr - 1);
- u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
- /* parse CQE, get completion context from WQE (either rq or sq */
- u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
- ((u64)u32temp);
- nesqp = *((struct nes_qp **)&u64temp);
+ if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
+ NES_CQE_VALID) == 0)
+ break;
+
+ /*
+ * Make sure we read CQ entry contents *after*
+ * we've checked the valid bit.
+ */
+ rmb();
+
+ cqe = nescq->hw_cq.cq_vbase[head];
+ u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
+ u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+ /* parse CQE, get completion context from WQE (either rq or sq) */
+ u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+ ((u64)u32temp);
+
+ if (u64temp) {
+ nesqp = (struct nes_qp *)(unsigned long)u64temp;
memset(entry, 0, sizeof *entry);
if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
entry->status = IB_WC_SUCCESS;
} else {
- entry->status = IB_WC_WR_FLUSH_ERR;
+ err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
+ if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
+ entry->status = err_code & 0x0000ffff;
+
+ /* The rest of the cqe's will be marked as flushed */
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
+ cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
+ NES_IWARP_CQE_MINOR_FLUSH);
+ } else
+ entry->status = IB_WC_WR_FLUSH_ERR;
}
entry->qp = &nesqp->ibqp;
@@ -3508,20 +3699,18 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
if (nesqp->skip_lsmm) {
nesqp->skip_lsmm = 0;
- wq_tail = nesqp->hwqp.sq_tail++;
+ nesqp->hwqp.sq_tail++;
}
/* Working on a SQ Completion*/
- wq_tail = wqe_index;
- nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
- wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+ wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
- ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+ ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
- switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
case NES_IWARP_SQ_OP_RDMAW:
nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
@@ -3530,7 +3719,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
case NES_IWARP_SQ_OP_RDMAR:
nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
entry->opcode = IB_WC_RDMA_READ;
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
break;
case NES_IWARP_SQ_OP_SENDINV:
@@ -3540,34 +3729,61 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
nes_debug(NES_DBG_CQ, "Operation = Send.\n");
entry->opcode = IB_WC_SEND;
break;
+ case NES_IWARP_SQ_OP_LOCINV:
+ entry->opcode = IB_WC_LOCAL_INV;
+ break;
+ case NES_IWARP_SQ_OP_FAST_REG:
+ entry->opcode = IB_WC_FAST_REG_MR;
+ break;
+ }
+
+ nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
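+ /* On an error completion with WQEs still outstanding, hold the CQ head
+ * here so the remaining SQ WQEs can be reported as flushed on later polls.
+ */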
+ if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
+ move_cq_head = 0;
+ wq_tail = nesqp->hwqp.sq_tail;
}
} else {
/* Working on a RQ Completion*/
- wq_tail = wqe_index;
- nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
- wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
- ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
+ wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
+ ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
entry->opcode = IB_WC_RECV;
+
+ nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
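+ /* As on the SQ side, hold the CQ head if errored RQ WQEs remain to be flushed */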
+ if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
+ move_cq_head = 0;
+ wq_tail = nesqp->hwqp.rq_tail;
+ }
}
+
entry->wr_id = wrid;
+ entry++;
+ cqe_count++;
+ }
+ if (move_cq_head) {
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
if (++head >= cq_size)
head = 0;
- cqe_count++;
nescq->polled_completions++;
+
if ((nescq->polled_completions > (cq_size / 2)) ||
(nescq->polled_completions == 255)) {
nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
- " are pending %u of %u.\n",
- nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
+ " are pending %u of %u.\n",
+ nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+ nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
nescq->polled_completions = 0;
}
- entry++;
- } else
- break;
+ } else {
+ /* Update the wqe index and set status to flush */
+ wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
+ cpu_to_le32(wqe_index);
+ move_cq_head = 1; /* ready for next pass */
+ }
}
if (nescq->polled_completions) {
@@ -3580,7 +3796,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n",
cqe_count, nescq->hw_cq.cq_number);
- spin_unlock_irqrestore(&nescq->lock, flags);
+ spin_unlock_irqrestore(&nescq->lock, flags);
return cqe_count;
}
@@ -3664,7 +3880,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
nesibdev->ibdev.query_device = nes_query_device;
nesibdev->ibdev.query_port = nes_query_port;
- nesibdev->ibdev.modify_port = nes_modify_port;
nesibdev->ibdev.query_pkey = nes_query_pkey;
nesibdev->ibdev.query_gid = nes_query_gid;
nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext;
@@ -3689,10 +3904,9 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
nesibdev->ibdev.bind_mw = nes_bind_mw;
- nesibdev->ibdev.alloc_fmr = nes_alloc_fmr;
- nesibdev->ibdev.unmap_fmr = nes_unmap_fmr;
- nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr;
- nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr;
+ nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
+ nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
+ nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
nesibdev->ibdev.attach_mcast = nes_multicast_attach;
nesibdev->ibdev.detach_mcast = nes_multicast_detach;
@@ -3721,6 +3935,52 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
/**
+ * nes_handle_delayed_event
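+ * Timer callback: if the most recently recorded port event differs from the
+ * last one dispatched to the IB core, dispatch it now.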
+ */
+static void nes_handle_delayed_event(unsigned long data)
+{
+ struct nes_vnic *nesvnic = (void *) data;
+
+ if (nesvnic->delayed_event != nesvnic->last_dispatched_event) {
+ struct ib_event event;
+
+ event.device = &nesvnic->nesibdev->ibdev;
+ if (!event.device)
+ goto stop_timer;
+ event.event = nesvnic->delayed_event;
+ event.element.port_num = nesvnic->logical_port + 1;
+ ib_dispatch_event(&event);
+ }
+
+stop_timer:
+ nesvnic->event_timer.function = NULL;
+}
+
+
+void nes_port_ibevent(struct nes_vnic *nesvnic)
+{
+ struct nes_ib_device *nesibdev = nesvnic->nesibdev;
+ struct nes_device *nesdev = nesvnic->nesdev;
+ struct ib_event event;
+ event.device = &nesibdev->ibdev;
+ event.element.port_num = nesvnic->logical_port + 1;
+ event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+ if (!nesvnic->event_timer.function) {
+ ib_dispatch_event(&event);
+ nesvnic->last_dispatched_event = event.event;
+ nesvnic->event_timer.function = nes_handle_delayed_event;
+ nesvnic->event_timer.data = (unsigned long) nesvnic;
+ nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY;
+ add_timer(&nesvnic->event_timer);
+ } else {
+ mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY);
+ }
+ nesvnic->delayed_event = event.event;
+}
+
+
+/**
* nes_destroy_ofa_device
*/
void nes_destroy_ofa_device(struct nes_ib_device *nesibdev)
@@ -3745,7 +4005,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
struct nes_adapter *nesadapter = nesdev->nesadapter;
int i, ret;
- ret = ib_register_device(&nesvnic->nesibdev->ibdev);
+ ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
if (ret) {
return ret;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 6c6b4da5184..309b31c31ae 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -40,6 +40,10 @@ struct nes_device;
#define NES_MAX_USER_DB_REGIONS 4096
#define NES_MAX_USER_WQ_REGIONS 4096
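+/* nesqp->term_flags values tracking progress of the iWARP terminate exchange */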
+#define NES_TERM_SENT 0x01
+#define NES_TERM_RCVD 0x02
+#define NES_TERM_DONE 0x04
+
struct nes_ucontext {
struct ib_ucontext ibucontext;
struct nes_device *nesdev;
@@ -112,12 +116,18 @@ struct nes_cq {
spinlock_t lock;
u8 virtual_cq;
u8 pad[3];
+ u32 mcrqf;
};
struct nes_wq {
spinlock_t lock;
};
+struct disconn_work {
+ struct work_struct work;
+ struct nes_qp *nesqp;
+};
+
struct iw_cm_id;
struct ietf_mpa_frame;
@@ -125,19 +135,16 @@ struct nes_qp {
struct ib_qp ibqp;
void *allocated_buffer;
struct iw_cm_id *cm_id;
- struct workqueue_struct *wq;
- struct work_struct disconn_work;
struct nes_cq *nesscq;
struct nes_cq *nesrcq;
struct nes_pd *nespd;
void *cm_node; /* handle of the node this QP is associated with */
- struct ietf_mpa_frame *ietf_frame;
+ void *ietf_frame;
+ u8 ietf_frame_size;
dma_addr_t ietf_frame_pbase;
- wait_queue_head_t state_waitq;
- unsigned long socket;
+ struct ib_mr *lsmm_mr;
struct nes_hw_qp hwqp;
struct work_struct work;
- struct work_struct ae_work;
enum ib_qp_state ibqp_state;
u32 iwarp_state;
u32 hte_index;
@@ -148,22 +155,35 @@ struct nes_qp {
u32 mmap_sq_db_index;
u32 mmap_rq_db_index;
spinlock_t lock;
+ spinlock_t pau_lock;
struct nes_qp_context *nesqp_context;
dma_addr_t nesqp_context_pbase;
void *pbl_vbase;
dma_addr_t pbl_pbase;
struct page *page;
- wait_queue_head_t kick_waitq;
- u16 in_disconnect;
+ struct timer_list terminate_timer;
+ enum ib_event_type terminate_eventtype;
+ struct sk_buff_head pau_list;
+ u32 pau_rcv_nxt;
+ u16 active_conn:1;
+ u16 skip_lsmm:1;
+ u16 user_mode:1;
+ u16 hte_added:1;
+ u16 flush_issued:1;
+ u16 destroyed:1;
+ u16 sig_all:1;
+ u16 pau_mode:1;
+ u16 rsvd:8;
u16 private_data_len;
- u8 active_conn;
- u8 skip_lsmm;
- u8 user_mode;
- u8 hte_added;
+ u16 term_sq_flush_code;
+ u16 term_rq_flush_code;
u8 hw_iwarp_state;
- u8 flush_issued;
u8 hw_tcp_state;
- u8 disconn_pending;
- u8 destroyed;
+ u8 term_flags;
+ u8 sq_kmapped;
+ u8 pau_busy;
+ u8 pau_pending;
+ u8 pau_state;
+ __u64 nesuqp_addr;
};
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig
new file mode 100644
index 00000000000..c0cddc0192d
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/Kconfig
@@ -0,0 +1,8 @@
+config INFINIBAND_OCRDMA
+ tristate "Emulex One Connect HCA support"
+ depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n)
+ select NET_VENDOR_EMULEX
+ select BE2NET
+ ---help---
+ This driver provides low-level InfiniBand over Ethernet
+ support for Emulex One Connect host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile
new file mode 100644
index 00000000000..d1bfd4f4cdd
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/Makefile
@@ -0,0 +1,5 @@
+ccflags-y := -Idrivers/net/ethernet/emulex/benet
+
+obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o
+
+ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
new file mode 100644
index 00000000000..19011dbb930
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -0,0 +1,521 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_H__
+#define __OCRDMA_H__
+
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include <be_roce.h>
+#include "ocrdma_sli.h"
+
+#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+
+#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
+#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
+
+#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)"
+#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)"
+
+#define OC_SKH_DEVICE_PF 0x720
+#define OC_SKH_DEVICE_VF 0x728
+#define OCRDMA_MAX_AH 512
+
+#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
+
+#define convert_to_64bit(lo, hi) (((u64)(hi) << 32) | (u64)(lo))
+
+struct ocrdma_dev_attr {
+ u8 fw_ver[32];
+ u32 vendor_id;
+ u32 device_id;
+ u16 max_pd;
+ u16 max_cq;
+ u16 max_cqe;
+ u16 max_qp;
+ u16 max_wqe;
+ u16 max_rqe;
+ u16 max_srq;
+ u32 max_inline_data;
+ int max_send_sge;
+ int max_recv_sge;
+ int max_srq_sge;
+ int max_rdma_sge;
+ int max_mr;
+ u64 max_mr_size;
+ u32 max_num_mr_pbl;
+ int max_mw;
+ int max_fmr;
+ int max_map_per_fmr;
+ int max_pages_per_frmr;
+ u16 max_ord_per_qp;
+ u16 max_ird_per_qp;
+
+ int device_cap_flags;
+ u8 cq_overflow_detect;
+ u8 srq_supported;
+
+ u32 wqe_size;
+ u32 rqe_size;
+ u32 ird_page_size;
+ u8 local_ca_ack_delay;
+ u8 ird;
+ u8 num_ird_pages;
+};
+
+struct ocrdma_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+};
+
+struct ocrdma_pbl {
+ void *va;
+ dma_addr_t pa;
+};
+
+struct ocrdma_queue_info {
+ void *va;
+ dma_addr_t dma;
+ u32 size;
+ u16 len;
+ u16 entry_size; /* Size of an element in the queue */
+ u16 id; /* qid, where to ring the doorbell. */
+ u16 head, tail;
+ bool created;
+};
+
+struct ocrdma_eq {
+ struct ocrdma_queue_info q;
+ u32 vector;
+ int cq_cnt;
+ struct ocrdma_dev *dev;
+ char irq_name[32];
+};
+
+struct ocrdma_mq {
+ struct ocrdma_queue_info sq;
+ struct ocrdma_queue_info cq;
+ bool rearm_cq;
+};
+
+struct mqe_ctx {
+ struct mutex lock; /* for serializing mailbox commands on MQ */
+ wait_queue_head_t cmd_wait;
+ u32 tag;
+ u16 cqe_status;
+ u16 ext_status;
+ bool cmd_done;
+};
+
+struct ocrdma_hw_mr {
+ u32 lkey;
+ u8 fr_mr;
+ u8 remote_atomic;
+ u8 remote_rd;
+ u8 remote_wr;
+ u8 local_rd;
+ u8 local_wr;
+ u8 mw_bind;
+ u8 rsvd;
+ u64 len;
+ struct ocrdma_pbl *pbl_table;
+ u32 num_pbls;
+ u32 num_pbes;
+ u32 pbl_size;
+ u32 pbe_size;
+ u64 fbo;
+ u64 va;
+};
+
+struct ocrdma_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct ocrdma_hw_mr hwmr;
+};
+
+struct ocrdma_stats {
+ u8 type;
+ struct ocrdma_dev *dev;
+};
+
+struct stats_mem {
+ struct ocrdma_mqe mqe;
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+ char *debugfs_mem;
+};
+
+struct phy_info {
+ u16 auto_speeds_supported;
+ u16 fixed_speeds_supported;
+ u16 phy_type;
+ u16 interface_type;
+};
+
+struct ocrdma_dev {
+ struct ib_device ibdev;
+ struct ocrdma_dev_attr attr;
+
+ struct mutex dev_lock; /* provides synchronized access to device data */
+ spinlock_t flush_q_lock ____cacheline_aligned;
+
+ struct ocrdma_cq **cq_tbl;
+ struct ocrdma_qp **qp_tbl;
+
+ struct ocrdma_eq *eq_tbl;
+ int eq_cnt;
+ u16 base_eqid;
+ u16 max_eq;
+
+ union ib_gid *sgid_tbl;
+ /* provides synchronization to the sgid table for
+ * updating gid entries triggered by the notifier.
+ */
+ spinlock_t sgid_lock;
+
+ int gsi_qp_created;
+ struct ocrdma_cq *gsi_sqcq;
+ struct ocrdma_cq *gsi_rqcq;
+
+ struct {
+ struct ocrdma_av *va;
+ dma_addr_t pa;
+ u32 size;
+ u32 num_ah;
+ /* provide synchronization for av
+ * entry allocations.
+ */
+ spinlock_t lock;
+ u32 ahid;
+ struct ocrdma_pbl pbl;
+ } av_tbl;
+
+ void *mbx_cmd;
+ struct ocrdma_mq mq;
+ struct mqe_ctx mqe_ctx;
+
+ struct be_dev_info nic_info;
+ struct phy_info phy;
+ char model_number[32];
+ u32 hba_port_num;
+
+ struct list_head entry;
+ struct rcu_head rcu;
+ int id;
+ u64 stag_arr[OCRDMA_MAX_STAG];
+ u16 pvid;
+ u32 asic_id;
+
+ ulong last_stats_time;
+ struct mutex stats_lock; /* provide synch for debugfs operations */
+ struct stats_mem stats_mem;
+ struct ocrdma_stats rsrc_stats;
+ struct ocrdma_stats rx_stats;
+ struct ocrdma_stats wqe_stats;
+ struct ocrdma_stats tx_stats;
+ struct ocrdma_stats db_err_stats;
+ struct ocrdma_stats tx_qp_err_stats;
+ struct ocrdma_stats rx_qp_err_stats;
+ struct ocrdma_stats tx_dbg_stats;
+ struct ocrdma_stats rx_dbg_stats;
+ struct dentry *dir;
+};
+
+struct ocrdma_cq {
+ struct ib_cq ibcq;
+ struct ocrdma_cqe *va;
+ u32 phase;
+ u32 getp; /* pointer to pending wrs to
+ * return to stack; wraps around
+ * at max_hw_cqe
+ */
+ u32 max_hw_cqe;
+ bool phase_change;
+ bool deferred_arm, deferred_sol;
+ bool first_arm;
+
+ spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
+ * to cq polling
+ */
+ /* synchronizes the cq completion handler invoked from multiple contexts */
+ spinlock_t comp_handler_lock ____cacheline_aligned;
+ u16 id;
+ u16 eqn;
+
+ struct ocrdma_ucontext *ucontext;
+ dma_addr_t pa;
+ u32 len;
+ u32 cqe_cnt;
+
+ /* head of all qp's sq and rq for which cqes need to be flushed
+ * by the software.
+ */
+ struct list_head sq_head, rq_head;
+};
+
+struct ocrdma_pd {
+ struct ib_pd ibpd;
+ struct ocrdma_ucontext *uctx;
+ u32 id;
+ int num_dpp_qp;
+ u32 dpp_page;
+ bool dpp_enabled;
+};
+
+struct ocrdma_ah {
+ struct ib_ah ibah;
+ struct ocrdma_av *av;
+ u16 sgid_index;
+ u32 id;
+};
+
+struct ocrdma_qp_hwq_info {
+ u8 *va; /* virtual address */
+ u32 max_sges;
+ u32 head, tail;
+ u32 entry_size;
+ u32 max_cnt;
+ u32 max_wqe_idx;
+ u16 dbid; /* qid, where to ring the doorbell. */
+ u32 len;
+ dma_addr_t pa;
+};
+
+struct ocrdma_srq {
+ struct ib_srq ibsrq;
+ u8 __iomem *db;
+ struct ocrdma_qp_hwq_info rq;
+ u64 *rqe_wr_id_tbl;
+ u32 *idx_bit_fields;
+ u32 bit_fields_len;
+
+ /* provide synchronization to multiple context(s) posting rqe */
+ spinlock_t q_lock ____cacheline_aligned;
+
+ struct ocrdma_pd *pd;
+ u32 id;
+};
+
+struct ocrdma_qp {
+ struct ib_qp ibqp;
+ struct ocrdma_dev *dev;
+
+ u8 __iomem *sq_db;
+ struct ocrdma_qp_hwq_info sq;
+ struct {
+ uint64_t wrid;
+ uint16_t dpp_wqe_idx;
+ uint16_t dpp_wqe;
+ uint8_t signaled;
+ uint8_t rsvd[3];
+ } *wqe_wr_id_tbl;
+ u32 max_inline_data;
+
+ /* provide synchronization to multiple context(s) posting wqe, rqe */
+ spinlock_t q_lock ____cacheline_aligned;
+ struct ocrdma_cq *sq_cq;
+ /* list maintained per CQ to flush SQ errors */
+ struct list_head sq_entry;
+
+ u8 __iomem *rq_db;
+ struct ocrdma_qp_hwq_info rq;
+ u64 *rqe_wr_id_tbl;
+ struct ocrdma_cq *rq_cq;
+ struct ocrdma_srq *srq;
+ /* list maintained per CQ to flush RQ errors */
+ struct list_head rq_entry;
+
+ enum ocrdma_qp_state state; /* QP state */
+ int cap_flags;
+ u32 max_ord, max_ird;
+
+ u32 id;
+ struct ocrdma_pd *pd;
+
+ enum ib_qp_type qp_type;
+
+ int sgid_idx;
+ u32 qkey;
+ bool dpp_enabled;
+ u8 *ird_q_va;
+ bool signaled;
+};
+
+struct ocrdma_ucontext {
+ struct ib_ucontext ibucontext;
+
+ struct list_head mm_head;
+ struct mutex mm_list_lock; /* protects list entries of mm type */
+ struct ocrdma_pd *cntxt_pd;
+ int pd_in_use;
+
+ struct {
+ u32 *va;
+ dma_addr_t pa;
+ u32 len;
+ } ah_tbl;
+};
+
+struct ocrdma_mm {
+ struct {
+ u64 phy_addr;
+ unsigned long len;
+ } key;
+ struct list_head entry;
+};
+
+static inline struct ocrdma_dev *get_ocrdma_dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct ocrdma_dev, ibdev);
+}
+
+static inline struct ocrdma_ucontext *get_ocrdma_ucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct ocrdma_ucontext, ibucontext);
+}
+
+static inline struct ocrdma_pd *get_ocrdma_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct ocrdma_pd, ibpd);
+}
+
+static inline struct ocrdma_cq *get_ocrdma_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct ocrdma_cq, ibcq);
+}
+
+static inline struct ocrdma_qp *get_ocrdma_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct ocrdma_qp, ibqp);
+}
+
+static inline struct ocrdma_mr *get_ocrdma_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct ocrdma_mr, ibmr);
+}
+
+static inline struct ocrdma_ah *get_ocrdma_ah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct ocrdma_ah, ibah);
+}
+
+static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct ocrdma_srq, ibsrq);
+}
+
+static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
+{
+ int cqe_valid;
+ cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID;
+ return (cqe_valid == cq->phase);
+}
+
+static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe)
+{
+ return (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_QTYPE) ? 0 : 1;
+}
+
+static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe)
+{
+ return (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_INVALIDATE) ? 1 : 0;
+}
+
+static inline int is_cqe_imm(struct ocrdma_cqe *cqe)
+{
+ return (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_IMM) ? 1 : 0;
+}
+
+static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
+{
+ return (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
+}
+
+static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
+ struct ib_ah_attr *ah_attr, u8 *mac_addr)
+{
+ struct in6_addr in6;
+
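+ /* A multicast GID maps directly to a multicast MAC; otherwise use the
+ * destination MAC already resolved into the AH attributes.
+ */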
+ memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6))
+ rdma_get_mcast_mac(&in6, mac_addr);
+ else
+ memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+ return 0;
+}
+
+static inline char *hca_name(struct ocrdma_dev *dev)
+{
+ switch (dev->nic_info.pdev->device) {
+ case OC_SKH_DEVICE_PF:
+ case OC_SKH_DEVICE_VF:
+ return OC_NAME_SH;
+ default:
+ return OC_NAME_UNKNOWN;
+ }
+}
+
+static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev,
+ int eqid)
+{
+ int indx;
+
+ for (indx = 0; indx < dev->eq_cnt; indx++) {
+ if (dev->eq_tbl[indx].q.id == eqid)
+ return indx;
+ }
+
+ return -EINVAL;
+}
+
+static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
+{
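+ /* Read the ASIC ID from PCI config space on first use and cache it */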
+ if (dev->nic_info.dev_family == 0xF && !dev->asic_id) {
+ pci_read_config_dword(
+ dev->nic_info.pdev,
+ OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id);
+ }
+
+ return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >>
+ OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
new file mode 100644
index 00000000000..1554cca5712
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
@@ -0,0 +1,134 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_ABI_H__
+#define __OCRDMA_ABI_H__
+
+#define OCRDMA_ABI_VERSION 2
+#define OCRDMA_BE_ROCE_ABI_VERSION 1
+/* user kernel communication data structures. */
+
+struct ocrdma_alloc_ucontext_resp {
+ u32 dev_id;
+ u32 wqe_size;
+ u32 max_inline_data;
+ u32 dpp_wqe_size;
+ u64 ah_tbl_page;
+ u32 ah_tbl_len;
+ u32 rqe_size;
+ u8 fw_ver[32];
+ /* for future use/new features in progress */
+ u64 rsvd1;
+ u64 rsvd2;
+};
+
+struct ocrdma_alloc_pd_ureq {
+ u64 rsvd1;
+};
+
+struct ocrdma_alloc_pd_uresp {
+ u32 id;
+ u32 dpp_enabled;
+ u32 dpp_page_addr_hi;
+ u32 dpp_page_addr_lo;
+ u64 rsvd1;
+};
+
+struct ocrdma_create_cq_ureq {
+ u32 dpp_cq;
+ u32 rsvd; /* pad */
+};
+
+#define MAX_CQ_PAGES 8
+struct ocrdma_create_cq_uresp {
+ u32 cq_id;
+ u32 page_size;
+ u32 num_pages;
+ u32 max_hw_cqe;
+ u64 page_addr[MAX_CQ_PAGES];
+ u64 db_page_addr;
+ u32 db_page_size;
+ u32 phase_change;
+ /* for future use/new features in progress */
+ u64 rsvd1;
+ u64 rsvd2;
+};
+
+#define MAX_QP_PAGES 8
+#define MAX_UD_AV_PAGES 8
+
+struct ocrdma_create_qp_ureq {
+ u8 enable_dpp_cq;
+ u8 rsvd;
+ u16 dpp_cq_id;
+ u32 rsvd1; /* pad */
+};
+
+struct ocrdma_create_qp_uresp {
+ u16 qp_id;
+ u16 sq_dbid;
+ u16 rq_dbid;
+ u16 resv0; /* pad */
+ u32 sq_page_size;
+ u32 rq_page_size;
+ u32 num_sq_pages;
+ u32 num_rq_pages;
+ u64 sq_page_addr[MAX_QP_PAGES];
+ u64 rq_page_addr[MAX_QP_PAGES];
+ u64 db_page_addr;
+ u32 db_page_size;
+ u32 dpp_credit;
+ u32 dpp_offset;
+ u32 num_wqe_allocated;
+ u32 num_rqe_allocated;
+ u32 db_sq_offset;
+ u32 db_rq_offset;
+ u32 db_shift;
+ u64 rsvd[11];
+} __packed;
+
+struct ocrdma_create_srq_uresp {
+ u16 rq_dbid;
+ u16 resv0; /* pad */
+ u32 resv1;
+
+ u32 rq_page_size;
+ u32 num_rq_pages;
+
+ u64 rq_page_addr[MAX_QP_PAGES];
+ u64 db_page_addr;
+
+ u32 db_page_size;
+ u32 num_rqe_allocated;
+ u32 db_rq_offset;
+ u32 db_shift;
+
+ u64 rsvd2;
+ u64 rsvd3;
+};
+
+#endif /* __OCRDMA_ABI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
new file mode 100644
index 00000000000..d4cc01f10c0
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -0,0 +1,175 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <net/neighbour.h>
+#include <net/netevent.h>
+
+#include <rdma/ib_addr.h>
+
+#include "ocrdma.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_ah.h"
+#include "ocrdma_hw.h"
+
+static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
+ struct ib_ah_attr *attr, int pdid)
+{
+ int status = 0;
+ u16 vlan_tag;
+ bool vlan_enabled = false;
+ struct ocrdma_eth_vlan eth;
+ struct ocrdma_grh grh;
+ int eth_sz;
+
+ memset(&eth, 0, sizeof(eth));
+ memset(&grh, 0, sizeof(grh));
+
+ ah->sgid_index = attr->grh.sgid_index;
+
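+ /* Use the VLAN tag from the AH attributes, falling back to the port PVID;
+ * the SL is carried in the VLAN PCP bits.
+ */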
+ vlan_tag = attr->vlan_id;
+ if (!vlan_tag || (vlan_tag > 0xFFF))
+ vlan_tag = dev->pvid;
+ if (vlan_tag && (vlan_tag < 0x1000)) {
+ eth.eth_type = cpu_to_be16(0x8100);
+ eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+ vlan_tag |= (attr->sl & 7) << 13;
+ eth.vlan_tag = cpu_to_be16(vlan_tag);
+ eth_sz = sizeof(struct ocrdma_eth_vlan);
+ vlan_enabled = true;
+ } else {
+ eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+ eth_sz = sizeof(struct ocrdma_eth_basic);
+ }
+ memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
+ memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
+ status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
+ if (status)
+ return status;
+ status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
+ (union ib_gid *)&grh.sgid[0]);
+ if (status)
+ return status;
+
+ grh.tclass_flow = cpu_to_be32((6 << 28) |
+ (attr->grh.traffic_class << 24) |
+ attr->grh.flow_label);
+ /* 0x1b is next header value in GRH */
+ grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
+ (0x1b << 8) | attr->grh.hop_limit);
+
+ memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
+ memcpy(&ah->av->eth_hdr, &eth, eth_sz);
+ memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
+ if (vlan_enabled)
+ ah->av->valid |= OCRDMA_AV_VLAN_VALID;
+ ah->av->valid = cpu_to_le32(ah->av->valid);
+ return status;
+}
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+{
+ u32 *ahid_addr;
+ int status;
+ struct ocrdma_ah *ah;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+
+ if (!(attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
+
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ status = ocrdma_alloc_av(dev, ah);
+ if (status)
+ goto av_err;
+ status = set_av_attr(dev, ah, attr, pd->id);
+ if (status)
+ goto av_conf_err;
+
+ /* if pd is for the user process, pass the ah_id to user space */
+ if ((pd->uctx) && (pd->uctx->ah_tbl.va)) {
+ ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
+ *ahid_addr = ah->id;
+ }
+ return &ah->ibah;
+
+av_conf_err:
+ ocrdma_free_av(dev, ah);
+av_err:
+ kfree(ah);
+ return ERR_PTR(status);
+}
+
+int ocrdma_destroy_ah(struct ib_ah *ibah)
+{
+ struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
+
+ ocrdma_free_av(dev, ah);
+ kfree(ah);
+ return 0;
+}
+
+int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
+ struct ocrdma_av *av = ah->av;
+ struct ocrdma_grh *grh;
+ attr->ah_flags |= IB_AH_GRH;
+ if (ah->av->valid & Bit(1)) {
+ grh = (struct ocrdma_grh *)((u8 *)ah->av +
+ sizeof(struct ocrdma_eth_vlan));
+ attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
+ } else {
+ grh = (struct ocrdma_grh *)((u8 *)ah->av +
+ sizeof(struct ocrdma_eth_basic));
+ attr->sl = 0;
+ }
+ memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid));
+ attr->grh.sgid_index = ah->sgid_index;
+ attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff;
+ attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24;
+ attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffff;
+ return 0;
+}
+
+int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+{
+ /* modify_ah is unsupported */
+ return -ENOSYS;
+}
+
+int ocrdma_process_mad(struct ib_device *ibdev,
+ int process_mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ return IB_MAD_RESULT_SUCCESS;
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
new file mode 100644
index 00000000000..8ac49e7f96d
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -0,0 +1,42 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_AH_H__
+#define __OCRDMA_AH_H__
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
+int ocrdma_destroy_ah(struct ib_ah *);
+int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
+int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
+
+int ocrdma_process_mad(struct ib_device *,
+ int process_mad_flags,
+ u8 port_num,
+ struct ib_wc *in_wc,
+ struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+#endif /* __OCRDMA_AH_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
new file mode 100644
index 00000000000..3bbf2010a82
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -0,0 +1,2737 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) CNA Adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/log2.h>
+#include <linux/dma-mapping.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_ah.h"
+
+enum mbx_status {
+ OCRDMA_MBX_STATUS_FAILED = 1,
+ OCRDMA_MBX_STATUS_ILLEGAL_FIELD = 3,
+ OCRDMA_MBX_STATUS_OOR = 100,
+ OCRDMA_MBX_STATUS_INVALID_PD = 101,
+ OCRDMA_MBX_STATUS_PD_INUSE = 102,
+ OCRDMA_MBX_STATUS_INVALID_CQ = 103,
+ OCRDMA_MBX_STATUS_INVALID_QP = 104,
+ OCRDMA_MBX_STATUS_INVALID_LKEY = 105,
+ OCRDMA_MBX_STATUS_ORD_EXCEEDS = 106,
+ OCRDMA_MBX_STATUS_IRD_EXCEEDS = 107,
+ OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS = 108,
+ OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS = 109,
+ OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS = 110,
+ OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS = 111,
+ OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS = 112,
+ OCRDMA_MBX_STATUS_INVALID_STATE_CHANGE = 113,
+ OCRDMA_MBX_STATUS_MW_BOUND = 114,
+ OCRDMA_MBX_STATUS_INVALID_VA = 115,
+ OCRDMA_MBX_STATUS_INVALID_LENGTH = 116,
+ OCRDMA_MBX_STATUS_INVALID_FBO = 117,
+ OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS = 118,
+ OCRDMA_MBX_STATUS_INVALID_PBE_SIZE = 119,
+ OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY = 120,
+ OCRDMA_MBX_STATUS_INVALID_PBL_SHIFT = 121,
+ OCRDMA_MBX_STATUS_INVALID_SRQ_ID = 129,
+ OCRDMA_MBX_STATUS_SRQ_ERROR = 133,
+ OCRDMA_MBX_STATUS_RQE_EXCEEDS = 134,
+ OCRDMA_MBX_STATUS_MTU_EXCEEDS = 135,
+ OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS = 136,
+ OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS = 137,
+ OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS = 138,
+ OCRDMA_MBX_STATUS_QP_BOUND = 130,
+ OCRDMA_MBX_STATUS_INVALID_CHANGE = 139,
+ OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP = 140,
+ OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER = 141,
+ OCRDMA_MBX_STATUS_MW_STILL_BOUND = 142,
+ OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID = 143,
+ OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS = 144
+};
+
+enum additional_status {
+ OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES = 22
+};
+
+enum cqe_status {
+ OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES = 1,
+ OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER = 2,
+ OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES = 3,
+ OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING = 4,
+ OCRDMA_MBX_CQE_STATUS_DMA_FAILED = 5
+};
+
+static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq)
+{
+ return eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
+}
+
+static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq)
+{
+ eq->q.tail = (eq->q.tail + 1) & (OCRDMA_EQ_LEN - 1);
+}
+
+static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev)
+{
+ struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *)
+ (dev->mq.cq.va + (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
+
+ if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK))
+ return NULL;
+ return cqe;
+}
+
+static inline void ocrdma_mcq_inc_tail(struct ocrdma_dev *dev)
+{
+ dev->mq.cq.tail = (dev->mq.cq.tail + 1) & (OCRDMA_MQ_CQ_LEN - 1);
+}
+
+static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
+{
+ return dev->mq.sq.va + (dev->mq.sq.head * sizeof(struct ocrdma_mqe));
+}
+
+static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
+{
+ dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1);
+}
+
+static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
+{
+ return dev->mq.sq.va + (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe));
+}
+
+enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
+{
+ switch (qps) {
+ case OCRDMA_QPS_RST:
+ return IB_QPS_RESET;
+ case OCRDMA_QPS_INIT:
+ return IB_QPS_INIT;
+ case OCRDMA_QPS_RTR:
+ return IB_QPS_RTR;
+ case OCRDMA_QPS_RTS:
+ return IB_QPS_RTS;
+ case OCRDMA_QPS_SQD:
+ case OCRDMA_QPS_SQ_DRAINING:
+ return IB_QPS_SQD;
+ case OCRDMA_QPS_SQE:
+ return IB_QPS_SQE;
+ case OCRDMA_QPS_ERR:
+ return IB_QPS_ERR;
+ }
+ return IB_QPS_ERR;
+}
+
+static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
+{
+ switch (qps) {
+ case IB_QPS_RESET:
+ return OCRDMA_QPS_RST;
+ case IB_QPS_INIT:
+ return OCRDMA_QPS_INIT;
+ case IB_QPS_RTR:
+ return OCRDMA_QPS_RTR;
+ case IB_QPS_RTS:
+ return OCRDMA_QPS_RTS;
+ case IB_QPS_SQD:
+ return OCRDMA_QPS_SQD;
+ case IB_QPS_SQE:
+ return OCRDMA_QPS_SQE;
+ case IB_QPS_ERR:
+ return OCRDMA_QPS_ERR;
+ }
+ return OCRDMA_QPS_ERR;
+}
+
+static int ocrdma_get_mbx_errno(u32 status)
+{
+ int err_num;
+ u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >>
+ OCRDMA_MBX_RSP_STATUS_SHIFT;
+ u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >>
+ OCRDMA_MBX_RSP_ASTATUS_SHIFT;
+
+ switch (mbox_status) {
+ case OCRDMA_MBX_STATUS_OOR:
+ case OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS:
+ err_num = -EAGAIN;
+ break;
+
+ case OCRDMA_MBX_STATUS_INVALID_PD:
+ case OCRDMA_MBX_STATUS_INVALID_CQ:
+ case OCRDMA_MBX_STATUS_INVALID_SRQ_ID:
+ case OCRDMA_MBX_STATUS_INVALID_QP:
+ case OCRDMA_MBX_STATUS_INVALID_CHANGE:
+ case OCRDMA_MBX_STATUS_MTU_EXCEEDS:
+ case OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER:
+ case OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID:
+ case OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS:
+ case OCRDMA_MBX_STATUS_ILLEGAL_FIELD:
+ case OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY:
+ case OCRDMA_MBX_STATUS_INVALID_LKEY:
+ case OCRDMA_MBX_STATUS_INVALID_VA:
+ case OCRDMA_MBX_STATUS_INVALID_LENGTH:
+ case OCRDMA_MBX_STATUS_INVALID_FBO:
+ case OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS:
+ case OCRDMA_MBX_STATUS_INVALID_PBE_SIZE:
+ case OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP:
+ case OCRDMA_MBX_STATUS_SRQ_ERROR:
+ case OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS:
+ err_num = -EINVAL;
+ break;
+
+ case OCRDMA_MBX_STATUS_PD_INUSE:
+ case OCRDMA_MBX_STATUS_QP_BOUND:
+ case OCRDMA_MBX_STATUS_MW_STILL_BOUND:
+ case OCRDMA_MBX_STATUS_MW_BOUND:
+ err_num = -EBUSY;
+ break;
+
+ case OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS:
+ case OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS:
+ case OCRDMA_MBX_STATUS_RQE_EXCEEDS:
+ case OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS:
+ case OCRDMA_MBX_STATUS_ORD_EXCEEDS:
+ case OCRDMA_MBX_STATUS_IRD_EXCEEDS:
+ case OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS:
+ case OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS:
+ case OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS:
+ err_num = -ENOBUFS;
+ break;
+
+ case OCRDMA_MBX_STATUS_FAILED:
+ switch (add_status) {
+ case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
+ err_num = -EAGAIN;
+ break;
+ default:
+ err_num = -EFAULT;
+ }
+ break;
+ default:
+ err_num = -EFAULT;
+ }
+ return err_num;
+}
+
+char *port_speed_string(struct ocrdma_dev *dev)
+{
+ char *str = "";
+ u16 speeds_supported;
+
+ speeds_supported = dev->phy.fixed_speeds_supported |
+ dev->phy.auto_speeds_supported;
+ if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS)
+ str = "40Gbps ";
+ else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS)
+ str = "10Gbps ";
+ else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS)
+ str = "1Gbps ";
+
+ return str;
+}
+
+static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
+{
+ int err_num = -EINVAL;
+
+ switch (cqe_status) {
+ case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES:
+ err_num = -EPERM;
+ break;
+ case OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER:
+ err_num = -EINVAL;
+ break;
+ case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES:
+ case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING:
+ err_num = -EINVAL;
+ break;
+ case OCRDMA_MBX_CQE_STATUS_DMA_FAILED:
+ default:
+ err_num = -EINVAL;
+ break;
+ }
+ return err_num;
+}
+
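+/* Build the CQ doorbell word: the CQ id (with its extension bits), the
+ * optional re-arm and solicit flags and the number of CQEs consumed,
+ * then write it to the CQ doorbell register.
+ */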
+void ocrdma_ring_cq_db(struct ocrdma_dev *dev, u16 cq_id, bool armed,
+ bool solicited, u16 cqe_popped)
+{
+ u32 val = cq_id & OCRDMA_DB_CQ_RING_ID_MASK;
+
+ val |= ((cq_id & OCRDMA_DB_CQ_RING_ID_EXT_MASK) <<
+ OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT);
+
+ if (armed)
+ val |= (1 << OCRDMA_DB_CQ_REARM_SHIFT);
+ if (solicited)
+ val |= (1 << OCRDMA_DB_CQ_SOLICIT_SHIFT);
+ val |= (cqe_popped << OCRDMA_DB_CQ_NUM_POPPED_SHIFT);
+ iowrite32(val, dev->nic_info.db + OCRDMA_DB_CQ_OFFSET);
+}
+
+static void ocrdma_ring_mq_db(struct ocrdma_dev *dev)
+{
+ u32 val = 0;
+
+ val |= dev->mq.sq.id & OCRDMA_MQ_ID_MASK;
+ val |= 1 << OCRDMA_MQ_NUM_MQE_SHIFT;
+ iowrite32(val, dev->nic_info.db + OCRDMA_DB_MQ_OFFSET);
+}
+
+static void ocrdma_ring_eq_db(struct ocrdma_dev *dev, u16 eq_id,
+ bool arm, bool clear_int, u16 num_eqe)
+{
+ u32 val = 0;
+
+ val |= eq_id & OCRDMA_EQ_ID_MASK;
+ val |= ((eq_id & OCRDMA_EQ_ID_EXT_MASK) << OCRDMA_EQ_ID_EXT_MASK_SHIFT);
+ if (arm)
+ val |= (1 << OCRDMA_REARM_SHIFT);
+ if (clear_int)
+ val |= (1 << OCRDMA_EQ_CLR_SHIFT);
+ val |= (1 << OCRDMA_EQ_TYPE_SHIFT);
+ val |= (num_eqe << OCRDMA_NUM_EQE_SHIFT);
+ iowrite32(val, dev->nic_info.db + OCRDMA_DB_EQ_OFFSET);
+}
+
+static void ocrdma_init_mch(struct ocrdma_mbx_hdr *cmd_hdr,
+ u8 opcode, u8 subsys, u32 cmd_len)
+{
+ cmd_hdr->subsys_op = (opcode | (subsys << OCRDMA_MCH_SUBSYS_SHIFT));
+ cmd_hdr->timeout = 20; /* seconds */
+ cmd_hdr->cmd_len = cmd_len - sizeof(struct ocrdma_mbx_hdr);
+}
+
+static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
+{
+ struct ocrdma_mqe *mqe;
+
+ mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
+ if (!mqe)
+ return NULL;
+ mqe->hdr.spcl_sge_cnt_emb |=
+ (OCRDMA_MQE_EMBEDDED << OCRDMA_MQE_HDR_EMB_SHIFT) &
+ OCRDMA_MQE_HDR_EMB_MASK;
+ mqe->hdr.pyld_len = cmd_len - sizeof(struct ocrdma_mqe_hdr);
+
+ ocrdma_init_mch(&mqe->u.emb_req.mch, opcode, OCRDMA_SUBSYS_ROCE,
+ mqe->hdr.pyld_len);
+ return mqe;
+}
+
+static void *ocrdma_alloc_mqe(void)
+{
+ return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
+}
+
+static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
+{
+ dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
+}
+
+static int ocrdma_alloc_q(struct ocrdma_dev *dev,
+ struct ocrdma_queue_info *q, u16 len, u16 entry_size)
+{
+ memset(q, 0, sizeof(*q));
+ q->len = len;
+ q->entry_size = entry_size;
+ q->size = len * entry_size;
+ q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size,
+ &q->dma, GFP_KERNEL);
+ if (!q->va)
+ return -ENOMEM;
+ memset(q->va, 0, q->size);
+ return 0;
+}
+
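+/* Fill the PA array with the per-page DMA addresses that back a queue. */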
+static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
+ dma_addr_t host_pa, int hw_page_size)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ q_pa[i].lo = (u32) (host_pa & 0xffffffff);
+ q_pa[i].hi = (u32) upper_32_bits(host_pa);
+ host_pa += hw_page_size;
+ }
+}
+
+static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev,
+ struct ocrdma_queue_info *q, int queue_type)
+{
+ u8 opcode = 0;
+ int status;
+ struct ocrdma_delete_q_req *cmd = dev->mbx_cmd;
+
+ switch (queue_type) {
+ case QTYPE_MCCQ:
+ opcode = OCRDMA_CMD_DELETE_MQ;
+ break;
+ case QTYPE_CQ:
+ opcode = OCRDMA_CMD_DELETE_CQ;
+ break;
+ case QTYPE_EQ:
+ opcode = OCRDMA_CMD_DELETE_EQ;
+ break;
+ default:
+ BUG();
+ }
+ memset(cmd, 0, sizeof(*cmd));
+ ocrdma_init_mch(&cmd->req, opcode, OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+ cmd->id = q->id;
+
+ status = be_roce_mcc_cmd(dev->nic_info.netdev,
+ cmd, sizeof(*cmd), NULL, NULL);
+ if (!status)
+ q->created = false;
+ return status;
+}
+
+static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+ int status;
+ struct ocrdma_create_eq_req *cmd = dev->mbx_cmd;
+ struct ocrdma_create_eq_rsp *rsp = dev->mbx_cmd;
+
+ memset(cmd, 0, sizeof(*cmd));
+ ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON,
+ sizeof(*cmd));
+
+ cmd->req.rsvd_version = 2;
+ cmd->num_pages = 4;
+ cmd->valid = OCRDMA_CREATE_EQ_VALID;
+ cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT;
+
+ ocrdma_build_q_pages(&cmd->pa[0], cmd->num_pages, eq->q.dma,
+ PAGE_SIZE_4K);
+ status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL,
+ NULL);
+ if (!status) {
+ eq->q.id = rsp->vector_eqid & 0xffff;
+ eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
+ eq->q.created = true;
+ }
+ return status;
+}
+
+static int ocrdma_create_eq(struct ocrdma_dev *dev,
+ struct ocrdma_eq *eq, u16 q_len)
+{
+ int status;
+
+ status = ocrdma_alloc_q(dev, &eq->q, OCRDMA_EQ_LEN,
+ sizeof(struct ocrdma_eqe));
+ if (status)
+ return status;
+
+ status = ocrdma_mbx_create_eq(dev, eq);
+ if (status)
+ goto mbx_err;
+ eq->dev = dev;
+ ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+
+ return 0;
+mbx_err:
+ ocrdma_free_q(dev, &eq->q);
+ return status;
+}
+
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+ int irq;
+
+ if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
+ irq = dev->nic_info.pdev->irq;
+ else
+ irq = dev->nic_info.msix.vector_list[eq->vector];
+ return irq;
+}
+
+static void _ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+ if (eq->q.created) {
+ ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ);
+ ocrdma_free_q(dev, &eq->q);
+ }
+}
+
+static void ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+{
+ int irq;
+
+ /* disarm the EQ so that no interrupts are generated while it is
+ * being freed and the EQ delete command is in progress.
+ */
+ ocrdma_ring_eq_db(dev, eq->q.id, false, false, 0);
+
+ irq = ocrdma_get_irq(dev, eq);
+ free_irq(irq, eq);
+ _ocrdma_destroy_eq(dev, eq);
+}
+
+static void ocrdma_destroy_eqs(struct ocrdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->eq_cnt; i++)
+ ocrdma_destroy_eq(dev, &dev->eq_tbl[i]);
+}
+
+static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
+ struct ocrdma_queue_info *cq,
+ struct ocrdma_queue_info *eq)
+{
+ struct ocrdma_create_cq_cmd *cmd = dev->mbx_cmd;
+ struct ocrdma_create_cq_cmd_rsp *rsp = dev->mbx_cmd;
+ int status;
+
+ memset(cmd, 0, sizeof(*cmd));
+ ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+ cmd->req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
+ cmd->pgsz_pgcnt = (cq->size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+ OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
+ cmd->pgsz_pgcnt |= PAGES_4K_SPANNED(cq->va, cq->size);
+
+ cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
+ cmd->eqn = eq->id;
+ cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
+
+ ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
+ cq->dma, PAGE_SIZE_4K);
+ status = be_roce_mcc_cmd(dev->nic_info.netdev,
+ cmd, sizeof(*cmd), NULL, NULL);
+ if (!status) {
+ cq->id = (u16) (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
+ cq->created = true;
+ }
+ return status;
+}
+
+static u32 ocrdma_encoded_q_len(int q_len)
+{
+ u32 len_encoded = fls(q_len); /* log2(len) + 1 */
+
+ if (len_encoded == 16)
+ len_encoded = 0;
+ return len_encoded;
+}
+
+static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
+ struct ocrdma_queue_info *mq,
+ struct ocrdma_queue_info *cq)
+{
+ int num_pages, status;
+ struct ocrdma_create_mq_req *cmd = dev->mbx_cmd;
+ struct ocrdma_create_mq_rsp *rsp = dev->mbx_cmd;
+ struct ocrdma_pa *pa;
+
+ memset(cmd, 0, sizeof(*cmd));
+ num_pages = PAGES_4K_SPANNED(mq->va, mq->size);
+
+ ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+ cmd->req.rsvd_version = 1;
+ cmd->cqid_pages = num_pages;
+ cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
+ cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
+
+ cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE);
+ cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE);
+
+ cmd->async_cqid_ringsize = cq->id;
+ cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
+ OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
+ cmd->valid = OCRDMA_CREATE_MQ_VALID;
+ pa = &cmd->pa[0];
+
+ ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K);
+ status = be_roce_mcc_cmd(dev->nic_info.netdev,
+ cmd, sizeof(*cmd), NULL, NULL);
+ if (!status) {
+ mq->id = rsp->id;
+ mq->created = true;
+ }
+ return status;
+}
+
+static int ocrdma_create_mq(struct ocrdma_dev *dev)
+{
+ int status;
+
+ /* Alloc completion queue for Mailbox queue */
+ status = ocrdma_alloc_q(dev, &dev->mq.cq, OCRDMA_MQ_CQ_LEN,
+ sizeof(struct ocrdma_mcqe));
+ if (status)
+ goto alloc_err;
+
+ dev->eq_tbl[0].cq_cnt++;
+ status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
+ if (status)
+ goto mbx_cq_free;
+
+ memset(&dev->mqe_ctx, 0, sizeof(dev->mqe_ctx));
+ init_waitqueue_head(&dev->mqe_ctx.cmd_wait);
+ mutex_init(&dev->mqe_ctx.lock);
+
+ /* Alloc Mailbox queue */
+ status = ocrdma_alloc_q(dev, &dev->mq.sq, OCRDMA_MQ_LEN,
+ sizeof(struct ocrdma_mqe));
+ if (status)
+ goto mbx_cq_destroy;
+ status = ocrdma_mbx_create_mq(dev, &dev->mq.sq, &dev->mq.cq);
+ if (status)
+ goto mbx_q_free;
+ ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, 0);
+ return 0;
+
+mbx_q_free:
+ ocrdma_free_q(dev, &dev->mq.sq);
+mbx_cq_destroy:
+ ocrdma_mbx_delete_q(dev, &dev->mq.cq, QTYPE_CQ);
+mbx_cq_free:
+ ocrdma_free_q(dev, &dev->mq.cq);
+alloc_err:
+ return status;
+}
+
+static void ocrdma_destroy_mq(struct ocrdma_dev *dev)
+{
+ struct ocrdma_queue_info *mbxq, *cq;
+
+ /* mqe_ctx lock synchronizes with any other pending cmds. */
+ mutex_lock(&dev->mqe_ctx.lock);
+ mbxq = &dev->mq.sq;
+ if (mbxq->created) {
+ ocrdma_mbx_delete_q(dev, mbxq, QTYPE_MCCQ);
+ ocrdma_free_q(dev, mbxq);
+ }
+ mutex_unlock(&dev->mqe_ctx.lock);
+
+ cq = &dev->mq.cq;
+ if (cq->created) {
+ ocrdma_mbx_delete_q(dev, cq, QTYPE_CQ);
+ ocrdma_free_q(dev, cq);
+ }
+}
+
+static void ocrdma_process_qpcat_error(struct ocrdma_dev *dev,
+ struct ocrdma_qp *qp)
+{
+ enum ib_qp_state new_ib_qps = IB_QPS_ERR;
+ enum ib_qp_state old_ib_qps;
+
+ if (qp == NULL)
+ BUG();
+ ocrdma_qp_state_change(qp, new_ib_qps, &old_ib_qps);
+}
+
+static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
+ struct ocrdma_ae_mcqe *cqe)
+{
+ struct ocrdma_qp *qp = NULL;
+ struct ocrdma_cq *cq = NULL;
+ struct ib_event ib_evt = { 0 };
+ int cq_event = 0;
+ int qp_event = 1;
+ int srq_event = 0;
+ int dev_event = 0;
+ int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
+ OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+
+ if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID)
+ qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK];
+ if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
+ cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+
+ ib_evt.device = &dev->ibdev;
+
+ switch (type) {
+ case OCRDMA_CQ_ERROR:
+ ib_evt.element.cq = &cq->ibcq;
+ ib_evt.event = IB_EVENT_CQ_ERR;
+ cq_event = 1;
+ qp_event = 0;
+ break;
+ case OCRDMA_CQ_OVERRUN_ERROR:
+ ib_evt.element.cq = &cq->ibcq;
+ ib_evt.event = IB_EVENT_CQ_ERR;
+ cq_event = 1;
+ qp_event = 0;
+ break;
+ case OCRDMA_CQ_QPCAT_ERROR:
+ ib_evt.element.qp = &qp->ibqp;
+ ib_evt.event = IB_EVENT_QP_FATAL;
+ ocrdma_process_qpcat_error(dev, qp);
+ break;
+ case OCRDMA_QP_ACCESS_ERROR:
+ ib_evt.element.qp = &qp->ibqp;
+ ib_evt.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case OCRDMA_QP_COMM_EST_EVENT:
+ ib_evt.element.qp = &qp->ibqp;
+ ib_evt.event = IB_EVENT_COMM_EST;
+ break;
+ case OCRDMA_SQ_DRAINED_EVENT:
+ ib_evt.element.qp = &qp->ibqp;
+ ib_evt.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case OCRDMA_DEVICE_FATAL_EVENT:
+ ib_evt.element.port_num = 1;
+ ib_evt.event = IB_EVENT_DEVICE_FATAL;
+ qp_event = 0;
+ dev_event = 1;
+ break;
+ case OCRDMA_SRQCAT_ERROR:
+ ib_evt.element.srq = &qp->srq->ibsrq;
+ ib_evt.event = IB_EVENT_SRQ_ERR;
+ srq_event = 1;
+ qp_event = 0;
+ break;
+ case OCRDMA_SRQ_LIMIT_EVENT:
+ ib_evt.element.srq = &qp->srq->ibsrq;
+ ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ srq_event = 1;
+ qp_event = 0;
+ break;
+ case OCRDMA_QP_LAST_WQE_EVENT:
+ ib_evt.element.qp = &qp->ibqp;
+ ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ default:
+ cq_event = 0;
+ qp_event = 0;
+ srq_event = 0;
+ dev_event = 0;
+ pr_err("%s() unknown type=0x%x\n", __func__, type);
+ break;
+ }
+
+ if (qp_event) {
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&ib_evt, qp->ibqp.qp_context);
+ } else if (cq_event) {
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&ib_evt, cq->ibcq.cq_context);
+ } else if (srq_event) {
+ if (qp->srq->ibsrq.event_handler)
+ qp->srq->ibsrq.event_handler(&ib_evt,
+ qp->srq->ibsrq.
+ srq_context);
+ } else if (dev_event) {
+ pr_err("%s: Fatal event received\n", dev->ibdev.name);
+ ib_dispatch_event(&ib_evt);
+ }
+
+}
+
+static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
+ struct ocrdma_ae_mcqe *cqe)
+{
+ struct ocrdma_ae_pvid_mcqe *evt;
+ int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
+ OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+
+ switch (type) {
+ case OCRDMA_ASYNC_EVENT_PVID_STATE:
+ evt = (struct ocrdma_ae_pvid_mcqe *)cqe;
+ if ((evt->tag_enabled & OCRDMA_AE_PVID_MCQE_ENABLED_MASK) >>
+ OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT)
+ dev->pvid = ((evt->tag_enabled &
+ OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
+ OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
+ break;
+ default:
+ /* Event not of interest; ignore it. */
+ break;
+ }
+}
+
+static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
+{
+ /* async CQE processing */
+ struct ocrdma_ae_mcqe *cqe = ae_cqe;
+ u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
+ OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
+
+ if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
+ ocrdma_dispatch_ibevent(dev, cqe);
+ else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+ ocrdma_process_grp5_aync(dev, cqe);
+ else
+ pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
+ dev->id, evt_code);
+}
+
+static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
+{
+ if (dev->mqe_ctx.tag == cqe->tag_lo && dev->mqe_ctx.cmd_done == false) {
+ dev->mqe_ctx.cqe_status = (cqe->status &
+ OCRDMA_MCQE_STATUS_MASK) >> OCRDMA_MCQE_STATUS_SHIFT;
+ dev->mqe_ctx.ext_status =
+ (cqe->status & OCRDMA_MCQE_ESTATUS_MASK)
+ >> OCRDMA_MCQE_ESTATUS_SHIFT;
+ dev->mqe_ctx.cmd_done = true;
+ wake_up(&dev->mqe_ctx.cmd_wait);
+ } else
+ pr_err("%s() cqe for invalid tag0x%x.expected=0x%x\n",
+ __func__, cqe->tag_lo, dev->mqe_ctx.tag);
+}
+
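+/* Drain all valid MCQEs from the mailbox CQ, dispatching async events
+ * and command completions, then re-arm the CQ with the popped count.
+ */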
+static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
+{
+ u16 cqe_popped = 0;
+ struct ocrdma_mcqe *cqe;
+
+ while (1) {
+ cqe = ocrdma_get_mcqe(dev);
+ if (cqe == NULL)
+ break;
+ ocrdma_le32_to_cpu(cqe, sizeof(*cqe));
+ cqe_popped += 1;
+ if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_AE_MASK)
+ ocrdma_process_acqe(dev, cqe);
+ else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
+ ocrdma_process_mcqe(dev, cqe);
+ memset(cqe, 0, sizeof(struct ocrdma_mcqe));
+ ocrdma_mcq_inc_tail(dev);
+ }
+ ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, cqe_popped);
+ return 0;
+}
+
+static void ocrdma_qp_buddy_cq_handler(struct ocrdma_dev *dev,
+ struct ocrdma_cq *cq)
+{
+ unsigned long flags;
+ struct ocrdma_qp *qp;
+ bool buddy_cq_found = false;
+ /* Go through the list of QPs in the error state that are using
+ * this CQ and invoke the buddy CQ's callback handler to trigger
+ * processing of error/flushed CQEs. It is rare to find more than
+ * a few entries in this list, as most consumers stop posting after
+ * receiving an error CQE.
+ * The list is traversed only until a matching buddy CQ is found
+ * for a QP.
+ */
+ spin_lock_irqsave(&dev->flush_q_lock, flags);
+ list_for_each_entry(qp, &cq->sq_head, sq_entry) {
+ if (qp->srq)
+ continue;
+ /* if the wq and rq share the same cq, then the comp_handler
+ * has already been invoked.
+ */
+ if (qp->sq_cq == qp->rq_cq)
+ continue;
+ /* if completion came on sq, rq's cq is buddy cq.
+ * if completion came on rq, sq's cq is buddy cq.
+ */
+ if (qp->sq_cq == cq)
+ cq = qp->rq_cq;
+ else
+ cq = qp->sq_cq;
+ buddy_cq_found = true;
+ break;
+ }
+ spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+ if (buddy_cq_found == false)
+ return;
+ if (cq->ibcq.comp_handler) {
+ spin_lock_irqsave(&cq->comp_handler_lock, flags);
+ (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+ spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
+ }
+}
+
+static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
+{
+ unsigned long flags;
+ struct ocrdma_cq *cq;
+
+ if (cq_idx >= OCRDMA_MAX_CQ)
+ BUG();
+
+ cq = dev->cq_tbl[cq_idx];
+ if (cq == NULL)
+ return;
+
+ if (cq->ibcq.comp_handler) {
+ spin_lock_irqsave(&cq->comp_handler_lock, flags);
+ (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
+ spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
+ }
+ ocrdma_qp_buddy_cq_handler(dev, cq);
+}
+
+static void ocrdma_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
+{
+ /* process the MQ-CQE. */
+ if (cq_id == dev->mq.cq.id)
+ ocrdma_mq_cq_handler(dev, cq_id);
+ else
+ ocrdma_qp_cq_handler(dev, cq_id);
+}
+
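+/* EQ interrupt handler: consume valid EQEs, acknowledging each one via
+ * the EQ doorbell, hand CQ events to ocrdma_cq_handler() and finally
+ * re-arm the EQ. The budget (number of CQs bound to this EQ) guards
+ * against spinning on stale EQEs.
+ */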
+static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
+{
+ struct ocrdma_eq *eq = handle;
+ struct ocrdma_dev *dev = eq->dev;
+ struct ocrdma_eqe eqe;
+ struct ocrdma_eqe *ptr;
+ u16 cq_id;
+ int budget = eq->cq_cnt;
+
+ do {
+ ptr = ocrdma_get_eqe(eq);
+ eqe = *ptr;
+ ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
+ if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
+ break;
+
+ ptr->id_valid = 0;
+ /* ring the eq doorbell as soon as the eqe is consumed. */
+ ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1);
+ /* check whether it is a CQE or not. */
+ if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
+ cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
+ ocrdma_cq_handler(dev, cq_id);
+ }
+ ocrdma_eq_inc_tail(eq);
+
+ /* There can be a stale EQE after the last bound CQ is
+ * destroyed. EQE valid and budget == 0 implies this.
+ */
+ if (budget)
+ budget--;
+
+ } while (budget);
+
+ ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+ return IRQ_HANDLED;
+}
+
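+/* Copy the command into the next MQ slot in little-endian format,
+ * record its tag for completion matching and ring the MQ doorbell.
+ */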
+static void ocrdma_post_mqe(struct ocrdma_dev *dev, struct ocrdma_mqe *cmd)
+{
+ struct ocrdma_mqe *mqe;
+
+ dev->mqe_ctx.tag = dev->mq.sq.head;
+ dev->mqe_ctx.cmd_done = false;
+ mqe = ocrdma_get_mqe(dev);
+ cmd->hdr.tag_lo = dev->mq.sq.head;
+ ocrdma_copy_cpu_to_le32(mqe, cmd, sizeof(*mqe));
+ /* make sure descriptor is written before ringing doorbell */
+ wmb();
+ ocrdma_mq_inc_head(dev);
+ ocrdma_ring_mq_db(dev);
+}
+
+static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
+{
+ long status;
+ /* 30 sec timeout */
+ status = wait_event_timeout(dev->mqe_ctx.cmd_wait,
+ (dev->mqe_ctx.cmd_done != false),
+ msecs_to_jiffies(30000));
+ if (status)
+ return 0;
+ else
+ return -1;
+}
+
+/* issue a mailbox command on the MQ */
+static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
+{
+ int status = 0;
+ u16 cqe_status, ext_status;
+ struct ocrdma_mqe *rsp_mqe;
+ struct ocrdma_mbx_rsp *rsp = NULL;
+
+ mutex_lock(&dev->mqe_ctx.lock);
+ ocrdma_post_mqe(dev, mqe);
+ status = ocrdma_wait_mqe_cmpl(dev);
+ if (status)
+ goto mbx_err;
+ cqe_status = dev->mqe_ctx.cqe_status;
+ ext_status = dev->mqe_ctx.ext_status;
+ rsp_mqe = ocrdma_get_mqe_rsp(dev);
+ ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe)));
+ if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+ OCRDMA_MQE_HDR_EMB_SHIFT)
+ rsp = &mqe->u.rsp;
+
+ if (cqe_status || ext_status) {
+ pr_err("%s() cqe_status=0x%x, ext_status=0x%x,",
+ __func__, cqe_status, ext_status);
+ if (rsp) {
+ /* This is for embedded cmds. */
+ pr_err("opcode=0x%x, subsystem=0x%x\n",
+ (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+ OCRDMA_MBX_RSP_OPCODE_SHIFT,
+ (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+ OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+ }
+ status = ocrdma_get_mbx_cqe_errno(cqe_status);
+ goto mbx_err;
+ }
+ /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */
+ if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK))
+ status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
+mbx_err:
+ mutex_unlock(&dev->mqe_ctx.lock);
+ return status;
+}
+
+static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
+ void *payload_va)
+{
+ int status = 0;
+ struct ocrdma_mbx_rsp *rsp = payload_va;
+
+ if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+ OCRDMA_MQE_HDR_EMB_SHIFT)
+ BUG();
+
+ status = ocrdma_mbx_cmd(dev, mqe);
+ if (!status)
+ /* For non embedded, only CQE failures are handled in
+ * ocrdma_mbx_cmd. We need to check for RSP errors.
+ */
+ if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK)
+ status = ocrdma_get_mbx_errno(rsp->status);
+
+ if (status)
+ pr_err("opcode=0x%x, subsystem=0x%x\n",
+ (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+ OCRDMA_MBX_RSP_OPCODE_SHIFT,
+ (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+ OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+ return status;
+}
+
+static void ocrdma_get_attr(struct ocrdma_dev *dev,
+ struct ocrdma_dev_attr *attr,
+ struct ocrdma_mbx_query_config *rsp)
+{
+ attr->max_pd =
+ (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT;
+ attr->max_qp =
+ (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
+ attr->max_srq =
+ (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
+ attr->max_send_sge = ((rsp->max_write_send_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
+ attr->max_recv_sge = (rsp->max_write_send_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
+ attr->max_srq_sge = (rsp->max_srq_rqe_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
+ attr->max_rdma_sge = (rsp->max_write_send_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
+ attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
+ OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
+ attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
+ OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
+ attr->cq_overflow_detect = (rsp->qp_srq_cq_ird_ord &
+ OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT;
+ attr->srq_supported = (rsp->qp_srq_cq_ird_ord &
+ OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT;
+ attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
+ OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
+ attr->max_mw = rsp->max_mw;
+ attr->max_mr = rsp->max_mr;
+ attr->max_mr_size = ~0ull;
+ attr->max_fmr = 0;
+ attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
+ attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
+ attr->max_cqe = rsp->max_cq_cqes_per_cq &
+ OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK;
+ attr->max_cq = (rsp->max_cq_cqes_per_cq &
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET;
+ attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
+ OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) *
+ OCRDMA_WQE_STRIDE;
+ attr->rqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
+ OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET) *
+ OCRDMA_WQE_STRIDE;
+ attr->max_inline_data =
+ attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
+ sizeof(struct ocrdma_sge));
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
+ attr->ird = 1;
+ attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
+ attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
+ }
+ dev->attr.max_wqe = rsp->max_wqes_rqes_per_q >>
+ OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET;
+ dev->attr.max_rqe = rsp->max_wqes_rqes_per_q &
+ OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK;
+}
+
+static int ocrdma_check_fw_config(struct ocrdma_dev *dev,
+ struct ocrdma_fw_conf_rsp *conf)
+{
+ u32 fn_mode;
+
+ fn_mode = conf->fn_mode & OCRDMA_FN_MODE_RDMA;
+ if (fn_mode != OCRDMA_FN_MODE_RDMA)
+ return -EINVAL;
+ dev->base_eqid = conf->base_eqid;
+ dev->max_eq = conf->max_eq;
+ return 0;
+}
+
+/* can be issued only at init time. */
+static int ocrdma_mbx_query_fw_ver(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mqe *cmd;
+ struct ocrdma_fw_ver_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_VER, sizeof(*cmd));
+ if (!cmd)
+ return -ENOMEM;
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_GET_FW_VER,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_fw_ver_rsp *)cmd;
+ memset(&dev->attr.fw_ver[0], 0, sizeof(dev->attr.fw_ver));
+ memcpy(&dev->attr.fw_ver[0], &rsp->running_ver[0],
+ sizeof(rsp->running_ver));
+ ocrdma_le32_to_cpu(dev->attr.fw_ver, sizeof(rsp->running_ver));
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+/* can be issued only at init time. */
+static int ocrdma_mbx_query_fw_config(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mqe *cmd;
+ struct ocrdma_fw_conf_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_CONFIG, sizeof(*cmd));
+ if (!cmd)
+ return -ENOMEM;
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_GET_FW_CONFIG,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_fw_conf_rsp *)cmd;
+ status = ocrdma_check_fw_config(dev, rsp);
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
+{
+ struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
+ struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
+ struct ocrdma_rdma_stats_resp *old_stats = NULL;
+ int status;
+
+ old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL);
+ if (old_stats == NULL)
+ return -ENOMEM;
+
+ memset(mqe, 0, sizeof(*mqe));
+ mqe->hdr.pyld_len = dev->stats_mem.size;
+ mqe->hdr.spcl_sge_cnt_emb |=
+ (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff);
+ mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa);
+ mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size;
+
+ /* Cache the old stats */
+ memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp));
+ memset(req, 0, dev->stats_mem.size);
+
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)req,
+ OCRDMA_CMD_GET_RDMA_STATS,
+ OCRDMA_SUBSYS_ROCE,
+ dev->stats_mem.size);
+ if (reset)
+ req->reset_stats = reset;
+
+ status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va);
+ if (status)
+ /* Copy from cache, if mbox fails */
+ memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp));
+ else
+ ocrdma_le32_to_cpu(req, dev->stats_mem.size);
+
+ kfree(old_stats);
+ return status;
+}
+
+static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_dma_mem dma;
+ struct ocrdma_mqe *mqe;
+ struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
+ struct mgmt_hba_attribs *hba_attribs;
+
+ mqe = ocrdma_alloc_mqe();
+ if (!mqe)
+ return status;
+ memset(mqe, 0, sizeof(*mqe));
+
+ dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
+ dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev,
+ dma.size, &dma.pa, GFP_KERNEL);
+ if (!dma.va)
+ goto free_mqe;
+
+ mqe->hdr.pyld_len = dma.size;
+ mqe->hdr.spcl_sge_cnt_emb |=
+ (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+ OCRDMA_MQE_HDR_SGE_CNT_MASK;
+ mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff);
+ mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
+ mqe->u.nonemb_req.sge[0].len = dma.size;
+
+ memset(dma.va, 0, dma.size);
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
+ OCRDMA_CMD_GET_CTRL_ATTRIBUTES,
+ OCRDMA_SUBSYS_COMMON,
+ dma.size);
+
+ status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va);
+ if (!status) {
+ ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
+ hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
+
+ dev->hba_port_num = hba_attribs->phy_port;
+ strncpy(dev->model_number,
+ hba_attribs->controller_model_number, 31);
+ }
+ dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
+free_mqe:
+ kfree(mqe);
+ return status;
+}
+
+static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mbx_query_config *rsp;
+ struct ocrdma_mqe *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_CONFIG, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_mbx_query_config *)cmd;
+ ocrdma_get_attr(dev, &dev->attr, rsp);
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
+{
+ int status = -ENOMEM;
+ struct ocrdma_get_link_speed_rsp *rsp;
+ struct ocrdma_mqe *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1,
+ sizeof(*cmd));
+ if (!cmd)
+ return status;
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+ ((struct ocrdma_mbx_hdr *)cmd->u.cmd)->rsvd_version = 0x1;
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
+ *lnk_speed = rsp->phys_port_speed;
+
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mqe *cmd;
+ struct ocrdma_get_phy_info_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd));
+ if (!cmd)
+ return status;
+
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON,
+ sizeof(*cmd));
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
+ dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+ dev->phy.auto_speeds_supported =
+ le16_to_cpu(rsp->auto_speeds_supported);
+ dev->phy.fixed_speeds_supported =
+ le16_to_cpu(rsp->fixed_speeds_supported);
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
+{
+ int status = -ENOMEM;
+ struct ocrdma_alloc_pd *cmd;
+ struct ocrdma_alloc_pd_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ if (pd->dpp_enabled)
+ cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_alloc_pd_rsp *)cmd;
+ pd->id = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_PDID_MASK;
+ if (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) {
+ pd->dpp_enabled = true;
+ pd->dpp_page = rsp->dpp_page_pdid >>
+ OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT;
+ } else {
+ pd->dpp_enabled = false;
+ pd->num_dpp_qp = 0;
+ }
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
+{
+ int status = -ENOMEM;
+ struct ocrdma_dealloc_pd *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_PD, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->id = pd->id;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ kfree(cmd);
+ return status;
+}
+
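+/* Round the requested entry count up to a power of two, find the
+ * smallest supported queue size that can hold it and derive the page
+ * count, page size and adjusted entry count from that.
+ */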
+static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
+ int *num_pages, int *page_size)
+{
+ int i;
+ int mem_size;
+
+ *num_entries = roundup_pow_of_two(*num_entries);
+ mem_size = *num_entries * entry_size;
+ /* find the smallest supported queue size that fits the requested memory */
+ for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
+ if (mem_size <= (OCRDMA_Q_PAGE_BASE_SIZE << i))
+ break;
+ }
+ if (i >= OCRDMA_MAX_Q_PAGE_SIZE_CNT)
+ return -EINVAL;
+ mem_size = roundup(mem_size,
+ ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES));
+ *num_pages =
+ mem_size / ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
+ *page_size = ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
+ *num_entries = mem_size / entry_size;
+ return 0;
+}
+
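+/* Create the address handle (AV) table: a single PBL page whose PBEs
+ * point to the DMA pages holding the AV entries, registered with the
+ * firmware through the CREATE_AH_TBL mailbox command.
+ */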
+static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
+{
+ int i;
+ int status = -ENOMEM;
+ int max_ah;
+ struct ocrdma_create_ah_tbl *cmd;
+ struct ocrdma_create_ah_tbl_rsp *rsp;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ dma_addr_t pa;
+ struct ocrdma_pbe *pbes;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_AH_TBL, sizeof(*cmd));
+ if (!cmd)
+ return status;
+
+ max_ah = OCRDMA_MAX_AH;
+ dev->av_tbl.size = sizeof(struct ocrdma_av) * max_ah;
+
+ /* number of PBEs in PBL */
+ cmd->ah_conf = (OCRDMA_AH_TBL_PAGES <<
+ OCRDMA_CREATE_AH_NUM_PAGES_SHIFT) &
+ OCRDMA_CREATE_AH_NUM_PAGES_MASK;
+
+ /* page size */
+ for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
+ if (PAGE_SIZE == (OCRDMA_MIN_Q_PAGE_SIZE << i))
+ break;
+ }
+ cmd->ah_conf |= (i << OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT) &
+ OCRDMA_CREATE_AH_PAGE_SIZE_MASK;
+
+ /* ah_entry size */
+ cmd->ah_conf |= (sizeof(struct ocrdma_av) <<
+ OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT) &
+ OCRDMA_CREATE_AH_ENTRY_SIZE_MASK;
+
+ dev->av_tbl.pbl.va = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+ &dev->av_tbl.pbl.pa,
+ GFP_KERNEL);
+ if (dev->av_tbl.pbl.va == NULL)
+ goto mem_err;
+
+ dev->av_tbl.va = dma_alloc_coherent(&pdev->dev, dev->av_tbl.size,
+ &pa, GFP_KERNEL);
+ if (dev->av_tbl.va == NULL)
+ goto mem_err_ah;
+ dev->av_tbl.pa = pa;
+ dev->av_tbl.num_ah = max_ah;
+ memset(dev->av_tbl.va, 0, dev->av_tbl.size);
+
+ pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
+ for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
+ pbes[i].pa_lo = (u32) (pa & 0xffffffff);
+ pbes[i].pa_hi = (u32) upper_32_bits(pa);
+ pa += PAGE_SIZE;
+ }
+ cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
+ cmd->tbl_addr[0].hi = (u32)upper_32_bits(dev->av_tbl.pbl.pa);
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_create_ah_tbl_rsp *)cmd;
+ dev->av_tbl.ahid = rsp->ahid & 0xFFFF;
+ kfree(cmd);
+ return 0;
+
+mbx_err:
+ dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
+ dev->av_tbl.pa);
+ dev->av_tbl.va = NULL;
+mem_err_ah:
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
+ dev->av_tbl.pbl.pa);
+ dev->av_tbl.pbl.va = NULL;
+ dev->av_tbl.size = 0;
+mem_err:
+ kfree(cmd);
+ return status;
+}
+
+static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
+{
+ struct ocrdma_delete_ah_tbl *cmd;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+
+ if (dev->av_tbl.va == NULL)
+ return;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_AH_TBL, sizeof(*cmd));
+ if (!cmd)
+ return;
+ cmd->ahid = dev->av_tbl.ahid;
+
+ ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
+ dev->av_tbl.pa);
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
+ dev->av_tbl.pbl.pa);
+ kfree(cmd);
+}
+
+/* Multiple CQs share an EQ. This routine returns the least-used EQ to
+ * associate with a CQ, distributing interrupt processing and CPU load
+ * across the EQs, their vectors and hence the CPUs.
+ */
+static u16 ocrdma_bind_eq(struct ocrdma_dev *dev)
+{
+ int i, selected_eq = 0, cq_cnt = 0;
+ u16 eq_id;
+
+ mutex_lock(&dev->dev_lock);
+ cq_cnt = dev->eq_tbl[0].cq_cnt;
+ eq_id = dev->eq_tbl[0].q.id;
+ /* find the EQ that has the fewest CQs associated with it. */
+ for (i = 0; i < dev->eq_cnt; i++) {
+ if (dev->eq_tbl[i].cq_cnt < cq_cnt) {
+ cq_cnt = dev->eq_tbl[i].cq_cnt;
+ eq_id = dev->eq_tbl[i].q.id;
+ selected_eq = i;
+ }
+ }
+ dev->eq_tbl[selected_eq].cq_cnt += 1;
+ mutex_unlock(&dev->dev_lock);
+ return eq_id;
+}
+
+static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
+{
+ int i;
+
+ mutex_lock(&dev->dev_lock);
+ i = ocrdma_get_eq_table_index(dev, eq_id);
+ if (i == -EINVAL)
+ BUG();
+ dev->eq_tbl[i].cq_cnt -= 1;
+ mutex_unlock(&dev->dev_lock);
+}
+
+int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
+ int entries, int dpp_cq, u16 pd_id)
+{
+ int status = -ENOMEM; int max_hw_cqe;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ struct ocrdma_create_cq *cmd;
+ struct ocrdma_create_cq_rsp *rsp;
+ u32 hw_pages, cqe_size, page_size, cqe_count;
+
+ if (entries > dev->attr.max_cqe) {
+ pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
+ __func__, dev->id, dev->attr.max_cqe, entries);
+ return -EINVAL;
+ }
+ if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R))
+ return -EINVAL;
+
+ if (dpp_cq) {
+ cq->max_hw_cqe = 1;
+ max_hw_cqe = 1;
+ cqe_size = OCRDMA_DPP_CQE_SIZE;
+ hw_pages = 1;
+ } else {
+ cq->max_hw_cqe = dev->attr.max_cqe;
+ max_hw_cqe = dev->attr.max_cqe;
+ cqe_size = sizeof(struct ocrdma_cqe);
+ hw_pages = OCRDMA_CREATE_CQ_MAX_PAGES;
+ }
+
+ cq->len = roundup(max_hw_cqe * cqe_size, OCRDMA_MIN_Q_PAGE_SIZE);
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_CQ, sizeof(*cmd));
+ if (!cmd)
+ return -ENOMEM;
+ ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+ cq->va = dma_alloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
+ if (!cq->va) {
+ status = -ENOMEM;
+ goto mem_err;
+ }
+ memset(cq->va, 0, cq->len);
+ page_size = cq->len / hw_pages;
+ cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+ OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
+ cmd->cmd.pgsz_pgcnt |= hw_pages;
+ cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
+
+ cq->eqn = ocrdma_bind_eq(dev);
+ cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
+ cqe_count = cq->len / cqe_size;
+ cq->cqe_cnt = cqe_count;
+ if (cqe_count > 1024) {
+ /* Set cnt to 3 to indicate more than 1024 cq entries */
+ cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
+ } else {
+ u8 count = 0;
+ switch (cqe_count) {
+ case 256:
+ count = 0;
+ break;
+ case 512:
+ count = 1;
+ break;
+ case 1024:
+ count = 2;
+ break;
+ default:
+ goto mbx_err;
+ }
+ cmd->cmd.ev_cnt_flags |= (count << OCRDMA_CREATE_CQ_CNT_SHIFT);
+ }
+ /* the eq is shared between all the consumer cqs. */
+ cmd->cmd.eqn = cq->eqn;
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
+ if (dpp_cq)
+ cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
+ OCRDMA_CREATE_CQ_TYPE_SHIFT;
+ cq->phase_change = false;
+ cmd->cmd.cqe_count = (cq->len / cqe_size);
+ } else {
+ cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
+ cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
+ cq->phase_change = true;
+ }
+
+ cmd->cmd.pd_id = pd_id; /* valid only for v3 */
+ ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_create_cq_rsp *)cmd;
+ cq->id = (u16) (rsp->rsp.cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
+ kfree(cmd);
+ return 0;
+mbx_err:
+ ocrdma_unbind_eq(dev, cq->eqn);
+ dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa);
+mem_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
+{
+ int status = -ENOMEM;
+ struct ocrdma_destroy_cq *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ,
+ OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+
+ cmd->bypass_flush_qid |=
+ (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
+ OCRDMA_DESTROY_CQ_QID_MASK;
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ ocrdma_unbind_eq(dev, cq->eqn);
+ dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
+ u32 pdid, int addr_check)
+{
+ int status = -ENOMEM;
+ struct ocrdma_alloc_lkey *cmd;
+ struct ocrdma_alloc_lkey_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_LKEY, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->pdid = pdid;
+ cmd->pbl_sz_flags |= addr_check;
+ cmd->pbl_sz_flags |= (hwmr->fr_mr << OCRDMA_ALLOC_LKEY_FMR_SHIFT);
+ cmd->pbl_sz_flags |=
+ (hwmr->remote_wr << OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT);
+ cmd->pbl_sz_flags |=
+ (hwmr->remote_rd << OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT);
+ cmd->pbl_sz_flags |=
+ (hwmr->local_wr << OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT);
+ cmd->pbl_sz_flags |=
+ (hwmr->remote_atomic << OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT);
+ cmd->pbl_sz_flags |=
+ (hwmr->num_pbls << OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT);
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_alloc_lkey_rsp *)cmd;
+ hwmr->lkey = rsp->lrkey;
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey)
+{
+ int status = -ENOMEM;
+ struct ocrdma_dealloc_lkey *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_LKEY, sizeof(*cmd));
+ if (!cmd)
+ return -ENOMEM;
+ cmd->lkey = lkey;
+ cmd->rsvd_frmr = fr_mr ? 1 : 0;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
+ u32 pdid, u32 pbl_cnt, u32 pbe_size, u32 last)
+{
+ int status = -ENOMEM;
+ int i;
+ struct ocrdma_reg_nsmr *cmd;
+ struct ocrdma_reg_nsmr_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
+ if (!cmd)
+ return -ENOMEM;
+ cmd->num_pbl_pdid =
+ pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT);
+ cmd->fr_mr = hwmr->fr_mr;
+
+ cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr <<
+ OCRDMA_REG_NSMR_REMOTE_WR_SHIFT);
+ cmd->flags_hpage_pbe_sz |= (hwmr->remote_rd <<
+ OCRDMA_REG_NSMR_REMOTE_RD_SHIFT);
+ cmd->flags_hpage_pbe_sz |= (hwmr->local_wr <<
+ OCRDMA_REG_NSMR_LOCAL_WR_SHIFT);
+ cmd->flags_hpage_pbe_sz |= (hwmr->remote_atomic <<
+ OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT);
+ cmd->flags_hpage_pbe_sz |= (hwmr->mw_bind <<
+ OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT);
+ cmd->flags_hpage_pbe_sz |= (last << OCRDMA_REG_NSMR_LAST_SHIFT);
+
+ cmd->flags_hpage_pbe_sz |= (hwmr->pbe_size / OCRDMA_MIN_HPAGE_SIZE);
+ cmd->flags_hpage_pbe_sz |= (hwmr->pbl_size / OCRDMA_MIN_HPAGE_SIZE) <<
+ OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
+ cmd->totlen_low = hwmr->len;
+ cmd->totlen_high = upper_32_bits(hwmr->len);
+ cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
+ cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+ cmd->va_loaddr = (u32) hwmr->va;
+ cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
+
+ for (i = 0; i < pbl_cnt; i++) {
+ cmd->pbl[i].lo = (u32) (hwmr->pbl_table[i].pa & 0xffffffff);
+ cmd->pbl[i].hi = upper_32_bits(hwmr->pbl_table[i].pa);
+ }
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_reg_nsmr_rsp *)cmd;
+ hwmr->lkey = rsp->lrkey;
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
+ struct ocrdma_hw_mr *hwmr, u32 pbl_cnt,
+ u32 pbl_offset, u32 last)
+{
+ int status = -ENOMEM;
+ int i;
+ struct ocrdma_reg_nsmr_cont *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR_CONT, sizeof(*cmd));
+ if (!cmd)
+ return -ENOMEM;
+ cmd->lrkey = hwmr->lkey;
+ cmd->num_pbl_offset = (pbl_cnt << OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT) |
+ (pbl_offset & OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK);
+ cmd->last = last << OCRDMA_REG_NSMR_CONT_LAST_SHIFT;
+
+ for (i = 0; i < pbl_cnt; i++) {
+ cmd->pbl[i].lo =
+ (u32) (hwmr->pbl_table[i + pbl_offset].pa & 0xffffffff);
+ cmd->pbl[i].hi =
+ upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa);
+ }
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
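+/* Register a memory region whose PBL list may not fit in a single
+ * mailbox command: the first chunk goes in a REGISTER_NSMR command and
+ * the remaining chunks in REGISTER_NSMR_CONT commands, with the final
+ * chunk flagged as the last.
+ */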
+int ocrdma_reg_mr(struct ocrdma_dev *dev,
+ struct ocrdma_hw_mr *hwmr, u32 pdid, int acc)
+{
+ int status;
+ u32 last = 0;
+ u32 cur_pbl_cnt, pbl_offset;
+ u32 pending_pbl_cnt = hwmr->num_pbls;
+
+ pbl_offset = 0;
+ cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
+ if (cur_pbl_cnt == pending_pbl_cnt)
+ last = 1;
+
+ status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
+ cur_pbl_cnt, hwmr->pbe_size, last);
+ if (status) {
+ pr_err("%s() status=%d\n", __func__, status);
+ return status;
+ }
+ /* if there are no more pbls to register then exit. */
+ if (last)
+ return 0;
+
+ while (!last) {
+ pbl_offset += cur_pbl_cnt;
+ pending_pbl_cnt -= cur_pbl_cnt;
+ cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
+ /* if we have reached the end of the pbls, set the last bit,
+ * indicating no more pbls remain to register for this memory key.
+ */
+ if (cur_pbl_cnt == pending_pbl_cnt)
+ last = 1;
+
+ status = ocrdma_mbx_reg_mr_cont(dev, hwmr, cur_pbl_cnt,
+ pbl_offset, last);
+ if (status)
+ break;
+ }
+ if (status)
+ pr_err("%s() err. status=%d\n", __func__, status);
+
+ return status;
+}
+
+bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
+{
+ struct ocrdma_qp *tmp;
+ bool found = false;
+ list_for_each_entry(tmp, &cq->sq_head, sq_entry) {
+ if (qp == tmp) {
+ found = true;
+ break;
+ }
+ }
+ return found;
+}
+
+bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
+{
+ struct ocrdma_qp *tmp;
+ bool found = false;
+ list_for_each_entry(tmp, &cq->rq_head, rq_entry) {
+ if (qp == tmp) {
+ found = true;
+ break;
+ }
+ }
+ return found;
+}
+
+void ocrdma_flush_qp(struct ocrdma_qp *qp)
+{
+ bool found;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->dev->flush_q_lock, flags);
+ found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
+ if (!found)
+ list_add_tail(&qp->sq_entry, &qp->sq_cq->sq_head);
+ if (!qp->srq) {
+ found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
+ if (!found)
+ list_add_tail(&qp->rq_entry, &qp->rq_cq->rq_head);
+ }
+ spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags);
+}
+
+static void ocrdma_init_hwq_ptr(struct ocrdma_qp *qp)
+{
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+}
+
+int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
+ enum ib_qp_state *old_ib_state)
+{
+ unsigned long flags;
+ int status = 0;
+ enum ocrdma_qp_state new_state;
+ new_state = get_ocrdma_qp_state(new_ib_state);
+
+ /* sync with wqe and rqe posting */
+ spin_lock_irqsave(&qp->q_lock, flags);
+
+ if (old_ib_state)
+ *old_ib_state = get_ibqp_state(qp->state);
+ if (new_state == qp->state) {
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ return 1;
+ }
+
+
+ if (new_state == OCRDMA_QPS_INIT) {
+ ocrdma_init_hwq_ptr(qp);
+ ocrdma_del_flush_qp(qp);
+ } else if (new_state == OCRDMA_QPS_ERR) {
+ ocrdma_flush_qp(qp);
+ }
+
+ qp->state = new_state;
+
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ return status;
+}
+
+static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
+{
+ u32 flags = 0;
+ if (qp->cap_flags & OCRDMA_QP_INB_RD)
+ flags |= OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK;
+ if (qp->cap_flags & OCRDMA_QP_INB_WR)
+ flags |= OCRDMA_CREATE_QP_REQ_INB_WREN_MASK;
+ if (qp->cap_flags & OCRDMA_QP_MW_BIND)
+ flags |= OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK;
+ if (qp->cap_flags & OCRDMA_QP_LKEY0)
+ flags |= OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK;
+ if (qp->cap_flags & OCRDMA_QP_FAST_REG)
+ flags |= OCRDMA_CREATE_QP_REQ_FMR_EN_MASK;
+ return flags;
+}
+
+static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
+ struct ib_qp_init_attr *attrs,
+ struct ocrdma_qp *qp)
+{
+ int status;
+ u32 len, hw_pages, hw_page_size;
+ dma_addr_t pa;
+ struct ocrdma_dev *dev = qp->dev;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ u32 max_wqe_allocated;
+ u32 max_sges = attrs->cap.max_send_sge;
+
+ /* QP1 may exceed 127 */
+ max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
+ dev->attr.max_wqe);
+
+ status = ocrdma_build_q_conf(&max_wqe_allocated,
+ dev->attr.wqe_size, &hw_pages, &hw_page_size);
+ if (status) {
+ pr_err("%s() req. max_send_wr=0x%x\n", __func__,
+ max_wqe_allocated);
+ return -EINVAL;
+ }
+ qp->sq.max_cnt = max_wqe_allocated;
+ len = (hw_pages * hw_page_size);
+
+ qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+ if (!qp->sq.va)
+ return -ENOMEM;
+ memset(qp->sq.va, 0, len);
+ qp->sq.len = len;
+ qp->sq.pa = pa;
+ qp->sq.entry_size = dev->attr.wqe_size;
+ ocrdma_build_q_pages(&cmd->wq_addr[0], hw_pages, pa, hw_page_size);
+
+ cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE)
+ << OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT);
+ cmd->num_wq_rq_pages |= (hw_pages <<
+ OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK;
+ cmd->max_sge_send_write |= (max_sges <<
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK;
+ cmd->max_sge_send_write |= (max_sges <<
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK;
+ cmd->max_wqe_rqe |= (ilog2(qp->sq.max_cnt) <<
+ OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK;
+ cmd->wqe_rqe_size |= (dev->attr.wqe_size <<
+ OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK;
+ return 0;
+}
+
+static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
+ struct ib_qp_init_attr *attrs,
+ struct ocrdma_qp *qp)
+{
+ int status;
+ u32 len, hw_pages, hw_page_size;
+ dma_addr_t pa = 0;
+ struct ocrdma_dev *dev = qp->dev;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ u32 max_rqe_allocated = attrs->cap.max_recv_wr + 1;
+
+ status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size,
+ &hw_pages, &hw_page_size);
+ if (status) {
+ pr_err("%s() req. max_recv_wr=0x%x\n", __func__,
+ attrs->cap.max_recv_wr + 1);
+ return status;
+ }
+ qp->rq.max_cnt = max_rqe_allocated;
+ len = (hw_pages * hw_page_size);
+
+ qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+ if (!qp->rq.va)
+ return -ENOMEM;
+ memset(qp->rq.va, 0, len);
+ qp->rq.pa = pa;
+ qp->rq.len = len;
+ qp->rq.entry_size = dev->attr.rqe_size;
+
+ ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size);
+ cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+ OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT);
+ cmd->num_wq_rq_pages |=
+ (hw_pages << OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK;
+ cmd->max_sge_recv_flags |= (attrs->cap.max_recv_sge <<
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK;
+ cmd->max_wqe_rqe |= (ilog2(qp->rq.max_cnt) <<
+ OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK;
+ cmd->wqe_rqe_size |= (dev->attr.rqe_size <<
+ OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK;
+ return 0;
+}
+
+static void ocrdma_set_create_qp_dpp_cmd(struct ocrdma_create_qp_req *cmd,
+ struct ocrdma_pd *pd,
+ struct ocrdma_qp *qp,
+ u8 enable_dpp_cq, u16 dpp_cq_id)
+{
+ pd->num_dpp_qp--;
+ qp->dpp_enabled = true;
+ cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK;
+ if (!enable_dpp_cq)
+ return;
+ cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK;
+ cmd->dpp_credits_cqid = dpp_cq_id;
+ cmd->dpp_credits_cqid |= OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT <<
+ OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT;
+}
+
+static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
+ struct ocrdma_qp *qp)
+{
+ struct ocrdma_dev *dev = qp->dev;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ dma_addr_t pa = 0;
+ int ird_page_size = dev->attr.ird_page_size;
+ int ird_q_len = dev->attr.num_ird_pages * ird_page_size;
+ struct ocrdma_hdr_wqe *rqe;
+ int i = 0;
+
+ if (dev->attr.ird == 0)
+ return 0;
+
+ qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len,
+ &pa, GFP_KERNEL);
+ if (!qp->ird_q_va)
+ return -ENOMEM;
+ memset(qp->ird_q_va, 0, ird_q_len);
+ ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
+ pa, ird_page_size);
+ for (; i < ird_q_len / dev->attr.rqe_size; i++) {
+ rqe = (struct ocrdma_hdr_wqe *)(qp->ird_q_va +
+ (i * dev->attr.rqe_size));
+ rqe->cw = 0;
+ rqe->cw |= 2;
+ rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+ rqe->cw |= (8 << OCRDMA_WQE_SIZE_SHIFT);
+ rqe->cw |= (8 << OCRDMA_WQE_NXT_WQE_SIZE_SHIFT);
+ }
+ return 0;
+}
+
+static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp,
+ struct ocrdma_qp *qp,
+ struct ib_qp_init_attr *attrs,
+ u16 *dpp_offset, u16 *dpp_credit_lmt)
+{
+ u32 max_wqe_allocated, max_rqe_allocated;
+ qp->id = rsp->qp_id & OCRDMA_CREATE_QP_RSP_QP_ID_MASK;
+ qp->rq.dbid = rsp->sq_rq_id & OCRDMA_CREATE_QP_RSP_RQ_ID_MASK;
+ qp->sq.dbid = rsp->sq_rq_id >> OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT;
+ qp->max_ird = rsp->max_ord_ird & OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK;
+ qp->max_ord = (rsp->max_ord_ird >> OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT);
+ qp->dpp_enabled = false;
+ if (rsp->dpp_response & OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK) {
+ qp->dpp_enabled = true;
+ *dpp_credit_lmt = (rsp->dpp_response &
+ OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK) >>
+ OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT;
+ *dpp_offset = (rsp->dpp_response &
+ OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK) >>
+ OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT;
+ }
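+ /* the firmware reports the allocated WQE and RQE counts as log2 values */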
+ max_wqe_allocated =
+ rsp->max_wqe_rqe >> OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT;
+ max_wqe_allocated = 1 << max_wqe_allocated;
+ max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe);
+
+ qp->sq.max_cnt = max_wqe_allocated;
+ qp->sq.max_wqe_idx = max_wqe_allocated - 1;
+
+ if (!attrs->srq) {
+ qp->rq.max_cnt = max_rqe_allocated;
+ qp->rq.max_wqe_idx = max_rqe_allocated - 1;
+ }
+}
+
+int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
+ u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
+ u16 *dpp_credit_lmt)
+{
+ int status = -ENOMEM;
+ u32 flags = 0;
+ struct ocrdma_dev *dev = qp->dev;
+ struct ocrdma_pd *pd = qp->pd;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ struct ocrdma_cq *cq;
+ struct ocrdma_create_qp_req *cmd;
+ struct ocrdma_create_qp_rsp *rsp;
+ int qptype;
+
+ switch (attrs->qp_type) {
+ case IB_QPT_GSI:
+ qptype = OCRDMA_QPT_GSI;
+ break;
+ case IB_QPT_RC:
+ qptype = OCRDMA_QPT_RC;
+ break;
+ case IB_QPT_UD:
+ qptype = OCRDMA_QPT_UD;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->type_pgsz_pdn |= (qptype << OCRDMA_CREATE_QP_REQ_QPT_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_QPT_MASK;
+ status = ocrdma_set_create_qp_sq_cmd(cmd, attrs, qp);
+ if (status)
+ goto sq_err;
+
+ if (attrs->srq) {
+ struct ocrdma_srq *srq = get_ocrdma_srq(attrs->srq);
+ cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK;
+ cmd->rq_addr[0].lo = srq->id;
+ qp->srq = srq;
+ } else {
+ status = ocrdma_set_create_qp_rq_cmd(cmd, attrs, qp);
+ if (status)
+ goto rq_err;
+ }
+
+ status = ocrdma_set_create_qp_ird_cmd(cmd, qp);
+ if (status)
+ goto mbx_err;
+
+ cmd->type_pgsz_pdn |= (pd->id << OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_PD_ID_MASK;
+
+ flags = ocrdma_set_create_qp_mbx_access_flags(qp);
+
+ cmd->max_sge_recv_flags |= flags;
+ cmd->max_ord_ird |= (dev->attr.max_ord_per_qp <<
+ OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK;
+ cmd->max_ord_ird |= (dev->attr.max_ird_per_qp <<
+ OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK;
+ cq = get_ocrdma_cq(attrs->send_cq);
+ cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK;
+ qp->sq_cq = cq;
+ cq = get_ocrdma_cq(attrs->recv_cq);
+ cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT) &
+ OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
+ qp->rq_cq = cq;
+
+ if (pd->dpp_enabled && pd->num_dpp_qp) {
+ ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
+ dpp_cq_id);
+ }
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_create_qp_rsp *)cmd;
+ ocrdma_get_create_qp_rsp(rsp, qp, attrs, dpp_offset, dpp_credit_lmt);
+ qp->state = OCRDMA_QPS_RST;
+ kfree(cmd);
+ return 0;
+mbx_err:
+ if (qp->rq.va)
+ dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
+rq_err:
+ pr_err("%s(%d) rq_err\n", __func__, dev->id);
+ dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
+sq_err:
+ pr_err("%s(%d) sq_err\n", __func__, dev->id);
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
+ struct ocrdma_qp_params *param)
+{
+ int status = -ENOMEM;
+ struct ocrdma_query_qp *cmd;
+ struct ocrdma_query_qp_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->qp_id = qp->id;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_query_qp_rsp *)cmd;
+ memcpy(param, &rsp->params, sizeof(struct ocrdma_qp_params));
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+static int ocrdma_set_av_params(struct ocrdma_qp *qp,
+ struct ocrdma_modify_qp *cmd,
+ struct ib_qp_attr *attrs)
+{
+ int status;
+ struct ib_ah_attr *ah_attr = &attrs->ah_attr;
+ union ib_gid sgid, zgid;
+ u32 vlan_id;
+ u8 mac_addr[6];
+
+ if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
+ return -EINVAL;
+ cmd->params.tclass_sq_psn |=
+ (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ cmd->params.rnt_rc_sl_fl |=
+ (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+ cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT);
+ cmd->params.hop_lmt_rq_psn |=
+ (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
+ cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
+ memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
+ sizeof(cmd->params.dgid));
+ status = ocrdma_query_gid(&qp->dev->ibdev, 1,
+ ah_attr->grh.sgid_index, &sgid);
+ if (status)
+ return status;
+
+ memset(&zgid, 0, sizeof(zgid));
+ if (!memcmp(&sgid, &zgid, sizeof(zgid)))
+ return -EINVAL;
+
+ qp->sgid_idx = ah_attr->grh.sgid_index;
+ memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
+ ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]);
+ cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
+ (mac_addr[2] << 16) | (mac_addr[3] << 24);
+ /* convert them to LE format. */
+ ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
+ ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
+ cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
+ vlan_id = ah_attr->vlan_id;
+ if (vlan_id && (vlan_id < 0x1000)) {
+ cmd->params.vlan_dmac_b4_to_b5 |=
+ vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
+ cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
+ }
+ return 0;
+}
+
+static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
+ struct ocrdma_modify_qp *cmd,
+ struct ib_qp_attr *attrs, int attr_mask)
+{
+ int status = 0;
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index &
+ OCRDMA_QP_PARAMS_PKEY_INDEX_MASK);
+ cmd->flags |= OCRDMA_QP_PARA_PKEY_VALID;
+ }
+ if (attr_mask & IB_QP_QKEY) {
+ qp->qkey = attrs->qkey;
+ cmd->params.qkey = attrs->qkey;
+ cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
+ }
+ if (attr_mask & IB_QP_AV) {
+ status = ocrdma_set_av_params(qp, cmd, attrs);
+ if (status)
+ return status;
+ } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
+ /* set the default MAC address for UD and GSI QPs */
+ cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] |
+ (qp->dev->nic_info.mac_addr[1] << 8) |
+ (qp->dev->nic_info.mac_addr[2] << 16) |
+ (qp->dev->nic_info.mac_addr[3] << 24);
+ cmd->params.vlan_dmac_b4_to_b5 = qp->dev->nic_info.mac_addr[4] |
+ (qp->dev->nic_info.mac_addr[5] << 8);
+ }
+ if ((attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) &&
+ attrs->en_sqd_async_notify) {
+ cmd->params.max_sge_recv_flags |=
+ OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC;
+ cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
+ }
+ if (attr_mask & IB_QP_DEST_QPN) {
+ cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->dest_qp_num &
+ OCRDMA_QP_PARAMS_DEST_QPN_MASK);
+ cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
+ }
+ if (attr_mask & IB_QP_PATH_MTU) {
+ if (attrs->path_mtu < IB_MTU_256 ||
+ attrs->path_mtu > IB_MTU_4096) {
+ status = -EINVAL;
+ goto pmtu_err;
+ }
+ cmd->params.path_mtu_pkey_indx |=
+ (ib_mtu_enum_to_int(attrs->path_mtu) <<
+ OCRDMA_QP_PARAMS_PATH_MTU_SHIFT) &
+ OCRDMA_QP_PARAMS_PATH_MTU_MASK;
+ cmd->flags |= OCRDMA_QP_PARA_PMTU_VALID;
+ }
+ if (attr_mask & IB_QP_TIMEOUT) {
+ cmd->params.ack_to_rnr_rtc_dest_qpn |= attrs->timeout <<
+ OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
+ cmd->flags |= OCRDMA_QP_PARA_ACK_TO_VALID;
+ }
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ cmd->params.rnt_rc_sl_fl |= (attrs->retry_cnt <<
+ OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT) &
+ OCRDMA_QP_PARAMS_RETRY_CNT_MASK;
+ cmd->flags |= OCRDMA_QP_PARA_RETRY_CNT_VALID;
+ }
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ cmd->params.rnt_rc_sl_fl |= (attrs->min_rnr_timer <<
+ OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT) &
+ OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK;
+ cmd->flags |= OCRDMA_QP_PARA_RNT_VALID;
+ }
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->rnr_retry <<
+ OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT)
+ & OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK;
+ cmd->flags |= OCRDMA_QP_PARA_RRC_VALID;
+ }
+ if (attr_mask & IB_QP_SQ_PSN) {
+ cmd->params.tclass_sq_psn |= (attrs->sq_psn & 0x00ffffff);
+ cmd->flags |= OCRDMA_QP_PARA_SQPSN_VALID;
+ }
+ if (attr_mask & IB_QP_RQ_PSN) {
+ cmd->params.hop_lmt_rq_psn |= (attrs->rq_psn & 0x00ffffff);
+ cmd->flags |= OCRDMA_QP_PARA_RQPSN_VALID;
+ }
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attrs->max_rd_atomic > qp->dev->attr.max_ord_per_qp) {
+ status = -EINVAL;
+ goto pmtu_err;
+ }
+ qp->max_ord = attrs->max_rd_atomic;
+ cmd->flags |= OCRDMA_QP_PARA_MAX_ORD_VALID;
+ }
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attrs->max_dest_rd_atomic > qp->dev->attr.max_ird_per_qp) {
+ status = -EINVAL;
+ goto pmtu_err;
+ }
+ qp->max_ird = attrs->max_dest_rd_atomic;
+ cmd->flags |= OCRDMA_QP_PARA_MAX_IRD_VALID;
+ }
+ cmd->params.max_ord_ird = (qp->max_ord <<
+ OCRDMA_QP_PARAMS_MAX_ORD_SHIFT) |
+ (qp->max_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK);
+pmtu_err:
+ return status;
+}
+
+int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
+ struct ib_qp_attr *attrs, int attr_mask)
+{
+ int status = -ENOMEM;
+ struct ocrdma_modify_qp *cmd;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_QP, sizeof(*cmd));
+ if (!cmd)
+ return status;
+
+ cmd->params.id = qp->id;
+ cmd->flags = 0;
+ if (attr_mask & IB_QP_STATE) {
+ cmd->params.max_sge_recv_flags |=
+ (get_ocrdma_qp_state(attrs->qp_state) <<
+ OCRDMA_QP_PARAMS_STATE_SHIFT) &
+ OCRDMA_QP_PARAMS_STATE_MASK;
+ cmd->flags |= OCRDMA_QP_PARA_QPS_VALID;
+ } else {
+ cmd->params.max_sge_recv_flags |=
+ (qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) &
+ OCRDMA_QP_PARAMS_STATE_MASK;
+ }
+
+ status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask);
+ if (status)
+ goto mbx_err;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_destroy_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
+{
+ int status = -ENOMEM;
+ struct ocrdma_destroy_qp *cmd;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_QP, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->qp_id = qp->id;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+mbx_err:
+ kfree(cmd);
+ if (qp->sq.va)
+ dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
+ if (!qp->srq && qp->rq.va)
+ dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
+ if (qp->dpp_enabled)
+ qp->pd->num_dpp_qp++;
+ return status;
+}
+
+int ocrdma_mbx_create_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
+ struct ib_srq_init_attr *srq_attr,
+ struct ocrdma_pd *pd)
+{
+ int status = -ENOMEM;
+ int hw_pages, hw_page_size;
+ int len;
+ struct ocrdma_create_srq_rsp *rsp;
+ struct ocrdma_create_srq *cmd;
+ dma_addr_t pa;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ u32 max_rqe_allocated;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
+ if (!cmd)
+ return status;
+
+ cmd->pgsz_pdid = pd->id & OCRDMA_CREATE_SRQ_PD_ID_MASK;
+ max_rqe_allocated = srq_attr->attr.max_wr + 1;
+ status = ocrdma_build_q_conf(&max_rqe_allocated,
+ dev->attr.rqe_size,
+ &hw_pages, &hw_page_size);
+ if (status) {
+ pr_err("%s() req. max_wr=0x%x\n", __func__,
+ srq_attr->attr.max_wr);
+ status = -EINVAL;
+ goto ret;
+ }
+ len = hw_pages * hw_page_size;
+ srq->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
+ if (!srq->rq.va) {
+ status = -ENOMEM;
+ goto ret;
+ }
+ ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size);
+
+ srq->rq.entry_size = dev->attr.rqe_size;
+ srq->rq.pa = pa;
+ srq->rq.len = len;
+ srq->rq.max_cnt = max_rqe_allocated;
+
+ cmd->max_sge_rqe = ilog2(max_rqe_allocated);
+ cmd->max_sge_rqe |= srq_attr->attr.max_sge <<
+ OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT;
+
+ cmd->pgsz_pdid |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE)
+ << OCRDMA_CREATE_SRQ_PG_SZ_SHIFT);
+ cmd->pages_rqe_sz |= (dev->attr.rqe_size
+ << OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT)
+ & OCRDMA_CREATE_SRQ_RQE_SIZE_MASK;
+ cmd->pages_rqe_sz |= hw_pages << OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT;
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+ rsp = (struct ocrdma_create_srq_rsp *)cmd;
+ srq->id = rsp->id;
+ srq->rq.dbid = rsp->id;
+ max_rqe_allocated = ((rsp->max_sge_rqe_allocated &
+ OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK) >>
+ OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT);
+ max_rqe_allocated = (1 << max_rqe_allocated);
+ srq->rq.max_cnt = max_rqe_allocated;
+ srq->rq.max_wqe_idx = max_rqe_allocated - 1;
+ srq->rq.max_sges = (rsp->max_sge_rqe_allocated &
+ OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK) >>
+ OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT;
+ goto ret;
+mbx_err:
+ dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, pa);
+ret:
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
+{
+ int status = -ENOMEM;
+ struct ocrdma_modify_srq *cmd;
+ struct ocrdma_pd *pd = srq->pd;
+ struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_SRQ, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->id = srq->id;
+ cmd->limit_max_rqe |= srq_attr->srq_limit <<
+ OCRDMA_MODIFY_SRQ_LIMIT_SHIFT;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
+{
+ int status = -ENOMEM;
+ struct ocrdma_query_srq *cmd;
+ struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device);
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_SRQ, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->id = srq->rq.dbid;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status == 0) {
+ struct ocrdma_query_srq_rsp *rsp =
+ (struct ocrdma_query_srq_rsp *)cmd;
+ srq_attr->max_sge =
+ rsp->srq_lmt_max_sge &
+ OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK;
+ srq_attr->max_wr =
+ rsp->max_rqe_pdid >> OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT;
+ srq_attr->srq_limit = rsp->srq_lmt_max_sge >>
+ OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT;
+ }
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
+{
+ int status = -ENOMEM;
+ struct ocrdma_destroy_srq *cmd;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd));
+ if (!cmd)
+ return status;
+ cmd->id = srq->id;
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (srq->rq.va)
+ dma_free_coherent(&pdev->dev, srq->rq.len,
+ srq->rq.va, srq->rq.pa);
+ kfree(cmd);
+ return status;
+}
+
+int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
+{
+ int i;
+ int status = -EINVAL;
+ struct ocrdma_av *av;
+ unsigned long flags;
+
+ av = dev->av_tbl.va;
+ spin_lock_irqsave(&dev->av_tbl.lock, flags);
+ for (i = 0; i < dev->av_tbl.num_ah; i++) {
+ if (av->valid == 0) {
+ av->valid = OCRDMA_AV_VALID;
+ ah->av = av;
+ ah->id = i;
+ status = 0;
+ break;
+ }
+ av++;
+ }
+ if (i == dev->av_tbl.num_ah)
+ status = -EAGAIN;
+ spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
+ return status;
+}
+
+int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&dev->av_tbl.lock, flags);
+ ah->av->valid = 0;
+ spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
+ return 0;
+}
+
+static int ocrdma_create_eqs(struct ocrdma_dev *dev)
+{
+ int num_eq, i, status = 0;
+ int irq;
+ unsigned long flags = 0;
+
+ num_eq = dev->nic_info.msix.num_vectors -
+ dev->nic_info.msix.start_vector;
+ if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) {
+ num_eq = 1;
+ flags = IRQF_SHARED;
+ } else {
+ num_eq = min_t(u32, num_eq, num_online_cpus());
+ }
+
+ if (!num_eq)
+ return -EINVAL;
+
+ dev->eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
+ if (!dev->eq_tbl)
+ return -ENOMEM;
+
+ for (i = 0; i < num_eq; i++) {
+ status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
+ OCRDMA_EQ_LEN);
+ if (status) {
+ status = -EINVAL;
+ break;
+ }
+ sprintf(dev->eq_tbl[i].irq_name, "ocrdma%d-%d",
+ dev->id, i);
+ irq = ocrdma_get_irq(dev, &dev->eq_tbl[i]);
+ status = request_irq(irq, ocrdma_irq_handler, flags,
+ dev->eq_tbl[i].irq_name,
+ &dev->eq_tbl[i]);
+ if (status)
+ goto done;
+ dev->eq_cnt += 1;
+ }
+ /* one EQ is sufficient for the data path to work */
+ return 0;
+done:
+ ocrdma_destroy_eqs(dev);
+ return status;
+}
+
+int ocrdma_init_hw(struct ocrdma_dev *dev)
+{
+ int status;
+
+ /* create the eqs */
+ status = ocrdma_create_eqs(dev);
+ if (status)
+ goto qpeq_err;
+ status = ocrdma_create_mq(dev);
+ if (status)
+ goto mq_err;
+ status = ocrdma_mbx_query_fw_config(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_query_dev(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_query_fw_ver(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_create_ah_tbl(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_get_phy_info(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_get_ctrl_attribs(dev);
+ if (status)
+ goto conf_err;
+
+ return 0;
+
+conf_err:
+ ocrdma_destroy_mq(dev);
+mq_err:
+ ocrdma_destroy_eqs(dev);
+qpeq_err:
+ pr_err("%s() status=%d\n", __func__, status);
+ return status;
+}
+
+void ocrdma_cleanup_hw(struct ocrdma_dev *dev)
+{
+ ocrdma_mbx_delete_ah_tbl(dev);
+
+ /* cleanup the eqs */
+ ocrdma_destroy_eqs(dev);
+
+ /* cleanup the control path */
+ ocrdma_destroy_mq(dev);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
new file mode 100644
index 00000000000..e513f729314
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -0,0 +1,138 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) CNA Adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_HW_H__
+#define __OCRDMA_HW_H__
+
+#include "ocrdma_sli.h"
+
+static inline void ocrdma_cpu_to_le32(void *dst, u32 len)
+{
+#ifdef __BIG_ENDIAN
+ int i = 0;
+ u32 *src_ptr = dst;
+ u32 *dst_ptr = dst;
+ for (; i < (len / 4); i++)
+ *(dst_ptr + i) = cpu_to_le32p(src_ptr + i);
+#endif
+}
+
+static inline void ocrdma_le32_to_cpu(void *dst, u32 len)
+{
+#ifdef __BIG_ENDIAN
+ int i = 0;
+ u32 *src_ptr = dst;
+ u32 *dst_ptr = dst;
+ for (; i < (len / sizeof(u32)); i++)
+ *(dst_ptr + i) = le32_to_cpu(*(src_ptr + i));
+#endif
+}
+
+static inline void ocrdma_copy_cpu_to_le32(void *dst, void *src, u32 len)
+{
+#ifdef __BIG_ENDIAN
+ int i = 0;
+ u32 *src_ptr = src;
+ u32 *dst_ptr = dst;
+ for (; i < (len / sizeof(u32)); i++)
+ *(dst_ptr + i) = cpu_to_le32p(src_ptr + i);
+#else
+ memcpy(dst, src, len);
+#endif
+}
+
+static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len)
+{
+#ifdef __BIG_ENDIAN
+ int i = 0;
+ u32 *src_ptr = src;
+ u32 *dst_ptr = dst;
+ for (; i < len / sizeof(u32); i++)
+ *(dst_ptr + i) = le32_to_cpu(*(src_ptr + i));
+#else
+ memcpy(dst, src, len);
+#endif
+}
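+/*
+ * Editor's note: the four conversion helpers above only do real work on
+ * big-endian hosts; on little-endian builds the #ifdef bodies compile away
+ * (or reduce to a plain memcpy for the two copying variants).
+ */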
+
+static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid)
+{
+ return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size);
+}
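+/*
+ * Editor's note (illustrative, not part of the original patch): each
+ * protection domain is given its own doorbell page, so with a db_page_size
+ * of 4096 bytes, pdid 2 resolves to unmapped_db + 8192.
+ */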
+
+int ocrdma_init_hw(struct ocrdma_dev *);
+void ocrdma_cleanup_hw(struct ocrdma_dev *);
+
+enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps);
+void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
+ bool solicited, u16 cqe_popped);
+
+/* verbs-specific mailbox commands */
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
+int ocrdma_query_config(struct ocrdma_dev *,
+ struct ocrdma_mbx_query_config *config);
+
+int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
+int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
+
+int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
+ u32 pd_id, int addr_check);
+int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey);
+
+int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
+ u32 pd_id, int acc);
+int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
+ int entries, int dpp_cq, u16 pd_id);
+int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
+
+int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
+ u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
+ u16 *dpp_credit_lmt);
+int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
+ struct ib_qp_attr *attrs, int attr_mask);
+int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
+ struct ocrdma_qp_params *param);
+int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
+int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *,
+ struct ib_srq_init_attr *,
+ struct ocrdma_pd *);
+int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *);
+int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *);
+int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *);
+
+int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *);
+int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *);
+
+int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
+ enum ib_qp_state *old_ib_state);
+bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
+bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
+void ocrdma_flush_qp(struct ocrdma_qp *);
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
+
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
+char *port_speed_string(struct ocrdma_dev *dev);
+#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
new file mode 100644
index 00000000000..7c504e07974
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -0,0 +1,585 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include <linux/netdevice.h>
+#include <net/addrconf.h>
+
+#include "ocrdma.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_ah.h"
+#include "be_roce.h"
+#include "ocrdma_hw.h"
+#include "ocrdma_stats.h"
+#include "ocrdma_abi.h"
+
+MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
+MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
+MODULE_AUTHOR("Emulex Corporation");
+MODULE_LICENSE("GPL");
+
+static LIST_HEAD(ocrdma_dev_list);
+static DEFINE_SPINLOCK(ocrdma_devlist_lock);
+static DEFINE_IDR(ocrdma_dev_id);
+
+static union ib_gid ocrdma_zero_sgid;
+
+void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
+{
+ u8 mac_addr[6];
+
+ memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
+ guid[0] = mac_addr[0] ^ 2;
+ guid[1] = mac_addr[1];
+ guid[2] = mac_addr[2];
+ guid[3] = 0xff;
+ guid[4] = 0xfe;
+ guid[5] = mac_addr[3];
+ guid[6] = mac_addr[4];
+ guid[7] = mac_addr[5];
+}
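+/*
+ * Editor's note (illustrative, not part of the original patch): this is the
+ * usual MAC-to-EUI-64 mapping; e.g. a MAC of 00:11:22:33:44:55 yields the
+ * GUID 02:11:22:ff:fe:33:44:55 (the locally-administered bit of the first
+ * octet is flipped and ff:fe is inserted in the middle).
+ */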
+
+static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid)
+{
+ int i;
+ unsigned long flags;
+
+ memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
+
+ spin_lock_irqsave(&dev->sgid_lock, flags);
+ for (i = 0; i < OCRDMA_MAX_SGID; i++) {
+ if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
+ sizeof(union ib_gid))) {
+ /* found free entry */
+ memcpy(&dev->sgid_tbl[i], new_sgid,
+ sizeof(union ib_gid));
+ spin_unlock_irqrestore(&dev->sgid_lock, flags);
+ return true;
+ } else if (!memcmp(&dev->sgid_tbl[i], new_sgid,
+ sizeof(union ib_gid))) {
+ /* entry already present, no addition is required. */
+ spin_unlock_irqrestore(&dev->sgid_lock, flags);
+ return false;
+ }
+ }
+ spin_unlock_irqrestore(&dev->sgid_lock, flags);
+ return false;
+}
+
+static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid)
+{
+ int found = false;
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->sgid_lock, flags);
+ /* the first entry is the default sgid, which cannot be deleted. */
+ for (i = 1; i < OCRDMA_MAX_SGID; i++) {
+ if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) {
+ /* found matching entry */
+ memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&dev->sgid_lock, flags);
+ return found;
+}
+
+static int ocrdma_addr_event(unsigned long event, struct net_device *netdev,
+ union ib_gid *gid)
+{
+ struct ib_event gid_event;
+ struct ocrdma_dev *dev;
+ bool found = false;
+ bool updated = false;
+ bool is_vlan = false;
+
+ is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
+ if (is_vlan)
+ netdev = rdma_vlan_dev_real_dev(netdev);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
+ if (dev->nic_info.netdev == netdev) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!found)
+ return NOTIFY_DONE;
+
+ mutex_lock(&dev->dev_lock);
+ switch (event) {
+ case NETDEV_UP:
+ updated = ocrdma_add_sgid(dev, gid);
+ break;
+ case NETDEV_DOWN:
+ updated = ocrdma_del_sgid(dev, gid);
+ break;
+ default:
+ break;
+ }
+ if (updated) {
+ /* GID table updated, notify the consumers about it */
+ gid_event.device = &dev->ibdev;
+ gid_event.element.port_num = 1;
+ gid_event.event = IB_EVENT_GID_CHANGE;
+ ib_dispatch_event(&gid_event);
+ }
+ mutex_unlock(&dev->dev_lock);
+ return NOTIFY_OK;
+}
+
+static int ocrdma_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ union ib_gid gid;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+ return ocrdma_addr_event(event, netdev, &gid);
+}
+
+static struct notifier_block ocrdma_inetaddr_notifier = {
+ .notifier_call = ocrdma_inetaddr_event
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static int ocrdma_inet6addr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *netdev = ifa->idev->dev;
+ return ocrdma_addr_event(event, netdev, gid);
+}
+
+static struct notifier_block ocrdma_inet6addr_notifier = {
+ .notifier_call = ocrdma_inet6addr_event
+};
+
+#endif /* CONFIG_IPV6 */
+
+static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int ocrdma_register_device(struct ocrdma_dev *dev)
+{
+ strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
+ ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
+ memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
+ sizeof(OCRDMA_NODE_DESC));
+ dev->ibdev.owner = THIS_MODULE;
+ dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION;
+ dev->ibdev.uverbs_cmd_mask =
+ OCRDMA_UVERBS(GET_CONTEXT) |
+ OCRDMA_UVERBS(QUERY_DEVICE) |
+ OCRDMA_UVERBS(QUERY_PORT) |
+ OCRDMA_UVERBS(ALLOC_PD) |
+ OCRDMA_UVERBS(DEALLOC_PD) |
+ OCRDMA_UVERBS(REG_MR) |
+ OCRDMA_UVERBS(DEREG_MR) |
+ OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
+ OCRDMA_UVERBS(CREATE_CQ) |
+ OCRDMA_UVERBS(RESIZE_CQ) |
+ OCRDMA_UVERBS(DESTROY_CQ) |
+ OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
+ OCRDMA_UVERBS(CREATE_QP) |
+ OCRDMA_UVERBS(MODIFY_QP) |
+ OCRDMA_UVERBS(QUERY_QP) |
+ OCRDMA_UVERBS(DESTROY_QP) |
+ OCRDMA_UVERBS(POLL_CQ) |
+ OCRDMA_UVERBS(POST_SEND) |
+ OCRDMA_UVERBS(POST_RECV);
+
+ dev->ibdev.uverbs_cmd_mask |=
+ OCRDMA_UVERBS(CREATE_AH) |
+ OCRDMA_UVERBS(MODIFY_AH) |
+ OCRDMA_UVERBS(QUERY_AH) |
+ OCRDMA_UVERBS(DESTROY_AH);
+
+ dev->ibdev.node_type = RDMA_NODE_IB_CA;
+ dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = 1;
+
+ /* mandatory verbs. */
+ dev->ibdev.query_device = ocrdma_query_device;
+ dev->ibdev.query_port = ocrdma_query_port;
+ dev->ibdev.modify_port = ocrdma_modify_port;
+ dev->ibdev.query_gid = ocrdma_query_gid;
+ dev->ibdev.get_link_layer = ocrdma_link_layer;
+ dev->ibdev.alloc_pd = ocrdma_alloc_pd;
+ dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
+
+ dev->ibdev.create_cq = ocrdma_create_cq;
+ dev->ibdev.destroy_cq = ocrdma_destroy_cq;
+ dev->ibdev.resize_cq = ocrdma_resize_cq;
+
+ dev->ibdev.create_qp = ocrdma_create_qp;
+ dev->ibdev.modify_qp = ocrdma_modify_qp;
+ dev->ibdev.query_qp = ocrdma_query_qp;
+ dev->ibdev.destroy_qp = ocrdma_destroy_qp;
+
+ dev->ibdev.query_pkey = ocrdma_query_pkey;
+ dev->ibdev.create_ah = ocrdma_create_ah;
+ dev->ibdev.destroy_ah = ocrdma_destroy_ah;
+ dev->ibdev.query_ah = ocrdma_query_ah;
+ dev->ibdev.modify_ah = ocrdma_modify_ah;
+
+ dev->ibdev.poll_cq = ocrdma_poll_cq;
+ dev->ibdev.post_send = ocrdma_post_send;
+ dev->ibdev.post_recv = ocrdma_post_recv;
+ dev->ibdev.req_notify_cq = ocrdma_arm_cq;
+
+ dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
+ dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
+ dev->ibdev.dereg_mr = ocrdma_dereg_mr;
+ dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
+
+ dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr;
+ dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
+ dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
+
+ /* mandatory to support user-space verbs consumers. */
+ dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
+ dev->ibdev.mmap = ocrdma_mmap;
+ dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+
+ dev->ibdev.process_mad = ocrdma_process_mad;
+
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
+ dev->ibdev.uverbs_cmd_mask |=
+ OCRDMA_UVERBS(CREATE_SRQ) |
+ OCRDMA_UVERBS(MODIFY_SRQ) |
+ OCRDMA_UVERBS(QUERY_SRQ) |
+ OCRDMA_UVERBS(DESTROY_SRQ) |
+ OCRDMA_UVERBS(POST_SRQ_RECV);
+
+ dev->ibdev.create_srq = ocrdma_create_srq;
+ dev->ibdev.modify_srq = ocrdma_modify_srq;
+ dev->ibdev.query_srq = ocrdma_query_srq;
+ dev->ibdev.destroy_srq = ocrdma_destroy_srq;
+ dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
+ }
+ return ib_register_device(&dev->ibdev, NULL);
+}
+
+static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
+{
+ mutex_init(&dev->dev_lock);
+ dev->sgid_tbl = kzalloc(sizeof(union ib_gid) *
+ OCRDMA_MAX_SGID, GFP_KERNEL);
+ if (!dev->sgid_tbl)
+ goto alloc_err;
+ spin_lock_init(&dev->sgid_lock);
+
+ dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) *
+ OCRDMA_MAX_CQ, GFP_KERNEL);
+ if (!dev->cq_tbl)
+ goto alloc_err;
+
+ if (dev->attr.max_qp) {
+ dev->qp_tbl = kzalloc(sizeof(struct ocrdma_qp *) *
+ OCRDMA_MAX_QP, GFP_KERNEL);
+ if (!dev->qp_tbl)
+ goto alloc_err;
+ }
+ spin_lock_init(&dev->av_tbl.lock);
+ spin_lock_init(&dev->flush_q_lock);
+ return 0;
+alloc_err:
+ pr_err("%s(%d) error.\n", __func__, dev->id);
+ return -ENOMEM;
+}
+
+static void ocrdma_free_resources(struct ocrdma_dev *dev)
+{
+ kfree(dev->qp_tbl);
+ kfree(dev->cq_tbl);
+ kfree(dev->sgid_tbl);
+}
+
+/* OCRDMA sysfs interface */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+
+static struct device_attribute *ocrdma_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver
+};
+
+static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+ device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
+}
+
+static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
+{
+ int status = 0, i;
+ struct ocrdma_dev *dev;
+
+ dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
+ if (!dev) {
+ pr_err("Unable to allocate ib device\n");
+ return NULL;
+ }
+ dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
+ if (!dev->mbx_cmd)
+ goto idr_err;
+
+ memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
+ dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL);
+ if (dev->id < 0)
+ goto idr_err;
+
+ status = ocrdma_init_hw(dev);
+ if (status)
+ goto init_err;
+
+ status = ocrdma_alloc_resources(dev);
+ if (status)
+ goto alloc_err;
+
+ status = ocrdma_register_device(dev);
+ if (status)
+ goto alloc_err;
+
+ for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+ if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
+ goto sysfs_err;
+ spin_lock(&ocrdma_devlist_lock);
+ list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
+ spin_unlock(&ocrdma_devlist_lock);
+ /* Init stats */
+ ocrdma_add_port_stats(dev);
+
+ pr_info("%s %s: %s \"%s\" port %d\n",
+ dev_name(&dev->nic_info.pdev->dev), hca_name(dev),
+ port_speed_string(dev), dev->model_number,
+ dev->hba_port_num);
+ pr_info("%s ocrdma%d driver loaded successfully\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
+ return dev;
+
+sysfs_err:
+ ocrdma_remove_sysfiles(dev);
+alloc_err:
+ ocrdma_free_resources(dev);
+ ocrdma_cleanup_hw(dev);
+init_err:
+ idr_remove(&ocrdma_dev_id, dev->id);
+idr_err:
+ kfree(dev->mbx_cmd);
+ ib_dealloc_device(&dev->ibdev);
+ pr_err("%s() leaving. ret=%d\n", __func__, status);
+ return NULL;
+}
+
+static void ocrdma_remove_free(struct rcu_head *rcu)
+{
+ struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
+
+ idr_remove(&ocrdma_dev_id, dev->id);
+ kfree(dev->mbx_cmd);
+ ib_dealloc_device(&dev->ibdev);
+}
+
+static void ocrdma_remove(struct ocrdma_dev *dev)
+{
+ /* first unregister with the stack to stop all active traffic
+ * from the registered clients.
+ */
+ ocrdma_rem_port_stats(dev);
+ ocrdma_remove_sysfiles(dev);
+
+ ib_unregister_device(&dev->ibdev);
+
+ spin_lock(&ocrdma_devlist_lock);
+ list_del_rcu(&dev->entry);
+ spin_unlock(&ocrdma_devlist_lock);
+
+ ocrdma_free_resources(dev);
+ ocrdma_cleanup_hw(dev);
+
+ call_rcu(&dev->rcu, ocrdma_remove_free);
+}
+
+static int ocrdma_open(struct ocrdma_dev *dev)
+{
+ struct ib_event port_event;
+
+ port_event.event = IB_EVENT_PORT_ACTIVE;
+ port_event.element.port_num = 1;
+ port_event.device = &dev->ibdev;
+ ib_dispatch_event(&port_event);
+ return 0;
+}
+
+static int ocrdma_close(struct ocrdma_dev *dev)
+{
+ int i;
+ struct ocrdma_qp *qp, **cur_qp;
+ struct ib_event err_event;
+ struct ib_qp_attr attrs;
+ int attr_mask = IB_QP_STATE;
+
+ attrs.qp_state = IB_QPS_ERR;
+ mutex_lock(&dev->dev_lock);
+ if (dev->qp_tbl) {
+ cur_qp = dev->qp_tbl;
+ for (i = 0; i < OCRDMA_MAX_QP; i++) {
+ qp = cur_qp[i];
+ if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
+ /* change the QP state to ERROR */
+ _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
+
+ err_event.event = IB_EVENT_QP_FATAL;
+ err_event.element.qp = &qp->ibqp;
+ err_event.device = &dev->ibdev;
+ ib_dispatch_event(&err_event);
+ }
+ }
+ }
+ mutex_unlock(&dev->dev_lock);
+
+ err_event.event = IB_EVENT_PORT_ERR;
+ err_event.element.port_num = 1;
+ err_event.device = &dev->ibdev;
+ ib_dispatch_event(&err_event);
+ return 0;
+}
+
+/* Event handling via the NIC driver ensures that all NIC-specific
+ * initialization is done before the RoCE driver notifies the
+ * event to the stack.
+ */
+static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
+{
+ switch (event) {
+ case BE_DEV_UP:
+ ocrdma_open(dev);
+ break;
+ case BE_DEV_DOWN:
+ ocrdma_close(dev);
+ break;
+ }
+}
+
+static struct ocrdma_driver ocrdma_drv = {
+ .name = "ocrdma_driver",
+ .add = ocrdma_add,
+ .remove = ocrdma_remove,
+ .state_change_handler = ocrdma_event_handler,
+ .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION,
+};
+
+static void ocrdma_unregister_inet6addr_notifier(void)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier);
+#endif
+}
+
+static void ocrdma_unregister_inetaddr_notifier(void)
+{
+ unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+}
+
+static int __init ocrdma_init_module(void)
+{
+ int status;
+
+ ocrdma_init_debugfs();
+
+ status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+ if (status)
+ return status;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
+ if (status)
+ goto err_notifier6;
+#endif
+
+ status = be_roce_register_driver(&ocrdma_drv);
+ if (status)
+ goto err_be_reg;
+
+ return 0;
+
+err_be_reg:
+ ocrdma_unregister_inet6addr_notifier();
+err_notifier6:
+ ocrdma_unregister_inetaddr_notifier();
+ return status;
+}
+
+static void __exit ocrdma_exit_module(void)
+{
+ be_roce_unregister_driver(&ocrdma_drv);
+ ocrdma_unregister_inet6addr_notifier();
+ ocrdma_unregister_inetaddr_notifier();
+ ocrdma_rem_debugfs();
+}
+
+module_init(ocrdma_init_module);
+module_exit(ocrdma_exit_module);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
new file mode 100644
index 00000000000..96c9ee602ba
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -0,0 +1,1953 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_SLI_H__
+#define __OCRDMA_SLI_H__
+
+#define Bit(_b) (1 << (_b))
+
+enum {
+ OCRDMA_ASIC_GEN_SKH_R = 0x04,
+ OCRDMA_ASIC_GEN_LANCER = 0x0B
+};
+
+enum {
+ OCRDMA_ASIC_REV_A0 = 0x00,
+ OCRDMA_ASIC_REV_B0 = 0x10,
+ OCRDMA_ASIC_REV_C0 = 0x20
+};
+
+#define OCRDMA_SUBSYS_ROCE 10
+enum {
+ OCRDMA_CMD_QUERY_CONFIG = 1,
+ OCRDMA_CMD_ALLOC_PD,
+ OCRDMA_CMD_DEALLOC_PD,
+
+ OCRDMA_CMD_CREATE_AH_TBL,
+ OCRDMA_CMD_DELETE_AH_TBL,
+
+ OCRDMA_CMD_CREATE_QP,
+ OCRDMA_CMD_QUERY_QP,
+ OCRDMA_CMD_MODIFY_QP,
+ OCRDMA_CMD_DELETE_QP,
+
+ OCRDMA_CMD_RSVD1,
+ OCRDMA_CMD_ALLOC_LKEY,
+ OCRDMA_CMD_DEALLOC_LKEY,
+ OCRDMA_CMD_REGISTER_NSMR,
+ OCRDMA_CMD_REREGISTER_NSMR,
+ OCRDMA_CMD_REGISTER_NSMR_CONT,
+ OCRDMA_CMD_QUERY_NSMR,
+ OCRDMA_CMD_ALLOC_MW,
+ OCRDMA_CMD_QUERY_MW,
+
+ OCRDMA_CMD_CREATE_SRQ,
+ OCRDMA_CMD_QUERY_SRQ,
+ OCRDMA_CMD_MODIFY_SRQ,
+ OCRDMA_CMD_DELETE_SRQ,
+
+ OCRDMA_CMD_ATTACH_MCAST,
+ OCRDMA_CMD_DETACH_MCAST,
+ OCRDMA_CMD_GET_RDMA_STATS,
+
+ OCRDMA_CMD_MAX
+};
+
+#define OCRDMA_SUBSYS_COMMON 1
+enum {
+ OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1 = 5,
+ OCRDMA_CMD_CREATE_CQ = 12,
+ OCRDMA_CMD_CREATE_EQ = 13,
+ OCRDMA_CMD_CREATE_MQ = 21,
+ OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32,
+ OCRDMA_CMD_GET_FW_VER = 35,
+ OCRDMA_CMD_DELETE_MQ = 53,
+ OCRDMA_CMD_DELETE_CQ = 54,
+ OCRDMA_CMD_DELETE_EQ = 55,
+ OCRDMA_CMD_GET_FW_CONFIG = 58,
+ OCRDMA_CMD_CREATE_MQ_EXT = 90,
+ OCRDMA_CMD_PHY_DETAILS = 102
+};
+
+enum {
+ QTYPE_EQ = 1,
+ QTYPE_CQ = 2,
+ QTYPE_MCCQ = 3
+};
+
+#define OCRDMA_MAX_SGID (8)
+
+#define OCRDMA_MAX_QP 2048
+#define OCRDMA_MAX_CQ 2048
+#define OCRDMA_MAX_STAG 8192
+
+enum {
+ OCRDMA_DB_RQ_OFFSET = 0xE0,
+ OCRDMA_DB_GEN2_RQ_OFFSET = 0x100,
+ OCRDMA_DB_SQ_OFFSET = 0x60,
+ OCRDMA_DB_GEN2_SQ_OFFSET = 0x1C0,
+ OCRDMA_DB_SRQ_OFFSET = OCRDMA_DB_RQ_OFFSET,
+ OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET,
+ OCRDMA_DB_CQ_OFFSET = 0x120,
+ OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET,
+ OCRDMA_DB_MQ_OFFSET = 0x140,
+
+ OCRDMA_DB_SQ_SHIFT = 16,
+ OCRDMA_DB_RQ_SHIFT = 24
+};
+
+#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */
+#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */
+/* qid #2 msbits at 12-11 */
+#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1
+#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */
+/* Rearm bit */
+#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */
+/* solicited bit */
+#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */
+
+#define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */
+#define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */
+#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */
+
+/* Clear the interrupt for this eq */
+#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */
+/* Must be 1 */
+#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */
+/* Number of event entries processed */
+#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */
+/* Rearm bit */
+#define OCRDMA_REARM_SHIFT (29) /* bit 29 */
+
+#define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */
+/* Number of entries posted */
+#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */
+
+#define OCRDMA_MIN_HPAGE_SIZE (4096)
+
+#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
+#define OCRDMA_MAX_Q_PAGES (8)
+
+#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C
+#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF
+#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00
+#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08
+/*
+# 0: 4K Bytes
+# 1: 8K Bytes
+# 2: 16K Bytes
+# 3: 32K Bytes
+# 4: 64K Bytes
+# 5: 128K Bytes
+# 6: 256K Bytes
+# 7: 512K Bytes
+*/
+#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8)
+#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
+
+#define MAX_OCRDMA_QP_PAGES (8)
+#define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE)
+
+#define OCRDMA_CREATE_CQ_MAX_PAGES (4)
+#define OCRDMA_DPP_CQE_SIZE (4)
+
+#define OCRDMA_GEN2_MAX_CQE 1024
+#define OCRDMA_GEN2_CQ_PAGE_SIZE 4096
+#define OCRDMA_GEN2_WQE_SIZE 256
+#define OCRDMA_MAX_CQE 4095
+#define OCRDMA_CQ_PAGE_SIZE 16384
+#define OCRDMA_WQE_SIZE 128
+#define OCRDMA_WQE_STRIDE 8
+#define OCRDMA_WQE_ALIGN_BYTES 16
+
+#define MAX_OCRDMA_SRQ_PAGES MAX_OCRDMA_QP_PAGES
+
+enum {
+ OCRDMA_MCH_OPCODE_SHIFT = 0,
+ OCRDMA_MCH_OPCODE_MASK = 0xFF,
+ OCRDMA_MCH_SUBSYS_SHIFT = 8,
+ OCRDMA_MCH_SUBSYS_MASK = 0xFF00
+};
+
+/* mailbox cmd header */
+struct ocrdma_mbx_hdr {
+ u32 subsys_op;
+ u32 timeout; /* in seconds */
+ u32 cmd_len;
+ u32 rsvd_version;
+};
+
+enum {
+ OCRDMA_MBX_RSP_OPCODE_SHIFT = 0,
+ OCRDMA_MBX_RSP_OPCODE_MASK = 0xFF,
+ OCRDMA_MBX_RSP_SUBSYS_SHIFT = 8,
+ OCRDMA_MBX_RSP_SUBSYS_MASK = 0xFF << OCRDMA_MBX_RSP_SUBSYS_SHIFT,
+
+ OCRDMA_MBX_RSP_STATUS_SHIFT = 0,
+ OCRDMA_MBX_RSP_STATUS_MASK = 0xFF,
+ OCRDMA_MBX_RSP_ASTATUS_SHIFT = 8,
+ OCRDMA_MBX_RSP_ASTATUS_MASK = 0xFF << OCRDMA_MBX_RSP_ASTATUS_SHIFT
+};
+
+/* mailbox cmd response */
+struct ocrdma_mbx_rsp {
+ u32 subsys_op;
+ u32 status;
+ u32 rsp_len;
+ u32 add_rsp_len;
+};
+
+enum {
+ OCRDMA_MQE_EMBEDDED = 1,
+ OCRDMA_MQE_NONEMBEDDED = 0
+};
+
+struct ocrdma_mqe_sge {
+ u32 pa_lo;
+ u32 pa_hi;
+ u32 len;
+};
+
+enum {
+ OCRDMA_MQE_HDR_EMB_SHIFT = 0,
+ OCRDMA_MQE_HDR_EMB_MASK = Bit(0),
+ OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3,
+ OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT,
+ OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24,
+ OCRDMA_MQE_HDR_SPECIAL_MASK = 0xFF << OCRDMA_MQE_HDR_SPECIAL_SHIFT
+};
+
+struct ocrdma_mqe_hdr {
+ u32 spcl_sge_cnt_emb;
+ u32 pyld_len;
+ u32 tag_lo;
+ u32 tag_hi;
+ u32 rsvd3;
+};
+
+struct ocrdma_mqe_emb_cmd {
+ struct ocrdma_mbx_hdr mch;
+ u8 pyld[220];
+};
+
+struct ocrdma_mqe {
+ struct ocrdma_mqe_hdr hdr;
+ union {
+ struct ocrdma_mqe_emb_cmd emb_req;
+ struct {
+ struct ocrdma_mqe_sge sge[19];
+ } nonemb_req;
+ u8 cmd[236];
+ struct ocrdma_mbx_rsp rsp;
+ } u;
+};
+
+#define OCRDMA_EQ_LEN 4096
+#define OCRDMA_MQ_CQ_LEN 256
+#define OCRDMA_MQ_LEN 128
+
+#define PAGE_SHIFT_4K 12
+#define PAGE_SIZE_4K (1 << PAGE_SHIFT_4K)
+
+/* Returns number of pages spanned by the data starting at the given addr */
+#define PAGES_4K_SPANNED(_address, size) \
+ ((u32)((((size_t)(_address) & (PAGE_SIZE_4K - 1)) + \
+ (size) + (PAGE_SIZE_4K - 1)) >> PAGE_SHIFT_4K))
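+/*
+ * Editor's note (illustrative, not part of the original patch): the start
+ * address' offset within its 4K page is added before rounding up, so e.g.
+ * _address = 0x1008 with size = 8192 spans (8 + 8192 + 4095) >> 12 = 3 pages.
+ */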
+
+struct ocrdma_delete_q_req {
+ struct ocrdma_mbx_hdr req;
+ u32 id;
+};
+
+struct ocrdma_pa {
+ u32 lo;
+ u32 hi;
+};
+
+#define MAX_OCRDMA_EQ_PAGES (8)
+struct ocrdma_create_eq_req {
+ struct ocrdma_mbx_hdr req;
+ u32 num_pages;
+ u32 valid;
+ u32 cnt;
+ u32 delay;
+ u32 rsvd;
+ struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES];
+};
+
+enum {
+ OCRDMA_CREATE_EQ_VALID = Bit(29),
+ OCRDMA_CREATE_EQ_CNT_SHIFT = 26,
+ OCRDMA_CREATE_CQ_DELAY_SHIFT = 13,
+};
+
+struct ocrdma_create_eq_rsp {
+ struct ocrdma_mbx_rsp rsp;
+ u32 vector_eqid;
+};
+
+#define OCRDMA_EQ_MINOR_OTHER (0x1)
+
+enum {
+ OCRDMA_MCQE_STATUS_SHIFT = 0,
+ OCRDMA_MCQE_STATUS_MASK = 0xFFFF,
+ OCRDMA_MCQE_ESTATUS_SHIFT = 16,
+ OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT,
+ OCRDMA_MCQE_CONS_SHIFT = 27,
+ OCRDMA_MCQE_CONS_MASK = Bit(27),
+ OCRDMA_MCQE_CMPL_SHIFT = 28,
+ OCRDMA_MCQE_CMPL_MASK = Bit(28),
+ OCRDMA_MCQE_AE_SHIFT = 30,
+ OCRDMA_MCQE_AE_MASK = Bit(30),
+ OCRDMA_MCQE_VALID_SHIFT = 31,
+ OCRDMA_MCQE_VALID_MASK = Bit(31)
+};
+
+struct ocrdma_mcqe {
+ u32 status;
+ u32 tag_lo;
+ u32 tag_hi;
+ u32 valid_ae_cmpl_cons;
+};
+
+enum {
+ OCRDMA_AE_MCQE_QPVALID = Bit(31),
+ OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF,
+
+ OCRDMA_AE_MCQE_CQVALID = Bit(31),
+ OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF,
+ OCRDMA_AE_MCQE_VALID = Bit(31),
+ OCRDMA_AE_MCQE_AE = Bit(30),
+ OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16,
+ OCRDMA_AE_MCQE_EVENT_TYPE_MASK =
+ 0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT,
+ OCRDMA_AE_MCQE_EVENT_CODE_SHIFT = 8,
+ OCRDMA_AE_MCQE_EVENT_CODE_MASK =
+ 0xFF << OCRDMA_AE_MCQE_EVENT_CODE_SHIFT
+};
+struct ocrdma_ae_mcqe {
+ u32 qpvalid_qpid;
+ u32 cqvalid_cqid;
+ u32 evt_tag;
+ u32 valid_ae_event;
+};
+
+enum {
+ OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT = 0,
+ OCRDMA_AE_PVID_MCQE_ENABLED_MASK = 0xFF,
+ OCRDMA_AE_PVID_MCQE_TAG_SHIFT = 16,
+ OCRDMA_AE_PVID_MCQE_TAG_MASK = 0xFFFF << OCRDMA_AE_PVID_MCQE_TAG_SHIFT
+};
+
+struct ocrdma_ae_pvid_mcqe {
+ u32 tag_enabled;
+ u32 event_tag;
+ u32 rsvd1;
+ u32 rsvd2;
+};
+
+enum {
+ OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT = 16,
+ OCRDMA_AE_MPA_MCQE_REQ_ID_MASK = 0xFFFF <<
+ OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT,
+
+ OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT = 8,
+ OCRDMA_AE_MPA_MCQE_EVENT_CODE_MASK = 0xFF <<
+ OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT,
+ OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT = 16,
+ OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF <<
+ OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT,
+ OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30,
+ OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30),
+ OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31,
+ OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31)
+};
+
+struct ocrdma_ae_mpa_mcqe {
+ u32 req_id;
+ u32 w1;
+ u32 w2;
+ u32 valid_ae_event;
+};
+
+enum {
+ OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT = 0,
+ OCRDMA_AE_QP_MCQE_NEW_QP_STATE_MASK = 0xFFFF,
+ OCRDMA_AE_QP_MCQE_QP_ID_SHIFT = 16,
+ OCRDMA_AE_QP_MCQE_QP_ID_MASK = 0xFFFF <<
+ OCRDMA_AE_QP_MCQE_QP_ID_SHIFT,
+
+ OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT = 8,
+ OCRDMA_AE_QP_MCQE_EVENT_CODE_MASK = 0xFF <<
+ OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT,
+ OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT = 16,
+ OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF <<
+ OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT,
+ OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30,
+ OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30),
+ OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31,
+ OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31)
+};
+
+struct ocrdma_ae_qp_mcqe {
+ u32 qp_id_state;
+ u32 w1;
+ u32 w2;
+ u32 valid_ae_event;
+};
+
+#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
+#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
+#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
+
+enum OCRDMA_ASYNC_EVENT_TYPE {
+ OCRDMA_CQ_ERROR = 0x00,
+ OCRDMA_CQ_OVERRUN_ERROR = 0x01,
+ OCRDMA_CQ_QPCAT_ERROR = 0x02,
+ OCRDMA_QP_ACCESS_ERROR = 0x03,
+ OCRDMA_QP_COMM_EST_EVENT = 0x04,
+ OCRDMA_SQ_DRAINED_EVENT = 0x05,
+ OCRDMA_DEVICE_FATAL_EVENT = 0x08,
+ OCRDMA_SRQCAT_ERROR = 0x0E,
+ OCRDMA_SRQ_LIMIT_EVENT = 0x0F,
+ OCRDMA_QP_LAST_WQE_EVENT = 0x10
+};
+
+/* mailbox command requests and responses */
+enum {
+ OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
+ OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2),
+ OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3,
+ OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3),
+ OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8,
+ OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT,
+ OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8,
+ OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF <<
+ OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF,
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF,
+ OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET = 24,
+ OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK = 0xFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK = 0xFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET,
+
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
+};
+
+struct ocrdma_mbx_query_config {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+ u32 qp_srq_cq_ird_ord;
+ u32 max_pd_ca_ack_delay;
+ u32 max_write_send_sge;
+ u32 max_ird_ord_per_qp;
+ u32 max_shared_ird_ord;
+ u32 max_mr;
+ u32 max_mr_size_lo;
+ u32 max_mr_size_hi;
+ u32 max_num_mr_pbl;
+ u32 max_mw;
+ u32 max_fmr;
+ u32 max_pages_per_frmr;
+ u32 max_mcast_group;
+ u32 max_mcast_qp_attach;
+ u32 max_total_mcast_qp_attach;
+ u32 wqe_rqe_stride_max_dpp_cqs;
+ u32 max_srq_rpir_qps;
+ u32 max_dpp_pds_credits;
+ u32 max_dpp_credits_pds_per_pd;
+ u32 max_wqes_rqes_per_q;
+ u32 max_cq_cqes_per_cq;
+ u32 max_srq_rqe_sge;
+};
+
+struct ocrdma_fw_ver_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u8 running_ver[32];
+};
+
+struct ocrdma_fw_conf_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 config_num;
+ u32 asic_revision;
+ u32 phy_port;
+ u32 fn_mode;
+ struct {
+ u32 mode;
+ u32 nic_wqid_base;
+ u32 nic_wq_tot;
+ u32 prot_wqid_base;
+ u32 prot_wq_tot;
+ u32 prot_rqid_base;
+ u32 prot_rqid_tot;
+ u32 rsvd[6];
+ } ulp[2];
+ u32 fn_capabilities;
+ u32 rsvd1;
+ u32 rsvd2;
+ u32 base_eqid;
+ u32 max_eq;
+};
+
+enum {
+ OCRDMA_FN_MODE_RDMA = 0x4
+};
+
+struct ocrdma_get_phy_info_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u16 phy_type;
+ u16 interface_type;
+ u32 misc_params;
+ u16 ext_phy_details;
+ u16 rsvd;
+ u16 auto_speeds_supported;
+ u16 fixed_speeds_supported;
+ u32 future_use[2];
+};
+
+enum {
+ OCRDMA_PHY_SPEED_ZERO = 0x0,
+ OCRDMA_PHY_SPEED_10MBPS = 0x1,
+ OCRDMA_PHY_SPEED_100MBPS = 0x2,
+ OCRDMA_PHY_SPEED_1GBPS = 0x4,
+ OCRDMA_PHY_SPEED_10GBPS = 0x8,
+ OCRDMA_PHY_SPEED_40GBPS = 0x20
+};
+
+struct ocrdma_get_link_speed_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u8 pt_port_num;
+ u8 link_duplex;
+ u8 phys_port_speed;
+ u8 phys_port_fault;
+ u16 rsvd1;
+ u16 qos_lnk_speed;
+ u8 logical_lnk_status;
+ u8 rsvd2[3];
+};
+
+enum {
+ OCRDMA_PHYS_LINK_SPEED_ZERO = 0x0,
+ OCRDMA_PHYS_LINK_SPEED_10MBPS = 0x1,
+ OCRDMA_PHYS_LINK_SPEED_100MBPS = 0x2,
+ OCRDMA_PHYS_LINK_SPEED_1GBPS = 0x3,
+ OCRDMA_PHYS_LINK_SPEED_10GBPS = 0x4,
+ OCRDMA_PHYS_LINK_SPEED_20GBPS = 0x5,
+ OCRDMA_PHYS_LINK_SPEED_25GBPS = 0x6,
+ OCRDMA_PHYS_LINK_SPEED_40GBPS = 0x7,
+ OCRDMA_PHYS_LINK_SPEED_100GBPS = 0x8
+};
+
+enum {
+ OCRDMA_CREATE_CQ_VER2 = 2,
+ OCRDMA_CREATE_CQ_VER3 = 3,
+
+ OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF,
+ OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16,
+ OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF,
+
+ OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12,
+ OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12),
+ OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14),
+ OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15),
+
+ OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF,
+ OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF
+};
+
+enum {
+ OCRDMA_CREATE_CQ_VER0 = 0,
+ OCRDMA_CREATE_CQ_DPP = 1,
+ OCRDMA_CREATE_CQ_TYPE_SHIFT = 24,
+ OCRDMA_CREATE_CQ_EQID_SHIFT = 22,
+
+ OCRDMA_CREATE_CQ_CNT_SHIFT = 27,
+ OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29),
+ OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31),
+ OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID |
+ OCRDMA_CREATE_CQ_FLAGS_EVENTABLE |
+ OCRDMA_CREATE_CQ_FLAGS_NODELAY
+};
+
+struct ocrdma_create_cq_cmd {
+ struct ocrdma_mbx_hdr req;
+ u32 pgsz_pgcnt;
+ u32 ev_cnt_flags;
+ u32 eqn;
+ u16 cqe_count;
+ u16 pd_id;
+ u32 rsvd6;
+ struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
+};
+
+struct ocrdma_create_cq {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_create_cq_cmd cmd;
+};
+
+enum {
+ OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
+};
+
+struct ocrdma_create_cq_cmd_rsp {
+ struct ocrdma_mbx_rsp rsp;
+ u32 cq_id;
+};
+
+struct ocrdma_create_cq_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_create_cq_cmd_rsp rsp;
+};
+
+enum {
+ OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22,
+ OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16,
+ OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16,
+ OCRDMA_CREATE_MQ_VALID = Bit(31),
+ OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0)
+};
+
+struct ocrdma_create_mq_req {
+ struct ocrdma_mbx_hdr req;
+ u32 cqid_pages;
+ u32 async_event_bitmap;
+ u32 async_cqid_ringsize;
+ u32 valid;
+ u32 async_cqid_valid;
+ u32 rsvd;
+ struct ocrdma_pa pa[8];
+};
+
+struct ocrdma_create_mq_rsp {
+ struct ocrdma_mbx_rsp rsp;
+ u32 id;
+};
+
+enum {
+ OCRDMA_DESTROY_CQ_QID_SHIFT = 0,
+ OCRDMA_DESTROY_CQ_QID_MASK = 0xFFFF,
+ OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT = 16,
+ OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_MASK = 0xFFFF <<
+ OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT
+};
+
+struct ocrdma_destroy_cq {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 bypass_flush_qid;
+};
+
+struct ocrdma_destroy_cq_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+enum {
+ OCRDMA_QPT_GSI = 1,
+ OCRDMA_QPT_RC = 2,
+ OCRDMA_QPT_UD = 4,
+};
+
+enum {
+ OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_PD_ID_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19,
+ OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29,
+ OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29),
+
+ OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0),
+ OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1,
+ OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1),
+ OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2,
+ OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2),
+ OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3,
+ OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3),
+ OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4,
+ OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4),
+ OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5,
+ OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5),
+ OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6,
+ OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6),
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7,
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7),
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8,
+ OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8),
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT,
+
+ OCRDMA_CREATE_QP_REQ_DPP_CQPID_SHIFT = 0,
+ OCRDMA_CREATE_QP_REQ_DPP_CQPID_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT = 16,
+ OCRDMA_CREATE_QP_REQ_DPP_CREDIT_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT
+};
+
+enum {
+ OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT = 16,
+ OCRDMA_CREATE_QP_RSP_DPP_PAGE_SHIFT = 1
+};
+
+#define MAX_OCRDMA_IRD_PAGES 4
+
+enum ocrdma_qp_flags {
+ OCRDMA_QP_MW_BIND = 1,
+ OCRDMA_QP_LKEY0 = (1 << 1),
+ OCRDMA_QP_FAST_REG = (1 << 2),
+ OCRDMA_QP_INB_RD = (1 << 6),
+ OCRDMA_QP_INB_WR = (1 << 7),
+};
+
+enum ocrdma_qp_state {
+ OCRDMA_QPS_RST = 0,
+ OCRDMA_QPS_INIT = 1,
+ OCRDMA_QPS_RTR = 2,
+ OCRDMA_QPS_RTS = 3,
+ OCRDMA_QPS_SQE = 4,
+ OCRDMA_QPS_SQ_DRAINING = 5,
+ OCRDMA_QPS_ERR = 6,
+ OCRDMA_QPS_SQD = 7
+};
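
/*
 * Editorial note, not part of this patch: the hardware QP states above
 * correspond roughly to the ib_qp_state values as sketched below (assuming
 * <rdma/ib_verbs.h>).  The exact mapping used by the driver lives in its
 * QP/mailbox code; this helper is only an illustration.
 */
static inline enum ib_qp_state example_ocrdma_to_ib_qps(enum ocrdma_qp_state s)
{
	switch (s) {
	case OCRDMA_QPS_RST:		return IB_QPS_RESET;
	case OCRDMA_QPS_INIT:		return IB_QPS_INIT;
	case OCRDMA_QPS_RTR:		return IB_QPS_RTR;
	case OCRDMA_QPS_RTS:		return IB_QPS_RTS;
	case OCRDMA_QPS_SQ_DRAINING:
	case OCRDMA_QPS_SQD:		return IB_QPS_SQD;
	case OCRDMA_QPS_SQE:		return IB_QPS_SQE;
	case OCRDMA_QPS_ERR:
	default:			return IB_QPS_ERR;
	}
}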
+
+struct ocrdma_create_qp_req {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 type_pgsz_pdn;
+ u32 max_wqe_rqe;
+ u32 max_sge_send_write;
+ u32 max_sge_recv_flags;
+ u32 max_ord_ird;
+ u32 num_wq_rq_pages;
+ u32 wqe_rqe_size;
+ u32 wq_rq_cqid;
+ struct ocrdma_pa wq_addr[MAX_OCRDMA_QP_PAGES];
+ struct ocrdma_pa rq_addr[MAX_OCRDMA_QP_PAGES];
+ u32 dpp_credits_cqid;
+ u32 rpir_lkey;
+ struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES];
+};
+
+enum {
+ OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT = 0,
+ OCRDMA_CREATE_QP_RSP_QP_ID_MASK = 0xFFFF,
+
+ OCRDMA_CREATE_QP_RSP_MAX_RQE_SHIFT = 0,
+ OCRDMA_CREATE_QP_RSP_MAX_RQE_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT = 16,
+ OCRDMA_CREATE_QP_RSP_MAX_WQE_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT,
+
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_SHIFT = 0,
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT = 16,
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT,
+
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT = 16,
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT,
+
+ OCRDMA_CREATE_QP_RSP_MAX_IRD_SHIFT = 0,
+ OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT = 16,
+ OCRDMA_CREATE_QP_RSP_MAX_ORD_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT,
+
+ OCRDMA_CREATE_QP_RSP_RQ_ID_SHIFT = 0,
+ OCRDMA_CREATE_QP_RSP_RQ_ID_MASK = 0xFFFF,
+ OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT = 16,
+ OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT,
+
+ OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0),
+ OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1,
+ OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF <<
+ OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT,
+ OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT = 16,
+ OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK = 0xFFFF <<
+ OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT,
+};
+
+struct ocrdma_create_qp_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 qp_id;
+ u32 max_wqe_rqe;
+ u32 max_sge_send_write;
+ u32 max_sge_recv;
+ u32 max_ord_ird;
+ u32 sq_rq_id;
+ u32 dpp_response;
+};
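
/*
 * Editorial sketch, not part of this patch: unpacking a few fields of
 * struct ocrdma_create_qp_rsp with the RSP masks defined above.  The
 * helper and variable names are illustrative only.
 */
static void example_parse_create_qp_rsp(const struct ocrdma_create_qp_rsp *rsp,
					u16 *qp_id, u16 *sq_id, bool *dpp_on)
{
	*qp_id = rsp->qp_id & OCRDMA_CREATE_QP_RSP_QP_ID_MASK;
	*sq_id = (rsp->sq_rq_id & OCRDMA_CREATE_QP_RSP_SQ_ID_MASK) >>
		 OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT;
	*dpp_on = !!(rsp->dpp_response & OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK);
}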
+
+struct ocrdma_destroy_qp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+ u32 qp_id;
+};
+
+struct ocrdma_destroy_qp_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+enum {
+ OCRDMA_MODIFY_QP_ID_SHIFT = 0,
+ OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF,
+
+ OCRDMA_QP_PARA_QPS_VALID = Bit(0),
+ OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1),
+ OCRDMA_QP_PARA_PKEY_VALID = Bit(2),
+ OCRDMA_QP_PARA_QKEY_VALID = Bit(3),
+ OCRDMA_QP_PARA_PMTU_VALID = Bit(4),
+ OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5),
+ OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6),
+ OCRDMA_QP_PARA_RRC_VALID = Bit(7),
+ OCRDMA_QP_PARA_RQPSN_VALID = Bit(8),
+ OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9),
+ OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10),
+ OCRDMA_QP_PARA_RNT_VALID = Bit(11),
+ OCRDMA_QP_PARA_SQPSN_VALID = Bit(12),
+ OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13),
+ OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14),
+ OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15),
+ OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16),
+ OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17),
+ OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18),
+ OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19),
+ OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20),
+ OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21),
+ OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22),
+ OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23),
+ OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24),
+ OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25),
+ OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26),
+
+ OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0),
+ OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1),
+ OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2),
+ OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3)
+};
+
+enum {
+ OCRDMA_QP_PARAMS_SRQ_ID_SHIFT = 0,
+ OCRDMA_QP_PARAMS_SRQ_ID_MASK = 0xFFFF,
+
+ OCRDMA_QP_PARAMS_MAX_RQE_SHIFT = 0,
+ OCRDMA_QP_PARAMS_MAX_RQE_MASK = 0xFFFF,
+ OCRDMA_QP_PARAMS_MAX_WQE_SHIFT = 16,
+ OCRDMA_QP_PARAMS_MAX_WQE_MASK = 0xFFFF <<
+ OCRDMA_QP_PARAMS_MAX_WQE_SHIFT,
+
+ OCRDMA_QP_PARAMS_MAX_SGE_WRITE_SHIFT = 0,
+ OCRDMA_QP_PARAMS_MAX_SGE_WRITE_MASK = 0xFFFF,
+ OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT = 16,
+ OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF <<
+ OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT,
+
+ OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0),
+ OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1),
+ OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2),
+ OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3),
+ OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4),
+ OCRDMA_QP_PARAMS_STATE_SHIFT = 5,
+ OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7),
+ OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8),
+ OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9),
+ OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16,
+ OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF <<
+ OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
+
+ OCRDMA_QP_PARAMS_MAX_IRD_SHIFT = 0,
+ OCRDMA_QP_PARAMS_MAX_IRD_MASK = 0xFFFF,
+ OCRDMA_QP_PARAMS_MAX_ORD_SHIFT = 16,
+ OCRDMA_QP_PARAMS_MAX_ORD_MASK = 0xFFFF <<
+ OCRDMA_QP_PARAMS_MAX_ORD_SHIFT,
+
+ OCRDMA_QP_PARAMS_RQ_CQID_SHIFT = 0,
+ OCRDMA_QP_PARAMS_RQ_CQID_MASK = 0xFFFF,
+ OCRDMA_QP_PARAMS_WQ_CQID_SHIFT = 16,
+ OCRDMA_QP_PARAMS_WQ_CQID_MASK = 0xFFFF <<
+ OCRDMA_QP_PARAMS_WQ_CQID_SHIFT,
+
+ OCRDMA_QP_PARAMS_RQ_PSN_SHIFT = 0,
+ OCRDMA_QP_PARAMS_RQ_PSN_MASK = 0xFFFFFF,
+ OCRDMA_QP_PARAMS_HOP_LMT_SHIFT = 24,
+ OCRDMA_QP_PARAMS_HOP_LMT_MASK = 0xFF <<
+ OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
+
+ OCRDMA_QP_PARAMS_SQ_PSN_SHIFT = 0,
+ OCRDMA_QP_PARAMS_SQ_PSN_MASK = 0xFFFFFF,
+ OCRDMA_QP_PARAMS_TCLASS_SHIFT = 24,
+ OCRDMA_QP_PARAMS_TCLASS_MASK = 0xFF <<
+ OCRDMA_QP_PARAMS_TCLASS_SHIFT,
+
+ OCRDMA_QP_PARAMS_DEST_QPN_SHIFT = 0,
+ OCRDMA_QP_PARAMS_DEST_QPN_MASK = 0xFFFFFF,
+ OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT = 24,
+ OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK = 0x7 <<
+ OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT,
+ OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT = 27,
+ OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK = 0x1F <<
+ OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT,
+
+ OCRDMA_QP_PARAMS_PKEY_IDNEX_SHIFT = 0,
+ OCRDMA_QP_PARAMS_PKEY_INDEX_MASK = 0xFFFF,
+ OCRDMA_QP_PARAMS_PATH_MTU_SHIFT = 18,
+ OCRDMA_QP_PARAMS_PATH_MTU_MASK = 0x3FFF <<
+ OCRDMA_QP_PARAMS_PATH_MTU_SHIFT,
+
+ OCRDMA_QP_PARAMS_FLOW_LABEL_SHIFT = 0,
+ OCRDMA_QP_PARAMS_FLOW_LABEL_MASK = 0xFFFFF,
+ OCRDMA_QP_PARAMS_SL_SHIFT = 20,
+ OCRDMA_QP_PARAMS_SL_MASK = 0xF <<
+ OCRDMA_QP_PARAMS_SL_SHIFT,
+ OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT = 24,
+ OCRDMA_QP_PARAMS_RETRY_CNT_MASK = 0x7 <<
+ OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT,
+ OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT = 27,
+ OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK = 0x1F <<
+ OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT,
+
+ OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_SHIFT = 0,
+ OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_MASK = 0xFFFF,
+ OCRDMA_QP_PARAMS_VLAN_SHIFT = 16,
+ OCRDMA_QP_PARAMS_VLAN_MASK = 0xFFFF <<
+ OCRDMA_QP_PARAMS_VLAN_SHIFT
+};
+
+struct ocrdma_qp_params {
+ u32 id;
+ u32 max_wqe_rqe;
+ u32 max_sge_send_write;
+ u32 max_sge_recv_flags;
+ u32 max_ord_ird;
+ u32 wq_rq_cqid;
+ u32 hop_lmt_rq_psn;
+ u32 tclass_sq_psn;
+ u32 ack_to_rnr_rtc_dest_qpn;
+ u32 path_mtu_pkey_indx;
+ u32 rnt_rc_sl_fl;
+ u8 sgid[16];
+ u8 dgid[16];
+ u32 dmac_b0_to_b3;
+ u32 vlan_dmac_b4_to_b5;
+ u32 qkey;
+};
+
+
+struct ocrdma_modify_qp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ struct ocrdma_qp_params params;
+ u32 flags;
+ u32 rdma_flags;
+ u32 num_outstanding_atomic_rd;
+};
+
+enum {
+ OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT = 0,
+ OCRDMA_MODIFY_QP_RSP_MAX_RQE_MASK = 0xFFFF,
+ OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT = 16,
+ OCRDMA_MODIFY_QP_RSP_MAX_WQE_MASK = 0xFFFF <<
+ OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT,
+
+ OCRDMA_MODIFY_QP_RSP_MAX_IRD_SHIFT = 0,
+ OCRDMA_MODIFY_QP_RSP_MAX_IRD_MASK = 0xFFFF,
+ OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT = 16,
+ OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF <<
+ OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
+};
+
+struct ocrdma_modify_qp_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 max_wqe_rqe;
+ u32 max_ord_ird;
+};
+
+struct ocrdma_query_qp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
+#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
+ u32 qp_id;
+};
+
+struct ocrdma_query_qp_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+ struct ocrdma_qp_params params;
+};
+
+enum {
+ OCRDMA_CREATE_SRQ_PD_ID_SHIFT = 0,
+ OCRDMA_CREATE_SRQ_PD_ID_MASK = 0xFFFF,
+ OCRDMA_CREATE_SRQ_PG_SZ_SHIFT = 16,
+ OCRDMA_CREATE_SRQ_PG_SZ_MASK = 0x3 <<
+ OCRDMA_CREATE_SRQ_PG_SZ_SHIFT,
+
+ OCRDMA_CREATE_SRQ_MAX_RQE_SHIFT = 0,
+ OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT = 16,
+ OCRDMA_CREATE_SRQ_MAX_SGE_RECV_MASK = 0xFFFF <<
+ OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT,
+
+ OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT = 0,
+ OCRDMA_CREATE_SRQ_RQE_SIZE_MASK = 0xFFFF,
+ OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT = 16,
+ OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_MASK = 0xFFFF <<
+ OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT
+};
+
+struct ocrdma_create_srq {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 pgsz_pdid;
+ u32 max_sge_rqe;
+ u32 pages_rqe_sz;
+ struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES];
+};
+
+enum {
+ OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT = 0,
+ OCRDMA_CREATE_SRQ_RSP_SRQ_ID_MASK = 0xFFFFFF,
+
+ OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT = 0,
+ OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK = 0xFFFF,
+ OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT = 16,
+ OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK = 0xFFFF <<
+ OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT
+};
+
+struct ocrdma_create_srq_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 id;
+ u32 max_sge_rqe_allocated;
+};
+
+enum {
+ OCRDMA_MODIFY_SRQ_ID_SHIFT = 0,
+ OCRDMA_MODIFY_SRQ_ID_MASK = 0xFFFFFF,
+
+ OCRDMA_MODIFY_SRQ_MAX_RQE_SHIFT = 0,
+ OCRDMA_MODIFY_SRQ_MAX_RQE_MASK = 0xFFFF,
+ OCRDMA_MODIFY_SRQ_LIMIT_SHIFT = 16,
+ OCRDMA_MODIFY_SRQ__LIMIT_MASK = 0xFFFF <<
+ OCRDMA_MODIFY_SRQ_LIMIT_SHIFT
+};
+
+struct ocrdma_modify_srq {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rep;
+
+ u32 id;
+ u32 limit_max_rqe;
+};
+
+enum {
+ OCRDMA_QUERY_SRQ_ID_SHIFT = 0,
+ OCRDMA_QUERY_SRQ_ID_MASK = 0xFFFFFF
+};
+
+struct ocrdma_query_srq {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp req;
+
+ u32 id;
+};
+
+enum {
+ OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT = 0,
+ OCRDMA_QUERY_SRQ_RSP_PD_ID_MASK = 0xFFFF,
+ OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT = 16,
+ OCRDMA_QUERY_SRQ_RSP_MAX_RQE_MASK = 0xFFFF <<
+ OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT,
+
+ OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_SHIFT = 0,
+ OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK = 0xFFFF,
+ OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT = 16,
+ OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_MASK = 0xFFFF <<
+ OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT
+};
+
+struct ocrdma_query_srq_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp req;
+
+ u32 max_rqe_pdid;
+ u32 srq_lmt_max_sge;
+};
+
+enum {
+ OCRDMA_DESTROY_SRQ_ID_SHIFT = 0,
+ OCRDMA_DESTROY_SRQ_ID_MASK = 0xFFFFFF
+};
+
+struct ocrdma_destroy_srq {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp req;
+
+ u32 id;
+};
+
+enum {
+ OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
+ OCRDMA_PD_MAX_DPP_ENABLED_QP = 8,
+ OCRDMA_DPP_PAGE_SIZE = 4096
+};
+
+struct ocrdma_alloc_pd {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+ u32 enable_dpp_rsvd;
+};
+
+enum {
+ OCRDMA_ALLOC_PD_RSP_DPP = Bit(16),
+ OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20,
+ OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF,
+};
+
+struct ocrdma_alloc_pd_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+ u32 dpp_page_pdid;
+};
+
+struct ocrdma_dealloc_pd {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+ u32 id;
+};
+
+struct ocrdma_dealloc_pd_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+enum {
+ OCRDMA_ADDR_CHECK_ENABLE = 1,
+ OCRDMA_ADDR_CHECK_DISABLE = 0
+};
+
+enum {
+ OCRDMA_ALLOC_LKEY_PD_ID_SHIFT = 0,
+ OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF,
+
+ OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0,
+ OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0),
+ OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1,
+ OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1),
+ OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2,
+ OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2),
+ OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3,
+ OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3),
+ OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4,
+ OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4),
+ OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5,
+ OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5),
+ OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6),
+ OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6,
+ OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16,
+ OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF <<
+ OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT
+};
+
+struct ocrdma_alloc_lkey {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 pdid;
+ u32 pbl_sz_flags;
+};
+
+struct ocrdma_alloc_lkey_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 lrkey;
+ u32 num_pbl_rsvd;
+};
+
+struct ocrdma_dealloc_lkey {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 lkey;
+ u32 rsvd_frmr;
+};
+
+struct ocrdma_dealloc_lkey_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+#define MAX_OCRDMA_NSMR_PBL (u32)22
+#define MAX_OCRDMA_PBL_SIZE 65536
+#define MAX_OCRDMA_PBL_PER_LKEY 32767
+
+enum {
+ OCRDMA_REG_NSMR_LRKEY_INDEX_SHIFT = 0,
+ OCRDMA_REG_NSMR_LRKEY_INDEX_MASK = 0xFFFFFF,
+ OCRDMA_REG_NSMR_LRKEY_SHIFT = 24,
+ OCRDMA_REG_NSMR_LRKEY_MASK = 0xFF <<
+ OCRDMA_REG_NSMR_LRKEY_SHIFT,
+
+ OCRDMA_REG_NSMR_PD_ID_SHIFT = 0,
+ OCRDMA_REG_NSMR_PD_ID_MASK = 0xFFFF,
+ OCRDMA_REG_NSMR_NUM_PBL_SHIFT = 16,
+ OCRDMA_REG_NSMR_NUM_PBL_MASK = 0xFFFF <<
+ OCRDMA_REG_NSMR_NUM_PBL_SHIFT,
+
+ OCRDMA_REG_NSMR_PBE_SIZE_SHIFT = 0,
+ OCRDMA_REG_NSMR_PBE_SIZE_MASK = 0xFFFF,
+ OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT = 16,
+ OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF <<
+ OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT,
+ OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24,
+ OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24),
+ OCRDMA_REG_NSMR_ZB_SHIFT = 25,
+ OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25),
+ OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26,
+ OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26),
+ OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27,
+ OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27),
+ OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28,
+ OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28),
+ OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29,
+ OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29),
+ OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30,
+ OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30),
+ OCRDMA_REG_NSMR_LAST_SHIFT = 31,
+ OCRDMA_REG_NSMR_LAST_MASK = Bit(31)
+};
+
+struct ocrdma_reg_nsmr {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr cmd;
+
+ u32 fr_mr;
+ u32 num_pbl_pdid;
+ u32 flags_hpage_pbe_sz;
+ u32 totlen_low;
+ u32 totlen_high;
+ u32 fbo_low;
+ u32 fbo_high;
+ u32 va_loaddr;
+ u32 va_hiaddr;
+ struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
+};
+
+enum {
+ OCRDMA_REG_NSMR_CONT_PBL_SHIFT = 0,
+ OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK = 0xFFFF,
+ OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT = 16,
+ OCRDMA_REG_NSMR_CONT_NUM_PBL_MASK = 0xFFFF <<
+ OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT,
+
+ OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31,
+ OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31)
+};
+
+struct ocrdma_reg_nsmr_cont {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr cmd;
+
+ u32 lrkey;
+ u32 num_pbl_offset;
+ u32 last;
+
+ struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
+};
+
+struct ocrdma_pbe {
+ u32 pa_hi;
+ u32 pa_lo;
+};
+
+enum {
+ OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT = 16,
+ OCRDMA_REG_NSMR_RSP_NUM_PBL_MASK = 0xFFFF0000
+};
+struct ocrdma_reg_nsmr_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 lrkey;
+ u32 num_pbl;
+};
+
+enum {
+ OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT = 0,
+ OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_MASK = 0xFFFFFF,
+ OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT = 24,
+ OCRDMA_REG_NSMR_CONT_RSP_LRKEY_MASK = 0xFF <<
+ OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT,
+
+ OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT = 16,
+ OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_MASK = 0xFFFF <<
+ OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT
+};
+
+struct ocrdma_reg_nsmr_cont_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 lrkey_key_index;
+ u32 num_pbl;
+};
+
+enum {
+ OCRDMA_ALLOC_MW_PD_ID_SHIFT = 0,
+ OCRDMA_ALLOC_MW_PD_ID_MASK = 0xFFFF
+};
+
+struct ocrdma_alloc_mw {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 pdid;
+};
+
+enum {
+ OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT = 0,
+ OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_MASK = 0xFFFFFF
+};
+
+struct ocrdma_alloc_mw_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u32 lrkey_index;
+};
+
+struct ocrdma_attach_mcast {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+ u32 qp_id;
+ u8 mgid[16];
+ u32 mac_b0_to_b3;
+ u32 vlan_mac_b4_to_b5;
+};
+
+struct ocrdma_attach_mcast_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+struct ocrdma_detach_mcast {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+ u32 qp_id;
+ u8 mgid[16];
+ u32 mac_b0_to_b3;
+ u32 vlan_mac_b4_to_b5;
+};
+
+struct ocrdma_detach_mcast_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+enum {
+ OCRDMA_CREATE_AH_NUM_PAGES_SHIFT = 19,
+ OCRDMA_CREATE_AH_NUM_PAGES_MASK = 0xF <<
+ OCRDMA_CREATE_AH_NUM_PAGES_SHIFT,
+
+ OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT = 16,
+ OCRDMA_CREATE_AH_PAGE_SIZE_MASK = 0x7 <<
+ OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT,
+
+ OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT = 23,
+ OCRDMA_CREATE_AH_ENTRY_SIZE_MASK = 0x1FF <<
+ OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT,
+};
+
+#define OCRDMA_AH_TBL_PAGES 8
+
+struct ocrdma_create_ah_tbl {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+
+ u32 ah_conf;
+ struct ocrdma_pa tbl_addr[8];
+};
+
+struct ocrdma_create_ah_tbl_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+ u32 ahid;
+};
+
+struct ocrdma_delete_ah_tbl {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_hdr req;
+ u32 ahid;
+};
+
+struct ocrdma_delete_ah_tbl_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+};
+
+enum {
+ OCRDMA_EQE_VALID_SHIFT = 0,
+ OCRDMA_EQE_VALID_MASK = Bit(0),
+ OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE,
+ OCRDMA_EQE_RESOURCE_ID_SHIFT = 16,
+ OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF <<
+ OCRDMA_EQE_RESOURCE_ID_SHIFT,
+};
+
+struct ocrdma_eqe {
+ u32 id_valid;
+};
+
+enum OCRDMA_CQE_STATUS {
+ OCRDMA_CQE_SUCCESS = 0,
+ OCRDMA_CQE_LOC_LEN_ERR,
+ OCRDMA_CQE_LOC_QP_OP_ERR,
+ OCRDMA_CQE_LOC_EEC_OP_ERR,
+ OCRDMA_CQE_LOC_PROT_ERR,
+ OCRDMA_CQE_WR_FLUSH_ERR,
+ OCRDMA_CQE_MW_BIND_ERR,
+ OCRDMA_CQE_BAD_RESP_ERR,
+ OCRDMA_CQE_LOC_ACCESS_ERR,
+ OCRDMA_CQE_REM_INV_REQ_ERR,
+ OCRDMA_CQE_REM_ACCESS_ERR,
+ OCRDMA_CQE_REM_OP_ERR,
+ OCRDMA_CQE_RETRY_EXC_ERR,
+ OCRDMA_CQE_RNR_RETRY_EXC_ERR,
+ OCRDMA_CQE_LOC_RDD_VIOL_ERR,
+ OCRDMA_CQE_REM_INV_RD_REQ_ERR,
+ OCRDMA_CQE_REM_ABORT_ERR,
+ OCRDMA_CQE_INV_EECN_ERR,
+ OCRDMA_CQE_INV_EEC_STATE_ERR,
+ OCRDMA_CQE_FATAL_ERR,
+ OCRDMA_CQE_RESP_TIMEOUT_ERR,
+ OCRDMA_CQE_GENERAL_ERR
+};
+
+enum {
+ /* w0 */
+ OCRDMA_CQE_WQEIDX_SHIFT = 0,
+ OCRDMA_CQE_WQEIDX_MASK = 0xFFFF,
+
+ /* w1 */
+ OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16,
+ OCRDMA_CQE_PKEY_SHIFT = 0,
+ OCRDMA_CQE_PKEY_MASK = 0xFFFF,
+
+ /* w2 */
+ OCRDMA_CQE_QPN_SHIFT = 0,
+ OCRDMA_CQE_QPN_MASK = 0x0000FFFF,
+
+ OCRDMA_CQE_BUFTAG_SHIFT = 16,
+ OCRDMA_CQE_BUFTAG_MASK = 0xFFFF << OCRDMA_CQE_BUFTAG_SHIFT,
+
+ /* w3 */
+ OCRDMA_CQE_UD_STATUS_SHIFT = 24,
+ OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT,
+ OCRDMA_CQE_STATUS_SHIFT = 16,
+ OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT,
+ OCRDMA_CQE_VALID = Bit(31),
+ OCRDMA_CQE_INVALIDATE = Bit(30),
+ OCRDMA_CQE_QTYPE = Bit(29),
+ OCRDMA_CQE_IMM = Bit(28),
+ OCRDMA_CQE_WRITE_IMM = Bit(27),
+ OCRDMA_CQE_QTYPE_SQ = 0,
+ OCRDMA_CQE_QTYPE_RQ = 1,
+ OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF
+};
+
+struct ocrdma_cqe {
+ union {
+ /* w0 to w2 */
+ struct {
+ u32 wqeidx;
+ u32 bytes_xfered;
+ u32 qpn;
+ } wq;
+ struct {
+ u32 lkey_immdt;
+ u32 rxlen;
+ u32 buftag_qpn;
+ } rq;
+ struct {
+ u32 lkey_immdt;
+ u32 rxlen_pkey;
+ u32 buftag_qpn;
+ } ud;
+ struct {
+ u32 word_0;
+ u32 word_1;
+ u32 qpn;
+ } cmn;
+ };
+ u32 flags_status_srcqpn; /* w3 */
+};
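
/*
 * Editorial sketch, not part of this patch: decoding word 3 of a CQE with
 * the masks above (CQE dwords are little-endian on the wire).  The real
 * decoding is done in the driver's CQ polling path; names here are
 * illustrative.
 */
static void example_decode_cqe_w3(const struct ocrdma_cqe *cqe,
				  bool *valid, bool *is_rq, u8 *status)
{
	u32 w3 = le32_to_cpu(cqe->flags_status_srcqpn);

	*valid = !!(w3 & OCRDMA_CQE_VALID);
	*is_rq = !!(w3 & OCRDMA_CQE_QTYPE);	/* 1 = RQ, 0 = SQ completion */
	*status = (w3 & OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
}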
+
+struct ocrdma_sge {
+ u32 addr_hi;
+ u32 addr_lo;
+ u32 lrkey;
+ u32 len;
+};
+
+enum {
+ OCRDMA_FLAG_SIG = 0x1,
+ OCRDMA_FLAG_INV = 0x2,
+ OCRDMA_FLAG_FENCE_L = 0x4,
+ OCRDMA_FLAG_FENCE_R = 0x8,
+ OCRDMA_FLAG_SOLICIT = 0x10,
+ OCRDMA_FLAG_IMM = 0x20,
+
+ /* Stag flags */
+ OCRDMA_LKEY_FLAG_LOCAL_WR = 0x1,
+ OCRDMA_LKEY_FLAG_REMOTE_RD = 0x2,
+ OCRDMA_LKEY_FLAG_REMOTE_WR = 0x4,
+ OCRDMA_LKEY_FLAG_VATO = 0x8,
+};
+
+enum OCRDMA_WQE_OPCODE {
+ OCRDMA_WRITE = 0x06,
+ OCRDMA_READ = 0x0C,
+ OCRDMA_RESV0 = 0x02,
+ OCRDMA_SEND = 0x00,
+ OCRDMA_CMP_SWP = 0x14,
+ OCRDMA_BIND_MW = 0x10,
+ OCRDMA_FR_MR = 0x11,
+ OCRDMA_RESV1 = 0x0A,
+ OCRDMA_LKEY_INV = 0x15,
+ OCRDMA_FETCH_ADD = 0x13,
+ OCRDMA_POST_RQ = 0x12
+};
+
+enum {
+ OCRDMA_TYPE_INLINE = 0x0,
+ OCRDMA_TYPE_LKEY = 0x1,
+};
+
+enum {
+ OCRDMA_WQE_OPCODE_SHIFT = 0,
+ OCRDMA_WQE_OPCODE_MASK = 0x0000001F,
+ OCRDMA_WQE_FLAGS_SHIFT = 5,
+ OCRDMA_WQE_TYPE_SHIFT = 16,
+ OCRDMA_WQE_TYPE_MASK = 0x00030000,
+ OCRDMA_WQE_SIZE_SHIFT = 18,
+ OCRDMA_WQE_SIZE_MASK = 0xFF,
+ OCRDMA_WQE_NXT_WQE_SIZE_SHIFT = 25,
+
+ OCRDMA_WQE_LKEY_FLAGS_SHIFT = 0,
+ OCRDMA_WQE_LKEY_FLAGS_MASK = 0xF
+};
+
+/* header WQE for all the SQ and RQ operations */
+struct ocrdma_hdr_wqe {
+ u32 cw;
+ union {
+ u32 rsvd_tag;
+ u32 rsvd_lkey_flags;
+ };
+ union {
+ u32 immdt;
+ u32 lkey;
+ };
+ u32 total_len;
+};
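
/*
 * Editorial sketch, not part of this patch: assembling the control word
 * (cw) of struct ocrdma_hdr_wqe from the shift/mask constants above.  The
 * helper and its arguments are illustrative assumptions.
 */
static void example_build_wqe_cw(struct ocrdma_hdr_wqe *hdr, u32 opcode,
				 u32 flags, u32 wqe_size_dwords)
{
	hdr->cw = (opcode & OCRDMA_WQE_OPCODE_MASK) |
		  (flags << OCRDMA_WQE_FLAGS_SHIFT) |
		  (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT) |
		  ((wqe_size_dwords & OCRDMA_WQE_SIZE_MASK) <<
		   OCRDMA_WQE_SIZE_SHIFT);
}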
+
+struct ocrdma_ewqe_ud_hdr {
+ u32 rsvd_dest_qpn;
+ u32 qkey;
+ u32 rsvd_ahid;
+ u32 rsvd;
+};
+
+/* extended WQE that follows the hdr_wqe for a Fast Memory Register (FRMR) request */
+struct ocrdma_ewqe_fr {
+ u32 va_hi;
+ u32 va_lo;
+ u32 fbo_hi;
+ u32 fbo_lo;
+ u32 size_sge;
+ u32 num_sges;
+ u32 rsvd;
+ u32 rsvd2;
+};
+
+struct ocrdma_eth_basic {
+ u8 dmac[6];
+ u8 smac[6];
+ __be16 eth_type;
+} __packed;
+
+struct ocrdma_eth_vlan {
+ u8 dmac[6];
+ u8 smac[6];
+ __be16 eth_type;
+ __be16 vlan_tag;
+#define OCRDMA_ROCE_ETH_TYPE 0x8915
+ __be16 roce_eth_type;
+} __packed;
+
+struct ocrdma_grh {
+ __be32 tclass_flow;
+ __be32 pdid_hoplimit;
+ u8 sgid[16];
+ u8 dgid[16];
+ u16 rsvd;
+} __packed;
+
+#define OCRDMA_AV_VALID Bit(7)
+#define OCRDMA_AV_VLAN_VALID Bit(1)
+
+struct ocrdma_av {
+ struct ocrdma_eth_vlan eth_hdr;
+ struct ocrdma_grh grh;
+ u32 valid;
+} __packed;
+
+struct ocrdma_rsrc_stats {
+ u32 dpp_pds;
+ u32 non_dpp_pds;
+ u32 rc_dpp_qps;
+ u32 uc_dpp_qps;
+ u32 ud_dpp_qps;
+ u32 rc_non_dpp_qps;
+ u32 rsvd;
+ u32 uc_non_dpp_qps;
+ u32 ud_non_dpp_qps;
+ u32 rsvd1;
+ u32 srqs;
+ u32 rbqs;
+ u32 r64K_nsmr;
+ u32 r64K_to_2M_nsmr;
+ u32 r2M_to_44M_nsmr;
+ u32 r44M_to_1G_nsmr;
+ u32 r1G_to_4G_nsmr;
+ u32 nsmr_count_4G_to_32G;
+ u32 r32G_to_64G_nsmr;
+ u32 r64G_to_128G_nsmr;
+ u32 r128G_to_higher_nsmr;
+ u32 embedded_nsmr;
+ u32 frmr;
+ u32 prefetch_qps;
+ u32 ondemand_qps;
+ u32 phy_mr;
+ u32 mw;
+ u32 rsvd2[7];
+};
+
+struct ocrdma_db_err_stats {
+ u32 sq_doorbell_errors;
+ u32 cq_doorbell_errors;
+ u32 rq_srq_doorbell_errors;
+ u32 cq_overflow_errors;
+ u32 rsvd[4];
+};
+
+struct ocrdma_wqe_stats {
+ u32 large_send_rc_wqes_lo;
+ u32 large_send_rc_wqes_hi;
+ u32 large_write_rc_wqes_lo;
+ u32 large_write_rc_wqes_hi;
+ u32 rsvd[4];
+ u32 read_wqes_lo;
+ u32 read_wqes_hi;
+ u32 frmr_wqes_lo;
+ u32 frmr_wqes_hi;
+ u32 mw_bind_wqes_lo;
+ u32 mw_bind_wqes_hi;
+ u32 invalidate_wqes_lo;
+ u32 invalidate_wqes_hi;
+ u32 rsvd1[2];
+ u32 dpp_wqe_drops;
+ u32 rsvd2[5];
+};
+
+struct ocrdma_tx_stats {
+ u32 send_pkts_lo;
+ u32 send_pkts_hi;
+ u32 write_pkts_lo;
+ u32 write_pkts_hi;
+ u32 read_pkts_lo;
+ u32 read_pkts_hi;
+ u32 read_rsp_pkts_lo;
+ u32 read_rsp_pkts_hi;
+ u32 ack_pkts_lo;
+ u32 ack_pkts_hi;
+ u32 send_bytes_lo;
+ u32 send_bytes_hi;
+ u32 write_bytes_lo;
+ u32 write_bytes_hi;
+ u32 read_req_bytes_lo;
+ u32 read_req_bytes_hi;
+ u32 read_rsp_bytes_lo;
+ u32 read_rsp_bytes_hi;
+ u32 ack_timeouts;
+ u32 rsvd[5];
+};
+
+
+struct ocrdma_tx_qp_err_stats {
+ u32 local_length_errors;
+ u32 local_protection_errors;
+ u32 local_qp_operation_errors;
+ u32 retry_count_exceeded_errors;
+ u32 rnr_retry_count_exceeded_errors;
+ u32 rsvd[3];
+};
+
+struct ocrdma_rx_stats {
+ u32 roce_frame_bytes_lo;
+ u32 roce_frame_bytes_hi;
+ u32 roce_frame_icrc_drops;
+ u32 roce_frame_payload_len_drops;
+ u32 ud_drops;
+ u32 qp1_drops;
+ u32 psn_error_request_packets;
+ u32 psn_error_resp_packets;
+ u32 rnr_nak_timeouts;
+ u32 rnr_nak_receives;
+ u32 roce_frame_rxmt_drops;
+ u32 nak_count_psn_sequence_errors;
+ u32 rc_drop_count_lookup_errors;
+ u32 rq_rnr_naks;
+ u32 srq_rnr_naks;
+ u32 roce_frames_lo;
+ u32 roce_frames_hi;
+ u32 rsvd;
+};
+
+struct ocrdma_rx_qp_err_stats {
+ u32 nak_invalid_request_errors;
+ u32 nak_remote_operation_errors;
+ u32 nak_count_remote_access_errors;
+ u32 local_length_errors;
+ u32 local_protection_errors;
+ u32 local_qp_operation_errors;
+ u32 rsvd[2];
+};
+
+struct ocrdma_tx_dbg_stats {
+ u32 data[100];
+};
+
+struct ocrdma_rx_dbg_stats {
+ u32 data[200];
+};
+
+struct ocrdma_rdma_stats_req {
+ struct ocrdma_mbx_hdr hdr;
+ u8 reset_stats;
+ u8 rsvd[3];
+} __packed;
+
+struct ocrdma_rdma_stats_resp {
+ struct ocrdma_mbx_hdr hdr;
+ struct ocrdma_rsrc_stats act_rsrc_stats;
+ struct ocrdma_rsrc_stats th_rsrc_stats;
+ struct ocrdma_db_err_stats db_err_stats;
+ struct ocrdma_wqe_stats wqe_stats;
+ struct ocrdma_tx_stats tx_stats;
+ struct ocrdma_tx_qp_err_stats tx_qp_err_stats;
+ struct ocrdma_rx_stats rx_stats;
+ struct ocrdma_rx_qp_err_stats rx_qp_err_stats;
+ struct ocrdma_tx_dbg_stats tx_dbg_stats;
+ struct ocrdma_rx_dbg_stats rx_dbg_stats;
+} __packed;
+
+
+struct mgmt_hba_attribs {
+ u8 flashrom_version_string[32];
+ u8 manufacturer_name[32];
+ u32 supported_modes;
+ u32 rsvd0[3];
+ u8 ncsi_ver_string[12];
+ u32 default_extended_timeout;
+ u8 controller_model_number[32];
+ u8 controller_description[64];
+ u8 controller_serial_number[32];
+ u8 ip_version_string[32];
+ u8 firmware_version_string[32];
+ u8 bios_version_string[32];
+ u8 redboot_version_string[32];
+ u8 driver_version_string[32];
+ u8 fw_on_flash_version_string[32];
+ u32 functionalities_supported;
+ u16 max_cdblength;
+ u8 asic_revision;
+ u8 generational_guid[16];
+ u8 hba_port_count;
+ u16 default_link_down_timeout;
+ u8 iscsi_ver_min_max;
+ u8 multifunction_device;
+ u8 cache_valid;
+ u8 hba_status;
+ u8 max_domains_supported;
+ u8 phy_port;
+ u32 firmware_post_status;
+ u32 hba_mtu[8];
+ u32 rsvd1[4];
+};
+
+struct mgmt_controller_attrib {
+ struct mgmt_hba_attribs hba_attribs;
+ u16 pci_vendor_id;
+ u16 pci_device_id;
+ u16 pci_sub_vendor_id;
+ u16 pci_sub_system_id;
+ u8 pci_bus_number;
+ u8 pci_device_number;
+ u8 pci_function_number;
+ u8 interface_type;
+ u64 unique_identifier;
+ u32 rsvd0[5];
+};
+
+struct ocrdma_get_ctrl_attribs_rsp {
+ struct ocrdma_mbx_hdr hdr;
+ struct mgmt_controller_attrib ctrl_attribs;
+};
+
+
+#endif /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
new file mode 100644
index 00000000000..41a9aec9998
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -0,0 +1,616 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <rdma/ib_addr.h>
+#include "ocrdma_stats.h"
+
+static struct dentry *ocrdma_dbgfs_dir;
+
+static int ocrdma_add_stat(char *start, char *pcur,
+ char *name, u64 count)
+{
+ char buff[128] = {0};
+ int cpy_len = 0;
+
+ snprintf(buff, 128, "%s: %llu\n", name, count);
+ cpy_len = strlen(buff);
+
+ if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) {
+ pr_err("%s: No space in stats buff\n", __func__);
+ return 0;
+ }
+
+ memcpy(pcur, buff, cpy_len);
+ return cpy_len;
+}
+
+static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+{
+ struct stats_mem *mem = &dev->stats_mem;
+
+ /* Alloc mbox command mem */
+ mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
+ sizeof(struct ocrdma_rdma_stats_resp));
+
+ mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size,
+ &mem->pa, GFP_KERNEL);
+ if (!mem->va) {
+ pr_err("%s: stats mbox allocation failed\n", __func__);
+ return false;
+ }
+
+ memset(mem->va, 0, mem->size);
+
+ /* Alloc debugfs mem */
+ mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
+ if (!mem->debugfs_mem) {
+ pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+ return false;
+ }
+
+ return true;
+}
+
+static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+{
+ struct stats_mem *mem = &dev->stats_mem;
+
+ if (mem->va)
+ dma_free_coherent(&dev->nic_info.pdev->dev, mem->size,
+ mem->va, mem->pa);
+ kfree(mem->debugfs_mem);
+}
+
+static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds",
+ (u64)rsrc_stats->dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds",
+ (u64)rsrc_stats->non_dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps",
+ (u64)rsrc_stats->rc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps",
+ (u64)rsrc_stats->uc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps",
+ (u64)rsrc_stats->ud_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps",
+ (u64)rsrc_stats->rc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps",
+ (u64)rsrc_stats->uc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps",
+ (u64)rsrc_stats->ud_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_srqs",
+ (u64)rsrc_stats->srqs);
+ pcur += ocrdma_add_stat(stats, pcur, "active_rbqs",
+ (u64)rsrc_stats->rbqs);
+ pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr",
+ (u64)rsrc_stats->r64K_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr",
+ (u64)rsrc_stats->r64K_to_2M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr",
+ (u64)rsrc_stats->r2M_to_44M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr",
+ (u64)rsrc_stats->r44M_to_1G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr",
+ (u64)rsrc_stats->r1G_to_4G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G",
+ (u64)rsrc_stats->nsmr_count_4G_to_32G);
+ pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr",
+ (u64)rsrc_stats->r32G_to_64G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr",
+ (u64)rsrc_stats->r64G_to_128G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr",
+ (u64)rsrc_stats->r128G_to_higher_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr",
+ (u64)rsrc_stats->embedded_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_frmr",
+ (u64)rsrc_stats->frmr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps",
+ (u64)rsrc_stats->prefetch_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps",
+ (u64)rsrc_stats->ondemand_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr",
+ (u64)rsrc_stats->phy_mr);
+ pcur += ocrdma_add_stat(stats, pcur, "active_mw",
+ (u64)rsrc_stats->mw);
+
+ /* Print the threshold stats */
+ rsrc_stats = &rdma_stats->th_rsrc_stats;
+
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds",
+ (u64)rsrc_stats->dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds",
+ (u64)rsrc_stats->non_dpp_pds);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps",
+ (u64)rsrc_stats->rc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps",
+ (u64)rsrc_stats->uc_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps",
+ (u64)rsrc_stats->ud_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps",
+ (u64)rsrc_stats->rc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps",
+ (u64)rsrc_stats->uc_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps",
+ (u64)rsrc_stats->ud_non_dpp_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs",
+ (u64)rsrc_stats->srqs);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs",
+ (u64)rsrc_stats->rbqs);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr",
+ (u64)rsrc_stats->r64K_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr",
+ (u64)rsrc_stats->r64K_to_2M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr",
+ (u64)rsrc_stats->r2M_to_44M_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr",
+ (u64)rsrc_stats->r44M_to_1G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr",
+ (u64)rsrc_stats->r1G_to_4G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G",
+ (u64)rsrc_stats->nsmr_count_4G_to_32G);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr",
+ (u64)rsrc_stats->r32G_to_64G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr",
+ (u64)rsrc_stats->r64G_to_128G_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr",
+ (u64)rsrc_stats->r128G_to_higher_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_embedded_nsmr",
+ (u64)rsrc_stats->embedded_nsmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr",
+ (u64)rsrc_stats->frmr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps",
+ (u64)rsrc_stats->prefetch_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps",
+ (u64)rsrc_stats->ondemand_qps);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr",
+ (u64)rsrc_stats->phy_mr);
+ pcur += ocrdma_add_stat(stats, pcur, "threshold_mw",
+ (u64)rsrc_stats->mw);
+ return stats;
+}
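
/*
 * Editorial note, not part of this patch: convert_to_64bit(), used by the
 * stats formatters below, is defined elsewhere in the driver (not in this
 * hunk).  It presumably just glues a _lo/_hi counter pair into one u64,
 * roughly as sketched here.
 */
static inline u64 example_convert_to_64bit(u32 lo, u32 hi)
{
	return ((u64)hi << 32) | (u64)lo;
}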
+
+static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat
+ (stats, pcur, "roce_frame_bytes",
+ convert_to_64bit(rx_stats->roce_frame_bytes_lo,
+ rx_stats->roce_frame_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops",
+ (u64)rx_stats->roce_frame_icrc_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops",
+ (u64)rx_stats->roce_frame_payload_len_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "ud_drops",
+ (u64)rx_stats->ud_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "qp1_drops",
+ (u64)rx_stats->qp1_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets",
+ (u64)rx_stats->psn_error_request_packets);
+ pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets",
+ (u64)rx_stats->psn_error_resp_packets);
+ pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts",
+ (u64)rx_stats->rnr_nak_timeouts);
+ pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives",
+ (u64)rx_stats->rnr_nak_receives);
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops",
+ (u64)rx_stats->roce_frame_rxmt_drops);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors",
+ (u64)rx_stats->nak_count_psn_sequence_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors",
+ (u64)rx_stats->rc_drop_count_lookup_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks",
+ (u64)rx_stats->rq_rnr_naks);
+ pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks",
+ (u64)rx_stats->srq_rnr_naks);
+ pcur += ocrdma_add_stat(stats, pcur, "roce_frames",
+ convert_to_64bit(rx_stats->roce_frames_lo,
+ rx_stats->roce_frames_hi));
+
+ return stats;
+}
+
+static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "send_pkts",
+ convert_to_64bit(tx_stats->send_pkts_lo,
+ tx_stats->send_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "write_pkts",
+ convert_to_64bit(tx_stats->write_pkts_lo,
+ tx_stats->write_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_pkts",
+ convert_to_64bit(tx_stats->read_pkts_lo,
+ tx_stats->read_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts",
+ convert_to_64bit(tx_stats->read_rsp_pkts_lo,
+ tx_stats->read_rsp_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "ack_pkts",
+ convert_to_64bit(tx_stats->ack_pkts_lo,
+ tx_stats->ack_pkts_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "send_bytes",
+ convert_to_64bit(tx_stats->send_bytes_lo,
+ tx_stats->send_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "write_bytes",
+ convert_to_64bit(tx_stats->write_bytes_lo,
+ tx_stats->write_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes",
+ convert_to_64bit(tx_stats->read_req_bytes_lo,
+ tx_stats->read_req_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes",
+ convert_to_64bit(tx_stats->read_rsp_bytes_lo,
+ tx_stats->read_rsp_bytes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts",
+ (u64)tx_stats->ack_timeouts);
+
+ return stats;
+}
+
+static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes",
+ convert_to_64bit(wqe_stats->large_send_rc_wqes_lo,
+ wqe_stats->large_send_rc_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes",
+ convert_to_64bit(wqe_stats->large_write_rc_wqes_lo,
+ wqe_stats->large_write_rc_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "read_wqes",
+ convert_to_64bit(wqe_stats->read_wqes_lo,
+ wqe_stats->read_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes",
+ convert_to_64bit(wqe_stats->frmr_wqes_lo,
+ wqe_stats->frmr_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes",
+ convert_to_64bit(wqe_stats->mw_bind_wqes_lo,
+ wqe_stats->mw_bind_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes",
+ convert_to_64bit(wqe_stats->invalidate_wqes_lo,
+ wqe_stats->invalidate_wqes_hi));
+ pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops",
+ (u64)wqe_stats->dpp_wqe_drops);
+ return stats;
+}
+
+static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors",
+ (u64)db_err_stats->sq_doorbell_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors",
+ (u64)db_err_stats->cq_doorbell_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors",
+ (u64)db_err_stats->rq_srq_doorbell_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors",
+ (u64)db_err_stats->cq_overflow_errors);
+ return stats;
+}
+
+static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rx_qp_err_stats *rx_qp_err_stats =
+ &rdma_stats->rx_qp_err_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors",
+ (u64)rx_qp_err_stats->nak_invalid_requst_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors",
+ (u64)rx_qp_err_stats->nak_remote_operation_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors",
+ (u64)rx_qp_err_stats->nak_count_remote_access_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_length_errors",
+ (u64)rx_qp_err_stats->local_length_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors",
+ (u64)rx_qp_err_stats->local_protection_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors",
+ (u64)rx_qp_err_stats->local_qp_operation_errors);
+ return stats;
+}
+
+static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+{
+ char *stats = dev->stats_mem.debugfs_mem, *pcur;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_tx_qp_err_stats *tx_qp_err_stats =
+ &rdma_stats->tx_qp_err_stats;
+
+ memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ pcur = stats;
+ pcur += ocrdma_add_stat(stats, pcur, "local_length_errors",
+ (u64)tx_qp_err_stats->local_length_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors",
+ (u64)tx_qp_err_stats->local_protection_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors",
+ (u64)tx_qp_err_stats->local_qp_operation_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors",
+ (u64)tx_qp_err_stats->retry_count_exceeded_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors",
+ (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors);
+ return stats;
+}
+
+static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+{
+ int i;
+ char *pstats = dev->stats_mem.debugfs_mem;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_tx_dbg_stats *tx_dbg_stats =
+ &rdma_stats->tx_dbg_stats;
+
+ memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ for (i = 0; i < 100; i++)
+ pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i,
+ tx_dbg_stats->data[i]);
+
+ return dev->stats_mem.debugfs_mem;
+}
+
+static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+{
+ int i;
+ char *pstats = dev->stats_mem.debugfs_mem;
+ struct ocrdma_rdma_stats_resp *rdma_stats =
+ (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+ struct ocrdma_rx_dbg_stats *rx_dbg_stats =
+ &rdma_stats->rx_dbg_stats;
+
+ memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM));
+
+ for (i = 0; i < 200; i++)
+ pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i,
+ rx_dbg_stats->data[i]);
+
+ return dev->stats_mem.debugfs_mem;
+}
+
+static void ocrdma_update_stats(struct ocrdma_dev *dev)
+{
+ ulong now = jiffies, secs;
+ int status = 0;
+
+ secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U;
+ if (secs) {
+ /* refresh stats from firmware at most once per second */
+ status = ocrdma_mbx_rdma_stats(dev, false);
+ if (status)
+ pr_err("%s: stats mbox failed with status = %d\n",
+ __func__, status);
+ dev->last_stats_time = jiffies;
+ }
+}
+
+static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct ocrdma_stats *pstats = filp->private_data;
+ struct ocrdma_dev *dev = pstats->dev;
+ ssize_t status = 0;
+ char *data = NULL;
+
+ /* No partial reads */
+ if (*ppos != 0)
+ return 0;
+
+ mutex_lock(&dev->stats_lock);
+
+ ocrdma_update_stats(dev);
+
+ switch (pstats->type) {
+ case OCRDMA_RSRC_STATS:
+ data = ocrdma_resource_stats(dev);
+ break;
+ case OCRDMA_RXSTATS:
+ data = ocrdma_rx_stats(dev);
+ break;
+ case OCRDMA_WQESTATS:
+ data = ocrdma_wqe_stats(dev);
+ break;
+ case OCRDMA_TXSTATS:
+ data = ocrdma_tx_stats(dev);
+ break;
+ case OCRDMA_DB_ERRSTATS:
+ data = ocrdma_db_errstats(dev);
+ break;
+ case OCRDMA_RXQP_ERRSTATS:
+ data = ocrdma_rxqp_errstats(dev);
+ break;
+ case OCRDMA_TXQP_ERRSTATS:
+ data = ocrdma_txqp_errstats(dev);
+ break;
+ case OCRDMA_TX_DBG_STATS:
+ data = ocrdma_tx_dbg_stats(dev);
+ break;
+ case OCRDMA_RX_DBG_STATS:
+ data = ocrdma_rx_dbg_stats(dev);
+ break;
+
+ default:
+ status = -EFAULT;
+ goto exit;
+ }
+
+ if (usr_buf_len < strlen(data)) {
+ status = -ENOSPC;
+ goto exit;
+ }
+
+ status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data,
+ strlen(data));
+exit:
+ mutex_unlock(&dev->stats_lock);
+ return status;
+}
+
+static const struct file_operations ocrdma_dbg_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ocrdma_dbgfs_ops_read,
+};
+
+void ocrdma_add_port_stats(struct ocrdma_dev *dev)
+{
+ if (!ocrdma_dbgfs_dir)
+ return;
+
+ /* Create per-port stats base dir */
+ dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+ if (!dev->dir)
+ goto err;
+
+ dev->rsrc_stats.type = OCRDMA_RSRC_STATS;
+ dev->rsrc_stats.dev = dev;
+ if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir,
+ &dev->rsrc_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->rx_stats.type = OCRDMA_RXSTATS;
+ dev->rx_stats.dev = dev;
+ if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir,
+ &dev->rx_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->wqe_stats.type = OCRDMA_WQESTATS;
+ dev->wqe_stats.dev = dev;
+ if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir,
+ &dev->wqe_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->tx_stats.type = OCRDMA_TXSTATS;
+ dev->tx_stats.dev = dev;
+ if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir,
+ &dev->tx_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->db_err_stats.type = OCRDMA_DB_ERRSTATS;
+ dev->db_err_stats.dev = dev;
+ if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir,
+ &dev->db_err_stats, &ocrdma_dbg_ops))
+ goto err;
+
+
+ dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS;
+ dev->tx_qp_err_stats.dev = dev;
+ if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir,
+ &dev->tx_qp_err_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS;
+ dev->rx_qp_err_stats.dev = dev;
+ if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir,
+ &dev->rx_qp_err_stats, &ocrdma_dbg_ops))
+ goto err;
+
+
+ dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS;
+ dev->tx_dbg_stats.dev = dev;
+ if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir,
+ &dev->tx_dbg_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS;
+ dev->rx_dbg_stats.dev = dev;
+ if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir,
+ &dev->rx_dbg_stats, &ocrdma_dbg_ops))
+ goto err;
+
+ /* Now create dma_mem for stats mbx command */
+ if (!ocrdma_alloc_stats_mem(dev))
+ goto err;
+
+ mutex_init(&dev->stats_lock);
+
+ return;
+err:
+ ocrdma_release_stats_mem(dev);
+ debugfs_remove_recursive(dev->dir);
+ dev->dir = NULL;
+}
+
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
+{
+ if (!dev->dir)
+ return;
+ mutex_destroy(&dev->stats_lock);
+ ocrdma_release_stats_mem(dev);
+ debugfs_remove(dev->dir);
+}
+
+void ocrdma_init_debugfs(void)
+{
+ /* Create base dir in debugfs root dir */
+ ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL);
+}
+
+void ocrdma_rem_debugfs(void)
+{
+ debugfs_remove_recursive(ocrdma_dbgfs_dir);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
new file mode 100644
index 00000000000..5f5e20c46d7
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -0,0 +1,54 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_STATS_H__
+#define __OCRDMA_STATS_H__
+
+#include <linux/debugfs.h>
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+
+#define OCRDMA_MAX_DBGFS_MEM 4096
+
+enum OCRDMA_STATS_TYPE {
+ OCRDMA_RSRC_STATS,
+ OCRDMA_RXSTATS,
+ OCRDMA_WQESTATS,
+ OCRDMA_TXSTATS,
+ OCRDMA_DB_ERRSTATS,
+ OCRDMA_RXQP_ERRSTATS,
+ OCRDMA_TXQP_ERRSTATS,
+ OCRDMA_TX_DBG_STATS,
+ OCRDMA_RX_DBG_STATS
+};
+
+void ocrdma_rem_debugfs(void);
+void ocrdma_init_debugfs(void);
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
+void ocrdma_add_port_stats(struct ocrdma_dev *dev);
+
+#endif /* __OCRDMA_STATS_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
new file mode 100644
index 00000000000..edf6211d84b
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -0,0 +1,3054 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <linux/dma-mapping.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+#include "ocrdma_verbs.h"
+#include "ocrdma_abi.h"
+
+int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ if (index > 1)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *sgid)
+{
+ struct ocrdma_dev *dev;
+
+ dev = get_ocrdma_dev(ibdev);
+ memset(sgid, 0, sizeof(*sgid));
+ if (index > OCRDMA_MAX_SGID)
+ return -EINVAL;
+
+ memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
+
+ return 0;
+}
+
+int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
+{
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+
+ memset(attr, 0, sizeof *attr);
+ memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
+ min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
+ ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
+ attr->max_mr_size = ~0ull;
+ attr->page_size_cap = 0xffff000;
+ attr->vendor_id = dev->nic_info.pdev->vendor;
+ attr->vendor_part_id = dev->nic_info.pdev->device;
+ attr->hw_ver = 0;
+ attr->max_qp = dev->attr.max_qp;
+ attr->max_ah = OCRDMA_MAX_AH;
+ attr->max_qp_wr = dev->attr.max_wqe;
+
+ attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
+ IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_SHUTDOWN_PORT |
+ IB_DEVICE_SYS_IMAGE_GUID |
+ IB_DEVICE_LOCAL_DMA_LKEY |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
+ attr->max_sge_rd = 0;
+ attr->max_cq = dev->attr.max_cq;
+ attr->max_cqe = dev->attr.max_cqe;
+ attr->max_mr = dev->attr.max_mr;
+ attr->max_mw = dev->attr.max_mw;
+ attr->max_pd = dev->attr.max_pd;
+ attr->atomic_cap = 0;
+ attr->max_fmr = 0;
+ attr->max_map_per_fmr = 0;
+ attr->max_qp_rd_atom =
+ min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
+ attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
+ attr->max_srq = dev->attr.max_srq;
+ attr->max_srq_sge = dev->attr.max_srq_sge;
+ attr->max_srq_wr = dev->attr.max_rqe;
+ attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
+ attr->max_fast_reg_page_list_len = 0;
+ attr->max_pkeys = 1;
+ return 0;
+}
+
+static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
+ u8 *ib_speed, u8 *ib_width)
+{
+ int status;
+ u8 speed;
+
+ status = ocrdma_mbx_get_link_speed(dev, &speed);
+ if (status)
+ speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
+
+ switch (speed) {
+ case OCRDMA_PHYS_LINK_SPEED_1GBPS:
+ *ib_speed = IB_SPEED_SDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+
+ case OCRDMA_PHYS_LINK_SPEED_10GBPS:
+ *ib_speed = IB_SPEED_QDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+
+ case OCRDMA_PHYS_LINK_SPEED_20GBPS:
+ *ib_speed = IB_SPEED_DDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ case OCRDMA_PHYS_LINK_SPEED_40GBPS:
+ *ib_speed = IB_SPEED_QDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ default:
+ /* Unsupported */
+ *ib_speed = IB_SPEED_SDR;
+ *ib_width = IB_WIDTH_1X;
+ }
+}
+
+int ocrdma_query_port(struct ib_device *ibdev,
+ u8 port, struct ib_port_attr *props)
+{
+ enum ib_port_state port_state;
+ struct ocrdma_dev *dev;
+ struct net_device *netdev;
+
+ dev = get_ocrdma_dev(ibdev);
+ if (port > 1) {
+ pr_err("%s(%d) invalid_port=0x%x\n", __func__,
+ dev->id, port);
+ return -EINVAL;
+ }
+ netdev = dev->nic_info.netdev;
+ if (netif_running(netdev) && netif_oper_up(netdev)) {
+ port_state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ } else {
+ port_state = IB_PORT_DOWN;
+ props->phys_state = 3;
+ }
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = iboe_get_mtu(netdev->mtu);
+ props->lid = 0;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ props->state = port_state;
+ props->port_cap_flags =
+ IB_PORT_CM_SUP |
+ IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS;
+ props->gid_tbl_len = OCRDMA_MAX_SGID;
+ props->pkey_tbl_len = 1;
+ props->bad_pkey_cntr = 0;
+ props->qkey_viol_cntr = 0;
+ get_link_speed_and_width(dev, &props->active_speed,
+ &props->active_width);
+ props->max_msg_sz = 0x80000000;
+ props->max_vl_num = 4;
+ return 0;
+}
+
+int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
+ struct ib_port_modify *props)
+{
+ struct ocrdma_dev *dev;
+
+ dev = get_ocrdma_dev(ibdev);
+ if (port > 1) {
+ pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
+ unsigned long len)
+{
+ struct ocrdma_mm *mm;
+
+ mm = kzalloc(sizeof(*mm), GFP_KERNEL);
+ if (mm == NULL)
+ return -ENOMEM;
+ mm->key.phy_addr = phy_addr;
+ mm->key.len = len;
+ INIT_LIST_HEAD(&mm->entry);
+
+ mutex_lock(&uctx->mm_list_lock);
+ list_add_tail(&mm->entry, &uctx->mm_head);
+ mutex_unlock(&uctx->mm_list_lock);
+ return 0;
+}
+
+static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
+ unsigned long len)
+{
+ struct ocrdma_mm *mm, *tmp;
+
+ mutex_lock(&uctx->mm_list_lock);
+ list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
+ if (len != mm->key.len && phy_addr != mm->key.phy_addr)
+ continue;
+
+ list_del(&mm->entry);
+ kfree(mm);
+ break;
+ }
+ mutex_unlock(&uctx->mm_list_lock);
+}
+
+static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
+ unsigned long len)
+{
+ bool found = false;
+ struct ocrdma_mm *mm;
+
+ mutex_lock(&uctx->mm_list_lock);
+ list_for_each_entry(mm, &uctx->mm_head, entry) {
+ if (len != mm->key.len && phy_addr != mm->key.phy_addr)
+ continue;
+
+ found = true;
+ break;
+ }
+ mutex_unlock(&uctx->mm_list_lock);
+ return found;
+}
+
+static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
+ struct ocrdma_ucontext *uctx,
+ struct ib_udata *udata)
+{
+ struct ocrdma_pd *pd = NULL;
+ int status = 0;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ if (udata && uctx) {
+ pd->dpp_enabled =
+ ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
+ pd->num_dpp_qp =
+ pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+ }
+
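+ /* If the firmware cannot allocate a DPP-capable PD, retry once with
+ * DPP disabled.
+ */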
+retry:
+ status = ocrdma_mbx_alloc_pd(dev, pd);
+ if (status) {
+ if (pd->dpp_enabled) {
+ pd->dpp_enabled = false;
+ pd->num_dpp_qp = 0;
+ goto retry;
+ } else {
+ kfree(pd);
+ return ERR_PTR(status);
+ }
+ }
+
+ return pd;
+}
+
+static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
+ struct ocrdma_pd *pd)
+{
+ return (uctx->cntxt_pd == pd ? true : false);
+}
+
+static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
+ struct ocrdma_pd *pd)
+{
+ int status = 0;
+
+ status = ocrdma_mbx_dealloc_pd(dev, pd);
+ kfree(pd);
+ return status;
+}
+
+static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
+ struct ocrdma_ucontext *uctx,
+ struct ib_udata *udata)
+{
+ int status = 0;
+
+ uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
+ if (IS_ERR(uctx->cntxt_pd)) {
+ status = PTR_ERR(uctx->cntxt_pd);
+ uctx->cntxt_pd = NULL;
+ goto err;
+ }
+
+ uctx->cntxt_pd->uctx = uctx;
+ uctx->cntxt_pd->ibpd.device = &dev->ibdev;
+err:
+ return status;
+}
+
+static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+ int status = 0;
+ struct ocrdma_pd *pd = uctx->cntxt_pd;
+ struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
+
+ BUG_ON(uctx->pd_in_use);
+ uctx->cntxt_pd = NULL;
+ status = _ocrdma_dealloc_pd(dev, pd);
+ return status;
+}
+
+static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+ struct ocrdma_pd *pd = NULL;
+
+ mutex_lock(&uctx->mm_list_lock);
+ if (!uctx->pd_in_use) {
+ uctx->pd_in_use = true;
+ pd = uctx->cntxt_pd;
+ }
+ mutex_unlock(&uctx->mm_list_lock);
+
+ return pd;
+}
+
+static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
+{
+ mutex_lock(&uctx->mm_list_lock);
+ uctx->pd_in_use = false;
+ mutex_unlock(&uctx->mm_list_lock);
+}
+
+struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ int status;
+ struct ocrdma_ucontext *ctx;
+ struct ocrdma_alloc_ucontext_resp resp;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
+
+ if (!udata)
+ return ERR_PTR(-EFAULT);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&ctx->mm_head);
+ mutex_init(&ctx->mm_list_lock);
+
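+ /* Allocate the address handle table; its pages are exposed to user
+ * space through mmap via resp.ah_tbl_page.
+ */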
+ ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
+ &ctx->ah_tbl.pa, GFP_KERNEL);
+ if (!ctx->ah_tbl.va) {
+ kfree(ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+ memset(ctx->ah_tbl.va, 0, map_len);
+ ctx->ah_tbl.len = map_len;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.ah_tbl_len = ctx->ah_tbl.len;
+ resp.ah_tbl_page = ctx->ah_tbl.pa;
+
+ status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
+ if (status)
+ goto map_err;
+
+ status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
+ if (status)
+ goto pd_err;
+
+ resp.dev_id = dev->id;
+ resp.max_inline_data = dev->attr.max_inline_data;
+ resp.wqe_size = dev->attr.wqe_size;
+ resp.rqe_size = dev->attr.rqe_size;
+ resp.dpp_wqe_size = dev->attr.wqe_size;
+
+ memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
+ status = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (status)
+ goto cpy_err;
+ return &ctx->ibucontext;
+
+cpy_err:
+pd_err:
+ ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
+map_err:
+ dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
+ ctx->ah_tbl.pa);
+ kfree(ctx);
+ return ERR_PTR(status);
+}
+
+int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ int status = 0;
+ struct ocrdma_mm *mm, *tmp;
+ struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
+ struct pci_dev *pdev = dev->nic_info.pdev;
+
+ status = ocrdma_dealloc_ucontext_pd(uctx);
+
+ ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
+ dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
+ uctx->ah_tbl.pa);
+
+ list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
+ list_del(&mm->entry);
+ kfree(mm);
+ }
+ kfree(uctx);
+ return status;
+}
+
+int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
+ struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
+ unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
+ u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
+ unsigned long len = (vma->vm_end - vma->vm_start);
+ int status = 0;
+ bool found;
+
+ if (vma->vm_start & (PAGE_SIZE - 1))
+ return -EINVAL;
+ found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
+ if (!found)
+ return -EINVAL;
+
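+ /* Doorbell pages are mapped uncached, DPP pages write combined; any
+ * other queue memory is mapped as normal cacheable memory.
+ */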
+ if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
+ dev->nic_info.db_total_size)) &&
+ (len <= dev->nic_info.db_page_size)) {
+ if (vma->vm_flags & VM_READ)
+ return -EPERM;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ len, vma->vm_page_prot);
+ } else if (dev->nic_info.dpp_unmapped_len &&
+ (vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
+ (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
+ dev->nic_info.dpp_unmapped_len)) &&
+ (len <= dev->nic_info.dpp_unmapped_len)) {
+ if (vma->vm_flags & VM_READ)
+ return -EPERM;
+
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ len, vma->vm_page_prot);
+ } else {
+ status = remap_pfn_range(vma, vma->vm_start,
+ vma->vm_pgoff, len, vma->vm_page_prot);
+ }
+ return status;
+}
+
+static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
+ struct ib_ucontext *ib_ctx,
+ struct ib_udata *udata)
+{
+ int status;
+ u64 db_page_addr;
+ u64 dpp_page_addr = 0;
+ u32 db_page_size;
+ struct ocrdma_alloc_pd_uresp rsp;
+ struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
+
+ memset(&rsp, 0, sizeof(rsp));
+ rsp.id = pd->id;
+ rsp.dpp_enabled = pd->dpp_enabled;
+ db_page_addr = ocrdma_get_db_addr(dev, pd->id);
+ db_page_size = dev->nic_info.db_page_size;
+
+ status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
+ if (status)
+ return status;
+
+ if (pd->dpp_enabled) {
+ dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
+ (pd->id * PAGE_SIZE);
+ status = ocrdma_add_mmap(uctx, dpp_page_addr,
+ PAGE_SIZE);
+ if (status)
+ goto dpp_map_err;
+ rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
+ rsp.dpp_page_addr_lo = dpp_page_addr;
+ }
+
+ status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
+ if (status)
+ goto ucopy_err;
+
+ pd->uctx = uctx;
+ return 0;
+
+ucopy_err:
+ if (pd->dpp_enabled)
+ ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
+dpp_map_err:
+ ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
+ return status;
+}
+
+struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+ struct ocrdma_pd *pd;
+ struct ocrdma_ucontext *uctx = NULL;
+ int status;
+ u8 is_uctx_pd = false;
+
+ if (udata && context) {
+ uctx = get_ocrdma_ucontext(context);
+ pd = ocrdma_get_ucontext_pd(uctx);
+ if (pd) {
+ is_uctx_pd = true;
+ goto pd_mapping;
+ }
+ }
+
+ pd = _ocrdma_alloc_pd(dev, uctx, udata);
+ if (IS_ERR(pd)) {
+ status = PTR_ERR(pd);
+ goto exit;
+ }
+
+pd_mapping:
+ if (udata && context) {
+ status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
+ if (status)
+ goto err;
+ }
+ return &pd->ibpd;
+
+err:
+ if (is_uctx_pd) {
+ ocrdma_release_ucontext_pd(uctx);
+ } else {
+ status = ocrdma_mbx_dealloc_pd(dev, pd);
+ kfree(pd);
+ }
+exit:
+ return ERR_PTR(status);
+}
+
+int ocrdma_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ struct ocrdma_ucontext *uctx = NULL;
+ int status = 0;
+ u64 usr_db;
+
+ uctx = pd->uctx;
+ if (uctx) {
+ u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
+ (pd->id * PAGE_SIZE);
+ if (pd->dpp_enabled)
+ ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
+ usr_db = ocrdma_get_db_addr(dev, pd->id);
+ ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
+
+ if (is_ucontext_pd(uctx, pd)) {
+ ocrdma_release_ucontext_pd(uctx);
+ return status;
+ }
+ }
+ status = _ocrdma_dealloc_pd(dev, pd);
+ return status;
+}
+
+static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+ u32 pdid, int acc, u32 num_pbls, u32 addr_check)
+{
+ int status;
+
+ mr->hwmr.fr_mr = 0;
+ mr->hwmr.local_rd = 1;
+ mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+ mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
+ mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ mr->hwmr.num_pbls = num_pbls;
+
+ status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
+ if (status)
+ return status;
+
+ mr->ibmr.lkey = mr->hwmr.lkey;
+ if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+ mr->ibmr.rkey = mr->hwmr.lkey;
+ return 0;
+}
+
+struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+ int status;
+ struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+
+ if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
+ pr_err("%s err, invalid access rights\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
+ OCRDMA_ADDR_CHECK_DISABLE);
+ if (status) {
+ kfree(mr);
+ return ERR_PTR(status);
+ }
+
+ return &mr->ibmr;
+}
+
+static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
+ struct ocrdma_hw_mr *mr)
+{
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ int i = 0;
+
+ if (mr->pbl_table) {
+ for (i = 0; i < mr->num_pbls; i++) {
+ if (!mr->pbl_table[i].va)
+ continue;
+ dma_free_coherent(&pdev->dev, mr->pbl_size,
+ mr->pbl_table[i].va,
+ mr->pbl_table[i].pa);
+ }
+ kfree(mr->pbl_table);
+ mr->pbl_table = NULL;
+ }
+}
+
+static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+ u32 num_pbes)
+{
+ u32 num_pbls = 0;
+ u32 idx = 0;
+ int status = 0;
+ u32 pbl_size;
+
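+ /* Start at the minimum huge page PBL size and keep doubling until the
+ * PBEs for this MR fit within the adapter's per-MR PBL limit.
+ */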
+ do {
+ pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
+ if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
+ status = -EFAULT;
+ break;
+ }
+ num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
+ num_pbls = num_pbls / (pbl_size / sizeof(u64));
+ idx++;
+ } while (num_pbls >= dev->attr.max_num_mr_pbl);
+
+ mr->hwmr.num_pbes = num_pbes;
+ mr->hwmr.num_pbls = num_pbls;
+ mr->hwmr.pbl_size = pbl_size;
+ return status;
+}
+
+static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
+{
+ int status = 0;
+ int i;
+ u32 dma_len = mr->pbl_size;
+ struct pci_dev *pdev = dev->nic_info.pdev;
+ void *va;
+ dma_addr_t pa;
+
+ mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) *
+ mr->num_pbls, GFP_KERNEL);
+
+ if (!mr->pbl_table)
+ return -ENOMEM;
+
+ for (i = 0; i < mr->num_pbls; i++) {
+ va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
+ if (!va) {
+ ocrdma_free_mr_pbl_tbl(dev, mr);
+ status = -ENOMEM;
+ break;
+ }
+ memset(va, 0, dma_len);
+ mr->pbl_table[i].va = va;
+ mr->pbl_table[i].pa = pa;
+ }
+ return status;
+}
+
+static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
+ u32 num_pbes)
+{
+ struct ocrdma_pbe *pbe;
+ struct scatterlist *sg;
+ struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
+ struct ib_umem *umem = mr->umem;
+ int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
+
+ if (!mr->hwmr.num_pbes)
+ return;
+
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ pbe_cnt = 0;
+
+ shift = ilog2(umem->page_size);
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> shift;
+ for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+ /* store the page address in pbe */
+ pbe->pa_lo =
+ cpu_to_le32(sg_dma_address(sg) +
+ (umem->page_size * pg_cnt));
+ pbe->pa_hi =
+ cpu_to_le32(upper_32_bits(sg_dma_address(sg) +
+ (umem->page_size * pg_cnt)));
+ pbe_cnt += 1;
+ total_num_pbes += 1;
+ pbe++;
+
+ /* all pbes built; return so the caller can issue the mbx cmd. */
+ if (total_num_pbes == num_pbes)
+ return;
+
+ /* if the current pbl is full, move to the next pbl. */
+ if (pbe_cnt ==
+ (mr->hwmr.pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ pbe_cnt = 0;
+ }
+
+ }
+ }
+}
+
+struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 usr_addr, int acc, struct ib_udata *udata)
+{
+ int status = -ENOMEM;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd;
+ u32 num_pbes;
+
+ pd = get_ocrdma_pd(ibpd);
+
+ if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(status);
+ mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+ if (IS_ERR(mr->umem)) {
+ status = -EFAULT;
+ goto umem_err;
+ }
+ num_pbes = ib_umem_page_count(mr->umem);
+ status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+ if (status)
+ goto umem_err;
+
+ mr->hwmr.pbe_size = mr->umem->page_size;
+ mr->hwmr.fbo = mr->umem->offset;
+ mr->hwmr.va = usr_addr;
+ mr->hwmr.len = len;
+ mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+ mr->hwmr.local_rd = 1;
+ mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+ if (status)
+ goto umem_err;
+ build_user_pbes(dev, mr, num_pbes);
+ status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
+ if (status)
+ goto mbx_err;
+ mr->ibmr.lkey = mr->hwmr.lkey;
+ if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+ mr->ibmr.rkey = mr->hwmr.lkey;
+
+ return &mr->ibmr;
+
+mbx_err:
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+umem_err:
+ kfree(mr);
+ return ERR_PTR(status);
+}
+
+int ocrdma_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
+ int status;
+
+ status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
+
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+
+ /* it could be user registered memory. */
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+ kfree(mr);
+ return status;
+}
+
+static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
+ struct ib_udata *udata,
+ struct ib_ucontext *ib_ctx)
+{
+ int status;
+ struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
+ struct ocrdma_create_cq_uresp uresp;
+
+ memset(&uresp, 0, sizeof(uresp));
+ uresp.cq_id = cq->id;
+ uresp.page_size = PAGE_ALIGN(cq->len);
+ uresp.num_pages = 1;
+ uresp.max_hw_cqe = cq->max_hw_cqe;
+ uresp.page_addr[0] = cq->pa;
+ uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
+ uresp.db_page_size = dev->nic_info.db_page_size;
+ uresp.phase_change = cq->phase_change ? 1 : 0;
+ status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (status) {
+ pr_err("%s(%d) copy error cqid=0x%x.\n",
+ __func__, dev->id, cq->id);
+ goto err;
+ }
+ status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
+ if (status)
+ goto err;
+ status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
+ if (status) {
+ ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
+ goto err;
+ }
+ cq->ucontext = uctx;
+err:
+ return status;
+}
+
+struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
+ struct ib_ucontext *ib_ctx,
+ struct ib_udata *udata)
+{
+ struct ocrdma_cq *cq;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
+ struct ocrdma_ucontext *uctx = NULL;
+ u16 pd_id = 0;
+ int status;
+ struct ocrdma_create_cq_ureq ureq;
+
+ if (udata) {
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return ERR_PTR(-EFAULT);
+ } else
+ ureq.dpp_cq = 0;
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&cq->cq_lock);
+ spin_lock_init(&cq->comp_handler_lock);
+ INIT_LIST_HEAD(&cq->sq_head);
+ INIT_LIST_HEAD(&cq->rq_head);
+ cq->first_arm = true;
+
+ if (ib_ctx) {
+ uctx = get_ocrdma_ucontext(ib_ctx);
+ pd_id = uctx->cntxt_pd->id;
+ }
+
+ status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
+ if (status) {
+ kfree(cq);
+ return ERR_PTR(status);
+ }
+ if (ib_ctx) {
+ status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
+ if (status)
+ goto ctx_err;
+ }
+ cq->phase = OCRDMA_CQE_VALID;
+ dev->cq_tbl[cq->id] = cq;
+ return &cq->ibcq;
+
+ctx_err:
+ ocrdma_mbx_destroy_cq(dev, cq);
+ kfree(cq);
+ return ERR_PTR(status);
+}
+
+int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
+ struct ib_udata *udata)
+{
+ int status = 0;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+
+ if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
+ status = -EINVAL;
+ return status;
+ }
+ ibcq->cqe = new_cnt;
+ return status;
+}
+
+static void ocrdma_flush_cq(struct ocrdma_cq *cq)
+{
+ int cqe_cnt;
+ int valid_count = 0;
+ unsigned long flags;
+
+ struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
+ struct ocrdma_cqe *cqe = NULL;
+
+ cqe = cq->va;
+ cqe_cnt = cq->cqe_cnt;
+
+ /* The last irq might have scheduled a polling thread;
+ * sync up with it before hard flushing.
+ */
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while (cqe_cnt) {
+ if (is_cqe_valid(cq, cqe))
+ valid_count++;
+ cqe++;
+ cqe_cnt--;
+ }
+ ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+}
+
+int ocrdma_destroy_cq(struct ib_cq *ibcq)
+{
+ int status;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_eq *eq = NULL;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
+ int pdid = 0;
+ u32 irq, indx;
+
+ dev->cq_tbl[cq->id] = NULL;
+ indx = ocrdma_get_eq_table_index(dev, cq->eqn);
+ if (indx == -EINVAL)
+ BUG();
+
+ eq = &dev->eq_tbl[indx];
+ irq = ocrdma_get_irq(dev, eq);
+ synchronize_irq(irq);
+ ocrdma_flush_cq(cq);
+
+ status = ocrdma_mbx_destroy_cq(dev, cq);
+ if (cq->ucontext) {
+ pdid = cq->ucontext->cntxt_pd->id;
+ ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
+ PAGE_ALIGN(cq->len));
+ ocrdma_del_mmap(cq->ucontext,
+ ocrdma_get_db_addr(dev, pdid),
+ dev->nic_info.db_page_size);
+ }
+
+ kfree(cq);
+ return status;
+}
+
+static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
+{
+ int status = -EINVAL;
+
+ if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
+ dev->qp_tbl[qp->id] = qp;
+ status = 0;
+ }
+ return status;
+}
+
+static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
+{
+ dev->qp_tbl[qp->id] = NULL;
+}
+
+static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if ((attrs->qp_type != IB_QPT_GSI) &&
+ (attrs->qp_type != IB_QPT_RC) &&
+ (attrs->qp_type != IB_QPT_UC) &&
+ (attrs->qp_type != IB_QPT_UD)) {
+ pr_err("%s(%d) unsupported qp type=0x%x requested\n",
+ __func__, dev->id, attrs->qp_type);
+ return -EINVAL;
+ }
+ /* Skip the check for QP1 to support CM size of 128 */
+ if ((attrs->qp_type != IB_QPT_GSI) &&
+ (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
+ pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
+ __func__, dev->id, attrs->cap.max_send_wr);
+ pr_err("%s(%d) supported send_wr=0x%x\n",
+ __func__, dev->id, dev->attr.max_wqe);
+ return -EINVAL;
+ }
+ if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
+ pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
+ __func__, dev->id, attrs->cap.max_recv_wr);
+ pr_err("%s(%d) supported recv_wr=0x%x\n",
+ __func__, dev->id, dev->attr.max_rqe);
+ return -EINVAL;
+ }
+ if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
+ pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
+ __func__, dev->id, attrs->cap.max_inline_data);
+ pr_err("%s(%d) supported inline data size=0x%x\n",
+ __func__, dev->id, dev->attr.max_inline_data);
+ return -EINVAL;
+ }
+ if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
+ pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
+ __func__, dev->id, attrs->cap.max_send_sge);
+ pr_err("%s(%d) supported send_sge=0x%x\n",
+ __func__, dev->id, dev->attr.max_send_sge);
+ return -EINVAL;
+ }
+ if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
+ pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
+ __func__, dev->id, attrs->cap.max_recv_sge);
+ pr_err("%s(%d) supported recv_sge=0x%x\n",
+ __func__, dev->id, dev->attr.max_recv_sge);
+ return -EINVAL;
+ }
+ /* unprivileged user space cannot create special QP */
+ if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+ pr_err("%s(%d) Userspace can't create special QPs of type=0x%x\n",
+ __func__, dev->id, attrs->qp_type);
+ return -EINVAL;
+ }
+ /* allow creating only one GSI type of QP */
+ if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
+ pr_err("%s(%d) GSI special QPs already created.\n",
+ __func__, dev->id);
+ return -EINVAL;
+ }
+ /* verify consumer QPs are not trying to use GSI QP's CQ */
+ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
+ if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
+ (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
+ pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
+ __func__, dev->id);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
+ struct ib_udata *udata, int dpp_offset,
+ int dpp_credit_lmt, int srq)
+{
+ int status = 0;
+ u64 usr_db;
+ struct ocrdma_create_qp_uresp uresp;
+ struct ocrdma_dev *dev = qp->dev;
+ struct ocrdma_pd *pd = qp->pd;
+
+ memset(&uresp, 0, sizeof(uresp));
+ usr_db = dev->nic_info.unmapped_db +
+ (pd->id * dev->nic_info.db_page_size);
+ uresp.qp_id = qp->id;
+ uresp.sq_dbid = qp->sq.dbid;
+ uresp.num_sq_pages = 1;
+ uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
+ uresp.sq_page_addr[0] = qp->sq.pa;
+ uresp.num_wqe_allocated = qp->sq.max_cnt;
+ if (!srq) {
+ uresp.rq_dbid = qp->rq.dbid;
+ uresp.num_rq_pages = 1;
+ uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
+ uresp.rq_page_addr[0] = qp->rq.pa;
+ uresp.num_rqe_allocated = qp->rq.max_cnt;
+ }
+ uresp.db_page_addr = usr_db;
+ uresp.db_page_size = dev->nic_info.db_page_size;
+ uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
+ uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+ uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
+
+ if (qp->dpp_enabled) {
+ uresp.dpp_credit = dpp_credit_lmt;
+ uresp.dpp_offset = dpp_offset;
+ }
+ status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (status) {
+ pr_err("%s(%d) user copy error.\n", __func__, dev->id);
+ goto err;
+ }
+ status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
+ uresp.sq_page_size);
+ if (status)
+ goto err;
+
+ if (!srq) {
+ status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
+ uresp.rq_page_size);
+ if (status)
+ goto rq_map_err;
+ }
+ return status;
+rq_map_err:
+ ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
+err:
+ return status;
+}
+
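+/* Doorbells live in the per-PD doorbell page; the SQ/RQ offsets within
+ * that page differ between SKH-R (GEN2) and earlier ASICs.
+ */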
+static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
+ struct ocrdma_pd *pd)
+{
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
+ qp->sq_db = dev->nic_info.db +
+ (pd->id * dev->nic_info.db_page_size) +
+ OCRDMA_DB_GEN2_SQ_OFFSET;
+ qp->rq_db = dev->nic_info.db +
+ (pd->id * dev->nic_info.db_page_size) +
+ OCRDMA_DB_GEN2_RQ_OFFSET;
+ } else {
+ qp->sq_db = dev->nic_info.db +
+ (pd->id * dev->nic_info.db_page_size) +
+ OCRDMA_DB_SQ_OFFSET;
+ qp->rq_db = dev->nic_info.db +
+ (pd->id * dev->nic_info.db_page_size) +
+ OCRDMA_DB_RQ_OFFSET;
+ }
+}
+
+static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
+{
+ qp->wqe_wr_id_tbl =
+ kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
+ GFP_KERNEL);
+ if (qp->wqe_wr_id_tbl == NULL)
+ return -ENOMEM;
+ qp->rqe_wr_id_tbl =
+ kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
+ if (qp->rqe_wr_id_tbl == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
+ struct ocrdma_pd *pd,
+ struct ib_qp_init_attr *attrs)
+{
+ qp->pd = pd;
+ spin_lock_init(&qp->q_lock);
+ INIT_LIST_HEAD(&qp->sq_entry);
+ INIT_LIST_HEAD(&qp->rq_entry);
+
+ qp->qp_type = attrs->qp_type;
+ qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
+ qp->max_inline_data = attrs->cap.max_inline_data;
+ qp->sq.max_sges = attrs->cap.max_send_sge;
+ qp->rq.max_sges = attrs->cap.max_recv_sge;
+ qp->state = OCRDMA_QPS_RST;
+ qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
+}
+
+static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if (attrs->qp_type == IB_QPT_GSI) {
+ dev->gsi_qp_created = 1;
+ dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
+ dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
+ }
+}
+
+struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ int status;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_qp *qp;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ struct ocrdma_create_qp_ureq ureq;
+ u16 dpp_credit_lmt, dpp_offset;
+
+ status = ocrdma_check_qp_params(ibpd, dev, attrs);
+ if (status)
+ goto gen_err;
+
+ memset(&ureq, 0, sizeof(ureq));
+ if (udata) {
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return ERR_PTR(-EFAULT);
+ }
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ status = -ENOMEM;
+ goto gen_err;
+ }
+ qp->dev = dev;
+ ocrdma_set_qp_init_params(qp, pd, attrs);
+ if (udata == NULL)
+ qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
+ OCRDMA_QP_FAST_REG);
+
+ mutex_lock(&dev->dev_lock);
+ status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
+ ureq.dpp_cq_id,
+ &dpp_offset, &dpp_credit_lmt);
+ if (status)
+ goto mbx_err;
+
+ /* user space QPs' wr_id tables are managed in the user library */
+ if (udata == NULL) {
+ status = ocrdma_alloc_wr_id_tbl(qp);
+ if (status)
+ goto map_err;
+ }
+
+ status = ocrdma_add_qpn_map(dev, qp);
+ if (status)
+ goto map_err;
+ ocrdma_set_qp_db(dev, qp, pd);
+ if (udata) {
+ status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
+ dpp_credit_lmt,
+ (attrs->srq != NULL));
+ if (status)
+ goto cpy_err;
+ }
+ ocrdma_store_gsi_qp_cq(dev, attrs);
+ qp->ibqp.qp_num = qp->id;
+ mutex_unlock(&dev->dev_lock);
+ return &qp->ibqp;
+
+cpy_err:
+ ocrdma_del_qpn_map(dev, qp);
+map_err:
+ ocrdma_mbx_destroy_qp(dev, qp);
+mbx_err:
+ mutex_unlock(&dev->dev_lock);
+ kfree(qp->wqe_wr_id_tbl);
+ kfree(qp->rqe_wr_id_tbl);
+ kfree(qp);
+ pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
+gen_err:
+ return ERR_PTR(status);
+}
+
+int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ int status = 0;
+ struct ocrdma_qp *qp;
+ struct ocrdma_dev *dev;
+ enum ib_qp_state old_qps;
+
+ qp = get_ocrdma_qp(ibqp);
+ dev = qp->dev;
+ if (attr_mask & IB_QP_STATE)
+ status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
+ /* if the new and previous states are the same, the hw doesn't need
+ * to know about it.
+ */
+ if (status < 0)
+ return status;
+ status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
+
+ return status;
+}
+
+int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ unsigned long flags;
+ int status = -EINVAL;
+ struct ocrdma_qp *qp;
+ struct ocrdma_dev *dev;
+ enum ib_qp_state old_qps, new_qps;
+
+ qp = get_ocrdma_qp(ibqp);
+ dev = qp->dev;
+
+ /* synchronize with multiple contexts trying to change/retrieve qps */
+ mutex_lock(&dev->dev_lock);
+ /* synchronize with wqe, rqe posting and cqe processing contexts */
+ spin_lock_irqsave(&qp->q_lock, flags);
+ old_qps = get_ibqp_state(qp->state);
+ if (attr_mask & IB_QP_STATE)
+ new_qps = attr->qp_state;
+ else
+ new_qps = old_qps;
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+
+ if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_ETHERNET)) {
+ pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
+ "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
+ __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
+ old_qps, new_qps);
+ goto param_err;
+ }
+
+ status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
+ if (status > 0)
+ status = 0;
+param_err:
+ mutex_unlock(&dev->dev_lock);
+ return status;
+}
+
+static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
+{
+ switch (mtu) {
+ case 256:
+ return IB_MTU_256;
+ case 512:
+ return IB_MTU_512;
+ case 1024:
+ return IB_MTU_1024;
+ case 2048:
+ return IB_MTU_2048;
+ case 4096:
+ return IB_MTU_4096;
+ default:
+ return IB_MTU_1024;
+ }
+}
+
+static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
+{
+ int ib_qp_acc_flags = 0;
+
+ if (qp_cap_flags & OCRDMA_QP_INB_WR)
+ ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (qp_cap_flags & OCRDMA_QP_INB_RD)
+ ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
+ return ib_qp_acc_flags;
+}
+
+int ocrdma_query_qp(struct ib_qp *ibqp,
+ struct ib_qp_attr *qp_attr,
+ int attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ int status;
+ u32 qp_state;
+ struct ocrdma_qp_params params;
+ struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+ struct ocrdma_dev *dev = qp->dev;
+
+ memset(&params, 0, sizeof(params));
+ mutex_lock(&dev->dev_lock);
+ status = ocrdma_mbx_query_qp(dev, qp, &params);
+ mutex_unlock(&dev->dev_lock);
+ if (status)
+ goto mbx_err;
+ qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
+ qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
+ qp_attr->path_mtu =
+ ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
+ OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
+ OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
+ qp_attr->path_mig_state = IB_MIG_MIGRATED;
+ qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
+ qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
+ qp_attr->dest_qp_num =
+ params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
+
+ qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
+ qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
+ qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
+ qp_attr->cap.max_send_sge = qp->sq.max_sges;
+ qp_attr->cap.max_recv_sge = qp->rq.max_sges;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
+ qp_init_attr->cap = qp_attr->cap;
+ memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
+ sizeof(params.dgid));
+ qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
+ qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
+ qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
+ OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
+ OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
+ qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
+ OCRDMA_QP_PARAMS_TCLASS_MASK) >>
+ OCRDMA_QP_PARAMS_TCLASS_SHIFT;
+
+ qp_attr->ah_attr.ah_flags = IB_AH_GRH;
+ qp_attr->ah_attr.port_num = 1;
+ qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_SL_MASK) >>
+ OCRDMA_QP_PARAMS_SL_SHIFT;
+ qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
+ OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
+ OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
+ qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
+ OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
+ OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
+ qp_attr->retry_cnt =
+ (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
+ OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
+ qp_attr->min_rnr_timer = 0;
+ qp_attr->pkey_index = 0;
+ qp_attr->port_num = 1;
+ qp_attr->ah_attr.src_path_bits = 0;
+ qp_attr->ah_attr.static_rate = 0;
+ qp_attr->alt_pkey_index = 0;
+ qp_attr->alt_port_num = 0;
+ qp_attr->alt_timeout = 0;
+ memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
+ qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
+ OCRDMA_QP_PARAMS_STATE_SHIFT;
+ qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
+ qp_attr->max_dest_rd_atomic =
+ params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
+ qp_attr->max_rd_atomic =
+ params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
+ qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
+ OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
+mbx_err:
+ return status;
+}
+
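+/* Each SRQ RQE index has a bit in idx_bit_fields: set means the index is
+ * free, cleared means it is currently posted. Toggling the bit allocates
+ * or returns the slot.
+ */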
+static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
+{
+ int i = idx / 32;
+ unsigned int mask = (1 << (idx % 32));
+
+ if (srq->idx_bit_fields[i] & mask)
+ srq->idx_bit_fields[i] &= ~mask;
+ else
+ srq->idx_bit_fields[i] |= mask;
+}
+
+static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
+{
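+ /* WQE slots still available for posting in the circular queue. */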
+ return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
+}
+
+static int is_hw_sq_empty(struct ocrdma_qp *qp)
+{
+ return (qp->sq.tail == qp->sq.head);
+}
+
+static int is_hw_rq_empty(struct ocrdma_qp *qp)
+{
+ return (qp->rq.tail == qp->rq.head);
+}
+
+static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
+{
+ return q->va + (q->head * q->entry_size);
+}
+
+static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
+ u32 idx)
+{
+ return q->va + (idx * q->entry_size);
+}
+
+static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
+{
+ q->head = (q->head + 1) & q->max_wqe_idx;
+}
+
+static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
+{
+ q->tail = (q->tail + 1) & q->max_wqe_idx;
+}
+
+/* discard the cqe for a given QP */
+static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
+{
+ unsigned long cq_flags;
+ unsigned long flags;
+ int discard_cnt = 0;
+ u32 cur_getp, stop_getp;
+ struct ocrdma_cqe *cqe;
+ u32 qpn = 0, wqe_idx = 0;
+
+ spin_lock_irqsave(&cq->cq_lock, cq_flags);
+
+ /* traverse through the CQEs in the hw CQ,
+ * find the matching CQE for a given qp,
+ * mark the matching one discarded by clearing qpn.
+ * ring the doorbell in the poll_cq() as
+ * we don't complete out of order cqe.
+ */
+
+ cur_getp = cq->getp;
+ /* find up to where we reap the cq. */
+ stop_getp = cur_getp;
+ do {
+ if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
+ break;
+
+ cqe = cq->va + cur_getp;
+ /* exit when (a) the whole hw cq has been reaped, or
+ * (b) the qp's SQ/RQ becomes empty.
+ */
+ qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
+ /* if previously discarded cqe found, skip that too. */
+ /* check for matching qp */
+ if (qpn == 0 || qpn != qp->id)
+ goto skip_cqe;
+
+ if (is_cqe_for_sq(cqe)) {
+ ocrdma_hwq_inc_tail(&qp->sq);
+ } else {
+ if (qp->srq) {
+ wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+ OCRDMA_CQE_BUFTAG_SHIFT) &
+ qp->srq->rq.max_wqe_idx;
+ if (wqe_idx < 1)
+ BUG();
+ spin_lock_irqsave(&qp->srq->q_lock, flags);
+ ocrdma_hwq_inc_tail(&qp->srq->rq);
+ ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
+ spin_unlock_irqrestore(&qp->srq->q_lock, flags);
+
+ } else {
+ ocrdma_hwq_inc_tail(&qp->rq);
+ }
+ }
+ /* mark cqe discarded so that it is not picked up later
+ * in the poll_cq().
+ */
+ discard_cnt += 1;
+ cqe->cmn.qpn = 0;
+skip_cqe:
+ cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
+ } while (cur_getp != stop_getp);
+ spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
+}
+
+void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
+{
+ int found = false;
+ unsigned long flags;
+ struct ocrdma_dev *dev = qp->dev;
+ /* sync with any active CQ poll */
+
+ spin_lock_irqsave(&dev->flush_q_lock, flags);
+ found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
+ if (found)
+ list_del(&qp->sq_entry);
+ if (!qp->srq) {
+ found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
+ if (found)
+ list_del(&qp->rq_entry);
+ }
+ spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+}
+
+int ocrdma_destroy_qp(struct ib_qp *ibqp)
+{
+ int status;
+ struct ocrdma_pd *pd;
+ struct ocrdma_qp *qp;
+ struct ocrdma_dev *dev;
+ struct ib_qp_attr attrs;
+ int attr_mask = IB_QP_STATE;
+ unsigned long flags;
+
+ qp = get_ocrdma_qp(ibqp);
+ dev = qp->dev;
+
+ attrs.qp_state = IB_QPS_ERR;
+ pd = qp->pd;
+
+ /* change the QP state to ERROR */
+ _ocrdma_modify_qp(ibqp, &attrs, attr_mask);
+
+ /* ensure that CQEs for a newly created QP (whose id may be the same
+ * as that of a QP just being destroyed) don't get discarded until
+ * the old QP's CQEs are discarded.
+ */
+ mutex_lock(&dev->dev_lock);
+ status = ocrdma_mbx_destroy_qp(dev, qp);
+
+ /*
+ * acquire CQ lock while destroy is in progress, in order to
+ * protect against processing in-flight CQEs for this QP.
+ */
+ spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
+ if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+ spin_lock(&qp->rq_cq->cq_lock);
+
+ ocrdma_del_qpn_map(dev, qp);
+
+ if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+ spin_unlock(&qp->rq_cq->cq_lock);
+ spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
+
+ if (!pd->uctx) {
+ ocrdma_discard_cqes(qp, qp->sq_cq);
+ ocrdma_discard_cqes(qp, qp->rq_cq);
+ }
+ mutex_unlock(&dev->dev_lock);
+
+ if (pd->uctx) {
+ ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
+ PAGE_ALIGN(qp->sq.len));
+ if (!qp->srq)
+ ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
+ PAGE_ALIGN(qp->rq.len));
+ }
+
+ ocrdma_del_flush_qp(qp);
+
+ kfree(qp->wqe_wr_id_tbl);
+ kfree(qp->rqe_wr_id_tbl);
+ kfree(qp);
+ return status;
+}
+
+static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
+ struct ib_udata *udata)
+{
+ int status;
+ struct ocrdma_create_srq_uresp uresp;
+
+ memset(&uresp, 0, sizeof(uresp));
+ uresp.rq_dbid = srq->rq.dbid;
+ uresp.num_rq_pages = 1;
+ uresp.rq_page_addr[0] = srq->rq.pa;
+ uresp.rq_page_size = srq->rq.len;
+ uresp.db_page_addr = dev->nic_info.unmapped_db +
+ (srq->pd->id * dev->nic_info.db_page_size);
+ uresp.db_page_size = dev->nic_info.db_page_size;
+ uresp.num_rqe_allocated = srq->rq.max_cnt;
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
+ uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+ uresp.db_shift = 24;
+ } else {
+ uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
+ uresp.db_shift = 16;
+ }
+
+ status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (status)
+ return status;
+ status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
+ uresp.rq_page_size);
+ if (status)
+ return status;
+ return status;
+}
+
+struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int status = -ENOMEM;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ struct ocrdma_srq *srq;
+
+ if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
+ return ERR_PTR(-EINVAL);
+ if (init_attr->attr.max_wr > dev->attr.max_rqe)
+ return ERR_PTR(-EINVAL);
+
+ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(status);
+
+ spin_lock_init(&srq->q_lock);
+ srq->pd = pd;
+ srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
+ status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
+ if (status)
+ goto err;
+
+ if (udata == NULL) {
+ srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
+ GFP_KERNEL);
+ if (srq->rqe_wr_id_tbl == NULL)
+ goto arm_err;
+
+ srq->bit_fields_len = (srq->rq.max_cnt / 32) +
+ (srq->rq.max_cnt % 32 ? 1 : 0);
+ srq->idx_bit_fields =
+ kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
+ if (srq->idx_bit_fields == NULL)
+ goto arm_err;
+ memset(srq->idx_bit_fields, 0xff,
+ srq->bit_fields_len * sizeof(u32));
+ }
+
+ if (init_attr->attr.srq_limit) {
+ status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
+ if (status)
+ goto arm_err;
+ }
+
+ if (udata) {
+ status = ocrdma_copy_srq_uresp(dev, srq, udata);
+ if (status)
+ goto arm_err;
+ }
+
+ return &srq->ibsrq;
+
+arm_err:
+ ocrdma_mbx_destroy_srq(dev, srq);
+err:
+ kfree(srq->rqe_wr_id_tbl);
+ kfree(srq->idx_bit_fields);
+ kfree(srq);
+ return ERR_PTR(status);
+}
+
+int ocrdma_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ int status = 0;
+ struct ocrdma_srq *srq;
+
+ srq = get_ocrdma_srq(ibsrq);
+ if (srq_attr_mask & IB_SRQ_MAX_WR)
+ status = -EINVAL;
+ else
+ status = ocrdma_mbx_modify_srq(srq, srq_attr);
+ return status;
+}
+
+int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+ int status;
+ struct ocrdma_srq *srq;
+
+ srq = get_ocrdma_srq(ibsrq);
+ status = ocrdma_mbx_query_srq(srq, srq_attr);
+ return status;
+}
+
+int ocrdma_destroy_srq(struct ib_srq *ibsrq)
+{
+ int status;
+ struct ocrdma_srq *srq;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
+
+ srq = get_ocrdma_srq(ibsrq);
+
+ status = ocrdma_mbx_destroy_srq(dev, srq);
+
+ if (srq->pd->uctx)
+ ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
+ PAGE_ALIGN(srq->rq.len));
+
+ kfree(srq->idx_bit_fields);
+ kfree(srq->rqe_wr_id_tbl);
+ kfree(srq);
+ return status;
+}
+
+/* unprivileged verbs and their support functions. */
+static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
+ struct ocrdma_hdr_wqe *hdr,
+ struct ib_send_wr *wr)
+{
+ struct ocrdma_ewqe_ud_hdr *ud_hdr =
+ (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
+ struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
+
+ ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
+ if (qp->qp_type == IB_QPT_GSI)
+ ud_hdr->qkey = qp->qkey;
+ else
+ ud_hdr->qkey = wr->wr.ud.remote_qkey;
+ ud_hdr->rsvd_ahid = ah->id;
+}
+
+static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
+ struct ocrdma_sge *sge, int num_sge,
+ struct ib_sge *sg_list)
+{
+ int i;
+
+ for (i = 0; i < num_sge; i++) {
+ sge[i].lrkey = sg_list[i].lkey;
+ sge[i].addr_lo = sg_list[i].addr;
+ sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
+ sge[i].len = sg_list[i].length;
+ hdr->total_len += sg_list[i].length;
+ }
+ if (num_sge == 0)
+ memset(sge, 0, sizeof(*sge));
+}
+
+static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
+{
+ uint32_t total_len = 0, i;
+
+ for (i = 0; i < num_sge; i++)
+ total_len += sg_list[i].length;
+ return total_len;
+}
+
+
+static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
+ struct ocrdma_hdr_wqe *hdr,
+ struct ocrdma_sge *sge,
+ struct ib_send_wr *wr, u32 wqe_size)
+{
+ int i;
+ char *dpp_addr;
+
+ if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
+ hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
+ if (unlikely(hdr->total_len > qp->max_inline_data)) {
+ pr_err("%s() supported_len=0x%x,\n"
+ " unsupported len req=0x%x\n", __func__,
+ qp->max_inline_data, hdr->total_len);
+ return -EINVAL;
+ }
+ dpp_addr = (char *)sge;
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dpp_addr,
+ (void *)(unsigned long)wr->sg_list[i].addr,
+ wr->sg_list[i].length);
+ dpp_addr += wr->sg_list[i].length;
+ }
+
+ wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
+ if (0 == hdr->total_len)
+ wqe_size += sizeof(struct ocrdma_sge);
+ hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
+ } else {
+ ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
+ if (wr->num_sge)
+ wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
+ else
+ wqe_size += sizeof(struct ocrdma_sge);
+ hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+ }
+ hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+ return 0;
+}
+
+static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+ struct ib_send_wr *wr)
+{
+ int status;
+ struct ocrdma_sge *sge;
+ u32 wqe_size = sizeof(*hdr);
+
+ if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
+ ocrdma_build_ud_hdr(qp, hdr, wr);
+ sge = (struct ocrdma_sge *)(hdr + 2);
+ wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
+ } else {
+ sge = (struct ocrdma_sge *)(hdr + 1);
+ }
+
+ status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
+ return status;
+}
+
+static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+ struct ib_send_wr *wr)
+{
+ int status;
+ struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
+ struct ocrdma_sge *sge = ext_rw + 1;
+ u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
+
+ status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
+ if (status)
+ return status;
+ ext_rw->addr_lo = wr->wr.rdma.remote_addr;
+ ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
+ ext_rw->lrkey = wr->wr.rdma.rkey;
+ ext_rw->len = hdr->total_len;
+ return 0;
+}
+
+static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+ struct ib_send_wr *wr)
+{
+ struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
+ struct ocrdma_sge *sge = ext_rw + 1;
+ u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
+ sizeof(struct ocrdma_hdr_wqe);
+
+ ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
+ hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+ hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
+ hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+
+ ext_rw->addr_lo = wr->wr.rdma.remote_addr;
+ ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
+ ext_rw->lrkey = wr->wr.rdma.rkey;
+ ext_rw->len = hdr->total_len;
+}
+
+static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
+ struct ocrdma_hw_mr *hwmr)
+{
+ int i;
+ u64 buf_addr = 0;
+ int num_pbes;
+ struct ocrdma_pbe *pbe;
+
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ num_pbes = 0;
+
+ /* go through the OS phy regions & fill hw pbe entries into pbls. */
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+ /* one OS buffer may need more than one pbe when buffers are of
+ * different sizes; split the ib_buf into one or more pbes.
+ */
+ buf_addr = wr->wr.fast_reg.page_list->page_list[i];
+ pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+ pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+ num_pbes += 1;
+ pbe++;
+
+ /* if the current pbl is full, move to the next pbl. */
+ if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ }
+ }
+ return;
+}
+
+static int get_encoded_page_size(int pg_sz)
+{
+ /* encoded as a power-of-two exponent of 4K; max size is 256M (4096 << 16) */
+ int i = 0;
+ for (; i < 17; i++)
+ if (pg_sz == (4096 << i))
+ break;
+ return i;
+}
+
+
+static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
+ struct ib_send_wr *wr)
+{
+ u64 fbo;
+ struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
+ struct ocrdma_mr *mr;
+ u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+
+ wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
+
+ if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
+ return -EINVAL;
+
+ hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
+ hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
+
+ if (wr->wr.fast_reg.page_list_len == 0)
+ BUG();
+ if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
+ hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
+ if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
+ hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
+ if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
+ hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
+ hdr->lkey = wr->wr.fast_reg.rkey;
+ hdr->total_len = wr->wr.fast_reg.length;
+
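+ /* first byte offset: where iova_start lies within the first page */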
+ fbo = wr->wr.fast_reg.iova_start -
+ (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
+
+ fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
+ fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
+ fast_reg->fbo_hi = upper_32_bits(fbo);
+ fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
+ fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
+ fast_reg->size_sge =
+ get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
+ mr = (struct ocrdma_mr *) (unsigned long)
+ qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
+ build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
+ return 0;
+}
+
+static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
+{
+ u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
+
+ iowrite32(val, qp->sq_db);
+}
+
+int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ int status = 0;
+ struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+ struct ocrdma_hdr_wqe *hdr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->q_lock, flags);
+ if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ while (wr) {
+ if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
+ wr->num_sge > qp->sq.max_sges) {
+ *bad_wr = wr;
+ status = -ENOMEM;
+ break;
+ }
+ hdr = ocrdma_hwq_head(&qp->sq);
+ hdr->cw = 0;
+ if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
+ hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
+ if (wr->send_flags & IB_SEND_FENCE)
+ hdr->cw |=
+ (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ hdr->cw |=
+ (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
+ hdr->total_len = 0;
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
+ hdr->immdt = ntohl(wr->ex.imm_data);
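+ /* fall through */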
+ case IB_WR_SEND:
+ hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
+ status = ocrdma_build_send(qp, hdr, wr);
+ break;
+ case IB_WR_SEND_WITH_INV:
+ hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
+ hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
+ hdr->lkey = wr->ex.invalidate_rkey;
+ status = ocrdma_build_send(qp, hdr, wr);
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
+ hdr->immdt = ntohl(wr->ex.imm_data);
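+ /* fall through */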
+ case IB_WR_RDMA_WRITE:
+ hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
+ status = ocrdma_build_write(qp, hdr, wr);
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
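+ /* fall through */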
+ case IB_WR_RDMA_READ:
+ ocrdma_build_read(qp, hdr, wr);
+ break;
+ case IB_WR_LOCAL_INV:
+ hdr->cw |=
+ (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
+ hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
+ sizeof(struct ocrdma_sge)) /
+ OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
+ hdr->lkey = wr->ex.invalidate_rkey;
+ break;
+ case IB_WR_FAST_REG_MR:
+ status = ocrdma_build_fr(qp, hdr, wr);
+ break;
+ default:
+ status = -EINVAL;
+ break;
+ }
+ if (status) {
+ *bad_wr = wr;
+ break;
+ }
+ if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
+ qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
+ else
+ qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
+ qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
+ ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
+ OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
+ /* make sure wqe is written before adapter can access it */
+ wmb();
+ /* inform hw to start processing it */
+ ocrdma_ring_sq_db(qp);
+
+ /* update pointer, counter for next wr */
+ ocrdma_hwq_inc_head(&qp->sq);
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ return status;
+}
+
+static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
+{
+ u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
+
+ iowrite32(val, qp->rq_db);
+}
+
+static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
+ u16 tag)
+{
+ u32 wqe_size = 0;
+ struct ocrdma_sge *sge;
+ if (wr->num_sge)
+ wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
+ else
+ wqe_size = sizeof(*sge) + sizeof(*rqe);
+
+ rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
+ OCRDMA_WQE_SIZE_SHIFT);
+ rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
+ rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
+ rqe->total_len = 0;
+ rqe->rsvd_tag = tag;
+ sge = (struct ocrdma_sge *)(rqe + 1);
+ ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
+ ocrdma_cpu_to_le32(rqe, wqe_size);
+}
+
+int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int status = 0;
+ unsigned long flags;
+ struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+ struct ocrdma_hdr_wqe *rqe;
+
+ spin_lock_irqsave(&qp->q_lock, flags);
+ if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+ while (wr) {
+ if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
+ wr->num_sge > qp->rq.max_sges) {
+ *bad_wr = wr;
+ status = -ENOMEM;
+ break;
+ }
+ rqe = ocrdma_hwq_head(&qp->rq);
+ ocrdma_build_rqe(rqe, wr, 0);
+
+ qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
+ /* make sure rqe is written before adapter can access it */
+ wmb();
+
+ /* inform hw to start processing it */
+ ocrdma_ring_rq_db(qp);
+
+ /* update pointer, counter for next wr */
+ ocrdma_hwq_inc_head(&qp->rq);
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ return status;
+}
+
+/* CQEs for an SRQ's RQEs can potentially arrive out of order.
+ * The index selects the shadow table entry where the wr_id is stored;
+ * the tag/index is returned in the CQE so the RQE can be referenced back.
+ */
+static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
+{
+ int row = 0;
+ int indx = 0;
+
+ for (row = 0; row < srq->bit_fields_len; row++) {
+ if (srq->idx_bit_fields[row]) {
+ indx = ffs(srq->idx_bit_fields[row]);
+ indx = (row * 32) + (indx - 1);
+ if (indx >= srq->rq.max_cnt)
+ BUG();
+ ocrdma_srq_toggle_bit(srq, indx);
+ break;
+ }
+ }
+
+ if (row == srq->bit_fields_len)
+ BUG();
+ return indx + 1; /* Use from index 1 */
+}
+
+static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
+{
+ u32 val = srq->rq.dbid | (1 << 16);
+
+ iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
+}
+
+int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ int status = 0;
+ unsigned long flags;
+ struct ocrdma_srq *srq;
+ struct ocrdma_hdr_wqe *rqe;
+ u16 tag;
+
+ srq = get_ocrdma_srq(ibsrq);
+
+ spin_lock_irqsave(&srq->q_lock, flags);
+ while (wr) {
+ if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
+ wr->num_sge > srq->rq.max_sges) {
+ status = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+ tag = ocrdma_srq_get_idx(srq);
+ rqe = ocrdma_hwq_head(&srq->rq);
+ ocrdma_build_rqe(rqe, wr, tag);
+
+ srq->rqe_wr_id_tbl[tag] = wr->wr_id;
+ /* make sure rqe is written before adapter can perform DMA */
+ wmb();
+ /* inform hw to start processing it */
+ ocrdma_ring_srq_db(srq);
+ /* update pointer, counter for next wr */
+ ocrdma_hwq_inc_head(&srq->rq);
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&srq->q_lock, flags);
+ return status;
+}
+
+static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
+{
+ enum ib_wc_status ibwc_status;
+
+ switch (status) {
+ case OCRDMA_CQE_GENERAL_ERR:
+ ibwc_status = IB_WC_GENERAL_ERR;
+ break;
+ case OCRDMA_CQE_LOC_LEN_ERR:
+ ibwc_status = IB_WC_LOC_LEN_ERR;
+ break;
+ case OCRDMA_CQE_LOC_QP_OP_ERR:
+ ibwc_status = IB_WC_LOC_QP_OP_ERR;
+ break;
+ case OCRDMA_CQE_LOC_EEC_OP_ERR:
+ ibwc_status = IB_WC_LOC_EEC_OP_ERR;
+ break;
+ case OCRDMA_CQE_LOC_PROT_ERR:
+ ibwc_status = IB_WC_LOC_PROT_ERR;
+ break;
+ case OCRDMA_CQE_WR_FLUSH_ERR:
+ ibwc_status = IB_WC_WR_FLUSH_ERR;
+ break;
+ case OCRDMA_CQE_MW_BIND_ERR:
+ ibwc_status = IB_WC_MW_BIND_ERR;
+ break;
+ case OCRDMA_CQE_BAD_RESP_ERR:
+ ibwc_status = IB_WC_BAD_RESP_ERR;
+ break;
+ case OCRDMA_CQE_LOC_ACCESS_ERR:
+ ibwc_status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case OCRDMA_CQE_REM_INV_REQ_ERR:
+ ibwc_status = IB_WC_REM_INV_REQ_ERR;
+ break;
+ case OCRDMA_CQE_REM_ACCESS_ERR:
+ ibwc_status = IB_WC_REM_ACCESS_ERR;
+ break;
+ case OCRDMA_CQE_REM_OP_ERR:
+ ibwc_status = IB_WC_REM_OP_ERR;
+ break;
+ case OCRDMA_CQE_RETRY_EXC_ERR:
+ ibwc_status = IB_WC_RETRY_EXC_ERR;
+ break;
+ case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
+ ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
+ break;
+ case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
+ ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
+ break;
+ case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
+ ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
+ break;
+ case OCRDMA_CQE_REM_ABORT_ERR:
+ ibwc_status = IB_WC_REM_ABORT_ERR;
+ break;
+ case OCRDMA_CQE_INV_EECN_ERR:
+ ibwc_status = IB_WC_INV_EECN_ERR;
+ break;
+ case OCRDMA_CQE_INV_EEC_STATE_ERR:
+ ibwc_status = IB_WC_INV_EEC_STATE_ERR;
+ break;
+ case OCRDMA_CQE_FATAL_ERR:
+ ibwc_status = IB_WC_FATAL_ERR;
+ break;
+ case OCRDMA_CQE_RESP_TIMEOUT_ERR:
+ ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
+ break;
+ default:
+ ibwc_status = IB_WC_GENERAL_ERR;
+ break;
+ }
+ return ibwc_status;
+}
+
+static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
+ u32 wqe_idx)
+{
+ struct ocrdma_hdr_wqe *hdr;
+ struct ocrdma_sge *rw;
+ int opcode;
+
+ hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
+
+ ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
+ /* Undo the hdr->cw swap */
+ opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
+ switch (opcode) {
+ case OCRDMA_WRITE:
+ ibwc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case OCRDMA_READ:
+ rw = (struct ocrdma_sge *)(hdr + 1);
+ ibwc->opcode = IB_WC_RDMA_READ;
+ ibwc->byte_len = rw->len;
+ break;
+ case OCRDMA_SEND:
+ ibwc->opcode = IB_WC_SEND;
+ break;
+ case OCRDMA_FR_MR:
+ ibwc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ case OCRDMA_LKEY_INV:
+ ibwc->opcode = IB_WC_LOCAL_INV;
+ break;
+ default:
+ ibwc->status = IB_WC_GENERAL_ERR;
+ pr_err("%s() invalid opcode received = 0x%x\n",
+		       __func__, opcode);
+ break;
+ }
+}
+
+static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
+ struct ocrdma_cqe *cqe)
+{
+ if (is_cqe_for_sq(cqe)) {
+ cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+ cqe->flags_status_srcqpn) &
+ ~OCRDMA_CQE_STATUS_MASK);
+ cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+ cqe->flags_status_srcqpn) |
+ (OCRDMA_CQE_WR_FLUSH_ERR <<
+ OCRDMA_CQE_STATUS_SHIFT));
+ } else {
+ if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
+ cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+ cqe->flags_status_srcqpn) &
+ ~OCRDMA_CQE_UD_STATUS_MASK);
+ cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+ cqe->flags_status_srcqpn) |
+ (OCRDMA_CQE_WR_FLUSH_ERR <<
+ OCRDMA_CQE_UD_STATUS_SHIFT));
+ } else {
+ cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+ cqe->flags_status_srcqpn) &
+ ~OCRDMA_CQE_STATUS_MASK);
+ cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
+ cqe->flags_status_srcqpn) |
+ (OCRDMA_CQE_WR_FLUSH_ERR <<
+ OCRDMA_CQE_STATUS_SHIFT));
+ }
+ }
+}
+
+static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
+ struct ocrdma_qp *qp, int status)
+{
+ bool expand = false;
+
+ ibwc->byte_len = 0;
+ ibwc->qp = &qp->ibqp;
+ ibwc->status = ocrdma_to_ibwc_err(status);
+
+ ocrdma_flush_qp(qp);
+ ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
+
+	/* if a wqe/rqe is pending for which a cqe still needs to be
+	 * returned, trigger expanding it.
+	 */
+ if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
+ expand = true;
+ ocrdma_set_cqe_status_flushed(qp, cqe);
+ }
+ return expand;
+}
+
+static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
+ struct ocrdma_qp *qp, int status)
+{
+ ibwc->opcode = IB_WC_RECV;
+ ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
+ ocrdma_hwq_inc_tail(&qp->rq);
+
+ return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
+}
+
+static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
+ struct ocrdma_qp *qp, int status)
+{
+ ocrdma_update_wc(qp, ibwc, qp->sq.tail);
+ ocrdma_hwq_inc_tail(&qp->sq);
+
+ return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
+}
+
+static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
+ struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
+ bool *polled, bool *stop)
+{
+ bool expand;
+ int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+
+	/* when the hw sq is empty but the rq is not, keep the cqe
+	 * so that the cq event is raised again.
+	 */
+ if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
+		/* when the rq and sq share the same cq, it is safe to
+		 * return flush cqes for RQEs.
+		 */
+ if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
+ *polled = true;
+ status = OCRDMA_CQE_WR_FLUSH_ERR;
+ expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
+ } else {
+			/* stop processing further cqes, as this cqe is used
+			 * for triggering the cq event on the buddy cq of the
+			 * RQ. When the QP is destroyed, this cqe will be
+			 * removed from the cq's hardware q.
+			 */
+ *polled = false;
+ *stop = true;
+ expand = false;
+ }
+ } else {
+ *polled = true;
+ expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
+ }
+ return expand;
+}
+
+static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
+ struct ocrdma_cqe *cqe,
+ struct ib_wc *ibwc, bool *polled)
+{
+ bool expand = false;
+ int tail = qp->sq.tail;
+ u32 wqe_idx;
+
+ if (!qp->wqe_wr_id_tbl[tail].signaled) {
+ *polled = false; /* WC cannot be consumed yet */
+ } else {
+ ibwc->status = IB_WC_SUCCESS;
+ ibwc->wc_flags = 0;
+ ibwc->qp = &qp->ibqp;
+ ocrdma_update_wc(qp, ibwc, tail);
+ *polled = true;
+ }
+ wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
+ OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
+ if (tail != wqe_idx)
+ expand = true; /* Coalesced CQE can't be consumed yet */
+
+ ocrdma_hwq_inc_tail(&qp->sq);
+ return expand;
+}
+
+static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
+ struct ib_wc *ibwc, bool *polled, bool *stop)
+{
+ int status;
+ bool expand;
+
+ status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+
+ if (status == OCRDMA_CQE_SUCCESS)
+ expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
+ else
+ expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
+ return expand;
+}
+
+static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
+{
+ int status;
+
+ status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
+ ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_SRCQP_MASK;
+ ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
+ OCRDMA_CQE_PKEY_MASK;
+ ibwc->wc_flags = IB_WC_GRH;
+ ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
+ OCRDMA_CQE_UD_XFER_LEN_SHIFT);
+ return status;
+}
+
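+/* The buftag that ocrdma_post_srq_recv() placed in the RQE comes back in
+ * the CQE; use it to look up the wr_id and release the shadow-table slot
+ * (slot 0 is never handed out, so a tag below 1 is a bug).
+ */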
+static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
+ struct ocrdma_cqe *cqe,
+ struct ocrdma_qp *qp)
+{
+ unsigned long flags;
+ struct ocrdma_srq *srq;
+ u32 wqe_idx;
+
+ srq = get_ocrdma_srq(qp->ibqp.srq);
+ wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+ OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+ if (wqe_idx < 1)
+ BUG();
+
+ ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
+ spin_lock_irqsave(&srq->q_lock, flags);
+ ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
+ spin_unlock_irqrestore(&srq->q_lock, flags);
+ ocrdma_hwq_inc_tail(&srq->rq);
+}
+
+static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
+ struct ib_wc *ibwc, bool *polled, bool *stop,
+ int status)
+{
+ bool expand;
+
+	/* when the hw rq is empty but the sq is not, keep the cqe
+	 * so that the cq event is raised again.
+	 */
+ if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
+ if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
+ *polled = true;
+ status = OCRDMA_CQE_WR_FLUSH_ERR;
+ expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
+ } else {
+ *polled = false;
+ *stop = true;
+ expand = false;
+ }
+ } else {
+ *polled = true;
+ expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
+ }
+ return expand;
+}
+
+static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
+ struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
+{
+ ibwc->opcode = IB_WC_RECV;
+ ibwc->qp = &qp->ibqp;
+ ibwc->status = IB_WC_SUCCESS;
+
+ if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
+ ocrdma_update_ud_rcqe(ibwc, cqe);
+ else
+ ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
+
+ if (is_cqe_imm(cqe)) {
+ ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
+ ibwc->wc_flags |= IB_WC_WITH_IMM;
+ } else if (is_cqe_wr_imm(cqe)) {
+ ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
+ ibwc->wc_flags |= IB_WC_WITH_IMM;
+ } else if (is_cqe_invalidated(cqe)) {
+ ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
+ ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+ if (qp->ibqp.srq) {
+ ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
+ } else {
+ ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
+ ocrdma_hwq_inc_tail(&qp->rq);
+ }
+}
+
+static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
+ struct ib_wc *ibwc, bool *polled, bool *stop)
+{
+ int status;
+ bool expand = false;
+
+ ibwc->wc_flags = 0;
+ if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
+ status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_UD_STATUS_MASK) >>
+ OCRDMA_CQE_UD_STATUS_SHIFT;
+ } else {
+ status = (le32_to_cpu(cqe->flags_status_srcqpn) &
+ OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
+ }
+
+ if (status == OCRDMA_CQE_SUCCESS) {
+ *polled = true;
+ ocrdma_poll_success_rcqe(qp, cqe, ibwc);
+ } else {
+ expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
+ status);
+ }
+ return expand;
+}
+
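+/* CQE validity is tracked either by a phase bit that flips each time the
+ * consumer index wraps around (phase_change set), or by explicitly clearing
+ * the valid bit of every consumed CQE.
+ */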
+static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
+ u16 cur_getp)
+{
+ if (cq->phase_change) {
+ if (cur_getp == 0)
+ cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
+ } else {
+ /* clear valid bit */
+ cqe->flags_status_srcqpn = 0;
+ }
+}
+
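+/* Poll up to num_entries CQEs from the hardware CQ. 'polled' marks that the
+ * current hw CQE produced a work completion, 'expand' keeps the hw CQE around
+ * so further flush completions can be generated from it, and 'stop' leaves it
+ * in place to re-trigger the event on the buddy CQ.
+ */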
+static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
+ struct ib_wc *ibwc)
+{
+ u16 qpn = 0;
+ int i = 0;
+ bool expand = false;
+ int polled_hw_cqes = 0;
+ struct ocrdma_qp *qp = NULL;
+ struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
+ struct ocrdma_cqe *cqe;
+	u16 cur_getp;
+	bool polled = false;
+	bool stop = false;
+
+ cur_getp = cq->getp;
+ while (num_entries) {
+ cqe = cq->va + cur_getp;
+ /* check whether valid cqe or not */
+ if (!is_cqe_valid(cq, cqe))
+ break;
+ qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
+ /* ignore discarded cqe */
+ if (qpn == 0)
+ goto skip_cqe;
+ qp = dev->qp_tbl[qpn];
+ BUG_ON(qp == NULL);
+
+ if (is_cqe_for_sq(cqe)) {
+ expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
+ &stop);
+ } else {
+ expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
+ &stop);
+ }
+ if (expand)
+ goto expand_cqe;
+ if (stop)
+ goto stop_cqe;
+ /* clear qpn to avoid duplicate processing by discard_cqe() */
+ cqe->cmn.qpn = 0;
+skip_cqe:
+ polled_hw_cqes += 1;
+ cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
+ ocrdma_change_cq_phase(cq, cqe, cur_getp);
+expand_cqe:
+ if (polled) {
+ num_entries -= 1;
+ i += 1;
+ ibwc = ibwc + 1;
+ polled = false;
+ }
+ }
+stop_cqe:
+ cq->getp = cur_getp;
+ if (cq->deferred_arm) {
+ ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
+ polled_hw_cqes);
+ cq->deferred_arm = false;
+ cq->deferred_sol = false;
+ } else {
+ /* We need to pop the CQE. No need to arm */
+ ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
+ polled_hw_cqes);
+ cq->deferred_sol = false;
+ }
+
+ return i;
+}
+
+/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
+static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
+ struct ocrdma_qp *qp, struct ib_wc *ibwc)
+{
+ int err_cqes = 0;
+
+ while (num_entries) {
+ if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
+ break;
+ if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
+ ocrdma_update_wc(qp, ibwc, qp->sq.tail);
+ ocrdma_hwq_inc_tail(&qp->sq);
+ } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
+ ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
+ ocrdma_hwq_inc_tail(&qp->rq);
+ } else {
+ return err_cqes;
+ }
+ ibwc->byte_len = 0;
+ ibwc->status = IB_WC_WR_FLUSH_ERR;
+ ibwc = ibwc + 1;
+ err_cqes += 1;
+ num_entries -= 1;
+ }
+ return err_cqes;
+}
+
+int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ int cqes_to_poll = num_entries;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
+ int num_os_cqe = 0, err_cqes = 0;
+ struct ocrdma_qp *qp;
+ unsigned long flags;
+
+ /* poll cqes from adapter CQ */
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ cqes_to_poll -= num_os_cqe;
+
+ if (cqes_to_poll) {
+ wc = wc + num_os_cqe;
+		/* the adapter returns a single error cqe when a qp moves
+		 * to the error state. So insert error cqes with wc_status
+		 * FLUSHED for the pending WQEs and RQEs of any QP whose SQ
+		 * or RQ uses this CQ.
+		 */
+ spin_lock_irqsave(&dev->flush_q_lock, flags);
+ list_for_each_entry(qp, &cq->sq_head, sq_entry) {
+ if (cqes_to_poll == 0)
+ break;
+ err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
+ cqes_to_poll -= err_cqes;
+ num_os_cqe += err_cqes;
+ wc = wc + err_cqes;
+ }
+ spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+ }
+ return num_os_cqe;
+}
+
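+/* The very first arm request rings the CQ doorbell immediately; later arm
+ * requests are only recorded here and applied from ocrdma_poll_hwcq() along
+ * with the count of consumed CQEs.
+ */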
+int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
+{
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
+ u16 cq_id;
+ unsigned long flags;
+ bool arm_needed = false, sol_needed = false;
+
+ cq_id = cq->id;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
+ arm_needed = true;
+ if (cq_flags & IB_CQ_SOLICITED)
+ sol_needed = true;
+
+ if (cq->first_arm) {
+ ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
+ cq->first_arm = false;
+ goto skip_defer;
+ }
+ cq->deferred_arm = true;
+
+skip_defer:
+ cq->deferred_sol = sol_needed;
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return 0;
+}
+
+struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
+{
+ int status;
+ struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+
+ if (max_page_list_len > dev->attr.max_pages_per_frmr)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ status = ocrdma_get_pbl_info(dev, mr, max_page_list_len);
+ if (status)
+ goto pbl_err;
+ mr->hwmr.fr_mr = 1;
+ mr->hwmr.remote_rd = 0;
+ mr->hwmr.remote_wr = 0;
+ mr->hwmr.local_rd = 0;
+ mr->hwmr.local_wr = 0;
+ mr->hwmr.mw_bind = 0;
+ status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+ if (status)
+ goto pbl_err;
+ status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
+ if (status)
+ goto mbx_err;
+ mr->ibmr.rkey = mr->hwmr.lkey;
+ mr->ibmr.lkey = mr->hwmr.lkey;
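+	/* remember the MR keyed by its stag so the key can be mapped back to
+	 * the MR later; the lkey bits above the 8-bit key field are assumed
+	 * to index the stag array (see the shift below).
+	 */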
+ dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
+ (unsigned long) mr;
+ return &mr->ibmr;
+mbx_err:
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+pbl_err:
+ kfree(mr);
+ return ERR_PTR(-ENOMEM);
+}
+
+struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
+ *ibdev,
+ int page_list_len)
+{
+ struct ib_fast_reg_page_list *frmr_list;
+ int size;
+
+ size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
+ frmr_list = kzalloc(size, GFP_KERNEL);
+ if (!frmr_list)
+ return ERR_PTR(-ENOMEM);
+ frmr_list->page_list = (u64 *)(frmr_list + 1);
+ return frmr_list;
+}
+
+void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
+{
+ kfree(page_list);
+}
+
+#define MAX_KERNEL_PBE_SIZE 65536
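+/* Example (hypothetical buffer sizes, 4K pages): physical buffers of 16K and
+ * 8K give a common pbe_size of 8K, so count_kernel_pbes() returns
+ * 24K / 8K = 3 page-buffer entries.
+ */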
+static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
+ int buf_cnt, u32 *pbe_size)
+{
+ u64 total_size = 0;
+ u64 buf_size = 0;
+ int i;
+ *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
+ *pbe_size = roundup_pow_of_two(*pbe_size);
+
+ /* find the smallest PBE size that we can have */
+ for (i = 0; i < buf_cnt; i++) {
+		/* the first addr may not be page aligned, so skip the check for it */
+ if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
+ (buf_list[i].size & ~PAGE_MASK))) {
+ return 0;
+ }
+
+		/* if the configured PBE size is greater than the chosen one,
+ * reduce the PBE size.
+ */
+ buf_size = roundup(buf_list[i].size, PAGE_SIZE);
+		/* pbe_size has to be a power-of-two multiple of 4K: 1, 2, 4, 8, ... */
+ buf_size = roundup_pow_of_two(buf_size);
+ if (*pbe_size > buf_size)
+ *pbe_size = buf_size;
+
+ total_size += buf_size;
+ }
+ *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
+ (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
+
+ /* num_pbes = total_size / (*pbe_size); this is implemented below. */
+
+ return total_size >> ilog2(*pbe_size);
+}
+
+static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
+ u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
+ struct ocrdma_hw_mr *hwmr)
+{
+ int i;
+ int idx;
+ int pbes_per_buf = 0;
+ u64 buf_addr = 0;
+ int num_pbes;
+ struct ocrdma_pbe *pbe;
+ int total_num_pbes = 0;
+
+ if (!hwmr->num_pbes)
+ return;
+
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ num_pbes = 0;
+
+ /* go through the OS phy regions & fill hw pbe entries into pbls. */
+ for (i = 0; i < ib_buf_cnt; i++) {
+ buf_addr = buf_list[i].addr;
+ pbes_per_buf =
+ roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
+ pbe_size;
+ hwmr->len += buf_list[i].size;
+		/* one OS buf may need more than one pbe when the
+		 * buffers are of different sizes.
+		 * split the ib_buf into one or more pbes.
+		 */
+ for (idx = 0; idx < pbes_per_buf; idx++) {
+			/* we always program page aligned addresses;
+			 * a first unaligned address is taken care of
+			 * by the fbo.
+			 */
+ if (i == 0) {
+ /* for non zero fbo, assign the
+ * start of the page.
+ */
+ pbe->pa_lo =
+ cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+ pbe->pa_hi =
+ cpu_to_le32((u32) upper_32_bits(buf_addr));
+ } else {
+ pbe->pa_lo =
+ cpu_to_le32((u32) (buf_addr & 0xffffffff));
+ pbe->pa_hi =
+ cpu_to_le32((u32) upper_32_bits(buf_addr));
+ }
+ buf_addr += pbe_size;
+ num_pbes += 1;
+ total_num_pbes += 1;
+ pbe++;
+
+ if (total_num_pbes == hwmr->num_pbes)
+ goto mr_tbl_done;
+ /* if the pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ num_pbes = 0;
+ }
+ }
+ }
+mr_tbl_done:
+ return;
+}
+
+struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
+ struct ib_phys_buf *buf_list,
+ int buf_cnt, int acc, u64 *iova_start)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mr *mr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ u32 num_pbes;
+ u32 pbe_size = 0;
+
+ if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(status);
+
+ num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
+ if (num_pbes == 0) {
+ status = -EINVAL;
+ goto pbl_err;
+ }
+ status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+ if (status)
+ goto pbl_err;
+
+ mr->hwmr.pbe_size = pbe_size;
+ mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
+ mr->hwmr.va = *iova_start;
+ mr->hwmr.local_rd = 1;
+ mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+ mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+ status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
+ if (status)
+ goto pbl_err;
+ build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
+ &mr->hwmr);
+ status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
+ if (status)
+ goto mbx_err;
+
+ mr->ibmr.lkey = mr->hwmr.lkey;
+ if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
+ mr->ibmr.rkey = mr->hwmr.lkey;
+ return &mr->ibmr;
+
+mbx_err:
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+pbl_err:
+ kfree(mr);
+ return ERR_PTR(status);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
new file mode 100644
index 00000000000..b8f7853fd36
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -0,0 +1,99 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2012 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_VERBS_H__
+#define __OCRDMA_VERBS_H__
+
+int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
+ struct ib_send_wr **bad_wr);
+int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
+ struct ib_recv_wr **bad_wr);
+
+int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
+int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
+
+int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props);
+int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
+int ocrdma_modify_port(struct ib_device *, u8 port, int mask,
+ struct ib_port_modify *props);
+
+void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
+int ocrdma_query_gid(struct ib_device *, u8 port,
+ int index, union ib_gid *gid);
+int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+
+struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *,
+ struct ib_udata *);
+int ocrdma_dealloc_ucontext(struct ib_ucontext *);
+
+int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+
+struct ib_pd *ocrdma_alloc_pd(struct ib_device *,
+ struct ib_ucontext *, struct ib_udata *);
+int ocrdma_dealloc_pd(struct ib_pd *pd);
+
+struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector,
+ struct ib_ucontext *, struct ib_udata *);
+int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
+int ocrdma_destroy_cq(struct ib_cq *);
+
+struct ib_qp *ocrdma_create_qp(struct ib_pd *,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *);
+int _ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
+ int attr_mask);
+int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int ocrdma_query_qp(struct ib_qp *,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *);
+int ocrdma_destroy_qp(struct ib_qp *);
+void ocrdma_del_flush_qp(struct ocrdma_qp *qp);
+
+struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
+ struct ib_udata *);
+int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
+ enum ib_srq_attr_mask, struct ib_udata *);
+int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
+int ocrdma_destroy_srq(struct ib_srq *);
+int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
+ struct ib_recv_wr **bad_recv_wr);
+
+int ocrdma_dereg_mr(struct ib_mr *);
+struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
+struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start);
+struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *);
+struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len);
+struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
+ *ibdev,
+ int page_list_len);
+void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
+
+#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
new file mode 100644
index 00000000000..495be09781b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -0,0 +1,15 @@
+config INFINIBAND_QIB
+ tristate "Intel PCIe HCA support"
+ depends on 64BIT
+ ---help---
+ This is a low-level driver for Intel PCIe QLE InfiniBand host
+ channel adapters. This driver does not support the Intel
+ HyperTransport card (model QHT7140).
+
+config INFINIBAND_QIB_DCA
+ bool "QIB DCA support"
+ depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m)
+ default y
+ ---help---
+ Setting this enables DCA support on some Intel chip sets
+ with the iba7322 HCA.
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
new file mode 100644
index 00000000000..57f8103e51f
--- /dev/null
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -0,0 +1,16 @@
+obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o
+
+ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
+ qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \
+ qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \
+ qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
+ qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
+ qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
+ qib_sd7220.o qib_iba7322.o qib_verbs.o
+
+# 6120 has no fallback if no MSI interrupts, others can do INTx
+ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
+
+ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
+ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
+ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
new file mode 100644
index 00000000000..c00ae093b6f
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -0,0 +1,1548 @@
+#ifndef _QIB_KERNEL_H
+#define _QIB_KERNEL_H
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This header file is the base header file for qlogic_ib kernel code
+ * qib_user.h serves a similar purpose for user code.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/fs.h>
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+
+#include "qib_common.h"
+#include "qib_verbs.h"
+
+/* only s/w major version of QLogic_IB we can handle */
+#define QIB_CHIP_VERS_MAJ 2U
+
+/* don't care about this except printing */
+#define QIB_CHIP_VERS_MIN 0U
+
+/* The Organization Unique Identifier (Mfg code), and its position in GUID */
+#define QIB_OUI 0x001175
+#define QIB_OUI_LSB 40
+
+/*
+ * per driver stats, either not device nor port-specific, or
+ * summed over all of the devices and ports.
+ * They are described by name via ipathfs filesystem, so layout
+ * and number of elements can change without breaking compatibility.
+ * If members are added or deleted qib_statnames[] in qib_fs.c must
+ * change to match.
+ */
+struct qlogic_ib_stats {
+ __u64 sps_ints; /* number of interrupts handled */
+ __u64 sps_errints; /* number of error interrupts */
+ __u64 sps_txerrs; /* tx-related packet errors */
+ __u64 sps_rcverrs; /* non-crc rcv packet errors */
+ __u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */
+ __u64 sps_nopiobufs; /* no pio bufs avail from kernel */
+ __u64 sps_ctxts; /* number of contexts currently open */
+ __u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */
+ __u64 sps_buffull;
+ __u64 sps_hdrfull;
+};
+
+extern struct qlogic_ib_stats qib_stats;
+extern const struct pci_error_handlers qib_pci_err_handler;
+
+#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
+/*
+ * First-cut criterion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define QIB_TRAFFIC_ACTIVE_THRESHOLD (2000)
+
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define QIB_EEP_LOG_CNT (4)
+struct qib_eep_log_mask {
+ u64 errs_to_log;
+ u64 hwerrs_to_log;
+};
+
+/*
+ * Below contains all data related to a single context (formerly called port).
+ */
+
+#ifdef CONFIG_DEBUG_FS
+struct qib_opcode_stats_perctx;
+#endif
+
+struct qib_ctxtdata {
+ void **rcvegrbuf;
+ dma_addr_t *rcvegrbuf_phys;
+ /* rcvhdrq base, needs mmap before useful */
+ void *rcvhdrq;
+ /* kernel virtual address where hdrqtail is updated */
+ void *rcvhdrtail_kvaddr;
+ /*
+ * temp buffer for expected send setup, allocated at open, instead
+ * of each setup call
+ */
+ void *tid_pg_list;
+ /*
+ * Shared page for kernel to signal user processes that send buffers
+ * need disarming. The process should call QIB_CMD_DISARM_BUFS
+ * or QIB_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
+ */
+ unsigned long *user_event_mask;
+ /* when waiting for rcv or pioavail */
+ wait_queue_head_t wait;
+ /*
+ * rcvegr bufs base, physical, must fit
+	 * in 44 bits (so 32 bit programs' mmap64 of 44 bit addresses works)
+ */
+ dma_addr_t rcvegr_phys;
+ /* mmap of hdrq, must fit in 44 bits */
+ dma_addr_t rcvhdrq_phys;
+ dma_addr_t rcvhdrqtailaddr_phys;
+
+ /*
+ * number of opens (including slave sub-contexts) on this instance
+ * (ignoring forks, dup, etc. for now)
+ */
+ int cnt;
+ /*
+ * how much space to leave at start of eager TID entries for
+ * protocol use, on each TID
+ */
+ /* instead of calculating it */
+ unsigned ctxt;
+ /* local node of context */
+ int node_id;
+ /* non-zero if ctxt is being shared. */
+ u16 subctxt_cnt;
+ /* non-zero if ctxt is being shared. */
+ u16 subctxt_id;
+ /* number of eager TID entries. */
+ u16 rcvegrcnt;
+ /* index of first eager TID entry. */
+ u16 rcvegr_tid_base;
+ /* number of pio bufs for this ctxt (all procs, if shared) */
+ u32 piocnt;
+ /* first pio buffer for this ctxt */
+ u32 pio_base;
+ /* chip offset of PIO buffers for this ctxt */
+ u32 piobufs;
+ /* how many alloc_pages() chunks in rcvegrbuf_pages */
+ u32 rcvegrbuf_chunks;
+ /* how many egrbufs per chunk */
+ u16 rcvegrbufs_perchunk;
+ /* ilog2 of above */
+ u16 rcvegrbufs_perchunk_shift;
+ /* order for rcvegrbuf_pages */
+ size_t rcvegrbuf_size;
+ /* rcvhdrq size (for freeing) */
+ size_t rcvhdrq_size;
+ /* per-context flags for fileops/intr communication */
+ unsigned long flag;
+ /* next expected TID to check when looking for free */
+ u32 tidcursor;
+ /* WAIT_RCV that timed out, no interrupt */
+ u32 rcvwait_to;
+ /* WAIT_PIO that timed out, no interrupt */
+ u32 piowait_to;
+ /* WAIT_RCV already happened, no wait */
+ u32 rcvnowait;
+ /* WAIT_PIO already happened, no wait */
+ u32 pionowait;
+ /* total number of polled urgent packets */
+ u32 urgent;
+ /* saved total number of polled urgent packets for poll edge trigger */
+ u32 urgent_poll;
+ /* pid of process using this ctxt */
+ pid_t pid;
+ pid_t subpid[QLOGIC_IB_MAX_SUBCTXT];
+ /* same size as task_struct .comm[], command that opened context */
+ char comm[16];
+ /* pkeys set by this use of this ctxt */
+ u16 pkeys[4];
+ /* so file ops can get at unit */
+ struct qib_devdata *dd;
+ /* so funcs that need physical port can get it easily */
+ struct qib_pportdata *ppd;
+ /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+ void *subctxt_uregbase;
+ /* An array of pages for the eager receive buffers * N */
+ void *subctxt_rcvegrbuf;
+ /* An array of pages for the eager header queue entries * N */
+ void *subctxt_rcvhdr_base;
+ /* The version of the library which opened this ctxt */
+ u32 userversion;
+ /* Bitmask of active slaves */
+ u32 active_slaves;
+ /* Type of packets or conditions we want to poll for */
+ u16 poll_type;
+ /* receive packet sequence counter */
+ u8 seq_cnt;
+ u8 redirect_seq_cnt;
+ /* ctxt rcvhdrq head offset */
+ u32 head;
+ /* lookaside fields */
+ struct qib_qp *lookaside_qp;
+ u32 lookaside_qpn;
+ /* QPs waiting for context processing */
+ struct list_head qp_wait_list;
+#ifdef CONFIG_DEBUG_FS
+ /* verbs stats per CTX */
+ struct qib_opcode_stats_perctx *opstats;
+#endif
+};
+
+struct qib_sge_state;
+
+struct qib_sdma_txreq {
+ int flags;
+ int sg_count;
+ dma_addr_t addr;
+ void (*callback)(struct qib_sdma_txreq *, int);
+ u16 start_idx; /* sdma private */
+ u16 next_descq_idx; /* sdma private */
+ struct list_head list; /* sdma private */
+};
+
+struct qib_sdma_desc {
+ __le64 qw[2];
+};
+
+struct qib_verbs_txreq {
+ struct qib_sdma_txreq txreq;
+ struct qib_qp *qp;
+ struct qib_swqe *wqe;
+ u32 dwords;
+ u16 hdr_dwords;
+ u16 hdr_inx;
+ struct qib_pio_header *align_buf;
+ struct qib_mregion *mr;
+ struct qib_sge_state *ss;
+};
+
+#define QIB_SDMA_TXREQ_F_USELARGEBUF 0x1
+#define QIB_SDMA_TXREQ_F_HEADTOHOST 0x2
+#define QIB_SDMA_TXREQ_F_INTREQ 0x4
+#define QIB_SDMA_TXREQ_F_FREEBUF 0x8
+#define QIB_SDMA_TXREQ_F_FREEDESC 0x10
+
+#define QIB_SDMA_TXREQ_S_OK 0
+#define QIB_SDMA_TXREQ_S_SENDERROR 1
+#define QIB_SDMA_TXREQ_S_ABORTED 2
+#define QIB_SDMA_TXREQ_S_SHUTDOWN 3
+
+/*
+ * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
+ * Mostly for MADs that set or query link parameters, also ipath
+ * config interfaces
+ */
+#define QIB_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */
+#define QIB_IB_CFG_LWID_ENB 2 /* allowed Link-width */
+#define QIB_IB_CFG_LWID 3 /* currently active Link-width */
+#define QIB_IB_CFG_SPD_ENB 4 /* allowed Link speeds */
+#define QIB_IB_CFG_SPD 5 /* current Link spd */
+#define QIB_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */
+#define QIB_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */
+#define QIB_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */
+#define QIB_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */
+#define QIB_IB_CFG_OP_VLS 10 /* operational VLs */
+#define QIB_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */
+#define QIB_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */
+#define QIB_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */
+#define QIB_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */
+#define QIB_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */
+#define QIB_IB_CFG_PKEYS 16 /* update partition keys */
+#define QIB_IB_CFG_MTU 17 /* update MTU in IBC */
+#define QIB_IB_CFG_LSTATE 18 /* update linkcmd and linkinitcmd in IBC */
+#define QIB_IB_CFG_VL_HIGH_LIMIT 19
+#define QIB_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */
+#define QIB_IB_CFG_PORT 21 /* switch port we are connected to */
+
+/*
+ * for CFG_LSTATE: LINKCMD in upper 16 bits, LINKINITCMD in lower 16
+ * IB_LINKINITCMD_POLL and SLEEP are also used as set/get values for
+ * QIB_IB_CFG_LINKDEFAULT cmd
+ */
+#define IB_LINKCMD_DOWN (0 << 16)
+#define IB_LINKCMD_ARMED (1 << 16)
+#define IB_LINKCMD_ACTIVE (2 << 16)
+#define IB_LINKINITCMD_NOP 0
+#define IB_LINKINITCMD_POLL 1
+#define IB_LINKINITCMD_SLEEP 2
+#define IB_LINKINITCMD_DISABLE 3
+
+/*
+ * valid states passed to qib_set_linkstate() user call
+ */
+#define QIB_IB_LINKDOWN 0
+#define QIB_IB_LINKARM 1
+#define QIB_IB_LINKACTIVE 2
+#define QIB_IB_LINKDOWN_ONLY 3
+#define QIB_IB_LINKDOWN_SLEEP 4
+#define QIB_IB_LINKDOWN_DISABLE 5
+
+/*
+ * These 7 values (SDR, DDR, and QDR may be ORed for auto-speed
+ * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
+ * with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
+ * are also the possible values for qib_link_speed_enabled and active
+ * The values were chosen to match values used within the IB spec.
+ */
+#define QIB_IB_SDR 1
+#define QIB_IB_DDR 2
+#define QIB_IB_QDR 4
+
+#define QIB_DEFAULT_MTU 4096
+
+/* max number of IB ports supported per HCA */
+#define QIB_MAX_IB_PORTS 2
+
+/*
+ * Possible IB config parameters for f_get/set_ib_table()
+ */
+#define QIB_IB_TBL_VL_HIGH_ARB 1 /* Get/set VL high priority weights */
+#define QIB_IB_TBL_VL_LOW_ARB 2 /* Get/set VL low priority weights */
+
+/*
+ * Possible "operations" for f_rcvctrl(ppd, op, ctxt)
+ * these are bits so they can be combined, e.g.
+ * QIB_RCVCTRL_INTRAVAIL_ENB | QIB_RCVCTRL_CTXT_ENB
+ */
+#define QIB_RCVCTRL_TAILUPD_ENB 0x01
+#define QIB_RCVCTRL_TAILUPD_DIS 0x02
+#define QIB_RCVCTRL_CTXT_ENB 0x04
+#define QIB_RCVCTRL_CTXT_DIS 0x08
+#define QIB_RCVCTRL_INTRAVAIL_ENB 0x10
+#define QIB_RCVCTRL_INTRAVAIL_DIS 0x20
+#define QIB_RCVCTRL_PKEY_ENB 0x40 /* Note, default is enabled */
+#define QIB_RCVCTRL_PKEY_DIS 0x80
+#define QIB_RCVCTRL_BP_ENB 0x0100
+#define QIB_RCVCTRL_BP_DIS 0x0200
+#define QIB_RCVCTRL_TIDFLOW_ENB 0x0400
+#define QIB_RCVCTRL_TIDFLOW_DIS 0x0800
+
+/*
+ * Possible "operations" for f_sendctrl(ppd, op, var)
+ * these are bits so they can be combined, e.g.
+ * QIB_SENDCTRL_BUFAVAIL_ENB | QIB_SENDCTRL_ENB
+ * Some operations (e.g. DISARM, ABORT) are known to
+ * be "one-shot", so do not modify shadow.
+ */
+#define QIB_SENDCTRL_DISARM (0x1000)
+#define QIB_SENDCTRL_DISARM_BUF(bufn) ((bufn) | QIB_SENDCTRL_DISARM)
+ /* available (0x2000) */
+#define QIB_SENDCTRL_AVAIL_DIS (0x4000)
+#define QIB_SENDCTRL_AVAIL_ENB (0x8000)
+#define QIB_SENDCTRL_AVAIL_BLIP (0x10000)
+#define QIB_SENDCTRL_SEND_DIS (0x20000)
+#define QIB_SENDCTRL_SEND_ENB (0x40000)
+#define QIB_SENDCTRL_FLUSH (0x80000)
+#define QIB_SENDCTRL_CLEAR (0x100000)
+#define QIB_SENDCTRL_DISARM_ALL (0x200000)
+
+/*
+ * These are the generic indices for requesting per-port
+ * counter values via the f_portcntr function. They
+ * are always returned as 64 bit values, although most
+ * are 32 bit counters.
+ */
+/* send-related counters */
+#define QIBPORTCNTR_PKTSEND 0U
+#define QIBPORTCNTR_WORDSEND 1U
+#define QIBPORTCNTR_PSXMITDATA 2U
+#define QIBPORTCNTR_PSXMITPKTS 3U
+#define QIBPORTCNTR_PSXMITWAIT 4U
+#define QIBPORTCNTR_SENDSTALL 5U
+/* receive-related counters */
+#define QIBPORTCNTR_PKTRCV 6U
+#define QIBPORTCNTR_PSRCVDATA 7U
+#define QIBPORTCNTR_PSRCVPKTS 8U
+#define QIBPORTCNTR_RCVEBP 9U
+#define QIBPORTCNTR_RCVOVFL 10U
+#define QIBPORTCNTR_WORDRCV 11U
+/* IB link related error counters */
+#define QIBPORTCNTR_RXLOCALPHYERR 12U
+#define QIBPORTCNTR_RXVLERR 13U
+#define QIBPORTCNTR_ERRICRC 14U
+#define QIBPORTCNTR_ERRVCRC 15U
+#define QIBPORTCNTR_ERRLPCRC 16U
+#define QIBPORTCNTR_BADFORMAT 17U
+#define QIBPORTCNTR_ERR_RLEN 18U
+#define QIBPORTCNTR_IBSYMBOLERR 19U
+#define QIBPORTCNTR_INVALIDRLEN 20U
+#define QIBPORTCNTR_UNSUPVL 21U
+#define QIBPORTCNTR_EXCESSBUFOVFL 22U
+#define QIBPORTCNTR_ERRLINK 23U
+#define QIBPORTCNTR_IBLINKDOWN 24U
+#define QIBPORTCNTR_IBLINKERRRECOV 25U
+#define QIBPORTCNTR_LLI 26U
+/* other error counters */
+#define QIBPORTCNTR_RXDROPPKT 27U
+#define QIBPORTCNTR_VL15PKTDROP 28U
+#define QIBPORTCNTR_ERRPKEY 29U
+#define QIBPORTCNTR_KHDROVFL 30U
+/* sampling counters (these are actually control registers) */
+#define QIBPORTCNTR_PSINTERVAL 31U
+#define QIBPORTCNTR_PSSTART 32U
+#define QIBPORTCNTR_PSSTAT 33U
+
+/* how often we check for packet activity for "power on hours" (in seconds) */
+#define ACTIVITY_TIMER 5
+
+#define MAX_NAME_SIZE 64
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify;
+#endif
+
+struct qib_msix_entry {
+ struct msix_entry msix;
+ void *arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int dca;
+ int rcv;
+ struct qib_irq_notify *notifier;
+#endif
+ char name[MAX_NAME_SIZE];
+ cpumask_var_t mask;
+};
+
+/* Below is an opaque struct. Each chip (device) can maintain
+ * private data needed for its operation, but not germane to the
+ * rest of the driver. For convenience, we define another that
+ * is chip-specific, per-port
+ */
+struct qib_chip_specific;
+struct qib_chipport_specific;
+
+enum qib_sdma_states {
+ qib_sdma_state_s00_hw_down,
+ qib_sdma_state_s10_hw_start_up_wait,
+ qib_sdma_state_s20_idle,
+ qib_sdma_state_s30_sw_clean_up_wait,
+ qib_sdma_state_s40_hw_clean_up_wait,
+ qib_sdma_state_s50_hw_halt_wait,
+ qib_sdma_state_s99_running,
+};
+
+enum qib_sdma_events {
+ qib_sdma_event_e00_go_hw_down,
+ qib_sdma_event_e10_go_hw_start,
+ qib_sdma_event_e20_hw_started,
+ qib_sdma_event_e30_go_running,
+ qib_sdma_event_e40_sw_cleaned,
+ qib_sdma_event_e50_hw_cleaned,
+ qib_sdma_event_e60_hw_halted,
+ qib_sdma_event_e70_go_idle,
+ qib_sdma_event_e7220_err_halted,
+ qib_sdma_event_e7322_err_halted,
+ qib_sdma_event_e90_timer_tick,
+};
+
+extern char *qib_sdma_state_names[];
+extern char *qib_sdma_event_names[];
+
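+/* Hardware actions associated with an SDMA state: the op_* bits say which
+ * sendctrl manipulations (enable, interrupt enable, halt, drain) are
+ * presumably applied when the engine enters that state.
+ */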
+struct sdma_set_state_action {
+ unsigned op_enable:1;
+ unsigned op_intenable:1;
+ unsigned op_halt:1;
+ unsigned op_drain:1;
+ unsigned go_s99_running_tofalse:1;
+ unsigned go_s99_running_totrue:1;
+};
+
+struct qib_sdma_state {
+ struct kref kref;
+ struct completion comp;
+ enum qib_sdma_states current_state;
+ struct sdma_set_state_action *set_state_action;
+ unsigned current_op;
+ unsigned go_s99_running;
+ unsigned first_sendbuf;
+ unsigned last_sendbuf; /* really last +1 */
+ /* debugging/devel */
+ enum qib_sdma_states previous_state;
+ unsigned previous_op;
+ enum qib_sdma_events last_event;
+};
+
+struct xmit_wait {
+ struct timer_list timer;
+ u64 counter;
+ u8 flags;
+ struct cache {
+ u64 psxmitdata;
+ u64 psrcvdata;
+ u64 psxmitpkts;
+ u64 psrcvpkts;
+ u64 psxmitwait;
+ } counter_cache;
+};
+
+/*
+ * The structure below encapsulates data relevant to a physical IB Port.
+ * Current chips support only one such port, but the separation
+ * clarifies things a bit. Note that to conform to IB conventions,
+ * port-numbers are one-based. The first or only port is port1.
+ */
+struct qib_pportdata {
+ struct qib_ibport ibport_data;
+
+ struct qib_devdata *dd;
+ struct qib_chippport_specific *cpspec; /* chip-specific per-port */
+ struct kobject pport_kobj;
+ struct kobject pport_cc_kobj;
+ struct kobject sl2vl_kobj;
+ struct kobject diagc_kobj;
+
+ /* GUID for this interface, in network order */
+ __be64 guid;
+
+ /* QIB_POLL, etc. link-state specific flags, per port */
+ u32 lflags;
+ /* qib_lflags driver is waiting for */
+ u32 state_wanted;
+ spinlock_t lflags_lock;
+
+ /* ref count for each pkey */
+ atomic_t pkeyrefs[4];
+
+ /*
+ * this address is mapped readonly into user processes so they can
+ * get status cheaply, whenever they want. One qword of status per port
+ */
+ u64 *statusp;
+
+ /* SendDMA related entries */
+
+ /* read mostly */
+ struct qib_sdma_desc *sdma_descq;
+ struct workqueue_struct *qib_wq;
+ struct qib_sdma_state sdma_state;
+ dma_addr_t sdma_descq_phys;
+ volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
+ dma_addr_t sdma_head_phys;
+ u16 sdma_descq_cnt;
+
+ /* read/write using lock */
+ spinlock_t sdma_lock ____cacheline_aligned_in_smp;
+ struct list_head sdma_activelist;
+ struct list_head sdma_userpending;
+ u64 sdma_descq_added;
+ u64 sdma_descq_removed;
+ u16 sdma_descq_tail;
+ u16 sdma_descq_head;
+ u8 sdma_generation;
+ u8 sdma_intrequest;
+
+ struct tasklet_struct sdma_sw_clean_up_task
+ ____cacheline_aligned_in_smp;
+
+ wait_queue_head_t state_wait; /* for state_wanted */
+
+ /* HoL blocking for SMP replies */
+ unsigned hol_state;
+ struct timer_list hol_timer;
+
+ /*
+ * Shadow copies of registers; size indicates read access size.
+	 * Most of them are readonly, but some are write-only registers,
+ * where we manipulate the bits in the shadow copy, and then write
+ * the shadow copy to qlogic_ib.
+ *
+ * We deliberately make most of these 32 bits, since they have
+	 * restricted range. For any that we read, we want to generate 32
+ * bit accesses, since Opteron will generate 2 separate 32 bit HT
+ * transactions for a 64 bit read, and we want to avoid unnecessary
+ * bus transactions.
+ */
+
+ /* This is the 64 bit group */
+ /* last ibcstatus. opaque outside chip-specific code */
+ u64 lastibcstat;
+
+ /* these are the "32 bit" regs */
+
+ /*
+ * the following two are 32-bit bitmasks, but {test,clear,set}_bit
+ * all expect bit fields to be "unsigned long"
+ */
+ unsigned long p_rcvctrl; /* shadow per-port rcvctrl */
+ unsigned long p_sendctrl; /* shadow per-port sendctrl */
+
+ u32 ibmtu; /* The MTU programmed for this unit */
+ /*
+ * Current max size IB packet (in bytes) including IB headers, that
+ * we can send. Changes when ibmtu changes.
+ */
+ u32 ibmaxlen;
+ /*
+ * ibmaxlen at init time, limited by chip and by receive buffer
+ * size. Not changed after init.
+ */
+ u32 init_ibmaxlen;
+ /* LID programmed for this instance */
+ u16 lid;
+ /* list of pkeys programmed; 0 if not set */
+ u16 pkeys[4];
+ /* LID mask control */
+ u8 lmc;
+ u8 link_width_supported;
+ u8 link_speed_supported;
+ u8 link_width_enabled;
+ u8 link_speed_enabled;
+ u8 link_width_active;
+ u8 link_speed_active;
+ u8 vls_supported;
+ u8 vls_operational;
+ /* Rx Polarity inversion (compensate for ~tx on partner) */
+ u8 rx_pol_inv;
+
+ u8 hw_pidx; /* physical port index */
+ u8 port; /* IB port number and index into dd->pports - 1 */
+
+ u8 delay_mult;
+
+ /* used to override LED behavior */
+ u8 led_override; /* Substituted for normal value, if non-zero */
+ u16 led_override_timeoff; /* delta to next timer event */
+ u8 led_override_vals[2]; /* Alternates per blink-frame */
+ u8 led_override_phase; /* Just counts, LSB picks from vals[] */
+ atomic_t led_override_timer_active;
+ /* Used to flash LEDs in override mode */
+ struct timer_list led_override_timer;
+ struct xmit_wait cong_stats;
+ struct timer_list symerr_clear_timer;
+
+ /* Synchronize access between driver writes and sysfs reads */
+ spinlock_t cc_shadow_lock
+ ____cacheline_aligned_in_smp;
+
+ /* Shadow copy of the congestion control table */
+ struct cc_table_shadow *ccti_entries_shadow;
+
+ /* Shadow copy of the congestion control entries */
+ struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
+
+ /* List of congestion control table entries */
+ struct ib_cc_table_entry_shadow *ccti_entries;
+
+ /* 16 congestion entries with each entry corresponding to a SL */
+ struct ib_cc_congestion_entry_shadow *congestion_entries;
+
+ /* Maximum number of congestion control entries that the agent expects
+ * the manager to send.
+ */
+ u16 cc_supported_table_entries;
+
+ /* Total number of congestion control table entries */
+ u16 total_cct_entry;
+
+ /* Bit map identifying service level */
+ u16 cc_sl_control_map;
+
+ /* maximum congestion control table index */
+ u16 ccti_limit;
+
+ /* CA's max number of 64 entry units in the congestion control table */
+ u8 cc_max_table_entries;
+};
+
+/* Observers. Not to be taken lightly, possibly not to ship. */
+/*
+ * If a diag read or write is to (bottom <= offset <= top),
+ * the "hoook" is called, allowing, e.g. shadows to be
+ * updated in sync with the driver. struct diag_observer
+ * is the "visible" part.
+ */
+struct diag_observer;
+
+typedef int (*diag_hook) (struct qib_devdata *dd,
+ const struct diag_observer *op,
+ u32 offs, u64 *data, u64 mask, int only_32);
+
+struct diag_observer {
+ diag_hook hook;
+ u32 bottom;
+ u32 top;
+};
+
+extern int qib_register_observer(struct qib_devdata *dd,
+ const struct diag_observer *op);
+
+/* Only declared here, not defined. Private to diags */
+struct diag_observer_list_elt;
+
+/* device data struct now contains only "general per-device" info.
+ * fields related to a physical IB port are in a qib_pportdata struct,
+ * (described above), while fields only used by a particular chip-type are in
+ * a qib_chipdata struct, whose contents are opaque to this file.
+ */
+struct qib_devdata {
+ struct qib_ibdev verbs_dev; /* must be first */
+ struct list_head list;
+ /* pointers to related structs for this device */
+ /* pci access data structure */
+ struct pci_dev *pcidev;
+ struct cdev *user_cdev;
+ struct cdev *diag_cdev;
+ struct device *user_device;
+ struct device *diag_device;
+
+ /* mem-mapped pointer to base of chip regs */
+ u64 __iomem *kregbase;
+ /* end of mem-mapped chip space excluding sendbuf and user regs */
+ u64 __iomem *kregend;
+ /* physical address of chip for io_remap, etc. */
+ resource_size_t physaddr;
+ /* qib_cfgctxts pointers */
+ struct qib_ctxtdata **rcd; /* Receive Context Data */
+
+ /* qib_pportdata, points to array of (physical) port-specific
+ * data structs, indexed by pidx (0..n-1)
+ */
+ struct qib_pportdata *pport;
+ struct qib_chip_specific *cspec; /* chip-specific */
+
+ /* kvirt address of 1st 2k pio buffer */
+ void __iomem *pio2kbase;
+ /* kvirt address of 1st 4k pio buffer */
+ void __iomem *pio4kbase;
+ /* mem-mapped pointer to base of PIO buffers (if using WC PAT) */
+ void __iomem *piobase;
+ /* mem-mapped pointer to base of user chip regs (if using WC PAT) */
+ u64 __iomem *userbase;
+ void __iomem *piovl15base; /* base of VL15 buffers, if not WC */
+ /*
+ * points to area where PIOavail registers will be DMA'ed.
+	 * Has to be on a page of its own, because the page will be
+ * mapped into user program space. This copy is *ONLY* ever
+ * written by DMA, not by the driver! Need a copy per device
+ * when we get to multiple devices
+ */
+ volatile __le64 *pioavailregs_dma; /* DMA'ed by chip */
+ /* physical address where updates occur */
+ dma_addr_t pioavailregs_phys;
+
+ /* device-specific implementations of functions needed by
+ * common code. Contrary to previous consensus, we can't
+ * really just point to a device-specific table, because we
+ * may need to "bend", e.g. *_f_put_tid
+ */
+ /* fallback to alternate interrupt type if possible */
+ int (*f_intr_fallback)(struct qib_devdata *);
+ /* hard reset chip */
+ int (*f_reset)(struct qib_devdata *);
+ void (*f_quiet_serdes)(struct qib_pportdata *);
+ int (*f_bringup_serdes)(struct qib_pportdata *);
+ int (*f_early_init)(struct qib_devdata *);
+ void (*f_clear_tids)(struct qib_devdata *, struct qib_ctxtdata *);
+ void (*f_put_tid)(struct qib_devdata *, u64 __iomem*,
+ u32, unsigned long);
+ void (*f_cleanup)(struct qib_devdata *);
+ void (*f_setextled)(struct qib_pportdata *, u32);
+ /* fill out chip-specific fields */
+ int (*f_get_base_info)(struct qib_ctxtdata *, struct qib_base_info *);
+ /* free irq */
+ void (*f_free_irq)(struct qib_devdata *);
+ struct qib_message_header *(*f_get_msgheader)
+ (struct qib_devdata *, __le32 *);
+ void (*f_config_ctxts)(struct qib_devdata *);
+ int (*f_get_ib_cfg)(struct qib_pportdata *, int);
+ int (*f_set_ib_cfg)(struct qib_pportdata *, int, u32);
+ int (*f_set_ib_loopback)(struct qib_pportdata *, const char *);
+ int (*f_get_ib_table)(struct qib_pportdata *, int, void *);
+ int (*f_set_ib_table)(struct qib_pportdata *, int, void *);
+ u32 (*f_iblink_state)(u64);
+ u8 (*f_ibphys_portstate)(u64);
+ void (*f_xgxs_reset)(struct qib_pportdata *);
+ /* per chip actions needed for IB Link up/down changes */
+ int (*f_ib_updown)(struct qib_pportdata *, int, u64);
+ u32 __iomem *(*f_getsendbuf)(struct qib_pportdata *, u64, u32 *);
+	/* Read/modify/write of GPIO pins (potentially chip-specific) */
+ int (*f_gpio_mod)(struct qib_devdata *dd, u32 out, u32 dir,
+ u32 mask);
+ /* Enable writes to config EEPROM (if supported) */
+ int (*f_eeprom_wen)(struct qib_devdata *dd, int wen);
+ /*
+ * modify rcvctrl shadow[s] and write to appropriate chip-regs.
+ * see above QIB_RCVCTRL_xxx_ENB/DIS for operations.
+ * (ctxt == -1) means "all contexts", only meaningful for
+ * clearing. Could remove if chip_spec shutdown properly done.
+ */
+ void (*f_rcvctrl)(struct qib_pportdata *, unsigned int op,
+ int ctxt);
+ /* Read/modify/write sendctrl appropriately for op and port. */
+ void (*f_sendctrl)(struct qib_pportdata *, u32 op);
+ void (*f_set_intr_state)(struct qib_devdata *, u32);
+ void (*f_set_armlaunch)(struct qib_devdata *, u32);
+ void (*f_wantpiobuf_intr)(struct qib_devdata *, u32);
+ int (*f_late_initreg)(struct qib_devdata *);
+ int (*f_init_sdma_regs)(struct qib_pportdata *);
+ u16 (*f_sdma_gethead)(struct qib_pportdata *);
+ int (*f_sdma_busy)(struct qib_pportdata *);
+ void (*f_sdma_update_tail)(struct qib_pportdata *, u16);
+ void (*f_sdma_set_desc_cnt)(struct qib_pportdata *, unsigned);
+ void (*f_sdma_sendctrl)(struct qib_pportdata *, unsigned);
+ void (*f_sdma_hw_clean_up)(struct qib_pportdata *);
+ void (*f_sdma_hw_start_up)(struct qib_pportdata *);
+ void (*f_sdma_init_early)(struct qib_pportdata *);
+ void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32);
+ void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32);
+ u32 (*f_hdrqempty)(struct qib_ctxtdata *);
+ u64 (*f_portcntr)(struct qib_pportdata *, u32);
+ u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **,
+ u64 **);
+ u32 (*f_read_portcntrs)(struct qib_devdata *, loff_t, u32,
+ char **, u64 **);
+ u32 (*f_setpbc_control)(struct qib_pportdata *, u32, u8, u8);
+ void (*f_initvl15_bufs)(struct qib_devdata *);
+ void (*f_init_ctxt)(struct qib_ctxtdata *);
+ void (*f_txchk_change)(struct qib_devdata *, u32, u32, u32,
+ struct qib_ctxtdata *);
+ void (*f_writescratch)(struct qib_devdata *, u32);
+ int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
+#endif
+
+ char *boardname; /* human readable board info */
+
+ /* template for writing TIDs */
+ u64 tidtemplate;
+ /* value to write to free TIDs */
+ u64 tidinvalid;
+
+ /* number of registers used for pioavail */
+ u32 pioavregs;
+ /* device (not port) flags, basically device capabilities */
+ u32 flags;
+ /* last buffer for user use */
+ u32 lastctxt_piobuf;
+
+ /* reset value */
+ u64 z_int_counter;
+ /* percpu intcounter */
+ u64 __percpu *int_counter;
+
+ /* pio bufs allocated per ctxt */
+ u32 pbufsctxt;
+ /* if remainder on bufs/ctxt, ctxts < extrabuf get 1 extra */
+ u32 ctxts_extrabuf;
+ /*
+	 * number of ctxts configured as max; zero means use the number the
+	 * chip supports, fewer gives more pio bufs/ctxt, etc.
+ */
+ u32 cfgctxts;
+ /*
+ * number of ctxts available for PSM open
+ */
+ u32 freectxts;
+
+ /*
+ * hint that we should update pioavailshadow before
+ * looking for a PIO buffer
+ */
+ u32 upd_pio_shadow;
+
+ /* internal debugging stats */
+ u32 maxpkts_call;
+ u32 avgpkts_call;
+ u64 nopiobufs;
+
+ /* PCI Vendor ID (here for NodeInfo) */
+ u16 vendorid;
+ /* PCI Device ID (here for NodeInfo) */
+ u16 deviceid;
+ /* for write combining settings */
+ unsigned long wc_cookie;
+ unsigned long wc_base;
+ unsigned long wc_len;
+
+ /* shadow copy of struct page *'s for exp tid pages */
+ struct page **pageshadow;
+ /* shadow copy of dma handles for exp tid pages */
+ dma_addr_t *physshadow;
+ u64 __iomem *egrtidbase;
+ spinlock_t sendctrl_lock; /* protect changes to sendctrl shadow */
+ /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
+ spinlock_t uctxt_lock; /* rcd and user context changes */
+ /*
+ * per unit status, see also portdata statusp
+ * mapped readonly into user processes so they can get unit and
+ * IB link status cheaply
+ */
+ u64 *devstatusp;
+ char *freezemsg; /* freeze msg if hw error put chip in freeze */
+ u32 freezelen; /* max length of freezemsg */
+ /* timer used to prevent stats overflow, error throttling, etc. */
+ struct timer_list stats_timer;
+
+ /* timer to verify interrupts work, and fallback if possible */
+ struct timer_list intrchk_timer;
+ unsigned long ureg_align; /* user register alignment */
+
+ /*
+ * Protects pioavailshadow, pioavailkernel, pio_need_disarm, and
+ * pio_writing.
+ */
+ spinlock_t pioavail_lock;
+ /*
+ * index of last buffer to optimize search for next
+ */
+ u32 last_pio;
+ /*
+ * min kernel pio buffer to optimize search
+ */
+ u32 min_kernel_pio;
+ /*
+ * Shadow copies of registers; size indicates read access size.
+ * Most of them are readonly, but some are write-only registers,
+ * where we manipulate the bits in the shadow copy, and then write
+ * the shadow copy to qlogic_ib.
+ *
+ * We deliberately make most of these 32 bits, since they have
+ * restricted range. For any that we read, we want to generate 32
+ * bit accesses, since Opteron will generate 2 separate 32 bit HT
+ * transactions for a 64 bit read, and we want to avoid unnecessary
+ * bus transactions.
+ */
+
+ /* This is the 64 bit group */
+
+ unsigned long pioavailshadow[6];
+ /* bitmap of send buffers available for the kernel to use with PIO. */
+ unsigned long pioavailkernel[6];
+ /* bitmap of send buffers which need to be disarmed. */
+ unsigned long pio_need_disarm[3];
+ /* bitmap of send buffers which are being written to. */
+ unsigned long pio_writing[3];
+ /* kr_revision shadow */
+ u64 revision;
+ /* Base GUID for device (from eeprom, network order) */
+ __be64 base_guid;
+
+ /*
+ * kr_sendpiobufbase value (chip offset of pio buffers), and the
+ * base of the 2KB buffers (user processes only use 2K)
+ */
+ u64 piobufbase;
+ u32 pio2k_bufbase;
+
+ /* these are the "32 bit" regs */
+
+ /* number of GUIDs in the flash for this interface */
+ u32 nguid;
+ /*
+ * the following two are 32-bit bitmasks, but {test,clear,set}_bit
+ * all expect bit fields to be "unsigned long"
+ */
+ unsigned long rcvctrl; /* shadow per device rcvctrl */
+ unsigned long sendctrl; /* shadow per device sendctrl */
+
+ /* value we put in kr_rcvhdrcnt */
+ u32 rcvhdrcnt;
+ /* value we put in kr_rcvhdrsize */
+ u32 rcvhdrsize;
+ /* value we put in kr_rcvhdrentsize */
+ u32 rcvhdrentsize;
+ /* kr_ctxtcnt value */
+ u32 ctxtcnt;
+ /* kr_pagealign value */
+ u32 palign;
+ /* number of "2KB" PIO buffers */
+ u32 piobcnt2k;
+ /* size in bytes of "2KB" PIO buffers */
+ u32 piosize2k;
+ /* max usable size in dwords of a "2KB" PIO buffer before going "4KB" */
+ u32 piosize2kmax_dwords;
+ /* number of "4KB" PIO buffers */
+ u32 piobcnt4k;
+ /* size in bytes of "4KB" PIO buffers */
+ u32 piosize4k;
+ /* kr_rcvegrbase value */
+ u32 rcvegrbase;
+ /* kr_rcvtidbase value */
+ u32 rcvtidbase;
+ /* kr_rcvtidcnt value */
+ u32 rcvtidcnt;
+ /* kr_userregbase */
+ u32 uregbase;
+ /* shadow the control register contents */
+ u32 control;
+
+ /* chip address space used by 4k pio buffers */
+ u32 align4k;
+ /* size of each rcvegrbuffer */
+ u16 rcvegrbufsize;
+ /* log2 of above */
+ u16 rcvegrbufsize_shift;
+ /* localbus width (1, 2, 4, 8, 16, 32) from config space */
+ u32 lbus_width;
+ /* localbus speed in MHz */
+ u32 lbus_speed;
+ int unit; /* unit # of this chip */
+
+ /* start of CHIP_SPEC move to chipspec, but need code changes */
+ /* low and high portions of MSI capability/vector */
+ u32 msi_lo;
+ /* saved after PCIe init for restore after reset */
+ u32 msi_hi;
+ /* MSI data (vector) saved for restore */
+ u16 msi_data;
+ /* so we can rewrite it after a chip reset */
+ u32 pcibar0;
+ /* so we can rewrite it after a chip reset */
+ u32 pcibar1;
+ u64 rhdrhead_intr_off;
+
+ /*
+ * ASCII serial number, from flash, large enough for the original
+ * all-digit strings and the longer QLogic serial number format
+ */
+ u8 serial[16];
+ /* human readable board version */
+ u8 boardversion[96];
+ u8 lbus_info[32]; /* human readable localbus info */
+ /* chip major rev, from qib_revision */
+ u8 majrev;
+ /* chip minor rev, from qib_revision */
+ u8 minrev;
+
+ /* Misc small ints */
+ /* Number of physical ports available */
+ u8 num_pports;
+ /* Lowest context number which can be used by user processes */
+ u8 first_user_ctxt;
+ u8 n_krcv_queues;
+ u8 qpn_mask;
+ u8 skip_kctxt_mask;
+
+ u16 rhf_offset; /* offset of RHF within receive header entry */
+
+ /*
+ * GPIO pins for twsi-connected devices, and device code for eeprom
+ */
+ u8 gpio_sda_num;
+ u8 gpio_scl_num;
+ u8 twsi_eeprom_dev;
+ u8 board_atten;
+
+ /* Support (including locks) for EEPROM logging of errors and time */
+ /* control access to actual counters, timer */
+ spinlock_t eep_st_lock;
+ /* control high-level access to EEPROM */
+ struct mutex eep_lock;
+ uint64_t traffic_wds;
+ /* active time is kept in seconds, but logged in hours */
+ atomic_t active_time;
+ /* Below are nominal shadow of EEPROM, new since last EEPROM update */
+ uint8_t eep_st_errs[QIB_EEP_LOG_CNT];
+ uint8_t eep_st_new_errs[QIB_EEP_LOG_CNT];
+ uint16_t eep_hrs;
+ /*
+ * masks for which bits of errs, hwerrs that cause
+ * each of the counters to increment.
+ */
+ struct qib_eep_log_mask eep_st_masks[QIB_EEP_LOG_CNT];
+ struct qib_diag_client *diag_client;
+ spinlock_t qib_diag_trans_lock; /* protect diag observer ops */
+ struct diag_observer_list_elt *diag_observer_list;
+
+ u8 psxmitwait_supported;
+ /* cycle length of PS* counters in HW (in picoseconds) */
+ u16 psxmitwait_check_rate;
+ /* high volume overflow errors deferred to tasklet */
+ struct tasklet_struct error_tasklet;
+ /* per device cq worker */
+ struct kthread_worker *worker;
+
+ int assigned_node_id; /* NUMA node closest to HCA */
+};
+
+/* hol_state values */
+#define QIB_HOL_UP 0
+#define QIB_HOL_INIT 1
+
+#define QIB_SDMA_SENDCTRL_OP_ENABLE (1U << 0)
+#define QIB_SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
+#define QIB_SDMA_SENDCTRL_OP_HALT (1U << 2)
+#define QIB_SDMA_SENDCTRL_OP_CLEANUP (1U << 3)
+#define QIB_SDMA_SENDCTRL_OP_DRAIN (1U << 4)
+
+/* operation types for f_txchk_change() */
+#define TXCHK_CHG_TYPE_DIS1 3
+#define TXCHK_CHG_TYPE_ENAB1 2
+#define TXCHK_CHG_TYPE_KERN 1
+#define TXCHK_CHG_TYPE_USER 0
+
+#define QIB_CHASE_TIME msecs_to_jiffies(145)
+#define QIB_CHASE_DIS_TIME msecs_to_jiffies(160)
+
+/* Private data for file operations */
+struct qib_filedata {
+ struct qib_ctxtdata *rcd;
+ unsigned subctxt;
+ unsigned tidcursor;
+ struct qib_user_sdma_queue *pq;
+ int rec_cpu_num; /* for cpu affinity; -1 if none */
+};
+
+extern struct list_head qib_dev_list;
+extern spinlock_t qib_devs_lock;
+extern struct qib_devdata *qib_lookup(int unit);
+extern u32 qib_cpulist_count;
+extern unsigned long *qib_cpulist;
+
+extern unsigned qib_wc_pat;
+extern unsigned qib_cc_table_size;
+int qib_init(struct qib_devdata *, int);
+int init_chip_wc_pat(struct qib_devdata *dd, u32);
+int qib_enable_wc(struct qib_devdata *dd);
+void qib_disable_wc(struct qib_devdata *dd);
+int qib_count_units(int *npresentp, int *nupp);
+int qib_count_active_units(void);
+
+int qib_cdev_init(int minor, const char *name,
+ const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp);
+void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp);
+int qib_dev_init(void);
+void qib_dev_cleanup(void);
+
+int qib_diag_add(struct qib_devdata *);
+void qib_diag_remove(struct qib_devdata *);
+void qib_handle_e_ibstatuschanged(struct qib_pportdata *, u64);
+void qib_sdma_update_tail(struct qib_pportdata *, u16); /* hold sdma_lock */
+
+int qib_decode_err(struct qib_devdata *dd, char *buf, size_t blen, u64 err);
+void qib_bad_intrstatus(struct qib_devdata *);
+void qib_handle_urcv(struct qib_devdata *, u64);
+
+/* clean up any per-chip chip-specific stuff */
+void qib_chip_cleanup(struct qib_devdata *);
+/* clean up any chip type-specific stuff */
+void qib_chip_done(void);
+
+/* check to see if we have to force ordering for write combining */
+int qib_unordered_wc(void);
+void qib_pio_copy(void __iomem *to, const void *from, size_t count);
+
+void qib_disarm_piobufs(struct qib_devdata *, unsigned, unsigned);
+int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *);
+void qib_disarm_piobufs_set(struct qib_devdata *, unsigned long *, unsigned);
+void qib_cancel_sends(struct qib_pportdata *);
+
+int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
+int qib_setup_eagerbufs(struct qib_ctxtdata *);
+void qib_set_ctxtcnt(struct qib_devdata *);
+int qib_create_ctxts(struct qib_devdata *dd);
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
+
+u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
+int qib_reset_device(int);
+int qib_wait_linkstate(struct qib_pportdata *, u32, int);
+int qib_set_linkstate(struct qib_pportdata *, u8);
+int qib_set_mtu(struct qib_pportdata *, u16);
+int qib_set_lid(struct qib_pportdata *, u32, u8);
+void qib_hol_down(struct qib_pportdata *);
+void qib_hol_init(struct qib_pportdata *);
+void qib_hol_up(struct qib_pportdata *);
+void qib_hol_event(unsigned long);
+void qib_disable_after_error(struct qib_devdata *);
+int qib_set_uevent_bits(struct qib_pportdata *, const int);
+
+/* for use in system calls, where we want to know device type, etc. */
+#define ctxt_fp(fp) \
+ (((struct qib_filedata *)(fp)->private_data)->rcd)
+#define subctxt_fp(fp) \
+ (((struct qib_filedata *)(fp)->private_data)->subctxt)
+#define tidcursor_fp(fp) \
+ (((struct qib_filedata *)(fp)->private_data)->tidcursor)
+#define user_sdma_queue_fp(fp) \
+ (((struct qib_filedata *)(fp)->private_data)->pq)
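+
+/*
+ * Usage sketch (illustrative): in a file_operations handler, the per-open
+ * state is recovered from the struct file via the accessors above, e.g.
+ *
+ *   struct qib_ctxtdata *rcd = ctxt_fp(fp);
+ *   unsigned subctxt = subctxt_fp(fp);
+ */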
+
+static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd)
+{
+ return ppd->dd;
+}
+
+static inline struct qib_devdata *dd_from_dev(struct qib_ibdev *dev)
+{
+ return container_of(dev, struct qib_devdata, verbs_dev);
+}
+
+static inline struct qib_devdata *dd_from_ibdev(struct ib_device *ibdev)
+{
+ return dd_from_dev(to_idev(ibdev));
+}
+
+static inline struct qib_pportdata *ppd_from_ibp(struct qib_ibport *ibp)
+{
+ return container_of(ibp, struct qib_pportdata, ibport_data);
+}
+
+static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */
+
+ WARN_ON(pidx >= dd->num_pports);
+ return &dd->pport[pidx].ibport_data;
+}
+
+/*
+ * values for dd->flags (_device_ related flags)
+ */
+#define QIB_HAS_LINK_LATENCY 0x1 /* supports link latency (IB 1.2) */
+#define QIB_INITTED 0x2 /* chip and driver up and initted */
+#define QIB_DOING_RESET 0x4 /* in the middle of doing chip reset */
+#define QIB_PRESENT 0x8 /* chip accesses can be done */
+#define QIB_PIO_FLUSH_WC 0x10 /* Needs Write combining flush for PIO */
+#define QIB_HAS_THRESH_UPDATE 0x40
+#define QIB_HAS_SDMA_TIMEOUT 0x80
+#define QIB_USE_SPCL_TRIG 0x100 /* SpecialTrigger launch enabled */
+#define QIB_NODMA_RTAIL 0x200 /* rcvhdrtail register DMA not used */
+#define QIB_HAS_INTX 0x800 /* Supports INTx interrupts */
+#define QIB_HAS_SEND_DMA 0x1000 /* Supports Send DMA */
+#define QIB_HAS_VLSUPP 0x2000 /* Supports multiple VLs; PBC different */
+#define QIB_HAS_HDRSUPP 0x4000 /* Supports header suppression */
+#define QIB_BADINTR 0x8000 /* severe interrupt problems */
+#define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */
+#define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */
+
+/*
+ * values for ppd->lflags (_ib_port_ related flags)
+ */
+#define QIBL_LINKV 0x1 /* IB link state valid */
+#define QIBL_LINKDOWN 0x8 /* IB link is down */
+#define QIBL_LINKINIT 0x10 /* IB link level is up */
+#define QIBL_LINKARMED 0x20 /* IB link is ARMED */
+#define QIBL_LINKACTIVE 0x40 /* IB link is ACTIVE */
+/* leave a gap for more IB-link state */
+#define QIBL_IB_AUTONEG_INPROG 0x1000 /* non-IBTA DDR/QDR neg active */
+#define QIBL_IB_AUTONEG_FAILED 0x2000 /* non-IBTA DDR/QDR neg failed */
+#define QIBL_IB_LINK_DISABLED 0x4000 /* Linkdown-disable forced,
+ * Do not try to bring up */
+#define QIBL_IB_FORCE_NOTIFY 0x8000 /* force notify on next ib change */
+
+/* IB dword length mask in PBC (lower 11 bits); same for all chips */
+#define QIB_PBC_LENGTH_MASK ((1 << 11) - 1)
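+/*
+ * Illustrative note: masking a PBC word with QIB_PBC_LENGTH_MASK keeps only
+ * the 11-bit dword length field, i.e. values 0..2047 dwords:
+ *
+ *   u32 dwords = pbc & QIB_PBC_LENGTH_MASK;
+ */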
+
+
+/* ctxt_flag bit offsets */
+ /* waiting for a packet to arrive */
+#define QIB_CTXT_WAITING_RCV 2
+ /* master has not finished initializing */
+#define QIB_CTXT_MASTER_UNINIT 4
+ /* waiting for an urgent packet to arrive */
+#define QIB_CTXT_WAITING_URG 5
+
+/* free up any allocated data at context close */
+void qib_free_data(struct qib_ctxtdata *dd);
+void qib_chg_pioavailkernel(struct qib_devdata *, unsigned, unsigned,
+ u32, struct qib_ctxtdata *);
+struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *,
+ const struct pci_device_id *);
+struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *,
+ const struct pci_device_id *);
+struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *,
+ const struct pci_device_id *);
+void qib_free_devdata(struct qib_devdata *);
+struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra);
+
+#define QIB_TWSI_NO_DEV 0xFF
+/* Below qib_twsi_ functions must be called with eep_lock held */
+int qib_twsi_reset(struct qib_devdata *dd);
+int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, void *buffer,
+ int len);
+int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
+ const void *buffer, int len);
+void qib_get_eeprom_info(struct qib_devdata *);
+int qib_update_eeprom_log(struct qib_devdata *dd);
+void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr);
+void qib_dump_lookup_output_queue(struct qib_devdata *);
+void qib_force_pio_avail_update(struct qib_devdata *);
+void qib_clear_symerror_on_linkup(unsigned long opaque);
+
+/*
+ * Set LED override: only the two LSBs have "public" meaning, but
+ * any non-zero value substitutes them for the Link and LinkTrain
+ * LED states.
+ */
+#define QIB_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
+#define QIB_LED_LOG 2 /* Logical (link) YELLOW LED */
+void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val);
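+/*
+ * Usage sketch (illustrative, assumed semantics): force both override LEDs
+ * on, then write zero, which presumably returns the LEDs to normal
+ * hardware-driven behavior.
+ *
+ *   qib_set_led_override(ppd, QIB_LED_PHYS | QIB_LED_LOG);
+ *   qib_set_led_override(ppd, 0);
+ */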
+
+/* send dma routines */
+int qib_setup_sdma(struct qib_pportdata *);
+void qib_teardown_sdma(struct qib_pportdata *);
+void __qib_sdma_intr(struct qib_pportdata *);
+void qib_sdma_intr(struct qib_pportdata *);
+void qib_user_sdma_send_desc(struct qib_pportdata *dd,
+ struct list_head *pktlist);
+int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
+ u32, struct qib_verbs_txreq *);
+/* ppd->sdma_lock should be locked before calling this. */
+int qib_sdma_make_progress(struct qib_pportdata *dd);
+
+static inline int qib_sdma_empty(const struct qib_pportdata *ppd)
+{
+ return ppd->sdma_descq_added == ppd->sdma_descq_removed;
+}
+
+/* must be called under qib_sdma_lock */
+static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd)
+{
+ return ppd->sdma_descq_cnt -
+ (ppd->sdma_descq_added - ppd->sdma_descq_removed) - 1;
+}
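+/*
+ * Note on the arithmetic above: the "- 1" keeps one descriptor slot
+ * permanently unused, so a full ring is never confused with an empty one
+ * (qib_sdma_empty() treats added == removed as empty).
+ */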
+
+static inline int __qib_sdma_running(struct qib_pportdata *ppd)
+{
+ return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
+}
+int qib_sdma_running(struct qib_pportdata *);
+void dump_sdma_state(struct qib_pportdata *ppd);
+void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
+void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
+
+/*
+ * number of words used for protocol header if not set by qib_userinit()
+ */
+#define QIB_DFLT_RCVHDRSIZE 9
+
+/*
+ * We need to be able to handle an IB header of at least 24 dwords.
+ * We need the rcvhdrq large enough to handle largest IB header, but
+ * still have room for a 2KB MTU standard IB packet.
+ * Additionally, some processor/memory controller combinations
+ * benefit quite strongly from having the DMA'ed data be cacheline
+ * aligned and a cacheline multiple, so we set the size to 32 dwords
+ * (2 64-byte primary cachelines for pretty much all processors of
+ * interest). The alignment hurts nothing, other than using somewhat
+ * more memory.
+ */
+#define QIB_RCVHDR_ENTSIZE 32
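+/*
+ * Worked out from the comment above: 32 dwords * 4 bytes = 128 bytes,
+ * i.e. exactly two 64-byte cachelines, which leaves 32 - 24 = 8 dwords of
+ * headroom over the largest IB header that must be handled.
+ */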
+
+int qib_get_user_pages(unsigned long, size_t, struct page **);
+void qib_release_user_pages(struct page **, size_t);
+int qib_eeprom_read(struct qib_devdata *, u8, void *, int);
+int qib_eeprom_write(struct qib_devdata *, u8, const void *, int);
+u32 __iomem *qib_getsendbuf_range(struct qib_devdata *, u32 *, u32, u32);
+void qib_sendbuf_done(struct qib_devdata *, unsigned);
+
+static inline void qib_clear_rcvhdrtail(const struct qib_ctxtdata *rcd)
+{
+ *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL;
+}
+
+static inline u32 qib_get_rcvhdrtail(const struct qib_ctxtdata *rcd)
+{
+ /*
+ * volatile because it's a DMA target from the chip, the routine is
+ * inlined, and we don't want register caching or reordering.
+ */
+ return (u32) le64_to_cpu(
+ *((volatile __le64 *)rcd->rcvhdrtail_kvaddr)); /* DMA'ed */
+}
+
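+/*
+ * When QIB_NODMA_RTAIL is set, the tail is inferred from the RHF sequence
+ * number at the current head (head, or head + 1 if the entry at head is
+ * valid) instead of reading the DMA'ed tail value.
+ */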
+static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd)
+{
+ const struct qib_devdata *dd = rcd->dd;
+ u32 hdrqtail;
+
+ if (dd->flags & QIB_NODMA_RTAIL) {
+ __le32 *rhf_addr;
+ u32 seq;
+
+ rhf_addr = (__le32 *) rcd->rcvhdrq +
+ rcd->head + dd->rhf_offset;
+ seq = qib_hdrget_seq(rhf_addr);
+ hdrqtail = rcd->head;
+ if (seq == rcd->seq_cnt)
+ hdrqtail++;
+ } else
+ hdrqtail = qib_get_rcvhdrtail(rcd);
+
+ return hdrqtail;
+}
+
+/*
+ * sysfs interface.
+ */
+
+extern const char ib_qib_version[];
+
+int qib_device_create(struct qib_devdata *);
+void qib_device_remove(struct qib_devdata *);
+
+int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
+ struct kobject *kobj);
+int qib_verbs_register_sysfs(struct qib_devdata *);
+void qib_verbs_unregister_sysfs(struct qib_devdata *);
+/* Hook for sysfs read of QSFP */
+extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
+
+int __init qib_init_qibfs(void);
+int __exit qib_exit_qibfs(void);
+
+int qibfs_add(struct qib_devdata *);
+int qibfs_remove(struct qib_devdata *);
+
+int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
+int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
+ const struct pci_device_id *);
+void qib_pcie_ddcleanup(struct qib_devdata *);
+int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *);
+int qib_reinit_intr(struct qib_devdata *);
+void qib_enable_intx(struct pci_dev *);
+void qib_nomsi(struct qib_devdata *);
+void qib_nomsix(struct qib_devdata *);
+void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
+void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupt for all devices */
+u64 qib_sps_ints(void);
+
+/*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
+ size_t, int);
+const char *qib_get_unit_name(int unit);
+
+/*
+ * Flush write combining store buffers (if present) and perform a write
+ * barrier.
+ */
+#if defined(CONFIG_X86_64)
+#define qib_flush_wc() asm volatile("sfence" : : : "memory")
+#else
+#define qib_flush_wc() wmb() /* no reorder around wc flush */
+#endif
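+/*
+ * Usage sketch (illustrative; piobuf, hdr and hdrwords are hypothetical):
+ * a PIO copy into a chip send buffer is typically followed by
+ * qib_flush_wc() so write-combined stores reach the chip before the
+ * buffer is launched.
+ *
+ *   qib_pio_copy(piobuf, hdr, hdrwords);
+ *   qib_flush_wc();
+ */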
+
+/* global module parameter variables */
+extern unsigned qib_ibmtu;
+extern ushort qib_cfgctxts;
+extern ushort qib_num_cfg_vls;
+extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */
+extern unsigned qib_n_krcv_queues;
+extern unsigned qib_sdma_fetch_arb;
+extern unsigned qib_compat_ddr_negotiate;
+extern int qib_special_trigger;
+extern unsigned qib_numa_aware;
+
+extern struct mutex qib_mutex;
+
+/* Number of seconds before our card status check... */
+#define STATUS_TIMEOUT 60
+
+#define QIB_DRV_NAME "ib_qib"
+#define QIB_USER_MINOR_BASE 0
+#define QIB_TRACE_MINOR 127
+#define QIB_DIAGPKT_MINOR 128
+#define QIB_DIAG_MINOR_BASE 129
+#define QIB_NMINORS 255
+
+#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
+#define PCI_VENDOR_ID_QLOGIC 0x1077
+#define PCI_DEVICE_ID_QLOGIC_IB_6120 0x10
+#define PCI_DEVICE_ID_QLOGIC_IB_7220 0x7220
+#define PCI_DEVICE_ID_QLOGIC_IB_7322 0x7322
+
+/*
+ * qib_early_err is used (only!) to print early errors before devdata is
+ * allocated, or when dd->pcidev may not be valid, and at the tail end of
+ * cleanup when devdata may have been freed, etc. qib_dev_porterr is
+ * the same as qib_dev_err, but is used when the message really needs
+ * the IB port# to be definitive as to what's happening.
+ * All of these go to the trace log, and the trace log entry is done
+ * first to avoid possible serial port delays from printk.
+ */
+#define qib_early_err(dev, fmt, ...) \
+ dev_err(dev, fmt, ##__VA_ARGS__)
+
+#define qib_dev_err(dd, fmt, ...) \
+ dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
+ qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+
+#define qib_dev_warn(dd, fmt, ...) \
+ dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
+ qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
+
+#define qib_dev_porterr(dd, port, fmt, ...) \
+ dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
+ qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
+ ##__VA_ARGS__)
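+/*
+ * Usage sketch (illustrative; "port" is a hypothetical variable):
+ * qib_dev_err() prefixes the unit name, while qib_dev_porterr() also
+ * identifies the IB port the message refers to.
+ *
+ *   qib_dev_err(dd, "Failed to allocate rcvhdrq\n");
+ *   qib_dev_porterr(dd, port, "link width changed\n");
+ */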
+
+#define qib_devinfo(pcidev, fmt, ...) \
+ dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
+
+/*
+ * this is used for formatting hw error messages...
+ */
+struct qib_hwerror_msgs {
+ u64 mask;
+ const char *msg;
+ size_t sz;
+};
+
+#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b }
+
+/* in qib_intr.c... */
+void qib_format_hwerrors(u64 hwerrs,
+ const struct qib_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs, char *msg, size_t lmsg);
+#endif /* _QIB_KERNEL_H */
diff --git a/drivers/infiniband/hw/qib/qib_6120_regs.h b/drivers/infiniband/hw/qib/qib_6120_regs.h
new file mode 100644
index 00000000000..e16cb6f7de2
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_6120_regs.h
@@ -0,0 +1,977 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is mechanically generated from RTL. Any hand-edits will be lost! */
+
+#define QIB_6120_Revision_OFFS 0x0
+#define QIB_6120_Revision_R_Simulator_LSB 0x3F
+#define QIB_6120_Revision_R_Simulator_RMASK 0x1
+#define QIB_6120_Revision_Reserved_LSB 0x28
+#define QIB_6120_Revision_Reserved_RMASK 0x7FFFFF
+#define QIB_6120_Revision_BoardID_LSB 0x20
+#define QIB_6120_Revision_BoardID_RMASK 0xFF
+#define QIB_6120_Revision_R_SW_LSB 0x18
+#define QIB_6120_Revision_R_SW_RMASK 0xFF
+#define QIB_6120_Revision_R_Arch_LSB 0x10
+#define QIB_6120_Revision_R_Arch_RMASK 0xFF
+#define QIB_6120_Revision_R_ChipRevMajor_LSB 0x8
+#define QIB_6120_Revision_R_ChipRevMajor_RMASK 0xFF
+#define QIB_6120_Revision_R_ChipRevMinor_LSB 0x0
+#define QIB_6120_Revision_R_ChipRevMinor_RMASK 0xFF
+
+#define QIB_6120_Control_OFFS 0x8
+#define QIB_6120_Control_TxLatency_LSB 0x4
+#define QIB_6120_Control_TxLatency_RMASK 0x1
+#define QIB_6120_Control_PCIERetryBufDiagEn_LSB 0x3
+#define QIB_6120_Control_PCIERetryBufDiagEn_RMASK 0x1
+#define QIB_6120_Control_LinkEn_LSB 0x2
+#define QIB_6120_Control_LinkEn_RMASK 0x1
+#define QIB_6120_Control_FreezeMode_LSB 0x1
+#define QIB_6120_Control_FreezeMode_RMASK 0x1
+#define QIB_6120_Control_SyncReset_LSB 0x0
+#define QIB_6120_Control_SyncReset_RMASK 0x1
+
+#define QIB_6120_PageAlign_OFFS 0x10
+
+#define QIB_6120_PortCnt_OFFS 0x18
+
+#define QIB_6120_SendRegBase_OFFS 0x30
+
+#define QIB_6120_UserRegBase_OFFS 0x38
+
+#define QIB_6120_CntrRegBase_OFFS 0x40
+
+#define QIB_6120_Scratch_OFFS 0x48
+#define QIB_6120_Scratch_TopHalf_LSB 0x20
+#define QIB_6120_Scratch_TopHalf_RMASK 0xFFFFFFFF
+#define QIB_6120_Scratch_BottomHalf_LSB 0x0
+#define QIB_6120_Scratch_BottomHalf_RMASK 0xFFFFFFFF
+
+#define QIB_6120_IntBlocked_OFFS 0x60
+#define QIB_6120_IntBlocked_ErrorIntBlocked_LSB 0x1F
+#define QIB_6120_IntBlocked_ErrorIntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_PioSetIntBlocked_LSB 0x1E
+#define QIB_6120_IntBlocked_PioSetIntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_LSB 0x1D
+#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_assertGPIOIntBlocked_LSB 0x1C
+#define QIB_6120_IntBlocked_assertGPIOIntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_Reserved_LSB 0xF
+#define QIB_6120_IntBlocked_Reserved_RMASK 0x1FFF
+#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_LSB 0x10
+#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_LSB 0xF
+#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_LSB 0xE
+#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_LSB 0xD
+#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_LSB 0xC
+#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_Reserved1_LSB 0x5
+#define QIB_6120_IntBlocked_Reserved1_RMASK 0x7F
+#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_LSB 0x4
+#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_LSB 0x3
+#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_LSB 0x2
+#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_LSB 0x1
+#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_RMASK 0x1
+#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_LSB 0x0
+#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_RMASK 0x1
+
+#define QIB_6120_IntMask_OFFS 0x68
+#define QIB_6120_IntMask_ErrorIntMask_LSB 0x1F
+#define QIB_6120_IntMask_ErrorIntMask_RMASK 0x1
+#define QIB_6120_IntMask_PioSetIntMask_LSB 0x1E
+#define QIB_6120_IntMask_PioSetIntMask_RMASK 0x1
+#define QIB_6120_IntMask_PioBufAvailIntMask_LSB 0x1D
+#define QIB_6120_IntMask_PioBufAvailIntMask_RMASK 0x1
+#define QIB_6120_IntMask_assertGPIOIntMask_LSB 0x1C
+#define QIB_6120_IntMask_assertGPIOIntMask_RMASK 0x1
+#define QIB_6120_IntMask_Reserved_LSB 0x11
+#define QIB_6120_IntMask_Reserved_RMASK 0x7FF
+#define QIB_6120_IntMask_RcvAvail4IntMask_LSB 0x10
+#define QIB_6120_IntMask_RcvAvail4IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvAvail3IntMask_LSB 0xF
+#define QIB_6120_IntMask_RcvAvail3IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvAvail2IntMask_LSB 0xE
+#define QIB_6120_IntMask_RcvAvail2IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvAvail1IntMask_LSB 0xD
+#define QIB_6120_IntMask_RcvAvail1IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvAvail0IntMask_LSB 0xC
+#define QIB_6120_IntMask_RcvAvail0IntMask_RMASK 0x1
+#define QIB_6120_IntMask_Reserved1_LSB 0x5
+#define QIB_6120_IntMask_Reserved1_RMASK 0x7F
+#define QIB_6120_IntMask_RcvUrg4IntMask_LSB 0x4
+#define QIB_6120_IntMask_RcvUrg4IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvUrg3IntMask_LSB 0x3
+#define QIB_6120_IntMask_RcvUrg3IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvUrg2IntMask_LSB 0x2
+#define QIB_6120_IntMask_RcvUrg2IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvUrg1IntMask_LSB 0x1
+#define QIB_6120_IntMask_RcvUrg1IntMask_RMASK 0x1
+#define QIB_6120_IntMask_RcvUrg0IntMask_LSB 0x0
+#define QIB_6120_IntMask_RcvUrg0IntMask_RMASK 0x1
+
+#define QIB_6120_IntStatus_OFFS 0x70
+#define QIB_6120_IntStatus_Error_LSB 0x1F
+#define QIB_6120_IntStatus_Error_RMASK 0x1
+#define QIB_6120_IntStatus_PioSent_LSB 0x1E
+#define QIB_6120_IntStatus_PioSent_RMASK 0x1
+#define QIB_6120_IntStatus_PioBufAvail_LSB 0x1D
+#define QIB_6120_IntStatus_PioBufAvail_RMASK 0x1
+#define QIB_6120_IntStatus_assertGPIO_LSB 0x1C
+#define QIB_6120_IntStatus_assertGPIO_RMASK 0x1
+#define QIB_6120_IntStatus_Reserved_LSB 0xF
+#define QIB_6120_IntStatus_Reserved_RMASK 0x1FFF
+#define QIB_6120_IntStatus_RcvAvail4_LSB 0x10
+#define QIB_6120_IntStatus_RcvAvail4_RMASK 0x1
+#define QIB_6120_IntStatus_RcvAvail3_LSB 0xF
+#define QIB_6120_IntStatus_RcvAvail3_RMASK 0x1
+#define QIB_6120_IntStatus_RcvAvail2_LSB 0xE
+#define QIB_6120_IntStatus_RcvAvail2_RMASK 0x1
+#define QIB_6120_IntStatus_RcvAvail1_LSB 0xD
+#define QIB_6120_IntStatus_RcvAvail1_RMASK 0x1
+#define QIB_6120_IntStatus_RcvAvail0_LSB 0xC
+#define QIB_6120_IntStatus_RcvAvail0_RMASK 0x1
+#define QIB_6120_IntStatus_Reserved1_LSB 0x5
+#define QIB_6120_IntStatus_Reserved1_RMASK 0x7F
+#define QIB_6120_IntStatus_RcvUrg4_LSB 0x4
+#define QIB_6120_IntStatus_RcvUrg4_RMASK 0x1
+#define QIB_6120_IntStatus_RcvUrg3_LSB 0x3
+#define QIB_6120_IntStatus_RcvUrg3_RMASK 0x1
+#define QIB_6120_IntStatus_RcvUrg2_LSB 0x2
+#define QIB_6120_IntStatus_RcvUrg2_RMASK 0x1
+#define QIB_6120_IntStatus_RcvUrg1_LSB 0x1
+#define QIB_6120_IntStatus_RcvUrg1_RMASK 0x1
+#define QIB_6120_IntStatus_RcvUrg0_LSB 0x0
+#define QIB_6120_IntStatus_RcvUrg0_RMASK 0x1
+
+#define QIB_6120_IntClear_OFFS 0x78
+#define QIB_6120_IntClear_ErrorIntClear_LSB 0x1F
+#define QIB_6120_IntClear_ErrorIntClear_RMASK 0x1
+#define QIB_6120_IntClear_PioSetIntClear_LSB 0x1E
+#define QIB_6120_IntClear_PioSetIntClear_RMASK 0x1
+#define QIB_6120_IntClear_PioBufAvailIntClear_LSB 0x1D
+#define QIB_6120_IntClear_PioBufAvailIntClear_RMASK 0x1
+#define QIB_6120_IntClear_assertGPIOIntClear_LSB 0x1C
+#define QIB_6120_IntClear_assertGPIOIntClear_RMASK 0x1
+#define QIB_6120_IntClear_Reserved_LSB 0xF
+#define QIB_6120_IntClear_Reserved_RMASK 0x1FFF
+#define QIB_6120_IntClear_RcvAvail4IntClear_LSB 0x10
+#define QIB_6120_IntClear_RcvAvail4IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvAvail3IntClear_LSB 0xF
+#define QIB_6120_IntClear_RcvAvail3IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvAvail2IntClear_LSB 0xE
+#define QIB_6120_IntClear_RcvAvail2IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvAvail1IntClear_LSB 0xD
+#define QIB_6120_IntClear_RcvAvail1IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvAvail0IntClear_LSB 0xC
+#define QIB_6120_IntClear_RcvAvail0IntClear_RMASK 0x1
+#define QIB_6120_IntClear_Reserved1_LSB 0x5
+#define QIB_6120_IntClear_Reserved1_RMASK 0x7F
+#define QIB_6120_IntClear_RcvUrg4IntClear_LSB 0x4
+#define QIB_6120_IntClear_RcvUrg4IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvUrg3IntClear_LSB 0x3
+#define QIB_6120_IntClear_RcvUrg3IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvUrg2IntClear_LSB 0x2
+#define QIB_6120_IntClear_RcvUrg2IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvUrg1IntClear_LSB 0x1
+#define QIB_6120_IntClear_RcvUrg1IntClear_RMASK 0x1
+#define QIB_6120_IntClear_RcvUrg0IntClear_LSB 0x0
+#define QIB_6120_IntClear_RcvUrg0IntClear_RMASK 0x1
+
+#define QIB_6120_ErrMask_OFFS 0x80
+#define QIB_6120_ErrMask_Reserved_LSB 0x34
+#define QIB_6120_ErrMask_Reserved_RMASK 0xFFF
+#define QIB_6120_ErrMask_HardwareErrMask_LSB 0x33
+#define QIB_6120_ErrMask_HardwareErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_ResetNegatedMask_LSB 0x32
+#define QIB_6120_ErrMask_ResetNegatedMask_RMASK 0x1
+#define QIB_6120_ErrMask_InvalidAddrErrMask_LSB 0x31
+#define QIB_6120_ErrMask_InvalidAddrErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_IBStatusChangedMask_LSB 0x30
+#define QIB_6120_ErrMask_IBStatusChangedMask_RMASK 0x1
+#define QIB_6120_ErrMask_Reserved1_LSB 0x26
+#define QIB_6120_ErrMask_Reserved1_RMASK 0x3FF
+#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_LSB 0x25
+#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24
+#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_LSB 0x23
+#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_LSB 0x22
+#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21
+#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendPktLenErrMask_LSB 0x20
+#define QIB_6120_ErrMask_SendPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendUnderRunErrMask_LSB 0x1F
+#define QIB_6120_ErrMask_SendUnderRunErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendMaxPktLenErrMask_LSB 0x1E
+#define QIB_6120_ErrMask_SendMaxPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_SendMinPktLenErrMask_LSB 0x1D
+#define QIB_6120_ErrMask_SendMinPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_Reserved2_LSB 0x12
+#define QIB_6120_ErrMask_Reserved2_RMASK 0x7FF
+#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_LSB 0x11
+#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvHdrErrMask_LSB 0x10
+#define QIB_6120_ErrMask_RcvHdrErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvHdrLenErrMask_LSB 0xF
+#define QIB_6120_ErrMask_RcvHdrLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvBadTidErrMask_LSB 0xE
+#define QIB_6120_ErrMask_RcvBadTidErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvHdrFullErrMask_LSB 0xD
+#define QIB_6120_ErrMask_RcvHdrFullErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvEgrFullErrMask_LSB 0xC
+#define QIB_6120_ErrMask_RcvEgrFullErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvBadVersionErrMask_LSB 0xB
+#define QIB_6120_ErrMask_RcvBadVersionErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvIBFlowErrMask_LSB 0xA
+#define QIB_6120_ErrMask_RcvIBFlowErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvEBPErrMask_LSB 0x9
+#define QIB_6120_ErrMask_RcvEBPErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8
+#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7
+#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvShortPktLenErrMask_LSB 0x6
+#define QIB_6120_ErrMask_RcvShortPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvLongPktLenErrMask_LSB 0x5
+#define QIB_6120_ErrMask_RcvLongPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_LSB 0x4
+#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvMinPktLenErrMask_LSB 0x3
+#define QIB_6120_ErrMask_RcvMinPktLenErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvICRCErrMask_LSB 0x2
+#define QIB_6120_ErrMask_RcvICRCErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvVCRCErrMask_LSB 0x1
+#define QIB_6120_ErrMask_RcvVCRCErrMask_RMASK 0x1
+#define QIB_6120_ErrMask_RcvFormatErrMask_LSB 0x0
+#define QIB_6120_ErrMask_RcvFormatErrMask_RMASK 0x1
+
+#define QIB_6120_ErrStatus_OFFS 0x88
+#define QIB_6120_ErrStatus_Reserved_LSB 0x34
+#define QIB_6120_ErrStatus_Reserved_RMASK 0xFFF
+#define QIB_6120_ErrStatus_HardwareErr_LSB 0x33
+#define QIB_6120_ErrStatus_HardwareErr_RMASK 0x1
+#define QIB_6120_ErrStatus_ResetNegated_LSB 0x32
+#define QIB_6120_ErrStatus_ResetNegated_RMASK 0x1
+#define QIB_6120_ErrStatus_InvalidAddrErr_LSB 0x31
+#define QIB_6120_ErrStatus_InvalidAddrErr_RMASK 0x1
+#define QIB_6120_ErrStatus_IBStatusChanged_LSB 0x30
+#define QIB_6120_ErrStatus_IBStatusChanged_RMASK 0x1
+#define QIB_6120_ErrStatus_Reserved1_LSB 0x26
+#define QIB_6120_ErrStatus_Reserved1_RMASK 0x3FF
+#define QIB_6120_ErrStatus_SendUnsupportedVLErr_LSB 0x25
+#define QIB_6120_ErrStatus_SendUnsupportedVLErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24
+#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendPioArmLaunchErr_LSB 0x23
+#define QIB_6120_ErrStatus_SendPioArmLaunchErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendDroppedDataPktErr_LSB 0x22
+#define QIB_6120_ErrStatus_SendDroppedDataPktErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_LSB 0x21
+#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendPktLenErr_LSB 0x20
+#define QIB_6120_ErrStatus_SendPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendUnderRunErr_LSB 0x1F
+#define QIB_6120_ErrStatus_SendUnderRunErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendMaxPktLenErr_LSB 0x1E
+#define QIB_6120_ErrStatus_SendMaxPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_SendMinPktLenErr_LSB 0x1D
+#define QIB_6120_ErrStatus_SendMinPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_Reserved2_LSB 0x12
+#define QIB_6120_ErrStatus_Reserved2_RMASK 0x7FF
+#define QIB_6120_ErrStatus_RcvIBLostLinkErr_LSB 0x11
+#define QIB_6120_ErrStatus_RcvIBLostLinkErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvHdrErr_LSB 0x10
+#define QIB_6120_ErrStatus_RcvHdrErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvHdrLenErr_LSB 0xF
+#define QIB_6120_ErrStatus_RcvHdrLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvBadTidErr_LSB 0xE
+#define QIB_6120_ErrStatus_RcvBadTidErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvHdrFullErr_LSB 0xD
+#define QIB_6120_ErrStatus_RcvHdrFullErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvEgrFullErr_LSB 0xC
+#define QIB_6120_ErrStatus_RcvEgrFullErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvBadVersionErr_LSB 0xB
+#define QIB_6120_ErrStatus_RcvBadVersionErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvIBFlowErr_LSB 0xA
+#define QIB_6120_ErrStatus_RcvIBFlowErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvEBPErr_LSB 0x9
+#define QIB_6120_ErrStatus_RcvEBPErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_LSB 0x8
+#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_LSB 0x7
+#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvShortPktLenErr_LSB 0x6
+#define QIB_6120_ErrStatus_RcvShortPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvLongPktLenErr_LSB 0x5
+#define QIB_6120_ErrStatus_RcvLongPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvMaxPktLenErr_LSB 0x4
+#define QIB_6120_ErrStatus_RcvMaxPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvMinPktLenErr_LSB 0x3
+#define QIB_6120_ErrStatus_RcvMinPktLenErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvICRCErr_LSB 0x2
+#define QIB_6120_ErrStatus_RcvICRCErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvVCRCErr_LSB 0x1
+#define QIB_6120_ErrStatus_RcvVCRCErr_RMASK 0x1
+#define QIB_6120_ErrStatus_RcvFormatErr_LSB 0x0
+#define QIB_6120_ErrStatus_RcvFormatErr_RMASK 0x1
+
+#define QIB_6120_ErrClear_OFFS 0x90
+#define QIB_6120_ErrClear_Reserved_LSB 0x34
+#define QIB_6120_ErrClear_Reserved_RMASK 0xFFF
+#define QIB_6120_ErrClear_HardwareErrClear_LSB 0x33
+#define QIB_6120_ErrClear_HardwareErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_ResetNegatedClear_LSB 0x32
+#define QIB_6120_ErrClear_ResetNegatedClear_RMASK 0x1
+#define QIB_6120_ErrClear_InvalidAddrErrClear_LSB 0x31
+#define QIB_6120_ErrClear_InvalidAddrErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_IBStatusChangedClear_LSB 0x30
+#define QIB_6120_ErrClear_IBStatusChangedClear_RMASK 0x1
+#define QIB_6120_ErrClear_Reserved1_LSB 0x26
+#define QIB_6120_ErrClear_Reserved1_RMASK 0x3FF
+#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_LSB 0x25
+#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24
+#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_LSB 0x23
+#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_LSB 0x22
+#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21
+#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendPktLenErrClear_LSB 0x20
+#define QIB_6120_ErrClear_SendPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendUnderRunErrClear_LSB 0x1F
+#define QIB_6120_ErrClear_SendUnderRunErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendMaxPktLenErrClear_LSB 0x1E
+#define QIB_6120_ErrClear_SendMaxPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_SendMinPktLenErrClear_LSB 0x1D
+#define QIB_6120_ErrClear_SendMinPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_Reserved2_LSB 0x12
+#define QIB_6120_ErrClear_Reserved2_RMASK 0x7FF
+#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_LSB 0x11
+#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvHdrErrClear_LSB 0x10
+#define QIB_6120_ErrClear_RcvHdrErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvHdrLenErrClear_LSB 0xF
+#define QIB_6120_ErrClear_RcvHdrLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvBadTidErrClear_LSB 0xE
+#define QIB_6120_ErrClear_RcvBadTidErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvHdrFullErrClear_LSB 0xD
+#define QIB_6120_ErrClear_RcvHdrFullErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvEgrFullErrClear_LSB 0xC
+#define QIB_6120_ErrClear_RcvEgrFullErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvBadVersionErrClear_LSB 0xB
+#define QIB_6120_ErrClear_RcvBadVersionErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvIBFlowErrClear_LSB 0xA
+#define QIB_6120_ErrClear_RcvIBFlowErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvEBPErrClear_LSB 0x9
+#define QIB_6120_ErrClear_RcvEBPErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8
+#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7
+#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvShortPktLenErrClear_LSB 0x6
+#define QIB_6120_ErrClear_RcvShortPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvLongPktLenErrClear_LSB 0x5
+#define QIB_6120_ErrClear_RcvLongPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_LSB 0x4
+#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvMinPktLenErrClear_LSB 0x3
+#define QIB_6120_ErrClear_RcvMinPktLenErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvICRCErrClear_LSB 0x2
+#define QIB_6120_ErrClear_RcvICRCErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvVCRCErrClear_LSB 0x1
+#define QIB_6120_ErrClear_RcvVCRCErrClear_RMASK 0x1
+#define QIB_6120_ErrClear_RcvFormatErrClear_LSB 0x0
+#define QIB_6120_ErrClear_RcvFormatErrClear_RMASK 0x1
+
+#define QIB_6120_HwErrMask_OFFS 0x98
+#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F
+#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1
+#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E
+#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1
+#define QIB_6120_HwErrMask_Reserved_LSB 0x3D
+#define QIB_6120_HwErrMask_Reserved_RMASK 0x1
+#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C
+#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1
+#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x3B
+#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1
+#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x3A
+#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1
+#define QIB_6120_HwErrMask_Reserved1_LSB 0x39
+#define QIB_6120_HwErrMask_Reserved1_RMASK 0x1
+#define QIB_6120_HwErrMask_IBPLLrfSlipMask_LSB 0x38
+#define QIB_6120_HwErrMask_IBPLLrfSlipMask_RMASK 0x1
+#define QIB_6120_HwErrMask_IBPLLfbSlipMask_LSB 0x37
+#define QIB_6120_HwErrMask_IBPLLfbSlipMask_RMASK 0x1
+#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_LSB 0x36
+#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1
+#define QIB_6120_HwErrMask_Reserved2_LSB 0x33
+#define QIB_6120_HwErrMask_Reserved2_RMASK 0x7
+#define QIB_6120_HwErrMask_RXEMemParityErrMask_LSB 0x2C
+#define QIB_6120_HwErrMask_RXEMemParityErrMask_RMASK 0x7F
+#define QIB_6120_HwErrMask_TXEMemParityErrMask_LSB 0x28
+#define QIB_6120_HwErrMask_TXEMemParityErrMask_RMASK 0xF
+#define QIB_6120_HwErrMask_Reserved3_LSB 0x22
+#define QIB_6120_HwErrMask_Reserved3_RMASK 0x3F
+#define QIB_6120_HwErrMask_PCIeBusParityErrMask_LSB 0x1F
+#define QIB_6120_HwErrMask_PCIeBusParityErrMask_RMASK 0x7
+#define QIB_6120_HwErrMask_PcieCplTimeoutMask_LSB 0x1E
+#define QIB_6120_HwErrMask_PcieCplTimeoutMask_RMASK 0x1
+#define QIB_6120_HwErrMask_PoisonedTLPMask_LSB 0x1D
+#define QIB_6120_HwErrMask_PoisonedTLPMask_RMASK 0x1
+#define QIB_6120_HwErrMask_Reserved4_LSB 0x6
+#define QIB_6120_HwErrMask_Reserved4_RMASK 0x7FFFFF
+#define QIB_6120_HwErrMask_PCIeMemParityErrMask_LSB 0x0
+#define QIB_6120_HwErrMask_PCIeMemParityErrMask_RMASK 0x3F
+
+#define QIB_6120_HwErrStatus_OFFS 0xA0
+#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F
+#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1
+#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E
+#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1
+#define QIB_6120_HwErrStatus_Reserved_LSB 0x3D
+#define QIB_6120_HwErrStatus_Reserved_RMASK 0x1
+#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C
+#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1
+#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x3B
+#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1
+#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x3A
+#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1
+#define QIB_6120_HwErrStatus_Reserved1_LSB 0x39
+#define QIB_6120_HwErrStatus_Reserved1_RMASK 0x1
+#define QIB_6120_HwErrStatus_IBPLLrfSlip_LSB 0x38
+#define QIB_6120_HwErrStatus_IBPLLrfSlip_RMASK 0x1
+#define QIB_6120_HwErrStatus_IBPLLfbSlip_LSB 0x37
+#define QIB_6120_HwErrStatus_IBPLLfbSlip_RMASK 0x1
+#define QIB_6120_HwErrStatus_PowerOnBISTFailed_LSB 0x36
+#define QIB_6120_HwErrStatus_PowerOnBISTFailed_RMASK 0x1
+#define QIB_6120_HwErrStatus_Reserved2_LSB 0x33
+#define QIB_6120_HwErrStatus_Reserved2_RMASK 0x7
+#define QIB_6120_HwErrStatus_RXEMemParity_LSB 0x2C
+#define QIB_6120_HwErrStatus_RXEMemParity_RMASK 0x7F
+#define QIB_6120_HwErrStatus_TXEMemParity_LSB 0x28
+#define QIB_6120_HwErrStatus_TXEMemParity_RMASK 0xF
+#define QIB_6120_HwErrStatus_Reserved3_LSB 0x22
+#define QIB_6120_HwErrStatus_Reserved3_RMASK 0x3F
+#define QIB_6120_HwErrStatus_PCIeBusParity_LSB 0x1F
+#define QIB_6120_HwErrStatus_PCIeBusParity_RMASK 0x7
+#define QIB_6120_HwErrStatus_PcieCplTimeout_LSB 0x1E
+#define QIB_6120_HwErrStatus_PcieCplTimeout_RMASK 0x1
+#define QIB_6120_HwErrStatus_PoisenedTLP_LSB 0x1D
+#define QIB_6120_HwErrStatus_PoisenedTLP_RMASK 0x1
+#define QIB_6120_HwErrStatus_Reserved4_LSB 0x6
+#define QIB_6120_HwErrStatus_Reserved4_RMASK 0x7FFFFF
+#define QIB_6120_HwErrStatus_PCIeMemParity_LSB 0x0
+#define QIB_6120_HwErrStatus_PCIeMemParity_RMASK 0x3F
+
+#define QIB_6120_HwErrClear_OFFS 0xA8
+#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F
+#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1
+#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E
+#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1
+#define QIB_6120_HwErrClear_Reserved_LSB 0x3D
+#define QIB_6120_HwErrClear_Reserved_RMASK 0x1
+#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C
+#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1
+#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x3B
+#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1
+#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x3A
+#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1
+#define QIB_6120_HwErrClear_Reserved1_LSB 0x39
+#define QIB_6120_HwErrClear_Reserved1_RMASK 0x1
+#define QIB_6120_HwErrClear_IBPLLrfSlipClear_LSB 0x38
+#define QIB_6120_HwErrClear_IBPLLrfSlipClear_RMASK 0x1
+#define QIB_6120_HwErrClear_IBPLLfbSlipClear_LSB 0x37
+#define QIB_6120_HwErrClear_IBPLLfbSlipClear_RMASK 0x1
+#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_LSB 0x36
+#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1
+#define QIB_6120_HwErrClear_Reserved2_LSB 0x33
+#define QIB_6120_HwErrClear_Reserved2_RMASK 0x7
+#define QIB_6120_HwErrClear_RXEMemParityClear_LSB 0x2C
+#define QIB_6120_HwErrClear_RXEMemParityClear_RMASK 0x7F
+#define QIB_6120_HwErrClear_TXEMemParityClear_LSB 0x28
+#define QIB_6120_HwErrClear_TXEMemParityClear_RMASK 0xF
+#define QIB_6120_HwErrClear_Reserved3_LSB 0x22
+#define QIB_6120_HwErrClear_Reserved3_RMASK 0x3F
+#define QIB_6120_HwErrClear_PCIeBusParityClr_LSB 0x1F
+#define QIB_6120_HwErrClear_PCIeBusParityClr_RMASK 0x7
+#define QIB_6120_HwErrClear_PcieCplTimeoutClear_LSB 0x1E
+#define QIB_6120_HwErrClear_PcieCplTimeoutClear_RMASK 0x1
+#define QIB_6120_HwErrClear_PoisonedTLPClear_LSB 0x1D
+#define QIB_6120_HwErrClear_PoisonedTLPClear_RMASK 0x1
+#define QIB_6120_HwErrClear_Reserved4_LSB 0x6
+#define QIB_6120_HwErrClear_Reserved4_RMASK 0x7FFFFF
+#define QIB_6120_HwErrClear_PCIeMemParityClr_LSB 0x0
+#define QIB_6120_HwErrClear_PCIeMemParityClr_RMASK 0x3F
+
+#define QIB_6120_HwDiagCtrl_OFFS 0xB0
+#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F
+#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1
+#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E
+#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1
+#define QIB_6120_HwDiagCtrl_CounterWrEnable_LSB 0x3D
+#define QIB_6120_HwDiagCtrl_CounterWrEnable_RMASK 0x1
+#define QIB_6120_HwDiagCtrl_CounterDisable_LSB 0x3C
+#define QIB_6120_HwDiagCtrl_CounterDisable_RMASK 0x1
+#define QIB_6120_HwDiagCtrl_Reserved_LSB 0x33
+#define QIB_6120_HwDiagCtrl_Reserved_RMASK 0x1FF
+#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C
+#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F
+#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28
+#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF
+#define QIB_6120_HwDiagCtrl_Reserved1_LSB 0x23
+#define QIB_6120_HwDiagCtrl_Reserved1_RMASK 0x1F
+#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F
+#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF
+#define QIB_6120_HwDiagCtrl_Reserved2_LSB 0x6
+#define QIB_6120_HwDiagCtrl_Reserved2_RMASK 0x1FFFFFF
+#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_LSB 0x0
+#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_RMASK 0x3F
+
+#define QIB_6120_IBCStatus_OFFS 0xC0
+#define QIB_6120_IBCStatus_TxCreditOk_LSB 0x1F
+#define QIB_6120_IBCStatus_TxCreditOk_RMASK 0x1
+#define QIB_6120_IBCStatus_TxReady_LSB 0x1E
+#define QIB_6120_IBCStatus_TxReady_RMASK 0x1
+#define QIB_6120_IBCStatus_Reserved_LSB 0x7
+#define QIB_6120_IBCStatus_Reserved_RMASK 0x7FFFFF
+#define QIB_6120_IBCStatus_LinkState_LSB 0x4
+#define QIB_6120_IBCStatus_LinkState_RMASK 0x7
+#define QIB_6120_IBCStatus_LinkTrainingState_LSB 0x0
+#define QIB_6120_IBCStatus_LinkTrainingState_RMASK 0xF
+
+#define QIB_6120_IBCCtrl_OFFS 0xC8
+#define QIB_6120_IBCCtrl_Loopback_LSB 0x3F
+#define QIB_6120_IBCCtrl_Loopback_RMASK 0x1
+#define QIB_6120_IBCCtrl_LinkDownDefaultState_LSB 0x3E
+#define QIB_6120_IBCCtrl_LinkDownDefaultState_RMASK 0x1
+#define QIB_6120_IBCCtrl_Reserved_LSB 0x2B
+#define QIB_6120_IBCCtrl_Reserved_RMASK 0x7FFFF
+#define QIB_6120_IBCCtrl_CreditScale_LSB 0x28
+#define QIB_6120_IBCCtrl_CreditScale_RMASK 0x7
+#define QIB_6120_IBCCtrl_OverrunThreshold_LSB 0x24
+#define QIB_6120_IBCCtrl_OverrunThreshold_RMASK 0xF
+#define QIB_6120_IBCCtrl_PhyerrThreshold_LSB 0x20
+#define QIB_6120_IBCCtrl_PhyerrThreshold_RMASK 0xF
+#define QIB_6120_IBCCtrl_Reserved1_LSB 0x1F
+#define QIB_6120_IBCCtrl_Reserved1_RMASK 0x1
+#define QIB_6120_IBCCtrl_MaxPktLen_LSB 0x14
+#define QIB_6120_IBCCtrl_MaxPktLen_RMASK 0x7FF
+#define QIB_6120_IBCCtrl_LinkCmd_LSB 0x12
+#define QIB_6120_IBCCtrl_LinkCmd_RMASK 0x3
+#define QIB_6120_IBCCtrl_LinkInitCmd_LSB 0x10
+#define QIB_6120_IBCCtrl_LinkInitCmd_RMASK 0x3
+#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_LSB 0x8
+#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF
+#define QIB_6120_IBCCtrl_FlowCtrlPeriod_LSB 0x0
+#define QIB_6120_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF
+
+#define QIB_6120_EXTStatus_OFFS 0xD0
+#define QIB_6120_EXTStatus_GPIOIn_LSB 0x30
+#define QIB_6120_EXTStatus_GPIOIn_RMASK 0xFFFF
+#define QIB_6120_EXTStatus_Reserved_LSB 0x20
+#define QIB_6120_EXTStatus_Reserved_RMASK 0xFFFF
+#define QIB_6120_EXTStatus_Reserved1_LSB 0x10
+#define QIB_6120_EXTStatus_Reserved1_RMASK 0xFFFF
+#define QIB_6120_EXTStatus_MemBISTFoundErr_LSB 0xF
+#define QIB_6120_EXTStatus_MemBISTFoundErr_RMASK 0x1
+#define QIB_6120_EXTStatus_MemBISTEndTest_LSB 0xE
+#define QIB_6120_EXTStatus_MemBISTEndTest_RMASK 0x1
+#define QIB_6120_EXTStatus_Reserved2_LSB 0x0
+#define QIB_6120_EXTStatus_Reserved2_RMASK 0x3FFF
+
+#define QIB_6120_EXTCtrl_OFFS 0xD8
+#define QIB_6120_EXTCtrl_GPIOOe_LSB 0x30
+#define QIB_6120_EXTCtrl_GPIOOe_RMASK 0xFFFF
+#define QIB_6120_EXTCtrl_GPIOInvert_LSB 0x20
+#define QIB_6120_EXTCtrl_GPIOInvert_RMASK 0xFFFF
+#define QIB_6120_EXTCtrl_Reserved_LSB 0x4
+#define QIB_6120_EXTCtrl_Reserved_RMASK 0xFFFFFFF
+#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_LSB 0x3
+#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1
+#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_LSB 0x2
+#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1
+#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_LSB 0x1
+#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1
+#define QIB_6120_EXTCtrl_LEDGblErrRedOff_LSB 0x0
+#define QIB_6120_EXTCtrl_LEDGblErrRedOff_RMASK 0x1
+
+#define QIB_6120_GPIOOut_OFFS 0xE0
+
+#define QIB_6120_GPIOMask_OFFS 0xE8
+
+#define QIB_6120_GPIOStatus_OFFS 0xF0
+
+#define QIB_6120_GPIOClear_OFFS 0xF8
+
+#define QIB_6120_RcvCtrl_OFFS 0x100
+#define QIB_6120_RcvCtrl_TailUpd_LSB 0x1F
+#define QIB_6120_RcvCtrl_TailUpd_RMASK 0x1
+#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_LSB 0x1E
+#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1
+#define QIB_6120_RcvCtrl_Reserved_LSB 0x15
+#define QIB_6120_RcvCtrl_Reserved_RMASK 0x1FF
+#define QIB_6120_RcvCtrl_IntrAvail_LSB 0x10
+#define QIB_6120_RcvCtrl_IntrAvail_RMASK 0x1F
+#define QIB_6120_RcvCtrl_Reserved1_LSB 0x9
+#define QIB_6120_RcvCtrl_Reserved1_RMASK 0x7F
+#define QIB_6120_RcvCtrl_Reserved2_LSB 0x5
+#define QIB_6120_RcvCtrl_Reserved2_RMASK 0xF
+#define QIB_6120_RcvCtrl_PortEnable_LSB 0x0
+#define QIB_6120_RcvCtrl_PortEnable_RMASK 0x1F
+
+#define QIB_6120_RcvBTHQP_OFFS 0x108
+#define QIB_6120_RcvBTHQP_BTHQP_Mask_LSB 0x1E
+#define QIB_6120_RcvBTHQP_BTHQP_Mask_RMASK 0x3
+#define QIB_6120_RcvBTHQP_Reserved_LSB 0x18
+#define QIB_6120_RcvBTHQP_Reserved_RMASK 0x3F
+#define QIB_6120_RcvBTHQP_RcvBTHQP_LSB 0x0
+#define QIB_6120_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF
+
+#define QIB_6120_RcvHdrSize_OFFS 0x110
+
+#define QIB_6120_RcvHdrCnt_OFFS 0x118
+
+#define QIB_6120_RcvHdrEntSize_OFFS 0x120
+
+#define QIB_6120_RcvTIDBase_OFFS 0x128
+
+#define QIB_6120_RcvTIDCnt_OFFS 0x130
+
+#define QIB_6120_RcvEgrBase_OFFS 0x138
+
+#define QIB_6120_RcvEgrCnt_OFFS 0x140
+
+#define QIB_6120_RcvBufBase_OFFS 0x148
+
+#define QIB_6120_RcvBufSize_OFFS 0x150
+
+#define QIB_6120_RxIntMemBase_OFFS 0x158
+
+#define QIB_6120_RxIntMemSize_OFFS 0x160
+
+#define QIB_6120_RcvPartitionKey_OFFS 0x168
+
+#define QIB_6120_RcvPktLEDCnt_OFFS 0x178
+#define QIB_6120_RcvPktLEDCnt_ONperiod_LSB 0x20
+#define QIB_6120_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF
+#define QIB_6120_RcvPktLEDCnt_OFFperiod_LSB 0x0
+#define QIB_6120_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF
+
+#define QIB_6120_SendCtrl_OFFS 0x1C0
+#define QIB_6120_SendCtrl_Disarm_LSB 0x1F
+#define QIB_6120_SendCtrl_Disarm_RMASK 0x1
+#define QIB_6120_SendCtrl_Reserved_LSB 0x17
+#define QIB_6120_SendCtrl_Reserved_RMASK 0xFF
+#define QIB_6120_SendCtrl_DisarmPIOBuf_LSB 0x10
+#define QIB_6120_SendCtrl_DisarmPIOBuf_RMASK 0x7F
+#define QIB_6120_SendCtrl_Reserved1_LSB 0x4
+#define QIB_6120_SendCtrl_Reserved1_RMASK 0xFFF
+#define QIB_6120_SendCtrl_PIOEnable_LSB 0x3
+#define QIB_6120_SendCtrl_PIOEnable_RMASK 0x1
+#define QIB_6120_SendCtrl_PIOBufAvailUpd_LSB 0x2
+#define QIB_6120_SendCtrl_PIOBufAvailUpd_RMASK 0x1
+#define QIB_6120_SendCtrl_PIOIntBufAvail_LSB 0x1
+#define QIB_6120_SendCtrl_PIOIntBufAvail_RMASK 0x1
+#define QIB_6120_SendCtrl_Abort_LSB 0x0
+#define QIB_6120_SendCtrl_Abort_RMASK 0x1
+
+#define QIB_6120_SendPIOBufBase_OFFS 0x1C8
+#define QIB_6120_SendPIOBufBase_Reserved_LSB 0x35
+#define QIB_6120_SendPIOBufBase_Reserved_RMASK 0x7FF
+#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_LSB 0x20
+#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF
+#define QIB_6120_SendPIOBufBase_Reserved1_LSB 0x15
+#define QIB_6120_SendPIOBufBase_Reserved1_RMASK 0x7FF
+#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_LSB 0x0
+#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF
+
+#define QIB_6120_SendPIOSize_OFFS 0x1D0
+#define QIB_6120_SendPIOSize_Reserved_LSB 0x2D
+#define QIB_6120_SendPIOSize_Reserved_RMASK 0xFFFFF
+#define QIB_6120_SendPIOSize_Size_LargePIO_LSB 0x20
+#define QIB_6120_SendPIOSize_Size_LargePIO_RMASK 0x1FFF
+#define QIB_6120_SendPIOSize_Reserved1_LSB 0xC
+#define QIB_6120_SendPIOSize_Reserved1_RMASK 0xFFFFF
+#define QIB_6120_SendPIOSize_Size_SmallPIO_LSB 0x0
+#define QIB_6120_SendPIOSize_Size_SmallPIO_RMASK 0xFFF
+
+#define QIB_6120_SendPIOBufCnt_OFFS 0x1D8
+#define QIB_6120_SendPIOBufCnt_Reserved_LSB 0x24
+#define QIB_6120_SendPIOBufCnt_Reserved_RMASK 0xFFFFFFF
+#define QIB_6120_SendPIOBufCnt_Num_LargePIO_LSB 0x20
+#define QIB_6120_SendPIOBufCnt_Num_LargePIO_RMASK 0xF
+#define QIB_6120_SendPIOBufCnt_Reserved1_LSB 0x9
+#define QIB_6120_SendPIOBufCnt_Reserved1_RMASK 0x7FFFFF
+#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_LSB 0x0
+#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_RMASK 0x1FF
+
+#define QIB_6120_SendPIOAvailAddr_OFFS 0x1E0
+#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_LSB 0x6
+#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_RMASK 0x3FFFFFFFF
+#define QIB_6120_SendPIOAvailAddr_Reserved_LSB 0x0
+#define QIB_6120_SendPIOAvailAddr_Reserved_RMASK 0x3F
+
+#define QIB_6120_SendBufErr0_OFFS 0x240
+#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_LSB 0x0
+#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_RMASK 0x0
+
+#define QIB_6120_RcvHdrAddr0_OFFS 0x280
+#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2
+#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF
+#define QIB_6120_RcvHdrAddr0_Reserved_LSB 0x0
+#define QIB_6120_RcvHdrAddr0_Reserved_RMASK 0x3
+
+#define QIB_6120_RcvHdrTailAddr0_OFFS 0x300
+#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2
+#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF
+#define QIB_6120_RcvHdrTailAddr0_Reserved_LSB 0x0
+#define QIB_6120_RcvHdrTailAddr0_Reserved_RMASK 0x3
+
+#define QIB_6120_SerdesCfg0_OFFS 0x3C0
+#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_LSB 0x3F
+#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_RMASK 0x1
+#define QIB_6120_SerdesCfg0_Reserved_LSB 0x38
+#define QIB_6120_SerdesCfg0_Reserved_RMASK 0x7F
+#define QIB_6120_SerdesCfg0_RxEqCtl_LSB 0x36
+#define QIB_6120_SerdesCfg0_RxEqCtl_RMASK 0x3
+#define QIB_6120_SerdesCfg0_TxTermAdj_LSB 0x34
+#define QIB_6120_SerdesCfg0_TxTermAdj_RMASK 0x3
+#define QIB_6120_SerdesCfg0_RxTermAdj_LSB 0x32
+#define QIB_6120_SerdesCfg0_RxTermAdj_RMASK 0x3
+#define QIB_6120_SerdesCfg0_TermAdj1_LSB 0x31
+#define QIB_6120_SerdesCfg0_TermAdj1_RMASK 0x1
+#define QIB_6120_SerdesCfg0_TermAdj0_LSB 0x30
+#define QIB_6120_SerdesCfg0_TermAdj0_RMASK 0x1
+#define QIB_6120_SerdesCfg0_LPBKA_LSB 0x2F
+#define QIB_6120_SerdesCfg0_LPBKA_RMASK 0x1
+#define QIB_6120_SerdesCfg0_LPBKB_LSB 0x2E
+#define QIB_6120_SerdesCfg0_LPBKB_RMASK 0x1
+#define QIB_6120_SerdesCfg0_LPBKC_LSB 0x2D
+#define QIB_6120_SerdesCfg0_LPBKC_RMASK 0x1
+#define QIB_6120_SerdesCfg0_LPBKD_LSB 0x2C
+#define QIB_6120_SerdesCfg0_LPBKD_RMASK 0x1
+#define QIB_6120_SerdesCfg0_PW_LSB 0x2B
+#define QIB_6120_SerdesCfg0_PW_RMASK 0x1
+#define QIB_6120_SerdesCfg0_RefSel_LSB 0x29
+#define QIB_6120_SerdesCfg0_RefSel_RMASK 0x3
+#define QIB_6120_SerdesCfg0_ParReset_LSB 0x28
+#define QIB_6120_SerdesCfg0_ParReset_RMASK 0x1
+#define QIB_6120_SerdesCfg0_ParLPBK_LSB 0x27
+#define QIB_6120_SerdesCfg0_ParLPBK_RMASK 0x1
+#define QIB_6120_SerdesCfg0_OffsetEn_LSB 0x26
+#define QIB_6120_SerdesCfg0_OffsetEn_RMASK 0x1
+#define QIB_6120_SerdesCfg0_Offset_LSB 0x1E
+#define QIB_6120_SerdesCfg0_Offset_RMASK 0xFF
+#define QIB_6120_SerdesCfg0_L2PwrDn_LSB 0x1D
+#define QIB_6120_SerdesCfg0_L2PwrDn_RMASK 0x1
+#define QIB_6120_SerdesCfg0_ResetPLL_LSB 0x1C
+#define QIB_6120_SerdesCfg0_ResetPLL_RMASK 0x1
+#define QIB_6120_SerdesCfg0_RxTermEnX_LSB 0x18
+#define QIB_6120_SerdesCfg0_RxTermEnX_RMASK 0xF
+#define QIB_6120_SerdesCfg0_BeaconTxEnX_LSB 0x14
+#define QIB_6120_SerdesCfg0_BeaconTxEnX_RMASK 0xF
+#define QIB_6120_SerdesCfg0_RxDetEnX_LSB 0x10
+#define QIB_6120_SerdesCfg0_RxDetEnX_RMASK 0xF
+#define QIB_6120_SerdesCfg0_TxIdeEnX_LSB 0xC
+#define QIB_6120_SerdesCfg0_TxIdeEnX_RMASK 0xF
+#define QIB_6120_SerdesCfg0_RxIdleEnX_LSB 0x8
+#define QIB_6120_SerdesCfg0_RxIdleEnX_RMASK 0xF
+#define QIB_6120_SerdesCfg0_L1PwrDnA_LSB 0x7
+#define QIB_6120_SerdesCfg0_L1PwrDnA_RMASK 0x1
+#define QIB_6120_SerdesCfg0_L1PwrDnB_LSB 0x6
+#define QIB_6120_SerdesCfg0_L1PwrDnB_RMASK 0x1
+#define QIB_6120_SerdesCfg0_L1PwrDnC_LSB 0x5
+#define QIB_6120_SerdesCfg0_L1PwrDnC_RMASK 0x1
+#define QIB_6120_SerdesCfg0_L1PwrDnD_LSB 0x4
+#define QIB_6120_SerdesCfg0_L1PwrDnD_RMASK 0x1
+#define QIB_6120_SerdesCfg0_ResetA_LSB 0x3
+#define QIB_6120_SerdesCfg0_ResetA_RMASK 0x1
+#define QIB_6120_SerdesCfg0_ResetB_LSB 0x2
+#define QIB_6120_SerdesCfg0_ResetB_RMASK 0x1
+#define QIB_6120_SerdesCfg0_ResetC_LSB 0x1
+#define QIB_6120_SerdesCfg0_ResetC_RMASK 0x1
+#define QIB_6120_SerdesCfg0_ResetD_LSB 0x0
+#define QIB_6120_SerdesCfg0_ResetD_RMASK 0x1
+
+#define QIB_6120_SerdesStat_OFFS 0x3D0
+#define QIB_6120_SerdesStat_Reserved_LSB 0xC
+#define QIB_6120_SerdesStat_Reserved_RMASK 0xFFFFFFFFFFFFF
+#define QIB_6120_SerdesStat_BeaconDetA_LSB 0xB
+#define QIB_6120_SerdesStat_BeaconDetA_RMASK 0x1
+#define QIB_6120_SerdesStat_BeaconDetB_LSB 0xA
+#define QIB_6120_SerdesStat_BeaconDetB_RMASK 0x1
+#define QIB_6120_SerdesStat_BeaconDetC_LSB 0x9
+#define QIB_6120_SerdesStat_BeaconDetC_RMASK 0x1
+#define QIB_6120_SerdesStat_BeaconDetD_LSB 0x8
+#define QIB_6120_SerdesStat_BeaconDetD_RMASK 0x1
+#define QIB_6120_SerdesStat_RxDetA_LSB 0x7
+#define QIB_6120_SerdesStat_RxDetA_RMASK 0x1
+#define QIB_6120_SerdesStat_RxDetB_LSB 0x6
+#define QIB_6120_SerdesStat_RxDetB_RMASK 0x1
+#define QIB_6120_SerdesStat_RxDetC_LSB 0x5
+#define QIB_6120_SerdesStat_RxDetC_RMASK 0x1
+#define QIB_6120_SerdesStat_RxDetD_LSB 0x4
+#define QIB_6120_SerdesStat_RxDetD_RMASK 0x1
+#define QIB_6120_SerdesStat_TxIdleDetA_LSB 0x3
+#define QIB_6120_SerdesStat_TxIdleDetA_RMASK 0x1
+#define QIB_6120_SerdesStat_TxIdleDetB_LSB 0x2
+#define QIB_6120_SerdesStat_TxIdleDetB_RMASK 0x1
+#define QIB_6120_SerdesStat_TxIdleDetC_LSB 0x1
+#define QIB_6120_SerdesStat_TxIdleDetC_RMASK 0x1
+#define QIB_6120_SerdesStat_TxIdleDetD_LSB 0x0
+#define QIB_6120_SerdesStat_TxIdleDetD_RMASK 0x1
+
+#define QIB_6120_XGXSCfg_OFFS 0x3D8
+#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_LSB 0x3F
+#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_RMASK 0x1
+#define QIB_6120_XGXSCfg_Reserved_LSB 0x17
+#define QIB_6120_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFF
+#define QIB_6120_XGXSCfg_polarity_inv_LSB 0x13
+#define QIB_6120_XGXSCfg_polarity_inv_RMASK 0xF
+#define QIB_6120_XGXSCfg_link_sync_mask_LSB 0x9
+#define QIB_6120_XGXSCfg_link_sync_mask_RMASK 0x3FF
+#define QIB_6120_XGXSCfg_port_addr_LSB 0x4
+#define QIB_6120_XGXSCfg_port_addr_RMASK 0x1F
+#define QIB_6120_XGXSCfg_mdd_30_LSB 0x3
+#define QIB_6120_XGXSCfg_mdd_30_RMASK 0x1
+#define QIB_6120_XGXSCfg_xcv_resetn_LSB 0x2
+#define QIB_6120_XGXSCfg_xcv_resetn_RMASK 0x1
+#define QIB_6120_XGXSCfg_Reserved1_LSB 0x1
+#define QIB_6120_XGXSCfg_Reserved1_RMASK 0x1
+#define QIB_6120_XGXSCfg_tx_rx_resetn_LSB 0x0
+#define QIB_6120_XGXSCfg_tx_rx_resetn_RMASK 0x1
+
+#define QIB_6120_LBIntCnt_OFFS 0x12000
+
+#define QIB_6120_LBFlowStallCnt_OFFS 0x12008
+
+#define QIB_6120_TxUnsupVLErrCnt_OFFS 0x12018
+
+#define QIB_6120_TxDataPktCnt_OFFS 0x12020
+
+#define QIB_6120_TxFlowPktCnt_OFFS 0x12028
+
+#define QIB_6120_TxDwordCnt_OFFS 0x12030
+
+#define QIB_6120_TxLenErrCnt_OFFS 0x12038
+
+#define QIB_6120_TxMaxMinLenErrCnt_OFFS 0x12040
+
+#define QIB_6120_TxUnderrunCnt_OFFS 0x12048
+
+#define QIB_6120_TxFlowStallCnt_OFFS 0x12050
+
+#define QIB_6120_TxDroppedPktCnt_OFFS 0x12058
+
+#define QIB_6120_RxDroppedPktCnt_OFFS 0x12060
+
+#define QIB_6120_RxDataPktCnt_OFFS 0x12068
+
+#define QIB_6120_RxFlowPktCnt_OFFS 0x12070
+
+#define QIB_6120_RxDwordCnt_OFFS 0x12078
+
+#define QIB_6120_RxLenErrCnt_OFFS 0x12080
+
+#define QIB_6120_RxMaxMinLenErrCnt_OFFS 0x12088
+
+#define QIB_6120_RxICRCErrCnt_OFFS 0x12090
+
+#define QIB_6120_RxVCRCErrCnt_OFFS 0x12098
+
+#define QIB_6120_RxFlowCtrlErrCnt_OFFS 0x120A0
+
+#define QIB_6120_RxBadFormatCnt_OFFS 0x120A8
+
+#define QIB_6120_RxLinkProblemCnt_OFFS 0x120B0
+
+#define QIB_6120_RxEBPCnt_OFFS 0x120B8
+
+#define QIB_6120_RxLPCRCErrCnt_OFFS 0x120C0
+
+#define QIB_6120_RxBufOvflCnt_OFFS 0x120C8
+
+#define QIB_6120_RxTIDFullErrCnt_OFFS 0x120D0
+
+#define QIB_6120_RxTIDValidErrCnt_OFFS 0x120D8
+
+#define QIB_6120_RxPKeyMismatchCnt_OFFS 0x120E0
+
+#define QIB_6120_RxP0HdrEgrOvflCnt_OFFS 0x120E8
+
+#define QIB_6120_IBStatusChangeCnt_OFFS 0x12140
+
+#define QIB_6120_IBLinkErrRecoveryCnt_OFFS 0x12148
+
+#define QIB_6120_IBLinkDownedCnt_OFFS 0x12150
+
+#define QIB_6120_IBSymbolErrCnt_OFFS 0x12158
+
+#define QIB_6120_PcieRetryBufDiagQwordCnt_OFFS 0x12170
+
+#define QIB_6120_RcvEgrArray0_OFFS 0x14000
+
+#define QIB_6120_RcvTIDArray0_OFFS 0x54000
+
+#define QIB_6120_PIOLaunchFIFO_OFFS 0x64000
+
+#define QIB_6120_SendPIOpbcCache_OFFS 0x64800
+
+#define QIB_6120_RcvBuf1_OFFS 0x72000
+
+#define QIB_6120_RcvBuf2_OFFS 0x75000
+
+#define QIB_6120_RcvFlags_OFFS 0x77000
+
+#define QIB_6120_RcvLookupBuf1_OFFS 0x79000
+
+#define QIB_6120_RcvDMABuf_OFFS 0x7B000
+
+#define QIB_6120_MiscRXEIntMem_OFFS 0x7C000
+
+#define QIB_6120_PCIERcvBuf_OFFS 0x80000
+
+#define QIB_6120_PCIERetryBuf_OFFS 0x82000
+
+#define QIB_6120_PCIERcvBufRdToWrAddr_OFFS 0x84000
+
+#define QIB_6120_PIOBuf0_MA_OFFS 0x100000
diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h
new file mode 100644
index 00000000000..a5356cb4252
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_7220.h
@@ -0,0 +1,149 @@
+#ifndef _QIB_7220_H
+#define _QIB_7220_H
+/*
+ * Copyright (c) 2007, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Pull in the register definitions auto-generated from the hardware RTL */
+#include "qib_7220_regs.h"
+
+/* The number of eager receive TIDs for context zero. */
+#define IBA7220_KRCVEGRCNT 2048U
+
+#define IB_7220_LT_STATE_CFGRCVFCFG 0x09
+#define IB_7220_LT_STATE_CFGWAITRMT 0x0a
+#define IB_7220_LT_STATE_TXREVLANES 0x0d
+#define IB_7220_LT_STATE_CFGENH 0x10
+
+struct qib_chip_specific {
+ u64 __iomem *cregbase;
+ u64 *cntrs;
+ u64 *portcntrs;
+ spinlock_t sdepb_lock; /* serdes EPB bus */
+ spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */
+ spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */
+ u64 hwerrmask;
+ u64 errormask;
+ u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */
+ u64 gpio_mask; /* shadow the gpio mask register */
+ u64 extctrl; /* shadow the gpio output enable, etc... */
+ u32 ncntrs;
+ u32 nportcntrs;
+ u32 cntrnamelen;
+ u32 portcntrnamelen;
+ u32 numctxts;
+ u32 rcvegrcnt;
+ u32 autoneg_tries;
+ u32 serdes_first_init_done;
+ u32 sdmabufcnt;
+ u32 lastbuf_for_pio;
+ u32 updthresh; /* current AvailUpdThld */
+ u32 updthresh_dflt; /* default AvailUpdThld */
+ int irq;
+ u8 presets_needed;
+ u8 relock_timer_active;
+ char emsgbuf[128];
+ char sdmamsgbuf[192];
+ char bitsmsgbuf[64];
+ struct timer_list relock_timer;
+ unsigned int relock_interval; /* in jiffies */
+};
+
+struct qib_chippport_specific {
+ struct qib_pportdata pportdata;
+ wait_queue_head_t autoneg_wait;
+ struct delayed_work autoneg_work;
+ struct timer_list chase_timer;
+ /*
+ * These 5 fields are used to establish deltas for IB symbol
+ * errors and link recovery errors. They can be reported on
+ * some chips during link negotiation prior to INIT, and with
+ * DDR when faking DDR negotiations with non-IBTA switches.
+ * The chip counters are adjusted at driver unload if there is
+ * a non-zero delta.
+ */
+ u64 ibdeltainprog;
+ u64 ibsymdelta;
+ u64 ibsymsnap;
+ u64 iblnkerrdelta;
+ u64 iblnkerrsnap;
+ u64 ibcctrl; /* kr_ibcctrl shadow */
+ u64 ibcddrctrl; /* kr_ibcddrctrl shadow */
+ unsigned long chase_end;
+ u32 last_delay_mult;
+};
+
+/*
+ * This header file provides the declarations and common definitions
+ * for (mostly) manipulation of the SerDes blocks within the IBA7220.
+ * The functions declared should only be called from within other
+ * 7220-related files such as qib_iba7220.c or qib_sd7220.c.
+ */
+int qib_sd7220_presets(struct qib_devdata *dd);
+int qib_sd7220_init(struct qib_devdata *dd);
+void qib_sd7220_clr_ibpar(struct qib_devdata *);
+/*
+ * Used below for the sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB, which is the
+ * only one currently used.
+ */
+#define IB_7220_SERDES 2
+
+static inline u32 qib_read_kreg32(const struct qib_devdata *dd,
+ const u16 regno)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return -1;
+ return readl((u32 __iomem *)&dd->kregbase[regno]);
+}
+
+static inline u64 qib_read_kreg64(const struct qib_devdata *dd,
+ const u16 regno)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return -1;
+
+ return readq(&dd->kregbase[regno]);
+}
+
+static inline void qib_write_kreg(const struct qib_devdata *dd,
+ const u16 regno, u64 value)
+{
+ if (dd->kregbase)
+ writeq(value, &dd->kregbase[regno]);
+}
+
+void set_7220_relock_poll(struct qib_devdata *, int);
+void shutdown_7220_relock_poll(struct qib_devdata *);
+void toggle_7220_rclkrls(struct qib_devdata *);
+
+
+#endif /* _QIB_7220_H */
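
Every field in the generated register headers above is described by the same pair of macros: <name>_LSB is the bit offset of the field's least-significant bit within the 64-bit register, and <name>_RMASK is the field's mask once the field has been shifted down to bit 0. The sketch below only illustrates that shift-and-mask convention on a value such as one returned by qib_read_kreg64(); the extract_field() helper and the sample register value are hypothetical and are not part of the driver, which open-codes the same pattern where needed.

/* Minimal, self-contained sketch of the _LSB/_RMASK convention used by the
 * generated qib register headers. extract_field() is a hypothetical helper.
 */
#include <stdint.h>
#include <stdio.h>

/* Values copied from the QIB_6120_SendCtrl definitions above. */
#define QIB_6120_SendCtrl_DisarmPIOBuf_LSB 0x10
#define QIB_6120_SendCtrl_DisarmPIOBuf_RMASK 0x7F

static uint64_t extract_field(uint64_t regval, unsigned int lsb, uint64_t rmask)
{
	/* Shift the field down to bit 0, then keep only its bits. */
	return (regval >> lsb) & rmask;
}

int main(void)
{
	uint64_t sendctrl = 0x00250000ULL;	/* hypothetical register value */
	uint64_t buf = extract_field(sendctrl,
				     QIB_6120_SendCtrl_DisarmPIOBuf_LSB,
				     QIB_6120_SendCtrl_DisarmPIOBuf_RMASK);

	printf("DisarmPIOBuf field = 0x%llx\n", (unsigned long long)buf);
	return 0;
}

Writing a field is the mirror image: shift the new value left by the field's _LSB and, when a read-modify-write is required, first clear the old bits using the _RMASK shifted left by the same amount.
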
diff --git a/drivers/infiniband/hw/qib/qib_7220_regs.h b/drivers/infiniband/hw/qib/qib_7220_regs.h
new file mode 100644
index 00000000000..0da5bb750e5
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_7220_regs.h
@@ -0,0 +1,1496 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/* This file is mechanically generated from RTL. Any hand-edits will be lost! */
+
+#define QIB_7220_Revision_OFFS 0x0
+#define QIB_7220_Revision_R_Simulator_LSB 0x3F
+#define QIB_7220_Revision_R_Simulator_RMASK 0x1
+#define QIB_7220_Revision_R_Emulation_LSB 0x3E
+#define QIB_7220_Revision_R_Emulation_RMASK 0x1
+#define QIB_7220_Revision_R_Emulation_Revcode_LSB 0x28
+#define QIB_7220_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF
+#define QIB_7220_Revision_BoardID_LSB 0x20
+#define QIB_7220_Revision_BoardID_RMASK 0xFF
+#define QIB_7220_Revision_R_SW_LSB 0x18
+#define QIB_7220_Revision_R_SW_RMASK 0xFF
+#define QIB_7220_Revision_R_Arch_LSB 0x10
+#define QIB_7220_Revision_R_Arch_RMASK 0xFF
+#define QIB_7220_Revision_R_ChipRevMajor_LSB 0x8
+#define QIB_7220_Revision_R_ChipRevMajor_RMASK 0xFF
+#define QIB_7220_Revision_R_ChipRevMinor_LSB 0x0
+#define QIB_7220_Revision_R_ChipRevMinor_RMASK 0xFF
+
+#define QIB_7220_Control_OFFS 0x8
+#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_LSB 0x7
+#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_RMASK 0x1
+#define QIB_7220_Control_PCIECplQDiagEn_LSB 0x6
+#define QIB_7220_Control_PCIECplQDiagEn_RMASK 0x1
+#define QIB_7220_Control_Reserved_LSB 0x5
+#define QIB_7220_Control_Reserved_RMASK 0x1
+#define QIB_7220_Control_TxLatency_LSB 0x4
+#define QIB_7220_Control_TxLatency_RMASK 0x1
+#define QIB_7220_Control_PCIERetryBufDiagEn_LSB 0x3
+#define QIB_7220_Control_PCIERetryBufDiagEn_RMASK 0x1
+#define QIB_7220_Control_LinkEn_LSB 0x2
+#define QIB_7220_Control_LinkEn_RMASK 0x1
+#define QIB_7220_Control_FreezeMode_LSB 0x1
+#define QIB_7220_Control_FreezeMode_RMASK 0x1
+#define QIB_7220_Control_SyncReset_LSB 0x0
+#define QIB_7220_Control_SyncReset_RMASK 0x1
+
+#define QIB_7220_PageAlign_OFFS 0x10
+
+#define QIB_7220_PortCnt_OFFS 0x18
+
+#define QIB_7220_SendRegBase_OFFS 0x30
+
+#define QIB_7220_UserRegBase_OFFS 0x38
+
+#define QIB_7220_CntrRegBase_OFFS 0x40
+
+#define QIB_7220_Scratch_OFFS 0x48
+
+#define QIB_7220_IntMask_OFFS 0x68
+#define QIB_7220_IntMask_SDmaIntMask_LSB 0x3F
+#define QIB_7220_IntMask_SDmaIntMask_RMASK 0x1
+#define QIB_7220_IntMask_SDmaDisabledMasked_LSB 0x3E
+#define QIB_7220_IntMask_SDmaDisabledMasked_RMASK 0x1
+#define QIB_7220_IntMask_Reserved_LSB 0x31
+#define QIB_7220_IntMask_Reserved_RMASK 0x1FFF
+#define QIB_7220_IntMask_RcvUrg16IntMask_LSB 0x30
+#define QIB_7220_IntMask_RcvUrg16IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg15IntMask_LSB 0x2F
+#define QIB_7220_IntMask_RcvUrg15IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg14IntMask_LSB 0x2E
+#define QIB_7220_IntMask_RcvUrg14IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg13IntMask_LSB 0x2D
+#define QIB_7220_IntMask_RcvUrg13IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg12IntMask_LSB 0x2C
+#define QIB_7220_IntMask_RcvUrg12IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg11IntMask_LSB 0x2B
+#define QIB_7220_IntMask_RcvUrg11IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg10IntMask_LSB 0x2A
+#define QIB_7220_IntMask_RcvUrg10IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg9IntMask_LSB 0x29
+#define QIB_7220_IntMask_RcvUrg9IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg8IntMask_LSB 0x28
+#define QIB_7220_IntMask_RcvUrg8IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg7IntMask_LSB 0x27
+#define QIB_7220_IntMask_RcvUrg7IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg6IntMask_LSB 0x26
+#define QIB_7220_IntMask_RcvUrg6IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg5IntMask_LSB 0x25
+#define QIB_7220_IntMask_RcvUrg5IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg4IntMask_LSB 0x24
+#define QIB_7220_IntMask_RcvUrg4IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg3IntMask_LSB 0x23
+#define QIB_7220_IntMask_RcvUrg3IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg2IntMask_LSB 0x22
+#define QIB_7220_IntMask_RcvUrg2IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg1IntMask_LSB 0x21
+#define QIB_7220_IntMask_RcvUrg1IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvUrg0IntMask_LSB 0x20
+#define QIB_7220_IntMask_RcvUrg0IntMask_RMASK 0x1
+#define QIB_7220_IntMask_ErrorIntMask_LSB 0x1F
+#define QIB_7220_IntMask_ErrorIntMask_RMASK 0x1
+#define QIB_7220_IntMask_PioSetIntMask_LSB 0x1E
+#define QIB_7220_IntMask_PioSetIntMask_RMASK 0x1
+#define QIB_7220_IntMask_PioBufAvailIntMask_LSB 0x1D
+#define QIB_7220_IntMask_PioBufAvailIntMask_RMASK 0x1
+#define QIB_7220_IntMask_assertGPIOIntMask_LSB 0x1C
+#define QIB_7220_IntMask_assertGPIOIntMask_RMASK 0x1
+#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_LSB 0x1B
+#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_RMASK 0x1
+#define QIB_7220_IntMask_JIntMask_LSB 0x1A
+#define QIB_7220_IntMask_JIntMask_RMASK 0x1
+#define QIB_7220_IntMask_Reserved1_LSB 0x11
+#define QIB_7220_IntMask_Reserved1_RMASK 0x1FF
+#define QIB_7220_IntMask_RcvAvail16IntMask_LSB 0x10
+#define QIB_7220_IntMask_RcvAvail16IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail15IntMask_LSB 0xF
+#define QIB_7220_IntMask_RcvAvail15IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail14IntMask_LSB 0xE
+#define QIB_7220_IntMask_RcvAvail14IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail13IntMask_LSB 0xD
+#define QIB_7220_IntMask_RcvAvail13IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail12IntMask_LSB 0xC
+#define QIB_7220_IntMask_RcvAvail12IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail11IntMask_LSB 0xB
+#define QIB_7220_IntMask_RcvAvail11IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail10IntMask_LSB 0xA
+#define QIB_7220_IntMask_RcvAvail10IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail9IntMask_LSB 0x9
+#define QIB_7220_IntMask_RcvAvail9IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail8IntMask_LSB 0x8
+#define QIB_7220_IntMask_RcvAvail8IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail7IntMask_LSB 0x7
+#define QIB_7220_IntMask_RcvAvail7IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail6IntMask_LSB 0x6
+#define QIB_7220_IntMask_RcvAvail6IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail5IntMask_LSB 0x5
+#define QIB_7220_IntMask_RcvAvail5IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail4IntMask_LSB 0x4
+#define QIB_7220_IntMask_RcvAvail4IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail3IntMask_LSB 0x3
+#define QIB_7220_IntMask_RcvAvail3IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail2IntMask_LSB 0x2
+#define QIB_7220_IntMask_RcvAvail2IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail1IntMask_LSB 0x1
+#define QIB_7220_IntMask_RcvAvail1IntMask_RMASK 0x1
+#define QIB_7220_IntMask_RcvAvail0IntMask_LSB 0x0
+#define QIB_7220_IntMask_RcvAvail0IntMask_RMASK 0x1
+
+#define QIB_7220_IntStatus_OFFS 0x70
+#define QIB_7220_IntStatus_SDmaInt_LSB 0x3F
+#define QIB_7220_IntStatus_SDmaInt_RMASK 0x1
+#define QIB_7220_IntStatus_SDmaDisabled_LSB 0x3E
+#define QIB_7220_IntStatus_SDmaDisabled_RMASK 0x1
+#define QIB_7220_IntStatus_Reserved_LSB 0x31
+#define QIB_7220_IntStatus_Reserved_RMASK 0x1FFF
+#define QIB_7220_IntStatus_RcvUrg16_LSB 0x30
+#define QIB_7220_IntStatus_RcvUrg16_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg15_LSB 0x2F
+#define QIB_7220_IntStatus_RcvUrg15_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg14_LSB 0x2E
+#define QIB_7220_IntStatus_RcvUrg14_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg13_LSB 0x2D
+#define QIB_7220_IntStatus_RcvUrg13_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg12_LSB 0x2C
+#define QIB_7220_IntStatus_RcvUrg12_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg11_LSB 0x2B
+#define QIB_7220_IntStatus_RcvUrg11_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg10_LSB 0x2A
+#define QIB_7220_IntStatus_RcvUrg10_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg9_LSB 0x29
+#define QIB_7220_IntStatus_RcvUrg9_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg8_LSB 0x28
+#define QIB_7220_IntStatus_RcvUrg8_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg7_LSB 0x27
+#define QIB_7220_IntStatus_RcvUrg7_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg6_LSB 0x26
+#define QIB_7220_IntStatus_RcvUrg6_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg5_LSB 0x25
+#define QIB_7220_IntStatus_RcvUrg5_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg4_LSB 0x24
+#define QIB_7220_IntStatus_RcvUrg4_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg3_LSB 0x23
+#define QIB_7220_IntStatus_RcvUrg3_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg2_LSB 0x22
+#define QIB_7220_IntStatus_RcvUrg2_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg1_LSB 0x21
+#define QIB_7220_IntStatus_RcvUrg1_RMASK 0x1
+#define QIB_7220_IntStatus_RcvUrg0_LSB 0x20
+#define QIB_7220_IntStatus_RcvUrg0_RMASK 0x1
+#define QIB_7220_IntStatus_Error_LSB 0x1F
+#define QIB_7220_IntStatus_Error_RMASK 0x1
+#define QIB_7220_IntStatus_PioSent_LSB 0x1E
+#define QIB_7220_IntStatus_PioSent_RMASK 0x1
+#define QIB_7220_IntStatus_PioBufAvail_LSB 0x1D
+#define QIB_7220_IntStatus_PioBufAvail_RMASK 0x1
+#define QIB_7220_IntStatus_assertGPIO_LSB 0x1C
+#define QIB_7220_IntStatus_assertGPIO_RMASK 0x1
+#define QIB_7220_IntStatus_IBSerdesTrimDone_LSB 0x1B
+#define QIB_7220_IntStatus_IBSerdesTrimDone_RMASK 0x1
+#define QIB_7220_IntStatus_JInt_LSB 0x1A
+#define QIB_7220_IntStatus_JInt_RMASK 0x1
+#define QIB_7220_IntStatus_Reserved1_LSB 0x11
+#define QIB_7220_IntStatus_Reserved1_RMASK 0x1FF
+#define QIB_7220_IntStatus_RcvAvail16_LSB 0x10
+#define QIB_7220_IntStatus_RcvAvail16_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail15_LSB 0xF
+#define QIB_7220_IntStatus_RcvAvail15_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail14_LSB 0xE
+#define QIB_7220_IntStatus_RcvAvail14_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail13_LSB 0xD
+#define QIB_7220_IntStatus_RcvAvail13_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail12_LSB 0xC
+#define QIB_7220_IntStatus_RcvAvail12_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail11_LSB 0xB
+#define QIB_7220_IntStatus_RcvAvail11_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail10_LSB 0xA
+#define QIB_7220_IntStatus_RcvAvail10_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail9_LSB 0x9
+#define QIB_7220_IntStatus_RcvAvail9_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail8_LSB 0x8
+#define QIB_7220_IntStatus_RcvAvail8_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail7_LSB 0x7
+#define QIB_7220_IntStatus_RcvAvail7_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail6_LSB 0x6
+#define QIB_7220_IntStatus_RcvAvail6_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail5_LSB 0x5
+#define QIB_7220_IntStatus_RcvAvail5_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail4_LSB 0x4
+#define QIB_7220_IntStatus_RcvAvail4_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail3_LSB 0x3
+#define QIB_7220_IntStatus_RcvAvail3_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail2_LSB 0x2
+#define QIB_7220_IntStatus_RcvAvail2_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail1_LSB 0x1
+#define QIB_7220_IntStatus_RcvAvail1_RMASK 0x1
+#define QIB_7220_IntStatus_RcvAvail0_LSB 0x0
+#define QIB_7220_IntStatus_RcvAvail0_RMASK 0x1
+
+#define QIB_7220_IntClear_OFFS 0x78
+#define QIB_7220_IntClear_SDmaIntClear_LSB 0x3F
+#define QIB_7220_IntClear_SDmaIntClear_RMASK 0x1
+#define QIB_7220_IntClear_SDmaDisabledClear_LSB 0x3E
+#define QIB_7220_IntClear_SDmaDisabledClear_RMASK 0x1
+#define QIB_7220_IntClear_Reserved_LSB 0x31
+#define QIB_7220_IntClear_Reserved_RMASK 0x1FFF
+#define QIB_7220_IntClear_RcvUrg16IntClear_LSB 0x30
+#define QIB_7220_IntClear_RcvUrg16IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg15IntClear_LSB 0x2F
+#define QIB_7220_IntClear_RcvUrg15IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg14IntClear_LSB 0x2E
+#define QIB_7220_IntClear_RcvUrg14IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg13IntClear_LSB 0x2D
+#define QIB_7220_IntClear_RcvUrg13IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg12IntClear_LSB 0x2C
+#define QIB_7220_IntClear_RcvUrg12IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg11IntClear_LSB 0x2B
+#define QIB_7220_IntClear_RcvUrg11IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg10IntClear_LSB 0x2A
+#define QIB_7220_IntClear_RcvUrg10IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg9IntClear_LSB 0x29
+#define QIB_7220_IntClear_RcvUrg9IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg8IntClear_LSB 0x28
+#define QIB_7220_IntClear_RcvUrg8IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg7IntClear_LSB 0x27
+#define QIB_7220_IntClear_RcvUrg7IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg6IntClear_LSB 0x26
+#define QIB_7220_IntClear_RcvUrg6IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg5IntClear_LSB 0x25
+#define QIB_7220_IntClear_RcvUrg5IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg4IntClear_LSB 0x24
+#define QIB_7220_IntClear_RcvUrg4IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg3IntClear_LSB 0x23
+#define QIB_7220_IntClear_RcvUrg3IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg2IntClear_LSB 0x22
+#define QIB_7220_IntClear_RcvUrg2IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg1IntClear_LSB 0x21
+#define QIB_7220_IntClear_RcvUrg1IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvUrg0IntClear_LSB 0x20
+#define QIB_7220_IntClear_RcvUrg0IntClear_RMASK 0x1
+#define QIB_7220_IntClear_ErrorIntClear_LSB 0x1F
+#define QIB_7220_IntClear_ErrorIntClear_RMASK 0x1
+#define QIB_7220_IntClear_PioSetIntClear_LSB 0x1E
+#define QIB_7220_IntClear_PioSetIntClear_RMASK 0x1
+#define QIB_7220_IntClear_PioBufAvailIntClear_LSB 0x1D
+#define QIB_7220_IntClear_PioBufAvailIntClear_RMASK 0x1
+#define QIB_7220_IntClear_assertGPIOIntClear_LSB 0x1C
+#define QIB_7220_IntClear_assertGPIOIntClear_RMASK 0x1
+#define QIB_7220_IntClear_IBSerdesTrimDoneClear_LSB 0x1B
+#define QIB_7220_IntClear_IBSerdesTrimDoneClear_RMASK 0x1
+#define QIB_7220_IntClear_JIntClear_LSB 0x1A
+#define QIB_7220_IntClear_JIntClear_RMASK 0x1
+#define QIB_7220_IntClear_Reserved1_LSB 0x11
+#define QIB_7220_IntClear_Reserved1_RMASK 0x1FF
+#define QIB_7220_IntClear_RcvAvail16IntClear_LSB 0x10
+#define QIB_7220_IntClear_RcvAvail16IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail15IntClear_LSB 0xF
+#define QIB_7220_IntClear_RcvAvail15IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail14IntClear_LSB 0xE
+#define QIB_7220_IntClear_RcvAvail14IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail13IntClear_LSB 0xD
+#define QIB_7220_IntClear_RcvAvail13IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail12IntClear_LSB 0xC
+#define QIB_7220_IntClear_RcvAvail12IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail11IntClear_LSB 0xB
+#define QIB_7220_IntClear_RcvAvail11IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail10IntClear_LSB 0xA
+#define QIB_7220_IntClear_RcvAvail10IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail9IntClear_LSB 0x9
+#define QIB_7220_IntClear_RcvAvail9IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail8IntClear_LSB 0x8
+#define QIB_7220_IntClear_RcvAvail8IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail7IntClear_LSB 0x7
+#define QIB_7220_IntClear_RcvAvail7IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail6IntClear_LSB 0x6
+#define QIB_7220_IntClear_RcvAvail6IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail5IntClear_LSB 0x5
+#define QIB_7220_IntClear_RcvAvail5IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail4IntClear_LSB 0x4
+#define QIB_7220_IntClear_RcvAvail4IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail3IntClear_LSB 0x3
+#define QIB_7220_IntClear_RcvAvail3IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail2IntClear_LSB 0x2
+#define QIB_7220_IntClear_RcvAvail2IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail1IntClear_LSB 0x1
+#define QIB_7220_IntClear_RcvAvail1IntClear_RMASK 0x1
+#define QIB_7220_IntClear_RcvAvail0IntClear_LSB 0x0
+#define QIB_7220_IntClear_RcvAvail0IntClear_RMASK 0x1
+
+#define QIB_7220_ErrMask_OFFS 0x80
+#define QIB_7220_ErrMask_Reserved_LSB 0x36
+#define QIB_7220_ErrMask_Reserved_RMASK 0x3FF
+#define QIB_7220_ErrMask_InvalidEEPCmdMask_LSB 0x35
+#define QIB_7220_ErrMask_InvalidEEPCmdMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_LSB 0x34
+#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_HardwareErrMask_LSB 0x33
+#define QIB_7220_ErrMask_HardwareErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_ResetNegatedMask_LSB 0x32
+#define QIB_7220_ErrMask_ResetNegatedMask_RMASK 0x1
+#define QIB_7220_ErrMask_InvalidAddrErrMask_LSB 0x31
+#define QIB_7220_ErrMask_InvalidAddrErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_IBStatusChangedMask_LSB 0x30
+#define QIB_7220_ErrMask_IBStatusChangedMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_LSB 0x2F
+#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaMissingDwErrMask_LSB 0x2E
+#define QIB_7220_ErrMask_SDmaMissingDwErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaDwEnErrMask_LSB 0x2D
+#define QIB_7220_ErrMask_SDmaDwEnErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaRpyTagErrMask_LSB 0x2C
+#define QIB_7220_ErrMask_SDmaRpyTagErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDma1stDescErrMask_LSB 0x2B
+#define QIB_7220_ErrMask_SDma1stDescErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaBaseErrMask_LSB 0x2A
+#define QIB_7220_ErrMask_SDmaBaseErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_LSB 0x29
+#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_LSB 0x28
+#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_LSB 0x27
+#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendBufMisuseErrMask_LSB 0x26
+#define QIB_7220_ErrMask_SendBufMisuseErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_LSB 0x25
+#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24
+#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_LSB 0x23
+#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_LSB 0x22
+#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21
+#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendPktLenErrMask_LSB 0x20
+#define QIB_7220_ErrMask_SendPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendUnderRunErrMask_LSB 0x1F
+#define QIB_7220_ErrMask_SendUnderRunErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendMaxPktLenErrMask_LSB 0x1E
+#define QIB_7220_ErrMask_SendMaxPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendMinPktLenErrMask_LSB 0x1D
+#define QIB_7220_ErrMask_SendMinPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SDmaDisabledErrMask_LSB 0x1C
+#define QIB_7220_ErrMask_SDmaDisabledErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B
+#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_Reserved1_LSB 0x12
+#define QIB_7220_ErrMask_Reserved1_RMASK 0x1FF
+#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_LSB 0x11
+#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvHdrErrMask_LSB 0x10
+#define QIB_7220_ErrMask_RcvHdrErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvHdrLenErrMask_LSB 0xF
+#define QIB_7220_ErrMask_RcvHdrLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvBadTidErrMask_LSB 0xE
+#define QIB_7220_ErrMask_RcvBadTidErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvHdrFullErrMask_LSB 0xD
+#define QIB_7220_ErrMask_RcvHdrFullErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvEgrFullErrMask_LSB 0xC
+#define QIB_7220_ErrMask_RcvEgrFullErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvBadVersionErrMask_LSB 0xB
+#define QIB_7220_ErrMask_RcvBadVersionErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvIBFlowErrMask_LSB 0xA
+#define QIB_7220_ErrMask_RcvIBFlowErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvEBPErrMask_LSB 0x9
+#define QIB_7220_ErrMask_RcvEBPErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8
+#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7
+#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvShortPktLenErrMask_LSB 0x6
+#define QIB_7220_ErrMask_RcvShortPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvLongPktLenErrMask_LSB 0x5
+#define QIB_7220_ErrMask_RcvLongPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_LSB 0x4
+#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvMinPktLenErrMask_LSB 0x3
+#define QIB_7220_ErrMask_RcvMinPktLenErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvICRCErrMask_LSB 0x2
+#define QIB_7220_ErrMask_RcvICRCErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvVCRCErrMask_LSB 0x1
+#define QIB_7220_ErrMask_RcvVCRCErrMask_RMASK 0x1
+#define QIB_7220_ErrMask_RcvFormatErrMask_LSB 0x0
+#define QIB_7220_ErrMask_RcvFormatErrMask_RMASK 0x1
+
+#define QIB_7220_ErrStatus_OFFS 0x88
+#define QIB_7220_ErrStatus_Reserved_LSB 0x36
+#define QIB_7220_ErrStatus_Reserved_RMASK 0x3FF
+#define QIB_7220_ErrStatus_InvalidEEPCmdErr_LSB 0x35
+#define QIB_7220_ErrStatus_InvalidEEPCmdErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_LSB 0x34
+#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_RMASK 0x1
+#define QIB_7220_ErrStatus_HardwareErr_LSB 0x33
+#define QIB_7220_ErrStatus_HardwareErr_RMASK 0x1
+#define QIB_7220_ErrStatus_ResetNegated_LSB 0x32
+#define QIB_7220_ErrStatus_ResetNegated_RMASK 0x1
+#define QIB_7220_ErrStatus_InvalidAddrErr_LSB 0x31
+#define QIB_7220_ErrStatus_InvalidAddrErr_RMASK 0x1
+#define QIB_7220_ErrStatus_IBStatusChanged_LSB 0x30
+#define QIB_7220_ErrStatus_IBStatusChanged_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaUnexpDataErr_LSB 0x2F
+#define QIB_7220_ErrStatus_SDmaUnexpDataErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaMissingDwErr_LSB 0x2E
+#define QIB_7220_ErrStatus_SDmaMissingDwErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaDwEnErr_LSB 0x2D
+#define QIB_7220_ErrStatus_SDmaDwEnErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaRpyTagErr_LSB 0x2C
+#define QIB_7220_ErrStatus_SDmaRpyTagErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDma1stDescErr_LSB 0x2B
+#define QIB_7220_ErrStatus_SDma1stDescErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaBaseErr_LSB 0x2A
+#define QIB_7220_ErrStatus_SDmaBaseErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_LSB 0x29
+#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_LSB 0x28
+#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaGenMismatchErr_LSB 0x27
+#define QIB_7220_ErrStatus_SDmaGenMismatchErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendBufMisuseErr_LSB 0x26
+#define QIB_7220_ErrStatus_SendBufMisuseErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendUnsupportedVLErr_LSB 0x25
+#define QIB_7220_ErrStatus_SendUnsupportedVLErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24
+#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendPioArmLaunchErr_LSB 0x23
+#define QIB_7220_ErrStatus_SendPioArmLaunchErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendDroppedDataPktErr_LSB 0x22
+#define QIB_7220_ErrStatus_SendDroppedDataPktErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_LSB 0x21
+#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendPktLenErr_LSB 0x20
+#define QIB_7220_ErrStatus_SendPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendUnderRunErr_LSB 0x1F
+#define QIB_7220_ErrStatus_SendUnderRunErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendMaxPktLenErr_LSB 0x1E
+#define QIB_7220_ErrStatus_SendMaxPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendMinPktLenErr_LSB 0x1D
+#define QIB_7220_ErrStatus_SendMinPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SDmaDisabledErr_LSB 0x1C
+#define QIB_7220_ErrStatus_SDmaDisabledErr_RMASK 0x1
+#define QIB_7220_ErrStatus_SendSpecialTriggerErr_LSB 0x1B
+#define QIB_7220_ErrStatus_SendSpecialTriggerErr_RMASK 0x1
+#define QIB_7220_ErrStatus_Reserved1_LSB 0x12
+#define QIB_7220_ErrStatus_Reserved1_RMASK 0x1FF
+#define QIB_7220_ErrStatus_RcvIBLostLinkErr_LSB 0x11
+#define QIB_7220_ErrStatus_RcvIBLostLinkErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvHdrErr_LSB 0x10
+#define QIB_7220_ErrStatus_RcvHdrErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvHdrLenErr_LSB 0xF
+#define QIB_7220_ErrStatus_RcvHdrLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvBadTidErr_LSB 0xE
+#define QIB_7220_ErrStatus_RcvBadTidErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvHdrFullErr_LSB 0xD
+#define QIB_7220_ErrStatus_RcvHdrFullErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvEgrFullErr_LSB 0xC
+#define QIB_7220_ErrStatus_RcvEgrFullErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvBadVersionErr_LSB 0xB
+#define QIB_7220_ErrStatus_RcvBadVersionErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvIBFlowErr_LSB 0xA
+#define QIB_7220_ErrStatus_RcvIBFlowErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvEBPErr_LSB 0x9
+#define QIB_7220_ErrStatus_RcvEBPErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_LSB 0x8
+#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_LSB 0x7
+#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvShortPktLenErr_LSB 0x6
+#define QIB_7220_ErrStatus_RcvShortPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvLongPktLenErr_LSB 0x5
+#define QIB_7220_ErrStatus_RcvLongPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvMaxPktLenErr_LSB 0x4
+#define QIB_7220_ErrStatus_RcvMaxPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvMinPktLenErr_LSB 0x3
+#define QIB_7220_ErrStatus_RcvMinPktLenErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvICRCErr_LSB 0x2
+#define QIB_7220_ErrStatus_RcvICRCErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvVCRCErr_LSB 0x1
+#define QIB_7220_ErrStatus_RcvVCRCErr_RMASK 0x1
+#define QIB_7220_ErrStatus_RcvFormatErr_LSB 0x0
+#define QIB_7220_ErrStatus_RcvFormatErr_RMASK 0x1
+
+#define QIB_7220_ErrClear_OFFS 0x90
+#define QIB_7220_ErrClear_Reserved_LSB 0x36
+#define QIB_7220_ErrClear_Reserved_RMASK 0x3FF
+#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_LSB 0x35
+#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_LSB 0x34
+#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_HardwareErrClear_LSB 0x33
+#define QIB_7220_ErrClear_HardwareErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_ResetNegatedClear_LSB 0x32
+#define QIB_7220_ErrClear_ResetNegatedClear_RMASK 0x1
+#define QIB_7220_ErrClear_InvalidAddrErrClear_LSB 0x31
+#define QIB_7220_ErrClear_InvalidAddrErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_IBStatusChangedClear_LSB 0x30
+#define QIB_7220_ErrClear_IBStatusChangedClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_LSB 0x2F
+#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaMissingDwErrClear_LSB 0x2E
+#define QIB_7220_ErrClear_SDmaMissingDwErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaDwEnErrClear_LSB 0x2D
+#define QIB_7220_ErrClear_SDmaDwEnErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaRpyTagErrClear_LSB 0x2C
+#define QIB_7220_ErrClear_SDmaRpyTagErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDma1stDescErrClear_LSB 0x2B
+#define QIB_7220_ErrClear_SDma1stDescErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaBaseErrClear_LSB 0x2A
+#define QIB_7220_ErrClear_SDmaBaseErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_LSB 0x29
+#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_LSB 0x28
+#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_LSB 0x27
+#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendBufMisuseErrClear_LSB 0x26
+#define QIB_7220_ErrClear_SendBufMisuseErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_LSB 0x25
+#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24
+#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_LSB 0x23
+#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_LSB 0x22
+#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21
+#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendPktLenErrClear_LSB 0x20
+#define QIB_7220_ErrClear_SendPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendUnderRunErrClear_LSB 0x1F
+#define QIB_7220_ErrClear_SendUnderRunErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendMaxPktLenErrClear_LSB 0x1E
+#define QIB_7220_ErrClear_SendMaxPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendMinPktLenErrClear_LSB 0x1D
+#define QIB_7220_ErrClear_SendMinPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SDmaDisabledErrClear_LSB 0x1C
+#define QIB_7220_ErrClear_SDmaDisabledErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B
+#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_Reserved1_LSB 0x12
+#define QIB_7220_ErrClear_Reserved1_RMASK 0x1FF
+#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_LSB 0x11
+#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvHdrErrClear_LSB 0x10
+#define QIB_7220_ErrClear_RcvHdrErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvHdrLenErrClear_LSB 0xF
+#define QIB_7220_ErrClear_RcvHdrLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvBadTidErrClear_LSB 0xE
+#define QIB_7220_ErrClear_RcvBadTidErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvHdrFullErrClear_LSB 0xD
+#define QIB_7220_ErrClear_RcvHdrFullErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvEgrFullErrClear_LSB 0xC
+#define QIB_7220_ErrClear_RcvEgrFullErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvBadVersionErrClear_LSB 0xB
+#define QIB_7220_ErrClear_RcvBadVersionErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvIBFlowErrClear_LSB 0xA
+#define QIB_7220_ErrClear_RcvIBFlowErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvEBPErrClear_LSB 0x9
+#define QIB_7220_ErrClear_RcvEBPErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8
+#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7
+#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvShortPktLenErrClear_LSB 0x6
+#define QIB_7220_ErrClear_RcvShortPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvLongPktLenErrClear_LSB 0x5
+#define QIB_7220_ErrClear_RcvLongPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_LSB 0x4
+#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvMinPktLenErrClear_LSB 0x3
+#define QIB_7220_ErrClear_RcvMinPktLenErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvICRCErrClear_LSB 0x2
+#define QIB_7220_ErrClear_RcvICRCErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvVCRCErrClear_LSB 0x1
+#define QIB_7220_ErrClear_RcvVCRCErrClear_RMASK 0x1
+#define QIB_7220_ErrClear_RcvFormatErrClear_LSB 0x0
+#define QIB_7220_ErrClear_RcvFormatErrClear_RMASK 0x1
+
+#define QIB_7220_HwErrMask_OFFS 0x98
+#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F
+#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E
+#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_LSB 0x3D
+#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_RMASK 0x1
+#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C
+#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_LSB 0x3B
+#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_LSB 0x3A
+#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x39
+#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x38
+#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1
+#define QIB_7220_HwErrMask_Reserved_LSB 0x37
+#define QIB_7220_HwErrMask_Reserved_RMASK 0x1
+#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_LSB 0x36
+#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1
+#define QIB_7220_HwErrMask_Reserved1_LSB 0x33
+#define QIB_7220_HwErrMask_Reserved1_RMASK 0x7
+#define QIB_7220_HwErrMask_RXEMemParityErrMask_LSB 0x2C
+#define QIB_7220_HwErrMask_RXEMemParityErrMask_RMASK 0x7F
+#define QIB_7220_HwErrMask_TXEMemParityErrMask_LSB 0x28
+#define QIB_7220_HwErrMask_TXEMemParityErrMask_RMASK 0xF
+#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_LSB 0x27
+#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_LSB 0x26
+#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_LSB 0x25
+#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_LSB 0x24
+#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_Reserved2_LSB 0x22
+#define QIB_7220_HwErrMask_Reserved2_RMASK 0x3
+#define QIB_7220_HwErrMask_PCIeBusParityErrMask_LSB 0x1F
+#define QIB_7220_HwErrMask_PCIeBusParityErrMask_RMASK 0x7
+#define QIB_7220_HwErrMask_PcieCplTimeoutMask_LSB 0x1E
+#define QIB_7220_HwErrMask_PcieCplTimeoutMask_RMASK 0x1
+#define QIB_7220_HwErrMask_PoisonedTLPMask_LSB 0x1D
+#define QIB_7220_HwErrMask_PoisonedTLPMask_RMASK 0x1
+#define QIB_7220_HwErrMask_SDmaMemReadErrMask_LSB 0x1C
+#define QIB_7220_HwErrMask_SDmaMemReadErrMask_RMASK 0x1
+#define QIB_7220_HwErrMask_Reserved3_LSB 0x8
+#define QIB_7220_HwErrMask_Reserved3_RMASK 0xFFFFF
+#define QIB_7220_HwErrMask_PCIeMemParityErrMask_LSB 0x0
+#define QIB_7220_HwErrMask_PCIeMemParityErrMask_RMASK 0xFF
+
+#define QIB_7220_HwErrStatus_OFFS 0xA0
+#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F
+#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E
+#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_LSB 0x3D
+#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_RMASK 0x1
+#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C
+#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1
+#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_LSB 0x3B
+#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_RMASK 0x1
+#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_LSB 0x3A
+#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_RMASK 0x1
+#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x39
+#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1
+#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x38
+#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1
+#define QIB_7220_HwErrStatus_Reserved_LSB 0x37
+#define QIB_7220_HwErrStatus_Reserved_RMASK 0x1
+#define QIB_7220_HwErrStatus_PowerOnBISTFailed_LSB 0x36
+#define QIB_7220_HwErrStatus_PowerOnBISTFailed_RMASK 0x1
+#define QIB_7220_HwErrStatus_Reserved1_LSB 0x33
+#define QIB_7220_HwErrStatus_Reserved1_RMASK 0x7
+#define QIB_7220_HwErrStatus_RXEMemParity_LSB 0x2C
+#define QIB_7220_HwErrStatus_RXEMemParity_RMASK 0x7F
+#define QIB_7220_HwErrStatus_TXEMemParity_LSB 0x28
+#define QIB_7220_HwErrStatus_TXEMemParity_RMASK 0xF
+#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_LSB 0x27
+#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_LSB 0x26
+#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_LSB 0x25
+#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_LSB 0x24
+#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_Reserved2_LSB 0x22
+#define QIB_7220_HwErrStatus_Reserved2_RMASK 0x3
+#define QIB_7220_HwErrStatus_PCIeBusParity_LSB 0x1F
+#define QIB_7220_HwErrStatus_PCIeBusParity_RMASK 0x7
+#define QIB_7220_HwErrStatus_PcieCplTimeout_LSB 0x1E
+#define QIB_7220_HwErrStatus_PcieCplTimeout_RMASK 0x1
+#define QIB_7220_HwErrStatus_PoisenedTLP_LSB 0x1D
+#define QIB_7220_HwErrStatus_PoisenedTLP_RMASK 0x1
+#define QIB_7220_HwErrStatus_SDmaMemReadErr_LSB 0x1C
+#define QIB_7220_HwErrStatus_SDmaMemReadErr_RMASK 0x1
+#define QIB_7220_HwErrStatus_Reserved3_LSB 0x8
+#define QIB_7220_HwErrStatus_Reserved3_RMASK 0xFFFFF
+#define QIB_7220_HwErrStatus_PCIeMemParity_LSB 0x0
+#define QIB_7220_HwErrStatus_PCIeMemParity_RMASK 0xFF
+
+#define QIB_7220_HwErrClear_OFFS 0xA8
+#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F
+#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E
+#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_LSB 0x3D
+#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_RMASK 0x1
+#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C
+#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_LSB 0x3B
+#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_LSB 0x3A
+#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x39
+#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x38
+#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1
+#define QIB_7220_HwErrClear_Reserved_LSB 0x37
+#define QIB_7220_HwErrClear_Reserved_RMASK 0x1
+#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_LSB 0x36
+#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1
+#define QIB_7220_HwErrClear_Reserved1_LSB 0x33
+#define QIB_7220_HwErrClear_Reserved1_RMASK 0x7
+#define QIB_7220_HwErrClear_RXEMemParityClear_LSB 0x2C
+#define QIB_7220_HwErrClear_RXEMemParityClear_RMASK 0x7F
+#define QIB_7220_HwErrClear_TXEMemParityClear_LSB 0x28
+#define QIB_7220_HwErrClear_TXEMemParityClear_RMASK 0xF
+#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_LSB 0x27
+#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_LSB 0x26
+#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_LSB 0x25
+#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_LSB 0x24
+#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_Reserved2_LSB 0x22
+#define QIB_7220_HwErrClear_Reserved2_RMASK 0x3
+#define QIB_7220_HwErrClear_PCIeBusParityClr_LSB 0x1F
+#define QIB_7220_HwErrClear_PCIeBusParityClr_RMASK 0x7
+#define QIB_7220_HwErrClear_PcieCplTimeoutClear_LSB 0x1E
+#define QIB_7220_HwErrClear_PcieCplTimeoutClear_RMASK 0x1
+#define QIB_7220_HwErrClear_PoisonedTLPClear_LSB 0x1D
+#define QIB_7220_HwErrClear_PoisonedTLPClear_RMASK 0x1
+#define QIB_7220_HwErrClear_SDmaMemReadErrClear_LSB 0x1C
+#define QIB_7220_HwErrClear_SDmaMemReadErrClear_RMASK 0x1
+#define QIB_7220_HwErrClear_Reserved3_LSB 0x8
+#define QIB_7220_HwErrClear_Reserved3_RMASK 0xFFFFF
+#define QIB_7220_HwErrClear_PCIeMemParityClr_LSB 0x0
+#define QIB_7220_HwErrClear_PCIeMemParityClr_RMASK 0xFF
+
+#define QIB_7220_HwDiagCtrl_OFFS 0xB0
+#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F
+#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E
+#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_CounterWrEnable_LSB 0x3D
+#define QIB_7220_HwDiagCtrl_CounterWrEnable_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_CounterDisable_LSB 0x3C
+#define QIB_7220_HwDiagCtrl_CounterDisable_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_Reserved_LSB 0x33
+#define QIB_7220_HwDiagCtrl_Reserved_RMASK 0x1FF
+#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C
+#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F
+#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28
+#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF
+#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_LSB 0x27
+#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_LSB 0x26
+#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_LSB 0x25
+#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_LSB 0x24
+#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_Reserved1_LSB 0x23
+#define QIB_7220_HwDiagCtrl_Reserved1_RMASK 0x1
+#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F
+#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF
+#define QIB_7220_HwDiagCtrl_Reserved2_LSB 0x8
+#define QIB_7220_HwDiagCtrl_Reserved2_RMASK 0x7FFFFF
+#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_LSB 0x0
+#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_RMASK 0xFF
+
+#define QIB_7220_REG_0000B8_OFFS 0xB8
+
+#define QIB_7220_IBCStatus_OFFS 0xC0
+#define QIB_7220_IBCStatus_TxCreditOk_LSB 0x1F
+#define QIB_7220_IBCStatus_TxCreditOk_RMASK 0x1
+#define QIB_7220_IBCStatus_TxReady_LSB 0x1E
+#define QIB_7220_IBCStatus_TxReady_RMASK 0x1
+#define QIB_7220_IBCStatus_Reserved_LSB 0xE
+#define QIB_7220_IBCStatus_Reserved_RMASK 0xFFFF
+#define QIB_7220_IBCStatus_IBTxLaneReversed_LSB 0xD
+#define QIB_7220_IBCStatus_IBTxLaneReversed_RMASK 0x1
+#define QIB_7220_IBCStatus_IBRxLaneReversed_LSB 0xC
+#define QIB_7220_IBCStatus_IBRxLaneReversed_RMASK 0x1
+#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_LSB 0xB
+#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_RMASK 0x1
+#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_LSB 0xA
+#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_RMASK 0x1
+#define QIB_7220_IBCStatus_LinkWidthActive_LSB 0x9
+#define QIB_7220_IBCStatus_LinkWidthActive_RMASK 0x1
+#define QIB_7220_IBCStatus_LinkSpeedActive_LSB 0x8
+#define QIB_7220_IBCStatus_LinkSpeedActive_RMASK 0x1
+#define QIB_7220_IBCStatus_LinkState_LSB 0x5
+#define QIB_7220_IBCStatus_LinkState_RMASK 0x7
+#define QIB_7220_IBCStatus_LinkTrainingState_LSB 0x0
+#define QIB_7220_IBCStatus_LinkTrainingState_RMASK 0x1F
+
+#define QIB_7220_IBCCtrl_OFFS 0xC8
+#define QIB_7220_IBCCtrl_Loopback_LSB 0x3F
+#define QIB_7220_IBCCtrl_Loopback_RMASK 0x1
+#define QIB_7220_IBCCtrl_LinkDownDefaultState_LSB 0x3E
+#define QIB_7220_IBCCtrl_LinkDownDefaultState_RMASK 0x1
+#define QIB_7220_IBCCtrl_Reserved_LSB 0x2B
+#define QIB_7220_IBCCtrl_Reserved_RMASK 0x7FFFF
+#define QIB_7220_IBCCtrl_CreditScale_LSB 0x28
+#define QIB_7220_IBCCtrl_CreditScale_RMASK 0x7
+#define QIB_7220_IBCCtrl_OverrunThreshold_LSB 0x24
+#define QIB_7220_IBCCtrl_OverrunThreshold_RMASK 0xF
+#define QIB_7220_IBCCtrl_PhyerrThreshold_LSB 0x20
+#define QIB_7220_IBCCtrl_PhyerrThreshold_RMASK 0xF
+#define QIB_7220_IBCCtrl_MaxPktLen_LSB 0x15
+#define QIB_7220_IBCCtrl_MaxPktLen_RMASK 0x7FF
+#define QIB_7220_IBCCtrl_LinkCmd_LSB 0x13
+#define QIB_7220_IBCCtrl_LinkCmd_RMASK 0x3
+#define QIB_7220_IBCCtrl_LinkInitCmd_LSB 0x10
+#define QIB_7220_IBCCtrl_LinkInitCmd_RMASK 0x7
+#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_LSB 0x8
+#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF
+#define QIB_7220_IBCCtrl_FlowCtrlPeriod_LSB 0x0
+#define QIB_7220_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF
+
+#define QIB_7220_EXTStatus_OFFS 0xD0
+#define QIB_7220_EXTStatus_GPIOIn_LSB 0x30
+#define QIB_7220_EXTStatus_GPIOIn_RMASK 0xFFFF
+#define QIB_7220_EXTStatus_Reserved_LSB 0x20
+#define QIB_7220_EXTStatus_Reserved_RMASK 0xFFFF
+#define QIB_7220_EXTStatus_Reserved1_LSB 0x10
+#define QIB_7220_EXTStatus_Reserved1_RMASK 0xFFFF
+#define QIB_7220_EXTStatus_MemBISTDisabled_LSB 0xF
+#define QIB_7220_EXTStatus_MemBISTDisabled_RMASK 0x1
+#define QIB_7220_EXTStatus_MemBISTEndTest_LSB 0xE
+#define QIB_7220_EXTStatus_MemBISTEndTest_RMASK 0x1
+#define QIB_7220_EXTStatus_Reserved2_LSB 0x0
+#define QIB_7220_EXTStatus_Reserved2_RMASK 0x3FFF
+
+#define QIB_7220_EXTCtrl_OFFS 0xD8
+#define QIB_7220_EXTCtrl_GPIOOe_LSB 0x30
+#define QIB_7220_EXTCtrl_GPIOOe_RMASK 0xFFFF
+#define QIB_7220_EXTCtrl_GPIOInvert_LSB 0x20
+#define QIB_7220_EXTCtrl_GPIOInvert_RMASK 0xFFFF
+#define QIB_7220_EXTCtrl_Reserved_LSB 0x4
+#define QIB_7220_EXTCtrl_Reserved_RMASK 0xFFFFFFF
+#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_LSB 0x3
+#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1
+#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_LSB 0x2
+#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1
+#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_LSB 0x1
+#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1
+#define QIB_7220_EXTCtrl_LEDGblErrRedOff_LSB 0x0
+#define QIB_7220_EXTCtrl_LEDGblErrRedOff_RMASK 0x1
+
+#define QIB_7220_GPIOOut_OFFS 0xE0
+
+#define QIB_7220_GPIOMask_OFFS 0xE8
+
+#define QIB_7220_GPIOStatus_OFFS 0xF0
+
+#define QIB_7220_GPIOClear_OFFS 0xF8
+
+#define QIB_7220_RcvCtrl_OFFS 0x100
+#define QIB_7220_RcvCtrl_Reserved_LSB 0x27
+#define QIB_7220_RcvCtrl_Reserved_RMASK 0x1FFFFFF
+#define QIB_7220_RcvCtrl_RcvQPMapEnable_LSB 0x26
+#define QIB_7220_RcvCtrl_RcvQPMapEnable_RMASK 0x1
+#define QIB_7220_RcvCtrl_PortCfg_LSB 0x24
+#define QIB_7220_RcvCtrl_PortCfg_RMASK 0x3
+#define QIB_7220_RcvCtrl_TailUpd_LSB 0x23
+#define QIB_7220_RcvCtrl_TailUpd_RMASK 0x1
+#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_LSB 0x22
+#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1
+#define QIB_7220_RcvCtrl_IntrAvail_LSB 0x11
+#define QIB_7220_RcvCtrl_IntrAvail_RMASK 0x1FFFF
+#define QIB_7220_RcvCtrl_PortEnable_LSB 0x0
+#define QIB_7220_RcvCtrl_PortEnable_RMASK 0x1FFFF
+
+#define QIB_7220_RcvBTHQP_OFFS 0x108
+#define QIB_7220_RcvBTHQP_Reserved_LSB 0x18
+#define QIB_7220_RcvBTHQP_Reserved_RMASK 0xFF
+#define QIB_7220_RcvBTHQP_RcvBTHQP_LSB 0x0
+#define QIB_7220_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF
+
+#define QIB_7220_RcvHdrSize_OFFS 0x110
+
+#define QIB_7220_RcvHdrCnt_OFFS 0x118
+
+#define QIB_7220_RcvHdrEntSize_OFFS 0x120
+
+#define QIB_7220_RcvTIDBase_OFFS 0x128
+
+#define QIB_7220_RcvTIDCnt_OFFS 0x130
+
+#define QIB_7220_RcvEgrBase_OFFS 0x138
+
+#define QIB_7220_RcvEgrCnt_OFFS 0x140
+
+#define QIB_7220_RcvBufBase_OFFS 0x148
+
+#define QIB_7220_RcvBufSize_OFFS 0x150
+
+#define QIB_7220_RxIntMemBase_OFFS 0x158
+
+#define QIB_7220_RxIntMemSize_OFFS 0x160
+
+#define QIB_7220_RcvPartitionKey_OFFS 0x168
+
+#define QIB_7220_RcvQPMulticastPort_OFFS 0x170
+#define QIB_7220_RcvQPMulticastPort_Reserved_LSB 0x5
+#define QIB_7220_RcvQPMulticastPort_Reserved_RMASK 0x7FFFFFFFFFFFFFF
+#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_LSB 0x0
+#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_RMASK 0x1F
+
+#define QIB_7220_RcvPktLEDCnt_OFFS 0x178
+#define QIB_7220_RcvPktLEDCnt_ONperiod_LSB 0x20
+#define QIB_7220_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF
+#define QIB_7220_RcvPktLEDCnt_OFFperiod_LSB 0x0
+#define QIB_7220_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF
+
+#define QIB_7220_IBCDDRCtrl_OFFS 0x180
+#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_LSB 0x30
+#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_RMASK 0xFFFF
+#define QIB_7220_IBCDDRCtrl_IB_DLID_LSB 0x20
+#define QIB_7220_IBCDDRCtrl_IB_DLID_RMASK 0xFFFF
+#define QIB_7220_IBCDDRCtrl_Reserved_LSB 0x1B
+#define QIB_7220_IBCDDRCtrl_Reserved_RMASK 0x1F
+#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_LSB 0x1A
+#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_LSB 0x12
+#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_RMASK 0xFF
+#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_LSB 0x11
+#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_LSB 0x10
+#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_SD_DDS_LSB 0xC
+#define QIB_7220_IBCDDRCtrl_SD_DDS_RMASK 0xF
+#define QIB_7220_IBCDDRCtrl_SD_DDSV_LSB 0xB
+#define QIB_7220_IBCDDRCtrl_SD_DDSV_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_LSB 0xA
+#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_LSB 0x9
+#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_LSB 0x8
+#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_LSB 0x7
+#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_LSB 0x5
+#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_RMASK 0x3
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_LSB 0x4
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_LSB 0x3
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_LSB 0x2
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_LSB 0x1
+#define QIB_7220_IBCDDRCtrl_SD_SPEED_RMASK 0x1
+#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_LSB 0x0
+#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_RMASK 0x1
+
+#define QIB_7220_HRTBT_GUID_OFFS 0x188
+
+#define QIB_7220_IBCDDRCtrl2_OFFS 0x1A0
+#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_LSB 0x5
+#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_RMASK 0x1F
+#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_LSB 0x0
+#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_RMASK 0x1F
+
+#define QIB_7220_IBCDDRStatus_OFFS 0x1A8
+#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_LSB 0x24
+#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_RMASK 0x1
+#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_LSB 0x20
+#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_RMASK 0xF
+#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_LSB 0x1E
+#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_RMASK 0x3
+#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_LSB 0x1A
+#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_RMASK 0xF
+#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_LSB 0x0
+#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_RMASK 0x3FFFFFF
+
+#define QIB_7220_JIntReload_OFFS 0x1B0
+#define QIB_7220_JIntReload_J_limit_reload_LSB 0x10
+#define QIB_7220_JIntReload_J_limit_reload_RMASK 0xFFFF
+#define QIB_7220_JIntReload_J_reload_LSB 0x0
+#define QIB_7220_JIntReload_J_reload_RMASK 0xFFFF
+
+#define QIB_7220_IBNCModeCtrl_OFFS 0x1B8
+#define QIB_7220_IBNCModeCtrl_Reserved_LSB 0x1A
+#define QIB_7220_IBNCModeCtrl_Reserved_RMASK 0x3FFFFFFFFF
+#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_LSB 0x11
+#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_RMASK 0x1FF
+#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_LSB 0x8
+#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_RMASK 0x1FF
+#define QIB_7220_IBNCModeCtrl_Reserved1_LSB 0x3
+#define QIB_7220_IBNCModeCtrl_Reserved1_RMASK 0x1F
+#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_LSB 0x2
+#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_RMASK 0x1
+#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_LSB 0x1
+#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_RMASK 0x1
+#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_LSB 0x0
+#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_RMASK 0x1
+
+#define QIB_7220_SendCtrl_OFFS 0x1C0
+#define QIB_7220_SendCtrl_Disarm_LSB 0x1F
+#define QIB_7220_SendCtrl_Disarm_RMASK 0x1
+#define QIB_7220_SendCtrl_Reserved_LSB 0x1D
+#define QIB_7220_SendCtrl_Reserved_RMASK 0x3
+#define QIB_7220_SendCtrl_AvailUpdThld_LSB 0x18
+#define QIB_7220_SendCtrl_AvailUpdThld_RMASK 0x1F
+#define QIB_7220_SendCtrl_DisarmPIOBuf_LSB 0x10
+#define QIB_7220_SendCtrl_DisarmPIOBuf_RMASK 0xFF
+#define QIB_7220_SendCtrl_Reserved1_LSB 0xD
+#define QIB_7220_SendCtrl_Reserved1_RMASK 0x7
+#define QIB_7220_SendCtrl_SDmaHalt_LSB 0xC
+#define QIB_7220_SendCtrl_SDmaHalt_RMASK 0x1
+#define QIB_7220_SendCtrl_SDmaEnable_LSB 0xB
+#define QIB_7220_SendCtrl_SDmaEnable_RMASK 0x1
+#define QIB_7220_SendCtrl_SDmaSingleDescriptor_LSB 0xA
+#define QIB_7220_SendCtrl_SDmaSingleDescriptor_RMASK 0x1
+#define QIB_7220_SendCtrl_SDmaIntEnable_LSB 0x9
+#define QIB_7220_SendCtrl_SDmaIntEnable_RMASK 0x1
+#define QIB_7220_SendCtrl_Reserved2_LSB 0x5
+#define QIB_7220_SendCtrl_Reserved2_RMASK 0xF
+#define QIB_7220_SendCtrl_SSpecialTriggerEn_LSB 0x4
+#define QIB_7220_SendCtrl_SSpecialTriggerEn_RMASK 0x1
+#define QIB_7220_SendCtrl_SPioEnable_LSB 0x3
+#define QIB_7220_SendCtrl_SPioEnable_RMASK 0x1
+#define QIB_7220_SendCtrl_SendBufAvailUpd_LSB 0x2
+#define QIB_7220_SendCtrl_SendBufAvailUpd_RMASK 0x1
+#define QIB_7220_SendCtrl_SendIntBufAvail_LSB 0x1
+#define QIB_7220_SendCtrl_SendIntBufAvail_RMASK 0x1
+#define QIB_7220_SendCtrl_Abort_LSB 0x0
+#define QIB_7220_SendCtrl_Abort_RMASK 0x1
+
+#define QIB_7220_SendBufBase_OFFS 0x1C8
+#define QIB_7220_SendBufBase_Reserved_LSB 0x35
+#define QIB_7220_SendBufBase_Reserved_RMASK 0x7FF
+#define QIB_7220_SendBufBase_BaseAddr_LargePIO_LSB 0x20
+#define QIB_7220_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF
+#define QIB_7220_SendBufBase_Reserved1_LSB 0x15
+#define QIB_7220_SendBufBase_Reserved1_RMASK 0x7FF
+#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_LSB 0x0
+#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF
+
+#define QIB_7220_SendBufSize_OFFS 0x1D0
+#define QIB_7220_SendBufSize_Reserved_LSB 0x2D
+#define QIB_7220_SendBufSize_Reserved_RMASK 0xFFFFF
+#define QIB_7220_SendBufSize_Size_LargePIO_LSB 0x20
+#define QIB_7220_SendBufSize_Size_LargePIO_RMASK 0x1FFF
+#define QIB_7220_SendBufSize_Reserved1_LSB 0xC
+#define QIB_7220_SendBufSize_Reserved1_RMASK 0xFFFFF
+#define QIB_7220_SendBufSize_Size_SmallPIO_LSB 0x0
+#define QIB_7220_SendBufSize_Size_SmallPIO_RMASK 0xFFF
+
+#define QIB_7220_SendBufCnt_OFFS 0x1D8
+#define QIB_7220_SendBufCnt_Reserved_LSB 0x24
+#define QIB_7220_SendBufCnt_Reserved_RMASK 0xFFFFFFF
+#define QIB_7220_SendBufCnt_Num_LargeBuffers_LSB 0x20
+#define QIB_7220_SendBufCnt_Num_LargeBuffers_RMASK 0xF
+#define QIB_7220_SendBufCnt_Reserved1_LSB 0x9
+#define QIB_7220_SendBufCnt_Reserved1_RMASK 0x7FFFFF
+#define QIB_7220_SendBufCnt_Num_SmallBuffers_LSB 0x0
+#define QIB_7220_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF
+
+#define QIB_7220_SendBufAvailAddr_OFFS 0x1E0
+#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6
+#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF
+#define QIB_7220_SendBufAvailAddr_Reserved_LSB 0x0
+#define QIB_7220_SendBufAvailAddr_Reserved_RMASK 0x3F
+
+#define QIB_7220_TxIntMemBase_OFFS 0x1E8
+
+#define QIB_7220_TxIntMemSize_OFFS 0x1F0
+
+#define QIB_7220_SendDmaBase_OFFS 0x1F8
+#define QIB_7220_SendDmaBase_Reserved_LSB 0x30
+#define QIB_7220_SendDmaBase_Reserved_RMASK 0xFFFF
+#define QIB_7220_SendDmaBase_SendDmaBase_LSB 0x0
+#define QIB_7220_SendDmaBase_SendDmaBase_RMASK 0xFFFFFFFFFFFF
+
+#define QIB_7220_SendDmaLenGen_OFFS 0x200
+#define QIB_7220_SendDmaLenGen_Reserved_LSB 0x13
+#define QIB_7220_SendDmaLenGen_Reserved_RMASK 0x1FFFFFFFFFFF
+#define QIB_7220_SendDmaLenGen_Generation_LSB 0x10
+#define QIB_7220_SendDmaLenGen_Generation_MSB 0x12
+#define QIB_7220_SendDmaLenGen_Generation_RMASK 0x7
+#define QIB_7220_SendDmaLenGen_Length_LSB 0x0
+#define QIB_7220_SendDmaLenGen_Length_RMASK 0xFFFF
+
+#define QIB_7220_SendDmaTail_OFFS 0x208
+#define QIB_7220_SendDmaTail_Reserved_LSB 0x10
+#define QIB_7220_SendDmaTail_Reserved_RMASK 0xFFFFFFFFFFFF
+#define QIB_7220_SendDmaTail_SendDmaTail_LSB 0x0
+#define QIB_7220_SendDmaTail_SendDmaTail_RMASK 0xFFFF
+
+#define QIB_7220_SendDmaHead_OFFS 0x210
+#define QIB_7220_SendDmaHead_Reserved_LSB 0x30
+#define QIB_7220_SendDmaHead_Reserved_RMASK 0xFFFF
+#define QIB_7220_SendDmaHead_InternalSendDmaHead_LSB 0x20
+#define QIB_7220_SendDmaHead_InternalSendDmaHead_RMASK 0xFFFF
+#define QIB_7220_SendDmaHead_Reserved1_LSB 0x10
+#define QIB_7220_SendDmaHead_Reserved1_RMASK 0xFFFF
+#define QIB_7220_SendDmaHead_SendDmaHead_LSB 0x0
+#define QIB_7220_SendDmaHead_SendDmaHead_RMASK 0xFFFF
+
+#define QIB_7220_SendDmaHeadAddr_OFFS 0x218
+#define QIB_7220_SendDmaHeadAddr_Reserved_LSB 0x30
+#define QIB_7220_SendDmaHeadAddr_Reserved_RMASK 0xFFFF
+#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_LSB 0x0
+#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF
+
+#define QIB_7220_SendDmaBufMask0_OFFS 0x220
+#define QIB_7220_SendDmaBufMask0_BufMask_63_0_LSB 0x0
+#define QIB_7220_SendDmaBufMask0_BufMask_63_0_RMASK 0x0
+
+#define QIB_7220_SendDmaStatus_OFFS 0x238
+#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_LSB 0x3F
+#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_RMASK 0x1
+#define QIB_7220_SendDmaStatus_AbortInProg_LSB 0x3E
+#define QIB_7220_SendDmaStatus_AbortInProg_RMASK 0x1
+#define QIB_7220_SendDmaStatus_InternalSDmaEnable_LSB 0x3D
+#define QIB_7220_SendDmaStatus_InternalSDmaEnable_RMASK 0x1
+#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_LSB 0x2F
+#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_RMASK 0x3FFF
+#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_LSB 0x28
+#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_RMASK 0x7F
+#define QIB_7220_SendDmaStatus_RpyTag_7_0_LSB 0x20
+#define QIB_7220_SendDmaStatus_RpyTag_7_0_RMASK 0xFF
+#define QIB_7220_SendDmaStatus_ScbFull_LSB 0x1F
+#define QIB_7220_SendDmaStatus_ScbFull_RMASK 0x1
+#define QIB_7220_SendDmaStatus_ScbEmpty_LSB 0x1E
+#define QIB_7220_SendDmaStatus_ScbEmpty_RMASK 0x1
+#define QIB_7220_SendDmaStatus_ScbEntryValid_LSB 0x1D
+#define QIB_7220_SendDmaStatus_ScbEntryValid_RMASK 0x1
+#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_LSB 0x1C
+#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_RMASK 0x1
+#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_LSB 0x1B
+#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_RMASK 0x1
+#define QIB_7220_SendDmaStatus_SplFifoDisarmed_LSB 0x1A
+#define QIB_7220_SendDmaStatus_SplFifoDisarmed_RMASK 0x1
+#define QIB_7220_SendDmaStatus_SplFifoEmpty_LSB 0x19
+#define QIB_7220_SendDmaStatus_SplFifoEmpty_RMASK 0x1
+#define QIB_7220_SendDmaStatus_SplFifoFull_LSB 0x18
+#define QIB_7220_SendDmaStatus_SplFifoFull_RMASK 0x1
+#define QIB_7220_SendDmaStatus_SplFifoBufNum_LSB 0x10
+#define QIB_7220_SendDmaStatus_SplFifoBufNum_RMASK 0xFF
+#define QIB_7220_SendDmaStatus_SplFifoDescIndex_LSB 0x0
+#define QIB_7220_SendDmaStatus_SplFifoDescIndex_RMASK 0xFFFF
+
+#define QIB_7220_SendBufErr0_OFFS 0x240
+#define QIB_7220_SendBufErr0_SendBufErr_63_0_LSB 0x0
+#define QIB_7220_SendBufErr0_SendBufErr_63_0_RMASK 0x0
+
+#define QIB_7220_RcvHdrAddr0_OFFS 0x270
+#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2
+#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF
+#define QIB_7220_RcvHdrAddr0_Reserved_LSB 0x0
+#define QIB_7220_RcvHdrAddr0_Reserved_RMASK 0x3
+
+#define QIB_7220_RcvHdrTailAddr0_OFFS 0x300
+#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2
+#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF
+#define QIB_7220_RcvHdrTailAddr0_Reserved_LSB 0x0
+#define QIB_7220_RcvHdrTailAddr0_Reserved_RMASK 0x3
+
+#define QIB_7220_ibsd_epb_access_ctrl_OFFS 0x3C0
+#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_LSB 0x8
+#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_RMASK 0x1
+#define QIB_7220_ibsd_epb_access_ctrl_Reserved_LSB 0x1
+#define QIB_7220_ibsd_epb_access_ctrl_Reserved_RMASK 0x7F
+#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_LSB 0x0
+#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_RMASK 0x1
+
+#define QIB_7220_ibsd_epb_transaction_reg_OFFS 0x3C8
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_LSB 0x1F
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_LSB 0x1E
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_Reserved_LSB 0x1D
+#define QIB_7220_ibsd_epb_transaction_reg_Reserved_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_LSB 0x1C
+#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_LSB 0x1B
+#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_LSB 0x19
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_RMASK 0x3
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_LSB 0x18
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_LSB 0x17
+#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_RMASK 0x1
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_LSB 0x8
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_RMASK 0x7FFF
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_LSB 0x0
+#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_RMASK 0xFF
+
+#define QIB_7220_XGXSCfg_OFFS 0x3D8
+#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_LSB 0x3F
+#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_RMASK 0x1
+#define QIB_7220_XGXSCfg_Reserved_LSB 0x13
+#define QIB_7220_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFFF
+#define QIB_7220_XGXSCfg_link_sync_mask_LSB 0x9
+#define QIB_7220_XGXSCfg_link_sync_mask_RMASK 0x3FF
+#define QIB_7220_XGXSCfg_Reserved1_LSB 0x3
+#define QIB_7220_XGXSCfg_Reserved1_RMASK 0x3F
+#define QIB_7220_XGXSCfg_xcv_reset_LSB 0x2
+#define QIB_7220_XGXSCfg_xcv_reset_RMASK 0x1
+#define QIB_7220_XGXSCfg_Reserved2_LSB 0x1
+#define QIB_7220_XGXSCfg_Reserved2_RMASK 0x1
+#define QIB_7220_XGXSCfg_tx_rx_reset_LSB 0x0
+#define QIB_7220_XGXSCfg_tx_rx_reset_RMASK 0x1
+
+#define QIB_7220_IBSerDesCtrl_OFFS 0x3E0
+#define QIB_7220_IBSerDesCtrl_Reserved_LSB 0x2D
+#define QIB_7220_IBSerDesCtrl_Reserved_RMASK 0x7FFFF
+#define QIB_7220_IBSerDesCtrl_INT_uC_LSB 0x2C
+#define QIB_7220_IBSerDesCtrl_INT_uC_RMASK 0x1
+#define QIB_7220_IBSerDesCtrl_CKSEL_uC_LSB 0x2A
+#define QIB_7220_IBSerDesCtrl_CKSEL_uC_RMASK 0x3
+#define QIB_7220_IBSerDesCtrl_PLLN_LSB 0x28
+#define QIB_7220_IBSerDesCtrl_PLLN_RMASK 0x3
+#define QIB_7220_IBSerDesCtrl_PLLM_LSB 0x25
+#define QIB_7220_IBSerDesCtrl_PLLM_RMASK 0x7
+#define QIB_7220_IBSerDesCtrl_TXOBPD_LSB 0x24
+#define QIB_7220_IBSerDesCtrl_TXOBPD_RMASK 0x1
+#define QIB_7220_IBSerDesCtrl_TWC_LSB 0x23
+#define QIB_7220_IBSerDesCtrl_TWC_RMASK 0x1
+#define QIB_7220_IBSerDesCtrl_RXIDLE_LSB 0x22
+#define QIB_7220_IBSerDesCtrl_RXIDLE_RMASK 0x1
+#define QIB_7220_IBSerDesCtrl_RXINV_LSB 0x21
+#define QIB_7220_IBSerDesCtrl_RXINV_RMASK 0x1
+#define QIB_7220_IBSerDesCtrl_TXINV_LSB 0x20
+#define QIB_7220_IBSerDesCtrl_TXINV_RMASK 0x1
+#define QIB_7220_IBSerDesCtrl_Reserved1_LSB 0x12
+#define QIB_7220_IBSerDesCtrl_Reserved1_RMASK 0x3FFF
+#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_LSB 0xD
+#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_RMASK 0x1F
+#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_LSB 0x8
+#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_RMASK 0x1F
+#define QIB_7220_IBSerDesCtrl_Reserved2_LSB 0x1
+#define QIB_7220_IBSerDesCtrl_Reserved2_RMASK 0x7F
+#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_LSB 0x0
+#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_RMASK 0x1
+
+#define QIB_7220_pciesd_epb_access_ctrl_OFFS 0x400
+#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_LSB 0x8
+#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_RMASK 0x1
+#define QIB_7220_pciesd_epb_access_ctrl_Reserved_LSB 0x3
+#define QIB_7220_pciesd_epb_access_ctrl_Reserved_RMASK 0x1F
+#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_LSB 0x1
+#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_RMASK 0x3
+#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_LSB 0x0
+#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_RMASK 0x1
+
+#define QIB_7220_pciesd_epb_transaction_reg_OFFS 0x408
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_LSB 0x1F
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_RMASK 0x1
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_LSB 0x1E
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_RMASK 0x1
+#define QIB_7220_pciesd_epb_transaction_reg_Reserved_LSB 0x1D
+#define QIB_7220_pciesd_epb_transaction_reg_Reserved_RMASK 0x1
+#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_LSB 0x1C
+#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_RMASK 0x1
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_LSB 0x19
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_RMASK 0x7
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_LSB 0x18
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_RMASK 0x1
+#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_LSB 0x17
+#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_RMASK 0x1
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_LSB 0x8
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_RMASK 0x7FFF
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_LSB 0x0
+#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_RMASK 0xFF
+
+#define QIB_7220_SerDes_DDSRXEQ0_OFFS 0x500
+#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_LSB 0x4
+#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_RMASK 0x3F
+#define QIB_7220_SerDes_DDSRXEQ0_element_num_LSB 0x0
+#define QIB_7220_SerDes_DDSRXEQ0_element_num_RMASK 0xF
+
+#define QIB_7220_LBIntCnt_OFFS 0x13000
+
+#define QIB_7220_LBFlowStallCnt_OFFS 0x13008
+
+#define QIB_7220_TxSDmaDescCnt_OFFS 0x13010
+
+#define QIB_7220_TxUnsupVLErrCnt_OFFS 0x13018
+
+#define QIB_7220_TxDataPktCnt_OFFS 0x13020
+
+#define QIB_7220_TxFlowPktCnt_OFFS 0x13028
+
+#define QIB_7220_TxDwordCnt_OFFS 0x13030
+
+#define QIB_7220_TxLenErrCnt_OFFS 0x13038
+
+#define QIB_7220_TxMaxMinLenErrCnt_OFFS 0x13040
+
+#define QIB_7220_TxUnderrunCnt_OFFS 0x13048
+
+#define QIB_7220_TxFlowStallCnt_OFFS 0x13050
+
+#define QIB_7220_TxDroppedPktCnt_OFFS 0x13058
+
+#define QIB_7220_RxDroppedPktCnt_OFFS 0x13060
+
+#define QIB_7220_RxDataPktCnt_OFFS 0x13068
+
+#define QIB_7220_RxFlowPktCnt_OFFS 0x13070
+
+#define QIB_7220_RxDwordCnt_OFFS 0x13078
+
+#define QIB_7220_RxLenErrCnt_OFFS 0x13080
+
+#define QIB_7220_RxMaxMinLenErrCnt_OFFS 0x13088
+
+#define QIB_7220_RxICRCErrCnt_OFFS 0x13090
+
+#define QIB_7220_RxVCRCErrCnt_OFFS 0x13098
+
+#define QIB_7220_RxFlowCtrlViolCnt_OFFS 0x130A0
+
+#define QIB_7220_RxVersionErrCnt_OFFS 0x130A8
+
+#define QIB_7220_RxLinkMalformCnt_OFFS 0x130B0
+
+#define QIB_7220_RxEBPCnt_OFFS 0x130B8
+
+#define QIB_7220_RxLPCRCErrCnt_OFFS 0x130C0
+
+#define QIB_7220_RxBufOvflCnt_OFFS 0x130C8
+
+#define QIB_7220_RxTIDFullErrCnt_OFFS 0x130D0
+
+#define QIB_7220_RxTIDValidErrCnt_OFFS 0x130D8
+
+#define QIB_7220_RxPKeyMismatchCnt_OFFS 0x130E0
+
+#define QIB_7220_RxP0HdrEgrOvflCnt_OFFS 0x130E8
+
+#define QIB_7220_IBStatusChangeCnt_OFFS 0x13170
+
+#define QIB_7220_IBLinkErrRecoveryCnt_OFFS 0x13178
+
+#define QIB_7220_IBLinkDownedCnt_OFFS 0x13180
+
+#define QIB_7220_IBSymbolErrCnt_OFFS 0x13188
+
+#define QIB_7220_RxVL15DroppedPktCnt_OFFS 0x13190
+
+#define QIB_7220_RxOtherLocalPhyErrCnt_OFFS 0x13198
+
+#define QIB_7220_PcieRetryBufDiagQwordCnt_OFFS 0x131A0
+
+#define QIB_7220_ExcessBufferOvflCnt_OFFS 0x131A8
+
+#define QIB_7220_LocalLinkIntegrityErrCnt_OFFS 0x131B0
+
+#define QIB_7220_RxVlErrCnt_OFFS 0x131B8
+
+#define QIB_7220_RxDlidFltrCnt_OFFS 0x131C0
+
+#define QIB_7220_CNT_0131C8_OFFS 0x131C8
+
+#define QIB_7220_PSStat_OFFS 0x13200
+
+#define QIB_7220_PSStart_OFFS 0x13208
+
+#define QIB_7220_PSInterval_OFFS 0x13210
+
+#define QIB_7220_PSRcvDataCount_OFFS 0x13218
+
+#define QIB_7220_PSRcvPktsCount_OFFS 0x13220
+
+#define QIB_7220_PSXmitDataCount_OFFS 0x13228
+
+#define QIB_7220_PSXmitPktsCount_OFFS 0x13230
+
+#define QIB_7220_PSXmitWaitCount_OFFS 0x13238
+
+#define QIB_7220_CNT_013240_OFFS 0x13240
+
+#define QIB_7220_RcvEgrArray_OFFS 0x14000
+
+#define QIB_7220_MEM_038000_OFFS 0x38000
+
+#define QIB_7220_RcvTIDArray0_OFFS 0x53000
+
+#define QIB_7220_PIOLaunchFIFO_OFFS 0x64000
+
+#define QIB_7220_MEM_064480_OFFS 0x64480
+
+#define QIB_7220_SendPIOpbcCache_OFFS 0x64800
+
+#define QIB_7220_MEM_064C80_OFFS 0x64C80
+
+#define QIB_7220_PreLaunchFIFO_OFFS 0x65000
+
+#define QIB_7220_MEM_065080_OFFS 0x65080
+
+#define QIB_7220_ScoreBoard_OFFS 0x65400
+
+#define QIB_7220_MEM_065440_OFFS 0x65440
+
+#define QIB_7220_DescriptorFIFO_OFFS 0x65800
+
+#define QIB_7220_MEM_065880_OFFS 0x65880
+
+#define QIB_7220_RcvBuf1_OFFS 0x72000
+
+#define QIB_7220_MEM_074800_OFFS 0x74800
+
+#define QIB_7220_RcvBuf2_OFFS 0x75000
+
+#define QIB_7220_MEM_076400_OFFS 0x76400
+
+#define QIB_7220_RcvFlags_OFFS 0x77000
+
+#define QIB_7220_MEM_078400_OFFS 0x78400
+
+#define QIB_7220_RcvLookupBuf1_OFFS 0x79000
+
+#define QIB_7220_MEM_07A400_OFFS 0x7A400
+
+#define QIB_7220_RcvDMADatBuf_OFFS 0x7B000
+
+#define QIB_7220_RcvDMAHdrBuf_OFFS 0x7B800
+
+#define QIB_7220_MiscRXEIntMem_OFFS 0x7C000
+
+#define QIB_7220_MEM_07D400_OFFS 0x7D400
+
+#define QIB_7220_PCIERcvBuf_OFFS 0x80000
+
+#define QIB_7220_PCIERetryBuf_OFFS 0x84000
+
+#define QIB_7220_PCIERcvBufRdToWrAddr_OFFS 0x88000
+
+#define QIB_7220_PCIECplBuf_OFFS 0x90000
+
+#define QIB_7220_IBSerDesMappTable_OFFS 0x94000
+
+#define QIB_7220_MEM_095000_OFFS 0x95000
+
+#define QIB_7220_SendBuf0_MA_OFFS 0x100000
+
+#define QIB_7220_MEM_1A0000_OFFS 0x1A0000
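
Editor's note, not part of the patch: every field in these generated headers is described by a *_LSB value (the bit position of the field's least-significant bit within the 64-bit register) and an *_RMASK value (the mask that applies after the register value has been shifted right by LSB). The sketch below shows how such a pair is typically consumed; it is only an illustration under that assumption, the qib_field_get/qib_field_set helpers and the IBCStatus example are hypothetical and do not appear in this patch.

/*
 * Illustrative only: extract a field from, and write a field back into,
 * a 64-bit register value using the generated _LSB/_RMASK pairs.
 * "regval" stands in for whatever the driver read from the chip.
 */
#include <stdint.h>

static inline uint64_t qib_field_get(uint64_t regval,
				     unsigned int lsb, uint64_t rmask)
{
	/* Shift the field down to bit 0, then mask off everything else. */
	return (regval >> lsb) & rmask;
}

static inline uint64_t qib_field_set(uint64_t regval, unsigned int lsb,
				     uint64_t rmask, uint64_t val)
{
	/* Clear the old field bits, then OR in the new (masked) value. */
	regval &= ~(rmask << lsb);
	return regval | ((val & rmask) << lsb);
}

/* Example: decode the IB link state (bits 7:5) from an IBCStatus read. */
static uint64_t ibc_link_state(uint64_t ibcstatus)
{
	return qib_field_get(ibcstatus,
			     QIB_7220_IBCStatus_LinkState_LSB,
			     QIB_7220_IBCStatus_LinkState_RMASK);
}
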
diff --git a/drivers/infiniband/hw/qib/qib_7322_regs.h b/drivers/infiniband/hw/qib/qib_7322_regs.h
new file mode 100644
index 00000000000..32dc81ff8d4
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_7322_regs.h
@@ -0,0 +1,3163 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is mechanically generated from RTL. Any hand-edits will be lost! */
+
+#define QIB_7322_Revision_OFFS 0x0
+#define QIB_7322_Revision_DEF 0x0000000002010601
+#define QIB_7322_Revision_R_Simulator_LSB 0x3F
+#define QIB_7322_Revision_R_Simulator_MSB 0x3F
+#define QIB_7322_Revision_R_Simulator_RMASK 0x1
+#define QIB_7322_Revision_R_Emulation_LSB 0x3E
+#define QIB_7322_Revision_R_Emulation_MSB 0x3E
+#define QIB_7322_Revision_R_Emulation_RMASK 0x1
+#define QIB_7322_Revision_R_Emulation_Revcode_LSB 0x28
+#define QIB_7322_Revision_R_Emulation_Revcode_MSB 0x3D
+#define QIB_7322_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF
+#define QIB_7322_Revision_BoardID_LSB 0x20
+#define QIB_7322_Revision_BoardID_MSB 0x27
+#define QIB_7322_Revision_BoardID_RMASK 0xFF
+#define QIB_7322_Revision_R_SW_LSB 0x18
+#define QIB_7322_Revision_R_SW_MSB 0x1F
+#define QIB_7322_Revision_R_SW_RMASK 0xFF
+#define QIB_7322_Revision_R_Arch_LSB 0x10
+#define QIB_7322_Revision_R_Arch_MSB 0x17
+#define QIB_7322_Revision_R_Arch_RMASK 0xFF
+#define QIB_7322_Revision_R_ChipRevMajor_LSB 0x8
+#define QIB_7322_Revision_R_ChipRevMajor_MSB 0xF
+#define QIB_7322_Revision_R_ChipRevMajor_RMASK 0xFF
+#define QIB_7322_Revision_R_ChipRevMinor_LSB 0x0
+#define QIB_7322_Revision_R_ChipRevMinor_MSB 0x7
+#define QIB_7322_Revision_R_ChipRevMinor_RMASK 0xFF
+
+#define QIB_7322_Control_OFFS 0x8
+#define QIB_7322_Control_DEF 0x0000000000000000
+#define QIB_7322_Control_PCIECplQDiagEn_LSB 0x6
+#define QIB_7322_Control_PCIECplQDiagEn_MSB 0x6
+#define QIB_7322_Control_PCIECplQDiagEn_RMASK 0x1
+#define QIB_7322_Control_PCIEPostQDiagEn_LSB 0x5
+#define QIB_7322_Control_PCIEPostQDiagEn_MSB 0x5
+#define QIB_7322_Control_PCIEPostQDiagEn_RMASK 0x1
+#define QIB_7322_Control_SDmaDescFetchPriorityEn_LSB 0x4
+#define QIB_7322_Control_SDmaDescFetchPriorityEn_MSB 0x4
+#define QIB_7322_Control_SDmaDescFetchPriorityEn_RMASK 0x1
+#define QIB_7322_Control_PCIERetryBufDiagEn_LSB 0x3
+#define QIB_7322_Control_PCIERetryBufDiagEn_MSB 0x3
+#define QIB_7322_Control_PCIERetryBufDiagEn_RMASK 0x1
+#define QIB_7322_Control_FreezeMode_LSB 0x1
+#define QIB_7322_Control_FreezeMode_MSB 0x1
+#define QIB_7322_Control_FreezeMode_RMASK 0x1
+#define QIB_7322_Control_SyncReset_LSB 0x0
+#define QIB_7322_Control_SyncReset_MSB 0x0
+#define QIB_7322_Control_SyncReset_RMASK 0x1
+
+#define QIB_7322_PageAlign_OFFS 0x10
+#define QIB_7322_PageAlign_DEF 0x0000000000001000
+
+#define QIB_7322_ContextCnt_OFFS 0x18
+#define QIB_7322_ContextCnt_DEF 0x0000000000000012
+
+#define QIB_7322_Scratch_OFFS 0x20
+#define QIB_7322_Scratch_DEF 0x0000000000000000
+
+#define QIB_7322_CntrRegBase_OFFS 0x28
+#define QIB_7322_CntrRegBase_DEF 0x0000000000011000
+
+#define QIB_7322_SendRegBase_OFFS 0x30
+#define QIB_7322_SendRegBase_DEF 0x0000000000003000
+
+#define QIB_7322_UserRegBase_OFFS 0x38
+#define QIB_7322_UserRegBase_DEF 0x0000000000200000
+
+#define QIB_7322_IntMask_OFFS 0x68
+#define QIB_7322_IntMask_DEF 0x0000000000000000
+#define QIB_7322_IntMask_SDmaIntMask_1_LSB 0x3F
+#define QIB_7322_IntMask_SDmaIntMask_1_MSB 0x3F
+#define QIB_7322_IntMask_SDmaIntMask_1_RMASK 0x1
+#define QIB_7322_IntMask_SDmaIntMask_0_LSB 0x3E
+#define QIB_7322_IntMask_SDmaIntMask_0_MSB 0x3E
+#define QIB_7322_IntMask_SDmaIntMask_0_RMASK 0x1
+#define QIB_7322_IntMask_SDmaProgressIntMask_1_LSB 0x3D
+#define QIB_7322_IntMask_SDmaProgressIntMask_1_MSB 0x3D
+#define QIB_7322_IntMask_SDmaProgressIntMask_1_RMASK 0x1
+#define QIB_7322_IntMask_SDmaProgressIntMask_0_LSB 0x3C
+#define QIB_7322_IntMask_SDmaProgressIntMask_0_MSB 0x3C
+#define QIB_7322_IntMask_SDmaProgressIntMask_0_RMASK 0x1
+#define QIB_7322_IntMask_SDmaIdleIntMask_1_LSB 0x3B
+#define QIB_7322_IntMask_SDmaIdleIntMask_1_MSB 0x3B
+#define QIB_7322_IntMask_SDmaIdleIntMask_1_RMASK 0x1
+#define QIB_7322_IntMask_SDmaIdleIntMask_0_LSB 0x3A
+#define QIB_7322_IntMask_SDmaIdleIntMask_0_MSB 0x3A
+#define QIB_7322_IntMask_SDmaIdleIntMask_0_RMASK 0x1
+#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_LSB 0x39
+#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_MSB 0x39
+#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_RMASK 0x1
+#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_LSB 0x38
+#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_MSB 0x38
+#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg17IntMask_LSB 0x31
+#define QIB_7322_IntMask_RcvUrg17IntMask_MSB 0x31
+#define QIB_7322_IntMask_RcvUrg17IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg16IntMask_LSB 0x30
+#define QIB_7322_IntMask_RcvUrg16IntMask_MSB 0x30
+#define QIB_7322_IntMask_RcvUrg16IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg15IntMask_LSB 0x2F
+#define QIB_7322_IntMask_RcvUrg15IntMask_MSB 0x2F
+#define QIB_7322_IntMask_RcvUrg15IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg14IntMask_LSB 0x2E
+#define QIB_7322_IntMask_RcvUrg14IntMask_MSB 0x2E
+#define QIB_7322_IntMask_RcvUrg14IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg13IntMask_LSB 0x2D
+#define QIB_7322_IntMask_RcvUrg13IntMask_MSB 0x2D
+#define QIB_7322_IntMask_RcvUrg13IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg12IntMask_LSB 0x2C
+#define QIB_7322_IntMask_RcvUrg12IntMask_MSB 0x2C
+#define QIB_7322_IntMask_RcvUrg12IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg11IntMask_LSB 0x2B
+#define QIB_7322_IntMask_RcvUrg11IntMask_MSB 0x2B
+#define QIB_7322_IntMask_RcvUrg11IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg10IntMask_LSB 0x2A
+#define QIB_7322_IntMask_RcvUrg10IntMask_MSB 0x2A
+#define QIB_7322_IntMask_RcvUrg10IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg9IntMask_LSB 0x29
+#define QIB_7322_IntMask_RcvUrg9IntMask_MSB 0x29
+#define QIB_7322_IntMask_RcvUrg9IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg8IntMask_LSB 0x28
+#define QIB_7322_IntMask_RcvUrg8IntMask_MSB 0x28
+#define QIB_7322_IntMask_RcvUrg8IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg7IntMask_LSB 0x27
+#define QIB_7322_IntMask_RcvUrg7IntMask_MSB 0x27
+#define QIB_7322_IntMask_RcvUrg7IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg6IntMask_LSB 0x26
+#define QIB_7322_IntMask_RcvUrg6IntMask_MSB 0x26
+#define QIB_7322_IntMask_RcvUrg6IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg5IntMask_LSB 0x25
+#define QIB_7322_IntMask_RcvUrg5IntMask_MSB 0x25
+#define QIB_7322_IntMask_RcvUrg5IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg4IntMask_LSB 0x24
+#define QIB_7322_IntMask_RcvUrg4IntMask_MSB 0x24
+#define QIB_7322_IntMask_RcvUrg4IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg3IntMask_LSB 0x23
+#define QIB_7322_IntMask_RcvUrg3IntMask_MSB 0x23
+#define QIB_7322_IntMask_RcvUrg3IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg2IntMask_LSB 0x22
+#define QIB_7322_IntMask_RcvUrg2IntMask_MSB 0x22
+#define QIB_7322_IntMask_RcvUrg2IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg1IntMask_LSB 0x21
+#define QIB_7322_IntMask_RcvUrg1IntMask_MSB 0x21
+#define QIB_7322_IntMask_RcvUrg1IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvUrg0IntMask_LSB 0x20
+#define QIB_7322_IntMask_RcvUrg0IntMask_MSB 0x20
+#define QIB_7322_IntMask_RcvUrg0IntMask_RMASK 0x1
+#define QIB_7322_IntMask_ErrIntMask_1_LSB 0x1F
+#define QIB_7322_IntMask_ErrIntMask_1_MSB 0x1F
+#define QIB_7322_IntMask_ErrIntMask_1_RMASK 0x1
+#define QIB_7322_IntMask_ErrIntMask_0_LSB 0x1E
+#define QIB_7322_IntMask_ErrIntMask_0_MSB 0x1E
+#define QIB_7322_IntMask_ErrIntMask_0_RMASK 0x1
+#define QIB_7322_IntMask_ErrIntMask_LSB 0x1D
+#define QIB_7322_IntMask_ErrIntMask_MSB 0x1D
+#define QIB_7322_IntMask_ErrIntMask_RMASK 0x1
+#define QIB_7322_IntMask_AssertGPIOIntMask_LSB 0x1C
+#define QIB_7322_IntMask_AssertGPIOIntMask_MSB 0x1C
+#define QIB_7322_IntMask_AssertGPIOIntMask_RMASK 0x1
+#define QIB_7322_IntMask_SendDoneIntMask_1_LSB 0x19
+#define QIB_7322_IntMask_SendDoneIntMask_1_MSB 0x19
+#define QIB_7322_IntMask_SendDoneIntMask_1_RMASK 0x1
+#define QIB_7322_IntMask_SendDoneIntMask_0_LSB 0x18
+#define QIB_7322_IntMask_SendDoneIntMask_0_MSB 0x18
+#define QIB_7322_IntMask_SendDoneIntMask_0_RMASK 0x1
+#define QIB_7322_IntMask_SendBufAvailIntMask_LSB 0x17
+#define QIB_7322_IntMask_SendBufAvailIntMask_MSB 0x17
+#define QIB_7322_IntMask_SendBufAvailIntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail17IntMask_LSB 0x11
+#define QIB_7322_IntMask_RcvAvail17IntMask_MSB 0x11
+#define QIB_7322_IntMask_RcvAvail17IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail16IntMask_LSB 0x10
+#define QIB_7322_IntMask_RcvAvail16IntMask_MSB 0x10
+#define QIB_7322_IntMask_RcvAvail16IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail15IntMask_LSB 0xF
+#define QIB_7322_IntMask_RcvAvail15IntMask_MSB 0xF
+#define QIB_7322_IntMask_RcvAvail15IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail14IntMask_LSB 0xE
+#define QIB_7322_IntMask_RcvAvail14IntMask_MSB 0xE
+#define QIB_7322_IntMask_RcvAvail14IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail13IntMask_LSB 0xD
+#define QIB_7322_IntMask_RcvAvail13IntMask_MSB 0xD
+#define QIB_7322_IntMask_RcvAvail13IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail12IntMask_LSB 0xC
+#define QIB_7322_IntMask_RcvAvail12IntMask_MSB 0xC
+#define QIB_7322_IntMask_RcvAvail12IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail11IntMask_LSB 0xB
+#define QIB_7322_IntMask_RcvAvail11IntMask_MSB 0xB
+#define QIB_7322_IntMask_RcvAvail11IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail10IntMask_LSB 0xA
+#define QIB_7322_IntMask_RcvAvail10IntMask_MSB 0xA
+#define QIB_7322_IntMask_RcvAvail10IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail9IntMask_LSB 0x9
+#define QIB_7322_IntMask_RcvAvail9IntMask_MSB 0x9
+#define QIB_7322_IntMask_RcvAvail9IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail8IntMask_LSB 0x8
+#define QIB_7322_IntMask_RcvAvail8IntMask_MSB 0x8
+#define QIB_7322_IntMask_RcvAvail8IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail7IntMask_LSB 0x7
+#define QIB_7322_IntMask_RcvAvail7IntMask_MSB 0x7
+#define QIB_7322_IntMask_RcvAvail7IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail6IntMask_LSB 0x6
+#define QIB_7322_IntMask_RcvAvail6IntMask_MSB 0x6
+#define QIB_7322_IntMask_RcvAvail6IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail5IntMask_LSB 0x5
+#define QIB_7322_IntMask_RcvAvail5IntMask_MSB 0x5
+#define QIB_7322_IntMask_RcvAvail5IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail4IntMask_LSB 0x4
+#define QIB_7322_IntMask_RcvAvail4IntMask_MSB 0x4
+#define QIB_7322_IntMask_RcvAvail4IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail3IntMask_LSB 0x3
+#define QIB_7322_IntMask_RcvAvail3IntMask_MSB 0x3
+#define QIB_7322_IntMask_RcvAvail3IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail2IntMask_LSB 0x2
+#define QIB_7322_IntMask_RcvAvail2IntMask_MSB 0x2
+#define QIB_7322_IntMask_RcvAvail2IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail1IntMask_LSB 0x1
+#define QIB_7322_IntMask_RcvAvail1IntMask_MSB 0x1
+#define QIB_7322_IntMask_RcvAvail1IntMask_RMASK 0x1
+#define QIB_7322_IntMask_RcvAvail0IntMask_LSB 0x0
+#define QIB_7322_IntMask_RcvAvail0IntMask_MSB 0x0
+#define QIB_7322_IntMask_RcvAvail0IntMask_RMASK 0x1
+
+#define QIB_7322_IntStatus_OFFS 0x70
+#define QIB_7322_IntStatus_DEF 0x0000000000000000
+#define QIB_7322_IntStatus_SDmaInt_1_LSB 0x3F
+#define QIB_7322_IntStatus_SDmaInt_1_MSB 0x3F
+#define QIB_7322_IntStatus_SDmaInt_1_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaInt_0_LSB 0x3E
+#define QIB_7322_IntStatus_SDmaInt_0_MSB 0x3E
+#define QIB_7322_IntStatus_SDmaInt_0_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaProgressInt_1_LSB 0x3D
+#define QIB_7322_IntStatus_SDmaProgressInt_1_MSB 0x3D
+#define QIB_7322_IntStatus_SDmaProgressInt_1_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaProgressInt_0_LSB 0x3C
+#define QIB_7322_IntStatus_SDmaProgressInt_0_MSB 0x3C
+#define QIB_7322_IntStatus_SDmaProgressInt_0_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaIdleInt_1_LSB 0x3B
+#define QIB_7322_IntStatus_SDmaIdleInt_1_MSB 0x3B
+#define QIB_7322_IntStatus_SDmaIdleInt_1_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaIdleInt_0_LSB 0x3A
+#define QIB_7322_IntStatus_SDmaIdleInt_0_MSB 0x3A
+#define QIB_7322_IntStatus_SDmaIdleInt_0_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaCleanupDone_1_LSB 0x39
+#define QIB_7322_IntStatus_SDmaCleanupDone_1_MSB 0x39
+#define QIB_7322_IntStatus_SDmaCleanupDone_1_RMASK 0x1
+#define QIB_7322_IntStatus_SDmaCleanupDone_0_LSB 0x38
+#define QIB_7322_IntStatus_SDmaCleanupDone_0_MSB 0x38
+#define QIB_7322_IntStatus_SDmaCleanupDone_0_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg17_LSB 0x31
+#define QIB_7322_IntStatus_RcvUrg17_MSB 0x31
+#define QIB_7322_IntStatus_RcvUrg17_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg16_LSB 0x30
+#define QIB_7322_IntStatus_RcvUrg16_MSB 0x30
+#define QIB_7322_IntStatus_RcvUrg16_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg15_LSB 0x2F
+#define QIB_7322_IntStatus_RcvUrg15_MSB 0x2F
+#define QIB_7322_IntStatus_RcvUrg15_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg14_LSB 0x2E
+#define QIB_7322_IntStatus_RcvUrg14_MSB 0x2E
+#define QIB_7322_IntStatus_RcvUrg14_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg13_LSB 0x2D
+#define QIB_7322_IntStatus_RcvUrg13_MSB 0x2D
+#define QIB_7322_IntStatus_RcvUrg13_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg12_LSB 0x2C
+#define QIB_7322_IntStatus_RcvUrg12_MSB 0x2C
+#define QIB_7322_IntStatus_RcvUrg12_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg11_LSB 0x2B
+#define QIB_7322_IntStatus_RcvUrg11_MSB 0x2B
+#define QIB_7322_IntStatus_RcvUrg11_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg10_LSB 0x2A
+#define QIB_7322_IntStatus_RcvUrg10_MSB 0x2A
+#define QIB_7322_IntStatus_RcvUrg10_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg9_LSB 0x29
+#define QIB_7322_IntStatus_RcvUrg9_MSB 0x29
+#define QIB_7322_IntStatus_RcvUrg9_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg8_LSB 0x28
+#define QIB_7322_IntStatus_RcvUrg8_MSB 0x28
+#define QIB_7322_IntStatus_RcvUrg8_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg7_LSB 0x27
+#define QIB_7322_IntStatus_RcvUrg7_MSB 0x27
+#define QIB_7322_IntStatus_RcvUrg7_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg6_LSB 0x26
+#define QIB_7322_IntStatus_RcvUrg6_MSB 0x26
+#define QIB_7322_IntStatus_RcvUrg6_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg5_LSB 0x25
+#define QIB_7322_IntStatus_RcvUrg5_MSB 0x25
+#define QIB_7322_IntStatus_RcvUrg5_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg4_LSB 0x24
+#define QIB_7322_IntStatus_RcvUrg4_MSB 0x24
+#define QIB_7322_IntStatus_RcvUrg4_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg3_LSB 0x23
+#define QIB_7322_IntStatus_RcvUrg3_MSB 0x23
+#define QIB_7322_IntStatus_RcvUrg3_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg2_LSB 0x22
+#define QIB_7322_IntStatus_RcvUrg2_MSB 0x22
+#define QIB_7322_IntStatus_RcvUrg2_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg1_LSB 0x21
+#define QIB_7322_IntStatus_RcvUrg1_MSB 0x21
+#define QIB_7322_IntStatus_RcvUrg1_RMASK 0x1
+#define QIB_7322_IntStatus_RcvUrg0_LSB 0x20
+#define QIB_7322_IntStatus_RcvUrg0_MSB 0x20
+#define QIB_7322_IntStatus_RcvUrg0_RMASK 0x1
+#define QIB_7322_IntStatus_Err_1_LSB 0x1F
+#define QIB_7322_IntStatus_Err_1_MSB 0x1F
+#define QIB_7322_IntStatus_Err_1_RMASK 0x1
+#define QIB_7322_IntStatus_Err_0_LSB 0x1E
+#define QIB_7322_IntStatus_Err_0_MSB 0x1E
+#define QIB_7322_IntStatus_Err_0_RMASK 0x1
+#define QIB_7322_IntStatus_Err_LSB 0x1D
+#define QIB_7322_IntStatus_Err_MSB 0x1D
+#define QIB_7322_IntStatus_Err_RMASK 0x1
+#define QIB_7322_IntStatus_AssertGPIO_LSB 0x1C
+#define QIB_7322_IntStatus_AssertGPIO_MSB 0x1C
+#define QIB_7322_IntStatus_AssertGPIO_RMASK 0x1
+#define QIB_7322_IntStatus_SendDone_1_LSB 0x19
+#define QIB_7322_IntStatus_SendDone_1_MSB 0x19
+#define QIB_7322_IntStatus_SendDone_1_RMASK 0x1
+#define QIB_7322_IntStatus_SendDone_0_LSB 0x18
+#define QIB_7322_IntStatus_SendDone_0_MSB 0x18
+#define QIB_7322_IntStatus_SendDone_0_RMASK 0x1
+#define QIB_7322_IntStatus_SendBufAvail_LSB 0x17
+#define QIB_7322_IntStatus_SendBufAvail_MSB 0x17
+#define QIB_7322_IntStatus_SendBufAvail_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail17_LSB 0x11
+#define QIB_7322_IntStatus_RcvAvail17_MSB 0x11
+#define QIB_7322_IntStatus_RcvAvail17_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail16_LSB 0x10
+#define QIB_7322_IntStatus_RcvAvail16_MSB 0x10
+#define QIB_7322_IntStatus_RcvAvail16_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail15_LSB 0xF
+#define QIB_7322_IntStatus_RcvAvail15_MSB 0xF
+#define QIB_7322_IntStatus_RcvAvail15_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail14_LSB 0xE
+#define QIB_7322_IntStatus_RcvAvail14_MSB 0xE
+#define QIB_7322_IntStatus_RcvAvail14_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail13_LSB 0xD
+#define QIB_7322_IntStatus_RcvAvail13_MSB 0xD
+#define QIB_7322_IntStatus_RcvAvail13_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail12_LSB 0xC
+#define QIB_7322_IntStatus_RcvAvail12_MSB 0xC
+#define QIB_7322_IntStatus_RcvAvail12_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail11_LSB 0xB
+#define QIB_7322_IntStatus_RcvAvail11_MSB 0xB
+#define QIB_7322_IntStatus_RcvAvail11_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail10_LSB 0xA
+#define QIB_7322_IntStatus_RcvAvail10_MSB 0xA
+#define QIB_7322_IntStatus_RcvAvail10_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail9_LSB 0x9
+#define QIB_7322_IntStatus_RcvAvail9_MSB 0x9
+#define QIB_7322_IntStatus_RcvAvail9_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail8_LSB 0x8
+#define QIB_7322_IntStatus_RcvAvail8_MSB 0x8
+#define QIB_7322_IntStatus_RcvAvail8_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail7_LSB 0x7
+#define QIB_7322_IntStatus_RcvAvail7_MSB 0x7
+#define QIB_7322_IntStatus_RcvAvail7_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail6_LSB 0x6
+#define QIB_7322_IntStatus_RcvAvail6_MSB 0x6
+#define QIB_7322_IntStatus_RcvAvail6_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail5_LSB 0x5
+#define QIB_7322_IntStatus_RcvAvail5_MSB 0x5
+#define QIB_7322_IntStatus_RcvAvail5_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail4_LSB 0x4
+#define QIB_7322_IntStatus_RcvAvail4_MSB 0x4
+#define QIB_7322_IntStatus_RcvAvail4_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail3_LSB 0x3
+#define QIB_7322_IntStatus_RcvAvail3_MSB 0x3
+#define QIB_7322_IntStatus_RcvAvail3_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail2_LSB 0x2
+#define QIB_7322_IntStatus_RcvAvail2_MSB 0x2
+#define QIB_7322_IntStatus_RcvAvail2_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail1_LSB 0x1
+#define QIB_7322_IntStatus_RcvAvail1_MSB 0x1
+#define QIB_7322_IntStatus_RcvAvail1_RMASK 0x1
+#define QIB_7322_IntStatus_RcvAvail0_LSB 0x0
+#define QIB_7322_IntStatus_RcvAvail0_MSB 0x0
+#define QIB_7322_IntStatus_RcvAvail0_RMASK 0x1
+
+#define QIB_7322_IntClear_OFFS 0x78
+#define QIB_7322_IntClear_DEF 0x0000000000000000
+#define QIB_7322_IntClear_SDmaIntClear_1_LSB 0x3F
+#define QIB_7322_IntClear_SDmaIntClear_1_MSB 0x3F
+#define QIB_7322_IntClear_SDmaIntClear_1_RMASK 0x1
+#define QIB_7322_IntClear_SDmaIntClear_0_LSB 0x3E
+#define QIB_7322_IntClear_SDmaIntClear_0_MSB 0x3E
+#define QIB_7322_IntClear_SDmaIntClear_0_RMASK 0x1
+#define QIB_7322_IntClear_SDmaProgressIntClear_1_LSB 0x3D
+#define QIB_7322_IntClear_SDmaProgressIntClear_1_MSB 0x3D
+#define QIB_7322_IntClear_SDmaProgressIntClear_1_RMASK 0x1
+#define QIB_7322_IntClear_SDmaProgressIntClear_0_LSB 0x3C
+#define QIB_7322_IntClear_SDmaProgressIntClear_0_MSB 0x3C
+#define QIB_7322_IntClear_SDmaProgressIntClear_0_RMASK 0x1
+#define QIB_7322_IntClear_SDmaIdleIntClear_1_LSB 0x3B
+#define QIB_7322_IntClear_SDmaIdleIntClear_1_MSB 0x3B
+#define QIB_7322_IntClear_SDmaIdleIntClear_1_RMASK 0x1
+#define QIB_7322_IntClear_SDmaIdleIntClear_0_LSB 0x3A
+#define QIB_7322_IntClear_SDmaIdleIntClear_0_MSB 0x3A
+#define QIB_7322_IntClear_SDmaIdleIntClear_0_RMASK 0x1
+#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_LSB 0x39
+#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_MSB 0x39
+#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_RMASK 0x1
+#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_LSB 0x38
+#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_MSB 0x38
+#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg17IntClear_LSB 0x31
+#define QIB_7322_IntClear_RcvUrg17IntClear_MSB 0x31
+#define QIB_7322_IntClear_RcvUrg17IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg16IntClear_LSB 0x30
+#define QIB_7322_IntClear_RcvUrg16IntClear_MSB 0x30
+#define QIB_7322_IntClear_RcvUrg16IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg15IntClear_LSB 0x2F
+#define QIB_7322_IntClear_RcvUrg15IntClear_MSB 0x2F
+#define QIB_7322_IntClear_RcvUrg15IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg14IntClear_LSB 0x2E
+#define QIB_7322_IntClear_RcvUrg14IntClear_MSB 0x2E
+#define QIB_7322_IntClear_RcvUrg14IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg13IntClear_LSB 0x2D
+#define QIB_7322_IntClear_RcvUrg13IntClear_MSB 0x2D
+#define QIB_7322_IntClear_RcvUrg13IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg12IntClear_LSB 0x2C
+#define QIB_7322_IntClear_RcvUrg12IntClear_MSB 0x2C
+#define QIB_7322_IntClear_RcvUrg12IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg11IntClear_LSB 0x2B
+#define QIB_7322_IntClear_RcvUrg11IntClear_MSB 0x2B
+#define QIB_7322_IntClear_RcvUrg11IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg10IntClear_LSB 0x2A
+#define QIB_7322_IntClear_RcvUrg10IntClear_MSB 0x2A
+#define QIB_7322_IntClear_RcvUrg10IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg9IntClear_LSB 0x29
+#define QIB_7322_IntClear_RcvUrg9IntClear_MSB 0x29
+#define QIB_7322_IntClear_RcvUrg9IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg8IntClear_LSB 0x28
+#define QIB_7322_IntClear_RcvUrg8IntClear_MSB 0x28
+#define QIB_7322_IntClear_RcvUrg8IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg7IntClear_LSB 0x27
+#define QIB_7322_IntClear_RcvUrg7IntClear_MSB 0x27
+#define QIB_7322_IntClear_RcvUrg7IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg6IntClear_LSB 0x26
+#define QIB_7322_IntClear_RcvUrg6IntClear_MSB 0x26
+#define QIB_7322_IntClear_RcvUrg6IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg5IntClear_LSB 0x25
+#define QIB_7322_IntClear_RcvUrg5IntClear_MSB 0x25
+#define QIB_7322_IntClear_RcvUrg5IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg4IntClear_LSB 0x24
+#define QIB_7322_IntClear_RcvUrg4IntClear_MSB 0x24
+#define QIB_7322_IntClear_RcvUrg4IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg3IntClear_LSB 0x23
+#define QIB_7322_IntClear_RcvUrg3IntClear_MSB 0x23
+#define QIB_7322_IntClear_RcvUrg3IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg2IntClear_LSB 0x22
+#define QIB_7322_IntClear_RcvUrg2IntClear_MSB 0x22
+#define QIB_7322_IntClear_RcvUrg2IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg1IntClear_LSB 0x21
+#define QIB_7322_IntClear_RcvUrg1IntClear_MSB 0x21
+#define QIB_7322_IntClear_RcvUrg1IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvUrg0IntClear_LSB 0x20
+#define QIB_7322_IntClear_RcvUrg0IntClear_MSB 0x20
+#define QIB_7322_IntClear_RcvUrg0IntClear_RMASK 0x1
+#define QIB_7322_IntClear_ErrIntClear_1_LSB 0x1F
+#define QIB_7322_IntClear_ErrIntClear_1_MSB 0x1F
+#define QIB_7322_IntClear_ErrIntClear_1_RMASK 0x1
+#define QIB_7322_IntClear_ErrIntClear_0_LSB 0x1E
+#define QIB_7322_IntClear_ErrIntClear_0_MSB 0x1E
+#define QIB_7322_IntClear_ErrIntClear_0_RMASK 0x1
+#define QIB_7322_IntClear_ErrIntClear_LSB 0x1D
+#define QIB_7322_IntClear_ErrIntClear_MSB 0x1D
+#define QIB_7322_IntClear_ErrIntClear_RMASK 0x1
+#define QIB_7322_IntClear_AssertGPIOIntClear_LSB 0x1C
+#define QIB_7322_IntClear_AssertGPIOIntClear_MSB 0x1C
+#define QIB_7322_IntClear_AssertGPIOIntClear_RMASK 0x1
+#define QIB_7322_IntClear_SendDoneIntClear_1_LSB 0x19
+#define QIB_7322_IntClear_SendDoneIntClear_1_MSB 0x19
+#define QIB_7322_IntClear_SendDoneIntClear_1_RMASK 0x1
+#define QIB_7322_IntClear_SendDoneIntClear_0_LSB 0x18
+#define QIB_7322_IntClear_SendDoneIntClear_0_MSB 0x18
+#define QIB_7322_IntClear_SendDoneIntClear_0_RMASK 0x1
+#define QIB_7322_IntClear_SendBufAvailIntClear_LSB 0x17
+#define QIB_7322_IntClear_SendBufAvailIntClear_MSB 0x17
+#define QIB_7322_IntClear_SendBufAvailIntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail17IntClear_LSB 0x11
+#define QIB_7322_IntClear_RcvAvail17IntClear_MSB 0x11
+#define QIB_7322_IntClear_RcvAvail17IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail16IntClear_LSB 0x10
+#define QIB_7322_IntClear_RcvAvail16IntClear_MSB 0x10
+#define QIB_7322_IntClear_RcvAvail16IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail15IntClear_LSB 0xF
+#define QIB_7322_IntClear_RcvAvail15IntClear_MSB 0xF
+#define QIB_7322_IntClear_RcvAvail15IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail14IntClear_LSB 0xE
+#define QIB_7322_IntClear_RcvAvail14IntClear_MSB 0xE
+#define QIB_7322_IntClear_RcvAvail14IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail13IntClear_LSB 0xD
+#define QIB_7322_IntClear_RcvAvail13IntClear_MSB 0xD
+#define QIB_7322_IntClear_RcvAvail13IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail12IntClear_LSB 0xC
+#define QIB_7322_IntClear_RcvAvail12IntClear_MSB 0xC
+#define QIB_7322_IntClear_RcvAvail12IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail11IntClear_LSB 0xB
+#define QIB_7322_IntClear_RcvAvail11IntClear_MSB 0xB
+#define QIB_7322_IntClear_RcvAvail11IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail10IntClear_LSB 0xA
+#define QIB_7322_IntClear_RcvAvail10IntClear_MSB 0xA
+#define QIB_7322_IntClear_RcvAvail10IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail9IntClear_LSB 0x9
+#define QIB_7322_IntClear_RcvAvail9IntClear_MSB 0x9
+#define QIB_7322_IntClear_RcvAvail9IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail8IntClear_LSB 0x8
+#define QIB_7322_IntClear_RcvAvail8IntClear_MSB 0x8
+#define QIB_7322_IntClear_RcvAvail8IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail7IntClear_LSB 0x7
+#define QIB_7322_IntClear_RcvAvail7IntClear_MSB 0x7
+#define QIB_7322_IntClear_RcvAvail7IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail6IntClear_LSB 0x6
+#define QIB_7322_IntClear_RcvAvail6IntClear_MSB 0x6
+#define QIB_7322_IntClear_RcvAvail6IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail5IntClear_LSB 0x5
+#define QIB_7322_IntClear_RcvAvail5IntClear_MSB 0x5
+#define QIB_7322_IntClear_RcvAvail5IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail4IntClear_LSB 0x4
+#define QIB_7322_IntClear_RcvAvail4IntClear_MSB 0x4
+#define QIB_7322_IntClear_RcvAvail4IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail3IntClear_LSB 0x3
+#define QIB_7322_IntClear_RcvAvail3IntClear_MSB 0x3
+#define QIB_7322_IntClear_RcvAvail3IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail2IntClear_LSB 0x2
+#define QIB_7322_IntClear_RcvAvail2IntClear_MSB 0x2
+#define QIB_7322_IntClear_RcvAvail2IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail1IntClear_LSB 0x1
+#define QIB_7322_IntClear_RcvAvail1IntClear_MSB 0x1
+#define QIB_7322_IntClear_RcvAvail1IntClear_RMASK 0x1
+#define QIB_7322_IntClear_RcvAvail0IntClear_LSB 0x0
+#define QIB_7322_IntClear_RcvAvail0IntClear_MSB 0x0
+#define QIB_7322_IntClear_RcvAvail0IntClear_RMASK 0x1
+
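
Every field in these generated tables follows the same convention: a register byte offset (_OFFS), a power-on default (_DEF), and per-field bit position (_LSB/_MSB) plus a right-justified width mask (_RMASK). As a minimal sketch of how such constants are typically consumed (the helper names below are hypothetical and not part of this patch or of the driver), a field access reduces to a shift-and-mask:

#include <stdint.h>

/*
 * Hypothetical helpers, for illustration only: extract or update one field
 * of a 64-bit register value using its generated _LSB and _RMASK constants.
 */
static inline uint64_t qib_field_get(uint64_t reg, unsigned int lsb,
				     uint64_t rmask)
{
	return (reg >> lsb) & rmask;		/* shift field down, mask to width */
}

static inline uint64_t qib_field_put(uint64_t reg, unsigned int lsb,
				     uint64_t rmask, uint64_t val)
{
	reg &= ~(rmask << lsb);			/* clear the old field contents */
	return reg | ((val & rmask) << lsb);	/* insert the new value */
}

/* Example: compose an IntClear value that clears the RcvUrg0 interrupt. */
static inline uint64_t qib_intclear_rcvurg0(void)
{
	return qib_field_put(0, QIB_7322_IntClear_RcvUrg0IntClear_LSB,
			     QIB_7322_IntClear_RcvUrg0IntClear_RMASK, 1);
}
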
+#define QIB_7322_ErrMask_OFFS 0x80
+#define QIB_7322_ErrMask_DEF 0x0000000000000000
+#define QIB_7322_ErrMask_ResetNegatedMask_LSB 0x3F
+#define QIB_7322_ErrMask_ResetNegatedMask_MSB 0x3F
+#define QIB_7322_ErrMask_ResetNegatedMask_RMASK 0x1
+#define QIB_7322_ErrMask_HardwareErrMask_LSB 0x3E
+#define QIB_7322_ErrMask_HardwareErrMask_MSB 0x3E
+#define QIB_7322_ErrMask_HardwareErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_InvalidAddrErrMask_LSB 0x3D
+#define QIB_7322_ErrMask_InvalidAddrErrMask_MSB 0x3D
+#define QIB_7322_ErrMask_InvalidAddrErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SDmaVL15ErrMask_LSB 0x38
+#define QIB_7322_ErrMask_SDmaVL15ErrMask_MSB 0x38
+#define QIB_7322_ErrMask_SDmaVL15ErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_LSB 0x37
+#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_MSB 0x37
+#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_InvalidEEPCmdMask_LSB 0x35
+#define QIB_7322_ErrMask_InvalidEEPCmdMask_MSB 0x35
+#define QIB_7322_ErrMask_InvalidEEPCmdMask_RMASK 0x1
+#define QIB_7322_ErrMask_RcvContextShareErrMask_LSB 0x34
+#define QIB_7322_ErrMask_RcvContextShareErrMask_MSB 0x34
+#define QIB_7322_ErrMask_RcvContextShareErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SendVLMismatchErrMask_LSB 0x24
+#define QIB_7322_ErrMask_SendVLMismatchErrMask_MSB 0x24
+#define QIB_7322_ErrMask_SendVLMismatchErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SendArmLaunchErrMask_LSB 0x23
+#define QIB_7322_ErrMask_SendArmLaunchErrMask_MSB 0x23
+#define QIB_7322_ErrMask_SendArmLaunchErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B
+#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_MSB 0x1B
+#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SDmaWrongPortErrMask_LSB 0x1A
+#define QIB_7322_ErrMask_SDmaWrongPortErrMask_MSB 0x1A
+#define QIB_7322_ErrMask_SDmaWrongPortErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_LSB 0x19
+#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_MSB 0x19
+#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_RcvHdrFullErrMask_LSB 0xD
+#define QIB_7322_ErrMask_RcvHdrFullErrMask_MSB 0xD
+#define QIB_7322_ErrMask_RcvHdrFullErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_RcvEgrFullErrMask_LSB 0xC
+#define QIB_7322_ErrMask_RcvEgrFullErrMask_MSB 0xC
+#define QIB_7322_ErrMask_RcvEgrFullErrMask_RMASK 0x1
+
+#define QIB_7322_ErrStatus_OFFS 0x88
+#define QIB_7322_ErrStatus_DEF 0x0000000000000000
+#define QIB_7322_ErrStatus_ResetNegated_LSB 0x3F
+#define QIB_7322_ErrStatus_ResetNegated_MSB 0x3F
+#define QIB_7322_ErrStatus_ResetNegated_RMASK 0x1
+#define QIB_7322_ErrStatus_HardwareErr_LSB 0x3E
+#define QIB_7322_ErrStatus_HardwareErr_MSB 0x3E
+#define QIB_7322_ErrStatus_HardwareErr_RMASK 0x1
+#define QIB_7322_ErrStatus_InvalidAddrErr_LSB 0x3D
+#define QIB_7322_ErrStatus_InvalidAddrErr_MSB 0x3D
+#define QIB_7322_ErrStatus_InvalidAddrErr_RMASK 0x1
+#define QIB_7322_ErrStatus_SDmaVL15Err_LSB 0x38
+#define QIB_7322_ErrStatus_SDmaVL15Err_MSB 0x38
+#define QIB_7322_ErrStatus_SDmaVL15Err_RMASK 0x1
+#define QIB_7322_ErrStatus_SBufVL15MisUseErr_LSB 0x37
+#define QIB_7322_ErrStatus_SBufVL15MisUseErr_MSB 0x37
+#define QIB_7322_ErrStatus_SBufVL15MisUseErr_RMASK 0x1
+#define QIB_7322_ErrStatus_InvalidEEPCmdErr_LSB 0x35
+#define QIB_7322_ErrStatus_InvalidEEPCmdErr_MSB 0x35
+#define QIB_7322_ErrStatus_InvalidEEPCmdErr_RMASK 0x1
+#define QIB_7322_ErrStatus_RcvContextShareErr_LSB 0x34
+#define QIB_7322_ErrStatus_RcvContextShareErr_MSB 0x34
+#define QIB_7322_ErrStatus_RcvContextShareErr_RMASK 0x1
+#define QIB_7322_ErrStatus_SendVLMismatchErr_LSB 0x24
+#define QIB_7322_ErrStatus_SendVLMismatchErr_MSB 0x24
+#define QIB_7322_ErrStatus_SendVLMismatchErr_RMASK 0x1
+#define QIB_7322_ErrStatus_SendArmLaunchErr_LSB 0x23
+#define QIB_7322_ErrStatus_SendArmLaunchErr_MSB 0x23
+#define QIB_7322_ErrStatus_SendArmLaunchErr_RMASK 0x1
+#define QIB_7322_ErrStatus_SendSpecialTriggerErr_LSB 0x1B
+#define QIB_7322_ErrStatus_SendSpecialTriggerErr_MSB 0x1B
+#define QIB_7322_ErrStatus_SendSpecialTriggerErr_RMASK 0x1
+#define QIB_7322_ErrStatus_SDmaWrongPortErr_LSB 0x1A
+#define QIB_7322_ErrStatus_SDmaWrongPortErr_MSB 0x1A
+#define QIB_7322_ErrStatus_SDmaWrongPortErr_RMASK 0x1
+#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_LSB 0x19
+#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_MSB 0x19
+#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_RMASK 0x1
+#define QIB_7322_ErrStatus_RcvHdrFullErr_LSB 0xD
+#define QIB_7322_ErrStatus_RcvHdrFullErr_MSB 0xD
+#define QIB_7322_ErrStatus_RcvHdrFullErr_RMASK 0x1
+#define QIB_7322_ErrStatus_RcvEgrFullErr_LSB 0xC
+#define QIB_7322_ErrStatus_RcvEgrFullErr_MSB 0xC
+#define QIB_7322_ErrStatus_RcvEgrFullErr_RMASK 0x1
+
+#define QIB_7322_ErrClear_OFFS 0x90
+#define QIB_7322_ErrClear_DEF 0x0000000000000000
+#define QIB_7322_ErrClear_ResetNegatedClear_LSB 0x3F
+#define QIB_7322_ErrClear_ResetNegatedClear_MSB 0x3F
+#define QIB_7322_ErrClear_ResetNegatedClear_RMASK 0x1
+#define QIB_7322_ErrClear_HardwareErrClear_LSB 0x3E
+#define QIB_7322_ErrClear_HardwareErrClear_MSB 0x3E
+#define QIB_7322_ErrClear_HardwareErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_InvalidAddrErrClear_LSB 0x3D
+#define QIB_7322_ErrClear_InvalidAddrErrClear_MSB 0x3D
+#define QIB_7322_ErrClear_InvalidAddrErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_SDmaVL15ErrClear_LSB 0x38
+#define QIB_7322_ErrClear_SDmaVL15ErrClear_MSB 0x38
+#define QIB_7322_ErrClear_SDmaVL15ErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_LSB 0x37
+#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_MSB 0x37
+#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_LSB 0x35
+#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_MSB 0x35
+#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_RcvContextShareErrClear_LSB 0x34
+#define QIB_7322_ErrClear_RcvContextShareErrClear_MSB 0x34
+#define QIB_7322_ErrClear_RcvContextShareErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_SendVLMismatchErrMask_LSB 0x24
+#define QIB_7322_ErrClear_SendVLMismatchErrMask_MSB 0x24
+#define QIB_7322_ErrClear_SendVLMismatchErrMask_RMASK 0x1
+#define QIB_7322_ErrClear_SendArmLaunchErrClear_LSB 0x23
+#define QIB_7322_ErrClear_SendArmLaunchErrClear_MSB 0x23
+#define QIB_7322_ErrClear_SendArmLaunchErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B
+#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_MSB 0x1B
+#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_SDmaWrongPortErrClear_LSB 0x1A
+#define QIB_7322_ErrClear_SDmaWrongPortErrClear_MSB 0x1A
+#define QIB_7322_ErrClear_SDmaWrongPortErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_LSB 0x19
+#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_MSB 0x19
+#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_RcvHdrFullErrClear_LSB 0xD
+#define QIB_7322_ErrClear_RcvHdrFullErrClear_MSB 0xD
+#define QIB_7322_ErrClear_RcvHdrFullErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_RcvEgrFullErrClear_LSB 0xC
+#define QIB_7322_ErrClear_RcvEgrFullErrClear_MSB 0xC
+#define QIB_7322_ErrClear_RcvEgrFullErrClear_RMASK 0x1
+
+#define QIB_7322_HwErrMask_OFFS 0x98
+#define QIB_7322_HwErrMask_DEF 0x0000000000000000
+#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_LSB 0x3F
+#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_MSB 0x3F
+#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_RMASK 0x1
+#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_LSB 0x3E
+#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_MSB 0x3E
+#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_RMASK 0x1
+#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_LSB 0x37
+#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_MSB 0x37
+#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_RMASK 0x1
+#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_LSB 0x36
+#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_MSB 0x36
+#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1
+#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_LSB 0x35
+#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_MSB 0x35
+#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_RMASK 0x1
+#define QIB_7322_HwErrMask_MemoryErrMask_LSB 0x30
+#define QIB_7322_HwErrMask_MemoryErrMask_MSB 0x30
+#define QIB_7322_HwErrMask_MemoryErrMask_RMASK 0x1
+#define QIB_7322_HwErrMask_pcie_phy_txParityErr_LSB 0x22
+#define QIB_7322_HwErrMask_pcie_phy_txParityErr_MSB 0x22
+#define QIB_7322_HwErrMask_pcie_phy_txParityErr_RMASK 0x1
+#define QIB_7322_HwErrMask_PCIeBusParityErrMask_LSB 0x1F
+#define QIB_7322_HwErrMask_PCIeBusParityErrMask_MSB 0x21
+#define QIB_7322_HwErrMask_PCIeBusParityErrMask_RMASK 0x7
+#define QIB_7322_HwErrMask_PcieCplTimeoutMask_LSB 0x1E
+#define QIB_7322_HwErrMask_PcieCplTimeoutMask_MSB 0x1E
+#define QIB_7322_HwErrMask_PcieCplTimeoutMask_RMASK 0x1
+#define QIB_7322_HwErrMask_PciePoisonedTLPMask_LSB 0x1D
+#define QIB_7322_HwErrMask_PciePoisonedTLPMask_MSB 0x1D
+#define QIB_7322_HwErrMask_PciePoisonedTLPMask_RMASK 0x1
+#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_LSB 0x1C
+#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_MSB 0x1C
+#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_RMASK 0x1
+#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_LSB 0x1B
+#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_MSB 0x1B
+#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_RMASK 0x1
+#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_LSB 0xF
+#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_MSB 0xF
+#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_RMASK 0x1
+#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_LSB 0xE
+#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_MSB 0xE
+#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_RMASK 0x1
+#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_LSB 0xD
+#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_MSB 0xD
+#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_RMASK 0x1
+#define QIB_7322_HwErrMask_statusValidNoEopMask_LSB 0xC
+#define QIB_7322_HwErrMask_statusValidNoEopMask_MSB 0xC
+#define QIB_7322_HwErrMask_statusValidNoEopMask_RMASK 0x1
+#define QIB_7322_HwErrMask_LATriggeredMask_LSB 0xB
+#define QIB_7322_HwErrMask_LATriggeredMask_MSB 0xB
+#define QIB_7322_HwErrMask_LATriggeredMask_RMASK 0x1
+
+#define QIB_7322_HwErrStatus_OFFS 0xA0
+#define QIB_7322_HwErrStatus_DEF 0x0000000000000000
+#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_LSB 0x3F
+#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_MSB 0x3F
+#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_RMASK 0x1
+#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_LSB 0x3E
+#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_MSB 0x3E
+#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_RMASK 0x1
+#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_LSB 0x37
+#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_MSB 0x37
+#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_RMASK 0x1
+#define QIB_7322_HwErrStatus_PowerOnBISTFailed_LSB 0x36
+#define QIB_7322_HwErrStatus_PowerOnBISTFailed_MSB 0x36
+#define QIB_7322_HwErrStatus_PowerOnBISTFailed_RMASK 0x1
+#define QIB_7322_HwErrStatus_TempsenseTholdReached_LSB 0x35
+#define QIB_7322_HwErrStatus_TempsenseTholdReached_MSB 0x35
+#define QIB_7322_HwErrStatus_TempsenseTholdReached_RMASK 0x1
+#define QIB_7322_HwErrStatus_MemoryErr_LSB 0x30
+#define QIB_7322_HwErrStatus_MemoryErr_MSB 0x30
+#define QIB_7322_HwErrStatus_MemoryErr_RMASK 0x1
+#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_LSB 0x22
+#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_MSB 0x22
+#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_RMASK 0x1
+#define QIB_7322_HwErrStatus_PCIeBusParity_LSB 0x1F
+#define QIB_7322_HwErrStatus_PCIeBusParity_MSB 0x21
+#define QIB_7322_HwErrStatus_PCIeBusParity_RMASK 0x7
+#define QIB_7322_HwErrStatus_PcieCplTimeout_LSB 0x1E
+#define QIB_7322_HwErrStatus_PcieCplTimeout_MSB 0x1E
+#define QIB_7322_HwErrStatus_PcieCplTimeout_RMASK 0x1
+#define QIB_7322_HwErrStatus_PciePoisonedTLP_LSB 0x1D
+#define QIB_7322_HwErrStatus_PciePoisonedTLP_MSB 0x1D
+#define QIB_7322_HwErrStatus_PciePoisonedTLP_RMASK 0x1
+#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_LSB 0x1C
+#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_MSB 0x1C
+#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_RMASK 0x1
+#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_LSB 0x1B
+#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_MSB 0x1B
+#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_RMASK 0x1
+#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_LSB 0xF
+#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_MSB 0xF
+#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_RMASK 0x1
+#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_LSB 0xE
+#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_MSB 0xE
+#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_RMASK 0x1
+#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_LSB 0xD
+#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_MSB 0xD
+#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_RMASK 0x1
+#define QIB_7322_HwErrStatus_statusValidNoEop_LSB 0xC
+#define QIB_7322_HwErrStatus_statusValidNoEop_MSB 0xC
+#define QIB_7322_HwErrStatus_statusValidNoEop_RMASK 0x1
+#define QIB_7322_HwErrStatus_LATriggered_LSB 0xB
+#define QIB_7322_HwErrStatus_LATriggered_MSB 0xB
+#define QIB_7322_HwErrStatus_LATriggered_RMASK 0x1
+
+#define QIB_7322_HwErrClear_OFFS 0xA8
+#define QIB_7322_HwErrClear_DEF 0x0000000000000000
+#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_LSB 0x3F
+#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_MSB 0x3F
+#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_RMASK 0x1
+#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_LSB 0x3E
+#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_MSB 0x3E
+#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_RMASK 0x1
+#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_LSB 0x37
+#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_MSB 0x37
+#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_RMASK 0x1
+#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_LSB 0x36
+#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_MSB 0x36
+#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1
+#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_LSB 0x35
+#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_MSB 0x35
+#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_RMASK 0x1
+#define QIB_7322_HwErrClear_MemoryErrClear_LSB 0x30
+#define QIB_7322_HwErrClear_MemoryErrClear_MSB 0x30
+#define QIB_7322_HwErrClear_MemoryErrClear_RMASK 0x1
+#define QIB_7322_HwErrClear_pcie_phy_txParityErr_LSB 0x22
+#define QIB_7322_HwErrClear_pcie_phy_txParityErr_MSB 0x22
+#define QIB_7322_HwErrClear_pcie_phy_txParityErr_RMASK 0x1
+#define QIB_7322_HwErrClear_PCIeBusParityClear_LSB 0x1F
+#define QIB_7322_HwErrClear_PCIeBusParityClear_MSB 0x21
+#define QIB_7322_HwErrClear_PCIeBusParityClear_RMASK 0x7
+#define QIB_7322_HwErrClear_PcieCplTimeoutClear_LSB 0x1E
+#define QIB_7322_HwErrClear_PcieCplTimeoutClear_MSB 0x1E
+#define QIB_7322_HwErrClear_PcieCplTimeoutClear_RMASK 0x1
+#define QIB_7322_HwErrClear_PciePoisonedTLPClear_LSB 0x1D
+#define QIB_7322_HwErrClear_PciePoisonedTLPClear_MSB 0x1D
+#define QIB_7322_HwErrClear_PciePoisonedTLPClear_RMASK 0x1
+#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_LSB 0x1C
+#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_MSB 0x1C
+#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_RMASK 0x1
+#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_LSB 0x1B
+#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_MSB 0x1B
+#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_RMASK 0x1
+#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_LSB 0xF
+#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_MSB 0xF
+#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_RMASK 0x1
+#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_LSB 0xE
+#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_MSB 0xE
+#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_RMASK 0x1
+#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_LSB 0xD
+#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_MSB 0xD
+#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_RMASK 0x1
+#define QIB_7322_HwErrClear_statusValidNoEopClear_LSB 0xC
+#define QIB_7322_HwErrClear_statusValidNoEopClear_MSB 0xC
+#define QIB_7322_HwErrClear_statusValidNoEopClear_RMASK 0x1
+#define QIB_7322_HwErrClear_LATriggeredClear_LSB 0xB
+#define QIB_7322_HwErrClear_LATriggeredClear_MSB 0xB
+#define QIB_7322_HwErrClear_LATriggeredClear_RMASK 0x1
+
+#define QIB_7322_HwDiagCtrl_OFFS 0xB0
+#define QIB_7322_HwDiagCtrl_DEF 0x0000000000000000
+#define QIB_7322_HwDiagCtrl_Diagnostic_LSB 0x3F
+#define QIB_7322_HwDiagCtrl_Diagnostic_MSB 0x3F
+#define QIB_7322_HwDiagCtrl_Diagnostic_RMASK 0x1
+#define QIB_7322_HwDiagCtrl_CounterWrEnable_LSB 0x3D
+#define QIB_7322_HwDiagCtrl_CounterWrEnable_MSB 0x3D
+#define QIB_7322_HwDiagCtrl_CounterWrEnable_RMASK 0x1
+#define QIB_7322_HwDiagCtrl_CounterDisable_LSB 0x3C
+#define QIB_7322_HwDiagCtrl_CounterDisable_MSB 0x3C
+#define QIB_7322_HwDiagCtrl_CounterDisable_RMASK 0x1
+#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F
+#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_MSB 0x22
+#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF
+#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_LSB 0xF
+#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_MSB 0xF
+#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_RMASK 0x1
+#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_LSB 0xE
+#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_MSB 0xE
+#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_RMASK 0x1
+#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_LSB 0xD
+#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_MSB 0xD
+#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_RMASK 0x1
+#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_LSB 0xC
+#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_MSB 0xC
+#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_RMASK 0x1
+
+#define QIB_7322_EXTStatus_OFFS 0xC0
+#define QIB_7322_EXTStatus_DEF 0x000000000000X000
+#define QIB_7322_EXTStatus_GPIOIn_LSB 0x30
+#define QIB_7322_EXTStatus_GPIOIn_MSB 0x3F
+#define QIB_7322_EXTStatus_GPIOIn_RMASK 0xFFFF
+#define QIB_7322_EXTStatus_MemBISTDisabled_LSB 0xF
+#define QIB_7322_EXTStatus_MemBISTDisabled_MSB 0xF
+#define QIB_7322_EXTStatus_MemBISTDisabled_RMASK 0x1
+#define QIB_7322_EXTStatus_MemBISTEndTest_LSB 0xE
+#define QIB_7322_EXTStatus_MemBISTEndTest_MSB 0xE
+#define QIB_7322_EXTStatus_MemBISTEndTest_RMASK 0x1
+
+#define QIB_7322_EXTCtrl_OFFS 0xC8
+#define QIB_7322_EXTCtrl_DEF 0x0000000000000000
+#define QIB_7322_EXTCtrl_GPIOOe_LSB 0x30
+#define QIB_7322_EXTCtrl_GPIOOe_MSB 0x3F
+#define QIB_7322_EXTCtrl_GPIOOe_RMASK 0xFFFF
+#define QIB_7322_EXTCtrl_GPIOInvert_LSB 0x20
+#define QIB_7322_EXTCtrl_GPIOInvert_MSB 0x2F
+#define QIB_7322_EXTCtrl_GPIOInvert_RMASK 0xFFFF
+#define QIB_7322_EXTCtrl_LEDPort1GreenOn_LSB 0x3
+#define QIB_7322_EXTCtrl_LEDPort1GreenOn_MSB 0x3
+#define QIB_7322_EXTCtrl_LEDPort1GreenOn_RMASK 0x1
+#define QIB_7322_EXTCtrl_LEDPort1YellowOn_LSB 0x2
+#define QIB_7322_EXTCtrl_LEDPort1YellowOn_MSB 0x2
+#define QIB_7322_EXTCtrl_LEDPort1YellowOn_RMASK 0x1
+#define QIB_7322_EXTCtrl_LEDPort0GreenOn_LSB 0x1
+#define QIB_7322_EXTCtrl_LEDPort0GreenOn_MSB 0x1
+#define QIB_7322_EXTCtrl_LEDPort0GreenOn_RMASK 0x1
+#define QIB_7322_EXTCtrl_LEDPort0YellowOn_LSB 0x0
+#define QIB_7322_EXTCtrl_LEDPort0YellowOn_MSB 0x0
+#define QIB_7322_EXTCtrl_LEDPort0YellowOn_RMASK 0x1
+
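
For the single-bit control fields such as the EXTCtrl LED bits above, the usual pattern is a read-modify-write of the whole register. The sketch below is illustrative only and assumes a mapped register space reachable through plain volatile 64-bit accesses; it is not code from this patch, and the function name is made up:

/*
 * Sketch: drive port 0's green LED via EXTCtrl, preserving all other fields.
 * 'regs' is assumed to point at the start of the mapped 7322 register space.
 */
static inline void qib_led_port0_green(volatile uint64_t *regs, int on)
{
	volatile uint64_t *extctrl = regs + QIB_7322_EXTCtrl_OFFS / sizeof(*regs);
	uint64_t v = *extctrl;

	/* clear the one-bit field, then set it again if requested */
	v &= ~((uint64_t)QIB_7322_EXTCtrl_LEDPort0GreenOn_RMASK <<
	       QIB_7322_EXTCtrl_LEDPort0GreenOn_LSB);
	if (on)
		v |= 1ULL << QIB_7322_EXTCtrl_LEDPort0GreenOn_LSB;
	*extctrl = v;
}
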
+#define QIB_7322_GPIOOut_OFFS 0xE0
+#define QIB_7322_GPIOOut_DEF 0x0000000000000000
+
+#define QIB_7322_GPIOMask_OFFS 0xE8
+#define QIB_7322_GPIOMask_DEF 0x0000000000000000
+
+#define QIB_7322_GPIOStatus_OFFS 0xF0
+#define QIB_7322_GPIOStatus_DEF 0x0000000000000000
+
+#define QIB_7322_GPIOClear_OFFS 0xF8
+#define QIB_7322_GPIOClear_DEF 0x0000000000000000
+
+#define QIB_7322_RcvCtrl_OFFS 0x100
+#define QIB_7322_RcvCtrl_DEF 0x0000000000000000
+#define QIB_7322_RcvCtrl_TidReDirect_LSB 0x30
+#define QIB_7322_RcvCtrl_TidReDirect_MSB 0x3F
+#define QIB_7322_RcvCtrl_TidReDirect_RMASK 0xFFFF
+#define QIB_7322_RcvCtrl_TailUpd_LSB 0x2F
+#define QIB_7322_RcvCtrl_TailUpd_MSB 0x2F
+#define QIB_7322_RcvCtrl_TailUpd_RMASK 0x1
+#define QIB_7322_RcvCtrl_XrcTypeCode_LSB 0x2C
+#define QIB_7322_RcvCtrl_XrcTypeCode_MSB 0x2E
+#define QIB_7322_RcvCtrl_XrcTypeCode_RMASK 0x7
+#define QIB_7322_RcvCtrl_TidFlowEnable_LSB 0x2B
+#define QIB_7322_RcvCtrl_TidFlowEnable_MSB 0x2B
+#define QIB_7322_RcvCtrl_TidFlowEnable_RMASK 0x1
+#define QIB_7322_RcvCtrl_ContextCfg_LSB 0x29
+#define QIB_7322_RcvCtrl_ContextCfg_MSB 0x2A
+#define QIB_7322_RcvCtrl_ContextCfg_RMASK 0x3
+#define QIB_7322_RcvCtrl_IntrAvail_LSB 0x14
+#define QIB_7322_RcvCtrl_IntrAvail_MSB 0x25
+#define QIB_7322_RcvCtrl_IntrAvail_RMASK 0x3FFFF
+#define QIB_7322_RcvCtrl_dontDropRHQFull_LSB 0x0
+#define QIB_7322_RcvCtrl_dontDropRHQFull_MSB 0x11
+#define QIB_7322_RcvCtrl_dontDropRHQFull_RMASK 0x3FFFF
+
+#define QIB_7322_RcvHdrSize_OFFS 0x110
+#define QIB_7322_RcvHdrSize_DEF 0x0000000000000000
+
+#define QIB_7322_RcvHdrCnt_OFFS 0x118
+#define QIB_7322_RcvHdrCnt_DEF 0x0000000000000000
+
+#define QIB_7322_RcvHdrEntSize_OFFS 0x120
+#define QIB_7322_RcvHdrEntSize_DEF 0x0000000000000000
+
+#define QIB_7322_RcvTIDBase_OFFS 0x128
+#define QIB_7322_RcvTIDBase_DEF 0x0000000000050000
+
+#define QIB_7322_RcvTIDCnt_OFFS 0x130
+#define QIB_7322_RcvTIDCnt_DEF 0x0000000000000200
+
+#define QIB_7322_RcvEgrBase_OFFS 0x138
+#define QIB_7322_RcvEgrBase_DEF 0x0000000000014000
+
+#define QIB_7322_RcvEgrCnt_OFFS 0x140
+#define QIB_7322_RcvEgrCnt_DEF 0x0000000000001000
+
+#define QIB_7322_RcvBufBase_OFFS 0x148
+#define QIB_7322_RcvBufBase_DEF 0x0000000000080000
+
+#define QIB_7322_RcvBufSize_OFFS 0x150
+#define QIB_7322_RcvBufSize_DEF 0x0000000000005000
+
+#define QIB_7322_RxIntMemBase_OFFS 0x158
+#define QIB_7322_RxIntMemBase_DEF 0x0000000000077000
+
+#define QIB_7322_RxIntMemSize_OFFS 0x160
+#define QIB_7322_RxIntMemSize_DEF 0x0000000000007000
+
+#define QIB_7322_feature_mask_OFFS 0x190
+#define QIB_7322_feature_mask_DEF 0x00000000000000XX
+
+#define QIB_7322_active_feature_mask_OFFS 0x198
+#define QIB_7322_active_feature_mask_DEF 0x00000000000000XX
+#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_LSB 0x5
+#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_MSB 0x5
+#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_RMASK 0x1
+#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_LSB 0x4
+#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_MSB 0x4
+#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_RMASK 0x1
+#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_LSB 0x3
+#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_MSB 0x3
+#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_RMASK 0x1
+#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_LSB 0x2
+#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_MSB 0x2
+#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_RMASK 0x1
+#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_LSB 0x1
+#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_MSB 0x1
+#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_RMASK 0x1
+#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_LSB 0x0
+#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_MSB 0x0
+#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_RMASK 0x1
+
+#define QIB_7322_SendCtrl_OFFS 0x1C0
+#define QIB_7322_SendCtrl_DEF 0x0000000000000000
+#define QIB_7322_SendCtrl_Disarm_LSB 0x1F
+#define QIB_7322_SendCtrl_Disarm_MSB 0x1F
+#define QIB_7322_SendCtrl_Disarm_RMASK 0x1
+#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_LSB 0x1D
+#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_MSB 0x1D
+#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_RMASK 0x1
+#define QIB_7322_SendCtrl_AvailUpdThld_LSB 0x18
+#define QIB_7322_SendCtrl_AvailUpdThld_MSB 0x1C
+#define QIB_7322_SendCtrl_AvailUpdThld_RMASK 0x1F
+#define QIB_7322_SendCtrl_DisarmSendBuf_LSB 0x10
+#define QIB_7322_SendCtrl_DisarmSendBuf_MSB 0x17
+#define QIB_7322_SendCtrl_DisarmSendBuf_RMASK 0xFF
+#define QIB_7322_SendCtrl_SpecialTriggerEn_LSB 0x4
+#define QIB_7322_SendCtrl_SpecialTriggerEn_MSB 0x4
+#define QIB_7322_SendCtrl_SpecialTriggerEn_RMASK 0x1
+#define QIB_7322_SendCtrl_SendBufAvailUpd_LSB 0x2
+#define QIB_7322_SendCtrl_SendBufAvailUpd_MSB 0x2
+#define QIB_7322_SendCtrl_SendBufAvailUpd_RMASK 0x1
+#define QIB_7322_SendCtrl_SendIntBufAvail_LSB 0x1
+#define QIB_7322_SendCtrl_SendIntBufAvail_MSB 0x1
+#define QIB_7322_SendCtrl_SendIntBufAvail_RMASK 0x1
+
+#define QIB_7322_SendBufBase_OFFS 0x1C8
+#define QIB_7322_SendBufBase_DEF 0x0018000000100000
+#define QIB_7322_SendBufBase_BaseAddr_LargePIO_LSB 0x20
+#define QIB_7322_SendBufBase_BaseAddr_LargePIO_MSB 0x34
+#define QIB_7322_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF
+#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_LSB 0x0
+#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_MSB 0x14
+#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF
+
+#define QIB_7322_SendBufSize_OFFS 0x1D0
+#define QIB_7322_SendBufSize_DEF 0x0000108000000880
+#define QIB_7322_SendBufSize_Size_LargePIO_LSB 0x20
+#define QIB_7322_SendBufSize_Size_LargePIO_MSB 0x2C
+#define QIB_7322_SendBufSize_Size_LargePIO_RMASK 0x1FFF
+#define QIB_7322_SendBufSize_Size_SmallPIO_LSB 0x0
+#define QIB_7322_SendBufSize_Size_SmallPIO_MSB 0xB
+#define QIB_7322_SendBufSize_Size_SmallPIO_RMASK 0xFFF
+
+#define QIB_7322_SendBufCnt_OFFS 0x1D8
+#define QIB_7322_SendBufCnt_DEF 0x0000002000000080
+#define QIB_7322_SendBufCnt_Num_LargeBuffers_LSB 0x20
+#define QIB_7322_SendBufCnt_Num_LargeBuffers_MSB 0x25
+#define QIB_7322_SendBufCnt_Num_LargeBuffers_RMASK 0x3F
+#define QIB_7322_SendBufCnt_Num_SmallBuffers_LSB 0x0
+#define QIB_7322_SendBufCnt_Num_SmallBuffers_MSB 0x8
+#define QIB_7322_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF
+
+#define QIB_7322_SendBufAvailAddr_OFFS 0x1E0
+#define QIB_7322_SendBufAvailAddr_DEF 0x0000000000000000
+#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6
+#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_MSB 0x27
+#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF
+
+#define QIB_7322_SendBufErr0_OFFS 0x240
+#define QIB_7322_SendBufErr0_DEF 0x0000000000000000
+#define QIB_7322_SendBufErr0_SendBufErr_63_0_LSB 0x0
+#define QIB_7322_SendBufErr0_SendBufErr_63_0_MSB 0x3F
+#define QIB_7322_SendBufErr0_SendBufErr_63_0_RMASK 0x0
+
+#define QIB_7322_AvailUpdCount_OFFS 0x268
+#define QIB_7322_AvailUpdCount_DEF 0x0000000000000000
+#define QIB_7322_AvailUpdCount_AvailUpdCount_LSB 0x0
+#define QIB_7322_AvailUpdCount_AvailUpdCount_MSB 0x4
+#define QIB_7322_AvailUpdCount_AvailUpdCount_RMASK 0x1F
+
+#define QIB_7322_RcvHdrAddr0_OFFS 0x280
+#define QIB_7322_RcvHdrAddr0_DEF 0x0000000000000000
+#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_LSB 0x2
+#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_MSB 0x27
+#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_RMASK 0x3FFFFFFFFF
+
+#define QIB_7322_RcvHdrTailAddr0_OFFS 0x340
+#define QIB_7322_RcvHdrTailAddr0_DEF 0x0000000000000000
+#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_LSB 0x2
+#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_MSB 0x27
+#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_RMASK 0x3FFFFFFFFF
+
+#define QIB_7322_ahb_access_ctrl_OFFS 0x460
+#define QIB_7322_ahb_access_ctrl_DEF 0x0000000000000000
+#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_LSB 0x1
+#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_MSB 0x2
+#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_RMASK 0x3
+#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_LSB 0x0
+#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_MSB 0x0
+#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_RMASK 0x1
+
+#define QIB_7322_ahb_transaction_reg_OFFS 0x468
+#define QIB_7322_ahb_transaction_reg_DEF 0x0000000080000000
+#define QIB_7322_ahb_transaction_reg_ahb_data_LSB 0x20
+#define QIB_7322_ahb_transaction_reg_ahb_data_MSB 0x3F
+#define QIB_7322_ahb_transaction_reg_ahb_data_RMASK 0xFFFFFFFF
+#define QIB_7322_ahb_transaction_reg_ahb_rdy_LSB 0x1F
+#define QIB_7322_ahb_transaction_reg_ahb_rdy_MSB 0x1F
+#define QIB_7322_ahb_transaction_reg_ahb_rdy_RMASK 0x1
+#define QIB_7322_ahb_transaction_reg_ahb_req_err_LSB 0x1E
+#define QIB_7322_ahb_transaction_reg_ahb_req_err_MSB 0x1E
+#define QIB_7322_ahb_transaction_reg_ahb_req_err_RMASK 0x1
+#define QIB_7322_ahb_transaction_reg_write_not_read_LSB 0x1B
+#define QIB_7322_ahb_transaction_reg_write_not_read_MSB 0x1B
+#define QIB_7322_ahb_transaction_reg_write_not_read_RMASK 0x1
+#define QIB_7322_ahb_transaction_reg_ahb_address_LSB 0x10
+#define QIB_7322_ahb_transaction_reg_ahb_address_MSB 0x1A
+#define QIB_7322_ahb_transaction_reg_ahb_address_RMASK 0x7FF
+
+#define QIB_7322_SPC_JTAG_ACCESS_REG_OFFS 0x470
+#define QIB_7322_SPC_JTAG_ACCESS_REG_DEF 0x0000000000000001
+#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_LSB 0xA
+#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_MSB 0xA
+#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_RMASK 0x1
+#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_LSB 0x5
+#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_MSB 0x9
+#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_RMASK 0x1F
+#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_LSB 0x3
+#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_MSB 0x4
+#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_RMASK 0x3
+#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_LSB 0x2
+#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_MSB 0x2
+#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_RMASK 0x1
+#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_LSB 0x1
+#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_MSB 0x1
+#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_RMASK 0x1
+#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_LSB 0x0
+#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_MSB 0x0
+#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_RMASK 0x1
+
+#define QIB_7322_SendCheckMask0_OFFS 0x4C0
+#define QIB_7322_SendCheckMask0_DEF 0x0000000000000000
+#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_LSB 0x0
+#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_MSB 0x3F
+#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_RMASK 0x0
+
+#define QIB_7322_SendGRHCheckMask0_OFFS 0x4E0
+#define QIB_7322_SendGRHCheckMask0_DEF 0x0000000000000000
+#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_LSB 0x0
+#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_MSB 0x3F
+#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_RMASK 0x0
+
+#define QIB_7322_SendIBPacketMask0_OFFS 0x500
+#define QIB_7322_SendIBPacketMask0_DEF 0x0000000000000000
+#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_LSB 0x0
+#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_MSB 0x3F
+#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_RMASK 0x0
+
+#define QIB_7322_IntRedirect0_OFFS 0x540
+#define QIB_7322_IntRedirect0_DEF 0x0000000000000000
+#define QIB_7322_IntRedirect0_vec11_LSB 0x37
+#define QIB_7322_IntRedirect0_vec11_MSB 0x3B
+#define QIB_7322_IntRedirect0_vec11_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec10_LSB 0x32
+#define QIB_7322_IntRedirect0_vec10_MSB 0x36
+#define QIB_7322_IntRedirect0_vec10_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec9_LSB 0x2D
+#define QIB_7322_IntRedirect0_vec9_MSB 0x31
+#define QIB_7322_IntRedirect0_vec9_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec8_LSB 0x28
+#define QIB_7322_IntRedirect0_vec8_MSB 0x2C
+#define QIB_7322_IntRedirect0_vec8_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec7_LSB 0x23
+#define QIB_7322_IntRedirect0_vec7_MSB 0x27
+#define QIB_7322_IntRedirect0_vec7_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec6_LSB 0x1E
+#define QIB_7322_IntRedirect0_vec6_MSB 0x22
+#define QIB_7322_IntRedirect0_vec6_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec5_LSB 0x19
+#define QIB_7322_IntRedirect0_vec5_MSB 0x1D
+#define QIB_7322_IntRedirect0_vec5_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec4_LSB 0x14
+#define QIB_7322_IntRedirect0_vec4_MSB 0x18
+#define QIB_7322_IntRedirect0_vec4_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec3_LSB 0xF
+#define QIB_7322_IntRedirect0_vec3_MSB 0x13
+#define QIB_7322_IntRedirect0_vec3_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec2_LSB 0xA
+#define QIB_7322_IntRedirect0_vec2_MSB 0xE
+#define QIB_7322_IntRedirect0_vec2_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec1_LSB 0x5
+#define QIB_7322_IntRedirect0_vec1_MSB 0x9
+#define QIB_7322_IntRedirect0_vec1_RMASK 0x1F
+#define QIB_7322_IntRedirect0_vec0_LSB 0x0
+#define QIB_7322_IntRedirect0_vec0_MSB 0x4
+#define QIB_7322_IntRedirect0_vec0_RMASK 0x1F
+
+#define QIB_7322_Int_Granted_OFFS 0x570
+#define QIB_7322_Int_Granted_DEF 0x0000000000000000
+
+#define QIB_7322_vec_clr_without_int_OFFS 0x578
+#define QIB_7322_vec_clr_without_int_DEF 0x0000000000000000
+
+#define QIB_7322_DCACtrlA_OFFS 0x580
+#define QIB_7322_DCACtrlA_DEF 0x0000000000000000
+#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_LSB 0x4
+#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_MSB 0x4
+#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_RMASK 0x1
+#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_LSB 0x3
+#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_MSB 0x3
+#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_RMASK 0x1
+#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_LSB 0x2
+#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_MSB 0x2
+#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_RMASK 0x1
+#define QIB_7322_DCACtrlA_EagerDCAEnable_LSB 0x1
+#define QIB_7322_DCACtrlA_EagerDCAEnable_MSB 0x1
+#define QIB_7322_DCACtrlA_EagerDCAEnable_RMASK 0x1
+#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_LSB 0x0
+#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_MSB 0x0
+#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_RMASK 0x1
+
+#define QIB_7322_DCACtrlB_OFFS 0x588
+#define QIB_7322_DCACtrlB_DEF 0x0000000000000000
+#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_LSB 0x36
+#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_MSB 0x3B
+#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_LSB 0x2E
+#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_MSB 0x35
+#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_LSB 0x28
+#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_MSB 0x2D
+#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_LSB 0x20
+#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_MSB 0x27
+#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_LSB 0x16
+#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_MSB 0x1B
+#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_LSB 0xE
+#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_MSB 0x15
+#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_LSB 0x8
+#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_MSB 0xD
+#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_LSB 0x0
+#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_MSB 0x7
+#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_RMASK 0xFF
+
+#define QIB_7322_DCACtrlC_OFFS 0x590
+#define QIB_7322_DCACtrlC_DEF 0x0000000000000000
+#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_LSB 0x36
+#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_MSB 0x3B
+#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_LSB 0x2E
+#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_MSB 0x35
+#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_LSB 0x28
+#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_MSB 0x2D
+#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_LSB 0x20
+#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_MSB 0x27
+#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_LSB 0x16
+#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_MSB 0x1B
+#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_LSB 0xE
+#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_MSB 0x15
+#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_LSB 0x8
+#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_MSB 0xD
+#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_LSB 0x0
+#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_MSB 0x7
+#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_RMASK 0xFF
+
+#define QIB_7322_DCACtrlD_OFFS 0x598
+#define QIB_7322_DCACtrlD_DEF 0x0000000000000000
+#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_LSB 0x36
+#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_MSB 0x3B
+#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_LSB 0x2E
+#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_MSB 0x35
+#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_LSB 0x28
+#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_MSB 0x2D
+#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_LSB 0x20
+#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_MSB 0x27
+#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_LSB 0x16
+#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_MSB 0x1B
+#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_LSB 0xE
+#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_MSB 0x15
+#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_LSB 0x8
+#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_MSB 0xD
+#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_LSB 0x0
+#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_MSB 0x7
+#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_RMASK 0xFF
+
+#define QIB_7322_DCACtrlE_OFFS 0x5A0
+#define QIB_7322_DCACtrlE_DEF 0x0000000000000000
+#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_LSB 0x36
+#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_MSB 0x3B
+#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_LSB 0x2E
+#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_MSB 0x35
+#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_LSB 0x28
+#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_MSB 0x2D
+#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_LSB 0x20
+#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_MSB 0x27
+#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_LSB 0x16
+#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_MSB 0x1B
+#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_LSB 0xE
+#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_MSB 0x15
+#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_LSB 0x8
+#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_MSB 0xD
+#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_LSB 0x0
+#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_MSB 0x7
+#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_RMASK 0xFF
+
+#define QIB_7322_DCACtrlF_OFFS 0x5A8
+#define QIB_7322_DCACtrlF_DEF 0x0000000000000000
+#define QIB_7322_DCACtrlF_SendDma1DCAOPH_LSB 0x28
+#define QIB_7322_DCACtrlF_SendDma1DCAOPH_MSB 0x2F
+#define QIB_7322_DCACtrlF_SendDma1DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlF_SendDma0DCAOPH_LSB 0x20
+#define QIB_7322_DCACtrlF_SendDma0DCAOPH_MSB 0x27
+#define QIB_7322_DCACtrlF_SendDma0DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_LSB 0x16
+#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_MSB 0x1B
+#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_LSB 0xE
+#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_MSB 0x15
+#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_RMASK 0xFF
+#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_LSB 0x8
+#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_MSB 0xD
+#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_RMASK 0x3F
+#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_LSB 0x0
+#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_MSB 0x7
+#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_RMASK 0xFF
+
+#define QIB_7322_RcvAvailTimeOut0_OFFS 0xC00
+#define QIB_7322_RcvAvailTimeOut0_DEF 0x0000000000000000
+#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_LSB 0x10
+#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_MSB 0x1F
+#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_RMASK 0xFFFF
+#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_LSB 0x0
+#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_MSB 0xF
+#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_RMASK 0xFFFF
+
+#define QIB_7322_CntrRegBase_0_OFFS 0x1028
+#define QIB_7322_CntrRegBase_0_DEF 0x0000000000012000
+
+#define QIB_7322_ErrMask_0_OFFS 0x1080
+#define QIB_7322_ErrMask_0_DEF 0x0000000000000000
+#define QIB_7322_ErrMask_0_IBStatusChangedMask_LSB 0x3A
+#define QIB_7322_ErrMask_0_IBStatusChangedMask_MSB 0x3A
+#define QIB_7322_ErrMask_0_IBStatusChangedMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SHeadersErrMask_LSB 0x39
+#define QIB_7322_ErrMask_0_SHeadersErrMask_MSB 0x39
+#define QIB_7322_ErrMask_0_SHeadersErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_LSB 0x36
+#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_MSB 0x36
+#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaHaltErrMask_LSB 0x31
+#define QIB_7322_ErrMask_0_SDmaHaltErrMask_MSB 0x31
+#define QIB_7322_ErrMask_0_SDmaHaltErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_LSB 0x30
+#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_MSB 0x30
+#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_LSB 0x2F
+#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_MSB 0x2F
+#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_LSB 0x2E
+#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_MSB 0x2E
+#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_LSB 0x2D
+#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_MSB 0x2D
+#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_LSB 0x2C
+#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_MSB 0x2C
+#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDma1stDescErrMask_LSB 0x2B
+#define QIB_7322_ErrMask_0_SDma1stDescErrMask_MSB 0x2B
+#define QIB_7322_ErrMask_0_SDma1stDescErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaBaseErrMask_LSB 0x2A
+#define QIB_7322_ErrMask_0_SDmaBaseErrMask_MSB 0x2A
+#define QIB_7322_ErrMask_0_SDmaBaseErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_LSB 0x29
+#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_MSB 0x29
+#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_LSB 0x28
+#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_MSB 0x28
+#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_LSB 0x27
+#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_MSB 0x27
+#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_LSB 0x26
+#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_MSB 0x26
+#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_LSB 0x25
+#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_MSB 0x25
+#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_LSB 0x24
+#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_MSB 0x24
+#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_LSB 0x22
+#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_MSB 0x22
+#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_LSB 0x21
+#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_MSB 0x21
+#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendPktLenErrMask_LSB 0x20
+#define QIB_7322_ErrMask_0_SendPktLenErrMask_MSB 0x20
+#define QIB_7322_ErrMask_0_SendPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendUnderRunErrMask_LSB 0x1F
+#define QIB_7322_ErrMask_0_SendUnderRunErrMask_MSB 0x1F
+#define QIB_7322_ErrMask_0_SendUnderRunErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_LSB 0x1E
+#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_MSB 0x1E
+#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_LSB 0x1D
+#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_MSB 0x1D
+#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_LSB 0x11
+#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_MSB 0x11
+#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvHdrErrMask_LSB 0x10
+#define QIB_7322_ErrMask_0_RcvHdrErrMask_MSB 0x10
+#define QIB_7322_ErrMask_0_RcvHdrErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_LSB 0xF
+#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_MSB 0xF
+#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvBadTidErrMask_LSB 0xE
+#define QIB_7322_ErrMask_0_RcvBadTidErrMask_MSB 0xE
+#define QIB_7322_ErrMask_0_RcvBadTidErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_LSB 0xB
+#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_MSB 0xB
+#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_LSB 0xA
+#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_MSB 0xA
+#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvEBPErrMask_LSB 0x9
+#define QIB_7322_ErrMask_0_RcvEBPErrMask_MSB 0x9
+#define QIB_7322_ErrMask_0_RcvEBPErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_LSB 0x8
+#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_MSB 0x8
+#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_LSB 0x7
+#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_MSB 0x7
+#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_LSB 0x6
+#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_MSB 0x6
+#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_LSB 0x5
+#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_MSB 0x5
+#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_LSB 0x4
+#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_MSB 0x4
+#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_LSB 0x3
+#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_MSB 0x3
+#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvICRCErrMask_LSB 0x2
+#define QIB_7322_ErrMask_0_RcvICRCErrMask_MSB 0x2
+#define QIB_7322_ErrMask_0_RcvICRCErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvVCRCErrMask_LSB 0x1
+#define QIB_7322_ErrMask_0_RcvVCRCErrMask_MSB 0x1
+#define QIB_7322_ErrMask_0_RcvVCRCErrMask_RMASK 0x1
+#define QIB_7322_ErrMask_0_RcvFormatErrMask_LSB 0x0
+#define QIB_7322_ErrMask_0_RcvFormatErrMask_MSB 0x0
+#define QIB_7322_ErrMask_0_RcvFormatErrMask_RMASK 0x1
+
+#define QIB_7322_ErrStatus_0_OFFS 0x1088
+#define QIB_7322_ErrStatus_0_DEF 0x0000000000000000
+#define QIB_7322_ErrStatus_0_IBStatusChanged_LSB 0x3A
+#define QIB_7322_ErrStatus_0_IBStatusChanged_MSB 0x3A
+#define QIB_7322_ErrStatus_0_IBStatusChanged_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SHeadersErr_LSB 0x39
+#define QIB_7322_ErrStatus_0_SHeadersErr_MSB 0x39
+#define QIB_7322_ErrStatus_0_SHeadersErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_LSB 0x36
+#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_MSB 0x36
+#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaHaltErr_LSB 0x31
+#define QIB_7322_ErrStatus_0_SDmaHaltErr_MSB 0x31
+#define QIB_7322_ErrStatus_0_SDmaHaltErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_LSB 0x30
+#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_MSB 0x30
+#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_LSB 0x2F
+#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_MSB 0x2F
+#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_LSB 0x2E
+#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_MSB 0x2E
+#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaDwEnErr_LSB 0x2D
+#define QIB_7322_ErrStatus_0_SDmaDwEnErr_MSB 0x2D
+#define QIB_7322_ErrStatus_0_SDmaDwEnErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_LSB 0x2C
+#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_MSB 0x2C
+#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDma1stDescErr_LSB 0x2B
+#define QIB_7322_ErrStatus_0_SDma1stDescErr_MSB 0x2B
+#define QIB_7322_ErrStatus_0_SDma1stDescErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaBaseErr_LSB 0x2A
+#define QIB_7322_ErrStatus_0_SDmaBaseErr_MSB 0x2A
+#define QIB_7322_ErrStatus_0_SDmaBaseErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_LSB 0x29
+#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_MSB 0x29
+#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_LSB 0x28
+#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_MSB 0x28
+#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_LSB 0x27
+#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_MSB 0x27
+#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendBufMisuseErr_LSB 0x26
+#define QIB_7322_ErrStatus_0_SendBufMisuseErr_MSB 0x26
+#define QIB_7322_ErrStatus_0_SendBufMisuseErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_LSB 0x25
+#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_MSB 0x25
+#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_LSB 0x24
+#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_MSB 0x24
+#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_LSB 0x22
+#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_MSB 0x22
+#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_LSB 0x21
+#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_MSB 0x21
+#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendPktLenErr_LSB 0x20
+#define QIB_7322_ErrStatus_0_SendPktLenErr_MSB 0x20
+#define QIB_7322_ErrStatus_0_SendPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendUnderRunErr_LSB 0x1F
+#define QIB_7322_ErrStatus_0_SendUnderRunErr_MSB 0x1F
+#define QIB_7322_ErrStatus_0_SendUnderRunErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_LSB 0x1E
+#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_MSB 0x1E
+#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_SendMinPktLenErr_LSB 0x1D
+#define QIB_7322_ErrStatus_0_SendMinPktLenErr_MSB 0x1D
+#define QIB_7322_ErrStatus_0_SendMinPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_LSB 0x11
+#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_MSB 0x11
+#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvHdrErr_LSB 0x10
+#define QIB_7322_ErrStatus_0_RcvHdrErr_MSB 0x10
+#define QIB_7322_ErrStatus_0_RcvHdrErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvHdrLenErr_LSB 0xF
+#define QIB_7322_ErrStatus_0_RcvHdrLenErr_MSB 0xF
+#define QIB_7322_ErrStatus_0_RcvHdrLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvBadTidErr_LSB 0xE
+#define QIB_7322_ErrStatus_0_RcvBadTidErr_MSB 0xE
+#define QIB_7322_ErrStatus_0_RcvBadTidErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvBadVersionErr_LSB 0xB
+#define QIB_7322_ErrStatus_0_RcvBadVersionErr_MSB 0xB
+#define QIB_7322_ErrStatus_0_RcvBadVersionErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvIBFlowErr_LSB 0xA
+#define QIB_7322_ErrStatus_0_RcvIBFlowErr_MSB 0xA
+#define QIB_7322_ErrStatus_0_RcvIBFlowErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvEBPErr_LSB 0x9
+#define QIB_7322_ErrStatus_0_RcvEBPErr_MSB 0x9
+#define QIB_7322_ErrStatus_0_RcvEBPErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_LSB 0x8
+#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_MSB 0x8
+#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_LSB 0x7
+#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_MSB 0x7
+#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_LSB 0x6
+#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_MSB 0x6
+#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_LSB 0x5
+#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_MSB 0x5
+#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_LSB 0x4
+#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_MSB 0x4
+#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_LSB 0x3
+#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_MSB 0x3
+#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvICRCErr_LSB 0x2
+#define QIB_7322_ErrStatus_0_RcvICRCErr_MSB 0x2
+#define QIB_7322_ErrStatus_0_RcvICRCErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvVCRCErr_LSB 0x1
+#define QIB_7322_ErrStatus_0_RcvVCRCErr_MSB 0x1
+#define QIB_7322_ErrStatus_0_RcvVCRCErr_RMASK 0x1
+#define QIB_7322_ErrStatus_0_RcvFormatErr_LSB 0x0
+#define QIB_7322_ErrStatus_0_RcvFormatErr_MSB 0x0
+#define QIB_7322_ErrStatus_0_RcvFormatErr_RMASK 0x1
+
+#define QIB_7322_ErrClear_0_OFFS 0x1090
+#define QIB_7322_ErrClear_0_DEF 0x0000000000000000
+#define QIB_7322_ErrClear_0_IBStatusChangedClear_LSB 0x3A
+#define QIB_7322_ErrClear_0_IBStatusChangedClear_MSB 0x3A
+#define QIB_7322_ErrClear_0_IBStatusChangedClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SHeadersErrClear_LSB 0x39
+#define QIB_7322_ErrClear_0_SHeadersErrClear_MSB 0x39
+#define QIB_7322_ErrClear_0_SHeadersErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_LSB 0x36
+#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_MSB 0x36
+#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaHaltErrClear_LSB 0x31
+#define QIB_7322_ErrClear_0_SDmaHaltErrClear_MSB 0x31
+#define QIB_7322_ErrClear_0_SDmaHaltErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_LSB 0x30
+#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_MSB 0x30
+#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_LSB 0x2F
+#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_MSB 0x2F
+#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_LSB 0x2E
+#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_MSB 0x2E
+#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_LSB 0x2D
+#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_MSB 0x2D
+#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_LSB 0x2C
+#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_MSB 0x2C
+#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDma1stDescErrClear_LSB 0x2B
+#define QIB_7322_ErrClear_0_SDma1stDescErrClear_MSB 0x2B
+#define QIB_7322_ErrClear_0_SDma1stDescErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaBaseErrClear_LSB 0x2A
+#define QIB_7322_ErrClear_0_SDmaBaseErrClear_MSB 0x2A
+#define QIB_7322_ErrClear_0_SDmaBaseErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_LSB 0x29
+#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_MSB 0x29
+#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_LSB 0x28
+#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_MSB 0x28
+#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_LSB 0x27
+#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_MSB 0x27
+#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_LSB 0x26
+#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_MSB 0x26
+#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_LSB 0x25
+#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_MSB 0x25
+#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_LSB 0x24
+#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_MSB 0x24
+#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_LSB 0x22
+#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_MSB 0x22
+#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_LSB 0x21
+#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_MSB 0x21
+#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendPktLenErrClear_LSB 0x20
+#define QIB_7322_ErrClear_0_SendPktLenErrClear_MSB 0x20
+#define QIB_7322_ErrClear_0_SendPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendUnderRunErrClear_LSB 0x1F
+#define QIB_7322_ErrClear_0_SendUnderRunErrClear_MSB 0x1F
+#define QIB_7322_ErrClear_0_SendUnderRunErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_LSB 0x1E
+#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_MSB 0x1E
+#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_LSB 0x1D
+#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_MSB 0x1D
+#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_LSB 0x11
+#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_MSB 0x11
+#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvHdrErrClear_LSB 0x10
+#define QIB_7322_ErrClear_0_RcvHdrErrClear_MSB 0x10
+#define QIB_7322_ErrClear_0_RcvHdrErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_LSB 0xF
+#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_MSB 0xF
+#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvBadTidErrClear_LSB 0xE
+#define QIB_7322_ErrClear_0_RcvBadTidErrClear_MSB 0xE
+#define QIB_7322_ErrClear_0_RcvBadTidErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_LSB 0xB
+#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_MSB 0xB
+#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_LSB 0xA
+#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_MSB 0xA
+#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvEBPErrClear_LSB 0x9
+#define QIB_7322_ErrClear_0_RcvEBPErrClear_MSB 0x9
+#define QIB_7322_ErrClear_0_RcvEBPErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_LSB 0x8
+#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_MSB 0x8
+#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_LSB 0x7
+#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_MSB 0x7
+#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_LSB 0x6
+#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_MSB 0x6
+#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_LSB 0x5
+#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_MSB 0x5
+#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_LSB 0x4
+#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_MSB 0x4
+#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_LSB 0x3
+#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_MSB 0x3
+#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvICRCErrClear_LSB 0x2
+#define QIB_7322_ErrClear_0_RcvICRCErrClear_MSB 0x2
+#define QIB_7322_ErrClear_0_RcvICRCErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvVCRCErrClear_LSB 0x1
+#define QIB_7322_ErrClear_0_RcvVCRCErrClear_MSB 0x1
+#define QIB_7322_ErrClear_0_RcvVCRCErrClear_RMASK 0x1
+#define QIB_7322_ErrClear_0_RcvFormatErrClear_LSB 0x0
+#define QIB_7322_ErrClear_0_RcvFormatErrClear_MSB 0x0
+#define QIB_7322_ErrClear_0_RcvFormatErrClear_RMASK 0x1
+
+#define QIB_7322_TXEStatus_0_OFFS 0x10B8
+#define QIB_7322_TXEStatus_0_DEF 0x0000000XC00080FF
+#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_LSB 0x1F
+#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_MSB 0x1F
+#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_RMASK 0x1
+#define QIB_7322_TXEStatus_0_RmFifoEmpty_LSB 0x1E
+#define QIB_7322_TXEStatus_0_RmFifoEmpty_MSB 0x1E
+#define QIB_7322_TXEStatus_0_RmFifoEmpty_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_LSB 0xF
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_MSB 0xF
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_LSB 0x7
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_MSB 0x7
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_LSB 0x6
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_MSB 0x6
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_LSB 0x5
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_MSB 0x5
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_LSB 0x4
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_MSB 0x4
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_LSB 0x3
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_MSB 0x3
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_LSB 0x2
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_MSB 0x2
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_LSB 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_MSB 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_RMASK 0x1
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_LSB 0x0
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_MSB 0x0
+#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_RMASK 0x1
+
+#define QIB_7322_RcvCtrl_0_OFFS 0x1100
+#define QIB_7322_RcvCtrl_0_DEF 0x0000000000000000
+#define QIB_7322_RcvCtrl_0_RcvResetCredit_LSB 0x2A
+#define QIB_7322_RcvCtrl_0_RcvResetCredit_MSB 0x2A
+#define QIB_7322_RcvCtrl_0_RcvResetCredit_RMASK 0x1
+#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_LSB 0x29
+#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_MSB 0x29
+#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_RMASK 0x1
+#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_LSB 0x28
+#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_MSB 0x28
+#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_RMASK 0x1
+#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_LSB 0x27
+#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_MSB 0x27
+#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_RMASK 0x1
+#define QIB_7322_RcvCtrl_0_ContextEnableUser_LSB 0x2
+#define QIB_7322_RcvCtrl_0_ContextEnableUser_MSB 0x11
+#define QIB_7322_RcvCtrl_0_ContextEnableUser_RMASK 0xFFFF
+#define QIB_7322_RcvCtrl_0_ContextEnableKernel_LSB 0x0
+#define QIB_7322_RcvCtrl_0_ContextEnableKernel_MSB 0x0
+#define QIB_7322_RcvCtrl_0_ContextEnableKernel_RMASK 0x1
+
+#define QIB_7322_RcvBTHQP_0_OFFS 0x1108
+#define QIB_7322_RcvBTHQP_0_DEF 0x0000000000000000
+#define QIB_7322_RcvBTHQP_0_RcvBTHQP_LSB 0x0
+#define QIB_7322_RcvBTHQP_0_RcvBTHQP_MSB 0x17
+#define QIB_7322_RcvBTHQP_0_RcvBTHQP_RMASK 0xFFFFFF
+
+#define QIB_7322_RcvQPMapTableA_0_OFFS 0x1110
+#define QIB_7322_RcvQPMapTableA_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_LSB 0x19
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_MSB 0x1D
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_LSB 0x14
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_MSB 0x18
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_LSB 0xF
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_MSB 0x13
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_LSB 0xA
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_MSB 0xE
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_LSB 0x5
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_MSB 0x9
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_LSB 0x0
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_MSB 0x4
+#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_RMASK 0x1F
+
+#define QIB_7322_RcvQPMapTableB_0_OFFS 0x1118
+#define QIB_7322_RcvQPMapTableB_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_LSB 0x19
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_MSB 0x1D
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_LSB 0x14
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_MSB 0x18
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_LSB 0xF
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_MSB 0x13
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_LSB 0xA
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_MSB 0xE
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_LSB 0x5
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_MSB 0x9
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_LSB 0x0
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_MSB 0x4
+#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_RMASK 0x1F
+
+#define QIB_7322_RcvQPMapTableC_0_OFFS 0x1120
+#define QIB_7322_RcvQPMapTableC_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_LSB 0x19
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_MSB 0x1D
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_LSB 0x14
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_MSB 0x18
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_LSB 0xF
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_MSB 0x13
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_LSB 0xA
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_MSB 0xE
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_LSB 0x5
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_MSB 0x9
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_LSB 0x0
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_MSB 0x4
+#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_RMASK 0x1F
+
+#define QIB_7322_RcvQPMapTableD_0_OFFS 0x1128
+#define QIB_7322_RcvQPMapTableD_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_LSB 0x19
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_MSB 0x1D
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_LSB 0x14
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_MSB 0x18
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_LSB 0xF
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_MSB 0x13
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_LSB 0xA
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_MSB 0xE
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_LSB 0x5
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_MSB 0x9
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_LSB 0x0
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_MSB 0x4
+#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_RMASK 0x1F
+
+#define QIB_7322_RcvQPMapTableE_0_OFFS 0x1130
+#define QIB_7322_RcvQPMapTableE_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_LSB 0x19
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_MSB 0x1D
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_LSB 0x14
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_MSB 0x18
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_LSB 0xF
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_MSB 0x13
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_LSB 0xA
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_MSB 0xE
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_LSB 0x5
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_MSB 0x9
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_LSB 0x0
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_MSB 0x4
+#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_RMASK 0x1F
+
+#define QIB_7322_RcvQPMapTableF_0_OFFS 0x1138
+#define QIB_7322_RcvQPMapTableF_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_LSB 0x5
+#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_MSB 0x9
+#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_RMASK 0x1F
+#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_LSB 0x0
+#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_MSB 0x4
+#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_RMASK 0x1F
+
+#define QIB_7322_PSStat_0_OFFS 0x1140
+#define QIB_7322_PSStat_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSStart_0_OFFS 0x1148
+#define QIB_7322_PSStart_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSInterval_0_OFFS 0x1150
+#define QIB_7322_PSInterval_0_DEF 0x0000000000000000
+
+#define QIB_7322_RcvStatus_0_OFFS 0x1160
+#define QIB_7322_RcvStatus_0_DEF 0x0000000000000000
+#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_LSB 0x1
+#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_MSB 0x5
+#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_RMASK 0x1F
+#define QIB_7322_RcvStatus_0_RxPktInProgress_LSB 0x0
+#define QIB_7322_RcvStatus_0_RxPktInProgress_MSB 0x0
+#define QIB_7322_RcvStatus_0_RxPktInProgress_RMASK 0x1
+
+#define QIB_7322_RcvPartitionKey_0_OFFS 0x1168
+#define QIB_7322_RcvPartitionKey_0_DEF 0x0000000000000000
+
+#define QIB_7322_RcvQPMulticastContext_0_OFFS 0x1170
+#define QIB_7322_RcvQPMulticastContext_0_DEF 0x0000000000000000
+#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_LSB 0x0
+#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_MSB 0x4
+#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_RMASK 0x1F
+
+#define QIB_7322_RcvPktLEDCnt_0_OFFS 0x1178
+#define QIB_7322_RcvPktLEDCnt_0_DEF 0x0000000000000000
+#define QIB_7322_RcvPktLEDCnt_0_ONperiod_LSB 0x20
+#define QIB_7322_RcvPktLEDCnt_0_ONperiod_MSB 0x3F
+#define QIB_7322_RcvPktLEDCnt_0_ONperiod_RMASK 0xFFFFFFFF
+#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_LSB 0x0
+#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_MSB 0x1F
+#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_RMASK 0xFFFFFFFF
+
+#define QIB_7322_SendDmaIdleCnt_0_OFFS 0x1180
+#define QIB_7322_SendDmaIdleCnt_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_LSB 0x0
+#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_MSB 0xF
+#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_RMASK 0xFFFF
+
+#define QIB_7322_SendDmaReloadCnt_0_OFFS 0x1188
+#define QIB_7322_SendDmaReloadCnt_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_LSB 0x0
+#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_MSB 0xF
+#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_RMASK 0xFFFF
+
+#define QIB_7322_SendDmaDescCnt_0_OFFS 0x1190
+#define QIB_7322_SendDmaDescCnt_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_LSB 0x0
+#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_MSB 0xF
+#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_RMASK 0xFFFF
+
+#define QIB_7322_SendCtrl_0_OFFS 0x11C0
+#define QIB_7322_SendCtrl_0_DEF 0x0000000000000000
+#define QIB_7322_SendCtrl_0_IBVLArbiterEn_LSB 0xF
+#define QIB_7322_SendCtrl_0_IBVLArbiterEn_MSB 0xF
+#define QIB_7322_SendCtrl_0_IBVLArbiterEn_RMASK 0x1
+#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_LSB 0xE
+#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_MSB 0xE
+#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_RMASK 0x1
+#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_LSB 0xD
+#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_MSB 0xD
+#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_RMASK 0x1
+#define QIB_7322_SendCtrl_0_SDmaHalt_LSB 0xC
+#define QIB_7322_SendCtrl_0_SDmaHalt_MSB 0xC
+#define QIB_7322_SendCtrl_0_SDmaHalt_RMASK 0x1
+#define QIB_7322_SendCtrl_0_SDmaEnable_LSB 0xB
+#define QIB_7322_SendCtrl_0_SDmaEnable_MSB 0xB
+#define QIB_7322_SendCtrl_0_SDmaEnable_RMASK 0x1
+#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_LSB 0xA
+#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_MSB 0xA
+#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_RMASK 0x1
+#define QIB_7322_SendCtrl_0_SDmaIntEnable_LSB 0x9
+#define QIB_7322_SendCtrl_0_SDmaIntEnable_MSB 0x9
+#define QIB_7322_SendCtrl_0_SDmaIntEnable_RMASK 0x1
+#define QIB_7322_SendCtrl_0_SDmaCleanup_LSB 0x8
+#define QIB_7322_SendCtrl_0_SDmaCleanup_MSB 0x8
+#define QIB_7322_SendCtrl_0_SDmaCleanup_RMASK 0x1
+#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_LSB 0x7
+#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_MSB 0x7
+#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_RMASK 0x1
+#define QIB_7322_SendCtrl_0_SendEnable_LSB 0x3
+#define QIB_7322_SendCtrl_0_SendEnable_MSB 0x3
+#define QIB_7322_SendCtrl_0_SendEnable_RMASK 0x1
+#define QIB_7322_SendCtrl_0_TxeBypassIbc_LSB 0x1
+#define QIB_7322_SendCtrl_0_TxeBypassIbc_MSB 0x1
+#define QIB_7322_SendCtrl_0_TxeBypassIbc_RMASK 0x1
+#define QIB_7322_SendCtrl_0_TxeAbortIbc_LSB 0x0
+#define QIB_7322_SendCtrl_0_TxeAbortIbc_MSB 0x0
+#define QIB_7322_SendCtrl_0_TxeAbortIbc_RMASK 0x1
+
+#define QIB_7322_SendDmaBase_0_OFFS 0x11F8
+#define QIB_7322_SendDmaBase_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaBase_0_SendDmaBase_LSB 0x0
+#define QIB_7322_SendDmaBase_0_SendDmaBase_MSB 0x2F
+#define QIB_7322_SendDmaBase_0_SendDmaBase_RMASK 0xFFFFFFFFFFFF
+
+#define QIB_7322_SendDmaLenGen_0_OFFS 0x1200
+#define QIB_7322_SendDmaLenGen_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaLenGen_0_Generation_LSB 0x10
+#define QIB_7322_SendDmaLenGen_0_Generation_MSB 0x12
+#define QIB_7322_SendDmaLenGen_0_Generation_RMASK 0x7
+#define QIB_7322_SendDmaLenGen_0_Length_LSB 0x0
+#define QIB_7322_SendDmaLenGen_0_Length_MSB 0xF
+#define QIB_7322_SendDmaLenGen_0_Length_RMASK 0xFFFF
+
+#define QIB_7322_SendDmaTail_0_OFFS 0x1208
+#define QIB_7322_SendDmaTail_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaTail_0_SendDmaTail_LSB 0x0
+#define QIB_7322_SendDmaTail_0_SendDmaTail_MSB 0xF
+#define QIB_7322_SendDmaTail_0_SendDmaTail_RMASK 0xFFFF
+
+#define QIB_7322_SendDmaHead_0_OFFS 0x1210
+#define QIB_7322_SendDmaHead_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_LSB 0x20
+#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_MSB 0x2F
+#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_RMASK 0xFFFF
+#define QIB_7322_SendDmaHead_0_SendDmaHead_LSB 0x0
+#define QIB_7322_SendDmaHead_0_SendDmaHead_MSB 0xF
+#define QIB_7322_SendDmaHead_0_SendDmaHead_RMASK 0xFFFF
+
+#define QIB_7322_SendDmaHeadAddr_0_OFFS 0x1218
+#define QIB_7322_SendDmaHeadAddr_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_LSB 0x0
+#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_MSB 0x2F
+#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF
+
+#define QIB_7322_SendDmaBufMask0_0_OFFS 0x1220
+#define QIB_7322_SendDmaBufMask0_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_LSB 0x0
+#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_MSB 0x3F
+#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_RMASK 0x0
+
+#define QIB_7322_SendDmaStatus_0_OFFS 0x1238
+#define QIB_7322_SendDmaStatus_0_DEF 0x0000000042000000
+#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_LSB 0x3F
+#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_MSB 0x3F
+#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_HaltInProg_LSB 0x3E
+#define QIB_7322_SendDmaStatus_0_HaltInProg_MSB 0x3E
+#define QIB_7322_SendDmaStatus_0_HaltInProg_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_LSB 0x3D
+#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_MSB 0x3D
+#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_LSB 0x2F
+#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_MSB 0x3C
+#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_RMASK 0x3FFF
+#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_LSB 0x28
+#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_MSB 0x2E
+#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_RMASK 0x7F
+#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_LSB 0x20
+#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_MSB 0x27
+#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_RMASK 0xFF
+#define QIB_7322_SendDmaStatus_0_ScbFull_LSB 0x1F
+#define QIB_7322_SendDmaStatus_0_ScbFull_MSB 0x1F
+#define QIB_7322_SendDmaStatus_0_ScbFull_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_ScbEmpty_LSB 0x1E
+#define QIB_7322_SendDmaStatus_0_ScbEmpty_MSB 0x1E
+#define QIB_7322_SendDmaStatus_0_ScbEmpty_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_ScbEntryValid_LSB 0x1D
+#define QIB_7322_SendDmaStatus_0_ScbEntryValid_MSB 0x1D
+#define QIB_7322_SendDmaStatus_0_ScbEntryValid_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_LSB 0x1C
+#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_MSB 0x1C
+#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_LSB 0x1B
+#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_MSB 0x1B
+#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_LSB 0x1A
+#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_MSB 0x1A
+#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_LSB 0x19
+#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_MSB 0x19
+#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_SplFifoFull_LSB 0x18
+#define QIB_7322_SendDmaStatus_0_SplFifoFull_MSB 0x18
+#define QIB_7322_SendDmaStatus_0_SplFifoFull_RMASK 0x1
+#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_LSB 0x10
+#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_MSB 0x17
+#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_RMASK 0xFF
+#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_LSB 0x0
+#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_MSB 0xF
+#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_RMASK 0xFFFF
+
+#define QIB_7322_SendDmaPriorityThld_0_OFFS 0x1258
+#define QIB_7322_SendDmaPriorityThld_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_LSB 0x0
+#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_MSB 0x3
+#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_RMASK 0xF
+
+#define QIB_7322_SendHdrErrSymptom_0_OFFS 0x1260
+#define QIB_7322_SendHdrErrSymptom_0_DEF 0x0000000000000000
+#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_LSB 0x6
+#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_MSB 0x6
+#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_RMASK 0x1
+#define QIB_7322_SendHdrErrSymptom_0_GRHFail_LSB 0x5
+#define QIB_7322_SendHdrErrSymptom_0_GRHFail_MSB 0x5
+#define QIB_7322_SendHdrErrSymptom_0_GRHFail_RMASK 0x1
+#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_LSB 0x4
+#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_MSB 0x4
+#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_RMASK 0x1
+#define QIB_7322_SendHdrErrSymptom_0_QPFail_LSB 0x3
+#define QIB_7322_SendHdrErrSymptom_0_QPFail_MSB 0x3
+#define QIB_7322_SendHdrErrSymptom_0_QPFail_RMASK 0x1
+#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_LSB 0x2
+#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_MSB 0x2
+#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_RMASK 0x1
+#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_LSB 0x1
+#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_MSB 0x1
+#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_RMASK 0x1
+#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_LSB 0x0
+#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_MSB 0x0
+#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_RMASK 0x1
+
+#define QIB_7322_RxCreditVL0_0_OFFS 0x1280
+#define QIB_7322_RxCreditVL0_0_DEF 0x0000000000000000
+#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_LSB 0x10
+#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_MSB 0x1B
+#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_RMASK 0xFFF
+#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_LSB 0x0
+#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_MSB 0xB
+#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_RMASK 0xFFF
+
+#define QIB_7322_SendDmaBufUsed0_0_OFFS 0x1480
+#define QIB_7322_SendDmaBufUsed0_0_DEF 0x0000000000000000
+#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_LSB 0x0
+#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_MSB 0x3F
+#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_RMASK 0x0
+
+#define QIB_7322_SendCheckControl_0_OFFS 0x14A8
+#define QIB_7322_SendCheckControl_0_DEF 0x0000000000000000
+#define QIB_7322_SendCheckControl_0_PKey_En_LSB 0x4
+#define QIB_7322_SendCheckControl_0_PKey_En_MSB 0x4
+#define QIB_7322_SendCheckControl_0_PKey_En_RMASK 0x1
+#define QIB_7322_SendCheckControl_0_BTHQP_En_LSB 0x3
+#define QIB_7322_SendCheckControl_0_BTHQP_En_MSB 0x3
+#define QIB_7322_SendCheckControl_0_BTHQP_En_RMASK 0x1
+#define QIB_7322_SendCheckControl_0_SLID_En_LSB 0x2
+#define QIB_7322_SendCheckControl_0_SLID_En_MSB 0x2
+#define QIB_7322_SendCheckControl_0_SLID_En_RMASK 0x1
+#define QIB_7322_SendCheckControl_0_RawIPV6_En_LSB 0x1
+#define QIB_7322_SendCheckControl_0_RawIPV6_En_MSB 0x1
+#define QIB_7322_SendCheckControl_0_RawIPV6_En_RMASK 0x1
+#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_LSB 0x0
+#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_MSB 0x0
+#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_RMASK 0x1
+
+#define QIB_7322_SendIBSLIDMask_0_OFFS 0x14B0
+#define QIB_7322_SendIBSLIDMask_0_DEF 0x0000000000000000
+#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_LSB 0x0
+#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_MSB 0xF
+#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_RMASK 0xFFFF
+
+#define QIB_7322_SendIBSLIDAssign_0_OFFS 0x14B8
+#define QIB_7322_SendIBSLIDAssign_0_DEF 0x0000000000000000
+#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_LSB 0x0
+#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_MSB 0xF
+#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_RMASK 0xFFFF
+
+#define QIB_7322_IBCStatusA_0_OFFS 0x1540
+#define QIB_7322_IBCStatusA_0_DEF 0x0000000000000X02
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_LSB 0x27
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_MSB 0x27
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_LSB 0x26
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_MSB 0x26
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_LSB 0x25
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_MSB 0x25
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_LSB 0x24
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_MSB 0x24
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_LSB 0x23
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_MSB 0x23
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_LSB 0x22
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_MSB 0x22
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_LSB 0x21
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_MSB 0x21
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_LSB 0x20
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_MSB 0x20
+#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_TxReady_LSB 0x1E
+#define QIB_7322_IBCStatusA_0_TxReady_MSB 0x1E
+#define QIB_7322_IBCStatusA_0_TxReady_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_LSB 0x1D
+#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_MSB 0x1D
+#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_LSB 0xF
+#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_MSB 0xF
+#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_ScrambleEn_LSB 0xE
+#define QIB_7322_IBCStatusA_0_ScrambleEn_MSB 0xE
+#define QIB_7322_IBCStatusA_0_ScrambleEn_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_LSB 0xD
+#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_MSB 0xD
+#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_LSB 0xC
+#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_MSB 0xC
+#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_LSB 0xA
+#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_MSB 0xA
+#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_LinkWidthActive_LSB 0x9
+#define QIB_7322_IBCStatusA_0_LinkWidthActive_MSB 0x9
+#define QIB_7322_IBCStatusA_0_LinkWidthActive_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_LinkSpeedActive_LSB 0x8
+#define QIB_7322_IBCStatusA_0_LinkSpeedActive_MSB 0x8
+#define QIB_7322_IBCStatusA_0_LinkSpeedActive_RMASK 0x1
+#define QIB_7322_IBCStatusA_0_LinkState_LSB 0x5
+#define QIB_7322_IBCStatusA_0_LinkState_MSB 0x7
+#define QIB_7322_IBCStatusA_0_LinkState_RMASK 0x7
+#define QIB_7322_IBCStatusA_0_LinkTrainingState_LSB 0x0
+#define QIB_7322_IBCStatusA_0_LinkTrainingState_MSB 0x4
+#define QIB_7322_IBCStatusA_0_LinkTrainingState_RMASK 0x1F
+
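The _LSB/_RMASK pairs defined throughout this header are conventionally consumed as shift-and-mask field accessors ("shift right by LSB, then AND with RMASK"). As a minimal illustrative sketch, not part of this patch and with a hypothetical helper name, extracting the LinkState field from a 64-bit read of IBCStatusA_0 could look like:

/*
 * Hypothetical example only: decode the LinkState field (bits 7:5,
 * per the LSB/MSB/RMASK values above) from an IBCStatusA_0 value
 * already read from the chip.
 */
static inline u32 example_ibcstatusa_link_state(u64 ibcstatus_a)
{
	return (u32)((ibcstatus_a >> QIB_7322_IBCStatusA_0_LinkState_LSB) &
		     QIB_7322_IBCStatusA_0_LinkState_RMASK);
}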
+#define QIB_7322_IBCStatusB_0_OFFS 0x1548
+#define QIB_7322_IBCStatusB_0_DEF 0x00000000XXXXXXXX
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_LSB 0x27
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_MSB 0x27
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_RMASK 0x1
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_LSB 0x26
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_MSB 0x26
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_RMASK 0x1
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_LSB 0x25
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_MSB 0x25
+#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_RMASK 0x1
+#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_LSB 0x24
+#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_MSB 0x24
+#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_RMASK 0x1
+#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_LSB 0x20
+#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_MSB 0x23
+#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_RMASK 0xF
+#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_LSB 0x1E
+#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_MSB 0x1F
+#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_RMASK 0x3
+#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_LSB 0x1A
+#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_MSB 0x1D
+#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_RMASK 0xF
+#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_LSB 0x0
+#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_MSB 0x19
+#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_RMASK 0x3FFFFFF
+
+#define QIB_7322_IBCCtrlA_0_OFFS 0x1560
+#define QIB_7322_IBCCtrlA_0_DEF 0x0000000000000000
+#define QIB_7322_IBCCtrlA_0_Loopback_LSB 0x3F
+#define QIB_7322_IBCCtrlA_0_Loopback_MSB 0x3F
+#define QIB_7322_IBCCtrlA_0_Loopback_RMASK 0x1
+#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_LSB 0x3E
+#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_MSB 0x3E
+#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_RMASK 0x1
+#define QIB_7322_IBCCtrlA_0_IBLinkEn_LSB 0x3D
+#define QIB_7322_IBCCtrlA_0_IBLinkEn_MSB 0x3D
+#define QIB_7322_IBCCtrlA_0_IBLinkEn_RMASK 0x1
+#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_LSB 0x3C
+#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_MSB 0x3C
+#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_RMASK 0x1
+#define QIB_7322_IBCCtrlA_0_NumVLane_LSB 0x30
+#define QIB_7322_IBCCtrlA_0_NumVLane_MSB 0x32
+#define QIB_7322_IBCCtrlA_0_NumVLane_RMASK 0x7
+#define QIB_7322_IBCCtrlA_0_OverrunThreshold_LSB 0x24
+#define QIB_7322_IBCCtrlA_0_OverrunThreshold_MSB 0x27
+#define QIB_7322_IBCCtrlA_0_OverrunThreshold_RMASK 0xF
+#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_LSB 0x20
+#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_MSB 0x23
+#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_RMASK 0xF
+#define QIB_7322_IBCCtrlA_0_MaxPktLen_LSB 0x15
+#define QIB_7322_IBCCtrlA_0_MaxPktLen_MSB 0x1F
+#define QIB_7322_IBCCtrlA_0_MaxPktLen_RMASK 0x7FF
+#define QIB_7322_IBCCtrlA_0_LinkCmd_LSB 0x13
+#define QIB_7322_IBCCtrlA_0_LinkCmd_MSB 0x14
+#define QIB_7322_IBCCtrlA_0_LinkCmd_RMASK 0x3
+#define QIB_7322_IBCCtrlA_0_LinkInitCmd_LSB 0x10
+#define QIB_7322_IBCCtrlA_0_LinkInitCmd_MSB 0x12
+#define QIB_7322_IBCCtrlA_0_LinkInitCmd_RMASK 0x7
+#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_LSB 0x8
+#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_MSB 0xF
+#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_RMASK 0xFF
+#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_LSB 0x0
+#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_MSB 0x7
+#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_RMASK 0xFF
+
+#define QIB_7322_IBCCtrlB_0_OFFS 0x1568
+#define QIB_7322_IBCCtrlB_0_DEF 0x00000000000305FF
+#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_LSB 0x30
+#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_MSB 0x3F
+#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_RMASK 0xFFFF
+#define QIB_7322_IBCCtrlB_0_IB_DLID_LSB 0x20
+#define QIB_7322_IBCCtrlB_0_IB_DLID_MSB 0x2F
+#define QIB_7322_IBCCtrlB_0_IB_DLID_RMASK 0xFFFF
+#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_LSB 0x1B
+#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_MSB 0x1B
+#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_LSB 0x1A
+#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_MSB 0x1A
+#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_LSB 0x12
+#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_MSB 0x19
+#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_RMASK 0xFF
+#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_LSB 0x11
+#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_MSB 0x11
+#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_LSB 0x10
+#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_MSB 0x10
+#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_SD_DDS_LSB 0xC
+#define QIB_7322_IBCCtrlB_0_SD_DDS_MSB 0xF
+#define QIB_7322_IBCCtrlB_0_SD_DDS_RMASK 0xF
+#define QIB_7322_IBCCtrlB_0_SD_DDSV_LSB 0xB
+#define QIB_7322_IBCCtrlB_0_SD_DDSV_MSB 0xB
+#define QIB_7322_IBCCtrlB_0_SD_DDSV_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_LSB 0xA
+#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_MSB 0xA
+#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_LSB 0x9
+#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_MSB 0x9
+#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_LSB 0x8
+#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_MSB 0x8
+#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_LSB 0x7
+#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_MSB 0x7
+#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_LSB 0x5
+#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_MSB 0x6
+#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_RMASK 0x3
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_LSB 0x4
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_MSB 0x4
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_LSB 0x3
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_MSB 0x3
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_LSB 0x2
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_MSB 0x2
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_LSB 0x1
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_MSB 0x1
+#define QIB_7322_IBCCtrlB_0_SD_SPEED_RMASK 0x1
+#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_LSB 0x0
+#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_MSB 0x0
+#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_RMASK 0x1
+
+#define QIB_7322_IBCCtrlC_0_OFFS 0x1570
+#define QIB_7322_IBCCtrlC_0_DEF 0x0000000000000301
+#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_LSB 0x5
+#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_MSB 0x9
+#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_RMASK 0x1F
+#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_LSB 0x0
+#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_MSB 0x4
+#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_RMASK 0x1F
+
+#define QIB_7322_HRTBT_GUID_0_OFFS 0x1588
+#define QIB_7322_HRTBT_GUID_0_DEF 0x0000000000000000
+
+#define QIB_7322_IB_SDTEST_IF_TX_0_OFFS 0x1590
+#define QIB_7322_IB_SDTEST_IF_TX_0_DEF 0x0000000000000000
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_LSB 0x30
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_MSB 0x3F
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_RMASK 0xFFFF
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_LSB 0x20
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_MSB 0x2F
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_RMASK 0xFFFF
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_LSB 0xD
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_MSB 0xF
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_RMASK 0x7
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_LSB 0xB
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_MSB 0xC
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_RMASK 0x3
+#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_LSB 0x4
+#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_MSB 0x4
+#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_RMASK 0x1
+#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_LSB 0x2
+#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_MSB 0x3
+#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_RMASK 0x3
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_LSB 0x1
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_MSB 0x1
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_RMASK 0x1
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_LSB 0x0
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_MSB 0x0
+#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_RMASK 0x1
+
+#define QIB_7322_IB_SDTEST_IF_RX_0_OFFS 0x1598
+#define QIB_7322_IB_SDTEST_IF_RX_0_DEF 0x0000000000000000
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_LSB 0x30
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_MSB 0x3F
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_RMASK 0xFFFF
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_LSB 0x20
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_MSB 0x2F
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_RMASK 0xFFFF
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_LSB 0x18
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_MSB 0x1F
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_RMASK 0xFF
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_LSB 0x10
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_MSB 0x17
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_RMASK 0xFF
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_LSB 0x1
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_MSB 0x1
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_RMASK 0x1
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_LSB 0x0
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_MSB 0x0
+#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_RMASK 0x1
+
+#define QIB_7322_IBNCModeCtrl_0_OFFS 0x15B8
+#define QIB_7322_IBNCModeCtrl_0_DEF 0x0000000000000000
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_LSB 0x22
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_MSB 0x22
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_RMASK 0x1
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_LSB 0x21
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_MSB 0x21
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_RMASK 0x1
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_LSB 0x20
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_MSB 0x20
+#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_RMASK 0x1
+#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_LSB 0x11
+#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_MSB 0x19
+#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_RMASK 0x1FF
+#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_LSB 0x8
+#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_MSB 0x10
+#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_RMASK 0x1FF
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_LSB 0x2
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_MSB 0x2
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_RMASK 0x1
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_LSB 0x1
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_MSB 0x1
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_RMASK 0x1
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_LSB 0x0
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_MSB 0x0
+#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_RMASK 0x1
+
+#define QIB_7322_IBSerdesStatus_0_OFFS 0x15D0
+#define QIB_7322_IBSerdesStatus_0_DEF 0x0000000000000000
+
+#define QIB_7322_IBPCSConfig_0_OFFS 0x15D8
+#define QIB_7322_IBPCSConfig_0_DEF 0x0000000000000007
+#define QIB_7322_IBPCSConfig_0_link_sync_mask_LSB 0x9
+#define QIB_7322_IBPCSConfig_0_link_sync_mask_MSB 0x12
+#define QIB_7322_IBPCSConfig_0_link_sync_mask_RMASK 0x3FF
+#define QIB_7322_IBPCSConfig_0_xcv_rreset_LSB 0x2
+#define QIB_7322_IBPCSConfig_0_xcv_rreset_MSB 0x2
+#define QIB_7322_IBPCSConfig_0_xcv_rreset_RMASK 0x1
+#define QIB_7322_IBPCSConfig_0_xcv_treset_LSB 0x1
+#define QIB_7322_IBPCSConfig_0_xcv_treset_MSB 0x1
+#define QIB_7322_IBPCSConfig_0_xcv_treset_RMASK 0x1
+#define QIB_7322_IBPCSConfig_0_tx_rx_reset_LSB 0x0
+#define QIB_7322_IBPCSConfig_0_tx_rx_reset_MSB 0x0
+#define QIB_7322_IBPCSConfig_0_tx_rx_reset_RMASK 0x1
+
+#define QIB_7322_IBSerdesCtrl_0_OFFS 0x15E0
+#define QIB_7322_IBSerdesCtrl_0_DEF 0x0000000000FFA00F
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_LSB 0x1A
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_MSB 0x1A
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_LSB 0x19
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_MSB 0x19
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_LSB 0x18
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_MSB 0x18
+#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_LSB 0x14
+#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_MSB 0x17
+#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_RMASK 0xF
+#define QIB_7322_IBSerdesCtrl_0_CGMODE_LSB 0x10
+#define QIB_7322_IBSerdesCtrl_0_CGMODE_MSB 0x13
+#define QIB_7322_IBSerdesCtrl_0_CGMODE_RMASK 0xF
+#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_LSB 0xF
+#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_MSB 0xF
+#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_LSB 0xD
+#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_MSB 0xD
+#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_LPEN_LSB 0xC
+#define QIB_7322_IBSerdesCtrl_0_LPEN_MSB 0xC
+#define QIB_7322_IBSerdesCtrl_0_LPEN_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_PLLPD_LSB 0xB
+#define QIB_7322_IBSerdesCtrl_0_PLLPD_MSB 0xB
+#define QIB_7322_IBSerdesCtrl_0_PLLPD_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_TXPD_LSB 0xA
+#define QIB_7322_IBSerdesCtrl_0_TXPD_MSB 0xA
+#define QIB_7322_IBSerdesCtrl_0_TXPD_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_RXPD_LSB 0x9
+#define QIB_7322_IBSerdesCtrl_0_RXPD_MSB 0x9
+#define QIB_7322_IBSerdesCtrl_0_RXPD_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_TXIDLE_LSB 0x8
+#define QIB_7322_IBSerdesCtrl_0_TXIDLE_MSB 0x8
+#define QIB_7322_IBSerdesCtrl_0_TXIDLE_RMASK 0x1
+#define QIB_7322_IBSerdesCtrl_0_CMODE_LSB 0x0
+#define QIB_7322_IBSerdesCtrl_0_CMODE_MSB 0x6
+#define QIB_7322_IBSerdesCtrl_0_CMODE_RMASK 0x7F
+
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_OFFS 0x1600
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_DEF 0x0000000000000000
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_LSB 0x1F
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_MSB 0x1F
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_RMASK 0x1
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_LSB 0x1E
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_MSB 0x1E
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_RMASK 0x1
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_LSB 0xE
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_MSB 0x11
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_RMASK 0xF
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_LSB 0x9
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_MSB 0xD
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_RMASK 0x1F
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_LSB 0x5
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_MSB 0x8
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_RMASK 0xF
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_LSB 0x3
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_MSB 0x4
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_RMASK 0x3
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_LSB 0x0
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_MSB 0x2
+#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_RMASK 0x7
+
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_OFFS 0x1640
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_DEF 0x0000000000000000
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_LSB 0x27
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_MSB 0x27
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_LSB 0x26
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_MSB 0x26
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_LSB 0x25
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_MSB 0x25
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_LSB 0x24
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_MSB 0x24
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_LSB 0x23
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_MSB 0x23
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_LSB 0x22
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_MSB 0x22
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_LSB 0x21
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_MSB 0x21
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_LSB 0x20
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_MSB 0x20
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_LSB 0x18
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_MSB 0x1F
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_LSB 0x10
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_MSB 0x17
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_LSB 0x8
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_MSB 0xF
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_LSB 0x0
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_MSB 0x7
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_RMASK 0xFF
+
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_OFFS 0x1648
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_DEF 0x0000000000000000
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_LSB 0x27
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_MSB 0x27
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_LSB 0x26
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_MSB 0x26
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_LSB 0x25
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_MSB 0x25
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_LSB 0x24
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_MSB 0x24
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_LSB 0x23
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_MSB 0x23
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_LSB 0x22
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_MSB 0x22
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_LSB 0x21
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_MSB 0x21
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_LSB 0x20
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_MSB 0x20
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_LSB 0x18
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_MSB 0x1F
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_LSB 0x10
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_MSB 0x17
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_LSB 0x8
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_MSB 0xF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_LSB 0x0
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_MSB 0x7
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_RMASK 0xFF
+
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_OFFS 0x1650
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_DEF 0x0000000000000000
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_LSB 0x27
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_MSB 0x27
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_LSB 0x26
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_MSB 0x26
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_LSB 0x25
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_MSB 0x25
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_LSB 0x24
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_MSB 0x24
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_LSB 0x23
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_MSB 0x23
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_LSB 0x22
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_MSB 0x22
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_LSB 0x21
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_MSB 0x21
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_LSB 0x20
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_MSB 0x20
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_LSB 0x18
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_MSB 0x1F
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_LSB 0x10
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_MSB 0x17
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_LSB 0x8
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_MSB 0xF
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_LSB 0x0
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_MSB 0x7
+#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_RMASK 0xFF
+
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_OFFS 0x1658
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_DEF 0x0000000000000000
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_LSB 0x27
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_MSB 0x27
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_LSB 0x26
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_MSB 0x26
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_LSB 0x25
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_MSB 0x25
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_LSB 0x24
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_MSB 0x24
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_LSB 0x23
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_MSB 0x23
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_LSB 0x22
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_MSB 0x22
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_LSB 0x21
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_MSB 0x21
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_LSB 0x20
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_MSB 0x20
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_LSB 0x18
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_MSB 0x1F
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_LSB 0x10
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_MSB 0x17
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_LSB 0x8
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_MSB 0xF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_LSB 0x0
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_MSB 0x7
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_RMASK 0xFF
+
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_OFFS 0x1660
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_DEF 0x0000000000000000
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_LSB 0x27
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_MSB 0x27
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_LSB 0x26
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_MSB 0x26
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_LSB 0x25
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_MSB 0x25
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_LSB 0x24
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_MSB 0x24
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_LSB 0x23
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_MSB 0x23
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_LSB 0x22
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_MSB 0x22
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_LSB 0x21
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_MSB 0x21
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_LSB 0x20
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_MSB 0x20
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_LSB 0x18
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_MSB 0x1F
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_LSB 0x10
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_MSB 0x17
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_LSB 0x8
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_MSB 0xF
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_LSB 0x0
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_MSB 0x7
+#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_RMASK 0xFF
+
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_OFFS 0x1668
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_DEF 0x0000000000000000
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_LSB 0x27
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_MSB 0x27
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_LSB 0x26
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_MSB 0x26
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_LSB 0x25
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_MSB 0x25
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_LSB 0x24
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_MSB 0x24
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_LSB 0x23
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_MSB 0x23
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_LSB 0x22
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_MSB 0x22
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_LSB 0x21
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_MSB 0x21
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_LSB 0x20
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_MSB 0x20
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_RMASK 0x1
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_LSB 0x18
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_MSB 0x1F
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_LSB 0x10
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_MSB 0x17
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_LSB 0x8
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_MSB 0xF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_RMASK 0xFF
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_LSB 0x0
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_MSB 0x7
+#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_RMASK 0xFF
+
+#define QIB_7322_ADAPT_DISABLE_TIMER_THRESHOLD_0_OFFS 0x1670
+#define QIB_7322_ADAPT_DISABLE_TIMER_THRESHOLD_0_DEF 0x0000000000000000
+
+#define QIB_7322_HighPriorityLimit_0_OFFS 0x1BC0
+#define QIB_7322_HighPriorityLimit_0_DEF 0x0000000000000000
+#define QIB_7322_HighPriorityLimit_0_Limit_LSB 0x0
+#define QIB_7322_HighPriorityLimit_0_Limit_MSB 0x7
+#define QIB_7322_HighPriorityLimit_0_Limit_RMASK 0xFF
+
+#define QIB_7322_LowPriority0_0_OFFS 0x1C00
+#define QIB_7322_LowPriority0_0_DEF 0x0000000000000000
+#define QIB_7322_LowPriority0_0_VirtualLane_LSB 0x10
+#define QIB_7322_LowPriority0_0_VirtualLane_MSB 0x12
+#define QIB_7322_LowPriority0_0_VirtualLane_RMASK 0x7
+#define QIB_7322_LowPriority0_0_Weight_LSB 0x0
+#define QIB_7322_LowPriority0_0_Weight_MSB 0x7
+#define QIB_7322_LowPriority0_0_Weight_RMASK 0xFF
+
+#define QIB_7322_HighPriority0_0_OFFS 0x1E00
+#define QIB_7322_HighPriority0_0_DEF 0x0000000000000000
+#define QIB_7322_HighPriority0_0_VirtualLane_LSB 0x10
+#define QIB_7322_HighPriority0_0_VirtualLane_MSB 0x12
+#define QIB_7322_HighPriority0_0_VirtualLane_RMASK 0x7
+#define QIB_7322_HighPriority0_0_Weight_LSB 0x0
+#define QIB_7322_HighPriority0_0_Weight_MSB 0x7
+#define QIB_7322_HighPriority0_0_Weight_RMASK 0xFF
+
+#define QIB_7322_CntrRegBase_1_OFFS 0x2028
+#define QIB_7322_CntrRegBase_1_DEF 0x0000000000013000
+
+#define QIB_7322_RcvQPMulticastContext_1_OFFS 0x2170
+
+#define QIB_7322_SendCtrl_1_OFFS 0x21C0
+
+#define QIB_7322_SendBufAvail0_OFFS 0x3000
+#define QIB_7322_SendBufAvail0_DEF 0x0000000000000000
+#define QIB_7322_SendBufAvail0_SendBuf_31_0_LSB 0x0
+#define QIB_7322_SendBufAvail0_SendBuf_31_0_MSB 0x3F
+#define QIB_7322_SendBufAvail0_SendBuf_31_0_RMASK 0x0
+
+#define QIB_7322_MsixTable_OFFS 0x8000
+#define QIB_7322_MsixTable_DEF 0x0000000000000000
+
+#define QIB_7322_MsixPba_OFFS 0x9000
+#define QIB_7322_MsixPba_DEF 0x0000000000000000
+
+#define QIB_7322_LAMemory_OFFS 0xA000
+#define QIB_7322_LAMemory_DEF 0x0000000000000000
+
+#define QIB_7322_LBIntCnt_OFFS 0x11000
+#define QIB_7322_LBIntCnt_DEF 0x0000000000000000
+
+#define QIB_7322_LBFlowStallCnt_OFFS 0x11008
+#define QIB_7322_LBFlowStallCnt_DEF 0x0000000000000000
+
+#define QIB_7322_RxTIDFullErrCnt_OFFS 0x110D0
+#define QIB_7322_RxTIDFullErrCnt_DEF 0x0000000000000000
+
+#define QIB_7322_RxTIDValidErrCnt_OFFS 0x110D8
+#define QIB_7322_RxTIDValidErrCnt_DEF 0x0000000000000000
+
+#define QIB_7322_RxP0HdrEgrOvflCnt_OFFS 0x110E8
+#define QIB_7322_RxP0HdrEgrOvflCnt_DEF 0x0000000000000000
+
+#define QIB_7322_PcieRetryBufDiagQwordCnt_OFFS 0x111A0
+#define QIB_7322_PcieRetryBufDiagQwordCnt_DEF 0x0000000000000000
+
+#define QIB_7322_RxTidFlowDropCnt_OFFS 0x111E0
+#define QIB_7322_RxTidFlowDropCnt_DEF 0x0000000000000000
+
+#define QIB_7322_LBIntCnt_0_OFFS 0x12000
+#define QIB_7322_LBIntCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxCreditUpToDateTimeOut_0_OFFS 0x12008
+#define QIB_7322_TxCreditUpToDateTimeOut_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxSDmaDescCnt_0_OFFS 0x12010
+#define QIB_7322_TxSDmaDescCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxUnsupVLErrCnt_0_OFFS 0x12018
+#define QIB_7322_TxUnsupVLErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxDataPktCnt_0_OFFS 0x12020
+#define QIB_7322_TxDataPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxFlowPktCnt_0_OFFS 0x12028
+#define QIB_7322_TxFlowPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxDwordCnt_0_OFFS 0x12030
+#define QIB_7322_TxDwordCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxLenErrCnt_0_OFFS 0x12038
+#define QIB_7322_TxLenErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxMaxMinLenErrCnt_0_OFFS 0x12040
+#define QIB_7322_TxMaxMinLenErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxUnderrunCnt_0_OFFS 0x12048
+#define QIB_7322_TxUnderrunCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxFlowStallCnt_0_OFFS 0x12050
+#define QIB_7322_TxFlowStallCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxDroppedPktCnt_0_OFFS 0x12058
+#define QIB_7322_TxDroppedPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxDroppedPktCnt_0_OFFS 0x12060
+#define QIB_7322_RxDroppedPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxDataPktCnt_0_OFFS 0x12068
+#define QIB_7322_RxDataPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxFlowPktCnt_0_OFFS 0x12070
+#define QIB_7322_RxFlowPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxDwordCnt_0_OFFS 0x12078
+#define QIB_7322_RxDwordCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxLenErrCnt_0_OFFS 0x12080
+#define QIB_7322_RxLenErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxMaxMinLenErrCnt_0_OFFS 0x12088
+#define QIB_7322_RxMaxMinLenErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxICRCErrCnt_0_OFFS 0x12090
+#define QIB_7322_RxICRCErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxVCRCErrCnt_0_OFFS 0x12098
+#define QIB_7322_RxVCRCErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxFlowCtrlViolCnt_0_OFFS 0x120A0
+#define QIB_7322_RxFlowCtrlViolCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxVersionErrCnt_0_OFFS 0x120A8
+#define QIB_7322_RxVersionErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxLinkMalformCnt_0_OFFS 0x120B0
+#define QIB_7322_RxLinkMalformCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxEBPCnt_0_OFFS 0x120B8
+#define QIB_7322_RxEBPCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxLPCRCErrCnt_0_OFFS 0x120C0
+#define QIB_7322_RxLPCRCErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxBufOvflCnt_0_OFFS 0x120C8
+#define QIB_7322_RxBufOvflCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxLenTruncateCnt_0_OFFS 0x120D0
+#define QIB_7322_RxLenTruncateCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxPKeyMismatchCnt_0_OFFS 0x120E0
+#define QIB_7322_RxPKeyMismatchCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_IBLinkDownedCnt_0_OFFS 0x12180
+#define QIB_7322_IBLinkDownedCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_IBSymbolErrCnt_0_OFFS 0x12188
+#define QIB_7322_IBSymbolErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_IBStatusChangeCnt_0_OFFS 0x12190
+#define QIB_7322_IBStatusChangeCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_IBLinkErrRecoveryCnt_0_OFFS 0x12198
+#define QIB_7322_IBLinkErrRecoveryCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_ExcessBufferOvflCnt_0_OFFS 0x121A8
+#define QIB_7322_ExcessBufferOvflCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_LocalLinkIntegrityErrCnt_0_OFFS 0x121B0
+#define QIB_7322_LocalLinkIntegrityErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxVlErrCnt_0_OFFS 0x121B8
+#define QIB_7322_RxVlErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxDlidFltrCnt_0_OFFS 0x121C0
+#define QIB_7322_RxDlidFltrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxVL15DroppedPktCnt_0_OFFS 0x121C8
+#define QIB_7322_RxVL15DroppedPktCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxOtherLocalPhyErrCnt_0_OFFS 0x121D0
+#define QIB_7322_RxOtherLocalPhyErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_RxQPInvalidContextCnt_0_OFFS 0x121D8
+#define QIB_7322_RxQPInvalidContextCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_TxHeadersErrCnt_0_OFFS 0x121F8
+#define QIB_7322_TxHeadersErrCnt_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSRcvDataCount_0_OFFS 0x12218
+#define QIB_7322_PSRcvDataCount_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSRcvPktsCount_0_OFFS 0x12220
+#define QIB_7322_PSRcvPktsCount_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSXmitDataCount_0_OFFS 0x12228
+#define QIB_7322_PSXmitDataCount_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSXmitPktsCount_0_OFFS 0x12230
+#define QIB_7322_PSXmitPktsCount_0_DEF 0x0000000000000000
+
+#define QIB_7322_PSXmitWaitCount_0_OFFS 0x12238
+#define QIB_7322_PSXmitWaitCount_0_DEF 0x0000000000000000
+
+#define QIB_7322_LBIntCnt_1_OFFS 0x13000
+#define QIB_7322_LBIntCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxCreditUpToDateTimeOut_1_OFFS 0x13008
+#define QIB_7322_TxCreditUpToDateTimeOut_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxSDmaDescCnt_1_OFFS 0x13010
+#define QIB_7322_TxSDmaDescCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxUnsupVLErrCnt_1_OFFS 0x13018
+#define QIB_7322_TxUnsupVLErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxDataPktCnt_1_OFFS 0x13020
+#define QIB_7322_TxDataPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxFlowPktCnt_1_OFFS 0x13028
+#define QIB_7322_TxFlowPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxDwordCnt_1_OFFS 0x13030
+#define QIB_7322_TxDwordCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxLenErrCnt_1_OFFS 0x13038
+#define QIB_7322_TxLenErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxMaxMinLenErrCnt_1_OFFS 0x13040
+#define QIB_7322_TxMaxMinLenErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxUnderrunCnt_1_OFFS 0x13048
+#define QIB_7322_TxUnderrunCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxFlowStallCnt_1_OFFS 0x13050
+#define QIB_7322_TxFlowStallCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxDroppedPktCnt_1_OFFS 0x13058
+#define QIB_7322_TxDroppedPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxDroppedPktCnt_1_OFFS 0x13060
+#define QIB_7322_RxDroppedPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxDataPktCnt_1_OFFS 0x13068
+#define QIB_7322_RxDataPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxFlowPktCnt_1_OFFS 0x13070
+#define QIB_7322_RxFlowPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxDwordCnt_1_OFFS 0x13078
+#define QIB_7322_RxDwordCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxLenErrCnt_1_OFFS 0x13080
+#define QIB_7322_RxLenErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxMaxMinLenErrCnt_1_OFFS 0x13088
+#define QIB_7322_RxMaxMinLenErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxICRCErrCnt_1_OFFS 0x13090
+#define QIB_7322_RxICRCErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxVCRCErrCnt_1_OFFS 0x13098
+#define QIB_7322_RxVCRCErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxFlowCtrlViolCnt_1_OFFS 0x130A0
+#define QIB_7322_RxFlowCtrlViolCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxVersionErrCnt_1_OFFS 0x130A8
+#define QIB_7322_RxVersionErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxLinkMalformCnt_1_OFFS 0x130B0
+#define QIB_7322_RxLinkMalformCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxEBPCnt_1_OFFS 0x130B8
+#define QIB_7322_RxEBPCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxLPCRCErrCnt_1_OFFS 0x130C0
+#define QIB_7322_RxLPCRCErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxBufOvflCnt_1_OFFS 0x130C8
+#define QIB_7322_RxBufOvflCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxLenTruncateCnt_1_OFFS 0x130D0
+#define QIB_7322_RxLenTruncateCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxPKeyMismatchCnt_1_OFFS 0x130E0
+#define QIB_7322_RxPKeyMismatchCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_IBLinkDownedCnt_1_OFFS 0x13180
+#define QIB_7322_IBLinkDownedCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_IBSymbolErrCnt_1_OFFS 0x13188
+#define QIB_7322_IBSymbolErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_IBStatusChangeCnt_1_OFFS 0x13190
+#define QIB_7322_IBStatusChangeCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_IBLinkErrRecoveryCnt_1_OFFS 0x13198
+#define QIB_7322_IBLinkErrRecoveryCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_ExcessBufferOvflCnt_1_OFFS 0x131A8
+#define QIB_7322_ExcessBufferOvflCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_LocalLinkIntegrityErrCnt_1_OFFS 0x131B0
+#define QIB_7322_LocalLinkIntegrityErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxVlErrCnt_1_OFFS 0x131B8
+#define QIB_7322_RxVlErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxDlidFltrCnt_1_OFFS 0x131C0
+#define QIB_7322_RxDlidFltrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxVL15DroppedPktCnt_1_OFFS 0x131C8
+#define QIB_7322_RxVL15DroppedPktCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxOtherLocalPhyErrCnt_1_OFFS 0x131D0
+#define QIB_7322_RxOtherLocalPhyErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_RxQPInvalidContextCnt_1_OFFS 0x131D8
+#define QIB_7322_RxQPInvalidContextCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_TxHeadersErrCnt_1_OFFS 0x131F8
+#define QIB_7322_TxHeadersErrCnt_1_DEF 0x0000000000000000
+
+#define QIB_7322_PSRcvDataCount_1_OFFS 0x13218
+#define QIB_7322_PSRcvDataCount_1_DEF 0x0000000000000000
+
+#define QIB_7322_PSRcvPktsCount_1_OFFS 0x13220
+#define QIB_7322_PSRcvPktsCount_1_DEF 0x0000000000000000
+
+#define QIB_7322_PSXmitDataCount_1_OFFS 0x13228
+#define QIB_7322_PSXmitDataCount_1_DEF 0x0000000000000000
+
+#define QIB_7322_PSXmitPktsCount_1_OFFS 0x13230
+#define QIB_7322_PSXmitPktsCount_1_DEF 0x0000000000000000
+
+#define QIB_7322_PSXmitWaitCount_1_OFFS 0x13238
+#define QIB_7322_PSXmitWaitCount_1_DEF 0x0000000000000000
+
+#define QIB_7322_RcvEgrArray_OFFS 0x14000
+#define QIB_7322_RcvEgrArray_DEF 0x0000000000000000
+#define QIB_7322_RcvEgrArray_RT_BufSize_LSB 0x25
+#define QIB_7322_RcvEgrArray_RT_BufSize_MSB 0x27
+#define QIB_7322_RcvEgrArray_RT_BufSize_RMASK 0x7
+#define QIB_7322_RcvEgrArray_RT_Addr_LSB 0x0
+#define QIB_7322_RcvEgrArray_RT_Addr_MSB 0x24
+#define QIB_7322_RcvEgrArray_RT_Addr_RMASK 0x1FFFFFFFFF
+
+#define QIB_7322_RcvTIDArray0_OFFS 0x50000
+#define QIB_7322_RcvTIDArray0_DEF 0x0000000000000000
+#define QIB_7322_RcvTIDArray0_RT_BufSize_LSB 0x25
+#define QIB_7322_RcvTIDArray0_RT_BufSize_MSB 0x27
+#define QIB_7322_RcvTIDArray0_RT_BufSize_RMASK 0x7
+#define QIB_7322_RcvTIDArray0_RT_Addr_LSB 0x0
+#define QIB_7322_RcvTIDArray0_RT_Addr_MSB 0x24
+#define QIB_7322_RcvTIDArray0_RT_Addr_RMASK 0x1FFFFFFFFF
+
+#define QIB_7322_IBSD_DDS_MAP_TABLE_0_OFFS 0xD0000
+#define QIB_7322_IBSD_DDS_MAP_TABLE_0_DEF 0x0000000000000000
+
+#define QIB_7322_RcvHdrTail0_OFFS 0x200000
+#define QIB_7322_RcvHdrTail0_DEF 0x0000000000000000
+
+#define QIB_7322_RcvHdrHead0_OFFS 0x200008
+#define QIB_7322_RcvHdrHead0_DEF 0x0000000000000000
+#define QIB_7322_RcvHdrHead0_counter_LSB 0x20
+#define QIB_7322_RcvHdrHead0_counter_MSB 0x2F
+#define QIB_7322_RcvHdrHead0_counter_RMASK 0xFFFF
+#define QIB_7322_RcvHdrHead0_RcvHeadPointer_LSB 0x0
+#define QIB_7322_RcvHdrHead0_RcvHeadPointer_MSB 0x1F
+#define QIB_7322_RcvHdrHead0_RcvHeadPointer_RMASK 0xFFFFFFFF
+
+#define QIB_7322_RcvEgrIndexTail0_OFFS 0x200010
+#define QIB_7322_RcvEgrIndexTail0_DEF 0x0000000000000000
+
+#define QIB_7322_RcvEgrIndexHead0_OFFS 0x200018
+#define QIB_7322_RcvEgrIndexHead0_DEF 0x0000000000000000
+
+#define QIB_7322_RcvTIDFlowTable0_OFFS 0x201000
+#define QIB_7322_RcvTIDFlowTable0_DEF 0x0000000000000000
+#define QIB_7322_RcvTIDFlowTable0_GenMismatch_LSB 0x1C
+#define QIB_7322_RcvTIDFlowTable0_GenMismatch_MSB 0x1C
+#define QIB_7322_RcvTIDFlowTable0_GenMismatch_RMASK 0x1
+#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_LSB 0x1B
+#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_MSB 0x1B
+#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_RMASK 0x1
+#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_LSB 0x16
+#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_MSB 0x16
+#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_RMASK 0x1
+#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_LSB 0x15
+#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_MSB 0x15
+#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_RMASK 0x1
+#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_LSB 0x14
+#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_MSB 0x14
+#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_RMASK 0x1
+#define QIB_7322_RcvTIDFlowTable0_FlowValid_LSB 0x13
+#define QIB_7322_RcvTIDFlowTable0_FlowValid_MSB 0x13
+#define QIB_7322_RcvTIDFlowTable0_FlowValid_RMASK 0x1
+#define QIB_7322_RcvTIDFlowTable0_GenVal_LSB 0xB
+#define QIB_7322_RcvTIDFlowTable0_GenVal_MSB 0x12
+#define QIB_7322_RcvTIDFlowTable0_GenVal_RMASK 0xFF
+#define QIB_7322_RcvTIDFlowTable0_SeqNum_LSB 0x0
+#define QIB_7322_RcvTIDFlowTable0_SeqNum_MSB 0xA
+#define QIB_7322_RcvTIDFlowTable0_SeqNum_RMASK 0x7FF
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
new file mode 100644
index 00000000000..5670ace27c6
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -0,0 +1,812 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _QIB_COMMON_H
+#define _QIB_COMMON_H
+
+/*
+ * This file contains defines, structures, etc. that are used
+ * to communicate between kernel and user code.
+ */
+
+/* This is the IEEE-assigned OUI for QLogic Inc. QLogic_IB */
+#define QIB_SRC_OUI_1 0x00
+#define QIB_SRC_OUI_2 0x11
+#define QIB_SRC_OUI_3 0x75
+
+/* version of protocol header (known to chip also). In the long run,
+ * we should be able to generate and accept a range of version numbers;
+ * for now we only accept one, and it's compiled in.
+ */
+#define IPS_PROTO_VERSION 2
+
+/*
+ * These are compile time constants that you may want to enable or disable
+ * if you are trying to debug problems with code or performance.
+ * QIB_VERBOSE_TRACING define as 1 if you want additional tracing in
+ * fastpath code
+ * QIB_TRACE_REGWRITES define as 1 if you want register writes to be
+ * traced in fastpath code
+ * _QIB_TRACING define as 0 if you want to remove all tracing in a
+ * compilation unit
+ */
+
+/*
+ * The value in the BTH QP field that QLogic_IB uses to differentiate
+ * a qlogic_ib protocol IB packet from standard IB transport.
+ * It needs to be even (0x656b78), because the LSB is sometimes
+ * used for the MSB of context. The change may cause a problem
+ * interoperating with older software.
+ */
+#define QIB_KD_QP 0x656b78
+
+/*
+ * These are the status bits readable (in ascii form, 64bit value)
+ * from the "status" sysfs file. For binary compatibility, values
+ * must remain as is; removed states can be reused for different
+ * purposes.
+ */
+#define QIB_STATUS_INITTED 0x1 /* basic initialization done */
+/* Chip has been found and initted */
+#define QIB_STATUS_CHIP_PRESENT 0x20
+/* IB link is at ACTIVE, usable for data traffic */
+#define QIB_STATUS_IB_READY 0x40
+/* link is configured, LID, MTU, etc. have been set */
+#define QIB_STATUS_IB_CONF 0x80
+/* A Fatal hardware error has occurred. */
+#define QIB_STATUS_HWERROR 0x200
+
+/*
+ * The list of usermode accessible registers. Also see Reg_* later in file.
+ */
+enum qib_ureg {
+ /* (RO) DMA RcvHdr to be used next. */
+ ur_rcvhdrtail = 0,
+ /* (RW) RcvHdr entry to be processed next by host. */
+ ur_rcvhdrhead = 1,
+ /* (RO) Index of next Eager index to use. */
+ ur_rcvegrindextail = 2,
+ /* (RW) Eager TID to be processed next */
+ ur_rcvegrindexhead = 3,
+ /* For internal use only; max register number. */
+ _QIB_UregMax
+};
+
+/* bit values for spi_runtime_flags */
+#define QIB_RUNTIME_PCIE 0x0002
+#define QIB_RUNTIME_FORCE_WC_ORDER 0x0004
+#define QIB_RUNTIME_RCVHDR_COPY 0x0008
+#define QIB_RUNTIME_MASTER 0x0010
+#define QIB_RUNTIME_RCHK 0x0020
+#define QIB_RUNTIME_NODMA_RTAIL 0x0080
+#define QIB_RUNTIME_SPECIAL_TRIGGER 0x0100
+#define QIB_RUNTIME_SDMA 0x0200
+#define QIB_RUNTIME_FORCE_PIOAVAIL 0x0400
+#define QIB_RUNTIME_PIO_REGSWAPPED 0x0800
+#define QIB_RUNTIME_CTXT_MSB_IN_QP 0x1000
+#define QIB_RUNTIME_CTXT_REDIRECT 0x2000
+#define QIB_RUNTIME_HDRSUPP 0x4000
+
+/*
+ * This structure is returned by qib_userinit() immediately after
+ * open to get implementation-specific info, and info specific to this
+ * instance.
+ *
+ * This struct must have explicit pad fields where type sizes
+ * may result in different alignments between 32 and 64 bit
+ * programs, since the 64 bit kernel requires the user code
+ * to have matching offsets.
+ */
+struct qib_base_info {
+ /* version of hardware, for feature checking. */
+ __u32 spi_hw_version;
+ /* version of software, for feature checking. */
+ __u32 spi_sw_version;
+ /* QLogic_IB context assigned, goes into sent packets */
+ __u16 spi_ctxt;
+ __u16 spi_subctxt;
+ /*
+ * IB MTU; a packet's IB data must be less than this.
+ * The MTU is in bytes, and will be a multiple of 4 bytes.
+ */
+ __u32 spi_mtu;
+ /*
+ * Size of a PIO buffer. Any given packet's total size must be less
+ * than this (in words). Included is the starting control word, so
+ * if 513 is returned, then total pkt size is 512 words or less.
+ */
+ __u32 spi_piosize;
+ /* size of the TID cache in qlogic_ib, in entries */
+ __u32 spi_tidcnt;
+ /* size of the TID Eager list in qlogic_ib, in entries */
+ __u32 spi_tidegrcnt;
+ /* size of a single receive header queue entry in words. */
+ __u32 spi_rcvhdrent_size;
+ /*
+ * Count of receive header queue entries allocated.
+ * This may be less than the spu_rcvhdrcnt passed in.
+ */
+ __u32 spi_rcvhdr_cnt;
+
+ /* per-chip and other runtime features bitmap (QIB_RUNTIME_*) */
+ __u32 spi_runtime_flags;
+
+ /* address where hardware receive header queue is mapped */
+ __u64 spi_rcvhdr_base;
+
+ /* user program. */
+
+ /* base address of eager TID receive buffers used by hardware. */
+ __u64 spi_rcv_egrbufs;
+
+ /* Allocated by initialization code, not by protocol. */
+
+ /*
+ * Size of each TID buffer in host memory, starting at
+ * spi_rcv_egrbufs. The buffers are virtually contiguous.
+ */
+ __u32 spi_rcv_egrbufsize;
+ /*
+ * The special QP (queue pair) value that identifies a qlogic_ib
+ * protocol packet from standard IB packets. More, probably much
+ * more, to be added.
+ */
+ __u32 spi_qpair;
+
+ /*
+ * User register base for init code, not to be used directly by
+ * protocol or applications. Always points to chip registers,
+ * for normal or shared context.
+ */
+ __u64 spi_uregbase;
+ /*
+ * Maximum buffer size in bytes that can be used in a single TID
+ * entry (assuming the buffer is aligned to this boundary). This is
+ * the minimum of what the hardware and software support. Guaranteed
+ * to be a power of 2.
+ */
+ __u32 spi_tid_maxsize;
+ /*
+ * alignment of each pio send buffer (byte count
+ * to add to spi_piobufbase to get to second buffer)
+ */
+ __u32 spi_pioalign;
+ /*
+ * The index of the first pio buffer available to this process;
+ * needed to do lookup in spi_pioavailaddr; not added to
+ * spi_piobufbase.
+ */
+ __u32 spi_pioindex;
+ /* number of buffers mapped for this process */
+ __u32 spi_piocnt;
+
+ /*
+ * Base address of writeonly pio buffers for this process.
+ * Each buffer has spi_piosize words, and is aligned on spi_pioalign
+ * boundaries. spi_piocnt buffers are mapped from this address
+ */
+ __u64 spi_piobufbase;
+
+ /*
+ * Base address of readonly memory copy of the pioavail registers.
+ * There are 2 bits for each buffer.
+ */
+ __u64 spi_pioavailaddr;
+
+ /*
+ * Address where driver updates a copy of the interface and driver
+ * status (QIB_STATUS_*) as a 64 bit value. It's followed by a
+ * link status qword (formerly combined with driver status), then a
+ * string indicating hardware error, if there was one.
+ */
+ __u64 spi_status;
+
+ /* number of chip ctxts available to user processes */
+ __u32 spi_nctxts;
+ __u16 spi_unit; /* unit number of chip we are using */
+ __u16 spi_port; /* IB port number we are using */
+ /* num bufs in each contiguous set */
+ __u32 spi_rcv_egrperchunk;
+ /* size in bytes of each contiguous set */
+ __u32 spi_rcv_egrchunksize;
+ /* total size of mmap to cover full rcvegrbuffers */
+ __u32 spi_rcv_egrbuftotlen;
+ __u32 spi_rhf_offset; /* dword offset in hdrqent for rcvhdr flags */
+ /* address of readonly memory copy of the rcvhdrq tail register. */
+ __u64 spi_rcvhdr_tailaddr;
+
+ /*
+ * shared memory pages for subctxts if ctxt is shared; these cover
+ * all the processes in the group sharing a single context.
+ * All have enough space for the num_subcontexts value on this job.
+ */
+ __u64 spi_subctxt_uregbase;
+ __u64 spi_subctxt_rcvegrbuf;
+ __u64 spi_subctxt_rcvhdr_base;
+
+ /* shared memory page for send buffer disarm status */
+ __u64 spi_sendbuf_status;
+} __attribute__ ((aligned(8)));
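+
+/*
+ * Example (illustrative only, inferred from the field descriptions
+ * above): a process with spi_piocnt mapped send buffers would locate
+ * buffer i (0 <= i < spi_piocnt) at
+ * spi_piobufbase + (__u64)i * spi_pioalign,
+ * and find the two pioavail bits for that buffer at entry
+ * spi_pioindex + i of the read-only shadow copy at spi_pioavailaddr.
+ */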
+
+/*
+ * This version number is given to the driver by the user code during
+ * initialization in the spu_userversion field of qib_user_info, so
+ * the driver can check for compatibility with user code.
+ *
+ * The major version changes when data structures
+ * change in an incompatible way. The driver must be the same or higher
+ * for initialization to succeed. In some cases, a higher version
+ * driver will not interoperate with older software, and initialization
+ * will return an error.
+ */
+#define QIB_USER_SWMAJOR 1
+
+/*
+ * Minor version differences are always compatible
+ * within a major version; however, if the user software version is
+ * newer than the driver software, some new features and/or structure fields
+ * may not be implemented; the user code must deal with this if it
+ * cares, or it must abort after initialization reports the difference.
+ */
+#define QIB_USER_SWMINOR 13
+
+#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
+
+#ifndef QIB_KERN_TYPE
+#define QIB_KERN_TYPE 0
+#endif
+
+/*
+ * Similarly, this is the kernel version going back to the user. It's
+ * slightly different, in that we want to tell if the driver was built as
+ * part of a QLogic release, or is the driver from openfabrics.org,
+ * kernel.org, or a standard distribution, for support reasons.
+ * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied.
+ *
+ * It's returned by the driver to the user code during initialization in the
+ * spi_sw_version field of qib_base_info, so the user code can in turn
+ * check for compatibility with the kernel.
+*/
+#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
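+
+/*
+ * Example (illustrative only, following the macros above): user code
+ * can decompose the value returned in spi_sw_version as
+ * qlogic_built = (spi_sw_version >> 31) & 1;
+ * major = (spi_sw_version >> 16) & 0x7fff;
+ * minor = spi_sw_version & 0xffff;
+ * and compare major against QIB_USER_SWMAJOR before proceeding.
+ */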
+
+/*
+ * Define the driver version number. This is something that refers only
+ * to the driver itself, not the software interfaces it supports.
+ */
+#define QIB_DRIVER_VERSION_BASE "1.11"
+
+/* create the final driver version string */
+#ifdef QIB_IDSTR
+#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
+#else
+#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
+#endif
+
+/*
+ * If the unit is specified via open, HCA choice is fixed. If port is
+ * specified, it's also fixed. Otherwise we try to spread contexts
+ * across ports and HCAs, using different algorithms. WITHIN is
+ * the old default, prior to this mechanism.
+ */
+#define QIB_PORT_ALG_ACROSS 0 /* round robin contexts across HCAs, then
+ * ports; this is the default */
+#define QIB_PORT_ALG_WITHIN 1 /* use all contexts on an HCA (round robin
+ * active ports within), then next HCA */
+#define QIB_PORT_ALG_COUNT 2 /* number of algorithm choices */
+
+/*
+ * This structure is passed to qib_userinit() to tell the driver where
+ * user code buffers are, sizes, etc. The offsets and sizes of the
+ * fields must remain unchanged, for binary compatibility. It can
+ * be extended, provided userversion is changed so user code can detect it.
+ */
+struct qib_user_info {
+ /*
+ * version of user software, to detect compatibility issues.
+ * Should be set to QIB_USER_SWVERSION.
+ */
+ __u32 spu_userversion;
+
+ __u32 _spu_unused2;
+
+ /* size of struct base_info to write to */
+ __u32 spu_base_info_size;
+
+ __u32 spu_port_alg; /* which QIB_PORT_ALG_*; unused if user minor < 11 */
+
+ /*
+ * If two or more processes wish to share a context, each process
+ * must set the spu_subctxt_cnt and spu_subctxt_id to the same
+ * values. The only restriction on the spu_subctxt_id is that
+ * it be unique for a given node.
+ */
+ __u16 spu_subctxt_cnt;
+ __u16 spu_subctxt_id;
+
+ __u32 spu_port; /* IB port requested by user if > 0 */
+
+ /*
+ * address of struct base_info to write to
+ */
+ __u64 spu_base_info;
+
+} __attribute__ ((aligned(8)));
+
+/* User commands. */
+
+/* 16 available, was: old set up userspace (for old user code) */
+#define QIB_CMD_CTXT_INFO 17 /* find out what resources we got */
+#define QIB_CMD_RECV_CTRL 18 /* control receipt of packets */
+#define QIB_CMD_TID_UPDATE 19 /* update expected TID entries */
+#define QIB_CMD_TID_FREE 20 /* free expected TID entries */
+#define QIB_CMD_SET_PART_KEY 21 /* add partition key */
+/* 22 available, was: return info on slave processes (for old user code) */
+#define QIB_CMD_ASSIGN_CTXT 23 /* allocate HCA and ctxt */
+#define QIB_CMD_USER_INIT 24 /* set up userspace */
+#define QIB_CMD_UNUSED_1 25
+#define QIB_CMD_UNUSED_2 26
+#define QIB_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */
+#define QIB_CMD_POLL_TYPE 28 /* set the kind of polling we want */
+#define QIB_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */
+/* 30 is unused */
+#define QIB_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */
+#define QIB_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */
+/* 33 available, was a testing feature */
+#define QIB_CMD_DISARM_BUFS 34 /* disarm send buffers w/ errors */
+#define QIB_CMD_ACK_EVENT 35 /* ack & clear bits */
+#define QIB_CMD_CPUS_LIST 36 /* list of cpus allocated, for pinned
+ * processes: qib_cpus_list */
+
+/*
+ * QIB_CMD_ACK_EVENT obsoletes QIB_CMD_DISARM_BUFS, but we keep it for
+ * compatibility with libraries from previous release. The ACK_EVENT
+ * will take appropriate driver action (if any, just DISARM for now),
+ * then clear the bits passed in as part of the mask. These bits are
+ * in the first 64bit word at spi_sendbuf_status, and are passed to
+ * the driver in the event_mask union as well.
+ */
+#define _QIB_EVENT_DISARM_BUFS_BIT 0
+#define _QIB_EVENT_LINKDOWN_BIT 1
+#define _QIB_EVENT_LID_CHANGE_BIT 2
+#define _QIB_EVENT_LMC_CHANGE_BIT 3
+#define _QIB_EVENT_SL2VL_CHANGE_BIT 4
+#define _QIB_MAX_EVENT_BIT _QIB_EVENT_SL2VL_CHANGE_BIT
+
+#define QIB_EVENT_DISARM_BUFS_BIT (1UL << _QIB_EVENT_DISARM_BUFS_BIT)
+#define QIB_EVENT_LINKDOWN_BIT (1UL << _QIB_EVENT_LINKDOWN_BIT)
+#define QIB_EVENT_LID_CHANGE_BIT (1UL << _QIB_EVENT_LID_CHANGE_BIT)
+#define QIB_EVENT_LMC_CHANGE_BIT (1UL << _QIB_EVENT_LMC_CHANGE_BIT)
+#define QIB_EVENT_SL2VL_CHANGE_BIT (1UL << _QIB_EVENT_SL2VL_CHANGE_BIT)
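+
+/*
+ * Example (illustrative only, based on the QIB_CMD_ACK_EVENT comment
+ * above): user code reads the first 64-bit word at spi_sendbuf_status,
+ * tests bits such as QIB_EVENT_DISARM_BUFS_BIT, handles them, and then
+ * issues a struct qib_cmd with type QIB_CMD_ACK_EVENT and
+ * cmd.event_mask set to the bits it handled, so the driver can take
+ * any needed action (currently just DISARM) and clear those bits.
+ */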
+
+
+/*
+ * Poll types
+ */
+#define QIB_POLL_TYPE_ANYRCV 0x0
+#define QIB_POLL_TYPE_URGENT 0x1
+
+struct qib_ctxt_info {
+ __u16 num_active; /* number of active units */
+ __u16 unit; /* unit (chip) assigned to caller */
+ __u16 port; /* IB port assigned to caller (1-based) */
+ __u16 ctxt; /* ctxt on unit assigned to caller */
+ __u16 subctxt; /* subctxt on unit assigned to caller */
+ __u16 num_ctxts; /* number of ctxts available on unit */
+ __u16 num_subctxts; /* number of subctxts opened on ctxt */
+ __u16 rec_cpu; /* cpu # for affinity (ffff if none) */
+};
+
+struct qib_tid_info {
+ __u32 tidcnt;
+ /* make structure same size in 32 and 64 bit */
+ __u32 tid__unused;
+ /* virtual address of first page in transfer */
+ __u64 tidvaddr;
+ /* pointer (same size 32/64 bit) to __u16 tid array */
+ __u64 tidlist;
+
+ /*
+ * pointer (same size 32/64 bit) to bitmap of TIDs used
+ * for this call; checked for being large enough at open
+ */
+ __u64 tidmap;
+};
+
+struct qib_cmd {
+ __u32 type; /* command type */
+ union {
+ struct qib_tid_info tid_info;
+ struct qib_user_info user_info;
+
+ /*
+ * address in userspace where we should put the sdma
+ * inflight counter
+ */
+ __u64 sdma_inflight;
+ /*
+ * address in userspace where we should put the sdma
+ * completion counter
+ */
+ __u64 sdma_complete;
+ /* address in userspace of struct qib_ctxt_info to
+ write result to */
+ __u64 ctxt_info;
+ /* enable/disable receipt of packets */
+ __u32 recv_ctrl;
+ /* enable/disable armlaunch errors (non-zero to enable) */
+ __u32 armlaunch_ctrl;
+ /* partition key to set */
+ __u16 part_key;
+ /* user address of __u32 bitmask of active slaves */
+ __u64 slave_mask_addr;
+ /* type of polling we want */
+ __u16 poll_type;
+ /* back pressure enable bit for one particular context */
+ __u8 ctxt_bp;
+ /* qib_user_event_ack(), IPATH_EVENT_* bits */
+ __u64 event_mask;
+ } cmd;
+};
+
+struct qib_iovec {
+ /* Pointer to data, but same size 32 and 64 bit */
+ __u64 iov_base;
+
+ /*
+ * Length of data; don't need 64 bits, but want
+ * qib_sendpkt to remain same size as before 32 bit changes, so...
+ */
+ __u64 iov_len;
+};
+
+/*
+ * Describes a single packet for send. Each packet can have one or more
+ * buffers, but the total length (exclusive of IB headers) must be less
+ * than the MTU, and if using the PIO method, entire packet length,
+ * including IB headers, must be less than the qib_piosize value (words).
+ * Use of this necessitates including sys/uio.h
+ */
+struct __qib_sendpkt {
+ __u32 sps_flags; /* flags for packet (TBD) */
+ __u32 sps_cnt; /* number of entries to use in sps_iov */
+ /* array of iov's describing packet. TEMPORARY */
+ struct qib_iovec sps_iov[4];
+};
+
+/*
+ * Diagnostics can send a packet by "writing" the following
+ * structs to the diag data special file. This allows a custom
+ * pbc (+ static rate) qword, so that special modes and deliberate
+ * changes to CRCs can be used. The elements were also re-ordered
+ * for better alignment and to avoid padding issues.
+ */
+#define _DIAG_XPKT_VERS 3
+struct qib_diag_xpkt {
+ __u16 version;
+ __u16 unit;
+ __u16 port;
+ __u16 len;
+ __u64 data;
+ __u64 pbc_wd;
+};
+
+/*
+ * Data layout in I2C flash (for GUID, etc.)
+ * All fields are little-endian binary unless otherwise stated
+ */
+#define QIB_FLASH_VERSION 2
+struct qib_flash {
+ /* flash layout version (QIB_FLASH_VERSION) */
+ __u8 if_fversion;
+ /* checksum protecting if_length bytes */
+ __u8 if_csum;
+ /*
+ * valid length (in use, protected by if_csum), including
+ * if_fversion and if_csum themselves
+ */
+ __u8 if_length;
+ /* the GUID, in network order */
+ __u8 if_guid[8];
+ /* number of GUIDs to use, starting from if_guid */
+ __u8 if_numguid;
+ /* the (last 10 characters of) board serial number, in ASCII */
+ char if_serial[12];
+ /* board mfg date (YYYYMMDD ASCII) */
+ char if_mfgdate[8];
+ /* last board rework/test date (YYYYMMDD ASCII) */
+ char if_testdate[8];
+ /* logging of error counts, TBD */
+ __u8 if_errcntp[4];
+ /* powered on hours, updated at driver unload */
+ __u8 if_powerhour[2];
+ /* ASCII free-form comment field */
+ char if_comment[32];
+ /* Backwards compatible prefix for longer QLogic Serial Numbers */
+ char if_sprefix[4];
+ /* 82 bytes used, min flash size is 128 bytes */
+ __u8 if_future[46];
+};
+
+/*
+ * These are the counters implemented in the chip, and are listed in order.
+ * The InterCaps naming is taken straight from the chip spec.
+ */
+struct qlogic_ib_counters {
+ __u64 LBIntCnt;
+ __u64 LBFlowStallCnt;
+ __u64 TxSDmaDescCnt; /* was Reserved1 */
+ __u64 TxUnsupVLErrCnt;
+ __u64 TxDataPktCnt;
+ __u64 TxFlowPktCnt;
+ __u64 TxDwordCnt;
+ __u64 TxLenErrCnt;
+ __u64 TxMaxMinLenErrCnt;
+ __u64 TxUnderrunCnt;
+ __u64 TxFlowStallCnt;
+ __u64 TxDroppedPktCnt;
+ __u64 RxDroppedPktCnt;
+ __u64 RxDataPktCnt;
+ __u64 RxFlowPktCnt;
+ __u64 RxDwordCnt;
+ __u64 RxLenErrCnt;
+ __u64 RxMaxMinLenErrCnt;
+ __u64 RxICRCErrCnt;
+ __u64 RxVCRCErrCnt;
+ __u64 RxFlowCtrlErrCnt;
+ __u64 RxBadFormatCnt;
+ __u64 RxLinkProblemCnt;
+ __u64 RxEBPCnt;
+ __u64 RxLPCRCErrCnt;
+ __u64 RxBufOvflCnt;
+ __u64 RxTIDFullErrCnt;
+ __u64 RxTIDValidErrCnt;
+ __u64 RxPKeyMismatchCnt;
+ __u64 RxP0HdrEgrOvflCnt;
+ __u64 RxP1HdrEgrOvflCnt;
+ __u64 RxP2HdrEgrOvflCnt;
+ __u64 RxP3HdrEgrOvflCnt;
+ __u64 RxP4HdrEgrOvflCnt;
+ __u64 RxP5HdrEgrOvflCnt;
+ __u64 RxP6HdrEgrOvflCnt;
+ __u64 RxP7HdrEgrOvflCnt;
+ __u64 RxP8HdrEgrOvflCnt;
+ __u64 RxP9HdrEgrOvflCnt;
+ __u64 RxP10HdrEgrOvflCnt;
+ __u64 RxP11HdrEgrOvflCnt;
+ __u64 RxP12HdrEgrOvflCnt;
+ __u64 RxP13HdrEgrOvflCnt;
+ __u64 RxP14HdrEgrOvflCnt;
+ __u64 RxP15HdrEgrOvflCnt;
+ __u64 RxP16HdrEgrOvflCnt;
+ __u64 IBStatusChangeCnt;
+ __u64 IBLinkErrRecoveryCnt;
+ __u64 IBLinkDownedCnt;
+ __u64 IBSymbolErrCnt;
+ __u64 RxVL15DroppedPktCnt;
+ __u64 RxOtherLocalPhyErrCnt;
+ __u64 PcieRetryBufDiagQwordCnt;
+ __u64 ExcessBufferOvflCnt;
+ __u64 LocalLinkIntegrityErrCnt;
+ __u64 RxVlErrCnt;
+ __u64 RxDlidFltrCnt;
+};
+
+/*
+ * The next set of defines are for packet headers, and chip register
+ * and memory bits that are visible to and/or used by user-mode software.
+ */
+
+/* RcvHdrFlags bits */
+#define QLOGIC_IB_RHF_LENGTH_MASK 0x7FF
+#define QLOGIC_IB_RHF_LENGTH_SHIFT 0
+#define QLOGIC_IB_RHF_RCVTYPE_MASK 0x7
+#define QLOGIC_IB_RHF_RCVTYPE_SHIFT 11
+#define QLOGIC_IB_RHF_EGRINDEX_MASK 0xFFF
+#define QLOGIC_IB_RHF_EGRINDEX_SHIFT 16
+#define QLOGIC_IB_RHF_SEQ_MASK 0xF
+#define QLOGIC_IB_RHF_SEQ_SHIFT 0
+#define QLOGIC_IB_RHF_HDRQ_OFFSET_MASK 0x7FF
+#define QLOGIC_IB_RHF_HDRQ_OFFSET_SHIFT 4
+#define QLOGIC_IB_RHF_H_ICRCERR 0x80000000
+#define QLOGIC_IB_RHF_H_VCRCERR 0x40000000
+#define QLOGIC_IB_RHF_H_PARITYERR 0x20000000
+#define QLOGIC_IB_RHF_H_LENERR 0x10000000
+#define QLOGIC_IB_RHF_H_MTUERR 0x08000000
+#define QLOGIC_IB_RHF_H_IHDRERR 0x04000000
+#define QLOGIC_IB_RHF_H_TIDERR 0x02000000
+#define QLOGIC_IB_RHF_H_MKERR 0x01000000
+#define QLOGIC_IB_RHF_H_IBERR 0x00800000
+#define QLOGIC_IB_RHF_H_ERR_MASK 0xFF800000
+#define QLOGIC_IB_RHF_L_USE_EGR 0x80000000
+#define QLOGIC_IB_RHF_L_SWA 0x00008000
+#define QLOGIC_IB_RHF_L_SWB 0x00004000
+
+/* qlogic_ib header fields */
+#define QLOGIC_IB_I_VERS_MASK 0xF
+#define QLOGIC_IB_I_VERS_SHIFT 28
+#define QLOGIC_IB_I_CTXT_MASK 0xF
+#define QLOGIC_IB_I_CTXT_SHIFT 24
+#define QLOGIC_IB_I_TID_MASK 0x7FF
+#define QLOGIC_IB_I_TID_SHIFT 13
+#define QLOGIC_IB_I_OFFSET_MASK 0x1FFF
+#define QLOGIC_IB_I_OFFSET_SHIFT 0
+
+/* K_PktFlags bits */
+#define QLOGIC_IB_KPF_INTR 0x1
+#define QLOGIC_IB_KPF_SUBCTXT_MASK 0x3
+#define QLOGIC_IB_KPF_SUBCTXT_SHIFT 1
+
+#define QLOGIC_IB_MAX_SUBCTXT 4
+
+/* SendPIO per-buffer control */
+#define QLOGIC_IB_SP_TEST 0x40
+#define QLOGIC_IB_SP_TESTEBP 0x20
+#define QLOGIC_IB_SP_TRIGGER_SHIFT 15
+
+/* SendPIOAvail bits */
+#define QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT 1
+#define QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT 0
+
+/* qlogic_ib header format */
+struct qib_header {
+ /*
+ * Version - 4 bits, Context - 4 bits, TID - 10 bits and Offset -
+ * 14 bits before ECO change ~28 Dec 03. After that, Vers 4,
+ * Context 4, TID 11, offset 13.
+ */
+ __le32 ver_ctxt_tid_offset;
+ __le16 chksum;
+ __le16 pkt_flags;
+};
+
+/*
+ * qlogic_ib user message header format.
+ * This structure contains the first 4 fields common to all protocols
+ * that employ qlogic_ib.
+ */
+struct qib_message_header {
+ __be16 lrh[4];
+ __be32 bth[3];
+ /* fields below this point are in host byte order */
+ struct qib_header iph;
+ /* fields below are simplified, but should match PSM */
+ /* some are accessed by driver when packet splitting is needed */
+ __u8 sub_opcode;
+ __u8 flags;
+ __u16 commidx;
+ __u32 ack_seq_num;
+ __u8 flowid;
+ __u8 hdr_dlen;
+ __u16 mqhdr;
+ __u32 uwords[4];
+};
+
+/* sequence number bits for message */
+union qib_seqnum {
+ struct {
+ __u32 seq:11;
+ __u32 gen:8;
+ __u32 flow:5;
+ };
+ struct {
+ __u32 pkt:16;
+ __u32 msg:8;
+ };
+ __u32 val;
+};
+
+/* qib receiving-dma tid-session-member */
+struct qib_tid_session_member {
+ __u16 tid;
+ __u16 offset;
+ __u16 length;
+};
+
+/* IB - LRH header consts */
+#define QIB_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
+#define QIB_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
+
+/* misc. */
+#define SIZE_OF_CRC 1
+
+#define QIB_DEFAULT_P_KEY 0xFFFF
+#define QIB_PERMISSIVE_LID 0xFFFF
+#define QIB_AETH_CREDIT_SHIFT 24
+#define QIB_AETH_CREDIT_MASK 0x1F
+#define QIB_AETH_CREDIT_INVAL 0x1F
+#define QIB_PSN_MASK 0xFFFFFF
+#define QIB_MSN_MASK 0xFFFFFF
+#define QIB_QPN_MASK 0xFFFFFF
+#define QIB_MULTICAST_LID_BASE 0xC000
+#define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
+#define QIB_MULTICAST_QPN 0xFFFFFF
+
+/* Receive Header Queue: receive type (from qlogic_ib) */
+#define RCVHQ_RCV_TYPE_EXPECTED 0
+#define RCVHQ_RCV_TYPE_EAGER 1
+#define RCVHQ_RCV_TYPE_NON_KD 2
+#define RCVHQ_RCV_TYPE_ERROR 3
+
+#define QIB_HEADER_QUEUE_WORDS 9
+
+/* functions for extracting fields from rcvhdrq entries for the driver.
+ */
+static inline __u32 qib_hdrget_err_flags(const __le32 *rbuf)
+{
+ return __le32_to_cpu(rbuf[1]) & QLOGIC_IB_RHF_H_ERR_MASK;
+}
+
+static inline __u32 qib_hdrget_rcv_type(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_RCVTYPE_SHIFT) &
+ QLOGIC_IB_RHF_RCVTYPE_MASK;
+}
+
+static inline __u32 qib_hdrget_length_in_bytes(const __le32 *rbuf)
+{
+ return ((__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_LENGTH_SHIFT) &
+ QLOGIC_IB_RHF_LENGTH_MASK) << 2;
+}
+
+static inline __u32 qib_hdrget_index(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_EGRINDEX_SHIFT) &
+ QLOGIC_IB_RHF_EGRINDEX_MASK;
+}
+
+static inline __u32 qib_hdrget_seq(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[1]) >> QLOGIC_IB_RHF_SEQ_SHIFT) &
+ QLOGIC_IB_RHF_SEQ_MASK;
+}
+
+static inline __u32 qib_hdrget_offset(const __le32 *rbuf)
+{
+ return (__le32_to_cpu(rbuf[1]) >> QLOGIC_IB_RHF_HDRQ_OFFSET_SHIFT) &
+ QLOGIC_IB_RHF_HDRQ_OFFSET_MASK;
+}
+
+static inline __u32 qib_hdrget_use_egr_buf(const __le32 *rbuf)
+{
+ return __le32_to_cpu(rbuf[0]) & QLOGIC_IB_RHF_L_USE_EGR;
+}
+
+static inline __u32 qib_hdrget_qib_ver(__le32 hdrword)
+{
+ return (__le32_to_cpu(hdrword) >> QLOGIC_IB_I_VERS_SHIFT) &
+ QLOGIC_IB_I_VERS_MASK;
+}
+
+#endif /* _QIB_COMMON_H */
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
new file mode 100644
index 00000000000..ab4e11cfab1
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/kthread.h>
+
+#include "qib_verbs.h"
+#include "qib.h"
+
+/**
+ * qib_cq_enter - add a new entry to the completion queue
+ * @cq: completion queue
+ * @entry: work completion entry to add
+ * @solicited: true if @entry is a solicited entry
+ *
+ * This may be called with qp->s_lock held.
+ */
+void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
+{
+ struct qib_cq_wc *wc;
+ unsigned long flags;
+ u32 head;
+ u32 next;
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ /*
+ * Note that the head pointer might be writable by user processes.
+ * Take care to verify it is a sane value.
+ */
+ wc = cq->queue;
+ head = wc->head;
+ if (head >= (unsigned) cq->ibcq.cqe) {
+ head = cq->ibcq.cqe;
+ next = 0;
+ } else
+ next = head + 1;
+ if (unlikely(next == wc->tail)) {
+ spin_unlock_irqrestore(&cq->lock, flags);
+ if (cq->ibcq.event_handler) {
+ struct ib_event ev;
+
+ ev.device = cq->ibcq.device;
+ ev.element.cq = &cq->ibcq;
+ ev.event = IB_EVENT_CQ_ERR;
+ cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
+ }
+ return;
+ }
+ if (cq->ip) {
+ wc->uqueue[head].wr_id = entry->wr_id;
+ wc->uqueue[head].status = entry->status;
+ wc->uqueue[head].opcode = entry->opcode;
+ wc->uqueue[head].vendor_err = entry->vendor_err;
+ wc->uqueue[head].byte_len = entry->byte_len;
+ wc->uqueue[head].ex.imm_data =
+ (__u32 __force)entry->ex.imm_data;
+ wc->uqueue[head].qp_num = entry->qp->qp_num;
+ wc->uqueue[head].src_qp = entry->src_qp;
+ wc->uqueue[head].wc_flags = entry->wc_flags;
+ wc->uqueue[head].pkey_index = entry->pkey_index;
+ wc->uqueue[head].slid = entry->slid;
+ wc->uqueue[head].sl = entry->sl;
+ wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+ wc->uqueue[head].port_num = entry->port_num;
+ /* Make sure entry is written before the head index. */
+ smp_wmb();
+ } else
+ wc->kqueue[head] = *entry;
+ wc->head = next;
+
+ if (cq->notify == IB_CQ_NEXT_COMP ||
+ (cq->notify == IB_CQ_SOLICITED &&
+ (solicited || entry->status != IB_WC_SUCCESS))) {
+ struct kthread_worker *worker;
+ /*
+ * This will cause send_complete() to be called in
+ * another thread.
+ */
+ smp_rmb();
+ worker = cq->dd->worker;
+ if (likely(worker)) {
+ cq->notify = IB_CQ_NONE;
+ cq->triggered++;
+ queue_kthread_work(worker, &cq->comptask);
+ }
+ }
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+}
+
+/**
+ * qib_poll_cq - poll for work completion entries
+ * @ibcq: the completion queue to poll
+ * @num_entries: the maximum number of entries to return
+ * @entry: pointer to array where work completions are placed
+ *
+ * Returns the number of completion entries polled.
+ *
+ * This may be called from interrupt context. Also called by ib_poll_cq()
+ * in the generic verbs code.
+ */
+int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+ struct qib_cq *cq = to_icq(ibcq);
+ struct qib_cq_wc *wc;
+ unsigned long flags;
+ int npolled;
+ u32 tail;
+
+ /* The kernel can only poll a kernel completion queue */
+ if (cq->ip) {
+ npolled = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&cq->lock, flags);
+
+ wc = cq->queue;
+ tail = wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
+ for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+ if (tail == wc->head)
+ break;
+ /* The kernel doesn't need a RMB since it has the lock. */
+ *entry = wc->kqueue[tail];
+ if (tail >= cq->ibcq.cqe)
+ tail = 0;
+ else
+ tail++;
+ }
+ wc->tail = tail;
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+bail:
+ return npolled;
+}
+
+static void send_complete(struct kthread_work *work)
+{
+ struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
+
+ /*
+ * The completion handler will most likely rearm the notification
+ * and poll for all pending entries. If a new completion entry
+ * is added while we are in this routine, queue_kthread_work()
+ * won't queue the work again until we return, so we check triggered to
+ * see if we need to call the handler again.
+ */
+ for (;;) {
+ u8 triggered = cq->triggered;
+
+ /*
+ * IPoIB connected mode assumes the callback is from a
+ * soft IRQ. We simulate this by blocking "bottom halves".
+ * See the implementation for ipoib_cm_handle_tx_wc(),
+ * netif_tx_lock_bh() and netif_tx_lock().
+ */
+ local_bh_disable();
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ local_bh_enable();
+
+ if (cq->triggered == triggered)
+ return;
+ }
+}
+
+/**
+ * qib_create_cq - create a completion queue
+ * @ibdev: the device this completion queue is attached to
+ * @entries: the minimum size of the completion queue
+ * @comp_vector: completion vector, unused by the QLogic_IB driver
+ * @context: unused by the QLogic_IB driver
+ * @udata: user data for libibverbs.so
+ *
+ * Returns a pointer to the completion queue or negative errno values
+ * for failure.
+ *
+ * Called by ib_create_cq() in the generic verbs code.
+ */
+struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector, struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_cq *cq;
+ struct qib_cq_wc *wc;
+ struct ib_cq *ret;
+ u32 sz;
+
+ if (entries < 1 || entries > ib_qib_max_cqes) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ /* Allocate the completion queue structure. */
+ cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto done;
+ }
+
+ /*
+ * Allocate the completion queue entries and head/tail pointers.
+ * This is allocated separately so that it can be resized and
+ * also mapped into user space.
+ * We need to use vmalloc() in order to support mmap and large
+ * numbers of entries.
+ */
+ sz = sizeof(*wc);
+ if (udata && udata->outlen >= sizeof(__u64))
+ sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
+ else
+ sz += sizeof(struct ib_wc) * (entries + 1);
+ wc = vmalloc_user(sz);
+ if (!wc) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_cq;
+ }
+
+ /*
+ * Return the address of the WC as the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ int err;
+
+ cq->ip = qib_create_mmap_info(dev, sz, context, wc);
+ if (!cq->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wc;
+ }
+
+ err = ib_copy_to_udata(udata, &cq->ip->offset,
+ sizeof(cq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else
+ cq->ip = NULL;
+
+ spin_lock(&dev->n_cqs_lock);
+ if (dev->n_cqs_allocated == ib_qib_max_cqs) {
+ spin_unlock(&dev->n_cqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_cqs_allocated++;
+ spin_unlock(&dev->n_cqs_lock);
+
+ if (cq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ /*
+ * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
+ * The number of entries must be >= the number requested, or an
+ * error is returned.
+ */
+ cq->dd = dd_from_dev(dev);
+ cq->ibcq.cqe = entries;
+ cq->notify = IB_CQ_NONE;
+ cq->triggered = 0;
+ spin_lock_init(&cq->lock);
+ init_kthread_work(&cq->comptask, send_complete);
+ wc->head = 0;
+ wc->tail = 0;
+ cq->queue = wc;
+
+ ret = &cq->ibcq;
+
+ goto done;
+
+bail_ip:
+ kfree(cq->ip);
+bail_wc:
+ vfree(wc);
+bail_cq:
+ kfree(cq);
+done:
+ return ret;
+}
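+
+/*
+ * Sizing example (illustrative values): for a user-mapped CQ with
+ * entries == 128, the vmalloc_user() size computed above is
+ *
+ *	sz = sizeof(struct qib_cq_wc) + 129 * sizeof(struct ib_uverbs_wc)
+ *
+ * i.e. the head/tail words that user space mmap()s plus one spare slot
+ * so the ring can distinguish full from empty.
+ */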
+
+/**
+ * qib_destroy_cq - destroy a completion queue
+ * @ibcq: the completion queue to destroy.
+ *
+ * Returns 0 for success.
+ *
+ * Called by ib_destroy_cq() in the generic verbs code.
+ */
+int qib_destroy_cq(struct ib_cq *ibcq)
+{
+ struct qib_ibdev *dev = to_idev(ibcq->device);
+ struct qib_cq *cq = to_icq(ibcq);
+
+ flush_kthread_work(&cq->comptask);
+ spin_lock(&dev->n_cqs_lock);
+ dev->n_cqs_allocated--;
+ spin_unlock(&dev->n_cqs_lock);
+ if (cq->ip)
+ kref_put(&cq->ip->ref, qib_release_mmap_info);
+ else
+ vfree(cq->queue);
+ kfree(cq);
+
+ return 0;
+}
+
+/**
+ * qib_req_notify_cq - change the notification type for a completion queue
+ * @ibcq: the completion queue
+ * @notify_flags: the type of notification to request
+ *
+ * Returns 0 for success.
+ *
+ * This may be called from interrupt context. Also called by
+ * ib_req_notify_cq() in the generic verbs code.
+ */
+int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+{
+ struct qib_cq *cq = to_icq(ibcq);
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+ /*
+ * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
+ * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
+ */
+ if (cq->notify != IB_CQ_NEXT_COMP)
+ cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
+
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->queue->head != cq->queue->tail)
+ ret = 1;
+
+ spin_unlock_irqrestore(&cq->lock, flags);
+
+ return ret;
+}
+
+/**
+ * qib_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ * @cqe: the new size of the completion queue
+ * @udata: user data for libibverbs.so
+ *
+ * Returns 0 for success.
+ */
+int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+ struct qib_cq *cq = to_icq(ibcq);
+ struct qib_cq_wc *old_wc;
+ struct qib_cq_wc *wc;
+ u32 head, tail, n;
+ int ret;
+ u32 sz;
+
+ if (cqe < 1 || cqe > ib_qib_max_cqes) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Need to use vmalloc() if we want to support large #s of entries.
+ */
+ sz = sizeof(*wc);
+ if (udata && udata->outlen >= sizeof(__u64))
+ sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
+ else
+ sz += sizeof(struct ib_wc) * (cqe + 1);
+ wc = vmalloc_user(sz);
+ if (!wc) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ /* Check that we can write the offset to mmap. */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ __u64 offset = 0;
+
+ ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
+ if (ret)
+ goto bail_free;
+ }
+
+ spin_lock_irq(&cq->lock);
+ /*
+ * Make sure head and tail are sane since they
+ * might be user writable.
+ */
+ old_wc = cq->queue;
+ head = old_wc->head;
+ if (head > (u32) cq->ibcq.cqe)
+ head = (u32) cq->ibcq.cqe;
+ tail = old_wc->tail;
+ if (tail > (u32) cq->ibcq.cqe)
+ tail = (u32) cq->ibcq.cqe;
+ if (head < tail)
+ n = cq->ibcq.cqe + 1 + head - tail;
+ else
+ n = head - tail;
+ if (unlikely((u32)cqe < n)) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
+ for (n = 0; tail != head; n++) {
+ if (cq->ip)
+ wc->uqueue[n] = old_wc->uqueue[tail];
+ else
+ wc->kqueue[n] = old_wc->kqueue[tail];
+ if (tail == (u32) cq->ibcq.cqe)
+ tail = 0;
+ else
+ tail++;
+ }
+ cq->ibcq.cqe = cqe;
+ wc->head = n;
+ wc->tail = 0;
+ cq->queue = wc;
+ spin_unlock_irq(&cq->lock);
+
+ vfree(old_wc);
+
+ if (cq->ip) {
+ struct qib_ibdev *dev = to_idev(ibcq->device);
+ struct qib_mmap_info *ip = cq->ip;
+
+ qib_update_mmap_info(dev, ip, sz, wc);
+
+ /*
+ * Return the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
+
+ spin_lock_irq(&dev->pending_lock);
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = 0;
+ goto bail;
+
+bail_unlock:
+ spin_unlock_irq(&cq->lock);
+bail_free:
+ vfree(wc);
+bail:
+ return ret;
+}
+
+int qib_cq_init(struct qib_devdata *dd)
+{
+ int ret = 0;
+ int cpu;
+ struct task_struct *task;
+
+ if (dd->worker)
+ return 0;
+ dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
+ if (!dd->worker)
+ return -ENOMEM;
+ init_kthread_worker(dd->worker);
+ task = kthread_create_on_node(
+ kthread_worker_fn,
+ dd->worker,
+ dd->assigned_node_id,
+ "qib_cq%d", dd->unit);
+ if (IS_ERR(task))
+ goto task_fail;
+ cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+out:
+ return ret;
+task_fail:
+ ret = PTR_ERR(task);
+ kfree(dd->worker);
+ dd->worker = NULL;
+ goto out;
+}
+
+void qib_cq_exit(struct qib_devdata *dd)
+{
+ struct kthread_worker *worker;
+
+ worker = dd->worker;
+ if (!worker)
+ return;
+ /* blocks future queuing from send_complete() */
+ dd->worker = NULL;
+ smp_wmb();
+ flush_kthread_worker(worker);
+ kthread_stop(worker->task);
+ kfree(worker);
+}
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
new file mode 100644
index 00000000000..799a0c3bffc
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -0,0 +1,283 @@
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+
+#include "qib.h"
+#include "qib_verbs.h"
+#include "qib_debugfs.h"
+
+static struct dentry *qib_dbg_root;
+
+#define DEBUGFS_FILE(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+ .start = _##name##_seq_start, \
+ .next = _##name##_seq_next, \
+ .stop = _##name##_seq_stop, \
+ .show = _##name##_seq_show \
+}; \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+ struct seq_file *seq; \
+ int ret; \
+ ret = seq_open(s, &_##name##_seq_ops); \
+ if (ret) \
+ return ret; \
+ seq = s->private_data; \
+ seq->private = inode->i_private; \
+ return 0; \
+} \
+static const struct file_operations _##name##_file_ops = { \
+ .owner = THIS_MODULE, \
+ .open = _##name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = seq_release \
+};
+
+#define DEBUGFS_FILE_CREATE(name) \
+do { \
+ struct dentry *ent; \
+ ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \
+ ibd, &_##name##_file_ops); \
+ if (!ent) \
+ pr_warn("create of " #name " failed\n"); \
+} while (0)
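+
+/*
+ * Sketch of how the two macros above combine (illustrative; the
+ * debugfs mount point is assumed): DEBUGFS_FILE(opcode_stats) below
+ * generates _opcode_stats_seq_ops, _opcode_stats_open() and
+ * _opcode_stats_file_ops from the four _opcode_stats_seq_* callbacks,
+ * and DEBUGFS_FILE_CREATE(opcode_stats) in qib_dbg_ibdev_init() then
+ * exposes them as a read-only (0400) file under the per-device
+ * directory, e.g.
+ *
+ *	/sys/kernel/debug/<QIB_DRV_NAME>/qib0/opcode_stats
+ */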
+
+static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct qib_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+
+static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _opcode_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ for (j = 0; j < dd->first_user_ctxt; j++) {
+ if (!dd->rcd[j])
+ continue;
+ n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
+ n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu\n", i,
+ (unsigned long long) n_packets,
+ (unsigned long long) n_bytes);
+
+ return 0;
+}
+
+DEBUGFS_FILE(opcode_stats)
+
+static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (!*pos)
+ return SEQ_START_TOKEN;
+ if (*pos >= dd->first_user_ctxt)
+ return NULL;
+ return pos;
+}
+
+static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (v == SEQ_START_TOKEN)
+ return pos;
+
+ ++*pos;
+ if (*pos >= dd->first_user_ctxt)
+ return NULL;
+ return pos;
+}
+
+static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
+{
+ /* nothing allocated */
+}
+
+static int _ctx_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos;
+ loff_t i, j;
+ u64 n_packets = 0;
+ struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
+ struct qib_devdata *dd = dd_from_dev(ibd);
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(s, "Ctx:npkts\n");
+ return 0;
+ }
+
+ spos = v;
+ i = *spos;
+
+ if (!dd->rcd[i])
+ return SEQ_SKIP;
+
+ for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
+ n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
+
+ if (!n_packets)
+ return SEQ_SKIP;
+
+ seq_printf(s, " %llu:%llu\n", i, n_packets);
+ return 0;
+}
+
+DEBUGFS_FILE(ctx_stats)
+
+static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct qib_qp_iter *iter;
+ loff_t n = *pos;
+
+ iter = qib_qp_iter_init(s->private);
+ if (!iter)
+ return NULL;
+
+ while (n--) {
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ }
+
+ return iter;
+}
+
+static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
+ loff_t *pos)
+{
+ struct qib_qp_iter *iter = iter_ptr;
+
+ (*pos)++;
+
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+{
+ /* nothing for now */
+}
+
+static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
+{
+ struct qib_qp_iter *iter = iter_ptr;
+
+ if (!iter)
+ return 0;
+
+ qib_qp_iter_print(s, iter);
+
+ return 0;
+}
+
+DEBUGFS_FILE(qp_stats)
+
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd)
+{
+ char name[10];
+
+ snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit);
+ ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root);
+ if (!ibd->qib_ibdev_dbg) {
+ pr_warn("create of %s failed\n", name);
+ return;
+ }
+ DEBUGFS_FILE_CREATE(opcode_stats);
+ DEBUGFS_FILE_CREATE(ctx_stats);
+ DEBUGFS_FILE_CREATE(qp_stats);
+ return;
+}
+
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd)
+{
+ if (!qib_dbg_root)
+ goto out;
+ debugfs_remove_recursive(ibd->qib_ibdev_dbg);
+out:
+ ibd->qib_ibdev_dbg = NULL;
+}
+
+void qib_dbg_init(void)
+{
+ qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL);
+ if (!qib_dbg_root)
+ pr_warn("init of debugfs failed\n");
+}
+
+void qib_dbg_exit(void)
+{
+ debugfs_remove_recursive(qib_dbg_root);
+ qib_dbg_root = NULL;
+}
+
+#endif
+
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h
new file mode 100644
index 00000000000..7ae983a91b8
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.h
@@ -0,0 +1,45 @@
+#ifndef _QIB_DEBUGFS_H
+#define _QIB_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+struct qib_ibdev;
+void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
+void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
+void qib_dbg_init(void);
+void qib_dbg_exit(void);
+
+#endif
+
+#endif /* _QIB_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
new file mode 100644
index 00000000000..5dfda4c5cc9
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -0,0 +1,911 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains support for diagnostic functions. It is accessed by
+ * opening the qib_diag device, normally minor number 129. Diagnostic use
+ * of the QLogic_IB chip may render the chip or board unusable until the
+ * driver is unloaded, or in some cases, until the system is rebooted.
+ *
+ * Accesses to the chip through this interface are not similar to going
+ * through the /sys/bus/pci resource mmap interface.
+ */
+
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/vmalloc.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
+/*
+ * Each client that opens the diag device must read then write
+ * offset 0, to prevent lossage from random cat or od. diag_state
+ * sequences this "handshake".
+ */
+enum diag_state { UNUSED = 0, OPENED, INIT, READY };
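+
+/*
+ * Illustrative user-space view of that handshake (not part of the
+ * driver; the device path is assumed): before any other access, a
+ * client must read and then write exactly 8 bytes at offset 0, which
+ * walks its state OPENED -> INIT -> READY, roughly:
+ *
+ *	u64 v;
+ *	int fd = open("/dev/ipath_diag0", O_RDWR);
+ *	pread(fd, &v, 8, 0);	OPENED -> INIT
+ *	pwrite(fd, &v, 8, 0);	INIT -> READY
+ */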
+
+/* State for an individual client. PID so children cannot abuse handshake */
+static struct qib_diag_client {
+ struct qib_diag_client *next;
+ struct qib_devdata *dd;
+ pid_t pid;
+ enum diag_state state;
+} *client_pool;
+
+/*
+ * Get a client struct. Recycled if possible, else kmalloc.
+ * Must be called with qib_mutex held
+ */
+static struct qib_diag_client *get_client(struct qib_devdata *dd)
+{
+ struct qib_diag_client *dc;
+
+ dc = client_pool;
+ if (dc)
+ /* got one from the pool; remove it and use it */
+ client_pool = dc->next;
+ else
+ /* None in pool, alloc and init */
+ dc = kmalloc(sizeof *dc, GFP_KERNEL);
+
+ if (dc) {
+ dc->next = NULL;
+ dc->dd = dd;
+ dc->pid = current->pid;
+ dc->state = OPENED;
+ }
+ return dc;
+}
+
+/*
+ * Return to pool. Must be called with qib_mutex held
+ */
+static void return_client(struct qib_diag_client *dc)
+{
+ struct qib_devdata *dd = dc->dd;
+ struct qib_diag_client *tdc, *rdc;
+
+ rdc = NULL;
+ if (dc == dd->diag_client) {
+ dd->diag_client = dc->next;
+ rdc = dc;
+ } else {
+ tdc = dc->dd->diag_client;
+ while (tdc) {
+ if (dc == tdc->next) {
+ tdc->next = dc->next;
+ rdc = dc;
+ break;
+ }
+ tdc = tdc->next;
+ }
+ }
+ if (rdc) {
+ rdc->state = UNUSED;
+ rdc->dd = NULL;
+ rdc->pid = 0;
+ rdc->next = client_pool;
+ client_pool = rdc;
+ }
+}
+
+static int qib_diag_open(struct inode *in, struct file *fp);
+static int qib_diag_release(struct inode *in, struct file *fp);
+static ssize_t qib_diag_read(struct file *fp, char __user *data,
+ size_t count, loff_t *off);
+static ssize_t qib_diag_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off);
+
+static const struct file_operations diag_file_ops = {
+ .owner = THIS_MODULE,
+ .write = qib_diag_write,
+ .read = qib_diag_read,
+ .open = qib_diag_open,
+ .release = qib_diag_release,
+ .llseek = default_llseek,
+};
+
+static atomic_t diagpkt_count = ATOMIC_INIT(0);
+static struct cdev *diagpkt_cdev;
+static struct device *diagpkt_device;
+
+static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off);
+
+static const struct file_operations diagpkt_file_ops = {
+ .owner = THIS_MODULE,
+ .write = qib_diagpkt_write,
+ .llseek = noop_llseek,
+};
+
+int qib_diag_add(struct qib_devdata *dd)
+{
+ char name[16];
+ int ret = 0;
+
+ if (atomic_inc_return(&diagpkt_count) == 1) {
+ ret = qib_cdev_init(QIB_DIAGPKT_MINOR, "ipath_diagpkt",
+ &diagpkt_file_ops, &diagpkt_cdev,
+ &diagpkt_device);
+ if (ret)
+ goto done;
+ }
+
+ snprintf(name, sizeof(name), "ipath_diag%d", dd->unit);
+ ret = qib_cdev_init(QIB_DIAG_MINOR_BASE + dd->unit, name,
+ &diag_file_ops, &dd->diag_cdev,
+ &dd->diag_device);
+done:
+ return ret;
+}
+
+static void qib_unregister_observers(struct qib_devdata *dd);
+
+void qib_diag_remove(struct qib_devdata *dd)
+{
+ struct qib_diag_client *dc;
+
+ if (atomic_dec_and_test(&diagpkt_count))
+ qib_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
+
+ qib_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
+
+ /*
+ * Return all diag_clients of this device. There should be none,
+ * as we are "guaranteed" that no clients are still open
+ */
+ while (dd->diag_client)
+ return_client(dd->diag_client);
+
+ /* Now clean up all unused client structs */
+ while (client_pool) {
+ dc = client_pool;
+ client_pool = dc->next;
+ kfree(dc);
+ }
+ /* Clean up observer list */
+ qib_unregister_observers(dd);
+}
+
+/* qib_remap_ioaddr32 - remap an offset into chip address space to __iomem *
+ *
+ * @dd: the qlogic_ib device
+ * @offs: the offset in chip-space
+ * @cntp: Pointer to max (byte) count for transfer starting at offset
+ * This returns a u32 __iomem * so it can be used for both 64 and 32-bit
+ * mapping. It is needed because with the use of PAT for control of
+ * write-combining, the logically contiguous address-space of the chip
+ * may be split into virtually non-contiguous spaces, with different
+ * attributes, which are then mapped to contiguous physical space
+ * starting from the first BAR.
+ *
+ * The code below makes the same assumptions as were made in
+ * init_chip_wc_pat() (qib_init.c), copied here:
+ * Assumes chip address space looks like:
+ * - kregs + sregs + cregs + uregs (in any order)
+ * - piobufs (2K and 4K bufs in either order)
+ * or:
+ * - kregs + sregs + cregs (in any order)
+ * - piobufs (2K and 4K bufs in either order)
+ * - uregs
+ *
+ * If cntp is non-NULL, returns how many bytes from offset can be accessed
+ * Returns 0 if the offset is not mapped.
+ */
+static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset,
+ u32 *cntp)
+{
+ u32 kreglen;
+ u32 snd_bottom, snd_lim = 0;
+ u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase;
+ u32 __iomem *map = NULL;
+ u32 cnt = 0;
+ u32 tot4k, offs4k;
+
+ /* First, simplest case, offset is within the first map. */
+ kreglen = (dd->kregend - dd->kregbase) * sizeof(u64);
+ if (offset < kreglen) {
+ map = krb32 + (offset / sizeof(u32));
+ cnt = kreglen - offset;
+ goto mapped;
+ }
+
+ /*
+ * Next check for user regs, the next most common case,
+ * and a cheap check because if they are not in the first map
+ * they are last in chip.
+ */
+ if (dd->userbase) {
+ /* If user regs mapped, they are after send, so set limit. */
+ u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase;
+ if (!dd->piovl15base)
+ snd_lim = dd->uregbase;
+ krb32 = (u32 __iomem *)dd->userbase;
+ if (offset >= dd->uregbase && offset < ulim) {
+ map = krb32 + (offset - dd->uregbase) / sizeof(u32);
+ cnt = ulim - offset;
+ goto mapped;
+ }
+ }
+
+ /*
+ * Lastly, check for offset within Send Buffers.
+ * This is gnarly because struct devdata is deliberately vague
+ * about things like 7322 VL15 buffers, and we are not in
+ * chip-specific code here, so should not make many assumptions.
+ * The one we _do_ make is that the only chip that has more sndbufs
+ * than we admit is the 7322, and it has userregs above that, so
+ * we know the snd_lim.
+ */
+ /* Assume 2K buffers are first. */
+ snd_bottom = dd->pio2k_bufbase;
+ if (snd_lim == 0) {
+ u32 tot2k = dd->piobcnt2k * ALIGN(dd->piosize2k, dd->palign);
+ snd_lim = snd_bottom + tot2k;
+ }
+ /* If 4k buffers exist, account for them by bumping
+ * appropriate limit.
+ */
+ tot4k = dd->piobcnt4k * dd->align4k;
+ offs4k = dd->piobufbase >> 32;
+ if (dd->piobcnt4k) {
+ if (snd_bottom > offs4k)
+ snd_bottom = offs4k;
+ else {
+ /* 4k above 2k. Bump snd_lim, if needed*/
+ if (!dd->userbase || dd->piovl15base)
+ snd_lim = offs4k + tot4k;
+ }
+ }
+ /*
+ * Judgement call: can we ignore the space between SendBuffs and
+ * UserRegs, where we would like to see vl15 buffs, but not more?
+ */
+ if (offset >= snd_bottom && offset < snd_lim) {
+ offset -= snd_bottom;
+ map = (u32 __iomem *)dd->piobase + (offset / sizeof(u32));
+ cnt = snd_lim - offset;
+ }
+
+ if (!map && offs4k && dd->piovl15base) {
+ snd_lim = offs4k + tot4k + 2 * dd->align4k;
+ if (offset >= (offs4k + tot4k) && offset < snd_lim) {
+ map = (u32 __iomem *)dd->piovl15base +
+ ((offset - (offs4k + tot4k)) / sizeof(u32));
+ cnt = snd_lim - offset;
+ }
+ }
+
+mapped:
+ if (cntp)
+ *cntp = cnt;
+ return map;
+}
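+
+/*
+ * Worked example (illustrative, with a made-up register extent): if
+ * the kernel register map is 8 KB (kreglen == 0x2000), an offset of
+ * 0x1000 falls in the first map and yields
+ *
+ *	map = krb32 + 0x1000 / sizeof(u32);	(krb32 + 1024)
+ *	cnt = 0x2000 - 0x1000;			(4 KB accessible here)
+ *
+ * while larger offsets are checked against the user register, 2K/4K
+ * PIO buffer and VL15 buffer windows in turn.
+ */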
+
+/*
+ * qib_read_umem64 - read a 64-bit quantity from the chip into user space
+ * @dd: the qlogic_ib device
+ * @uaddr: the location to store the data in user memory
+ * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
+ * @count: number of bytes to copy (multiple of 32 bits)
+ *
+ * This function also localizes all chip memory accesses.
+ * The copy should be written such that we read full cacheline packets
+ * from the chip. This is usually used for a single qword
+ *
+ * NOTE: This assumes the chip address is 64-bit aligned.
+ */
+static int qib_read_umem64(struct qib_devdata *dd, void __user *uaddr,
+ u32 regoffs, size_t count)
+{
+ const u64 __iomem *reg_addr;
+ const u64 __iomem *reg_end;
+ u32 limit;
+ int ret;
+
+ reg_addr = (const u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
+ if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (count >= limit)
+ count = limit;
+ reg_end = reg_addr + (count / sizeof(u64));
+
+ /* not very efficient, but it works for now */
+ while (reg_addr < reg_end) {
+ u64 data = readq(reg_addr);
+
+ if (copy_to_user(uaddr, &data, sizeof(u64))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ reg_addr++;
+ uaddr += sizeof(u64);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/*
+ * qib_write_umem64 - write a 64-bit quantity to the chip from user space
+ * @dd: the qlogic_ib device
+ * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
+ * @uaddr: the source of the data in user memory
+ * @count: the number of bytes to copy (multiple of 32 bits)
+ *
+ * This is usually used for a single qword
+ * NOTE: This assumes the chip address is 64-bit aligned.
+ */
+
+static int qib_write_umem64(struct qib_devdata *dd, u32 regoffs,
+ const void __user *uaddr, size_t count)
+{
+ u64 __iomem *reg_addr;
+ const u64 __iomem *reg_end;
+ u32 limit;
+ int ret;
+
+ reg_addr = (u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
+ if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (count >= limit)
+ count = limit;
+ reg_end = reg_addr + (count / sizeof(u64));
+
+ /* not very efficient, but it works for now */
+ while (reg_addr < reg_end) {
+ u64 data;
+ if (copy_from_user(&data, uaddr, sizeof(data))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ writeq(data, reg_addr);
+
+ reg_addr++;
+ uaddr += sizeof(u64);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/*
+ * qib_read_umem32 - read a 32-bit quantity from the chip into user space
+ * @dd: the qlogic_ib device
+ * @uaddr: the location to store the data in user memory
+ * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
+ * @count: number of bytes to copy
+ *
+ * read 32 bit values, not 64 bit; for memories that only
+ * support 32 bit reads; usually a single dword.
+ */
+static int qib_read_umem32(struct qib_devdata *dd, void __user *uaddr,
+ u32 regoffs, size_t count)
+{
+ const u32 __iomem *reg_addr;
+ const u32 __iomem *reg_end;
+ u32 limit;
+ int ret;
+
+ reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
+ if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (count >= limit)
+ count = limit;
+ reg_end = reg_addr + (count / sizeof(u32));
+
+ /* not very efficient, but it works for now */
+ while (reg_addr < reg_end) {
+ u32 data = readl(reg_addr);
+
+ if (copy_to_user(uaddr, &data, sizeof(data))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ reg_addr++;
+ uaddr += sizeof(u32);
+
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+/*
+ * qib_write_umem32 - write a 32-bit quantity to the chip from user space
+ * @dd: the qlogic_ib device
+ * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
+ * @uaddr: the source of the data in user memory
+ * @count: number of bytes to copy
+ *
+ * write 32 bit values, not 64 bit; for memories that only
+ * support 32 bit write; usually a single dword.
+ */
+
+static int qib_write_umem32(struct qib_devdata *dd, u32 regoffs,
+ const void __user *uaddr, size_t count)
+{
+ u32 __iomem *reg_addr;
+ const u32 __iomem *reg_end;
+ u32 limit;
+ int ret;
+
+ reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
+ if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (count >= limit)
+ count = limit;
+ reg_end = reg_addr + (count / sizeof(u32));
+
+ while (reg_addr < reg_end) {
+ u32 data;
+
+ if (copy_from_user(&data, uaddr, sizeof(data))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ writel(data, reg_addr);
+
+ reg_addr++;
+ uaddr += sizeof(u32);
+ }
+ ret = 0;
+bail:
+ return ret;
+}
+
+static int qib_diag_open(struct inode *in, struct file *fp)
+{
+ int unit = iminor(in) - QIB_DIAG_MINOR_BASE;
+ struct qib_devdata *dd;
+ struct qib_diag_client *dc;
+ int ret;
+
+ mutex_lock(&qib_mutex);
+
+ dd = qib_lookup(unit);
+
+ if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
+ !dd->kregbase) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ dc = get_client(dd);
+ if (!dc) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ dc->next = dd->diag_client;
+ dd->diag_client = dc;
+ fp->private_data = dc;
+ ret = 0;
+bail:
+ mutex_unlock(&qib_mutex);
+
+ return ret;
+}
+
+/**
+ * qib_diagpkt_write - write an IB packet
+ * @fp: the diag data device file pointer
+ * @data: qib_diag_pkt structure saying where to get the packet
+ * @count: size of data to write
+ * @off: unused by this code
+ */
+static ssize_t qib_diagpkt_write(struct file *fp,
+ const char __user *data,
+ size_t count, loff_t *off)
+{
+ u32 __iomem *piobuf;
+ u32 plen, pbufn, maxlen_reserve;
+ struct qib_diag_xpkt dp;
+ u32 *tmpbuf = NULL;
+ struct qib_devdata *dd;
+ struct qib_pportdata *ppd;
+ ssize_t ret = 0;
+
+ if (count != sizeof(dp)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (copy_from_user(&dp, data, sizeof(dp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ dd = qib_lookup(dp.unit);
+ if (!dd || !(dd->flags & QIB_PRESENT) || !dd->kregbase) {
+ ret = -ENODEV;
+ goto bail;
+ }
+ if (!(dd->flags & QIB_INITTED)) {
+ /* no hardware, freeze, etc. */
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ if (dp.version != _DIAG_XPKT_VERS) {
+ qib_dev_err(dd, "Invalid version %u for diagpkt_write\n",
+ dp.version);
+ ret = -EINVAL;
+ goto bail;
+ }
+ /* send count must be an exact number of dwords */
+ if (dp.len & 3) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (!dp.port || dp.port > dd->num_pports) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ ppd = &dd->pport[dp.port - 1];
+
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ plen = sizeof(u32) + dp.len;
+
+ tmpbuf = vmalloc(plen);
+ if (!tmpbuf) {
+ qib_devinfo(dd->pcidev,
+ "Unable to allocate tmp buffer, failing\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (copy_from_user(tmpbuf,
+ (const void __user *) (unsigned long) dp.data,
+ dp.len)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ plen >>= 2; /* in dwords */
+
+ if (dp.pbc_wd == 0)
+ dp.pbc_wd = plen;
+
+ piobuf = dd->f_getsendbuf(ppd, dp.pbc_wd, &pbufn);
+ if (!piobuf) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ /* disarm it just to be extra sure */
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbufn));
+
+ /* disable header check on pbufn for this packet */
+ dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_DIS1, NULL);
+
+ writeq(dp.pbc_wd, piobuf);
+ /*
+ * Copy all but the trigger word, then flush, so it's written
+ * to chip before trigger word, then write trigger word, then
+ * flush again, so packet is sent.
+ */
+ if (dd->flags & QIB_PIO_FLUSH_WC) {
+ qib_flush_wc();
+ qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
+ qib_flush_wc();
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
+ } else
+ qib_pio_copy(piobuf + 2, tmpbuf, plen);
+
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf + spcl_off);
+ }
+
+ /*
+ * Ensure buffer is written to the chip, then re-enable
+ * header checks (if supported by chip). The txchk
+ * code will ensure seen by chip before returning.
+ */
+ qib_flush_wc();
+ qib_sendbuf_done(dd, pbufn);
+ dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
+
+ ret = sizeof(dp);
+
+bail:
+ vfree(tmpbuf);
+ return ret;
+}
+
+static int qib_diag_release(struct inode *in, struct file *fp)
+{
+ mutex_lock(&qib_mutex);
+ return_client(fp->private_data);
+ fp->private_data = NULL;
+ mutex_unlock(&qib_mutex);
+ return 0;
+}
+
+/*
+ * Chip-specific code calls to register its interest in
+ * a specific range.
+ */
+struct diag_observer_list_elt {
+ struct diag_observer_list_elt *next;
+ const struct diag_observer *op;
+};
+
+int qib_register_observer(struct qib_devdata *dd,
+ const struct diag_observer *op)
+{
+ struct diag_observer_list_elt *olp;
+ unsigned long flags;
+
+ if (!dd || !op)
+ return -EINVAL;
+ olp = vmalloc(sizeof *olp);
+ if (!olp) {
+ pr_err("vmalloc for observer failed\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp->op = op;
+ olp->next = dd->diag_observer_list;
+ dd->diag_observer_list = olp;
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+ return 0;
+}
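+
+/*
+ * Illustrative sketch of a caller (not from this file; the hook name
+ * and register range are made up, and struct diag_observer itself is
+ * declared elsewhere in the driver): chip-specific code supplies an
+ * observer whose ->hook intercepts diag reads/writes falling in
+ * [bottom, top]:
+ *
+ *	static const struct diag_observer example_observer = {
+ *		.hook = example_hook,
+ *		.bottom = 0x1000,
+ *		.top = 0x1007,
+ *	};
+ *
+ *	qib_register_observer(dd, &example_observer);
+ */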
+
+/* Remove all registered observers when device is closed */
+static void qib_unregister_observers(struct qib_devdata *dd)
+{
+ struct diag_observer_list_elt *olp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp = dd->diag_observer_list;
+ while (olp) {
+ /* Pop one observer, let go of lock */
+ dd->diag_observer_list = olp->next;
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+ vfree(olp);
+ /* try again. */
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp = dd->diag_observer_list;
+ }
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+}
+
+/*
+ * Find the observer, if any, for the specified address. Initial implementation
+ * is simple stack of observers. This must be called with diag transaction
+ * lock held.
+ */
+static const struct diag_observer *diag_get_observer(struct qib_devdata *dd,
+ u32 addr)
+{
+ struct diag_observer_list_elt *olp;
+ const struct diag_observer *op = NULL;
+
+ olp = dd->diag_observer_list;
+ while (olp) {
+ op = olp->op;
+ if (addr >= op->bottom && addr <= op->top)
+ break;
+ olp = olp->next;
+ }
+ if (!olp)
+ op = NULL;
+
+ return op;
+}
+
+static ssize_t qib_diag_read(struct file *fp, char __user *data,
+ size_t count, loff_t *off)
+{
+ struct qib_diag_client *dc = fp->private_data;
+ struct qib_devdata *dd = dc->dd;
+ void __iomem *kreg_base;
+ ssize_t ret;
+
+ if (dc->pid != current->pid) {
+ ret = -EPERM;
+ goto bail;
+ }
+
+ kreg_base = dd->kregbase;
+
+ if (count == 0)
+ ret = 0;
+ else if ((count % 4) || (*off % 4))
+ /* address or length is not 32-bit aligned, hence invalid */
+ ret = -EINVAL;
+ else if (dc->state < READY && (*off || count != 8))
+ ret = -EINVAL; /* prevent cat /dev/qib_diag* */
+ else {
+ unsigned long flags;
+ u64 data64 = 0;
+ int use_32;
+ const struct diag_observer *op;
+
+ use_32 = (count % 8) || (*off % 8);
+ ret = -1;
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ /*
+ * Check for observer on this address range.
+ * we only support a single 32 or 64-bit read
+ * via observer, currently.
+ */
+ op = diag_get_observer(dd, *off);
+ if (op) {
+ u32 offset = *off;
+ ret = op->hook(dd, op, offset, &data64, 0, use_32);
+ }
+ /*
+ * We need to release lock before any copy_to_user(),
+ * whether implicit in qib_read_umem* or explicit below.
+ */
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+ if (!op) {
+ if (use_32)
+ /*
+ * Address or length is not 64-bit aligned;
+ * do 32-bit rd
+ */
+ ret = qib_read_umem32(dd, data, (u32) *off,
+ count);
+ else
+ ret = qib_read_umem64(dd, data, (u32) *off,
+ count);
+ } else if (ret == count) {
+ /* Below finishes case where observer existed */
+ ret = copy_to_user(data, &data64, use_32 ?
+ sizeof(u32) : sizeof(u64));
+ if (ret)
+ ret = -EFAULT;
+ }
+ }
+
+ if (ret >= 0) {
+ *off += count;
+ ret = count;
+ if (dc->state == OPENED)
+ dc->state = INIT;
+ }
+bail:
+ return ret;
+}
+
+static ssize_t qib_diag_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off)
+{
+ struct qib_diag_client *dc = fp->private_data;
+ struct qib_devdata *dd = dc->dd;
+ void __iomem *kreg_base;
+ ssize_t ret;
+
+ if (dc->pid != current->pid) {
+ ret = -EPERM;
+ goto bail;
+ }
+
+ kreg_base = dd->kregbase;
+
+ if (count == 0)
+ ret = 0;
+ else if ((count % 4) || (*off % 4))
+ /* address or length is not 32-bit aligned, hence invalid */
+ ret = -EINVAL;
+ else if (dc->state < READY &&
+ ((*off || count != 8) || dc->state != INIT))
+ /* No writes except second-step of init seq */
+ ret = -EINVAL; /* before any other write allowed */
+ else {
+ unsigned long flags;
+ const struct diag_observer *op = NULL;
+ int use_32 = (count % 8) || (*off % 8);
+
+ /*
+ * Check for observer on this address range.
+ * We only support a single 32 or 64-bit write
+ * via observer, currently. This helps, because
+ * we would otherwise have to jump through hoops
+ * to make "diag transaction" meaningful when we
+ * cannot do a copy_from_user while holding the lock.
+ */
+ if (count == 4 || count == 8) {
+ u64 data64;
+ u32 offset = *off;
+ ret = copy_from_user(&data64, data, count);
+ if (ret) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ op = diag_get_observer(dd, *off);
+ if (op)
+ ret = op->hook(dd, op, offset, &data64, ~0Ull,
+ use_32);
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+ }
+
+ if (!op) {
+ if (use_32)
+ /*
+ * Address or length is not 64-bit aligned;
+ * do 32-bit write
+ */
+ ret = qib_write_umem32(dd, (u32) *off, data,
+ count);
+ else
+ ret = qib_write_umem64(dd, (u32) *off, data,
+ count);
+ }
+ }
+
+ if (ret >= 0) {
+ *off += count;
+ ret = count;
+ if (dc->state == INIT)
+ dc->state = READY; /* all read/write OK now */
+ }
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
new file mode 100644
index 00000000000..59fe092b4b0
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_dma.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2006, 2009, 2010 QLogic, Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/types.h>
+#include <linux/scatterlist.h>
+
+#include "qib_verbs.h"
+
+#define BAD_DMA_ADDRESS ((u64) 0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int qib_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+ return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 qib_dma_map_single(struct ib_device *dev, void *cpu_addr,
+ size_t size, enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ return (u64) cpu_addr;
+}
+
+static void qib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static u64 qib_dma_map_page(struct ib_device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ u64 addr;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ if (offset + size > PAGE_SIZE) {
+ addr = BAD_DMA_ADDRESS;
+ goto done;
+ }
+
+ addr = (u64) page_address(page);
+ if (addr)
+ addr += offset;
+ /* TODO: handle highmem pages */
+
+done:
+ return addr;
+}
+
+static void qib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ u64 addr;
+ int i;
+ int ret = nents;
+
+ BUG_ON(!valid_dma_direction(direction));
+
+ for_each_sg(sgl, sg, nents, i) {
+ addr = (u64) page_address(sg_page(sg));
+ /* TODO: handle highmem pages */
+ if (!addr) {
+ ret = 0;
+ break;
+ }
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
+ }
+ return ret;
+}
+
+static void qib_unmap_sg(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+}
+
+static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+ size_t size, enum dma_data_direction dir)
+{
+}
+
+static void qib_sync_single_for_device(struct ib_device *dev, u64 addr,
+ size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void *qib_dma_alloc_coherent(struct ib_device *dev, size_t size,
+ u64 *dma_handle, gfp_t flag)
+{
+ struct page *p;
+ void *addr = NULL;
+
+ p = alloc_pages(flag, get_order(size));
+ if (p)
+ addr = page_address(p);
+ if (dma_handle)
+ *dma_handle = (u64) addr;
+ return addr;
+}
+
+static void qib_dma_free_coherent(struct ib_device *dev, size_t size,
+ void *cpu_addr, u64 dma_handle)
+{
+ free_pages((unsigned long) cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops qib_dma_mapping_ops = {
+ .mapping_error = qib_mapping_error,
+ .map_single = qib_dma_map_single,
+ .unmap_single = qib_dma_unmap_single,
+ .map_page = qib_dma_map_page,
+ .unmap_page = qib_dma_unmap_page,
+ .map_sg = qib_map_sg,
+ .unmap_sg = qib_unmap_sg,
+ .sync_single_for_cpu = qib_sync_single_for_cpu,
+ .sync_single_for_device = qib_sync_single_for_device,
+ .alloc_coherent = qib_dma_alloc_coherent,
+ .free_coherent = qib_dma_free_coherent
+};
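+
+/*
+ * A minimal sketch of how this table is meant to be wired up (the
+ * actual hook-up lives in the verbs registration code, not here): the
+ * driver's ib_device points its DMA mapping operations at the table
+ * above, e.g.
+ *
+ *	ibdev->dma_ops = &qib_dma_mapping_ops;
+ *
+ * so that ib_dma_map_single() and friends resolve to the CPU-copy
+ * implementations in this file rather than performing real bus
+ * mapping.
+ */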
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
new file mode 100644
index 00000000000..5bee08f16d7
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/prefetch.h>
+
+#include "qib.h"
+
+/*
+ * The size has to be longer than this string, so we can append
+ * board/chip information to it in the init code.
+ */
+const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
+
+DEFINE_SPINLOCK(qib_devs_lock);
+LIST_HEAD(qib_dev_list);
+DEFINE_MUTEX(qib_mutex); /* general driver use */
+
+unsigned qib_ibmtu;
+module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
+MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096)");
+
+unsigned qib_compat_ddr_negotiate = 1;
+module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(compat_ddr_negotiate,
+ "Attempt pre-IBTA 1.2 DDR speed negotiation");
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel <ibsupport@intel.com>");
+MODULE_DESCRIPTION("Intel IB driver");
+MODULE_VERSION(QIB_DRIVER_VERSION);
+
+/*
+ * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
+ * PIO send buffers. This is well beyond anything currently
+ * defined in the InfiniBand spec.
+ */
+#define QIB_PIO_MAXIBHDR 128
+
+/*
+ * QIB_MAX_PKT_RECV is the max # of packets processed per receive interrupt.
+ */
+#define QIB_MAX_PKT_RECV 64
+
+struct qlogic_ib_stats qib_stats;
+
+const char *qib_get_unit_name(int unit)
+{
+ static char iname[16];
+
+ snprintf(iname, sizeof iname, "infinipath%u", unit);
+ return iname;
+}
+
+/*
+ * Return count of units with at least one port ACTIVE.
+ */
+int qib_count_active_units(void)
+{
+ struct qib_devdata *dd;
+ struct qib_pportdata *ppd;
+ unsigned long flags;
+ int pidx, nunits_active = 0;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
+ continue;
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
+ QIBL_LINKARMED | QIBL_LINKACTIVE))) {
+ nunits_active++;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ return nunits_active;
+}
+
+/*
+ * Return count of all units, optionally return in arguments
+ * the number of usable (present) units, and the number of
+ * ports that are up.
+ */
+int qib_count_units(int *npresentp, int *nupp)
+{
+ int nunits = 0, npresent = 0, nup = 0;
+ struct qib_devdata *dd;
+ unsigned long flags;
+ int pidx;
+ struct qib_pportdata *ppd;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ nunits++;
+ if ((dd->flags & QIB_PRESENT) && dd->kregbase)
+ npresent++;
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
+ QIBL_LINKARMED | QIBL_LINKACTIVE)))
+ nup++;
+ }
+ }
+
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+
+ if (npresentp)
+ *npresentp = npresent;
+ if (nupp)
+ *nupp = nup;
+
+ return nunits;
+}
+
+/**
+ * qib_wait_linkstate - wait for an IB link state change to occur
+ * @ppd: the qlogic_ib per-port data
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for an IB link state change to occur.
+ * For now, take the easy polling route.  Currently used only by
+ * qib_set_linkstate.  Returns 0 if the state is reached, otherwise
+ * -ETIMEDOUT.  @state can have multiple state bits set, to match any
+ * of several transitions.
+ */
+int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ if (ppd->state_wanted) {
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ ret = -EBUSY;
+ goto bail;
+ }
+ ppd->state_wanted = state;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ wait_event_interruptible_timeout(ppd->state_wait,
+ (ppd->lflags & state),
+ msecs_to_jiffies(msecs));
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->state_wanted = 0;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+
+ if (!(ppd->lflags & state))
+ ret = -ETIMEDOUT;
+ else
+ ret = 0;
+bail:
+ return ret;
+}
+
+int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
+{
+ u32 lstate;
+ int ret;
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ switch (newstate) {
+ case QIB_IB_LINKDOWN_ONLY:
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case QIB_IB_LINKDOWN:
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case QIB_IB_LINKDOWN_SLEEP:
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case QIB_IB_LINKDOWN_DISABLE:
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
+ /* don't wait */
+ ret = 0;
+ goto bail;
+
+ case QIB_IB_LINKARM:
+ if (ppd->lflags & QIBL_LINKARMED) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ /*
+ * Since the port can be ACTIVE when we ask for ARMED,
+ * clear QIBL_LINKV so we can wait for a transition.
+ * If the link isn't ARMED, then something else happened
+ * and there is no point waiting for ARMED.
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_LINKV;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
+ lstate = QIBL_LINKV;
+ break;
+
+ case QIB_IB_LINKACTIVE:
+ if (ppd->lflags & QIBL_LINKACTIVE) {
+ ret = 0;
+ goto bail;
+ }
+ if (!(ppd->lflags & QIBL_LINKARMED)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+ IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
+ lstate = QIBL_LINKACTIVE;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto bail;
+ }
+ ret = qib_wait_linkstate(ppd, lstate, 10);
+
+bail:
+ return ret;
+}
+
+/*
+ * Get address of eager buffer from its index (allocated in chunks, not
+ * contiguous).
+ */
+static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
+{
+ const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
+ const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
+
+ return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
+}
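+
+/*
+ * Worked example (illustrative values): with 32 eager buffers per
+ * chunk (rcvegrbufs_perchunk_shift == 5) and etail == 70,
+ *
+ *	chunk = 70 >> 5 = 2
+ *	idx   = 70 & 31 = 6
+ *
+ * so the buffer is the seventh entry of the third chunk, at
+ * rcvegrbuf[2] + (6 << rcvegrbufsize_shift).
+ */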
+
+/*
+ * Returns 1 if error was a CRC, else 0.
+ * Needed for some chip's synthesized error counters.
+ */
+static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
+ u32 ctxt, u32 eflags, u32 l, u32 etail,
+ __le32 *rhf_addr, struct qib_message_header *rhdr)
+{
+ u32 ret = 0;
+
+ if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
+ ret = 1;
+ else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
+ /* For TIDERR and RC QPs preemptively schedule a NAK */
+ struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
+ struct qib_other_headers *ohdr = NULL;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ struct qib_qp *qp = NULL;
+ u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
+ u16 lid = be16_to_cpu(hdr->lrh[1]);
+ int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ u32 qp_num;
+ u32 opcode;
+ u32 psn;
+ int diff;
+
+ /* Sanity check packet */
+ if (tlen < 24)
+ goto drop;
+
+ if (lid < QIB_MULTICAST_LID_BASE) {
+ lid &= ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid != ppd->lid))
+ goto drop;
+ }
+
+ /* Check for GRH */
+ if (lnh == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else if (lnh == QIB_LRH_GRH) {
+ u32 vtf;
+
+ ohdr = &hdr->u.l.oth;
+ if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else
+ goto drop;
+
+ /* Get opcode and PSN from packet */
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ opcode >>= 24;
+ psn = be32_to_cpu(ohdr->bth[2]);
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ if (qp_num != QIB_MULTICAST_QPN) {
+ int ruc_res;
+ qp = qib_lookup_qpn(ibp, qp_num);
+ if (!qp)
+ goto drop;
+
+ /*
+ * Handle only RC QPs - for other QP types drop error
+ * packet.
+ */
+ spin_lock(&qp->r_lock);
+
+ /* Check for valid receive state. */
+ if (!(ib_qib_state_ops[qp->state] &
+ QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ goto unlock;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ ruc_res =
+ qib_ruc_check_hdr(
+ ibp, hdr,
+ lnh == QIB_LRH_GRH,
+ qp,
+ be32_to_cpu(ohdr->bth[0]));
+ if (ruc_res)
+ goto unlock;
+
+ /* Only deal with RDMA Writes for now */
+ if (opcode <
+ IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
+ diff = qib_cmp24(psn, qp->r_psn);
+ if (!qp->r_nak_state && diff >= 0) {
+ ibp->n_rc_seqnak++;
+ qp->r_nak_state =
+ IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
+ /*
+ * Wait to send the sequence
+ * NAK until all packets
+ * in the receive queue have
+ * been processed.
+ * Otherwise, we end up
+ * propagating congestion.
+ */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |=
+ QIB_R_RSP_NAK;
+ atomic_inc(
+ &qp->refcount);
+ list_add_tail(
+ &qp->rspwait,
+ &rcd->qp_wait_list);
+ }
+ } /* Out of sequence NAK */
+ } /* QP Request NAKs */
+ break;
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ case IB_QPT_UC:
+ default:
+ /* For now don't handle any other QP types */
+ break;
+ }
+
+unlock:
+ spin_unlock(&qp->r_lock);
+ /*
+ * Notify qib_destroy_qp() if it is waiting
+ * for us to finish.
+ */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ } /* Unicast QP */
+ } /* Valid packet with TIDErr */
+
+drop:
+ return ret;
+}
+
+/*
+ * qib_kreceive - receive a packet
+ * @rcd: the qlogic_ib context
+ * @llic: gets count of good packets needed to clear lli,
+ * (used with chips that need to track crcs for lli)
+ * @npkts: if non-NULL, returns the number of packets processed
+ *
+ * called from interrupt handler for errors or receive interrupt
+ * Returns number of CRC error packets, needed by some chips for
+ * local link integrity tracking. crcs are adjusted down by following
+ * good packets, if any, and count of good packets is also tracked.
+ */
+u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
+{
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_pportdata *ppd = rcd->ppd;
+ __le32 *rhf_addr;
+ void *ebuf;
+ const u32 rsize = dd->rcvhdrentsize; /* words */
+ const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */
+ u32 etail = -1, l, hdrqtail;
+ struct qib_message_header *hdr;
+ u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
+ int last;
+ u64 lval;
+ struct qib_qp *qp, *nqp;
+
+ l = rcd->head;
+ rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
+ if (dd->flags & QIB_NODMA_RTAIL) {
+ u32 seq = qib_hdrget_seq(rhf_addr);
+ if (seq != rcd->seq_cnt)
+ goto bail;
+ hdrqtail = 0;
+ } else {
+ hdrqtail = qib_get_rcvhdrtail(rcd);
+ if (l == hdrqtail)
+ goto bail;
+ smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
+ }
+
+ for (last = 0, i = 1; !last; i += !last) {
+ hdr = dd->f_get_msgheader(dd, rhf_addr);
+ eflags = qib_hdrget_err_flags(rhf_addr);
+ etype = qib_hdrget_rcv_type(rhf_addr);
+ /* total length */
+ tlen = qib_hdrget_length_in_bytes(rhf_addr);
+ ebuf = NULL;
+ if ((dd->flags & QIB_NODMA_RTAIL) ?
+ qib_hdrget_use_egr_buf(rhf_addr) :
+ (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
+ etail = qib_hdrget_index(rhf_addr);
+ updegr = 1;
+ if (tlen > sizeof(*hdr) ||
+ etype >= RCVHQ_RCV_TYPE_NON_KD) {
+ ebuf = qib_get_egrbuf(rcd, etail);
+ prefetch_range(ebuf, tlen - sizeof(*hdr));
+ }
+ }
+ if (!eflags) {
+ u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
+
+ if (lrh_len != tlen) {
+ qib_stats.sps_lenerrs++;
+ goto move_along;
+ }
+ }
+ if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags &&
+ ebuf == NULL &&
+ tlen > (dd->rcvhdrentsize - 2 + 1 -
+ qib_hdrget_offset(rhf_addr)) << 2) {
+ goto move_along;
+ }
+
+ /*
+ * Both tiderr and qibhdrerr are set for all plain IB
+ * packets; only qibhdrerr should be set.
+ */
+ if (unlikely(eflags))
+ crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
+ etail, rhf_addr, hdr);
+ else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
+ qib_ib_rcv(rcd, hdr, ebuf, tlen);
+ if (crcs)
+ crcs--;
+ else if (llic && *llic)
+ --*llic;
+ }
+move_along:
+ l += rsize;
+ if (l >= maxcnt)
+ l = 0;
+ if (i == QIB_MAX_PKT_RECV)
+ last = 1;
+
+ rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
+ if (dd->flags & QIB_NODMA_RTAIL) {
+ u32 seq = qib_hdrget_seq(rhf_addr);
+
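+ /*
+ * The RHF sequence number appears to cycle through 1..13;
+ * mirror that here so a mismatch marks the end of the valid
+ * entries when no DMA'ed tail pointer is available.
+ */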
+ if (++rcd->seq_cnt > 13)
+ rcd->seq_cnt = 1;
+ if (seq != rcd->seq_cnt)
+ last = 1;
+ } else if (l == hdrqtail)
+ last = 1;
+ /*
+ * Update head regs etc., every 16 packets, if not last pkt,
+ * to help prevent rcvhdrq overflows, when many packets
+ * are processed and queue is nearly full.
+ * Don't request an interrupt for intermediate updates.
+ */
+ lval = l;
+ if (!last && !(i & 0xf)) {
+ dd->f_update_usrhead(rcd, lval, updegr, etail, i);
+ updegr = 0;
+ }
+ }
+ /*
+ * Notify qib_destroy_qp() if it is waiting
+ * for lookaside_qp to finish.
+ */
+ if (rcd->lookaside_qp) {
+ if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
+ wake_up(&rcd->lookaside_qp->wait);
+ rcd->lookaside_qp = NULL;
+ }
+
+ rcd->head = l;
+
+ /*
+ * Iterate over all QPs waiting to respond.
+ * The list won't change since the IRQ is only run on one CPU.
+ */
+ list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+ list_del_init(&qp->rspwait);
+ if (qp->r_flags & QIB_R_RSP_NAK) {
+ qp->r_flags &= ~QIB_R_RSP_NAK;
+ qib_send_rc_ack(qp);
+ }
+ if (qp->r_flags & QIB_R_RSP_SEND) {
+ unsigned long flags;
+
+ qp->r_flags &= ~QIB_R_RSP_SEND;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_qib_state_ops[qp->state] &
+ QIB_PROCESS_OR_FLUSH_SEND)
+ qib_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+
+bail:
+ /* Report number of packets consumed */
+ if (npkts)
+ *npkts = i;
+
+ /*
+ * Always write head at end, and setup rcv interrupt, even
+ * if no packets were processed.
+ */
+ lval = (u64)rcd->head | dd->rhdrhead_intr_off;
+ dd->f_update_usrhead(rcd, lval, updegr, etail, i);
+ return crcs;
+}
+
+/**
+ * qib_set_mtu - set the MTU
+ * @ppd: the per-port data
+ * @arg: the new MTU
+ *
+ * We can handle "any" incoming size; the issue here is whether we
+ * need to restrict our outgoing size. For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link INIT state...
+ */
+int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
+{
+ u32 piosize;
+ int ret, chk;
+
+ if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+ arg != 4096) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ chk = ib_mtu_enum_to_int(qib_ibmtu);
+ if (chk > 0 && arg > chk) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ piosize = ppd->ibmaxlen;
+ ppd->ibmtu = arg;
+
+ if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
+ /* Only if it's not the initial value (or reset to it) */
+ if (piosize != ppd->init_ibmaxlen) {
+ if (arg > piosize && arg <= ppd->init_ibmaxlen)
+ piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
+ ppd->ibmaxlen = piosize;
+ }
+ } else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
+ piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
+ ppd->ibmaxlen = piosize;
+ }
+
+ ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
+{
+ struct qib_devdata *dd = ppd->dd;
+ ppd->lid = lid;
+ ppd->lmc = lmc;
+
+ dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
+ lid | (~((1U << lmc) - 1)) << 16);
+
+ qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
+ dd->unit, ppd->port, lid);
+
+ return 0;
+}
+
+/*
+ * The following functions deal with the "obviously simple" task of overriding
+ * the state of the LEDs, which normally indicate link physical and logical
+ * status. The complications arise from the different hardware mappings, the
+ * board-dependent routine being called from interrupts, and the requirement
+ * to _flash_ them.
+ */
+#define LED_OVER_FREQ_SHIFT 8
+#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
+/* Below is "non-zero" to force override, but both actual LEDs are off */
+#define LED_OVER_BOTH_OFF (8)
+
+static void qib_run_led_override(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+ struct qib_devdata *dd = ppd->dd;
+ int timeoff;
+ int ph_idx;
+
+ if (!(dd->flags & QIB_INITTED))
+ return;
+
+ ph_idx = ppd->led_override_phase++ & 1;
+ ppd->led_override = ppd->led_override_vals[ph_idx];
+ timeoff = ppd->led_override_timeoff;
+
+ dd->f_setextled(ppd, 1);
+ /*
+ * don't re-fire the timer if user asked for it to be off; we let
+ * it fire one more time after they turn it off, to keep the logic simple
+ */
+ if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
+ mod_timer(&ppd->led_override_timer, jiffies + timeoff);
+}
+
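+/*
+ * Sketch of the 'val' encoding as used below: bits 8..15 select the blink
+ * frequency (0 means steady), the low nibble is the LED value for phase 0
+ * and the next nibble the value for phase 1. For example, val = 0x0285
+ * blinks between LED patterns 0x5 and 0x8, each phase lasting
+ * (HZ << 4) / 2 jiffies.
+ */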
+void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int timeoff, freq;
+
+ if (!(dd->flags & QIB_INITTED))
+ return;
+
+ /* First check if we are blinking. If not, use 1 Hz polling */
+ timeoff = HZ;
+ freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+
+ if (freq) {
+ /* For blink, set each phase from one nybble of val */
+ ppd->led_override_vals[0] = val & 0xF;
+ ppd->led_override_vals[1] = (val >> 4) & 0xF;
+ timeoff = (HZ << 4)/freq;
+ } else {
+ /* Non-blink set both phases the same. */
+ ppd->led_override_vals[0] = val & 0xF;
+ ppd->led_override_vals[1] = val & 0xF;
+ }
+ ppd->led_override_timeoff = timeoff;
+
+ /*
+ * If the timer has not already been started, do so. Use a "quick"
+ * timeout so the function will be called soon, to look at our request.
+ */
+ if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
+ /* Need to start timer */
+ init_timer(&ppd->led_override_timer);
+ ppd->led_override_timer.function = qib_run_led_override;
+ ppd->led_override_timer.data = (unsigned long) ppd;
+ ppd->led_override_timer.expires = jiffies + 1;
+ add_timer(&ppd->led_override_timer);
+ } else {
+ if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
+ mod_timer(&ppd->led_override_timer, jiffies + 1);
+ atomic_dec(&ppd->led_override_timer_active);
+ }
+}
+
+/**
+ * qib_reset_device - reset the chip if possible
+ * @unit: the device to reset
+ *
+ * Whether or not reset is successful, we attempt to re-initialize the chip
+ * (that is, much like a driver unload/reload). We clear the INITTED flag
+ * so that the various entry points will fail until we reinitialize. For
+ * now, we only allow this if no user contexts are open that use chip resources.
+ */
+int qib_reset_device(int unit)
+{
+ int ret, i;
+ struct qib_devdata *dd = qib_lookup(unit);
+ struct qib_pportdata *ppd;
+ unsigned long flags;
+ int pidx;
+
+ if (!dd) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
+
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
+ qib_devinfo(dd->pcidev,
+ "Invalid unit number %u or not initialized or not present\n",
+ unit);
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ if (dd->rcd)
+ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
+ if (!dd->rcd[i] || !dd->rcd[i]->cnt)
+ continue;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ ret = -EBUSY;
+ goto bail;
+ }
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (atomic_read(&ppd->led_override_timer_active)) {
+ /* Need to stop LED timer, _then_ shut off LEDs */
+ del_timer_sync(&ppd->led_override_timer);
+ atomic_set(&ppd->led_override_timer_active, 0);
+ }
+
+ /* Shut off LEDs after we are sure timer is not running */
+ ppd->led_override = LED_OVER_BOTH_OFF;
+ dd->f_setextled(ppd, 0);
+ if (dd->flags & QIB_HAS_SEND_DMA)
+ qib_teardown_sdma(ppd);
+ }
+
+ ret = dd->f_reset(dd);
+ if (ret == 1)
+ ret = qib_init(dd, 1);
+ else
+ ret = -EAGAIN;
+ if (ret)
+ qib_dev_err(dd,
+ "Reinitialize unit %u after reset failed with %d\n",
+ unit, ret);
+ else
+ qib_devinfo(dd->pcidev,
+ "Reinitialized unit %u after resetting\n",
+ unit);
+
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
new file mode 100644
index 00000000000..4d5d71aaa2b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+/*
+ * Functions specific to the serial EEPROM on cards handled by ib_qib.
+ * The actual serial interface code is in qib_twsi.c. This file is a client.
+ */
+
+/**
+ * qib_eeprom_read - receives bytes from the eeprom via I2C
+ * @dd: the qlogic_ib device
+ * @eeprom_offset: address to read from
+ * @buffer: where to store result
+ * @len: number of bytes to receive
+ */
+int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset,
+ void *buff, int len)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (!ret) {
+ ret = qib_twsi_reset(dd);
+ if (ret)
+ qib_dev_err(dd, "EEPROM Reset for read failed\n");
+ else
+ ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev,
+ eeprom_offset, buff, len);
+ mutex_unlock(&dd->eep_lock);
+ }
+
+ return ret;
+}
+
+/*
+ * Actually update the eeprom, first doing write enable if
+ * needed, then restoring write enable state.
+ * Must be called with eep_lock held
+ */
+static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset,
+ const void *buf, int len)
+{
+ int ret, pwen;
+
+ pwen = dd->f_eeprom_wen(dd, 1);
+ ret = qib_twsi_reset(dd);
+ if (ret)
+ qib_dev_err(dd, "EEPROM Reset for write failed\n");
+ else
+ ret = qib_twsi_blk_wr(dd, dd->twsi_eeprom_dev,
+ offset, buf, len);
+ dd->f_eeprom_wen(dd, pwen);
+ return ret;
+}
+
+/**
+ * qib_eeprom_write - writes data to the eeprom via I2C
+ * @dd: the qlogic_ib device
+ * @eeprom_offset: where to place data
+ * @buffer: data to write
+ * @len: number of bytes to write
+ */
+int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset,
+ const void *buff, int len)
+{
+ int ret;
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (!ret) {
+ ret = eeprom_write_with_enable(dd, eeprom_offset, buff, len);
+ mutex_unlock(&dd->eep_lock);
+ }
+
+ return ret;
+}
+
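+/*
+ * 8-bit checksum over the flash contents: sum all bytes up to if_length
+ * (capped at sizeof(struct qib_flash)), back out the stored if_csum so the
+ * checksum does not include itself, then take the one's complement. With
+ * adjust set, the freshly computed value is written back into if_csum.
+ */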
+static u8 flash_csum(struct qib_flash *ifp, int adjust)
+{
+ u8 *ip = (u8 *) ifp;
+ u8 csum = 0, len;
+
+ /*
+ * Limit length checksummed to max length of actual data.
+ * Checksum of erased eeprom will still be bad, but we avoid
+ * reading past the end of the buffer we were passed.
+ */
+ len = ifp->if_length;
+ if (len > sizeof(struct qib_flash))
+ len = sizeof(struct qib_flash);
+ while (len--)
+ csum += *ip++;
+ csum -= ifp->if_csum;
+ csum = ~csum;
+ if (adjust)
+ ifp->if_csum = csum;
+
+ return csum;
+}
+
+/**
+ * qib_get_eeprom_info - get the GUID et al. from the TWSI EEPROM device
+ * @dd: the qlogic_ib device
+ *
+ * We have the capability to use the nguid field, and get
+ * the guid from the first chip's flash, to use for all of them.
+ */
+void qib_get_eeprom_info(struct qib_devdata *dd)
+{
+ void *buf;
+ struct qib_flash *ifp;
+ __be64 guid;
+ int len, eep_stat;
+ u8 csum, *bguid;
+ int t = dd->unit;
+ struct qib_devdata *dd0 = qib_lookup(0);
+
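+ /*
+ * Units other than 0 can derive their GUID from unit 0's base GUID
+ * plus the unit number, propagating a carry into the higher octets
+ * but refusing to wrap into the OUI portion.
+ */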
+ if (t && dd0->nguid > 1 && t <= dd0->nguid) {
+ u8 oguid;
+ dd->base_guid = dd0->base_guid;
+ bguid = (u8 *) &dd->base_guid;
+
+ oguid = bguid[7];
+ bguid[7] += t;
+ if (oguid > bguid[7]) {
+ if (bguid[6] == 0xff) {
+ if (bguid[5] == 0xff) {
+ qib_dev_err(dd,
+ "Can't set %s GUID from base, wraps to OUI!\n",
+ qib_get_unit_name(t));
+ dd->base_guid = 0;
+ goto bail;
+ }
+ bguid[5]++;
+ }
+ bguid[6]++;
+ }
+ dd->nguid = 1;
+ goto bail;
+ }
+
+ /*
+ * Read full flash, not just currently used part, since it may have
+ * been written with a newer definition.
+ */
+ len = sizeof(struct qib_flash);
+ buf = vmalloc(len);
+ if (!buf) {
+ qib_dev_err(dd,
+ "Couldn't allocate memory to read %u bytes from eeprom for GUID\n",
+ len);
+ goto bail;
+ }
+
+ /*
+ * Use "public" eeprom read function, which does locking and
+ * figures out device. This will migrate to chip-specific.
+ */
+ eep_stat = qib_eeprom_read(dd, 0, buf, len);
+
+ if (eep_stat) {
+ qib_dev_err(dd, "Failed reading GUID from eeprom\n");
+ goto done;
+ }
+ ifp = (struct qib_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ qib_devinfo(dd->pcidev,
+ "Bad I2C flash checksum: 0x%x, not 0x%x\n",
+ csum, ifp->if_csum);
+ goto done;
+ }
+ if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
+ *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
+ qib_dev_err(dd,
+ "Invalid GUID %llx from flash; ignoring\n",
+ *(unsigned long long *) ifp->if_guid);
+ /* don't allow GUID if all 0 or all 1's */
+ goto done;
+ }
+
+ /* complain, but allow it */
+ if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
+ qib_devinfo(dd->pcidev,
+ "Warning, GUID %llx is default, probably not correct!\n",
+ *(unsigned long long *) ifp->if_guid);
+
+ bguid = ifp->if_guid;
+ if (!bguid[0] && !bguid[1] && !bguid[2]) {
+ /*
+ * Original incorrect GUID format in flash; fix in
+ * core copy, by shifting up 2 octets; don't need to
+ * change top octet, since both it and shifted are 0.
+ */
+ bguid[1] = bguid[3];
+ bguid[2] = bguid[4];
+ bguid[3] = 0;
+ bguid[4] = 0;
+ guid = *(__be64 *) ifp->if_guid;
+ } else
+ guid = *(__be64 *) ifp->if_guid;
+ dd->base_guid = guid;
+ dd->nguid = ifp->if_numguid;
+ /*
+ * Things are slightly complicated by the desire to transparently
+ * support both the Pathscale 10-digit serial number and the QLogic
+ * 13-character version.
+ */
+ if ((ifp->if_fversion > 1) && ifp->if_sprefix[0] &&
+ ((u8 *) ifp->if_sprefix)[0] != 0xFF) {
+ char *snp = dd->serial;
+
+ /*
+ * This board has a Serial-prefix, which is stored
+ * elsewhere for backward-compatibility.
+ */
+ memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix);
+ snp[sizeof ifp->if_sprefix] = '\0';
+ len = strlen(snp);
+ snp += len;
+ len = (sizeof dd->serial) - len;
+ if (len > sizeof ifp->if_serial)
+ len = sizeof ifp->if_serial;
+ memcpy(snp, ifp->if_serial, len);
+ } else
+ memcpy(dd->serial, ifp->if_serial,
+ sizeof ifp->if_serial);
+ if (!strstr(ifp->if_comment, "Tested successfully"))
+ qib_dev_err(dd,
+ "Board SN %s did not pass functional test: %s\n",
+ dd->serial, ifp->if_comment);
+
+ memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT);
+ /*
+ * Power-on (actually "active") hours are kept as little-endian value
+ * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+ * atomic_t while running.
+ */
+ atomic_set(&dd->active_time, 0);
+ dd->eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
+done:
+ vfree(buf);
+
+bail:;
+}
+
+/**
+ * qib_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the qlogic_ib device
+ *
+ * Although the time is kept as seconds in the qib_devdata struct, it is
+ * rounded to hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct qib_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ */
+int qib_update_eeprom_log(struct qib_devdata *dd)
+{
+ void *buf;
+ struct qib_flash *ifp;
+ int len, hi_water;
+ uint32_t new_time, new_hrs;
+ u8 csum;
+ int ret, idx;
+ unsigned long flags;
+
+ /* first, check if we actually need to do anything. */
+ ret = 0;
+ for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+ if (dd->eep_st_new_errs[idx]) {
+ ret = 1;
+ break;
+ }
+ }
+ new_time = atomic_read(&dd->active_time);
+
+ if (ret == 0 && new_time < 3600)
+ goto bail;
+
+ /*
+ * The quick-check above determined that there is something worthy
+ * of logging, so get the current contents and take a more detailed look.
+ * Read the full flash, not just the currently used part, since it may
+ * have been written with a newer definition.
+ */
+ len = sizeof(struct qib_flash);
+ buf = vmalloc(len);
+ ret = 1;
+ if (!buf) {
+ qib_dev_err(dd,
+ "Couldn't allocate memory to read %u bytes from eeprom for logging\n",
+ len);
+ goto bail;
+ }
+
+ /* Grab the mutex and read current EEPROM. If we get an
+ * error, let go, but if not, keep it until we finish write.
+ */
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (ret) {
+ qib_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+ goto free_bail;
+ }
+ ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev, 0, buf, len);
+ if (ret) {
+ mutex_unlock(&dd->eep_lock);
+ qib_dev_err(dd, "Unable read EEPROM for logging\n");
+ goto free_bail;
+ }
+ ifp = (struct qib_flash *)buf;
+
+ csum = flash_csum(ifp, 0);
+ if (csum != ifp->if_csum) {
+ mutex_unlock(&dd->eep_lock);
+ qib_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+ csum, ifp->if_csum);
+ ret = 1;
+ goto free_bail;
+ }
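+ /*
+ * hi_water tracks the highest byte offset modified below, so only the
+ * prefix of the flash image up to that point needs to be rewritten.
+ */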
+ hi_water = 0;
+ spin_lock_irqsave(&dd->eep_st_lock, flags);
+ for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+ int new_val = dd->eep_st_new_errs[idx];
+ if (new_val) {
+ /*
+ * If we have seen any errors, add to EEPROM values
+ * We need to saturate at 0xFF (255) and we also
+ * would need to adjust the checksum if we were
+ * trying to minimize EEPROM traffic
+ * Note that we add to actual current count in EEPROM,
+ * in case it was altered while we were running.
+ */
+ new_val += ifp->if_errcntp[idx];
+ if (new_val > 0xFF)
+ new_val = 0xFF;
+ if (ifp->if_errcntp[idx] != new_val) {
+ ifp->if_errcntp[idx] = new_val;
+ hi_water = offsetof(struct qib_flash,
+ if_errcntp) + idx;
+ }
+ /*
+ * update our shadow (used to minimize EEPROM
+ * traffic), to match what we are about to write.
+ */
+ dd->eep_st_errs[idx] = new_val;
+ dd->eep_st_new_errs[idx] = 0;
+ }
+ }
+ /*
+ * Now update active-time. We would like to round to the nearest hour
+ * but unless atomic_t are sure to be proper signed ints we cannot,
+ * because we need to account for what we "transfer" to EEPROM and
+ * if we log an hour at 31 minutes, then we would need to set
+ * active_time to -29 to accurately count the _next_ hour.
+ */
+ if (new_time >= 3600) {
+ new_hrs = new_time / 3600;
+ atomic_sub((new_hrs * 3600), &dd->active_time);
+ new_hrs += dd->eep_hrs;
+ if (new_hrs > 0xFFFF)
+ new_hrs = 0xFFFF;
+ dd->eep_hrs = new_hrs;
+ if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+ ifp->if_powerhour[0] = new_hrs & 0xFF;
+ hi_water = offsetof(struct qib_flash, if_powerhour);
+ }
+ if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+ ifp->if_powerhour[1] = new_hrs >> 8;
+ hi_water = offsetof(struct qib_flash, if_powerhour) + 1;
+ }
+ }
+ /*
+ * There is a tiny possibility that we could somehow fail to write
+ * the EEPROM after updating our shadows, but problems from holding
+ * the spinlock too long are a much bigger issue.
+ */
+ spin_unlock_irqrestore(&dd->eep_st_lock, flags);
+ if (hi_water) {
+ /* we made some change to the data, update cksum and write */
+ csum = flash_csum(ifp, 1);
+ ret = eeprom_write_with_enable(dd, 0, buf, hi_water + 1);
+ }
+ mutex_unlock(&dd->eep_lock);
+ if (ret)
+ qib_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+ vfree(buf);
+bail:
+ return ret;
+}
+
+/**
+ * qib_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the qlogic_ib device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever qib_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr)
+{
+ uint new_val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->eep_st_lock, flags);
+ new_val = dd->eep_st_new_errs[eidx] + incr;
+ if (new_val > 255)
+ new_val = 255;
+ dd->eep_st_new_errs[eidx] = new_val;
+ spin_unlock_irqrestore(&dd->eep_st_lock, flags);
+}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
new file mode 100644
index 00000000000..b15e34eeef6
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -0,0 +1,2410 @@
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/cdev.h>
+#include <linux/swap.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/aio.h>
+#include <linux/jiffies.h>
+#include <asm/pgtable.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include "qib.h"
+#include "qib_common.h"
+#include "qib_user_sdma.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
+static int qib_open(struct inode *, struct file *);
+static int qib_close(struct inode *, struct file *);
+static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
+static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+static unsigned int qib_poll(struct file *, struct poll_table_struct *);
+static int qib_mmapf(struct file *, struct vm_area_struct *);
+
+static const struct file_operations qib_file_ops = {
+ .owner = THIS_MODULE,
+ .write = qib_write,
+ .aio_write = qib_aio_write,
+ .open = qib_open,
+ .release = qib_close,
+ .poll = qib_poll,
+ .mmap = qib_mmapf,
+ .llseek = noop_llseek,
+};
+
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+ struct page *page;
+ u64 paddr = 0;
+
+ page = vmalloc_to_page(p);
+ if (page)
+ paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+ return paddr;
+}
+
+static int qib_get_base_info(struct file *fp, void __user *ubase,
+ size_t ubase_size)
+{
+ struct qib_ctxtdata *rcd = ctxt_fp(fp);
+ int ret = 0;
+ struct qib_base_info *kinfo = NULL;
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_pportdata *ppd = rcd->ppd;
+ unsigned subctxt_cnt;
+ int shared, master;
+ size_t sz;
+
+ subctxt_cnt = rcd->subctxt_cnt;
+ if (!subctxt_cnt) {
+ shared = 0;
+ master = 0;
+ subctxt_cnt = 1;
+ } else {
+ shared = 1;
+ master = !subctxt_fp(fp);
+ }
+
+ sz = sizeof(*kinfo);
+ /* If context sharing is not requested, allow the old size structure */
+ if (!shared)
+ sz -= 7 * sizeof(u64);
+ if (ubase_size < sz) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
+ if (kinfo == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ret = dd->f_get_base_info(rcd, kinfo);
+ if (ret < 0)
+ goto bail;
+
+ kinfo->spi_rcvhdr_cnt = dd->rcvhdrcnt;
+ kinfo->spi_rcvhdrent_size = dd->rcvhdrentsize;
+ kinfo->spi_tidegrcnt = rcd->rcvegrcnt;
+ kinfo->spi_rcv_egrbufsize = dd->rcvegrbufsize;
+ /*
+ * have to mmap whole thing
+ */
+ kinfo->spi_rcv_egrbuftotlen =
+ rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size;
+ kinfo->spi_rcv_egrperchunk = rcd->rcvegrbufs_perchunk;
+ kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
+ rcd->rcvegrbuf_chunks;
+ kinfo->spi_tidcnt = dd->rcvtidcnt / subctxt_cnt;
+ if (master)
+ kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt;
+ /*
+ * for this use, may be cfgctxts summed over all chips that
+ * are configured and present
+ */
+ kinfo->spi_nctxts = dd->cfgctxts;
+ /* unit (chip/board) our context is on */
+ kinfo->spi_unit = dd->unit;
+ kinfo->spi_port = ppd->port;
+ /* for now, only a single page */
+ kinfo->spi_tid_maxsize = PAGE_SIZE;
+
+ /*
+ * Doing this per context, and based on the skip value, etc. This has
+ * to be the actual buffer size, since the protocol code treats it
+ * as an array.
+ *
+ * These have to be set to user addresses in the user code via mmap.
+ * These values are used on return to user code for the mmap target
+ * addresses only. For 32 bit, same 44 bit address problem, so use
+ * the physical address, not virtual. Before 2.6.11, using the
+ * page_address() macro worked, but in 2.6.11, even that returns the
+ * full 64 bit address (upper bits all 1's). So far, using the
+ * physical addresses (or chip offsets, for chip mapping) works, but
+ * no doubt some future kernel release will change that, and we'll be
+ * on to yet another method of dealing with this.
+ * Normally only one of rcvhdr_tailaddr or rhf_offset is useful
+ * since the chips with non-zero rhf_offset don't normally
+ * enable tail register updates to host memory, but for testing,
+ * both can be enabled and used.
+ */
+ kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys;
+ kinfo->spi_rhf_offset = dd->rhf_offset;
+ kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys;
+ kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys;
+ /* setup per-unit (not port) status area for user programs */
+ kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
+ (char *) ppd->statusp -
+ (char *) dd->pioavailregs_dma;
+ kinfo->spi_uregbase = (u64) dd->uregbase + dd->ureg_align * rcd->ctxt;
+ if (!shared) {
+ kinfo->spi_piocnt = rcd->piocnt;
+ kinfo->spi_piobufbase = (u64) rcd->piobufs;
+ kinfo->spi_sendbuf_status = cvt_kvaddr(rcd->user_event_mask);
+ } else if (master) {
+ kinfo->spi_piocnt = (rcd->piocnt / subctxt_cnt) +
+ (rcd->piocnt % subctxt_cnt);
+ /* Master's PIO buffers are after all the slave's */
+ kinfo->spi_piobufbase = (u64) rcd->piobufs +
+ dd->palign *
+ (rcd->piocnt - kinfo->spi_piocnt);
+ } else {
+ unsigned slave = subctxt_fp(fp) - 1;
+
+ kinfo->spi_piocnt = rcd->piocnt / subctxt_cnt;
+ kinfo->spi_piobufbase = (u64) rcd->piobufs +
+ dd->palign * kinfo->spi_piocnt * slave;
+ }
+
+ if (shared) {
+ kinfo->spi_sendbuf_status =
+ cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]);
+ /* only spi_subctxt_* fields should be set in this block! */
+ kinfo->spi_subctxt_uregbase = cvt_kvaddr(rcd->subctxt_uregbase);
+
+ kinfo->spi_subctxt_rcvegrbuf =
+ cvt_kvaddr(rcd->subctxt_rcvegrbuf);
+ kinfo->spi_subctxt_rcvhdr_base =
+ cvt_kvaddr(rcd->subctxt_rcvhdr_base);
+ }
+
+ /*
+ * All user buffers are 2KB buffers. If we ever support
+ * giving 4KB buffers to user processes, this will need some
+ * work. Can't use piobufbase directly, because it has
+ * both 2K and 4K buffer base values.
+ */
+ kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) /
+ dd->palign;
+ kinfo->spi_pioalign = dd->palign;
+ kinfo->spi_qpair = QIB_KD_QP;
+ /*
+ * user mode PIO buffers are always 2KB, even when 4KB can
+ * be received, and sent via the kernel; this is ibmaxlen
+ * for 2K MTU.
+ */
+ kinfo->spi_piosize = dd->piosize2k - 2 * sizeof(u32);
+ kinfo->spi_mtu = ppd->ibmaxlen; /* maxlen, not ibmtu */
+ kinfo->spi_ctxt = rcd->ctxt;
+ kinfo->spi_subctxt = subctxt_fp(fp);
+ kinfo->spi_sw_version = QIB_KERN_SWVERSION;
+ kinfo->spi_sw_version |= 1U << 31; /* QLogic-built, not kernel.org */
+ kinfo->spi_hw_version = dd->revision;
+
+ if (master)
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER;
+
+ sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
+ if (copy_to_user(ubase, kinfo, sz))
+ ret = -EFAULT;
+bail:
+ kfree(kinfo);
+ return ret;
+}
+
+/**
+ * qib_tid_update - update a context TID
+ * @rcd: the context
+ * @fp: the qib device file
+ * @ti: the TID information
+ *
+ * The new implementation as of Oct 2004 is that the driver assigns
+ * the tid and returns it to the caller. To reduce search time, we
+ * keep a cursor for each context, walking the shadow tid array to find
+ * one that's not in use.
+ *
+ * For now, if we can't allocate the full list, we fail, although
+ * in the long run, we'll allocate as many as we can, and the
+ * caller will deal with that by trying the remaining pages later.
+ * That means that when we fail, we have to mark the tids as not in
+ * use again, in our shadow copy.
+ *
+ * It's up to the caller to free the tids when they are done.
+ * We'll unlock the pages as they free them.
+ *
+ * Also, right now we are locking one page at a time, but since
+ * the intended use of this routine is for a single group of
+ * virtually contiguous pages, that should change to improve
+ * performance.
+ */
+static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
+ const struct qib_tid_info *ti)
+{
+ int ret = 0, ntids;
+ u32 tid, ctxttid, cnt, i, tidcnt, tidoff;
+ u16 *tidlist;
+ struct qib_devdata *dd = rcd->dd;
+ u64 physaddr;
+ unsigned long vaddr;
+ u64 __iomem *tidbase;
+ unsigned long tidmap[8];
+ struct page **pagep = NULL;
+ unsigned subctxt = subctxt_fp(fp);
+
+ if (!dd->pageshadow) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ cnt = ti->tidcnt;
+ if (!cnt) {
+ ret = -EFAULT;
+ goto done;
+ }
+ ctxttid = rcd->ctxt * dd->rcvtidcnt;
+ if (!rcd->subctxt_cnt) {
+ tidcnt = dd->rcvtidcnt;
+ tid = rcd->tidcursor;
+ tidoff = 0;
+ } else if (!subctxt) {
+ tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) +
+ (dd->rcvtidcnt % rcd->subctxt_cnt);
+ tidoff = dd->rcvtidcnt - tidcnt;
+ ctxttid += tidoff;
+ tid = tidcursor_fp(fp);
+ } else {
+ tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt;
+ tidoff = tidcnt * (subctxt - 1);
+ ctxttid += tidoff;
+ tid = tidcursor_fp(fp);
+ }
+ if (cnt > tidcnt) {
+ /* make sure it all fits in tid_pg_list */
+ qib_devinfo(dd->pcidev,
+ "Process tried to allocate %u TIDs, only trying max (%u)\n",
+ cnt, tidcnt);
+ cnt = tidcnt;
+ }
+ pagep = (struct page **) rcd->tid_pg_list;
+ tidlist = (u16 *) &pagep[dd->rcvtidcnt];
+ pagep += tidoff;
+ tidlist += tidoff;
+
+ memset(tidmap, 0, sizeof(tidmap));
+ /* before decrement; chip actual # */
+ ntids = tidcnt;
+ tidbase = (u64 __iomem *) (((char __iomem *) dd->kregbase) +
+ dd->rcvtidbase +
+ ctxttid * sizeof(*tidbase));
+
+ /* virtual address of first page in transfer */
+ vaddr = ti->tidvaddr;
+ if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
+ cnt * PAGE_SIZE)) {
+ ret = -EFAULT;
+ goto done;
+ }
+ ret = qib_get_user_pages(vaddr, cnt, pagep);
+ if (ret) {
+ /*
+ * if (ret == -EBUSY)
+ * We can't continue because the pagep array won't be
+ * initialized. This should never happen,
+ * unless perhaps the user has mpin'ed the pages
+ * themselves.
+ */
+ qib_devinfo(dd->pcidev,
+ "Failed to lock addr %p, %u pages: "
+ "errno %d\n", (void *) vaddr, cnt, -ret);
+ goto done;
+ }
+ for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
+ for (; ntids--; tid++) {
+ if (tid == tidcnt)
+ tid = 0;
+ if (!dd->pageshadow[ctxttid + tid])
+ break;
+ }
+ if (ntids < 0) {
+ /*
+ * Oops, wrapped all the way through their TIDs,
+ * and didn't have enough free; see comments at
+ * start of routine
+ */
+ i--; /* last tidlist[i] not filled in */
+ ret = -ENOMEM;
+ break;
+ }
+ tidlist[i] = tid + tidoff;
+ /* we "know" system pages and TID pages are same size */
+ dd->pageshadow[ctxttid + tid] = pagep[i];
+ dd->physshadow[ctxttid + tid] =
+ qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ /*
+ * don't need atomic or it's overhead
+ */
+ __set_bit(tid, tidmap);
+ physaddr = dd->physshadow[ctxttid + tid];
+ /* PERFORMANCE: below should almost certainly be cached */
+ dd->f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED, physaddr);
+ /*
+ * don't check this tid in qib_ctxtshadow, since we
+ * just filled it in; start with the next one.
+ */
+ tid++;
+ }
+
+ if (ret) {
+ u32 limit;
+cleanup:
+ /* jump here if copy out of updated info failed... */
+ /* same code that's in qib_free_tid() */
+ limit = sizeof(tidmap) * BITS_PER_BYTE;
+ if (limit > tidcnt)
+ /* just in case size changes in future */
+ limit = tidcnt;
+ tid = find_first_bit((const unsigned long *)tidmap, limit);
+ for (; tid < limit; tid++) {
+ if (!test_bit(tid, tidmap))
+ continue;
+ if (dd->pageshadow[ctxttid + tid]) {
+ dma_addr_t phys;
+
+ phys = dd->physshadow[ctxttid + tid];
+ dd->physshadow[ctxttid + tid] = dd->tidinvalid;
+ /* PERFORMANCE: below should almost certainly
+ * be cached
+ */
+ dd->f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED,
+ dd->tidinvalid);
+ pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ dd->pageshadow[ctxttid + tid] = NULL;
+ }
+ }
+ qib_release_user_pages(pagep, cnt);
+ } else {
+ /*
+ * Copy the updated array, with qib_tid's filled in, back
+ * to user. Since we did the copy in already, this "should
+ * never fail" If it does, we have to clean up...
+ */
+ if (copy_to_user((void __user *)
+ (unsigned long) ti->tidlist,
+ tidlist, cnt * sizeof(*tidlist))) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
+ tidmap, sizeof tidmap)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ if (tid == tidcnt)
+ tid = 0;
+ if (!rcd->subctxt_cnt)
+ rcd->tidcursor = tid;
+ else
+ tidcursor_fp(fp) = tid;
+ }
+
+done:
+ return ret;
+}
+
+/**
+ * qib_tid_free - free a context TID
+ * @rcd: the context
+ * @subctxt: the subcontext
+ * @ti: the TID info
+ *
+ * right now we are unlocking one page at a time, but since
+ * the intended use of this routine is for a single group of
+ * virtually contiguous pages, that should change to improve
+ * performance. We check that the TID is in range for this context
+ * but otherwise don't check validity; if user has an error and
+ * frees the wrong tid, it's only their own data that can thereby
+ * be corrupted. We do check that the TID was in use, for sanity.
+ * We always use our idea of the saved address, not the address that
+ * they pass in to us.
+ */
+static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt,
+ const struct qib_tid_info *ti)
+{
+ int ret = 0;
+ u32 tid, ctxttid, cnt, limit, tidcnt;
+ struct qib_devdata *dd = rcd->dd;
+ u64 __iomem *tidbase;
+ unsigned long tidmap[8];
+
+ if (!dd->pageshadow) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
+ sizeof tidmap)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ ctxttid = rcd->ctxt * dd->rcvtidcnt;
+ if (!rcd->subctxt_cnt)
+ tidcnt = dd->rcvtidcnt;
+ else if (!subctxt) {
+ tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) +
+ (dd->rcvtidcnt % rcd->subctxt_cnt);
+ ctxttid += dd->rcvtidcnt - tidcnt;
+ } else {
+ tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt;
+ ctxttid += tidcnt * (subctxt - 1);
+ }
+ tidbase = (u64 __iomem *) ((char __iomem *)(dd->kregbase) +
+ dd->rcvtidbase +
+ ctxttid * sizeof(*tidbase));
+
+ limit = sizeof(tidmap) * BITS_PER_BYTE;
+ if (limit > tidcnt)
+ /* just in case size changes in future */
+ limit = tidcnt;
+ tid = find_first_bit(tidmap, limit);
+ for (cnt = 0; tid < limit; tid++) {
+ /*
+ * small optimization; if we detect a run of 3 or so without
+ * any set, use find_first_bit again. That's mainly to
+ * accelerate the case where we wrapped, so we have some at
+ * the beginning, and some at the end, and a big gap
+ * in the middle.
+ */
+ if (!test_bit(tid, tidmap))
+ continue;
+ cnt++;
+ if (dd->pageshadow[ctxttid + tid]) {
+ struct page *p;
+ dma_addr_t phys;
+
+ p = dd->pageshadow[ctxttid + tid];
+ dd->pageshadow[ctxttid + tid] = NULL;
+ phys = dd->physshadow[ctxttid + tid];
+ dd->physshadow[ctxttid + tid] = dd->tidinvalid;
+ /* PERFORMANCE: below should almost certainly be
+ * cached
+ */
+ dd->f_put_tid(dd, &tidbase[tid],
+ RCVHQ_RCV_TYPE_EXPECTED, dd->tidinvalid);
+ pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ qib_release_user_pages(&p, 1);
+ }
+ }
+done:
+ return ret;
+}
+
+/**
+ * qib_set_part_key - set a partition key
+ * @rcd: the context
+ * @key: the key
+ *
+ * We can have up to 4 active at a time (other than the default, which is
+ * always allowed). This is somewhat tricky, since multiple contexts may set
+ * the same key, so we reference count them, and clean up at exit. All 4
+ * partition keys are packed into a single qlogic_ib register. It's an
+ * error for a process to set the same pkey multiple times. We provide no
+ * mechanism to de-allocate a pkey at this time, we may eventually need to
+ * do that. I've used the atomic operations, and no locking, and only make
+ * a single pass through what's available. This should be more than
+ * adequate for some time. I'll think about spinlocks or the like if and as
+ * it's necessary.
+ */
+static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
+{
+ struct qib_pportdata *ppd = rcd->ppd;
+ int i, any = 0, pidx = -1;
+ u16 lkey = key & 0x7FFF;
+ int ret;
+
+ if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF)) {
+ /* nothing to do; this key always valid */
+ ret = 0;
+ goto bail;
+ }
+
+ if (!lkey) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Set the full membership bit, because it has to be
+ * set in the register or the packet, and it seems
+ * cleaner to set in the register than to force all
+ * callers to set it.
+ */
+ key |= 0x8000;
+
+ for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
+ if (!rcd->pkeys[i] && pidx == -1)
+ pidx = i;
+ if (rcd->pkeys[i] == key) {
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (pidx == -1) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (!ppd->pkeys[i]) {
+ any++;
+ continue;
+ }
+ if (ppd->pkeys[i] == key) {
+ atomic_t *pkrefs = &ppd->pkeyrefs[i];
+
+ if (atomic_inc_return(pkrefs) > 1) {
+ rcd->pkeys[pidx] = key;
+ ret = 0;
+ goto bail;
+ } else {
+ /*
+ * lost race, decrement count, catch below
+ */
+ atomic_dec(pkrefs);
+ any++;
+ }
+ }
+ if ((ppd->pkeys[i] & 0x7FFF) == lkey) {
+ /*
+ * It makes no sense to have both the limited and
+ * full membership PKEY set at the same time since
+ * the unlimited one will disable the limited one.
+ */
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (!any) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (!ppd->pkeys[i] &&
+ atomic_inc_return(&ppd->pkeyrefs[i]) == 1) {
+ rcd->pkeys[pidx] = key;
+ ppd->pkeys[i] = key;
+ (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
+ ret = 0;
+ goto bail;
+ }
+ }
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_manage_rcvq - manage a context's receive queue
+ * @rcd: the context
+ * @subctxt: the subcontext
+ * @start_stop: action to carry out
+ *
+ * start_stop == 0 disables receive on the context, for use in queue
+ * overflow conditions. start_stop == 1 re-enables, to be used to
+ * re-init the software copy of the head register.
+ */
+static int qib_manage_rcvq(struct qib_ctxtdata *rcd, unsigned subctxt,
+ int start_stop)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned int rcvctrl_op;
+
+ if (subctxt)
+ goto bail;
+ /* atomically clear receive enable ctxt. */
+ if (start_stop) {
+ /*
+ * On enable, force in-memory copy of the tail register to
+ * 0, so that protocol code doesn't have to worry about
+ * whether or not the chip has yet updated the in-memory
+ * copy or not on return from the system call. The chip
+ * always resets its tail register back to 0 on a
+ * transition from disabled to enabled.
+ */
+ if (rcd->rcvhdrtail_kvaddr)
+ qib_clear_rcvhdrtail(rcd);
+ rcvctrl_op = QIB_RCVCTRL_CTXT_ENB;
+ } else
+ rcvctrl_op = QIB_RCVCTRL_CTXT_DIS;
+ dd->f_rcvctrl(rcd->ppd, rcvctrl_op, rcd->ctxt);
+ /* always; new head should be equal to new tail; see above */
+bail:
+ return 0;
+}
+
+static void qib_clean_part_key(struct qib_ctxtdata *rcd,
+ struct qib_devdata *dd)
+{
+ int i, j, pchanged = 0;
+ u64 oldpkey;
+ struct qib_pportdata *ppd = rcd->ppd;
+
+ /* for debugging only */
+ oldpkey = (u64) ppd->pkeys[0] |
+ ((u64) ppd->pkeys[1] << 16) |
+ ((u64) ppd->pkeys[2] << 32) |
+ ((u64) ppd->pkeys[3] << 48);
+
+ for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
+ if (!rcd->pkeys[i])
+ continue;
+ for (j = 0; j < ARRAY_SIZE(ppd->pkeys); j++) {
+ /* check for match independent of the global bit */
+ if ((ppd->pkeys[j] & 0x7fff) !=
+ (rcd->pkeys[i] & 0x7fff))
+ continue;
+ if (atomic_dec_and_test(&ppd->pkeyrefs[j])) {
+ ppd->pkeys[j] = 0;
+ pchanged++;
+ }
+ break;
+ }
+ rcd->pkeys[i] = 0;
+ }
+ if (pchanged)
+ (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
+}
+
+/* common code for the mappings on dma_alloc_coherent mem */
+static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd,
+ unsigned len, void *kvaddr, u32 write_ok, char *what)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned long pfn;
+ int ret;
+
+ if ((vma->vm_end - vma->vm_start) > len) {
+ qib_devinfo(dd->pcidev,
+ "FAIL on %s: len %lx > %x\n", what,
+ vma->vm_end - vma->vm_start, len);
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ /*
+ * shared context user code requires rcvhdrq mapped r/w, others
+ * only allowed readonly mapping.
+ */
+ if (!write_ok) {
+ if (vma->vm_flags & VM_WRITE) {
+ qib_devinfo(dd->pcidev,
+ "%s must be mapped readonly\n", what);
+ ret = -EPERM;
+ goto bail;
+ }
+
+ /* don't allow them to later change with mprotect */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ }
+
+ pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, vma->vm_start, pfn,
+ len, vma->vm_page_prot);
+ if (ret)
+ qib_devinfo(dd->pcidev,
+ "%s ctxt%u mmap of %lx, %x bytes failed: %d\n",
+ what, rcd->ctxt, pfn, len, ret);
+bail:
+ return ret;
+}
+
+static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd,
+ u64 ureg)
+{
+ unsigned long phys;
+ unsigned long sz;
+ int ret;
+
+ /*
+ * This is real hardware, so use io_remap. This is the mechanism
+ * for the user process to update the head registers for their ctxt
+ * in the chip.
+ */
+ sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE;
+ if ((vma->vm_end - vma->vm_start) > sz) {
+ qib_devinfo(dd->pcidev,
+ "FAIL mmap userreg: reqlen %lx > PAGE\n",
+ vma->vm_end - vma->vm_start);
+ ret = -EFAULT;
+ } else {
+ phys = dd->physaddr + ureg;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ phys >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ }
+ return ret;
+}
+
+static int mmap_piobufs(struct vm_area_struct *vma,
+ struct qib_devdata *dd,
+ struct qib_ctxtdata *rcd,
+ unsigned piobufs, unsigned piocnt)
+{
+ unsigned long phys;
+ int ret;
+
+ /*
+ * When we map the PIO buffers in the chip, we want to map them as
+ * writeonly, no read possible; unfortunately, x86 doesn't allow
+ * for this in hardware, but we still prevent users from asking
+ * for it.
+ */
+ if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) {
+ qib_devinfo(dd->pcidev,
+ "FAIL mmap piobufs: reqlen %lx > PAGE\n",
+ vma->vm_end - vma->vm_start);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ phys = dd->physaddr + piobufs;
+
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+ pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
+#endif
+
+ /*
+ * don't allow them to later change to readable with mprotect (for when
+ * not initially mapped readable, as is normally the case)
+ */
+ vma->vm_flags &= ~VM_MAYREAD;
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+
+ if (qib_wc_pat)
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+bail:
+ return ret;
+}
+
+static int mmap_rcvegrbufs(struct vm_area_struct *vma,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned long start, size;
+ size_t total_size, i;
+ unsigned long pfn;
+ int ret;
+
+ size = rcd->rcvegrbuf_size;
+ total_size = rcd->rcvegrbuf_chunks * size;
+ if ((vma->vm_end - vma->vm_start) > total_size) {
+ qib_devinfo(dd->pcidev,
+ "FAIL on egr bufs: reqlen %lx > actual %lx\n",
+ vma->vm_end - vma->vm_start,
+ (unsigned long) total_size);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (vma->vm_flags & VM_WRITE) {
+ qib_devinfo(dd->pcidev,
+ "Can't map eager buffers as writable (flags=%lx)\n",
+ vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /* don't allow them to later change to writeable with mprotect */
+ vma->vm_flags &= ~VM_MAYWRITE;
+
+ start = vma->vm_start;
+
+ for (i = 0; i < rcd->rcvegrbuf_chunks; i++, start += size) {
+ pfn = virt_to_phys(rcd->rcvegrbuf[i]) >> PAGE_SHIFT;
+ ret = remap_pfn_range(vma, start, pfn, size,
+ vma->vm_page_prot);
+ if (ret < 0)
+ goto bail;
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/*
+ * qib_file_vma_fault - handle a VMA page fault.
+ */
+static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *page;
+
+ page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
+ if (!page)
+ return VM_FAULT_SIGBUS;
+
+ get_page(page);
+ vmf->page = page;
+
+ return 0;
+}
+
+static struct vm_operations_struct qib_file_vm_ops = {
+ .fault = qib_file_vma_fault,
+};
+
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+ struct qib_ctxtdata *rcd, unsigned subctxt)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned subctxt_cnt;
+ unsigned long len;
+ void *addr;
+ size_t size;
+ int ret = 0;
+
+ subctxt_cnt = rcd->subctxt_cnt;
+ size = rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size;
+
+ /*
+ * Each process has all the subctxt uregbase, rcvhdrq, and
+ * rcvegrbufs mmapped - as an array for all the processes,
+ * and also separately for this process.
+ */
+ if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase)) {
+ addr = rcd->subctxt_uregbase;
+ size = PAGE_SIZE * subctxt_cnt;
+ } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base)) {
+ addr = rcd->subctxt_rcvhdr_base;
+ size = rcd->rcvhdrq_size * subctxt_cnt;
+ } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf)) {
+ addr = rcd->subctxt_rcvegrbuf;
+ size *= subctxt_cnt;
+ } else if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase +
+ PAGE_SIZE * subctxt)) {
+ addr = rcd->subctxt_uregbase + PAGE_SIZE * subctxt;
+ size = PAGE_SIZE;
+ } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base +
+ rcd->rcvhdrq_size * subctxt)) {
+ addr = rcd->subctxt_rcvhdr_base +
+ rcd->rcvhdrq_size * subctxt;
+ size = rcd->rcvhdrq_size;
+ } else if (pgaddr == cvt_kvaddr(&rcd->user_event_mask[subctxt])) {
+ addr = rcd->user_event_mask;
+ size = PAGE_SIZE;
+ } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf +
+ size * subctxt)) {
+ addr = rcd->subctxt_rcvegrbuf + size * subctxt;
+ /* rcvegrbufs are read-only on the slave */
+ if (vma->vm_flags & VM_WRITE) {
+ qib_devinfo(dd->pcidev,
+ "Can't map eager buffers as "
+ "writable (flags=%lx)\n", vma->vm_flags);
+ ret = -EPERM;
+ goto bail;
+ }
+ /*
+ * Don't allow permission to later change to writeable
+ * with mprotect.
+ */
+ vma->vm_flags &= ~VM_MAYWRITE;
+ } else
+ goto bail;
+ len = vma->vm_end - vma->vm_start;
+ if (len > size) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+ vma->vm_ops = &qib_file_vm_ops;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_mmapf - mmap various structures into user space
+ * @fp: the file pointer
+ * @vma: the VM area
+ *
+ * We use this to have a shared buffer between the kernel and the user code
+ * for the rcvhdr queue, egr buffers, and the per-context user regs and pio
+ * buffers in the chip. We have the open and close entries so we can bump
+ * the ref count and keep the driver from being unloaded while still mapped.
+ */
+static int qib_mmapf(struct file *fp, struct vm_area_struct *vma)
+{
+ struct qib_ctxtdata *rcd;
+ struct qib_devdata *dd;
+ u64 pgaddr, ureg;
+ unsigned piobufs, piocnt;
+ int ret, match = 1;
+
+ rcd = ctxt_fp(fp);
+ if (!rcd || !(vma->vm_flags & VM_SHARED)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ dd = rcd->dd;
+
+ /*
+ * This is the qib_do_user_init() code, mapping the shared buffers
+ * and per-context user registers into the user process. The address
+ * referred to by vm_pgoff is the file offset passed via mmap().
+ * For shared contexts, this is the kernel vmalloc() address of the
+ * pages to share with the master.
+ * For non-shared or master ctxts, this is a physical address.
+ * We only do one mmap for each space mapped.
+ */
+ pgaddr = vma->vm_pgoff << PAGE_SHIFT;
+
+ /*
+ * Check for 0 in case one of the allocations failed, but user
+ * called mmap anyway.
+ */
+ if (!pgaddr) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Physical addresses must fit in 40 bits for our hardware.
+ * Check for kernel virtual addresses first, anything else must
+ * match a HW or memory address.
+ */
+ ret = mmap_kvaddr(vma, pgaddr, rcd, subctxt_fp(fp));
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ goto bail;
+ }
+
+ ureg = dd->uregbase + dd->ureg_align * rcd->ctxt;
+ if (!rcd->subctxt_cnt) {
+ /* ctxt is not shared */
+ piocnt = rcd->piocnt;
+ piobufs = rcd->piobufs;
+ } else if (!subctxt_fp(fp)) {
+ /* caller is the master */
+ piocnt = (rcd->piocnt / rcd->subctxt_cnt) +
+ (rcd->piocnt % rcd->subctxt_cnt);
+ piobufs = rcd->piobufs +
+ dd->palign * (rcd->piocnt - piocnt);
+ } else {
+ unsigned slave = subctxt_fp(fp) - 1;
+
+ /* caller is a slave */
+ piocnt = rcd->piocnt / rcd->subctxt_cnt;
+ piobufs = rcd->piobufs + dd->palign * piocnt * slave;
+ }
+
+ if (pgaddr == ureg)
+ ret = mmap_ureg(vma, dd, ureg);
+ else if (pgaddr == piobufs)
+ ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt);
+ else if (pgaddr == dd->pioavailregs_phys)
+ /* in-memory copy of pioavail registers */
+ ret = qib_mmap_mem(vma, rcd, PAGE_SIZE,
+ (void *) dd->pioavailregs_dma, 0,
+ "pioavail registers");
+ else if (pgaddr == rcd->rcvegr_phys)
+ ret = mmap_rcvegrbufs(vma, rcd);
+ else if (pgaddr == (u64) rcd->rcvhdrq_phys)
+ /*
+ * The rcvhdrq itself; multiple pages, contiguous
+ * from an i/o perspective. Shared contexts need
+ * to map r/w, so we allow writing.
+ */
+ ret = qib_mmap_mem(vma, rcd, rcd->rcvhdrq_size,
+ rcd->rcvhdrq, 1, "rcvhdrq");
+ else if (pgaddr == (u64) rcd->rcvhdrqtailaddr_phys)
+ /* in-memory copy of rcvhdrq tail register */
+ ret = qib_mmap_mem(vma, rcd, PAGE_SIZE,
+ rcd->rcvhdrtail_kvaddr, 0,
+ "rcvhdrq tail");
+ else
+ match = 0;
+ if (!match)
+ ret = -EINVAL;
+
+ vma->vm_private_data = NULL;
+
+ if (ret < 0)
+ qib_devinfo(dd->pcidev,
+ "mmap Failure %d: off %llx len %lx\n",
+ -ret, (unsigned long long)pgaddr,
+ vma->vm_end - vma->vm_start);
+bail:
+ return ret;
+}
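The mapping convention implemented by qib_mmapf() can be summarized from the caller's side: user space issues one mmap() per region, passing as the file offset the address the driver advertised for that region (a physical address for device/DMA regions, or a kernel vmalloc address for shared-subcontext pages). A minimal user-space sketch follows; it is illustration only, not part of the patch, and how the advertised addresses reach user space (the base info filled in by QIB_CMD_USER_INIT) is outside this hunk.

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>

/* Map one driver-advertised region; MAP_SHARED is mandatory, since
 * qib_mmapf() rejects non-shared mappings with -EINVAL. */
static void *map_qib_region(int fd, uint64_t advertised_addr, size_t len,
			    int prot)
{
	void *p = mmap(NULL, len, prot, MAP_SHARED, fd,
		       (off_t)advertised_addr);

	if (p == MAP_FAILED) {
		perror("mmap");
		return NULL;
	}
	return p;
}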
+
+static unsigned int qib_poll_urgent(struct qib_ctxtdata *rcd,
+ struct file *fp,
+ struct poll_table_struct *pt)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned pollflag;
+
+ poll_wait(fp, &rcd->wait, pt);
+
+ spin_lock_irq(&dd->uctxt_lock);
+ if (rcd->urgent != rcd->urgent_poll) {
+ pollflag = POLLIN | POLLRDNORM;
+ rcd->urgent_poll = rcd->urgent;
+ } else {
+ pollflag = 0;
+ set_bit(QIB_CTXT_WAITING_URG, &rcd->flag);
+ }
+ spin_unlock_irq(&dd->uctxt_lock);
+
+ return pollflag;
+}
+
+static unsigned int qib_poll_next(struct qib_ctxtdata *rcd,
+ struct file *fp,
+ struct poll_table_struct *pt)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned pollflag;
+
+ poll_wait(fp, &rcd->wait, pt);
+
+ spin_lock_irq(&dd->uctxt_lock);
+ if (dd->f_hdrqempty(rcd)) {
+ set_bit(QIB_CTXT_WAITING_RCV, &rcd->flag);
+ dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_ENB, rcd->ctxt);
+ pollflag = 0;
+ } else
+ pollflag = POLLIN | POLLRDNORM;
+ spin_unlock_irq(&dd->uctxt_lock);
+
+ return pollflag;
+}
+
+static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
+{
+ struct qib_ctxtdata *rcd;
+ unsigned pollflag;
+
+ rcd = ctxt_fp(fp);
+ if (!rcd)
+ pollflag = POLLERR;
+ else if (rcd->poll_type == QIB_POLL_TYPE_URGENT)
+ pollflag = qib_poll_urgent(rcd, fp, pt);
+ else if (rcd->poll_type == QIB_POLL_TYPE_ANYRCV)
+ pollflag = qib_poll_next(rcd, fp, pt);
+ else /* invalid */
+ pollflag = POLLERR;
+
+ return pollflag;
+}
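Usage sketch for the poll interface above (illustration only): a process that has already assigned and initialized its context selects a poll type through the write() command channel (dispatched in qib_write() later in this file) and then blocks in poll(). The struct qib_cmd layout and constants are taken from this file; the user ABI header that declares them is assumed to be available.

#include <poll.h>
#include <string.h>
#include <unistd.h>
/* Assumes the qib user ABI header declaring struct qib_cmd,
 * QIB_CMD_POLL_TYPE and QIB_POLL_TYPE_ANYRCV has been included. */

static int wait_for_rcv(int fd, int timeout_ms)
{
	struct qib_cmd cmd;
	struct pollfd pfd = { .fd = fd, .events = POLLIN };

	/* Ask for "wake on any receive" semantics (qib_poll_next()). */
	memset(&cmd, 0, sizeof(cmd));
	cmd.type = QIB_CMD_POLL_TYPE;
	cmd.cmd.poll_type = QIB_POLL_TYPE_ANYRCV;
	if (write(fd, &cmd, sizeof(cmd)) < 0)
		return -1;

	return poll(&pfd, 1, timeout_ms);	/* > 0: data is ready */
}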
+
+static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
+{
+ struct qib_filedata *fd = fp->private_data;
+ const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+ const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+ int local_cpu;
+
+ /*
+ * If the process has NOT already set its affinity, select and
+ * reserve a processor for it on the local NUMA node.
+ */
+ if ((weight >= qib_cpulist_count) &&
+ (cpumask_weight(local_mask) <= qib_cpulist_count)) {
+ for_each_cpu(local_cpu, local_mask)
+ if (!test_and_set_bit(local_cpu, qib_cpulist)) {
+ fd->rec_cpu_num = local_cpu;
+ return;
+ }
+ }
+
+ /*
+ * If the process has NOT already set its affinity, select and
+ * reserve a processor for it, as a rendezvous for all
+ * users of the driver.  If they don't actually later
+ * set affinity to this cpu, or set it to some other cpu,
+ * it just means that sooner or later we don't recommend
+ * a cpu, and let the scheduler do its best.
+ */
+ if (weight >= qib_cpulist_count) {
+ int cpu;
+ cpu = find_first_zero_bit(qib_cpulist,
+ qib_cpulist_count);
+ if (cpu == qib_cpulist_count)
+ qib_dev_err(dd,
+ "no cpus avail for affinity PID %u\n",
+ current->pid);
+ else {
+ __set_bit(cpu, qib_cpulist);
+ fd->rec_cpu_num = cpu;
+ }
+ }
+}
+
+/*
+ * Check that userland and driver are compatible for subcontexts.
+ */
+static int qib_compatible_subctxts(int user_swmajor, int user_swminor)
+{
+ /* this code is written long-hand for clarity */
+ if (QIB_USER_SWMAJOR != user_swmajor) {
+ /* no promise of compatibility if major mismatch */
+ return 0;
+ }
+ if (QIB_USER_SWMAJOR == 1) {
+ switch (QIB_USER_SWMINOR) {
+ case 0:
+ case 1:
+ case 2:
+ /* no subctxt implementation so cannot be compatible */
+ return 0;
+ case 3:
+ /* 3 is only compatible with itself */
+ return user_swminor == 3;
+ default:
+ /* >= 4 are compatible (or are expected to be) */
+ return user_swminor <= QIB_USER_SWMINOR;
+ }
+ }
+ /* make no promises yet for future major versions */
+ return 0;
+}
+
+static int init_subctxts(struct qib_devdata *dd,
+ struct qib_ctxtdata *rcd,
+ const struct qib_user_info *uinfo)
+{
+ int ret = 0;
+ unsigned num_subctxts;
+ size_t size;
+
+ /*
+ * If the user is requesting zero subctxts,
+ * skip the subctxt allocation.
+ */
+ if (uinfo->spu_subctxt_cnt <= 0)
+ goto bail;
+ num_subctxts = uinfo->spu_subctxt_cnt;
+
+ /* Check for subctxt compatibility */
+ if (!qib_compatible_subctxts(uinfo->spu_userversion >> 16,
+ uinfo->spu_userversion & 0xffff)) {
+ qib_devinfo(dd->pcidev,
+ "Mismatched user version (%d.%d) and driver "
+ "version (%d.%d) while context sharing. Ensure "
+ "that driver and library are from the same "
+ "release.\n",
+ (int) (uinfo->spu_userversion >> 16),
+ (int) (uinfo->spu_userversion & 0xffff),
+ QIB_USER_SWMAJOR, QIB_USER_SWMINOR);
+ goto bail;
+ }
+ if (num_subctxts > QLOGIC_IB_MAX_SUBCTXT) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts);
+ if (!rcd->subctxt_uregbase) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ /* Note: rcd->rcvhdrq_size isn't initialized yet. */
+ size = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE) * num_subctxts;
+ rcd->subctxt_rcvhdr_base = vmalloc_user(size);
+ if (!rcd->subctxt_rcvhdr_base) {
+ ret = -ENOMEM;
+ goto bail_ureg;
+ }
+
+ rcd->subctxt_rcvegrbuf = vmalloc_user(rcd->rcvegrbuf_chunks *
+ rcd->rcvegrbuf_size *
+ num_subctxts);
+ if (!rcd->subctxt_rcvegrbuf) {
+ ret = -ENOMEM;
+ goto bail_rhdr;
+ }
+
+ rcd->subctxt_cnt = uinfo->spu_subctxt_cnt;
+ rcd->subctxt_id = uinfo->spu_subctxt_id;
+ rcd->active_slaves = 1;
+ rcd->redirect_seq_cnt = 1;
+ set_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag);
+ goto bail;
+
+bail_rhdr:
+ vfree(rcd->subctxt_rcvhdr_base);
+bail_ureg:
+ vfree(rcd->subctxt_uregbase);
+ rcd->subctxt_uregbase = NULL;
+bail:
+ return ret;
+}
+
+static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+ struct file *fp, const struct qib_user_info *uinfo)
+{
+ struct qib_filedata *fd = fp->private_data;
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_ctxtdata *rcd;
+ void *ptmp = NULL;
+ int ret;
+ int numa_id;
+
+ assign_ctxt_affinity(fp, dd);
+
+ numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
+ cpu_to_node(fd->rec_cpu_num) :
+ numa_node_id()) : dd->assigned_node_id;
+
+ rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
+
+ /*
+ * Allocate memory for use in qib_tid_update() at open to
+ * reduce cost of expected send setup per message segment
+ */
+ if (rcd)
+ ptmp = kmalloc(dd->rcvtidcnt * sizeof(u16) +
+ dd->rcvtidcnt * sizeof(struct page **),
+ GFP_KERNEL);
+
+ if (!rcd || !ptmp) {
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata memory, failing open\n");
+ ret = -ENOMEM;
+ goto bailerr;
+ }
+ rcd->userversion = uinfo->spu_userversion;
+ ret = init_subctxts(dd, rcd, uinfo);
+ if (ret)
+ goto bailerr;
+ rcd->tid_pg_list = ptmp;
+ rcd->pid = current->pid;
+ init_waitqueue_head(&dd->rcd[ctxt]->wait);
+ strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
+ ctxt_fp(fp) = rcd;
+ qib_stats.sps_ctxts++;
+ dd->freectxts--;
+ ret = 0;
+ goto bail;
+
+bailerr:
+ if (fd->rec_cpu_num != -1)
+ __clear_bit(fd->rec_cpu_num, qib_cpulist);
+
+ dd->rcd[ctxt] = NULL;
+ kfree(rcd);
+ kfree(ptmp);
+bail:
+ return ret;
+}
+
+static inline int usable(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+
+ return dd && (dd->flags & QIB_PRESENT) && dd->kregbase && ppd->lid &&
+ (ppd->lflags & QIBL_LINKACTIVE);
+}
+
+/*
+ * Select a context on the given device, either using a requested port
+ * or the port based on the context number.
+ */
+static int choose_port_ctxt(struct file *fp, struct qib_devdata *dd, u32 port,
+ const struct qib_user_info *uinfo)
+{
+ struct qib_pportdata *ppd = NULL;
+ int ret, ctxt;
+
+ if (port) {
+ if (!usable(dd->pport + port - 1)) {
+ ret = -ENETDOWN;
+ goto done;
+ } else
+ ppd = dd->pport + port - 1;
+ }
+ for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts && dd->rcd[ctxt];
+ ctxt++)
+ ;
+ if (ctxt == dd->cfgctxts) {
+ ret = -EBUSY;
+ goto done;
+ }
+ if (!ppd) {
+ u32 pidx = ctxt % dd->num_pports;
+ if (usable(dd->pport + pidx))
+ ppd = dd->pport + pidx;
+ else {
+ for (pidx = 0; pidx < dd->num_pports && !ppd;
+ pidx++)
+ if (usable(dd->pport + pidx))
+ ppd = dd->pport + pidx;
+ }
+ }
+ ret = ppd ? setup_ctxt(ppd, ctxt, fp, uinfo) : -ENETDOWN;
+done:
+ return ret;
+}
+
+static int find_free_ctxt(int unit, struct file *fp,
+ const struct qib_user_info *uinfo)
+{
+ struct qib_devdata *dd = qib_lookup(unit);
+ int ret;
+
+ if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports))
+ ret = -ENODEV;
+ else
+ ret = choose_port_ctxt(fp, dd, uinfo->spu_port, uinfo);
+
+ return ret;
+}
+
+static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
+ unsigned alg)
+{
+ struct qib_devdata *udd = NULL;
+ int ret = 0, devmax, npresent, nup, ndev, dusable = 0, i;
+ u32 port = uinfo->spu_port, ctxt;
+
+ devmax = qib_count_units(&npresent, &nup);
+ if (!npresent) {
+ ret = -ENXIO;
+ goto done;
+ }
+ if (nup == 0) {
+ ret = -ENETDOWN;
+ goto done;
+ }
+
+ if (alg == QIB_PORT_ALG_ACROSS) {
+ unsigned inuse = ~0U;
+ /* find device (with ACTIVE ports) with fewest ctxts in use */
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct qib_devdata *dd = qib_lookup(ndev);
+ unsigned cused = 0, cfree = 0, pusable = 0;
+ if (!dd)
+ continue;
+ if (port && port <= dd->num_pports &&
+ usable(dd->pport + port - 1))
+ pusable = 1;
+ else
+ for (i = 0; i < dd->num_pports; i++)
+ if (usable(dd->pport + i))
+ pusable++;
+ if (!pusable)
+ continue;
+ for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts;
+ ctxt++)
+ if (dd->rcd[ctxt])
+ cused++;
+ else
+ cfree++;
+ if (cfree && cused < inuse) {
+ udd = dd;
+ inuse = cused;
+ }
+ }
+ if (udd) {
+ ret = choose_port_ctxt(fp, udd, port, uinfo);
+ goto done;
+ }
+ } else {
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct qib_devdata *dd = qib_lookup(ndev);
+ if (dd) {
+ ret = choose_port_ctxt(fp, dd, port, uinfo);
+ if (!ret)
+ goto done;
+ if (ret == -EBUSY)
+ dusable++;
+ }
+ }
+ }
+ ret = dusable ? -EBUSY : -ENETDOWN;
+
+done:
+ return ret;
+}
+
+static int find_shared_ctxt(struct file *fp,
+ const struct qib_user_info *uinfo)
+{
+ int devmax, ndev, i;
+ int ret = 0;
+
+ devmax = qib_count_units(NULL, NULL);
+
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct qib_devdata *dd = qib_lookup(ndev);
+
+ /* device portion of usable() */
+ if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
+ continue;
+ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
+ struct qib_ctxtdata *rcd = dd->rcd[i];
+
+ /* Skip ctxts which are not yet open */
+ if (!rcd || !rcd->cnt)
+ continue;
+ /* Skip ctxt if it doesn't match the requested one */
+ if (rcd->subctxt_id != uinfo->spu_subctxt_id)
+ continue;
+ /* Verify the sharing process matches the master */
+ if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
+ rcd->userversion != uinfo->spu_userversion ||
+ rcd->cnt >= rcd->subctxt_cnt) {
+ ret = -EINVAL;
+ goto done;
+ }
+ ctxt_fp(fp) = rcd;
+ subctxt_fp(fp) = rcd->cnt++;
+ rcd->subpid[subctxt_fp(fp)] = current->pid;
+ tidcursor_fp(fp) = 0;
+ rcd->active_slaves |= 1 << subctxt_fp(fp);
+ ret = 1;
+ goto done;
+ }
+ }
+
+done:
+ return ret;
+}
+
+static int qib_open(struct inode *in, struct file *fp)
+{
+ /* The real work is performed later in qib_assign_ctxt() */
+ fp->private_data = kzalloc(sizeof(struct qib_filedata), GFP_KERNEL);
+ if (fp->private_data) /* no cpu affinity by default */
+ ((struct qib_filedata *)fp->private_data)->rec_cpu_num = -1;
+ return fp->private_data ? 0 : -ENOMEM;
+}
+
+static int find_hca(unsigned int cpu, int *unit)
+{
+ int ret = 0, devmax, npresent, nup, ndev;
+
+ *unit = -1;
+
+ devmax = qib_count_units(&npresent, &nup);
+ if (!npresent) {
+ ret = -ENXIO;
+ goto done;
+ }
+ if (!nup) {
+ ret = -ENETDOWN;
+ goto done;
+ }
+ for (ndev = 0; ndev < devmax; ndev++) {
+ struct qib_devdata *dd = qib_lookup(ndev);
+ if (dd) {
+ if (pcibus_to_node(dd->pcidev->bus) < 0) {
+ ret = -EINVAL;
+ goto done;
+ }
+ if (cpu_to_node(cpu) ==
+ pcibus_to_node(dd->pcidev->bus)) {
+ *unit = ndev;
+ goto done;
+ }
+ }
+ }
+done:
+ return ret;
+}
+
+static int do_qib_user_sdma_queue_create(struct file *fp)
+{
+ struct qib_filedata *fd = fp->private_data;
+ struct qib_ctxtdata *rcd = fd->rcd;
+ struct qib_devdata *dd = rcd->dd;
+
+ if (dd->flags & QIB_HAS_SEND_DMA) {
+
+ fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
+ dd->unit,
+ rcd->ctxt,
+ fd->subctxt);
+ if (!fd->pq)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Get the ctxt early, so we can set affinity prior to memory allocation.
+ */
+static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+{
+ int ret;
+ int i_minor;
+ unsigned swmajor, swminor, alg = QIB_PORT_ALG_ACROSS;
+
+ /* Check to be sure we haven't already initialized this file */
+ if (ctxt_fp(fp)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* for now, if major version is different, bail */
+ swmajor = uinfo->spu_userversion >> 16;
+ if (swmajor != QIB_USER_SWMAJOR) {
+ ret = -ENODEV;
+ goto done;
+ }
+
+ swminor = uinfo->spu_userversion & 0xffff;
+
+ if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
+ alg = uinfo->spu_port_alg;
+
+ mutex_lock(&qib_mutex);
+
+ if (qib_compatible_subctxts(swmajor, swminor) &&
+ uinfo->spu_subctxt_cnt) {
+ ret = find_shared_ctxt(fp, uinfo);
+ if (ret > 0) {
+ ret = do_qib_user_sdma_queue_create(fp);
+ if (!ret)
+ assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+ goto done_ok;
+ }
+ }
+
+ i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
+ if (i_minor)
+ ret = find_free_ctxt(i_minor - 1, fp, uinfo);
+ else {
+ int unit;
+ const unsigned int cpu = cpumask_first(&current->cpus_allowed);
+ const unsigned int weight =
+ cpumask_weight(&current->cpus_allowed);
+
+ if (weight == 1 && !test_bit(cpu, qib_cpulist))
+ if (!find_hca(cpu, &unit) && unit >= 0)
+ if (!find_free_ctxt(unit, fp, uinfo)) {
+ ret = 0;
+ goto done_chk_sdma;
+ }
+ ret = get_a_ctxt(fp, uinfo, alg);
+ }
+
+done_chk_sdma:
+ if (!ret)
+ ret = do_qib_user_sdma_queue_create(fp);
+done_ok:
+ mutex_unlock(&qib_mutex);
+
+done:
+ return ret;
+}
+
+
+static int qib_do_user_init(struct file *fp,
+ const struct qib_user_info *uinfo)
+{
+ int ret;
+ struct qib_ctxtdata *rcd = ctxt_fp(fp);
+ struct qib_devdata *dd;
+ unsigned uctxt;
+
+ /* Subctxts don't need to initialize anything since master did it. */
+ if (subctxt_fp(fp)) {
+ ret = wait_event_interruptible(rcd->wait,
+ !test_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag));
+ goto bail;
+ }
+
+ dd = rcd->dd;
+
+ /* some ctxts may get extra buffers, calculate that here */
+ uctxt = rcd->ctxt - dd->first_user_ctxt;
+ if (uctxt < dd->ctxts_extrabuf) {
+ rcd->piocnt = dd->pbufsctxt + 1;
+ rcd->pio_base = rcd->piocnt * uctxt;
+ } else {
+ rcd->piocnt = dd->pbufsctxt;
+ rcd->pio_base = rcd->piocnt * uctxt +
+ dd->ctxts_extrabuf;
+ }
+
+ /*
+ * All user buffers are 2KB buffers. If we ever support
+ * giving 4KB buffers to user processes, this will need some
+ * work. Can't use piobufbase directly, because it has
+ * both 2K and 4K buffer base values. So check and handle.
+ */
+ if ((rcd->pio_base + rcd->piocnt) > dd->piobcnt2k) {
+ if (rcd->pio_base >= dd->piobcnt2k) {
+ qib_dev_err(dd,
+ "%u:ctxt%u: no 2KB buffers available\n",
+ dd->unit, rcd->ctxt);
+ ret = -ENOBUFS;
+ goto bail;
+ }
+ rcd->piocnt = dd->piobcnt2k - rcd->pio_base;
+ qib_dev_err(dd, "Ctxt%u: would use 4KB bufs, using %u\n",
+ rcd->ctxt, rcd->piocnt);
+ }
+
+ rcd->piobufs = dd->pio2k_bufbase + rcd->pio_base * dd->palign;
+ qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt,
+ TXCHK_CHG_TYPE_USER, rcd);
+ /*
+ * Try to ensure that processes start up with a consistent pioavail
+ * update for their own range, at least.  If the system is very quiet,
+ * the in-memory copy may be out of date at startup for this range of
+ * buffers when a context gets re-used.  Do this after the chg_pioavail
+ * and before the rest of setup, so it's "almost certain" the DMA
+ * will have occurred (we can't guarantee it 100%, but it should be
+ * many nines with this ordering), given how much else happens
+ * after this.
+ */
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+
+ /*
+ * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+ * array for the time being.  If rcd->ctxt ever exceeds what the
+ * chip supports, extra handling would be needed here, overflowing
+ * through ctxt 0, someday.
+ */
+ ret = qib_create_rcvhdrq(dd, rcd);
+ if (!ret)
+ ret = qib_setup_eagerbufs(rcd);
+ if (ret)
+ goto bail_pio;
+
+ rcd->tidcursor = 0; /* start at beginning after open */
+
+ /* initialize poll variables... */
+ rcd->urgent = 0;
+ rcd->urgent_poll = 0;
+
+ /*
+ * Now enable the ctxt for receive.
+ * Chips that are set to DMA the tail register to memory do so
+ * whenever it changes (and when the update bit transitions from
+ * 0 to 1), so for those chips we turn it off and then back on.
+ * This will (very briefly) affect any other open ctxts, but the
+ * duration is very short, and therefore isn't an issue.  We
+ * explicitly set the in-memory tail copy to 0 beforehand, so we
+ * don't have to wait to be sure the DMA update has happened
+ * (the chip resets head/tail to 0 on the transition to enable).
+ */
+ if (rcd->rcvhdrtail_kvaddr)
+ qib_clear_rcvhdrtail(rcd);
+
+ dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB,
+ rcd->ctxt);
+
+ /* Notify any waiting slaves */
+ if (rcd->subctxt_cnt) {
+ clear_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag);
+ wake_up(&rcd->wait);
+ }
+ return 0;
+
+bail_pio:
+ qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt,
+ TXCHK_CHG_TYPE_KERN, rcd);
+bail:
+ return ret;
+}
+
+/**
+ * unlock_expected_tids - unlock any expected TID entries the context still had in use
+ * @rcd: ctxt
+ *
+ * We don't actually update the chip here, because we do a bulk update
+ * in the caller (qib_close()), using f_clear_tids.
+ */
+static void unlock_expected_tids(struct qib_ctxtdata *rcd)
+{
+ struct qib_devdata *dd = rcd->dd;
+ int ctxt_tidbase = rcd->ctxt * dd->rcvtidcnt;
+ int i, cnt = 0, maxtid = ctxt_tidbase + dd->rcvtidcnt;
+
+ for (i = ctxt_tidbase; i < maxtid; i++) {
+ struct page *p = dd->pageshadow[i];
+ dma_addr_t phys;
+
+ if (!p)
+ continue;
+
+ phys = dd->physshadow[i];
+ dd->physshadow[i] = dd->tidinvalid;
+ dd->pageshadow[i] = NULL;
+ pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ qib_release_user_pages(&p, 1);
+ cnt++;
+ }
+}
+
+static int qib_close(struct inode *in, struct file *fp)
+{
+ int ret = 0;
+ struct qib_filedata *fd;
+ struct qib_ctxtdata *rcd;
+ struct qib_devdata *dd;
+ unsigned long flags;
+ unsigned ctxt;
+ pid_t pid;
+
+ mutex_lock(&qib_mutex);
+
+ fd = fp->private_data;
+ fp->private_data = NULL;
+ rcd = fd->rcd;
+ if (!rcd) {
+ mutex_unlock(&qib_mutex);
+ goto bail;
+ }
+
+ dd = rcd->dd;
+
+ /* ensure all pio buffer writes in progress are flushed */
+ qib_flush_wc();
+
+ /* drain user sdma queue */
+ if (fd->pq) {
+ qib_user_sdma_queue_drain(rcd->ppd, fd->pq);
+ qib_user_sdma_queue_destroy(fd->pq);
+ }
+
+ if (fd->rec_cpu_num != -1)
+ __clear_bit(fd->rec_cpu_num, qib_cpulist);
+
+ if (--rcd->cnt) {
+ /*
+ * XXX If the master closes the context before the slave(s),
+ * revoke the mmap for the eager receive queue so
+ * the slave(s) don't wait for receive data forever.
+ */
+ rcd->active_slaves &= ~(1 << fd->subctxt);
+ rcd->subpid[fd->subctxt] = 0;
+ mutex_unlock(&qib_mutex);
+ goto bail;
+ }
+
+ /* early; no interrupt users after this */
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ ctxt = rcd->ctxt;
+ dd->rcd[ctxt] = NULL;
+ pid = rcd->pid;
+ rcd->pid = 0;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ if (rcd->rcvwait_to || rcd->piowait_to ||
+ rcd->rcvnowait || rcd->pionowait) {
+ rcd->rcvwait_to = 0;
+ rcd->piowait_to = 0;
+ rcd->rcvnowait = 0;
+ rcd->pionowait = 0;
+ }
+ if (rcd->flag)
+ rcd->flag = 0;
+
+ if (dd->kregbase) {
+ /* atomically clear receive enable ctxt and intr avail. */
+ dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_DIS |
+ QIB_RCVCTRL_INTRAVAIL_DIS, ctxt);
+
+ /* clean up the pkeys for this ctxt user */
+ qib_clean_part_key(rcd, dd);
+ qib_disarm_piobufs(dd, rcd->pio_base, rcd->piocnt);
+ qib_chg_pioavailkernel(dd, rcd->pio_base,
+ rcd->piocnt, TXCHK_CHG_TYPE_KERN, NULL);
+
+ dd->f_clear_tids(dd, rcd);
+
+ if (dd->pageshadow)
+ unlock_expected_tids(rcd);
+ qib_stats.sps_ctxts--;
+ dd->freectxts++;
+ }
+
+ mutex_unlock(&qib_mutex);
+ qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */
+
+bail:
+ kfree(fd);
+ return ret;
+}
+
+static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo)
+{
+ struct qib_ctxt_info info;
+ int ret;
+ size_t sz;
+ struct qib_ctxtdata *rcd = ctxt_fp(fp);
+ struct qib_filedata *fd;
+
+ fd = fp->private_data;
+
+ info.num_active = qib_count_active_units();
+ info.unit = rcd->dd->unit;
+ info.port = rcd->ppd->port;
+ info.ctxt = rcd->ctxt;
+ info.subctxt = subctxt_fp(fp);
+ /* Number of user ctxts available for this device. */
+ info.num_ctxts = rcd->dd->cfgctxts - rcd->dd->first_user_ctxt;
+ info.num_subctxts = rcd->subctxt_cnt;
+ info.rec_cpu = fd->rec_cpu_num;
+ sz = sizeof(info);
+
+ if (copy_to_user(uinfo, &info, sz)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static int qib_sdma_get_inflight(struct qib_user_sdma_queue *pq,
+ u32 __user *inflightp)
+{
+ const u32 val = qib_user_sdma_inflight_counter(pq);
+
+ if (put_user(val, inflightp))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int qib_sdma_get_complete(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ u32 __user *completep)
+{
+ u32 val;
+ int err;
+
+ if (!pq)
+ return -EINVAL;
+
+ err = qib_user_sdma_make_progress(ppd, pq);
+ if (err < 0)
+ return err;
+
+ val = qib_user_sdma_complete_counter(pq);
+ if (put_user(val, completep))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int disarm_req_delay(struct qib_ctxtdata *rcd)
+{
+ int ret = 0;
+
+ if (!usable(rcd->ppd)) {
+ int i;
+ /*
+ * If the link is down, or otherwise not usable, delay
+ * the caller up to 30 seconds so we don't thrash
+ * trying to get the chip back to ACTIVE, and set a
+ * flag so they make the call again.
+ */
+ if (rcd->user_event_mask) {
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt, if any
+ */
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ for (i = 0; !usable(rcd->ppd) && i < 300; i++)
+ msleep(100);
+ ret = -ENETDOWN;
+ }
+ return ret;
+}
+
+/*
+ * Find all user contexts in use, and set the specified bit in their
+ * event mask.
+ * See also find_ctxt() for a similar use that is specific to send buffers.
+ */
+int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit)
+{
+ struct qib_ctxtdata *rcd;
+ unsigned ctxt;
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->dd->uctxt_lock, flags);
+ for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts;
+ ctxt++) {
+ rcd = ppd->dd->rcd[ctxt];
+ if (!rcd)
+ continue;
+ if (rcd->user_event_mask) {
+ int i;
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt, if any
+ */
+ set_bit(evtbit, &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ set_bit(evtbit, &rcd->user_event_mask[i]);
+ }
+ ret = 1;
+ break;
+ }
+ spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Clear the event notifier events for this context.
+ * For the DISARM_BUFS case, we also take action (this obsoletes
+ * the older QIB_CMD_DISARM_BUFS, but we keep it for backwards
+ * compatibility).
+ * Other bits don't currently require actions; just atomically clear them.
+ * The user process then performs actions appropriate to the bit having
+ * been set, if desired, and checks again in the future.
+ */
+static int qib_user_event_ack(struct qib_ctxtdata *rcd, int subctxt,
+ unsigned long events)
+{
+ int ret = 0, i;
+
+ for (i = 0; i <= _QIB_MAX_EVENT_BIT; i++) {
+ if (!test_bit(i, &events))
+ continue;
+ if (i == _QIB_EVENT_DISARM_BUFS_BIT) {
+ (void)qib_disarm_piobufs_ifneeded(rcd);
+ ret = disarm_req_delay(rcd);
+ } else
+ clear_bit(i, &rcd->user_event_mask[subctxt]);
+ }
+ return ret;
+}
+
+static ssize_t qib_write(struct file *fp, const char __user *data,
+ size_t count, loff_t *off)
+{
+ const struct qib_cmd __user *ucmd;
+ struct qib_ctxtdata *rcd;
+ const void __user *src;
+ size_t consumed, copy = 0;
+ struct qib_cmd cmd;
+ ssize_t ret = 0;
+ void *dest;
+
+ if (count < sizeof(cmd.type)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ ucmd = (const struct qib_cmd __user *) data;
+
+ if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+
+ consumed = sizeof(cmd.type);
+
+ switch (cmd.type) {
+ case QIB_CMD_ASSIGN_CTXT:
+ case QIB_CMD_USER_INIT:
+ copy = sizeof(cmd.cmd.user_info);
+ dest = &cmd.cmd.user_info;
+ src = &ucmd->cmd.user_info;
+ break;
+
+ case QIB_CMD_RECV_CTRL:
+ copy = sizeof(cmd.cmd.recv_ctrl);
+ dest = &cmd.cmd.recv_ctrl;
+ src = &ucmd->cmd.recv_ctrl;
+ break;
+
+ case QIB_CMD_CTXT_INFO:
+ copy = sizeof(cmd.cmd.ctxt_info);
+ dest = &cmd.cmd.ctxt_info;
+ src = &ucmd->cmd.ctxt_info;
+ break;
+
+ case QIB_CMD_TID_UPDATE:
+ case QIB_CMD_TID_FREE:
+ copy = sizeof(cmd.cmd.tid_info);
+ dest = &cmd.cmd.tid_info;
+ src = &ucmd->cmd.tid_info;
+ break;
+
+ case QIB_CMD_SET_PART_KEY:
+ copy = sizeof(cmd.cmd.part_key);
+ dest = &cmd.cmd.part_key;
+ src = &ucmd->cmd.part_key;
+ break;
+
+ case QIB_CMD_DISARM_BUFS:
+ case QIB_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */
+ copy = 0;
+ src = NULL;
+ dest = NULL;
+ break;
+
+ case QIB_CMD_POLL_TYPE:
+ copy = sizeof(cmd.cmd.poll_type);
+ dest = &cmd.cmd.poll_type;
+ src = &ucmd->cmd.poll_type;
+ break;
+
+ case QIB_CMD_ARMLAUNCH_CTRL:
+ copy = sizeof(cmd.cmd.armlaunch_ctrl);
+ dest = &cmd.cmd.armlaunch_ctrl;
+ src = &ucmd->cmd.armlaunch_ctrl;
+ break;
+
+ case QIB_CMD_SDMA_INFLIGHT:
+ copy = sizeof(cmd.cmd.sdma_inflight);
+ dest = &cmd.cmd.sdma_inflight;
+ src = &ucmd->cmd.sdma_inflight;
+ break;
+
+ case QIB_CMD_SDMA_COMPLETE:
+ copy = sizeof(cmd.cmd.sdma_complete);
+ dest = &cmd.cmd.sdma_complete;
+ src = &ucmd->cmd.sdma_complete;
+ break;
+
+ case QIB_CMD_ACK_EVENT:
+ copy = sizeof(cmd.cmd.event_mask);
+ dest = &cmd.cmd.event_mask;
+ src = &ucmd->cmd.event_mask;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (copy) {
+ if ((count - consumed) < copy) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ if (copy_from_user(dest, src, copy)) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ consumed += copy;
+ }
+
+ rcd = ctxt_fp(fp);
+ if (!rcd && cmd.type != QIB_CMD_ASSIGN_CTXT) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ switch (cmd.type) {
+ case QIB_CMD_ASSIGN_CTXT:
+ ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ break;
+
+ case QIB_CMD_USER_INIT:
+ ret = qib_do_user_init(fp, &cmd.cmd.user_info);
+ if (ret)
+ goto bail;
+ ret = qib_get_base_info(fp, (void __user *) (unsigned long)
+ cmd.cmd.user_info.spu_base_info,
+ cmd.cmd.user_info.spu_base_info_size);
+ break;
+
+ case QIB_CMD_RECV_CTRL:
+ ret = qib_manage_rcvq(rcd, subctxt_fp(fp), cmd.cmd.recv_ctrl);
+ break;
+
+ case QIB_CMD_CTXT_INFO:
+ ret = qib_ctxt_info(fp, (struct qib_ctxt_info __user *)
+ (unsigned long) cmd.cmd.ctxt_info);
+ break;
+
+ case QIB_CMD_TID_UPDATE:
+ ret = qib_tid_update(rcd, fp, &cmd.cmd.tid_info);
+ break;
+
+ case QIB_CMD_TID_FREE:
+ ret = qib_tid_free(rcd, subctxt_fp(fp), &cmd.cmd.tid_info);
+ break;
+
+ case QIB_CMD_SET_PART_KEY:
+ ret = qib_set_part_key(rcd, cmd.cmd.part_key);
+ break;
+
+ case QIB_CMD_DISARM_BUFS:
+ (void)qib_disarm_piobufs_ifneeded(rcd);
+ ret = disarm_req_delay(rcd);
+ break;
+
+ case QIB_CMD_PIOAVAILUPD:
+ qib_force_pio_avail_update(rcd->dd);
+ break;
+
+ case QIB_CMD_POLL_TYPE:
+ rcd->poll_type = cmd.cmd.poll_type;
+ break;
+
+ case QIB_CMD_ARMLAUNCH_CTRL:
+ rcd->dd->f_set_armlaunch(rcd->dd, cmd.cmd.armlaunch_ctrl);
+ break;
+
+ case QIB_CMD_SDMA_INFLIGHT:
+ ret = qib_sdma_get_inflight(user_sdma_queue_fp(fp),
+ (u32 __user *) (unsigned long)
+ cmd.cmd.sdma_inflight);
+ break;
+
+ case QIB_CMD_SDMA_COMPLETE:
+ ret = qib_sdma_get_complete(rcd->ppd,
+ user_sdma_queue_fp(fp),
+ (u32 __user *) (unsigned long)
+ cmd.cmd.sdma_complete);
+ break;
+
+ case QIB_CMD_ACK_EVENT:
+ ret = qib_user_event_ack(rcd, subctxt_fp(fp),
+ cmd.cmd.event_mask);
+ break;
+ }
+
+ if (ret >= 0)
+ ret = consumed;
+
+bail:
+ return ret;
+}
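The command framing handled above is a type word followed by a type-specific payload, both taken from a single write(). Below is a hedged user-space sketch of the two-step open sequence (QIB_CMD_ASSIGN_CTXT, then QIB_CMD_USER_INIT); struct qib_cmd, struct qib_user_info and the QIB_USER_SW* constants come from the user ABI header, which is assumed here, and a successful write() returns the number of bytes the driver consumed, which may be less than the count passed in.

#include <string.h>
#include <unistd.h>
/* Assumes the qib user ABI header declaring struct qib_cmd, the
 * QIB_CMD_* command types and QIB_USER_SWMAJOR/SWMINOR is included. */

static int open_context(int fd, void *base_info, size_t base_info_size)
{
	struct qib_cmd cmd;

	/* Step 1: pick a context; the version word is checked by
	 * qib_assign_ctxt(). */
	memset(&cmd, 0, sizeof(cmd));
	cmd.type = QIB_CMD_ASSIGN_CTXT;
	cmd.cmd.user_info.spu_userversion =
		(QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR;
	if (write(fd, &cmd, sizeof(cmd)) < 0)
		return -1;

	/* Step 2: initialize the context and have the base info copied
	 * out to the caller-supplied buffer. */
	cmd.type = QIB_CMD_USER_INIT;
	cmd.cmd.user_info.spu_base_info_size = base_info_size;
	cmd.cmd.user_info.spu_base_info = (unsigned long)base_info;
	if (write(fd, &cmd, sizeof(cmd)) < 0)
		return -1;
	return 0;
}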
+
+static ssize_t qib_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long dim, loff_t off)
+{
+ struct qib_filedata *fp = iocb->ki_filp->private_data;
+ struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
+ struct qib_user_sdma_queue *pq = fp->pq;
+
+ if (!dim || !pq)
+ return -EINVAL;
+
+ return qib_user_sdma_writev(rcd, pq, iov, dim);
+}
+
+static struct class *qib_class;
+static dev_t qib_dev;
+
+int qib_cdev_init(int minor, const char *name,
+ const struct file_operations *fops,
+ struct cdev **cdevp, struct device **devp)
+{
+ const dev_t dev = MKDEV(MAJOR(qib_dev), minor);
+ struct cdev *cdev;
+ struct device *device = NULL;
+ int ret;
+
+ cdev = cdev_alloc();
+ if (!cdev) {
+ pr_err("Could not allocate cdev for minor %d, %s\n",
+ minor, name);
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ cdev->owner = THIS_MODULE;
+ cdev->ops = fops;
+ kobject_set_name(&cdev->kobj, name);
+
+ ret = cdev_add(cdev, dev, 1);
+ if (ret < 0) {
+ pr_err("Could not add cdev for minor %d, %s (err %d)\n",
+ minor, name, -ret);
+ goto err_cdev;
+ }
+
+ device = device_create(qib_class, NULL, dev, NULL, "%s", name);
+ if (!IS_ERR(device))
+ goto done;
+ ret = PTR_ERR(device);
+ device = NULL;
+ pr_err("Could not create device for minor %d, %s (err %d)\n",
+ minor, name, -ret);
+err_cdev:
+ cdev_del(cdev);
+ cdev = NULL;
+done:
+ *cdevp = cdev;
+ *devp = device;
+ return ret;
+}
+
+void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp)
+{
+ struct device *device = *devp;
+
+ if (device) {
+ device_unregister(device);
+ *devp = NULL;
+ }
+
+ if (*cdevp) {
+ cdev_del(*cdevp);
+ *cdevp = NULL;
+ }
+}
+
+static struct cdev *wildcard_cdev;
+static struct device *wildcard_device;
+
+int __init qib_dev_init(void)
+{
+ int ret;
+
+ ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME);
+ if (ret < 0) {
+ pr_err("Could not allocate chrdev region (err %d)\n", -ret);
+ goto done;
+ }
+
+ qib_class = class_create(THIS_MODULE, "ipath");
+ if (IS_ERR(qib_class)) {
+ ret = PTR_ERR(qib_class);
+ pr_err("Could not create device class (err %d)\n", -ret);
+ unregister_chrdev_region(qib_dev, QIB_NMINORS);
+ }
+
+done:
+ return ret;
+}
+
+void qib_dev_cleanup(void)
+{
+ if (qib_class) {
+ class_destroy(qib_class);
+ qib_class = NULL;
+ }
+
+ unregister_chrdev_region(qib_dev, QIB_NMINORS);
+}
+
+static atomic_t user_count = ATOMIC_INIT(0);
+
+static void qib_user_remove(struct qib_devdata *dd)
+{
+ if (atomic_dec_return(&user_count) == 0)
+ qib_cdev_cleanup(&wildcard_cdev, &wildcard_device);
+
+ qib_cdev_cleanup(&dd->user_cdev, &dd->user_device);
+}
+
+static int qib_user_add(struct qib_devdata *dd)
+{
+ char name[10];
+ int ret;
+
+ if (atomic_inc_return(&user_count) == 1) {
+ ret = qib_cdev_init(0, "ipath", &qib_file_ops,
+ &wildcard_cdev, &wildcard_device);
+ if (ret)
+ goto done;
+ }
+
+ snprintf(name, sizeof(name), "ipath%d", dd->unit);
+ ret = qib_cdev_init(dd->unit + 1, name, &qib_file_ops,
+ &dd->user_cdev, &dd->user_device);
+ if (ret)
+ qib_user_remove(dd);
+done:
+ return ret;
+}
+
+/*
+ * Create per-unit files in /dev
+ */
+int qib_device_create(struct qib_devdata *dd)
+{
+ int r, ret;
+
+ r = qib_user_add(dd);
+ ret = qib_diag_add(dd);
+ if (r && !ret)
+ ret = r;
+ return ret;
+}
+
+/*
+ * Remove per-unit files in /dev
+ * void, core kernel returns no errors for this stuff
+ */
+void qib_device_remove(struct qib_devdata *dd)
+{
+ qib_user_remove(dd);
+ qib_diag_remove(dd);
+}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
new file mode 100644
index 00000000000..cab610ccd50
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/namei.h>
+
+#include "qib.h"
+
+#define QIBFS_MAGIC 0x726a77
+
+static struct super_block *qib_super;
+
+#define private2dd(file) (file_inode(file)->i_private)
+
+static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
+ umode_t mode, const struct file_operations *fops,
+ void *data)
+{
+ int error;
+ struct inode *inode = new_inode(dir->i_sb);
+
+ if (!inode) {
+ error = -EPERM;
+ goto bail;
+ }
+
+ inode->i_ino = get_next_ino();
+ inode->i_mode = mode;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_blocks = 0;
+ inode->i_atime = CURRENT_TIME;
+ inode->i_mtime = inode->i_atime;
+ inode->i_ctime = inode->i_atime;
+ inode->i_private = data;
+ if (S_ISDIR(mode)) {
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ inc_nlink(dir);
+ }
+
+ inode->i_fop = fops;
+
+ d_instantiate(dentry, inode);
+ error = 0;
+
+bail:
+ return error;
+}
+
+static int create_file(const char *name, umode_t mode,
+ struct dentry *parent, struct dentry **dentry,
+ const struct file_operations *fops, void *data)
+{
+ int error;
+
+ *dentry = NULL;
+ mutex_lock(&parent->d_inode->i_mutex);
+ *dentry = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(*dentry))
+ error = qibfs_mknod(parent->d_inode, *dentry,
+ mode, fops, data);
+ else
+ error = PTR_ERR(*dentry);
+ mutex_unlock(&parent->d_inode->i_mutex);
+
+ return error;
+}
+
+static ssize_t driver_stats_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ qib_stats.sps_ints = qib_sps_ints();
+ return simple_read_from_buffer(buf, count, ppos, &qib_stats,
+ sizeof qib_stats);
+}
+
+/*
+ * Driver stats field names, one line per stat, as a single string.  Used by
+ * programs like ipathstats to print the stats in a way which works for
+ * different versions of drivers, without changing program source.
+ * If qlogic_ib_stats changes, this needs to change.  Names need to be
+ * 12 chars or less (w/o newline), for proper display by the ipathstats
+ * utility.
+ */
+static const char qib_statnames[] =
+ "KernIntr\n"
+ "ErrorIntr\n"
+ "Tx_Errs\n"
+ "Rcv_Errs\n"
+ "H/W_Errs\n"
+ "NoPIOBufs\n"
+ "CtxtsOpen\n"
+ "RcvLen_Errs\n"
+ "EgrBufFull\n"
+ "EgrHdrFull\n"
+ ;
+
+static ssize_t driver_names_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return simple_read_from_buffer(buf, count, ppos, qib_statnames,
+ sizeof qib_statnames - 1); /* no null */
+}
+
+static const struct file_operations driver_ops[] = {
+ { .read = driver_stats_read, .llseek = generic_file_llseek, },
+ { .read = driver_names_read, .llseek = generic_file_llseek, },
+};
+
+/* read the per-device counters */
+static ssize_t dev_counters_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 *counters;
+ size_t avail;
+ struct qib_devdata *dd = private2dd(file);
+
+ avail = dd->f_read_cntrs(dd, *ppos, NULL, &counters);
+ return simple_read_from_buffer(buf, count, ppos, counters, avail);
+}
+
+/* read the per-device counters */
+static ssize_t dev_names_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ char *names;
+ size_t avail;
+ struct qib_devdata *dd = private2dd(file);
+
+ avail = dd->f_read_cntrs(dd, *ppos, &names, NULL);
+ return simple_read_from_buffer(buf, count, ppos, names, avail);
+}
+
+static const struct file_operations cntr_ops[] = {
+ { .read = dev_counters_read, .llseek = generic_file_llseek, },
+ { .read = dev_names_read, .llseek = generic_file_llseek, },
+};
+
+/*
+ * Could use file_inode(file)->i_ino to figure out which file,
+ * instead of separate routine for each, but for now, this works...
+ */
+
+/* read the per-port names (same for each port) */
+static ssize_t portnames_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ char *names;
+ size_t avail;
+ struct qib_devdata *dd = private2dd(file);
+
+ avail = dd->f_read_portcntrs(dd, *ppos, 0, &names, NULL);
+ return simple_read_from_buffer(buf, count, ppos, names, avail);
+}
+
+/* read the per-port counters for port 1 (pidx 0) */
+static ssize_t portcntrs_1_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 *counters;
+ size_t avail;
+ struct qib_devdata *dd = private2dd(file);
+
+ avail = dd->f_read_portcntrs(dd, *ppos, 0, NULL, &counters);
+ return simple_read_from_buffer(buf, count, ppos, counters, avail);
+}
+
+/* read the per-port counters for port 2 (pidx 1) */
+static ssize_t portcntrs_2_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 *counters;
+ size_t avail;
+ struct qib_devdata *dd = private2dd(file);
+
+ avail = dd->f_read_portcntrs(dd, *ppos, 1, NULL, &counters);
+ return simple_read_from_buffer(buf, count, ppos, counters, avail);
+}
+
+static const struct file_operations portcntr_ops[] = {
+ { .read = portnames_read, .llseek = generic_file_llseek, },
+ { .read = portcntrs_1_read, .llseek = generic_file_llseek, },
+ { .read = portcntrs_2_read, .llseek = generic_file_llseek, },
+};
+
+/*
+ * read the per-port QSFP data for port 1 (pidx 0)
+ */
+static ssize_t qsfp_1_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct qib_devdata *dd = private2dd(file);
+ char *tmp;
+ int ret;
+
+ tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ ret = qib_qsfp_dump(dd->pport, tmp, PAGE_SIZE);
+ if (ret > 0)
+ ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
+ kfree(tmp);
+ return ret;
+}
+
+/*
+ * read the per-port QSFP data for port 2 (pidx 1)
+ */
+static ssize_t qsfp_2_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct qib_devdata *dd = private2dd(file);
+ char *tmp;
+ int ret;
+
+ if (dd->num_pports < 2)
+ return -ENODEV;
+
+ tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ ret = qib_qsfp_dump(dd->pport + 1, tmp, PAGE_SIZE);
+ if (ret > 0)
+ ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
+ kfree(tmp);
+ return ret;
+}
+
+static const struct file_operations qsfp_ops[] = {
+ { .read = qsfp_1_read, .llseek = generic_file_llseek, },
+ { .read = qsfp_2_read, .llseek = generic_file_llseek, },
+};
+
+static ssize_t flash_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct qib_devdata *dd;
+ ssize_t ret;
+ loff_t pos;
+ char *tmp;
+
+ pos = *ppos;
+
+ if (pos < 0) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (pos >= sizeof(struct qib_flash)) {
+ ret = 0;
+ goto bail;
+ }
+
+ if (count > sizeof(struct qib_flash) - pos)
+ count = sizeof(struct qib_flash) - pos;
+
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ dd = private2dd(file);
+ if (qib_eeprom_read(dd, pos, tmp, count)) {
+ qib_dev_err(dd, "failed to read from flash\n");
+ ret = -ENXIO;
+ goto bail_tmp;
+ }
+
+ if (copy_to_user(buf, tmp, count)) {
+ ret = -EFAULT;
+ goto bail_tmp;
+ }
+
+ *ppos = pos + count;
+ ret = count;
+
+bail_tmp:
+ kfree(tmp);
+
+bail:
+ return ret;
+}
+
+static ssize_t flash_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct qib_devdata *dd;
+ ssize_t ret;
+ loff_t pos;
+ char *tmp;
+
+ pos = *ppos;
+
+ if (pos != 0) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ if (count != sizeof(struct qib_flash)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ tmp = kmalloc(count, GFP_KERNEL);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ if (copy_from_user(tmp, buf, count)) {
+ ret = -EFAULT;
+ goto bail_tmp;
+ }
+
+ dd = private2dd(file);
+ if (qib_eeprom_write(dd, pos, tmp, count)) {
+ ret = -ENXIO;
+ qib_dev_err(dd, "failed to write to flash\n");
+ goto bail_tmp;
+ }
+
+ *ppos = pos + count;
+ ret = count;
+
+bail_tmp:
+ kfree(tmp);
+
+bail:
+ return ret;
+}
+
+static const struct file_operations flash_ops = {
+ .read = flash_read,
+ .write = flash_write,
+ .llseek = default_llseek,
+};
+
+static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
+{
+ struct dentry *dir, *tmp;
+ char unit[10];
+ int ret, i;
+
+ /* create the per-unit directory */
+ snprintf(unit, sizeof unit, "%u", dd->unit);
+ ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
+ &simple_dir_operations, dd);
+ if (ret) {
+ pr_err("create_file(%s) failed: %d\n", unit, ret);
+ goto bail;
+ }
+
+ /* create the files in the new directory */
+ ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp,
+ &cntr_ops[0], dd);
+ if (ret) {
+ pr_err("create_file(%s/counters) failed: %d\n",
+ unit, ret);
+ goto bail;
+ }
+ ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp,
+ &cntr_ops[1], dd);
+ if (ret) {
+ pr_err("create_file(%s/counter_names) failed: %d\n",
+ unit, ret);
+ goto bail;
+ }
+ ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp,
+ &portcntr_ops[0], dd);
+ if (ret) {
+ pr_err("create_file(%s/%s) failed: %d\n",
+ unit, "portcounter_names", ret);
+ goto bail;
+ }
+ for (i = 1; i <= dd->num_pports; i++) {
+ char fname[24];
+
+ sprintf(fname, "port%dcounters", i);
+ /* create the files in the new directory */
+ ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
+ &portcntr_ops[i], dd);
+ if (ret) {
+ pr_err("create_file(%s/%s) failed: %d\n",
+ unit, fname, ret);
+ goto bail;
+ }
+ if (!(dd->flags & QIB_HAS_QSFP))
+ continue;
+ sprintf(fname, "qsfp%d", i);
+ ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
+ &qsfp_ops[i - 1], dd);
+ if (ret) {
+ pr_err("create_file(%s/%s) failed: %d\n",
+ unit, fname, ret);
+ goto bail;
+ }
+ }
+
+ ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
+ &flash_ops, dd);
+ if (ret)
+ pr_err("create_file(%s/flash) failed: %d\n",
+ unit, ret);
+bail:
+ return ret;
+}
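The per-unit files created above are consumed by pairing each name in "counter_names" with the u64 at the same index in "counters", following the same one-line-per-name convention as qib_statnames. A user-space sketch, assuming ipathfs is mounted at /ipathfs and unit 0 exists; the mount point and buffer sizes are arbitrary choices for the example.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void dump_unit0_counters(void)
{
	static char names[16384];
	static uint64_t vals[2048];
	FILE *nf = fopen("/ipathfs/0/counter_names", "r");
	FILE *cf = fopen("/ipathfs/0/counters", "r");
	size_t nlen, nvals, i = 0;
	char *name, *save;

	if (!nf || !cf)
		goto out;
	nlen = fread(names, 1, sizeof(names) - 1, nf);
	names[nlen] = '\0';
	nvals = fread(vals, sizeof(vals[0]), 2048, cf);
	/* Names are newline-separated, in the same order as the values. */
	for (name = strtok_r(names, "\n", &save); name && i < nvals;
	     name = strtok_r(NULL, "\n", &save), i++)
		printf("%-16s %" PRIu64 "\n", name, vals[i]);
out:
	if (nf)
		fclose(nf);
	if (cf)
		fclose(cf);
}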
+
+static int remove_file(struct dentry *parent, char *name)
+{
+ struct dentry *tmp;
+ int ret;
+
+ tmp = lookup_one_len(name, parent, strlen(name));
+
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ goto bail;
+ }
+
+ spin_lock(&tmp->d_lock);
+ if (!(d_unhashed(tmp) && tmp->d_inode)) {
+ __d_drop(tmp);
+ spin_unlock(&tmp->d_lock);
+ simple_unlink(parent->d_inode, tmp);
+ } else {
+ spin_unlock(&tmp->d_lock);
+ }
+ dput(tmp);
+
+ ret = 0;
+bail:
+ /*
+ * We don't expect clients to care about the return value, but
+ * it's there if they need it.
+ */
+ return ret;
+}
+
+static int remove_device_files(struct super_block *sb,
+ struct qib_devdata *dd)
+{
+ struct dentry *dir, *root;
+ char unit[10];
+ int ret, i;
+
+ root = dget(sb->s_root);
+ mutex_lock(&root->d_inode->i_mutex);
+ snprintf(unit, sizeof unit, "%u", dd->unit);
+ dir = lookup_one_len(unit, root, strlen(unit));
+
+ if (IS_ERR(dir)) {
+ ret = PTR_ERR(dir);
+ pr_err("Lookup of %s failed\n", unit);
+ goto bail;
+ }
+
+ mutex_lock(&dir->d_inode->i_mutex);
+ remove_file(dir, "counters");
+ remove_file(dir, "counter_names");
+ remove_file(dir, "portcounter_names");
+ for (i = 0; i < dd->num_pports; i++) {
+ char fname[24];
+
+ sprintf(fname, "port%dcounters", i + 1);
+ remove_file(dir, fname);
+ if (dd->flags & QIB_HAS_QSFP) {
+ sprintf(fname, "qsfp%d", i + 1);
+ remove_file(dir, fname);
+ }
+ }
+ remove_file(dir, "flash");
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = simple_rmdir(root->d_inode, dir);
+ d_delete(dir);
+ dput(dir);
+
+bail:
+ mutex_unlock(&root->d_inode->i_mutex);
+ dput(root);
+ return ret;
+}
+
+/*
+ * This fills everything in when the fs is mounted, to handle umount/mount
+ * after device init. The direct add_cntr_files() call handles adding
+ * them from the init code, when the fs is already mounted.
+ */
+static int qibfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct qib_devdata *dd, *tmp;
+ unsigned long flags;
+ int ret;
+
+ static struct tree_descr files[] = {
+ [2] = {"driver_stats", &driver_ops[0], S_IRUGO},
+ [3] = {"driver_stats_names", &driver_ops[1], S_IRUGO},
+ {""},
+ };
+
+ ret = simple_fill_super(sb, QIBFS_MAGIC, files);
+ if (ret) {
+ pr_err("simple_fill_super failed: %d\n", ret);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+
+ list_for_each_entry_safe(dd, tmp, &qib_dev_list, list) {
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ ret = add_cntr_files(sb, dd);
+ if (ret)
+ goto bail;
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+
+bail:
+ return ret;
+}
+
+static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ struct dentry *ret;
+ ret = mount_single(fs_type, flags, data, qibfs_fill_super);
+ if (!IS_ERR(ret))
+ qib_super = ret->d_sb;
+ return ret;
+}
+
+static void qibfs_kill_super(struct super_block *s)
+{
+ kill_litter_super(s);
+ qib_super = NULL;
+}
+
+int qibfs_add(struct qib_devdata *dd)
+{
+ int ret;
+
+ /*
+ * On first unit initialized, qib_super will not yet exist
+ * because nobody has yet tried to mount the filesystem, so
+ * we can't consider that to be an error; if an error occurs
+ * during the mount, that will get a complaint, so this is OK.
+ * add_cntr_files() for all units is done at mount from
+ * qibfs_fill_super(), so one way or another, everything works.
+ */
+ if (qib_super == NULL)
+ ret = 0;
+ else
+ ret = add_cntr_files(qib_super, dd);
+ return ret;
+}
+
+int qibfs_remove(struct qib_devdata *dd)
+{
+ int ret = 0;
+
+ if (qib_super)
+ ret = remove_device_files(qib_super, dd);
+
+ return ret;
+}
+
+static struct file_system_type qibfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ipathfs",
+ .mount = qibfs_mount,
+ .kill_sb = qibfs_kill_super,
+};
+MODULE_ALIAS_FS("ipathfs");
+
+int __init qib_init_qibfs(void)
+{
+ return register_filesystem(&qibfs_fs_type);
+}
+
+int __exit qib_exit_qibfs(void)
+{
+ return unregister_filesystem(&qibfs_fs_type);
+}
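For completeness, the filesystem registered here is mounted by name from user space; a minimal sketch using mount(2) follows (the mount point /ipathfs is an arbitrary choice for the example).

#include <stdio.h>
#include <sys/mount.h>

static int mount_ipathfs(void)
{
	/* "none" is the conventional dummy source for a nodev filesystem. */
	if (mount("none", "/ipathfs", "ipathfs", 0, NULL)) {
		perror("mount ipathfs");
		return -1;
	}
	return 0;
}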
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
new file mode 100644
index 00000000000..d68266ac761
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -0,0 +1,3599 @@
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * QLogic_IB 6120 PCIe chip.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <rdma/ib_verbs.h>
+
+#include "qib.h"
+#include "qib_6120_regs.h"
+
+static void qib_6120_setup_setextled(struct qib_pportdata *, u32);
+static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op);
+static u8 qib_6120_phys_portstate(u64);
+static u32 qib_6120_iblink_state(u64);
+
+/*
+ * This file contains all the chip-specific register information and
+ * access functions for the QLogic_IB 6120 PCI-Express chip.
+ */
+
+/* KREG_IDX uses machine-generated #defines */
+#define KREG_IDX(regname) (QIB_6120_##regname##_OFFS / sizeof(u64))
+
+/* Use defines to tie machine-generated names to lower-case names */
+#define kr_extctrl KREG_IDX(EXTCtrl)
+#define kr_extstatus KREG_IDX(EXTStatus)
+#define kr_gpio_clear KREG_IDX(GPIOClear)
+#define kr_gpio_mask KREG_IDX(GPIOMask)
+#define kr_gpio_out KREG_IDX(GPIOOut)
+#define kr_gpio_status KREG_IDX(GPIOStatus)
+#define kr_rcvctrl KREG_IDX(RcvCtrl)
+#define kr_sendctrl KREG_IDX(SendCtrl)
+#define kr_partitionkey KREG_IDX(RcvPartitionKey)
+#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibcctrl KREG_IDX(IBCCtrl)
+#define kr_sendbuffererror KREG_IDX(SendBufErr0)
+#define kr_rcvbthqp KREG_IDX(RcvBTHQP)
+#define kr_counterregbase KREG_IDX(CntrRegBase)
+#define kr_palign KREG_IDX(PageAlign)
+#define kr_rcvegrbase KREG_IDX(RcvEgrBase)
+#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt)
+#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt)
+#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize)
+#define kr_rcvhdrsize KREG_IDX(RcvHdrSize)
+#define kr_rcvtidbase KREG_IDX(RcvTIDBase)
+#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_sendctrl KREG_IDX(SendCtrl)
+#define kr_sendpioavailaddr KREG_IDX(SendPIOAvailAddr)
+#define kr_sendpiobufbase KREG_IDX(SendPIOBufBase)
+#define kr_sendpiobufcnt KREG_IDX(SendPIOBufCnt)
+#define kr_sendpiosize KREG_IDX(SendPIOSize)
+#define kr_sendregbase KREG_IDX(SendRegBase)
+#define kr_userregbase KREG_IDX(UserRegBase)
+#define kr_control KREG_IDX(Control)
+#define kr_intclear KREG_IDX(IntClear)
+#define kr_intmask KREG_IDX(IntMask)
+#define kr_intstatus KREG_IDX(IntStatus)
+#define kr_errclear KREG_IDX(ErrClear)
+#define kr_errmask KREG_IDX(ErrMask)
+#define kr_errstatus KREG_IDX(ErrStatus)
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_revision KREG_IDX(Revision)
+#define kr_portcnt KREG_IDX(PortCnt)
+#define kr_serdes_cfg0 KREG_IDX(SerdesCfg0)
+#define kr_serdes_cfg1 (kr_serdes_cfg0 + 1)
+#define kr_serdes_stat KREG_IDX(SerdesStat)
+#define kr_xgxs_cfg KREG_IDX(XGXSCfg)
+
+/* These must only be written via qib_write_kreg_ctxt() */
+#define kr_rcvhdraddr KREG_IDX(RcvHdrAddr0)
+#define kr_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0)
+
+#define CREG_IDX(regname) ((QIB_6120_##regname##_OFFS - \
+ QIB_6120_LBIntCnt_OFFS) / sizeof(u64))
+
+#define cr_badformat CREG_IDX(RxBadFormatCnt)
+#define cr_erricrc CREG_IDX(RxICRCErrCnt)
+#define cr_errlink CREG_IDX(RxLinkProblemCnt)
+#define cr_errlpcrc CREG_IDX(RxLPCRCErrCnt)
+#define cr_errpkey CREG_IDX(RxPKeyMismatchCnt)
+#define cr_rcvflowctrl_err CREG_IDX(RxFlowCtrlErrCnt)
+#define cr_err_rlen CREG_IDX(RxLenErrCnt)
+#define cr_errslen CREG_IDX(TxLenErrCnt)
+#define cr_errtidfull CREG_IDX(RxTIDFullErrCnt)
+#define cr_errtidvalid CREG_IDX(RxTIDValidErrCnt)
+#define cr_errvcrc CREG_IDX(RxVCRCErrCnt)
+#define cr_ibstatuschange CREG_IDX(IBStatusChangeCnt)
+#define cr_lbint CREG_IDX(LBIntCnt)
+#define cr_invalidrlen CREG_IDX(RxMaxMinLenErrCnt)
+#define cr_invalidslen CREG_IDX(TxMaxMinLenErrCnt)
+#define cr_lbflowstall CREG_IDX(LBFlowStallCnt)
+#define cr_pktrcv CREG_IDX(RxDataPktCnt)
+#define cr_pktrcvflowctrl CREG_IDX(RxFlowPktCnt)
+#define cr_pktsend CREG_IDX(TxDataPktCnt)
+#define cr_pktsendflow CREG_IDX(TxFlowPktCnt)
+#define cr_portovfl CREG_IDX(RxP0HdrEgrOvflCnt)
+#define cr_rcvebp CREG_IDX(RxEBPCnt)
+#define cr_rcvovfl CREG_IDX(RxBufOvflCnt)
+#define cr_senddropped CREG_IDX(TxDroppedPktCnt)
+#define cr_sendstall CREG_IDX(TxFlowStallCnt)
+#define cr_sendunderrun CREG_IDX(TxUnderrunCnt)
+#define cr_wordrcv CREG_IDX(RxDwordCnt)
+#define cr_wordsend CREG_IDX(TxDwordCnt)
+#define cr_txunsupvl CREG_IDX(TxUnsupVLErrCnt)
+#define cr_rxdroppkt CREG_IDX(RxDroppedPktCnt)
+#define cr_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt)
+#define cr_iblinkdown CREG_IDX(IBLinkDownedCnt)
+#define cr_ibsymbolerr CREG_IDX(IBSymbolErrCnt)
+
+#define SYM_RMASK(regname, fldname) ((u64) \
+ QIB_6120_##regname##_##fldname##_RMASK)
+#define SYM_MASK(regname, fldname) ((u64) \
+ QIB_6120_##regname##_##fldname##_RMASK << \
+ QIB_6120_##regname##_##fldname##_LSB)
+#define SYM_LSB(regname, fldname) (QIB_6120_##regname##_##fldname##_LSB)
+
+#define SYM_FIELD(value, regname, fldname) ((u64) \
+ (((value) >> SYM_LSB(regname, fldname)) & \
+ SYM_RMASK(regname, fldname)))
+#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask)
+#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask)
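A concrete expansion of the field helpers above, using a hypothetical machine-generated field purely for illustration:

/*
 * Given (hypothetically)
 *	#define QIB_6120_IBCCtrl_LinkCmd_LSB	18
 *	#define QIB_6120_IBCCtrl_LinkCmd_RMASK	0x3
 * the helpers expand as
 *	SYM_LSB(IBCCtrl, LinkCmd)	 -> 18
 *	SYM_MASK(IBCCtrl, LinkCmd)	 -> 0x3ull << 18
 *	SYM_FIELD(val, IBCCtrl, LinkCmd) -> ((val) >> 18) & 0x3
 */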
+
+/* link training states, from IBC */
+#define IB_6120_LT_STATE_DISABLED 0x00
+#define IB_6120_LT_STATE_LINKUP 0x01
+#define IB_6120_LT_STATE_POLLACTIVE 0x02
+#define IB_6120_LT_STATE_POLLQUIET 0x03
+#define IB_6120_LT_STATE_SLEEPDELAY 0x04
+#define IB_6120_LT_STATE_SLEEPQUIET 0x05
+#define IB_6120_LT_STATE_CFGDEBOUNCE 0x08
+#define IB_6120_LT_STATE_CFGRCVFCFG 0x09
+#define IB_6120_LT_STATE_CFGWAITRMT 0x0a
+#define IB_6120_LT_STATE_CFGIDLE 0x0b
+#define IB_6120_LT_STATE_RECOVERRETRAIN 0x0c
+#define IB_6120_LT_STATE_RECOVERWAITRMT 0x0e
+#define IB_6120_LT_STATE_RECOVERIDLE 0x0f
+
+/* link state machine states from IBC */
+#define IB_6120_L_STATE_DOWN 0x0
+#define IB_6120_L_STATE_INIT 0x1
+#define IB_6120_L_STATE_ARM 0x2
+#define IB_6120_L_STATE_ACTIVE 0x3
+#define IB_6120_L_STATE_ACT_DEFER 0x4
+
+static const u8 qib_6120_physportstate[0x20] = {
+ [IB_6120_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
+ [IB_6120_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
+ [IB_6120_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
+ [IB_6120_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
+ [IB_6120_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
+ [IB_6120_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
+ [IB_6120_LT_STATE_CFGDEBOUNCE] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_6120_LT_STATE_CFGRCVFCFG] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_6120_LT_STATE_CFGWAITRMT] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_6120_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_6120_LT_STATE_RECOVERRETRAIN] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_6120_LT_STATE_RECOVERWAITRMT] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_6120_LT_STATE_RECOVERIDLE] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
+};
+
+
+struct qib_chip_specific {
+ u64 __iomem *cregbase;
+ u64 *cntrs;
+ u64 *portcntrs;
+ void *dummy_hdrq; /* used after ctxt close */
+ dma_addr_t dummy_hdrq_phys;
+ spinlock_t kernel_tid_lock; /* no back to back kernel TID writes */
+ spinlock_t user_tid_lock; /* no back to back user TID writes */
+ spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */
+ spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */
+ u64 hwerrmask;
+ u64 errormask;
+ u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */
+ u64 gpio_mask; /* shadow the gpio mask register */
+ u64 extctrl; /* shadow the gpio output enable, etc... */
+ /*
+ * these 5 fields are used to establish deltas for IB symbol
+ * errors and linkrecovery errors. They can be reported on
+ * some chips during link negotiation prior to INIT, and with
+ * DDR when faking DDR negotiations with non-IBTA switches.
+ * The chip counters are adjusted at driver unload if there is
+ * a non-zero delta.
+ */
+ u64 ibdeltainprog;
+ u64 ibsymdelta;
+ u64 ibsymsnap;
+ u64 iblnkerrdelta;
+ u64 iblnkerrsnap;
+ u64 ibcctrl; /* shadow for kr_ibcctrl */
+ u32 lastlinkrecov; /* link recovery issue */
+ int irq;
+ u32 cntrnamelen;
+ u32 portcntrnamelen;
+ u32 ncntrs;
+ u32 nportcntrs;
+ /* used with gpio interrupts to implement IB counters */
+ u32 rxfc_unsupvl_errs;
+ u32 overrun_thresh_errs;
+ /*
+ * these count only cases where _successive_ LocalLinkIntegrity
+ * errors were seen in the receive headers of IB standard packets
+ */
+ u32 lli_errs;
+ u32 lli_counter;
+ u64 lli_thresh;
+ u64 sword; /* total dwords sent (sample result) */
+ u64 rword; /* total dwords received (sample result) */
+ u64 spkts; /* total packets sent (sample result) */
+ u64 rpkts; /* total packets received (sample result) */
+ u64 xmit_wait; /* # of ticks no data sent (sample result) */
+ struct timer_list pma_timer;
+ char emsgbuf[128];
+ char bitsmsgbuf[64];
+ u8 pma_sample_status;
+};
+
+/* ibcctrl bits */
+#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
+/* cycle through TS1/TS2 till OK */
+#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2
+/* wait for TS1, then go on */
+#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3
+#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16
+
+#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
+#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
+#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
+#define QLOGIC_IB_IBCC_LINKCMD_SHIFT 18
+
+/*
+ * We could have a single register get/put routine that takes a group type,
+ * but this is somewhat clearer and cleaner. It also gives us some error
+ * checking. 64 bit register reads should always work, but are inefficient
+ * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
+ * so we use kreg32 wherever possible. User register and counter register
+ * reads are always 32 bit reads, so only one form of those routines.
+ */
+
+/**
+ * qib_read_ureg32 - read 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @ctxt: context number
+ *
+ * Return the contents of a register that is virtualized to be per context.
+ * Returns -1 on errors (not distinguishable from valid contents at
+ * runtime; we may add a separate error variable at some point).
+ */
+static inline u32 qib_read_ureg32(const struct qib_devdata *dd,
+ enum qib_ureg regno, int ctxt)
+{
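+ /*
+ * Per-context user registers are reached either through a separately
+ * mapped region (userbase) or at offset uregbase within the kernel
+ * register mapping; each context's register page is ureg_align bytes
+ * beyond the previous one.
+ */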
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+
+ if (dd->userbase)
+ return readl(regno + (u64 __iomem *)
+ ((char __iomem *)dd->userbase +
+ dd->ureg_align * ctxt));
+ else
+ return readl(regno + (u64 __iomem *)
+ (dd->uregbase +
+ (char __iomem *)dd->kregbase +
+ dd->ureg_align * ctxt));
+}
+
+/**
+ * qib_write_ureg - write 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @value: value
+ * @ctxt: context
+ *
+ * Write the contents of a register that is virtualized to be per context.
+ */
+static inline void qib_write_ureg(const struct qib_devdata *dd,
+ enum qib_ureg regno, u64 value, int ctxt)
+{
+ u64 __iomem *ubase;
+ if (dd->userbase)
+ ubase = (u64 __iomem *)
+ ((char __iomem *) dd->userbase +
+ dd->ureg_align * ctxt);
+ else
+ ubase = (u64 __iomem *)
+ (dd->uregbase +
+ (char __iomem *) dd->kregbase +
+ dd->ureg_align * ctxt);
+
+ if (dd->kregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &ubase[regno]);
+}
+
+static inline u32 qib_read_kreg32(const struct qib_devdata *dd,
+ const u16 regno)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return -1;
+ return readl((u32 __iomem *)&dd->kregbase[regno]);
+}
+
+static inline u64 qib_read_kreg64(const struct qib_devdata *dd,
+ const u16 regno)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return -1;
+
+ return readq(&dd->kregbase[regno]);
+}
+
+static inline void qib_write_kreg(const struct qib_devdata *dd,
+ const u16 regno, u64 value)
+{
+ if (dd->kregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &dd->kregbase[regno]);
+}
+
+/**
+ * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register
+ * @dd: the qlogic_ib device
+ * @regno: the register number to write
+ * @ctxt: the context containing the register
+ * @value: the value to write
+ */
+static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd,
+ const u16 regno, unsigned ctxt,
+ u64 value)
+{
+ qib_write_kreg(dd, regno + ctxt, value);
+}
+
+static inline void write_6120_creg(const struct qib_devdata *dd,
+ u16 regno, u64 value)
+{
+ if (dd->cspec->cregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &dd->cspec->cregbase[regno]);
+}
+
+static inline u64 read_6120_creg(const struct qib_devdata *dd, u16 regno)
+{
+ if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readq(&dd->cspec->cregbase[regno]);
+}
+
+static inline u32 read_6120_creg32(const struct qib_devdata *dd, u16 regno)
+{
+ if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readl(&dd->cspec->cregbase[regno]);
+}
+
+/* kr_control bits */
+#define QLOGIC_IB_C_RESET 1U
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define QLOGIC_IB_I_RCVURG_MASK ((1U << 5) - 1)
+#define QLOGIC_IB_I_RCVURG_SHIFT 0
+#define QLOGIC_IB_I_RCVAVAIL_MASK ((1U << 5) - 1)
+#define QLOGIC_IB_I_RCVAVAIL_SHIFT 12
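+/* one RCVURG and one RCVAVAIL bit per receive context (5-bit fields here) */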
+
+#define QLOGIC_IB_C_FREEZEMODE 0x00000002
+#define QLOGIC_IB_C_LINKENABLE 0x00000004
+#define QLOGIC_IB_I_ERROR 0x0000000080000000ULL
+#define QLOGIC_IB_I_SPIOSENT 0x0000000040000000ULL
+#define QLOGIC_IB_I_SPIOBUFAVAIL 0x0000000020000000ULL
+#define QLOGIC_IB_I_GPIO 0x0000000010000000ULL
+#define QLOGIC_IB_I_BITSEXTANT \
+ ((QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT) | \
+ (QLOGIC_IB_I_RCVAVAIL_MASK << \
+ QLOGIC_IB_I_RCVAVAIL_SHIFT) | \
+ QLOGIC_IB_I_ERROR | QLOGIC_IB_I_SPIOSENT | \
+ QLOGIC_IB_I_SPIOBUFAVAIL | QLOGIC_IB_I_GPIO)
+
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL
+#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT 0
+#define QLOGIC_IB_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
+#define QLOGIC_IB_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
+#define QLOGIC_IB_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
+#define QLOGIC_IB_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
+#define QLOGIC_IB_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
+#define QLOGIC_IB_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
+#define QLOGIC_IB_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
+#define QLOGIC_IB_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
+#define QLOGIC_IB_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
+#define QLOGIC_IB_HWE_SERDESPLLFAILED 0x1000000000000000ULL
+
+
+/* kr_extstatus bits */
+#define QLOGIC_IB_EXTS_FREQSEL 0x2
+#define QLOGIC_IB_EXTS_SERDESSEL 0x4
+#define QLOGIC_IB_EXTS_MEMBIST_ENDTEST 0x0000000000004000
+#define QLOGIC_IB_EXTS_MEMBIST_FOUND 0x0000000000008000
+
+/* kr_xgxsconfig bits */
+#define QLOGIC_IB_XGXS_RESET 0x5ULL
+
+#define _QIB_GPIO_SDA_NUM 1
+#define _QIB_GPIO_SCL_NUM 0
+
+/* Bits in GPIO for the added IB link interrupts */
+#define GPIO_RXUVL_BIT 3
+#define GPIO_OVRUN_BIT 4
+#define GPIO_LLI_BIT 5
+#define GPIO_ERRINTR_MASK 0x38
+
+
+#define QLOGIC_IB_RT_BUFSIZE_MASK 0xe0000000ULL
+#define QLOGIC_IB_RT_BUFSIZE_SHIFTVAL(tid) \
+ ((((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) >> 29) + 11 - 1)
+#define QLOGIC_IB_RT_BUFSIZE(tid) (1 << QLOGIC_IB_RT_BUFSIZE_SHIFTVAL(tid))
+#define QLOGIC_IB_RT_IS_VALID(tid) \
+ (((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) && \
+ ((((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) != QLOGIC_IB_RT_BUFSIZE_MASK)))
+#define QLOGIC_IB_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */
+#define QLOGIC_IB_RT_ADDR_SHIFT 10
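+/*
+ * A TID word thus packs a buffer-size code in bits 31:29 (1 = 2KB,
+ * 2 = 4KB, matching the tidtemplate values set in qib_6120_tidtemplate())
+ * and the 2KB-aligned physical address in the low 29 bits.
+ */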
+
+#define QLOGIC_IB_R_INTRAVAIL_SHIFT 16
+#define QLOGIC_IB_R_TAILUPD_SHIFT 31
+#define IBA6120_R_PKEY_DIS_SHIFT 30
+
+#define PBC_6120_VL15_SEND_CTRL (1ULL << 31) /* pbc; VL15; link_buf only */
+
+#define IBCBUSFRSPCPARITYERR HWE_MASK(IBCBusFromSPCParityErr)
+#define IBCBUSTOSPCPARITYERR HWE_MASK(IBCBusToSPCParityErr)
+
+#define SYM_MASK_BIT(regname, fldname, bit) ((u64) \
+ ((1ULL << (SYM_LSB(regname, fldname) + (bit)))))
+
+#define TXEMEMPARITYERR_PIOBUF \
+ SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 0)
+#define TXEMEMPARITYERR_PIOPBC \
+ SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 1)
+#define TXEMEMPARITYERR_PIOLAUNCHFIFO \
+ SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 2)
+
+#define RXEMEMPARITYERR_RCVBUF \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 0)
+#define RXEMEMPARITYERR_LOOKUPQ \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 1)
+#define RXEMEMPARITYERR_EXPTID \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 2)
+#define RXEMEMPARITYERR_EAGERTID \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 3)
+#define RXEMEMPARITYERR_FLAGBUF \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 4)
+#define RXEMEMPARITYERR_DATAINFO \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 5)
+#define RXEMEMPARITYERR_HDRINFO \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 6)
+
+/* 6120 specific hardware errors... */
+static const struct qib_hwerror_msgs qib_6120_hwerror_msgs[] = {
+ /* generic hardware errors */
+ QLOGIC_IB_HWE_MSG(IBCBUSFRSPCPARITYERR, "QIB2IB Parity"),
+ QLOGIC_IB_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2QIB Parity"),
+
+ QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOBUF,
+ "TXE PIOBUF Memory Parity"),
+ QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOPBC,
+ "TXE PIOPBC Memory Parity"),
+ QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOLAUNCHFIFO,
+ "TXE PIOLAUNCHFIFO Memory Parity"),
+
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_RCVBUF,
+ "RXE RCVBUF Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_LOOKUPQ,
+ "RXE LOOKUPQ Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EAGERTID,
+ "RXE EAGERTID Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EXPTID,
+ "RXE EXPTID Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_FLAGBUF,
+ "RXE FLAGBUF Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_DATAINFO,
+ "RXE DATAINFO Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_HDRINFO,
+ "RXE HDRINFO Memory Parity"),
+
+ /* chip-specific hardware errors */
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEPOISONEDTLP,
+ "PCIe Poisoned TLP"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT,
+ "PCIe completion timeout"),
+ /*
+ * In practice, it's unlikely that we'll see PCIe PLL, or bus
+ * parity or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE1PLLFAILED,
+ "PCIePLL1"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE0PLLFAILED,
+ "PCIePLL0"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXTLH,
+ "PCIe XTLH core parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXADM,
+ "PCIe ADM TX core parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYRADM,
+ "PCIe ADM RX core parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SERDESPLLFAILED,
+ "SerDes PLL"),
+};
+
+#define TXE_PIO_PARITY (TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC)
+#define _QIB_PLL_FAIL (QLOGIC_IB_HWE_COREPLL_FBSLIP | \
+ QLOGIC_IB_HWE_COREPLL_RFSLIP)
+
+ /* variables for sanity checking interrupt and errors */
+#define IB_HWE_BITSEXTANT \
+ (HWE_MASK(RXEMemParityErr) | \
+ HWE_MASK(TXEMemParityErr) | \
+ (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << \
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) | \
+ QLOGIC_IB_HWE_PCIE1PLLFAILED | \
+ QLOGIC_IB_HWE_PCIE0PLLFAILED | \
+ QLOGIC_IB_HWE_PCIEPOISONEDTLP | \
+ QLOGIC_IB_HWE_PCIECPLTIMEOUT | \
+ QLOGIC_IB_HWE_PCIEBUSPARITYXTLH | \
+ QLOGIC_IB_HWE_PCIEBUSPARITYXADM | \
+ QLOGIC_IB_HWE_PCIEBUSPARITYRADM | \
+ HWE_MASK(PowerOnBISTFailed) | \
+ QLOGIC_IB_HWE_COREPLL_FBSLIP | \
+ QLOGIC_IB_HWE_COREPLL_RFSLIP | \
+ QLOGIC_IB_HWE_SERDESPLLFAILED | \
+ HWE_MASK(IBCBusToSPCParityErr) | \
+ HWE_MASK(IBCBusFromSPCParityErr))
+
+#define IB_E_BITSEXTANT \
+ (ERR_MASK(RcvFormatErr) | ERR_MASK(RcvVCRCErr) | \
+ ERR_MASK(RcvICRCErr) | ERR_MASK(RcvMinPktLenErr) | \
+ ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvLongPktLenErr) | \
+ ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvUnexpectedCharErr) | \
+ ERR_MASK(RcvUnsupportedVLErr) | ERR_MASK(RcvEBPErr) | \
+ ERR_MASK(RcvIBFlowErr) | ERR_MASK(RcvBadVersionErr) | \
+ ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | \
+ ERR_MASK(RcvBadTidErr) | ERR_MASK(RcvHdrLenErr) | \
+ ERR_MASK(RcvHdrErr) | ERR_MASK(RcvIBLostLinkErr) | \
+ ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendMaxPktLenErr) | \
+ ERR_MASK(SendUnderRunErr) | ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(SendPioArmLaunchErr) | \
+ ERR_MASK(SendUnexpectedPktNumErr) | \
+ ERR_MASK(SendUnsupportedVLErr) | ERR_MASK(IBStatusChanged) | \
+ ERR_MASK(InvalidAddrErr) | ERR_MASK(ResetNegated) | \
+ ERR_MASK(HardwareErr))
+
+#define QLOGIC_IB_E_PKTERRS ( \
+ ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(RcvVCRCErr) | \
+ ERR_MASK(RcvICRCErr) | \
+ ERR_MASK(RcvShortPktLenErr) | \
+ ERR_MASK(RcvEBPErr))
+
+/* These are all rcv-related errors which we want to count for stats */
+#define E_SUM_PKTERRS \
+ (ERR_MASK(RcvHdrLenErr) | ERR_MASK(RcvBadTidErr) | \
+ ERR_MASK(RcvBadVersionErr) | ERR_MASK(RcvHdrErr) | \
+ ERR_MASK(RcvLongPktLenErr) | ERR_MASK(RcvShortPktLenErr) | \
+ ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \
+ ERR_MASK(RcvFormatErr) | ERR_MASK(RcvUnsupportedVLErr) | \
+ ERR_MASK(RcvUnexpectedCharErr) | ERR_MASK(RcvEBPErr))
+
+/* These are all send-related errors which we want to count for stats */
+#define E_SUM_ERRS \
+ (ERR_MASK(SendPioArmLaunchErr) | \
+ ERR_MASK(SendUnexpectedPktNumErr) | \
+ ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnsupportedVLErr) | \
+ ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(InvalidAddrErr))
+
+/*
+ * Similar to E_SUM_ERRS, but armlaunch errors can't be ignored here (more
+ * could arrive while we are still cleaning up and would need to be
+ * cancelled as they happen), and errors unrelated to freeze and cancelling
+ * buffers are not ignored either.
+ */
+#define E_SPKT_ERRS_IGNORE \
+ (ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendMinPktLenErr) | \
+ ERR_MASK(SendPktLenErr))
+
+/*
+ * these are errors that can occur when the link changes state while
+ * a packet is being sent or received. This doesn't cover things
+ * like EBP or VCRC errors, which can result from the sender seeing the
+ * link change state mid-packet, so that we receive a "known bad" packet.
+ */
+#define E_SUM_LINK_PKTERRS \
+ (ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \
+ ERR_MASK(RcvUnexpectedCharErr))
+
+static void qib_6120_put_tid_2(struct qib_devdata *, u64 __iomem *,
+ u32, unsigned long);
+
+/*
+ * On platforms using this chip, and not having ordered WC stores, we
+ * can get TXE parity errors due to speculative reads to the PIO buffers,
+ * and this, due to a chip issue can result in (many) false parity error
+ * reports. So it's a debug print on those, and an info print on systems
+ * where the speculative reads don't occur.
+ */
+static void qib_6120_txe_recover(struct qib_devdata *dd)
+{
+ if (!qib_unordered_wc())
+ qib_devinfo(dd->pcidev,
+ "Recovering from TXE PIO parity error\n");
+}
+
+/* enable/disable chip from delivering interrupts */
+static void qib_6120_set_intr_state(struct qib_devdata *dd, u32 enable)
+{
+ if (enable) {
+ if (dd->flags & QIB_BADINTR)
+ return;
+ qib_write_kreg(dd, kr_intmask, ~0ULL);
+ /* force re-interrupt of any pending interrupts. */
+ qib_write_kreg(dd, kr_intclear, 0ULL);
+ } else
+ qib_write_kreg(dd, kr_intmask, 0ULL);
+}
+
+/*
+ * Try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ * This is in chip-specific code because of all of the register accesses,
+ * even though the details are similar on most chips
+ */
+static void qib_6120_clear_freeze(struct qib_devdata *dd)
+{
+ /* disable error interrupts, to avoid confusion */
+ qib_write_kreg(dd, kr_errmask, 0ULL);
+
+ /* also disable interrupts; errormask is sometimes overwritten */
+ qib_6120_set_intr_state(dd, 0);
+
+ qib_cancel_sends(dd->pport);
+
+ /* clear the freeze, and be sure chip saw it */
+ qib_write_kreg(dd, kr_control, dd->control);
+ qib_read_kreg32(dd, kr_scratch);
+
+ /* force in-memory update now we are out of freeze */
+ qib_force_pio_avail_update(dd);
+
+ /*
+ * force new interrupt if any hwerr, error or interrupt bits are
+ * still set, and clear "safe" send packet errors related to freeze
+ * and cancelling sends. Re-enable error interrupts before possible
+ * force of re-interrupt on pending interrupts.
+ */
+ qib_write_kreg(dd, kr_hwerrclear, 0ULL);
+ qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+ qib_6120_set_intr_state(dd, 1);
+}
+
+/**
+ * qib_handle_6120_hwerrors - display hardware errors.
+ * @dd: the qlogic_ib device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now we'll print
+ * them and continue. We reuse the same message buffer as
+ * handle_6120_errors() to avoid excessive stack usage.
+ */
+static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
+ size_t msgl)
+{
+ u64 hwerrs;
+ u32 bits, ctrl;
+ int isfatal = 0;
+ char *bitsmsg;
+ int log_idx;
+
+ hwerrs = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (!hwerrs)
+ return;
+ if (hwerrs == ~0ULL) {
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
+ return;
+ }
+ qib_stats.sps_hwerrs++;
+
+ /* Always clear the error status register, except MEMBISTFAIL,
+ * regardless of whether we continue or stop using the chip.
+ * We want that set so we know it failed, even across driver reload.
+ * We'll still ignore it in the hwerrmask. We do this partly for
+ * diagnostics, but also for support */
+ qib_write_kreg(dd, kr_hwerrclear,
+ hwerrs & ~HWE_MASK(PowerOnBISTFailed));
+
+ hwerrs &= dd->cspec->hwerrmask;
+
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log)
+ qib_inc_eeprom_err(dd, log_idx, 1);
+
+ /*
+ * Make sure we get this much out, unless told to be quiet,
+ * or it's occurred within the last 5 seconds.
+ */
+ if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID))
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
+
+ if (hwerrs & ~IB_HWE_BITSEXTANT)
+ qib_dev_err(dd,
+ "hwerror interrupt with unknown errors %llx set\n",
+ (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT));
+
+ ctrl = qib_read_kreg32(dd, kr_control);
+ if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) {
+ /*
+ * Parity errors in send memory are recoverable,
+ * just cancel the send (if indicated in sendbuffererror),
+ * count the occurrence, unfreeze (if no other handled
+ * hardware error bits are set), and continue. They can
+ * occur if a processor speculative read is done to the PIO
+ * buffer while we are sending a packet, for example.
+ */
+ if (hwerrs & TXE_PIO_PARITY) {
+ qib_6120_txe_recover(dd);
+ hwerrs &= ~TXE_PIO_PARITY;
+ }
+
+ if (!hwerrs) {
+ static u32 freeze_cnt;
+
+ freeze_cnt++;
+ qib_6120_clear_freeze(dd);
+ } else
+ isfatal = 1;
+ }
+
+ *msg = '\0';
+
+ if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
+ isfatal = 1;
+ strlcat(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ qib_format_hwerrors(hwerrs, qib_6120_hwerror_msgs,
+ ARRAY_SIZE(qib_6120_hwerror_msgs), msg, msgl);
+
+ bitsmsg = dd->cspec->bitsmsgbuf;
+ if (hwerrs & (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK <<
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT)) {
+ bits = (u32) ((hwerrs >>
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) &
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK);
+ snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf,
+ "[PCIe Mem Parity Errs %x] ", bits);
+ strlcat(msg, bitsmsg, msgl);
+ }
+
+ if (hwerrs & _QIB_PLL_FAIL) {
+ isfatal = 1;
+ snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf,
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
+ (unsigned long long) hwerrs & _QIB_PLL_FAIL);
+ strlcat(msg, bitsmsg, msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->cspec->hwerrmask &= ~(hwerrs & _QIB_PLL_FAIL);
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ if (hwerrs & QLOGIC_IB_HWE_SERDESPLLFAILED) {
+ /*
+ * If it occurs, it is left masked since the external
+ * interface is unused
+ */
+ dd->cspec->hwerrmask &= ~QLOGIC_IB_HWE_SERDESPLLFAILED;
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ if (hwerrs)
+ /*
+ * if any set that we aren't ignoring; only
+ * make the complaint once, in case it's stuck
+ * or recurring, and we get here multiple
+ * times.
+ */
+ qib_dev_err(dd, "%s hardware error\n", msg);
+ else
+ *msg = 0; /* recovered from all of them */
+
+ if (isfatal && !dd->diag_client) {
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
+ /*
+ * for /sys status file and user programs to print; if no
+ * trailing brace is copied, we'll know it was truncated.
+ */
+ if (dd->freezemsg)
+ snprintf(dd->freezemsg, dd->freezelen,
+ "{%s}", msg);
+ qib_disable_after_error(dd);
+ }
+}
+
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not depending on "normal packet errors" vs everything
+ * else. Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so caller can decide what to print with the string.
+ */
+static int qib_decode_6120_err(struct qib_devdata *dd, char *buf, size_t blen,
+ u64 err)
+{
+ int iserr = 1;
+
+ *buf = '\0';
+ if (err & QLOGIC_IB_E_PKTERRS) {
+ if (!(err & ~QLOGIC_IB_E_PKTERRS))
+ iserr = 0;
+ if ((err & ERR_MASK(RcvICRCErr)) &&
+ !(err&(ERR_MASK(RcvVCRCErr)|ERR_MASK(RcvEBPErr))))
+ strlcat(buf, "CRC ", blen);
+ if (!iserr)
+ goto done;
+ }
+ if (err & ERR_MASK(RcvHdrLenErr))
+ strlcat(buf, "rhdrlen ", blen);
+ if (err & ERR_MASK(RcvBadTidErr))
+ strlcat(buf, "rbadtid ", blen);
+ if (err & ERR_MASK(RcvBadVersionErr))
+ strlcat(buf, "rbadversion ", blen);
+ if (err & ERR_MASK(RcvHdrErr))
+ strlcat(buf, "rhdr ", blen);
+ if (err & ERR_MASK(RcvLongPktLenErr))
+ strlcat(buf, "rlongpktlen ", blen);
+ if (err & ERR_MASK(RcvMaxPktLenErr))
+ strlcat(buf, "rmaxpktlen ", blen);
+ if (err & ERR_MASK(RcvMinPktLenErr))
+ strlcat(buf, "rminpktlen ", blen);
+ if (err & ERR_MASK(SendMinPktLenErr))
+ strlcat(buf, "sminpktlen ", blen);
+ if (err & ERR_MASK(RcvFormatErr))
+ strlcat(buf, "rformaterr ", blen);
+ if (err & ERR_MASK(RcvUnsupportedVLErr))
+ strlcat(buf, "runsupvl ", blen);
+ if (err & ERR_MASK(RcvUnexpectedCharErr))
+ strlcat(buf, "runexpchar ", blen);
+ if (err & ERR_MASK(RcvIBFlowErr))
+ strlcat(buf, "ribflow ", blen);
+ if (err & ERR_MASK(SendUnderRunErr))
+ strlcat(buf, "sunderrun ", blen);
+ if (err & ERR_MASK(SendPioArmLaunchErr))
+ strlcat(buf, "spioarmlaunch ", blen);
+ if (err & ERR_MASK(SendUnexpectedPktNumErr))
+ strlcat(buf, "sunexperrpktnum ", blen);
+ if (err & ERR_MASK(SendDroppedSmpPktErr))
+ strlcat(buf, "sdroppedsmppkt ", blen);
+ if (err & ERR_MASK(SendMaxPktLenErr))
+ strlcat(buf, "smaxpktlen ", blen);
+ if (err & ERR_MASK(SendUnsupportedVLErr))
+ strlcat(buf, "sunsupVL ", blen);
+ if (err & ERR_MASK(InvalidAddrErr))
+ strlcat(buf, "invalidaddr ", blen);
+ if (err & ERR_MASK(RcvEgrFullErr))
+ strlcat(buf, "rcvegrfull ", blen);
+ if (err & ERR_MASK(RcvHdrFullErr))
+ strlcat(buf, "rcvhdrfull ", blen);
+ if (err & ERR_MASK(IBStatusChanged))
+ strlcat(buf, "ibcstatuschg ", blen);
+ if (err & ERR_MASK(RcvIBLostLinkErr))
+ strlcat(buf, "riblostlink ", blen);
+ if (err & ERR_MASK(HardwareErr))
+ strlcat(buf, "hardware ", blen);
+ if (err & ERR_MASK(ResetNegated))
+ strlcat(buf, "reset ", blen);
+done:
+ return iserr;
+}
+
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+static void qib_disarm_6120_senderrbufs(struct qib_pportdata *ppd)
+{
+ unsigned long sbuf[2];
+ struct qib_devdata *dd = ppd->dd;
+
+ /*
+ * It's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling.
+ */
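+ /*
+ * The two sendbuffererror registers are treated as one bitmap with a
+ * bit per PIO buffer, which is why the disarm call below covers
+ * piobcnt2k + piobcnt4k buffers.
+ */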
+ sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror);
+ sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1);
+
+ if (sbuf[0] || sbuf[1])
+ qib_disarm_piobufs_set(dd, sbuf,
+ dd->piobcnt2k + dd->piobcnt4k);
+}
+
+static int chk_6120_linkrecovery(struct qib_devdata *dd, u64 ibcs)
+{
+ int ret = 1;
+ u32 ibstate = qib_6120_iblink_state(ibcs);
+ u32 linkrecov = read_6120_creg32(dd, cr_iblinkerrrecov);
+
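+ /*
+ * If the link-error-recovery counter moved since our last snapshot,
+ * the IBC went through recovery: force the link down and return 0 so
+ * the caller skips normal state handling; don't re-arm the check
+ * until the link is ACTIVE again.
+ */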
+ if (linkrecov != dd->cspec->lastlinkrecov) {
+ /* and no more until active again */
+ dd->cspec->lastlinkrecov = 0;
+ qib_set_linkstate(dd->pport, QIB_IB_LINKDOWN);
+ ret = 0;
+ }
+ if (ibstate == IB_PORT_ACTIVE)
+ dd->cspec->lastlinkrecov =
+ read_6120_creg32(dd, cr_iblinkerrrecov);
+ return ret;
+}
+
+static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
+{
+ char *msg;
+ u64 ignore_this_time = 0;
+ u64 iserr = 0;
+ int log_idx;
+ struct qib_pportdata *ppd = dd->pport;
+ u64 mask;
+
+ /* don't report errors that are masked */
+ errs &= dd->cspec->errormask;
+ msg = dd->cspec->emsgbuf;
+
+ /* do these first, they are most important */
+ if (errs & ERR_MASK(HardwareErr))
+ qib_handle_6120_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf);
+ else
+ for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx)
+ if (errs & dd->eep_st_masks[log_idx].errs_to_log)
+ qib_inc_eeprom_err(dd, log_idx, 1);
+
+ if (errs & ~IB_E_BITSEXTANT)
+ qib_dev_err(dd,
+ "error interrupt with unknown errors %llx set\n",
+ (unsigned long long) (errs & ~IB_E_BITSEXTANT));
+
+ if (errs & E_SUM_ERRS) {
+ qib_disarm_6120_senderrbufs(ppd);
+ if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /*
+ * This can happen when trying to bring the link
+ * up, but the IB link changes state at the "wrong"
+ * time. The IB logic then complains that the packet
+ * isn't valid. We don't want to confuse people, so
+ * we just don't print them, except at debug
+ */
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
+ } else if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /*
+ * This can happen when SMA is trying to bring the link
+ * up, but the IB link changes state at the "wrong" time.
+ * The IB logic then complains that the packet isn't
+ * valid. We don't want to confuse people, so we just
+ * don't print them, except at debug
+ */
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
+
+ qib_write_kreg(dd, kr_errclear, errs);
+
+ errs &= ~ignore_this_time;
+ if (!errs)
+ goto done;
+
+ /*
+ * The ones we mask off are handled specially below
+ * or above.
+ */
+ mask = ERR_MASK(IBStatusChanged) | ERR_MASK(RcvEgrFullErr) |
+ ERR_MASK(RcvHdrFullErr) | ERR_MASK(HardwareErr);
+ qib_decode_6120_err(dd, msg, sizeof dd->cspec->emsgbuf, errs & ~mask);
+
+ if (errs & E_SUM_PKTERRS)
+ qib_stats.sps_rcverrs++;
+ if (errs & E_SUM_ERRS)
+ qib_stats.sps_txerrs++;
+
+ iserr = errs & ~(E_SUM_PKTERRS | QLOGIC_IB_E_PKTERRS);
+
+ if (errs & ERR_MASK(IBStatusChanged)) {
+ u64 ibcs = qib_read_kreg64(dd, kr_ibcstatus);
+ u32 ibstate = qib_6120_iblink_state(ibcs);
+ int handle = 1;
+
+ if (ibstate != IB_PORT_INIT && dd->cspec->lastlinkrecov)
+ handle = chk_6120_linkrecovery(dd, ibcs);
+ /*
+ * Since going into a recovery state causes the link state
+ * to go down and since recovery is transitory, it is better
+ * if we "miss" ever seeing the link training state go into
+ * recovery (i.e., ignore this transition for link state
+ * special handling purposes) without updating lastibcstat.
+ */
+ if (handle && qib_6120_phys_portstate(ibcs) ==
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER)
+ handle = 0;
+ if (handle)
+ qib_handle_e_ibstatuschanged(ppd, ibcs);
+ }
+
+ if (errs & ERR_MASK(ResetNegated)) {
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
+ dd->flags &= ~QIB_INITTED; /* needs re-init */
+ /* mark as having had error */
+ *dd->devstatusp |= QIB_STATUS_HWERROR;
+ *dd->pport->statusp &= ~QIB_STATUS_IB_CONF;
+ }
+
+ if (*msg && iserr)
+ qib_dev_porterr(dd, ppd->port, "%s error\n", msg);
+
+ if (ppd->state_wanted & ppd->lflags)
+ wake_up_interruptible(&ppd->state_wait);
+
+ /*
+ * If there were hdrq or egrfull errors, wake up any processes
+ * waiting in poll. We used to try to check which contexts had
+ * the overflow, but given the cost of that and the chip reads
+ * to support it, it's better to just wake everybody up if we
+ * get an overflow; waiters can poll again if it's not them.
+ */
+ if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) {
+ qib_handle_urcv(dd, ~0U);
+ if (errs & ERR_MASK(RcvEgrFullErr))
+ qib_stats.sps_buffull++;
+ else
+ qib_stats.sps_hdrfull++;
+ }
+done:
+ return;
+}
+
+/**
+ * qib_6120_init_hwerrors - enable hardware errors
+ * @dd: the qlogic_ib device
+ *
+ * now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask
+ */
+static void qib_6120_init_hwerrors(struct qib_devdata *dd)
+{
+ u64 val;
+ u64 extsval;
+
+ extsval = qib_read_kreg64(dd, kr_extstatus);
+
+ if (!(extsval & QLOGIC_IB_EXTS_MEMBIST_ENDTEST))
+ qib_dev_err(dd, "MemBIST did not complete!\n");
+
+ /* init so all hwerrors interrupt, and enter freeze, adjust below */
+ val = ~0ULL;
+ if (dd->minrev < 2) {
+ /*
+ * Avoid problem with internal interface bus parity
+ * checking. Fixed in Rev2.
+ */
+ val &= ~QLOGIC_IB_HWE_PCIEBUSPARITYRADM;
+ }
+ /* avoid some intel cpu's speculative read freeze mode issue */
+ val &= ~TXEMEMPARITYERR_PIOBUF;
+
+ dd->cspec->hwerrmask = val;
+
+ qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed));
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+
+ /* clear all */
+ qib_write_kreg(dd, kr_errclear, ~0ULL);
+ /* enable errors that are masked, at least this first time. */
+ qib_write_kreg(dd, kr_errmask, ~0ULL);
+ dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask);
+ /* clear any interrupts up to this point (ints still not enabled) */
+ qib_write_kreg(dd, kr_intclear, ~0ULL);
+
+ qib_write_kreg(dd, kr_rcvbthqp,
+ dd->qpn_mask << (QIB_6120_RcvBTHQP_BTHQP_Mask_LSB - 1) |
+ QIB_KD_QP);
+}
+
+/*
+ * Disable and enable the armlaunch error. Used for PIO bandwidth testing
+ * on chips that are count-based, rather than trigger-based. There is no
+ * reference counting, but that's also fine, given the intended use.
+ * Only chip-specific because it's all register accesses
+ */
+static void qib_set_6120_armlaunch(struct qib_devdata *dd, u32 enable)
+{
+ if (enable) {
+ qib_write_kreg(dd, kr_errclear,
+ ERR_MASK(SendPioArmLaunchErr));
+ dd->cspec->errormask |= ERR_MASK(SendPioArmLaunchErr);
+ } else
+ dd->cspec->errormask &= ~ERR_MASK(SendPioArmLaunchErr);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+}
+
+/*
+ * Formerly took parameter <which> in pre-shifted,
+ * pre-merged form with LinkCmd and LinkInitCmd
+ * together, and assuming the zero was NOP.
+ */
+static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd,
+ u16 linitcmd)
+{
+ u64 mod_wd;
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) {
+ /*
+ * If we are told to disable, note that so link-recovery
+ * code does not attempt to bring us back up.
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) {
+ /*
+ * Any other linkinitcmd will lead to LINKDOWN and then
+ * to INIT (if all is well), so clear flag to let
+ * link-recovery code attempt to bring us back up.
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+
+ mod_wd = (linkcmd << QLOGIC_IB_IBCC_LINKCMD_SHIFT) |
+ (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+
+ qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl | mod_wd);
+ /* write to chip to prevent back-to-back writes of control reg */
+ qib_write_kreg(dd, kr_scratch, 0);
+}
+
+/**
+ * qib_6120_bringup_serdes - bring up the serdes
+ * @dd: the qlogic_ib device
+ */
+static int qib_6120_bringup_serdes(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 val, config1, prev_val, hwstat, ibc;
+
+ /* Put IBC in reset, sends disabled */
+ dd->control &= ~QLOGIC_IB_C_LINKENABLE;
+ qib_write_kreg(dd, kr_control, 0ULL);
+
+ dd->cspec->ibdeltainprog = 1;
+ dd->cspec->ibsymsnap = read_6120_creg32(dd, cr_ibsymbolerr);
+ dd->cspec->iblnkerrsnap = read_6120_creg32(dd, cr_iblinkerrrecov);
+
+ /* flowcontrolwatermark is in units of KBytes */
+ ibc = 0x5ULL << SYM_LSB(IBCCtrl, FlowCtrlWaterMark);
+ /*
+ * How often flowctrl sent. More or less in usecs; balance against
+ * watermark value, so that in theory senders always get a flow
+ * control update in time to not let the IB link go idle.
+ */
+ ibc |= 0x3ULL << SYM_LSB(IBCCtrl, FlowCtrlPeriod);
+ /* max error tolerance */
+ dd->cspec->lli_thresh = 0xf;
+ ibc |= (u64) dd->cspec->lli_thresh << SYM_LSB(IBCCtrl, PhyerrThreshold);
+ /* use "real" buffer space for */
+ ibc |= 4ULL << SYM_LSB(IBCCtrl, CreditScale);
+ /* IB credit flow control. */
+ ibc |= 0xfULL << SYM_LSB(IBCCtrl, OverrunThreshold);
+ /*
+ * set initial max size pkt IBC will send, including ICRC; it's the
+ * PIO buffer size in dwords, less 1; also see qib_set_mtu()
+ */
+ ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << SYM_LSB(IBCCtrl, MaxPktLen);
+ dd->cspec->ibcctrl = ibc; /* without linkcmd or linkinitcmd! */
+
+ /* initially come up waiting for TS1, without sending anything. */
+ val = dd->cspec->ibcctrl | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
+ QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+ qib_write_kreg(dd, kr_ibcctrl, val);
+
+ val = qib_read_kreg64(dd, kr_serdes_cfg0);
+ config1 = qib_read_kreg64(dd, kr_serdes_cfg1);
+
+ /*
+ * Force reset on, also set rxdetect enable. Must do before reading
+ * serdesstatus at least for simulation, or some of the bits in
+ * serdes status will come back as undefined and cause simulation
+ * failures
+ */
+ val |= SYM_MASK(SerdesCfg0, ResetPLL) |
+ SYM_MASK(SerdesCfg0, RxDetEnX) |
+ (SYM_MASK(SerdesCfg0, L1PwrDnA) |
+ SYM_MASK(SerdesCfg0, L1PwrDnB) |
+ SYM_MASK(SerdesCfg0, L1PwrDnC) |
+ SYM_MASK(SerdesCfg0, L1PwrDnD));
+ qib_write_kreg(dd, kr_serdes_cfg0, val);
+ /* be sure chip saw it */
+ qib_read_kreg64(dd, kr_scratch);
+ udelay(5); /* need pll reset set at least for a bit */
+ /*
+ * after PLL is reset, set the per-lane Resets and TxIdle and
+ * clear the PLL reset and rxdetect (to get falling edge).
+ * Leave L1PWR bits set (permanently)
+ */
+ val &= ~(SYM_MASK(SerdesCfg0, RxDetEnX) |
+ SYM_MASK(SerdesCfg0, ResetPLL) |
+ (SYM_MASK(SerdesCfg0, L1PwrDnA) |
+ SYM_MASK(SerdesCfg0, L1PwrDnB) |
+ SYM_MASK(SerdesCfg0, L1PwrDnC) |
+ SYM_MASK(SerdesCfg0, L1PwrDnD)));
+ val |= (SYM_MASK(SerdesCfg0, ResetA) |
+ SYM_MASK(SerdesCfg0, ResetB) |
+ SYM_MASK(SerdesCfg0, ResetC) |
+ SYM_MASK(SerdesCfg0, ResetD)) |
+ SYM_MASK(SerdesCfg0, TxIdeEnX);
+ qib_write_kreg(dd, kr_serdes_cfg0, val);
+ /* be sure chip saw it */
+ (void) qib_read_kreg64(dd, kr_scratch);
+ /* need PLL reset clear for at least 11 usec before lane
+ * resets cleared; give it a few more to be sure */
+ udelay(15);
+ val &= ~((SYM_MASK(SerdesCfg0, ResetA) |
+ SYM_MASK(SerdesCfg0, ResetB) |
+ SYM_MASK(SerdesCfg0, ResetC) |
+ SYM_MASK(SerdesCfg0, ResetD)) |
+ SYM_MASK(SerdesCfg0, TxIdeEnX));
+
+ qib_write_kreg(dd, kr_serdes_cfg0, val);
+ /* be sure chip saw it */
+ (void) qib_read_kreg64(dd, kr_scratch);
+
+ val = qib_read_kreg64(dd, kr_xgxs_cfg);
+ prev_val = val;
+ if (val & QLOGIC_IB_XGXS_RESET)
+ val &= ~QLOGIC_IB_XGXS_RESET;
+ if (SYM_FIELD(val, XGXSCfg, polarity_inv) != ppd->rx_pol_inv) {
+ /* need to compensate for Tx inversion in partner */
+ val &= ~SYM_MASK(XGXSCfg, polarity_inv);
+ val |= (u64)ppd->rx_pol_inv << SYM_LSB(XGXSCfg, polarity_inv);
+ }
+ if (val != prev_val)
+ qib_write_kreg(dd, kr_xgxs_cfg, val);
+
+ val = qib_read_kreg64(dd, kr_serdes_cfg0);
+
+ /* clear current and de-emphasis bits */
+ config1 &= ~0x0ffffffff00ULL;
+ /* set current to 20ma */
+ config1 |= 0x00000000000ULL;
+ /* set de-emphasis to -5.68dB */
+ config1 |= 0x0cccc000000ULL;
+ qib_write_kreg(dd, kr_serdes_cfg1, config1);
+
+ /* base and port guid same for single port */
+ ppd->guid = dd->base_guid;
+
+ /*
+ * the process of setting and un-resetting the serdes normally
+ * causes a serdes PLL error, so check for that and clear it
+ * here. Also clear the hwerr bit in errstatus, but not others.
+ */
+ hwstat = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (hwstat) {
+ /* should just have PLL, clear all set, in any case */
+ qib_write_kreg(dd, kr_hwerrclear, hwstat);
+ qib_write_kreg(dd, kr_errclear, ERR_MASK(HardwareErr));
+ }
+
+ dd->control |= QLOGIC_IB_C_LINKENABLE;
+ dd->control &= ~QLOGIC_IB_C_FREEZEMODE;
+ qib_write_kreg(dd, kr_control, dd->control);
+
+ return 0;
+}
+
+/**
+ * qib_6120_quiet_serdes - set serdes to txidle
+ * @ppd: physical port of the qlogic_ib device
+ * Called when driver is being unloaded
+ */
+static void qib_6120_quiet_serdes(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 val;
+
+ qib_set_ib_6120_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+
+ /* disable IBC */
+ dd->control &= ~QLOGIC_IB_C_LINKENABLE;
+ qib_write_kreg(dd, kr_control,
+ dd->control | QLOGIC_IB_C_FREEZEMODE);
+
+ if (dd->cspec->ibsymdelta || dd->cspec->iblnkerrdelta ||
+ dd->cspec->ibdeltainprog) {
+ u64 diagc;
+
+ /* enable counter writes */
+ diagc = qib_read_kreg64(dd, kr_hwdiagctrl);
+ qib_write_kreg(dd, kr_hwdiagctrl,
+ diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable));
+
+ if (dd->cspec->ibsymdelta || dd->cspec->ibdeltainprog) {
+ val = read_6120_creg32(dd, cr_ibsymbolerr);
+ if (dd->cspec->ibdeltainprog)
+ val -= val - dd->cspec->ibsymsnap;
+ val -= dd->cspec->ibsymdelta;
+ write_6120_creg(dd, cr_ibsymbolerr, val);
+ }
+ if (dd->cspec->iblnkerrdelta || dd->cspec->ibdeltainprog) {
+ val = read_6120_creg32(dd, cr_iblinkerrrecov);
+ if (dd->cspec->ibdeltainprog)
+ val -= val - dd->cspec->iblnkerrsnap;
+ val -= dd->cspec->iblnkerrdelta;
+ write_6120_creg(dd, cr_iblinkerrrecov, val);
+ }
+
+ /* and disable counter writes */
+ qib_write_kreg(dd, kr_hwdiagctrl, diagc);
+ }
+
+ val = qib_read_kreg64(dd, kr_serdes_cfg0);
+ val |= SYM_MASK(SerdesCfg0, TxIdeEnX);
+ qib_write_kreg(dd, kr_serdes_cfg0, val);
+}
+
+/**
+ * qib_6120_setup_setextled - set the state of the two external LEDs
+ * @dd: the qlogic_ib device
+ * @on: whether the link is up or not
+ *
+ * The exact combo of LEDs if on is true is determined by looking
+ * at the ibcstatus.
+ *
+ * These LEDs indicate the physical and logical state of IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state),
+ *
+ * Note: We try to match the Mellanox HCA LED behavior as best
+ * we can. Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate. That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void qib_6120_setup_setextled(struct qib_pportdata *ppd, u32 on)
+{
+ u64 extctl, val, lst, ltst;
+ unsigned long flags;
+ struct qib_devdata *dd = ppd->dd;
+
+ /*
+ * The diags use the LED to indicate diag info, so we leave
+ * the external LED alone when the diags are running.
+ */
+ if (dd->diag_client)
+ return;
+
+ /* Allow override of LED display for, e.g. Locating system in rack */
+ if (ppd->led_override) {
+ ltst = (ppd->led_override & QIB_LED_PHYS) ?
+ IB_PHYSPORTSTATE_LINKUP : IB_PHYSPORTSTATE_DISABLED;
+ lst = (ppd->led_override & QIB_LED_LOG) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ } else if (on) {
+ val = qib_read_kreg64(dd, kr_ibcstatus);
+ ltst = qib_6120_phys_portstate(val);
+ lst = qib_6120_iblink_state(val);
+ } else {
+ ltst = 0;
+ lst = 0;
+ }
+
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ extctl = dd->cspec->extctrl & ~(SYM_MASK(EXTCtrl, LEDPriPortGreenOn) |
+ SYM_MASK(EXTCtrl, LEDPriPortYellowOn));
+
+ if (ltst == IB_PHYSPORTSTATE_LINKUP)
+ extctl |= SYM_MASK(EXTCtrl, LEDPriPortYellowOn);
+ if (lst == IB_PORT_ACTIVE)
+ extctl |= SYM_MASK(EXTCtrl, LEDPriPortGreenOn);
+ dd->cspec->extctrl = extctl;
+ qib_write_kreg(dd, kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+}
+
+static void qib_6120_free_irq(struct qib_devdata *dd)
+{
+ if (dd->cspec->irq) {
+ free_irq(dd->cspec->irq, dd);
+ dd->cspec->irq = 0;
+ }
+ qib_nomsi(dd);
+}
+
+/**
+ * qib_6120_setup_cleanup - clean up any per-chip chip-specific stuff
+ * @dd: the qlogic_ib device
+ *
+ * This is called during driver unload.
+ */
+static void qib_6120_setup_cleanup(struct qib_devdata *dd)
+{
+ qib_6120_free_irq(dd);
+ kfree(dd->cspec->cntrs);
+ kfree(dd->cspec->portcntrs);
+ if (dd->cspec->dummy_hdrq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ ALIGN(dd->rcvhdrcnt *
+ dd->rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE),
+ dd->cspec->dummy_hdrq,
+ dd->cspec->dummy_hdrq_phys);
+ dd->cspec->dummy_hdrq = NULL;
+ }
+}
+
+static void qib_wantpiobuf_6120_intr(struct qib_devdata *dd, u32 needint)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if (needint)
+ dd->sendctrl |= SYM_MASK(SendCtrl, PIOIntBufAvail);
+ else
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOIntBufAvail);
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+}
+
+/*
+ * handle errors and unusual events first, separate function
+ * to improve cache hits for fast path interrupt handling
+ */
+static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat)
+{
+ if (unlikely(istat & ~QLOGIC_IB_I_BITSEXTANT))
+ qib_dev_err(dd, "interrupt with unknown interrupts %Lx set\n",
+ istat & ~QLOGIC_IB_I_BITSEXTANT);
+
+ if (istat & QLOGIC_IB_I_ERROR) {
+ u64 estat = 0;
+
+ qib_stats.sps_errints++;
+ estat = qib_read_kreg64(dd, kr_errstatus);
+ if (!estat)
+ qib_devinfo(dd->pcidev,
+ "error interrupt (%Lx), but no error bits set!\n",
+ istat);
+ handle_6120_errors(dd, estat);
+ }
+
+ if (istat & QLOGIC_IB_I_GPIO) {
+ u32 gpiostatus;
+ u32 to_clear = 0;
+
+ /*
+ * GPIO_3..5 on IBA6120 Rev2 chips indicate
+ * errors that we need to count.
+ */
+ gpiostatus = qib_read_kreg32(dd, kr_gpio_status);
+ /* First the error-counter case. */
+ if (gpiostatus & GPIO_ERRINTR_MASK) {
+ /* want to clear the bits we see asserted. */
+ to_clear |= (gpiostatus & GPIO_ERRINTR_MASK);
+
+ /*
+ * Count appropriately, clear bits out of our copy,
+ * as they have been "handled".
+ */
+ if (gpiostatus & (1 << GPIO_RXUVL_BIT))
+ dd->cspec->rxfc_unsupvl_errs++;
+ if (gpiostatus & (1 << GPIO_OVRUN_BIT))
+ dd->cspec->overrun_thresh_errs++;
+ if (gpiostatus & (1 << GPIO_LLI_BIT))
+ dd->cspec->lli_errs++;
+ gpiostatus &= ~GPIO_ERRINTR_MASK;
+ }
+ if (gpiostatus) {
+ /*
+ * Some unexpected bits remain. If they could have
+ * caused the interrupt, complain and clear.
+ * To avoid repetition of this condition, also clear
+ * the mask. It is almost certainly due to error.
+ */
+ const u32 mask = qib_read_kreg32(dd, kr_gpio_mask);
+
+ /*
+ * Also check that the chip reflects our shadow,
+ * and report issues. If they caused the interrupt,
+ * we suppress further ones by refreshing the mask from the shadow.
+ */
+ if (mask & gpiostatus) {
+ to_clear |= (gpiostatus & mask);
+ dd->cspec->gpio_mask &= ~(gpiostatus & mask);
+ qib_write_kreg(dd, kr_gpio_mask,
+ dd->cspec->gpio_mask);
+ }
+ }
+ if (to_clear)
+ qib_write_kreg(dd, kr_gpio_clear, (u64) to_clear);
+ }
+}
+
+static irqreturn_t qib_6120intr(int irq, void *data)
+{
+ struct qib_devdata *dd = data;
+ irqreturn_t ret;
+ u32 istat, ctxtrbits, rmask, crcs = 0;
+ unsigned i;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) {
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ ret = IRQ_HANDLED;
+ goto bail;
+ }
+
+ istat = qib_read_kreg32(dd, kr_intstatus);
+
+ if (unlikely(!istat)) {
+ ret = IRQ_NONE; /* not our interrupt, or already handled */
+ goto bail;
+ }
+ if (unlikely(istat == -1)) {
+ qib_bad_intrstatus(dd);
+ /* don't know if it was our interrupt or not */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ this_cpu_inc(*dd->int_counter);
+
+ if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
+ QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
+ unlikely_6120_intr(dd, istat);
+
+ /*
+ * Clear the interrupt bits we found set, relatively early, so we
+ * "know" know the chip will have seen this by the time we process
+ * the queue, and will re-interrupt if necessary. The processor
+ * itself won't take the interrupt again until we return.
+ */
+ qib_write_kreg(dd, kr_intclear, istat);
+
+ /*
+ * Handle kernel receive queues before checking for pio buffers
+ * available since receives can overflow; piobuf waiters can afford
+ * a few extra cycles, since they were waiting anyway.
+ */
+ ctxtrbits = istat &
+ ((QLOGIC_IB_I_RCVAVAIL_MASK << QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+ (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT));
+ if (ctxtrbits) {
+ rmask = (1U << QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+ (1U << QLOGIC_IB_I_RCVURG_SHIFT);
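+ /*
+ * Each kernel context owns one RCVAVAIL and one RCVURG bit;
+ * shift the two-bit mask up one position per context as we scan.
+ */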
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ if (ctxtrbits & rmask) {
+ ctxtrbits &= ~rmask;
+ crcs += qib_kreceive(dd->rcd[i],
+ &dd->cspec->lli_counter,
+ NULL);
+ }
+ rmask <<= 1;
+ }
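+ /*
+ * Accumulate receive CRC hits toward the LLI threshold that was
+ * programmed as PhyerrThreshold in bringup_serdes; once exceeded,
+ * count one LLI error and restart the counter.
+ */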
+ if (crcs) {
+ u32 cntr = dd->cspec->lli_counter;
+ cntr += crcs;
+ if (cntr) {
+ if (cntr > dd->cspec->lli_thresh) {
+ dd->cspec->lli_counter = 0;
+ dd->cspec->lli_errs++;
+ } else
+ dd->cspec->lli_counter += cntr;
+ }
+ }
+
+
+ if (ctxtrbits) {
+ ctxtrbits =
+ (ctxtrbits >> QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+ (ctxtrbits >> QLOGIC_IB_I_RCVURG_SHIFT);
+ qib_handle_urcv(dd, ctxtrbits);
+ }
+ }
+
+ if ((istat & QLOGIC_IB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED))
+ qib_ib_piobufavail(dd);
+
+ ret = IRQ_HANDLED;
+bail:
+ return ret;
+}
+
+/*
+ * Set up our chip-specific interrupt handler
+ * The interrupt type has already been setup, so
+ * we just need to do the registration and error checking.
+ */
+static void qib_setup_6120_interrupt(struct qib_devdata *dd)
+{
+ /*
+ * If the chip supports added error indication via GPIO pins,
+ * enable interrupts on those bits so the interrupt routine
+ * can count the events. Also set flag so interrupt routine
+ * can know they are expected.
+ */
+ if (SYM_FIELD(dd->revision, Revision_R,
+ ChipRevMinor) > 1) {
+ /* Rev2+ reports extra errors via internal GPIO pins */
+ dd->cspec->gpio_mask |= GPIO_ERRINTR_MASK;
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ }
+
+ if (!dd->cspec->irq)
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
+ else {
+ int ret;
+ ret = request_irq(dd->cspec->irq, qib_6120intr, 0,
+ QIB_DRV_NAME, dd);
+ if (ret)
+ qib_dev_err(dd,
+ "Couldn't setup interrupt (irq=%d): %d\n",
+ dd->cspec->irq, ret);
+ }
+}
+
+/**
+ * pe_boardname - fill in the board name
+ * @dd: the qlogic_ib device
+ *
+ * info is based on the board revision register
+ */
+static void pe_boardname(struct qib_devdata *dd)
+{
+ char *n;
+ u32 boardid, namelen;
+
+ boardid = SYM_FIELD(dd->revision, Revision,
+ BoardID);
+
+ switch (boardid) {
+ case 2:
+ n = "InfiniPath_QLE7140";
+ break;
+ default:
+ qib_dev_err(dd, "Unknown 6120 board with ID %u\n", boardid);
+ n = "Unknown_InfiniPath_6120";
+ break;
+ }
+ namelen = strlen(n) + 1;
+ dd->boardname = kmalloc(namelen, GFP_KERNEL);
+ if (!dd->boardname)
+ qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
+ else
+ snprintf(dd->boardname, namelen, "%s", n);
+
+ if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
+ qib_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->majrev, dd->minrev);
+
+ snprintf(dd->boardversion, sizeof(dd->boardversion),
+ "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
+ QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname,
+ (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch),
+ dd->majrev, dd->minrev,
+ (unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
+
+}
+
+/*
+ * This routine sleeps, so it can only be called from user context, not
+ * from interrupt context. If we need interrupt context, we can split
+ * it into two routines.
+ */
+static int qib_6120_setup_reset(struct qib_devdata *dd)
+{
+ u64 val;
+ int i;
+ int ret;
+ u16 cmdval;
+ u8 int_line, clinesz;
+
+ qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz);
+
+ /* Use ERROR so it shows up in logs, etc. */
+ qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit);
+
+ /* no interrupts till re-initted */
+ qib_6120_set_intr_state(dd, 0);
+
+ dd->cspec->ibdeltainprog = 0;
+ dd->cspec->ibsymdelta = 0;
+ dd->cspec->iblnkerrdelta = 0;
+
+ /*
+ * Keep chip from being accessed until we are ready. Use
+ * writeq() directly, to allow the write even though QIB_PRESENT
+ * isn't set.
+ */
+ dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
+ val = dd->control | QLOGIC_IB_C_RESET;
+ writeq(val, &dd->kregbase[kr_control]);
+ mb(); /* prevent compiler re-ordering around actual reset */
+
+ for (i = 1; i <= 5; i++) {
+ /*
+ * Allow MBIST, etc. to complete; longer on each retry.
+ * We sometimes get machine checks from bus timeout if no
+ * response, so for now, make it *really* long.
+ */
+ msleep(1000 + (1 + i) * 2000);
+
+ qib_pcie_reenable(dd, cmdval, int_line, clinesz);
+
+ /*
+ * Use readq directly, so we don't need to mark it as PRESENT
+ * until we get a successful indication that all is well.
+ */
+ val = readq(&dd->kregbase[kr_revision]);
+ if (val == dd->revision) {
+ dd->flags |= QIB_PRESENT; /* it's back */
+ ret = qib_reinit_intr(dd);
+ goto bail;
+ }
+ }
+ ret = 0; /* failed */
+
+bail:
+ if (ret) {
+ if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
+ /* clear the reset error, init error/hwerror mask */
+ qib_6120_init_hwerrors(dd);
+ /* for Rev2 error interrupts; nop for rev 1 */
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ }
+ return ret;
+}
+
+/**
+ * qib_6120_put_tid - write a TID in chip
+ * @dd: the qlogic_ib device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
+ * for expected
+ * @pa: physical address of in memory buffer; tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for special locking etc.
+ * It's used for both the full cleanup on exit, as well as the normal
+ * setup and teardown.
+ */
+static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+ unsigned long flags;
+ int tidx;
+ spinlock_t *tidlockp; /* select appropriate spinlock */
+
+ if (!dd->kregbase)
+ return;
+
+ if (pa != dd->tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n",
+ pa);
+ return;
+ }
+ pa >>= 11;
+ if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
+ return;
+ }
+
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ pa |= dd->tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+
+ /*
+ * Avoid chip issue by writing the scratch register
+ * before and after the TID, and with an io write barrier.
+ * We use a spinlock around the writes, so they can't intermix
+ * with other TID (eager or expected) writes (the chip problem
+ * is triggered by back to back TID writes). Unfortunately, this
+ * call can be done from interrupt level for the ctxt 0 eager TIDs,
+ * so we have to use irqsave locks.
+ */
+ /*
+ * Assumes tidptr always > egrtidbase
+ * if type == RCVHQ_RCV_TYPE_EAGER.
+ */
+ tidx = tidptr - dd->egrtidbase;
+
+ tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->rcvhdrcnt)
+ ? &dd->cspec->kernel_tid_lock : &dd->cspec->user_tid_lock;
+ spin_lock_irqsave(tidlockp, flags);
+ qib_write_kreg(dd, kr_scratch, 0xfeeddeaf);
+ writel(pa, tidp32);
+ qib_write_kreg(dd, kr_scratch, 0xdeadbeef);
+ mmiowb();
+ spin_unlock_irqrestore(tidlockp, flags);
+}
+
+/**
+ * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the qlogic_ib device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
+ * for expected
+ * @pa: physical address of in memory buffer; tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+ u32 tidx;
+
+ if (!dd->kregbase)
+ return;
+
+ if (pa != dd->tidinvalid) {
+ if (pa & ((1U << 11) - 1)) {
+ qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n",
+ pa);
+ return;
+ }
+ pa >>= 11;
+ if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
+ return;
+ }
+
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ pa |= dd->tidtemplate;
+ else /* for now, always full 4KB page */
+ pa |= 2 << 29;
+ }
+ tidx = tidptr - dd->egrtidbase;
+ writel(pa, tidp32);
+ mmiowb();
+}
+
+
+/**
+ * qib_6120_clear_tids - clear all TID entries for a context, expected and eager
+ * @dd: the qlogic_ib device
+ * @ctxt: the context
+ *
+ * clear all TID entries for a context, expected and eager.
+ * Used from qib_close(). On this chip, TIDs are only 32 bits,
+ * not 64, but they are still on 64 bit boundaries, so tidbase
+ * is declared as u64 * for the pointer math, even though we write 32 bits
+ */
+static void qib_6120_clear_tids(struct qib_devdata *dd,
+ struct qib_ctxtdata *rcd)
+{
+ u64 __iomem *tidbase;
+ unsigned long tidinv;
+ u32 ctxt;
+ int i;
+
+ if (!dd->kregbase || !rcd)
+ return;
+
+ ctxt = rcd->ctxt;
+
+ tidinv = dd->tidinvalid;
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->kregbase) +
+ dd->rcvtidbase +
+ ctxt * dd->rcvtidcnt * sizeof(*tidbase));
+
+ for (i = 0; i < dd->rcvtidcnt; i++)
+ /* use func pointer because could be one of two funcs */
+ dd->f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ tidinv);
+
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->kregbase) +
+ dd->rcvegrbase +
+ rcd->rcvegr_tid_base * sizeof(*tidbase));
+
+ for (i = 0; i < rcd->rcvegrcnt; i++)
+ /* use func pointer because could be one of two funcs */
+ dd->f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ tidinv);
+}
+
+/**
+ * qib_6120_tidtemplate - setup constants for TID updates
+ * @dd: the qlogic_ib device
+ *
+ * We setup stuff that we use a lot, to avoid calculating each time
+ */
+static void qib_6120_tidtemplate(struct qib_devdata *dd)
+{
+ u32 egrsize = dd->rcvegrbufsize;
+
+ /*
+ * For now, we always allocate 4KB buffers (at init) so we can
+ * receive max size packets. We may want a module parameter to
+ * specify 2KB or 4KB and/or make it per ctxt instead of per device
+ * for those who want to reduce memory footprint. Note that the
+ * rcvhdrentsize size must be large enough to hold the largest
+ * IB header (currently 96 bytes) that we expect to handle (plus of
+ * course the 2 dwords of RHF).
+ */
+ if (egrsize == 2048)
+ dd->tidtemplate = 1U << 29;
+ else if (egrsize == 4096)
+ dd->tidtemplate = 2U << 29;
+ dd->tidinvalid = 0;
+}
+
+int __attribute__((weak)) qib_unordered_wc(void)
+{
+ return 0;
+}
+
+/**
+ * qib_6120_get_base_info - set chip-specific flags for user code
+ * @rcd: the qlogic_ib ctxt
+ * @kinfo: qib_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int qib_6120_get_base_info(struct qib_ctxtdata *rcd,
+ struct qib_base_info *kinfo)
+{
+ if (qib_unordered_wc())
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_FORCE_WC_ORDER;
+
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_PCIE |
+ QIB_RUNTIME_FORCE_PIOAVAIL | QIB_RUNTIME_PIO_REGSWAPPED;
+ return 0;
+}
+
+
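+/*
+ * On this chip the RHF occupies the first two dwords (one u64) of each
+ * rcvhdrq entry; the message header starts right after it.
+ */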
+static struct qib_message_header *
+qib_6120_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
+{
+ return (struct qib_message_header *)
+ &rhf_addr[sizeof(u64) / sizeof(u32)];
+}
+
+static void qib_6120_config_ctxts(struct qib_devdata *dd)
+{
+ dd->ctxtcnt = qib_read_kreg32(dd, kr_portcnt);
+ if (qib_n_krcv_queues > 1) {
+ dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
+ if (dd->first_user_ctxt > dd->ctxtcnt)
+ dd->first_user_ctxt = dd->ctxtcnt;
+ dd->qpn_mask = dd->first_user_ctxt <= 2 ? 2 : 6;
+ } else
+ dd->first_user_ctxt = dd->num_pports;
+ dd->n_krcv_queues = dd->first_user_ctxt;
+}
+
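+/*
+ * Tell the chip how far the driver has consumed: update the eager
+ * buffer index head when updegr is set, then the rcvhdrq head, for
+ * the given context.
+ */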
+static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd,
+ u32 updegr, u32 egrhd, u32 npkts)
+{
+ if (updegr)
+ qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
+}
+
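+/*
+ * The receive header queue is empty when head == tail; use the DMA'd
+ * tail copy when available, otherwise read the tail register from the
+ * chip.
+ */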
+static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd)
+{
+ u32 head, tail;
+
+ head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt);
+ if (rcd->rcvhdrtail_kvaddr)
+ tail = qib_get_rcvhdrtail(rcd);
+ else
+ tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt);
+ return head == tail;
+}
+
+/*
+ * Used when we close any ctxt, for DMA already in flight
+ * at close. Can't be done until we know hdrq size, so not
+ * early in chip init.
+ */
+static void alloc_dummy_hdrq(struct qib_devdata *dd)
+{
+ dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
+ dd->rcd[0]->rcvhdrq_size,
+ &dd->cspec->dummy_hdrq_phys,
+ GFP_ATOMIC | __GFP_COMP);
+ if (!dd->cspec->dummy_hdrq) {
+ qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
+ /* fallback to just 0'ing */
+ dd->cspec->dummy_hdrq_phys = 0UL;
+ }
+}
+
+/*
+ * Modify the RCVCTRL register in a chip-specific way. This
+ * is a function because bit positions and (future) register
+ * location are chip-specific, but the needed operations are
+ * generic. <op> is a bit-mask because we often want to
+ * do multiple modifications.
+ */
+static void rcvctrl_6120_mod(struct qib_pportdata *ppd, unsigned int op,
+ int ctxt)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 mask, val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+
+ if (op & QIB_RCVCTRL_TAILUPD_ENB)
+ dd->rcvctrl |= (1ULL << QLOGIC_IB_R_TAILUPD_SHIFT);
+ if (op & QIB_RCVCTRL_TAILUPD_DIS)
+ dd->rcvctrl &= ~(1ULL << QLOGIC_IB_R_TAILUPD_SHIFT);
+ if (op & QIB_RCVCTRL_PKEY_ENB)
+ dd->rcvctrl &= ~(1ULL << IBA6120_R_PKEY_DIS_SHIFT);
+ if (op & QIB_RCVCTRL_PKEY_DIS)
+ dd->rcvctrl |= (1ULL << IBA6120_R_PKEY_DIS_SHIFT);
+ if (ctxt < 0)
+ mask = (1ULL << dd->ctxtcnt) - 1;
+ else
+ mask = (1ULL << ctxt);
+ if (op & QIB_RCVCTRL_CTXT_ENB) {
+ /* always done for specific ctxt */
+ dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, PortEnable));
+ if (!(dd->flags & QIB_NODMA_RTAIL))
+ dd->rcvctrl |= 1ULL << QLOGIC_IB_R_TAILUPD_SHIFT;
+ /* Write these registers before the context is enabled. */
+ qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt,
+ dd->rcd[ctxt]->rcvhdrqtailaddr_phys);
+ qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt,
+ dd->rcd[ctxt]->rcvhdrq_phys);
+
+ if (ctxt == 0 && !dd->cspec->dummy_hdrq)
+ alloc_dummy_hdrq(dd);
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS)
+ dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, PortEnable));
+ if (op & QIB_RCVCTRL_INTRAVAIL_ENB)
+ dd->rcvctrl |= (mask << QLOGIC_IB_R_INTRAVAIL_SHIFT);
+ if (op & QIB_RCVCTRL_INTRAVAIL_DIS)
+ dd->rcvctrl &= ~(mask << QLOGIC_IB_R_INTRAVAIL_SHIFT);
+ qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+ if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && dd->rhdrhead_intr_off) {
+ /* arm rcv interrupt */
+ val = qib_read_ureg32(dd, ur_rcvhdrhead, ctxt) |
+ dd->rhdrhead_intr_off;
+ qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+ }
+ if (op & QIB_RCVCTRL_CTXT_ENB) {
+ /*
+ * Init the context registers also; if we were
+ * disabled, tail and head should both be zero
+ * already from the enable, but since we don't
+ * know, we have to do it explicitly.
+ */
+ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
+ qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
+
+ val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt);
+ dd->rcd[ctxt]->head = val;
+ /* If kctxt, interrupt on next receive. */
+ if (ctxt < dd->first_user_ctxt)
+ val |= dd->rhdrhead_intr_off;
+ qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS) {
+ /*
+ * Be paranoid, and never write 0's to these, just use an
+ * unused page. Of course, rcvhdraddr points to a large
+ * chunk of memory, so this could still trash things, but at
+ * least it won't trash page 0, and by disabling the ctxt, it
+ * should stop "soon", even if a packet or two is already in
+ * flight after we disabled the ctxt. Only 6120 has this issue.
+ */
+ if (ctxt >= 0) {
+ qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt,
+ dd->cspec->dummy_hdrq_phys);
+ qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt,
+ dd->cspec->dummy_hdrq_phys);
+ } else {
+ unsigned i;
+
+ for (i = 0; i < dd->cfgctxts; i++) {
+ qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr,
+ i, dd->cspec->dummy_hdrq_phys);
+ qib_write_kreg_ctxt(dd, kr_rcvhdraddr,
+ i, dd->cspec->dummy_hdrq_phys);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+}
+
+/*
+ * Modify the SENDCTRL register in a chip-specific way. This
+ * is a function because there may be multiple such registers
+ * with slightly different layouts. Only the operations actually
+ * used so far are implemented.
+ * The chip requires no back-to-back sendctrl writes, so we write
+ * the scratch register after writing sendctrl.
+ */
+static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 tmp_dd_sendctrl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+
+ /* First the ones that are "sticky", saved in shadow */
+ if (op & QIB_SENDCTRL_CLEAR)
+ dd->sendctrl = 0;
+ if (op & QIB_SENDCTRL_SEND_DIS)
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOEnable);
+ else if (op & QIB_SENDCTRL_SEND_ENB)
+ dd->sendctrl |= SYM_MASK(SendCtrl, PIOEnable);
+ if (op & QIB_SENDCTRL_AVAIL_DIS)
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOBufAvailUpd);
+ else if (op & QIB_SENDCTRL_AVAIL_ENB)
+ dd->sendctrl |= SYM_MASK(SendCtrl, PIOBufAvailUpd);
+
+ if (op & QIB_SENDCTRL_DISARM_ALL) {
+ u32 i, last;
+
+ tmp_dd_sendctrl = dd->sendctrl;
+ /*
+ * disarm any that are not yet launched, disabling sends
+ * and updates until done.
+ */
+ last = dd->piobcnt2k + dd->piobcnt4k;
+ tmp_dd_sendctrl &=
+ ~(SYM_MASK(SendCtrl, PIOEnable) |
+ SYM_MASK(SendCtrl, PIOBufAvailUpd));
+ for (i = 0; i < last; i++) {
+ qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl |
+ SYM_MASK(SendCtrl, Disarm) | i);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ }
+
+ tmp_dd_sendctrl = dd->sendctrl;
+
+ if (op & QIB_SENDCTRL_FLUSH)
+ tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Abort);
+ if (op & QIB_SENDCTRL_DISARM)
+ tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) |
+ ((op & QIB_6120_SendCtrl_DisarmPIOBuf_RMASK) <<
+ SYM_LSB(SendCtrl, DisarmPIOBuf));
+ if (op & QIB_SENDCTRL_AVAIL_BLIP)
+ tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, PIOBufAvailUpd);
+
+ qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ if (op & QIB_SENDCTRL_AVAIL_BLIP) {
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+
+ if (op & QIB_SENDCTRL_FLUSH) {
+ u32 v;
+ /*
+ * ensure writes have hit the chip, then do a few
+ * more reads, to allow DMA of the pioavail registers
+ * to occur, so the in-memory copy is in sync with
+ * the chip. It's not always safe to sleep here.
+ */
+ v = qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_scratch, v);
+ v = qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_scratch, v);
+ qib_read_kreg32(dd, kr_scratch);
+ }
+}
+
+/**
+ * qib_portcntr_6120 - read a per-port counter
+ * @ppd: the qlogic_ib port
+ * @reg: the counter to snapshot
+ */
+static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg)
+{
+ u64 ret = 0ULL;
+ struct qib_devdata *dd = ppd->dd;
+ u16 creg;
+ /* 0xffff for unimplemented or synthesized counters */
+ static const u16 xlator[] = {
+ [QIBPORTCNTR_PKTSEND] = cr_pktsend,
+ [QIBPORTCNTR_WORDSEND] = cr_wordsend,
+ [QIBPORTCNTR_PSXMITDATA] = 0xffff,
+ [QIBPORTCNTR_PSXMITPKTS] = 0xffff,
+ [QIBPORTCNTR_PSXMITWAIT] = 0xffff,
+ [QIBPORTCNTR_SENDSTALL] = cr_sendstall,
+ [QIBPORTCNTR_PKTRCV] = cr_pktrcv,
+ [QIBPORTCNTR_PSRCVDATA] = 0xffff,
+ [QIBPORTCNTR_PSRCVPKTS] = 0xffff,
+ [QIBPORTCNTR_RCVEBP] = cr_rcvebp,
+ [QIBPORTCNTR_RCVOVFL] = cr_rcvovfl,
+ [QIBPORTCNTR_WORDRCV] = cr_wordrcv,
+ [QIBPORTCNTR_RXDROPPKT] = cr_rxdroppkt,
+ [QIBPORTCNTR_RXLOCALPHYERR] = 0xffff,
+ [QIBPORTCNTR_RXVLERR] = 0xffff,
+ [QIBPORTCNTR_ERRICRC] = cr_erricrc,
+ [QIBPORTCNTR_ERRVCRC] = cr_errvcrc,
+ [QIBPORTCNTR_ERRLPCRC] = cr_errlpcrc,
+ [QIBPORTCNTR_BADFORMAT] = cr_badformat,
+ [QIBPORTCNTR_ERR_RLEN] = cr_err_rlen,
+ [QIBPORTCNTR_IBSYMBOLERR] = cr_ibsymbolerr,
+ [QIBPORTCNTR_INVALIDRLEN] = cr_invalidrlen,
+ [QIBPORTCNTR_UNSUPVL] = cr_txunsupvl,
+ [QIBPORTCNTR_EXCESSBUFOVFL] = 0xffff,
+ [QIBPORTCNTR_ERRLINK] = cr_errlink,
+ [QIBPORTCNTR_IBLINKDOWN] = cr_iblinkdown,
+ [QIBPORTCNTR_IBLINKERRRECOV] = cr_iblinkerrrecov,
+ [QIBPORTCNTR_LLI] = 0xffff,
+ [QIBPORTCNTR_PSINTERVAL] = 0xffff,
+ [QIBPORTCNTR_PSSTART] = 0xffff,
+ [QIBPORTCNTR_PSSTAT] = 0xffff,
+ [QIBPORTCNTR_VL15PKTDROP] = 0xffff,
+ [QIBPORTCNTR_ERRPKEY] = cr_errpkey,
+ [QIBPORTCNTR_KHDROVFL] = 0xffff,
+ };
+
+ if (reg >= ARRAY_SIZE(xlator)) {
+ qib_devinfo(ppd->dd->pcidev,
+ "Unimplemented portcounter %u\n", reg);
+ goto done;
+ }
+ creg = xlator[reg];
+
+ /* handle counter requests not implemented as chip counters */
+ if (reg == QIBPORTCNTR_LLI)
+ ret = dd->cspec->lli_errs;
+ else if (reg == QIBPORTCNTR_EXCESSBUFOVFL)
+ ret = dd->cspec->overrun_thresh_errs;
+ else if (reg == QIBPORTCNTR_KHDROVFL) {
+ int i;
+
+ /* sum over all kernel contexts */
+ for (i = 0; i < dd->first_user_ctxt; i++)
+ ret += read_6120_creg32(dd, cr_portovfl + i);
+ } else if (reg == QIBPORTCNTR_PSSTAT)
+ ret = dd->cspec->pma_sample_status;
+ if (creg == 0xffff)
+ goto done;
+
+ /*
+ * only fast incrementing counters are 64bit; use 32 bit reads to
+ * avoid two independent reads when on opteron
+ */
+ if (creg == cr_wordsend || creg == cr_wordrcv ||
+ creg == cr_pktsend || creg == cr_pktrcv)
+ ret = read_6120_creg(dd, creg);
+ else
+ ret = read_6120_creg32(dd, creg);
+ if (creg == cr_ibsymbolerr) {
+ if (dd->cspec->ibdeltainprog)
+ ret -= ret - dd->cspec->ibsymsnap;
+ ret -= dd->cspec->ibsymdelta;
+ } else if (creg == cr_iblinkerrrecov) {
+ if (dd->cspec->ibdeltainprog)
+ ret -= ret - dd->cspec->iblnkerrsnap;
+ ret -= dd->cspec->iblnkerrdelta;
+ }
+ if (reg == QIBPORTCNTR_RXDROPPKT) /* add special cased count */
+ ret += dd->cspec->rxfc_unsupvl_errs;
+
+done:
+ return ret;
+}
+
+/*
+ * Device counter names (not port-specific), one line per stat,
+ * single string. Used by utilities like ipathstats to print the stats
+ * in a way which works for different versions of drivers, without changing
+ * the utility. Names need to be 12 chars or less (w/o newline), for proper
+ * display by utility.
+ * Non-error counters are first.
+ * Start of "error" counters is indicated by a leading "E " on the first
+ * "error" counter, and doesn't count in label length.
+ * The EgrOvfl list needs to be last so we truncate them at the configured
+ * context count for the device.
+ * cntr6120indices contains the corresponding register indices.
+ */
+static const char cntr6120names[] =
+ "Interrupts\n"
+ "HostBusStall\n"
+ "E RxTIDFull\n"
+ "RxTIDInvalid\n"
+ "Ctxt0EgrOvfl\n"
+ "Ctxt1EgrOvfl\n"
+ "Ctxt2EgrOvfl\n"
+ "Ctxt3EgrOvfl\n"
+ "Ctxt4EgrOvfl\n";
+
+static const size_t cntr6120indices[] = {
+ cr_lbint,
+ cr_lbflowstall,
+ cr_errtidfull,
+ cr_errtidvalid,
+ cr_portovfl + 0,
+ cr_portovfl + 1,
+ cr_portovfl + 2,
+ cr_portovfl + 3,
+ cr_portovfl + 4,
+};
+
+/*
+ * same as cntr6120names and cntr6120indices, but for port-specific counters.
+ * portcntr6120indices is somewhat complicated by some registers needing
+ * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG
+ */
+static const char portcntr6120names[] =
+ "TxPkt\n"
+ "TxFlowPkt\n"
+ "TxWords\n"
+ "RxPkt\n"
+ "RxFlowPkt\n"
+ "RxWords\n"
+ "TxFlowStall\n"
+ "E IBStatusChng\n"
+ "IBLinkDown\n"
+ "IBLnkRecov\n"
+ "IBRxLinkErr\n"
+ "IBSymbolErr\n"
+ "RxLLIErr\n"
+ "RxBadFormat\n"
+ "RxBadLen\n"
+ "RxBufOvrfl\n"
+ "RxEBP\n"
+ "RxFlowCtlErr\n"
+ "RxICRCerr\n"
+ "RxLPCRCerr\n"
+ "RxVCRCerr\n"
+ "RxInvalLen\n"
+ "RxInvalPKey\n"
+ "RxPktDropped\n"
+ "TxBadLength\n"
+ "TxDropped\n"
+ "TxInvalLen\n"
+ "TxUnderrun\n"
+ "TxUnsupVL\n"
+ ;
+
+#define _PORT_VIRT_FLAG 0x8000 /* "virtual", need adjustments */
+static const size_t portcntr6120indices[] = {
+ QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG,
+ cr_pktsendflow,
+ QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG,
+ cr_pktrcvflowctrl,
+ QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG,
+ cr_ibstatuschange,
+ QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_LLI | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG,
+ cr_rcvflowctrl_err,
+ QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG,
+ cr_invalidslen,
+ cr_senddropped,
+ cr_errslen,
+ cr_sendunderrun,
+ cr_txunsupvl,
+};
+
+/* do all the setup to make the counter reads efficient later */
+static void init_6120_cntrnames(struct qib_devdata *dd)
+{
+ int i, j = 0;
+ char *s;
+
+ for (i = 0, s = (char *)cntr6120names; s && j <= dd->cfgctxts;
+ i++) {
+ /* we always have at least one counter before the egrovfl */
+ if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12))
+ j = 1;
+ s = strchr(s + 1, '\n');
+ if (s && j)
+ j++;
+ }
+ dd->cspec->ncntrs = i;
+ if (!s)
+ /* full list; size is without terminating null */
+ dd->cspec->cntrnamelen = sizeof(cntr6120names) - 1;
+ else
+ dd->cspec->cntrnamelen = 1 + s - cntr6120names;
+ dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
+ * sizeof(u64), GFP_KERNEL);
+ if (!dd->cspec->cntrs)
+ qib_dev_err(dd, "Failed allocation for counters\n");
+
+ for (i = 0, s = (char *)portcntr6120names; s; i++)
+ s = strchr(s + 1, '\n');
+ dd->cspec->nportcntrs = i - 1;
+ dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1;
+ dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
+ * sizeof(u64), GFP_KERNEL);
+ if (!dd->cspec->portcntrs)
+ qib_dev_err(dd, "Failed allocation for portcounters\n");
+}
+
+static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
+ u64 **cntrp)
+{
+ u32 ret;
+
+ if (namep) {
+ ret = dd->cspec->cntrnamelen;
+ if (pos >= ret)
+ ret = 0; /* final read after getting everything */
+ else
+ *namep = (char *)cntr6120names;
+ } else {
+ u64 *cntr = dd->cspec->cntrs;
+ int i;
+
+ ret = dd->cspec->ncntrs * sizeof(u64);
+ if (!cntr || pos >= ret) {
+ /* everything read, or couldn't get memory */
+ ret = 0;
+ goto done;
+ }
+ if (pos >= ret) {
+ ret = 0; /* final read after getting everything */
+ goto done;
+ }
+ *cntrp = cntr;
+ for (i = 0; i < dd->cspec->ncntrs; i++)
+ *cntr++ = read_6120_creg32(dd, cntr6120indices[i]);
+ }
+done:
+ return ret;
+}
+
+static u32 qib_read_6120portcntrs(struct qib_devdata *dd, loff_t pos, u32 port,
+ char **namep, u64 **cntrp)
+{
+ u32 ret;
+
+ if (namep) {
+ ret = dd->cspec->portcntrnamelen;
+ if (pos >= ret)
+ ret = 0; /* final read after getting everything */
+ else
+ *namep = (char *)portcntr6120names;
+ } else {
+ u64 *cntr = dd->cspec->portcntrs;
+ struct qib_pportdata *ppd = &dd->pport[port];
+ int i;
+
+ ret = dd->cspec->nportcntrs * sizeof(u64);
+ if (!cntr || pos >= ret) {
+ /* everything read, or couldn't get memory */
+ ret = 0;
+ goto done;
+ }
+ *cntrp = cntr;
+ for (i = 0; i < dd->cspec->nportcntrs; i++) {
+ if (portcntr6120indices[i] & _PORT_VIRT_FLAG)
+ *cntr++ = qib_portcntr_6120(ppd,
+ portcntr6120indices[i] &
+ ~_PORT_VIRT_FLAG);
+ else
+ *cntr++ = read_6120_creg32(dd,
+ portcntr6120indices[i]);
+ }
+ }
+done:
+ return ret;
+}
+
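+/*
+ * Verify that the error mask register still matches the shadow copy;
+ * if it has changed underneath us, restore it and, when hardware
+ * errors or freeze mode are also present, clear the status registers
+ * and report what was found.
+ */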
+static void qib_chk_6120_errormask(struct qib_devdata *dd)
+{
+ static u32 fixed;
+ u32 ctrl;
+ unsigned long errormask;
+ unsigned long hwerrs;
+
+ if (!dd->cspec->errormask || !(dd->flags & QIB_INITTED))
+ return;
+
+ errormask = qib_read_kreg64(dd, kr_errmask);
+
+ if (errormask == dd->cspec->errormask)
+ return;
+ fixed++;
+
+ hwerrs = qib_read_kreg64(dd, kr_hwerrstatus);
+ ctrl = qib_read_kreg32(dd, kr_control);
+
+ qib_write_kreg(dd, kr_errmask,
+ dd->cspec->errormask);
+
+ if ((hwerrs & dd->cspec->hwerrmask) ||
+ (ctrl & QLOGIC_IB_C_FREEZEMODE)) {
+ qib_write_kreg(dd, kr_hwerrclear, 0ULL);
+ qib_write_kreg(dd, kr_errclear, 0ULL);
+ /* force re-interrupt of pending events, just in case */
+ qib_write_kreg(dd, kr_intclear, 0ULL);
+ qib_devinfo(dd->pcidev,
+ "errormask fixed(%u) %lx->%lx, ctrl %x hwerr %lx\n",
+ fixed, errormask, (unsigned long)dd->cspec->errormask,
+ ctrl, hwerrs);
+ }
+}
+
+/**
+ * qib_get_6120_faststats - get word counters from chip before they overflow
+ * @opaque: contains a pointer to the qlogic_ib device qib_devdata
+ *
+ * This needs more work; in particular, a decision on whether we really
+ * need traffic_wds done the way it is.
+ * Called from add_timer.
+ */
+static void qib_get_6120_faststats(unsigned long opaque)
+{
+ struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ struct qib_pportdata *ppd = dd->pport;
+ unsigned long flags;
+ u64 traffic_wds;
+
+ /*
+ * don't access the chip while running diags, or memory diags can
+ * fail
+ */
+ if (!(dd->flags & QIB_INITTED) || dd->diag_client)
+ /* but re-arm the timer, for the diags case; won't hurt otherwise */
+ goto done;
+
+ /*
+ * We now try to maintain an activity timer, based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ traffic_wds = qib_portcntr_6120(ppd, cr_wordsend) +
+ qib_portcntr_6120(ppd, cr_wordrcv);
+ spin_lock_irqsave(&dd->eep_st_lock, flags);
+ traffic_wds -= dd->traffic_wds;
+ dd->traffic_wds += traffic_wds;
+ if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(5, &dd->active_time); /* S/B #define */
+ spin_unlock_irqrestore(&dd->eep_st_lock, flags);
+
+ qib_chk_6120_errormask(dd);
+done:
+ mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
+}
+
+/* no interrupt fallback for these chips */
+static int qib_6120_nointr_fallback(struct qib_devdata *dd)
+{
+ return 0;
+}
+
+/*
+ * reset the XGXS (between serdes and IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well.
+ */
+static void qib_6120_xgxs_reset(struct qib_pportdata *ppd)
+{
+ u64 val, prev_val;
+ struct qib_devdata *dd = ppd->dd;
+
+ prev_val = qib_read_kreg64(dd, kr_xgxs_cfg);
+ val = prev_val | QLOGIC_IB_XGXS_RESET;
+ prev_val &= ~QLOGIC_IB_XGXS_RESET; /* be sure */
+ qib_write_kreg(dd, kr_control,
+ dd->control & ~QLOGIC_IB_C_LINKENABLE);
+ qib_write_kreg(dd, kr_xgxs_cfg, val);
+ qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_xgxs_cfg, prev_val);
+ qib_write_kreg(dd, kr_control, dd->control);
+}
+
+static int qib_6120_get_ib_cfg(struct qib_pportdata *ppd, int which)
+{
+ int ret;
+
+ switch (which) {
+ case QIB_IB_CFG_LWID:
+ ret = ppd->link_width_active;
+ break;
+
+ case QIB_IB_CFG_SPD:
+ ret = ppd->link_speed_active;
+ break;
+
+ case QIB_IB_CFG_LWID_ENB:
+ ret = ppd->link_width_enabled;
+ break;
+
+ case QIB_IB_CFG_SPD_ENB:
+ ret = ppd->link_speed_enabled;
+ break;
+
+ case QIB_IB_CFG_OP_VLS:
+ ret = ppd->vls_operational;
+ break;
+
+ case QIB_IB_CFG_VL_HIGH_CAP:
+ ret = 0;
+ break;
+
+ case QIB_IB_CFG_VL_LOW_CAP:
+ ret = 0;
+ break;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ ret = SYM_FIELD(ppd->dd->cspec->ibcctrl, IBCCtrl,
+ OverrunThreshold);
+ break;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ ret = SYM_FIELD(ppd->dd->cspec->ibcctrl, IBCCtrl,
+ PhyerrThreshold);
+ break;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ ret = (ppd->dd->cspec->ibcctrl &
+ SYM_MASK(IBCCtrl, LinkDownDefaultState)) ?
+ IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL;
+ break;
+
+ case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */
+ ret = 0; /* no heartbeat on this chip */
+ break;
+
+ case QIB_IB_CFG_PMA_TICKS:
+ ret = 250; /* 1 usec. */
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * We assume range checking is already done, if needed.
+ */
+static int qib_6120_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret = 0;
+ u64 val64;
+ u16 lcmd, licmd;
+
+ switch (which) {
+ case QIB_IB_CFG_LWID_ENB:
+ ppd->link_width_enabled = val;
+ break;
+
+ case QIB_IB_CFG_SPD_ENB:
+ ppd->link_speed_enabled = val;
+ break;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ val64 = SYM_FIELD(dd->cspec->ibcctrl, IBCCtrl,
+ OverrunThreshold);
+ if (val64 != val) {
+ dd->cspec->ibcctrl &=
+ ~SYM_MASK(IBCCtrl, OverrunThreshold);
+ dd->cspec->ibcctrl |= (u64) val <<
+ SYM_LSB(IBCCtrl, OverrunThreshold);
+ qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ break;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ val64 = SYM_FIELD(dd->cspec->ibcctrl, IBCCtrl,
+ PhyerrThreshold);
+ if (val64 != val) {
+ dd->cspec->ibcctrl &=
+ ~SYM_MASK(IBCCtrl, PhyerrThreshold);
+ dd->cspec->ibcctrl |= (u64) val <<
+ SYM_LSB(IBCCtrl, PhyerrThreshold);
+ qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ break;
+
+ case QIB_IB_CFG_PKEYS: /* update pkeys */
+ val64 = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) |
+ ((u64) ppd->pkeys[2] << 32) |
+ ((u64) ppd->pkeys[3] << 48);
+ qib_write_kreg(dd, kr_partitionkey, val64);
+ break;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ if (val == IB_LINKINITCMD_POLL)
+ dd->cspec->ibcctrl &=
+ ~SYM_MASK(IBCCtrl, LinkDownDefaultState);
+ else /* SLEEP */
+ dd->cspec->ibcctrl |=
+ SYM_MASK(IBCCtrl, LinkDownDefaultState);
+ qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ break;
+
+ case QIB_IB_CFG_MTU: /* update the MTU in IBC */
+ /*
+ * Update our housekeeping variables, and set IBC max
+ * size, same as init code; max IBC is max we allow in
+ * buffer, less the qword pbc, plus 1 for ICRC, in dwords
+ * Set even if it's unchanged, print debug message only
+ * on changes.
+ */
+ val = (ppd->ibmaxlen >> 2) + 1;
+ dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, MaxPktLen);
+ dd->cspec->ibcctrl |= (u64)val <<
+ SYM_LSB(IBCCtrl, MaxPktLen);
+ qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ break;
+
+ case QIB_IB_CFG_LSTATE: /* set the IB link state */
+ switch (val & 0xffff0000) {
+ case IB_LINKCMD_DOWN:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN;
+ if (!dd->cspec->ibdeltainprog) {
+ dd->cspec->ibdeltainprog = 1;
+ dd->cspec->ibsymsnap =
+ read_6120_creg32(dd, cr_ibsymbolerr);
+ dd->cspec->iblnkerrsnap =
+ read_6120_creg32(dd, cr_iblinkerrrecov);
+ }
+ break;
+
+ case IB_LINKCMD_ARMED:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED;
+ break;
+
+ case IB_LINKCMD_ACTIVE:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE;
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16);
+ goto bail;
+ }
+ switch (val & 0xffff) {
+ case IB_LINKINITCMD_NOP:
+ licmd = 0;
+ break;
+
+ case IB_LINKINITCMD_POLL:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL;
+ break;
+
+ case IB_LINKINITCMD_SLEEP:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP;
+ break;
+
+ case IB_LINKINITCMD_DISABLE:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE;
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkinitcmd req 0x%x\n",
+ val & 0xffff);
+ goto bail;
+ }
+ qib_set_ib_6120_lstate(ppd, lcmd, licmd);
+ goto bail;
+
+ case QIB_IB_CFG_HRTBT:
+ ret = -EINVAL;
+ break;
+
+ default:
+ ret = -EINVAL;
+ }
+bail:
+ return ret;
+}
+
+static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what)
+{
+ int ret = 0;
+ if (!strncmp(what, "ibc", 3)) {
+ ppd->dd->cspec->ibcctrl |= SYM_MASK(IBCCtrl, Loopback);
+ qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n",
+ ppd->dd->unit, ppd->port);
+ } else if (!strncmp(what, "off", 3)) {
+ ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
+ } else
+ ret = -EINVAL;
+ if (!ret) {
+ qib_write_kreg(ppd->dd, kr_ibcctrl, ppd->dd->cspec->ibcctrl);
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+ }
+ return ret;
+}
+
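+/*
+ * PMA sample timer: on the first expiry (sample start) snapshot the
+ * counters and re-arm for the sample interval; on the next expiry
+ * compute the deltas and mark the sample done.
+ */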
+static void pma_6120_timer(unsigned long data)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)data;
+ struct qib_chip_specific *cs = ppd->dd->cspec;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ibp->lock, flags);
+ if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) {
+ cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
+ qib_snapshot_counters(ppd, &cs->sword, &cs->rword,
+ &cs->spkts, &cs->rpkts, &cs->xmit_wait);
+ mod_timer(&cs->pma_timer,
+ jiffies + usecs_to_jiffies(ibp->pma_sample_interval));
+ } else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
+ u64 ta, tb, tc, td, te;
+
+ cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
+ qib_snapshot_counters(ppd, &ta, &tb, &tc, &td, &te);
+
+ cs->sword = ta - cs->sword;
+ cs->rword = tb - cs->rword;
+ cs->spkts = tc - cs->spkts;
+ cs->rpkts = td - cs->rpkts;
+ cs->xmit_wait = te - cs->xmit_wait;
+ }
+ spin_unlock_irqrestore(&ibp->lock, flags);
+}
+
+/*
+ * Note that the caller has the ibp->lock held.
+ */
+static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv,
+ u32 start)
+{
+ struct qib_chip_specific *cs = ppd->dd->cspec;
+
+ if (start && intv) {
+ cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED;
+ mod_timer(&cs->pma_timer, jiffies + usecs_to_jiffies(start));
+ } else if (intv) {
+ cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
+ qib_snapshot_counters(ppd, &cs->sword, &cs->rword,
+ &cs->spkts, &cs->rpkts, &cs->xmit_wait);
+ mod_timer(&cs->pma_timer, jiffies + usecs_to_jiffies(intv));
+ } else {
+ cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
+ cs->sword = 0;
+ cs->rword = 0;
+ cs->spkts = 0;
+ cs->rpkts = 0;
+ cs->xmit_wait = 0;
+ }
+}
+
+static u32 qib_6120_iblink_state(u64 ibcs)
+{
+ u32 state = (u32)SYM_FIELD(ibcs, IBCStatus, LinkState);
+
+ switch (state) {
+ case IB_6120_L_STATE_INIT:
+ state = IB_PORT_INIT;
+ break;
+ case IB_6120_L_STATE_ARM:
+ state = IB_PORT_ARMED;
+ break;
+ case IB_6120_L_STATE_ACTIVE:
+ /* fall through */
+ case IB_6120_L_STATE_ACT_DEFER:
+ state = IB_PORT_ACTIVE;
+ break;
+ default: /* fall through */
+ case IB_6120_L_STATE_DOWN:
+ state = IB_PORT_DOWN;
+ break;
+ }
+ return state;
+}
+
+/* returns the IBTA port state, rather than the IBC link training state */
+static u8 qib_6120_phys_portstate(u64 ibcs)
+{
+ u8 state = (u8)SYM_FIELD(ibcs, IBCStatus, LinkTrainingState);
+ return qib_6120_physportstate[state];
+}
+
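+/*
+ * Called on IB link up/down transitions. Maintains snapshots/deltas
+ * of the symbol error and link error recovery counters so that errors
+ * accumulated while the link was down can be subtracted from what is
+ * reported, and updates the external link LEDs.
+ */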
+static int qib_6120_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+
+ if (ibup) {
+ if (ppd->dd->cspec->ibdeltainprog) {
+ ppd->dd->cspec->ibdeltainprog = 0;
+ ppd->dd->cspec->ibsymdelta +=
+ read_6120_creg32(ppd->dd, cr_ibsymbolerr) -
+ ppd->dd->cspec->ibsymsnap;
+ ppd->dd->cspec->iblnkerrdelta +=
+ read_6120_creg32(ppd->dd, cr_iblinkerrrecov) -
+ ppd->dd->cspec->iblnkerrsnap;
+ }
+ qib_hol_init(ppd);
+ } else {
+ ppd->dd->cspec->lli_counter = 0;
+ if (!ppd->dd->cspec->ibdeltainprog) {
+ ppd->dd->cspec->ibdeltainprog = 1;
+ ppd->dd->cspec->ibsymsnap =
+ read_6120_creg32(ppd->dd, cr_ibsymbolerr);
+ ppd->dd->cspec->iblnkerrsnap =
+ read_6120_creg32(ppd->dd, cr_iblinkerrrecov);
+ }
+ qib_hol_down(ppd);
+ }
+
+ qib_6120_setup_setextled(ppd, ibup);
+
+ return 0;
+}
+
+/* Does read/modify/write to appropriate registers to
+ * set output and direction bits selected by mask.
+ * These are in their canonical positions (e.g. lsb of
+ * dir will end up in D48 of extctrl on existing chips).
+ * Returns contents of GP Inputs.
+ */
+static int gpio_6120_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask)
+{
+ u64 read_val, new_out;
+ unsigned long flags;
+
+ if (mask) {
+ /* some bits being written, lock access to GPIO */
+ dir &= mask;
+ out &= mask;
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe));
+ dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe));
+ new_out = (dd->cspec->gpio_out & ~mask) | out;
+
+ qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl);
+ qib_write_kreg(dd, kr_gpio_out, new_out);
+ dd->cspec->gpio_out = new_out;
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+ }
+ /*
+ * It is unlikely that a read at this time would get valid
+ * data on a pin whose direction line was set in the same
+ * call to this function. We include the read here because
+ * that allows us to potentially combine a change on one pin with
+ * a read on another, and because the old code did something like
+ * this.
+ */
+ read_val = qib_read_kreg64(dd, kr_extstatus);
+ return SYM_FIELD(read_val, EXTStatus, GPIOIn);
+}
+
+/*
+ * Read fundamental info we need to use the chip. These are
+ * the registers that describe chip capabilities, and are
+ * saved in shadow registers.
+ */
+static void get_6120_chip_params(struct qib_devdata *dd)
+{
+ u64 val;
+ u32 piobufs;
+ int mtu;
+
+ dd->uregbase = qib_read_kreg32(dd, kr_userregbase);
+
+ dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt);
+ dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase);
+ dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase);
+ dd->palign = qib_read_kreg32(dd, kr_palign);
+ dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase);
+ dd->pio2k_bufbase = dd->piobufbase & 0xffffffff;
+
+ dd->rcvhdrcnt = qib_read_kreg32(dd, kr_rcvegrcnt);
+
+ val = qib_read_kreg64(dd, kr_sendpiosize);
+ dd->piosize2k = val & ~0U;
+ dd->piosize4k = val >> 32;
+
+ mtu = ib_mtu_enum_to_int(qib_ibmtu);
+ if (mtu == -1)
+ mtu = QIB_DEFAULT_MTU;
+ dd->pport->ibmtu = (u32)mtu;
+
+ val = qib_read_kreg64(dd, kr_sendpiobufcnt);
+ dd->piobcnt2k = val & ~0U;
+ dd->piobcnt4k = val >> 32;
+ dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1;
+ /* these may be adjusted in init_chip_wc_pat() */
+ dd->pio2kbase = (u32 __iomem *)
+ (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase);
+ if (dd->piobcnt4k) {
+ dd->pio4kbase = (u32 __iomem *)
+ (((char __iomem *) dd->kregbase) +
+ (dd->piobufbase >> 32));
+ /*
+ * 4K buffers take 2 pages; we use roundup just to be
+ * paranoid; we calculate it once here, rather than on
+ * every buf allocation
+ */
+ dd->align4k = ALIGN(dd->piosize4k, dd->palign);
+ }
+
+ piobufs = dd->piobcnt4k + dd->piobcnt2k;
+
+ dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) /
+ (sizeof(u64) * BITS_PER_BYTE / 2);
+}
+
+/*
+ * The chip base addresses in cspec and cpspec have to be set
+ * after possible init_chip_wc_pat(), rather than in
+ * get_6120_chip_params(), so split out as separate function
+ */
+static void set_6120_baseaddrs(struct qib_devdata *dd)
+{
+ u32 cregbase;
+ cregbase = qib_read_kreg32(dd, kr_counterregbase);
+ dd->cspec->cregbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase + cregbase);
+
+ dd->egrtidbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase + dd->rcvegrbase);
+}
+
+/*
+ * Write the final few registers that depend on some of the
+ * init setup. Done late in init, just before bringing up
+ * the serdes.
+ */
+static int qib_late_6120_initreg(struct qib_devdata *dd)
+{
+ int ret = 0;
+ u64 val;
+
+ qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize);
+ qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize);
+ qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt);
+ qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
+ val = qib_read_kreg64(dd, kr_sendpioavailaddr);
+ if (val != dd->pioavailregs_phys) {
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
+static int init_6120_variables(struct qib_devdata *dd)
+{
+ int ret = 0;
+ struct qib_pportdata *ppd;
+ u32 sbufs;
+
+ ppd = (struct qib_pportdata *)(dd + 1);
+ dd->pport = ppd;
+ dd->num_pports = 1;
+
+ dd->cspec = (struct qib_chip_specific *)(ppd + dd->num_pports);
+ ppd->cpspec = NULL; /* not used in this chip */
+
+ spin_lock_init(&dd->cspec->kernel_tid_lock);
+ spin_lock_init(&dd->cspec->user_tid_lock);
+ spin_lock_init(&dd->cspec->rcvmod_lock);
+ spin_lock_init(&dd->cspec->gpio_lock);
+
+ /* we haven't yet set QIB_PRESENT, so use read directly */
+ dd->revision = readq(&dd->kregbase[kr_revision]);
+
+ if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
+ ret = -ENODEV;
+ goto bail;
+ }
+ dd->flags |= QIB_PRESENT; /* now register routines work */
+
+ dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R,
+ ChipRevMajor);
+ dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R,
+ ChipRevMinor);
+
+ get_6120_chip_params(dd);
+ pe_boardname(dd); /* fill in boardname */
+
+ /*
+ * GPIO bits for TWSI data and clock,
+ * used for serial EEPROM.
+ */
+ dd->gpio_sda_num = _QIB_GPIO_SDA_NUM;
+ dd->gpio_scl_num = _QIB_GPIO_SCL_NUM;
+ dd->twsi_eeprom_dev = QIB_TWSI_NO_DEV;
+
+ if (qib_unordered_wc())
+ dd->flags |= QIB_PIO_FLUSH_WC;
+
+ /*
+ * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+ * 2 is Some Misc, 3 is reserved for future.
+ */
+ dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr);
+
+ /* Ignore errors in PIO/PBC on systems with unordered write-combining */
+ if (qib_unordered_wc())
+ dd->eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
+
+ dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr);
+
+ dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
+
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
+ ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ ppd->link_speed_supported = QIB_IB_SDR;
+ ppd->link_width_enabled = IB_WIDTH_4X;
+ ppd->link_speed_enabled = ppd->link_speed_supported;
+ /* these can't change for this chip, so set once */
+ ppd->link_width_active = ppd->link_width_enabled;
+ ppd->link_speed_active = ppd->link_speed_enabled;
+ ppd->vls_supported = IB_VL_VL0;
+ ppd->vls_operational = ppd->vls_supported;
+
+ dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
+ dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
+ dd->rhf_offset = 0;
+
+ /* we always allocate at least 2048 bytes for eager buffers */
+ ret = ib_mtu_enum_to_int(qib_ibmtu);
+ dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+ BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+ dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
+
+ qib_6120_tidtemplate(dd);
+
+ /*
+ * We can request a receive interrupt for 1 or
+ * more packets from current offset. For now, we set this
+ * up for a single packet.
+ */
+ dd->rhdrhead_intr_off = 1ULL << 32;
+
+ /* setup the stats timer; the add_timer is done at end of init */
+ init_timer(&dd->stats_timer);
+ dd->stats_timer.function = qib_get_6120_faststats;
+ dd->stats_timer.data = (unsigned long) dd;
+
+ init_timer(&dd->cspec->pma_timer);
+ dd->cspec->pma_timer.function = pma_6120_timer;
+ dd->cspec->pma_timer.data = (unsigned long) ppd;
+
+ dd->ureg_align = qib_read_kreg32(dd, kr_palign);
+
+ dd->piosize2kmax_dwords = dd->piosize2k >> 2;
+ qib_6120_config_ctxts(dd);
+ qib_set_ctxtcnt(dd);
+
+ if (qib_wc_pat) {
+ ret = init_chip_wc_pat(dd, 0);
+ if (ret)
+ goto bail;
+ }
+ set_6120_baseaddrs(dd); /* set chip access pointers now */
+
+ ret = 0;
+ if (qib_mini_init)
+ goto bail;
+
+ qib_num_cfg_vls = 1; /* if any 6120's, only one VL */
+
+ ret = qib_create_ctxts(dd);
+ init_6120_cntrnames(dd);
+
+ /* use all of 4KB buffers for the kernel, otherwise 16 */
+ sbufs = dd->piobcnt4k ? dd->piobcnt4k : 16;
+
+ dd->lastctxt_piobuf = dd->piobcnt2k + dd->piobcnt4k - sbufs;
+ dd->pbufsctxt = dd->lastctxt_piobuf /
+ (dd->cfgctxts - dd->first_user_ctxt);
+
+ if (ret)
+ goto bail;
+bail:
+ return ret;
+}
+
+/*
+ * For this chip, we want to use the same buffer every time
+ * when we are trying to bring the link up (they are always VL15
+ * packets). At that link state the packet should always go out immediately
+ * (or at least be discarded at the tx interface if the link is down).
+ * If it doesn't, and the buffer isn't available, that means some other
+ * sender has gotten ahead of us, and is preventing our packet from going
+ * out. In that case, we flush all packets, and try again. If that still
+ * fails, we fail the request, and hope things work the next time around.
+ *
+ * We don't need very complicated heuristics on whether the packet had
+ * time to go out or not, since even at SDR 1X, it goes out in very short
+ * time periods, covered by the chip reads done here and as part of the
+ * flush.
+ */
+static u32 __iomem *get_6120_link_buf(struct qib_pportdata *ppd, u32 *bnum)
+{
+ u32 __iomem *buf;
+ u32 lbuf = ppd->dd->piobcnt2k + ppd->dd->piobcnt4k - 1;
+
+ /*
+ * always blip to get avail list updated, since it's almost
+ * always needed, and is fairly cheap.
+ */
+ sendctrl_6120_mod(ppd->dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */
+ buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf);
+ if (buf)
+ goto done;
+
+ sendctrl_6120_mod(ppd, QIB_SENDCTRL_DISARM_ALL | QIB_SENDCTRL_FLUSH |
+ QIB_SENDCTRL_AVAIL_BLIP);
+ ppd->dd->upd_pio_shadow = 1; /* update our idea of what's busy */
+ qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */
+ buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf);
+done:
+ return buf;
+}
+
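+/*
+ * Pick a PIO send buffer. VL15 packets sent while the link is still
+ * coming up use the dedicated link buffer; otherwise use a 2K buffer
+ * when the packet fits (trying 4K if all 2K are busy), else go
+ * straight to the 4K buffers.
+ */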
+static u32 __iomem *qib_6120_getsendbuf(struct qib_pportdata *ppd, u64 pbc,
+ u32 *pbufnum)
+{
+ u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK;
+ struct qib_devdata *dd = ppd->dd;
+ u32 __iomem *buf;
+
+ if (((pbc >> 32) & PBC_6120_VL15_SEND_CTRL) &&
+ !(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKACTIVE)))
+ buf = get_6120_link_buf(ppd, pbufnum);
+ else {
+
+ if ((plen + 1) > dd->piosize2kmax_dwords)
+ first = dd->piobcnt2k;
+ else
+ first = 0;
+ /* try 4k if all 2k busy, so same last for both sizes */
+ last = dd->piobcnt2k + dd->piobcnt4k - 1;
+ buf = qib_getsendbuf_range(dd, pbufnum, first, last);
+ }
+ return buf;
+}
+
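+/*
+ * This chip has no send DMA engine (init returns -ENODEV), so the
+ * SDMA hooks below are stubs that let the common code call them
+ * unconditionally.
+ */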
+static int init_sdma_6120_regs(struct qib_pportdata *ppd)
+{
+ return -ENODEV;
+}
+
+static u16 qib_sdma_6120_gethead(struct qib_pportdata *ppd)
+{
+ return 0;
+}
+
+static int qib_sdma_6120_busy(struct qib_pportdata *ppd)
+{
+ return 0;
+}
+
+static void qib_sdma_update_6120_tail(struct qib_pportdata *ppd, u16 tail)
+{
+}
+
+static void qib_6120_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op)
+{
+}
+
+static void qib_sdma_set_6120_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
+{
+}
+
+/*
+ * the pbc doesn't need a VL15 indicator, but we need it for link_buf.
+ * The chip ignores the bit if set.
+ */
+static u32 qib_6120_setpbc_control(struct qib_pportdata *ppd, u32 plen,
+ u8 srate, u8 vl)
+{
+ return vl == 15 ? PBC_6120_VL15_SEND_CTRL : 0;
+}
+
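+/* No special VL15 send buffer setup is needed on this chip. */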
+static void qib_6120_initvl15_bufs(struct qib_devdata *dd)
+{
+}
+
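+/*
+ * Each context gets the same number of eager buffers, laid out
+ * contiguously in the eager TID array.
+ */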
+static void qib_6120_init_ctxt(struct qib_ctxtdata *rcd)
+{
+ rcd->rcvegrcnt = rcd->dd->rcvhdrcnt;
+ rcd->rcvegr_tid_base = rcd->ctxt * rcd->rcvegrcnt;
+}
+
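+/* The 6120 has no send buffer checking to update, so this is a no-op. */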
+static void qib_6120_txchk_change(struct qib_devdata *dd, u32 start,
+ u32 len, u32 avail, struct qib_ctxtdata *rcd)
+{
+}
+
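+/*
+ * Simple wrapper so writing the scratch register can be done through
+ * the dd->f_writescratch hook.
+ */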
+static void writescratch(struct qib_devdata *dd, u32 val)
+{
+ (void) qib_write_kreg(dd, kr_scratch, val);
+}
+
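+/* Reading the temperature sensor is not supported on this chip. */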
+static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum)
+{
+ return -ENXIO;
+}
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ return 0;
+}
+#endif
+
+/* Dummy function, as 6120 boards never disable EEPROM Write */
+static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen)
+{
+ return 1;
+}
+
+/**
+ * qib_init_iba6120_funcs - set up the chip-specific function pointers
+ * @pdev: pci_dev of the qlogic_ib device
+ * @ent: pci_device_id matching this chip
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ *
+ * It also allocates/partially-inits the qib_devdata struct for
+ * this device.
+ */
+struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct qib_devdata *dd;
+ int ret;
+
+ dd = qib_alloc_devdata(pdev, sizeof(struct qib_pportdata) +
+ sizeof(struct qib_chip_specific));
+ if (IS_ERR(dd))
+ goto bail;
+
+ dd->f_bringup_serdes = qib_6120_bringup_serdes;
+ dd->f_cleanup = qib_6120_setup_cleanup;
+ dd->f_clear_tids = qib_6120_clear_tids;
+ dd->f_free_irq = qib_6120_free_irq;
+ dd->f_get_base_info = qib_6120_get_base_info;
+ dd->f_get_msgheader = qib_6120_get_msgheader;
+ dd->f_getsendbuf = qib_6120_getsendbuf;
+ dd->f_gpio_mod = gpio_6120_mod;
+ dd->f_eeprom_wen = qib_6120_eeprom_wen;
+ dd->f_hdrqempty = qib_6120_hdrqempty;
+ dd->f_ib_updown = qib_6120_ib_updown;
+ dd->f_init_ctxt = qib_6120_init_ctxt;
+ dd->f_initvl15_bufs = qib_6120_initvl15_bufs;
+ dd->f_intr_fallback = qib_6120_nointr_fallback;
+ dd->f_late_initreg = qib_late_6120_initreg;
+ dd->f_setpbc_control = qib_6120_setpbc_control;
+ dd->f_portcntr = qib_portcntr_6120;
+ dd->f_put_tid = (dd->minrev >= 2) ?
+ qib_6120_put_tid_2 :
+ qib_6120_put_tid;
+ dd->f_quiet_serdes = qib_6120_quiet_serdes;
+ dd->f_rcvctrl = rcvctrl_6120_mod;
+ dd->f_read_cntrs = qib_read_6120cntrs;
+ dd->f_read_portcntrs = qib_read_6120portcntrs;
+ dd->f_reset = qib_6120_setup_reset;
+ dd->f_init_sdma_regs = init_sdma_6120_regs;
+ dd->f_sdma_busy = qib_sdma_6120_busy;
+ dd->f_sdma_gethead = qib_sdma_6120_gethead;
+ dd->f_sdma_sendctrl = qib_6120_sdma_sendctrl;
+ dd->f_sdma_set_desc_cnt = qib_sdma_set_6120_desc_cnt;
+ dd->f_sdma_update_tail = qib_sdma_update_6120_tail;
+ dd->f_sendctrl = sendctrl_6120_mod;
+ dd->f_set_armlaunch = qib_set_6120_armlaunch;
+ dd->f_set_cntr_sample = qib_set_cntr_6120_sample;
+ dd->f_iblink_state = qib_6120_iblink_state;
+ dd->f_ibphys_portstate = qib_6120_phys_portstate;
+ dd->f_get_ib_cfg = qib_6120_get_ib_cfg;
+ dd->f_set_ib_cfg = qib_6120_set_ib_cfg;
+ dd->f_set_ib_loopback = qib_6120_set_loopback;
+ dd->f_set_intr_state = qib_6120_set_intr_state;
+ dd->f_setextled = qib_6120_setup_setextled;
+ dd->f_txchk_change = qib_6120_txchk_change;
+ dd->f_update_usrhead = qib_update_6120_usrhead;
+ dd->f_wantpiobuf_intr = qib_wantpiobuf_6120_intr;
+ dd->f_xgxs_reset = qib_6120_xgxs_reset;
+ dd->f_writescratch = writescratch;
+ dd->f_tempsense_rd = qib_6120_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_6120_notify_dca;
+#endif
+ /*
+ * Do remaining pcie setup and save pcie values in dd.
+ * Any error printing is already done by the init code.
+ * On return, we have the chip mapped and accessible,
+ * but chip registers are not set up until start of
+ * init_6120_variables.
+ */
+ ret = qib_pcie_ddinit(dd, pdev, ent);
+ if (ret < 0)
+ goto bail_free;
+
+ /* initialize chip-specific variables */
+ ret = init_6120_variables(dd);
+ if (ret)
+ goto bail_cleanup;
+
+ if (qib_mini_init)
+ goto bail;
+
+ if (qib_pcie_params(dd, 8, NULL, NULL))
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
+ dd->cspec->irq = pdev->irq; /* save IRQ */
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ qib_write_kreg(dd, kr_hwdiagctrl, 0);
+
+ if (qib_read_kreg64(dd, kr_hwerrstatus) &
+ QLOGIC_IB_HWE_SERDESPLLFAILED)
+ qib_write_kreg(dd, kr_hwerrclear,
+ QLOGIC_IB_HWE_SERDESPLLFAILED);
+
+ /* setup interrupt handler (interrupt type handled above) */
+ qib_setup_6120_interrupt(dd);
+ /* Note that qpn_mask is set by qib_6120_config_ctxts() first */
+ qib_6120_init_hwerrors(dd);
+
+ goto bail;
+
+bail_cleanup:
+ qib_pcie_ddcleanup(dd);
+bail_free:
+ qib_free_devdata(dd);
+ dd = ERR_PTR(ret);
+bail:
+ return dd;
+}
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
new file mode 100644
index 00000000000..7dec89fdc12
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -0,0 +1,4659 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * QLogic_IB 7220 chip (except that specific to the SerDes)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <rdma/ib_verbs.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+static void qib_setup_7220_setextled(struct qib_pportdata *, u32);
+static void qib_7220_handle_hwerrors(struct qib_devdata *, char *, size_t);
+static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op);
+static u32 qib_7220_iblink_state(u64);
+static u8 qib_7220_phys_portstate(u64);
+static void qib_sdma_update_7220_tail(struct qib_pportdata *, u16);
+static void qib_set_ib_7220_lstate(struct qib_pportdata *, u16, u16);
+
+/*
+ * This file contains almost all the chip-specific register information and
+ * access functions for the QLogic QLogic_IB 7220 PCI-Express chip, with the
+ * exception of SerDes support, which is in qib_sd7220.c.
+ */
+
+/* Below uses machine-generated qib_chipnum_regs.h file */
+#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64))
+
+/* Use defines to tie machine-generated names to lower-case names */
+#define kr_control KREG_IDX(Control)
+#define kr_counterregbase KREG_IDX(CntrRegBase)
+#define kr_errclear KREG_IDX(ErrClear)
+#define kr_errmask KREG_IDX(ErrMask)
+#define kr_errstatus KREG_IDX(ErrStatus)
+#define kr_extctrl KREG_IDX(EXTCtrl)
+#define kr_extstatus KREG_IDX(EXTStatus)
+#define kr_gpio_clear KREG_IDX(GPIOClear)
+#define kr_gpio_mask KREG_IDX(GPIOMask)
+#define kr_gpio_out KREG_IDX(GPIOOut)
+#define kr_gpio_status KREG_IDX(GPIOStatus)
+#define kr_hrtbt_guid KREG_IDX(HRTBT_GUID)
+#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl)
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_ibcctrl KREG_IDX(IBCCtrl)
+#define kr_ibcddrctrl KREG_IDX(IBCDDRCtrl)
+#define kr_ibcddrstatus KREG_IDX(IBCDDRStatus)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl)
+#define kr_intclear KREG_IDX(IntClear)
+#define kr_intmask KREG_IDX(IntMask)
+#define kr_intstatus KREG_IDX(IntStatus)
+#define kr_ncmodectrl KREG_IDX(IBNCModeCtrl)
+#define kr_palign KREG_IDX(PageAlign)
+#define kr_partitionkey KREG_IDX(RcvPartitionKey)
+#define kr_portcnt KREG_IDX(PortCnt)
+#define kr_rcvbthqp KREG_IDX(RcvBTHQP)
+#define kr_rcvctrl KREG_IDX(RcvCtrl)
+#define kr_rcvegrbase KREG_IDX(RcvEgrBase)
+#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt)
+#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt)
+#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize)
+#define kr_rcvhdrsize KREG_IDX(RcvHdrSize)
+#define kr_rcvpktledcnt KREG_IDX(RcvPktLEDCnt)
+#define kr_rcvtidbase KREG_IDX(RcvTIDBase)
+#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt)
+#define kr_revision KREG_IDX(Revision)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_sendbuffererror KREG_IDX(SendBufErr0)
+#define kr_sendctrl KREG_IDX(SendCtrl)
+#define kr_senddmabase KREG_IDX(SendDmaBase)
+#define kr_senddmabufmask0 KREG_IDX(SendDmaBufMask0)
+#define kr_senddmabufmask1 (KREG_IDX(SendDmaBufMask0) + 1)
+#define kr_senddmabufmask2 (KREG_IDX(SendDmaBufMask0) + 2)
+#define kr_senddmahead KREG_IDX(SendDmaHead)
+#define kr_senddmaheadaddr KREG_IDX(SendDmaHeadAddr)
+#define kr_senddmalengen KREG_IDX(SendDmaLenGen)
+#define kr_senddmastatus KREG_IDX(SendDmaStatus)
+#define kr_senddmatail KREG_IDX(SendDmaTail)
+#define kr_sendpioavailaddr KREG_IDX(SendBufAvailAddr)
+#define kr_sendpiobufbase KREG_IDX(SendBufBase)
+#define kr_sendpiobufcnt KREG_IDX(SendBufCnt)
+#define kr_sendpiosize KREG_IDX(SendBufSize)
+#define kr_sendregbase KREG_IDX(SendRegBase)
+#define kr_userregbase KREG_IDX(UserRegBase)
+#define kr_xgxs_cfg KREG_IDX(XGXSCfg)
+
+/* These must only be written via qib_write_kreg_ctxt() */
+#define kr_rcvhdraddr KREG_IDX(RcvHdrAddr0)
+#define kr_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0)
+
+
+#define CREG_IDX(regname) ((QIB_7220_##regname##_OFFS - \
+ QIB_7220_LBIntCnt_OFFS) / sizeof(u64))
+
+#define cr_badformat CREG_IDX(RxVersionErrCnt)
+#define cr_erricrc CREG_IDX(RxICRCErrCnt)
+#define cr_errlink CREG_IDX(RxLinkMalformCnt)
+#define cr_errlpcrc CREG_IDX(RxLPCRCErrCnt)
+#define cr_errpkey CREG_IDX(RxPKeyMismatchCnt)
+#define cr_rcvflowctrl_err CREG_IDX(RxFlowCtrlViolCnt)
+#define cr_err_rlen CREG_IDX(RxLenErrCnt)
+#define cr_errslen CREG_IDX(TxLenErrCnt)
+#define cr_errtidfull CREG_IDX(RxTIDFullErrCnt)
+#define cr_errtidvalid CREG_IDX(RxTIDValidErrCnt)
+#define cr_errvcrc CREG_IDX(RxVCRCErrCnt)
+#define cr_ibstatuschange CREG_IDX(IBStatusChangeCnt)
+#define cr_lbint CREG_IDX(LBIntCnt)
+#define cr_invalidrlen CREG_IDX(RxMaxMinLenErrCnt)
+#define cr_invalidslen CREG_IDX(TxMaxMinLenErrCnt)
+#define cr_lbflowstall CREG_IDX(LBFlowStallCnt)
+#define cr_pktrcv CREG_IDX(RxDataPktCnt)
+#define cr_pktrcvflowctrl CREG_IDX(RxFlowPktCnt)
+#define cr_pktsend CREG_IDX(TxDataPktCnt)
+#define cr_pktsendflow CREG_IDX(TxFlowPktCnt)
+#define cr_portovfl CREG_IDX(RxP0HdrEgrOvflCnt)
+#define cr_rcvebp CREG_IDX(RxEBPCnt)
+#define cr_rcvovfl CREG_IDX(RxBufOvflCnt)
+#define cr_senddropped CREG_IDX(TxDroppedPktCnt)
+#define cr_sendstall CREG_IDX(TxFlowStallCnt)
+#define cr_sendunderrun CREG_IDX(TxUnderrunCnt)
+#define cr_wordrcv CREG_IDX(RxDwordCnt)
+#define cr_wordsend CREG_IDX(TxDwordCnt)
+#define cr_txunsupvl CREG_IDX(TxUnsupVLErrCnt)
+#define cr_rxdroppkt CREG_IDX(RxDroppedPktCnt)
+#define cr_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt)
+#define cr_iblinkdown CREG_IDX(IBLinkDownedCnt)
+#define cr_ibsymbolerr CREG_IDX(IBSymbolErrCnt)
+#define cr_vl15droppedpkt CREG_IDX(RxVL15DroppedPktCnt)
+#define cr_rxotherlocalphyerr CREG_IDX(RxOtherLocalPhyErrCnt)
+#define cr_excessbufferovfl CREG_IDX(ExcessBufferOvflCnt)
+#define cr_locallinkintegrityerr CREG_IDX(LocalLinkIntegrityErrCnt)
+#define cr_rxvlerr CREG_IDX(RxVlErrCnt)
+#define cr_rxdlidfltr CREG_IDX(RxDlidFltrCnt)
+#define cr_psstat CREG_IDX(PSStat)
+#define cr_psstart CREG_IDX(PSStart)
+#define cr_psinterval CREG_IDX(PSInterval)
+#define cr_psrcvdatacount CREG_IDX(PSRcvDataCount)
+#define cr_psrcvpktscount CREG_IDX(PSRcvPktsCount)
+#define cr_psxmitdatacount CREG_IDX(PSXmitDataCount)
+#define cr_psxmitpktscount CREG_IDX(PSXmitPktsCount)
+#define cr_psxmitwaitcount CREG_IDX(PSXmitWaitCount)
+#define cr_txsdmadesc CREG_IDX(TxSDmaDescCnt)
+#define cr_pcieretrydiag CREG_IDX(PcieRetryBufDiagQwordCnt)
+
+#define SYM_RMASK(regname, fldname) ((u64) \
+ QIB_7220_##regname##_##fldname##_RMASK)
+#define SYM_MASK(regname, fldname) ((u64) \
+ QIB_7220_##regname##_##fldname##_RMASK << \
+ QIB_7220_##regname##_##fldname##_LSB)
+#define SYM_LSB(regname, fldname) (QIB_7220_##regname##_##fldname##_LSB)
+#define SYM_FIELD(value, regname, fldname) ((u64) \
+ (((value) >> SYM_LSB(regname, fldname)) & \
+ SYM_RMASK(regname, fldname)))
+#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask)
+#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask)
+
+/* ibcctrl bits */
+#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
+/* cycle through TS1/TS2 till OK */
+#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2
+/* wait for TS1, then go on */
+#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3
+#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16
+
+#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
+#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
+#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
+
+#define BLOB_7220_IBCHG 0x81
+
+/*
+ * We could have a single register get/put routine, that takes a group type,
+ * but this is somewhat clearer and cleaner. It also gives us some error
+ * checking. 64 bit register reads should always work, but are inefficient
+ * on opteron (the northbridge always generates 2 separate HT 32 bit reads),
+ * so we use kreg32 wherever possible. User register and counter register
+ * reads are always 32 bit reads, so only one form of those routines.
+ */
+
+/**
+ * qib_read_ureg32 - read 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @ctxt: context number
+ *
+ * Return the contents of a register that is virtualized to be per context.
+ * Returns 0 on errors (not distinguishable from valid contents at
+ * runtime; we may add a separate error variable at some point).
+ */
+static inline u32 qib_read_ureg32(const struct qib_devdata *dd,
+ enum qib_ureg regno, int ctxt)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+
+ if (dd->userbase)
+ return readl(regno + (u64 __iomem *)
+ ((char __iomem *)dd->userbase +
+ dd->ureg_align * ctxt));
+ else
+ return readl(regno + (u64 __iomem *)
+ (dd->uregbase +
+ (char __iomem *)dd->kregbase +
+ dd->ureg_align * ctxt));
+}
+
+/**
+ * qib_write_ureg - write 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @value: value
+ * @ctxt: context
+ *
+ * Write the contents of a register that is virtualized to be per context.
+ */
+static inline void qib_write_ureg(const struct qib_devdata *dd,
+ enum qib_ureg regno, u64 value, int ctxt)
+{
+ u64 __iomem *ubase;
+
+ if (dd->userbase)
+ ubase = (u64 __iomem *)
+ ((char __iomem *) dd->userbase +
+ dd->ureg_align * ctxt);
+ else
+ ubase = (u64 __iomem *)
+ (dd->uregbase +
+ (char __iomem *) dd->kregbase +
+ dd->ureg_align * ctxt);
+
+ if (dd->kregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &ubase[regno]);
+}
+
+/**
+ * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register
+ * @dd: the qlogic_ib device
+ * @regno: the register number to write
+ * @ctxt: the context containing the register
+ * @value: the value to write
+ */
+static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd,
+ const u16 regno, unsigned ctxt,
+ u64 value)
+{
+ qib_write_kreg(dd, regno + ctxt, value);
+}
+
+static inline void write_7220_creg(const struct qib_devdata *dd,
+ u16 regno, u64 value)
+{
+ if (dd->cspec->cregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &dd->cspec->cregbase[regno]);
+}
+
+static inline u64 read_7220_creg(const struct qib_devdata *dd, u16 regno)
+{
+ if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readq(&dd->cspec->cregbase[regno]);
+}
+
+static inline u32 read_7220_creg32(const struct qib_devdata *dd, u16 regno)
+{
+ if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readl(&dd->cspec->cregbase[regno]);
+}
+
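+/*
+ * Illustrative counter read (a sketch, matching how bringup_serdes and
+ * quiet_serdes below use these helpers): the cr_* indices above select
+ * which counter register is accessed, e.g.
+ *
+ *    ppd->cpspec->ibsymsnap = read_7220_creg32(dd, cr_ibsymbolerr);
+ */
+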
+/* kr_revision bits */
+#define QLOGIC_IB_R_EMULATORREV_MASK ((1ULL << 22) - 1)
+#define QLOGIC_IB_R_EMULATORREV_SHIFT 40
+
+/* kr_control bits */
+#define QLOGIC_IB_C_RESET (1U << 7)
+
+/* kr_intstatus, kr_intclear, kr_intmask bits */
+#define QLOGIC_IB_I_RCVURG_MASK ((1ULL << 17) - 1)
+#define QLOGIC_IB_I_RCVURG_SHIFT 32
+#define QLOGIC_IB_I_RCVAVAIL_MASK ((1ULL << 17) - 1)
+#define QLOGIC_IB_I_RCVAVAIL_SHIFT 0
+#define QLOGIC_IB_I_SERDESTRIMDONE (1ULL << 27)
+
+#define QLOGIC_IB_C_FREEZEMODE 0x00000002
+#define QLOGIC_IB_C_LINKENABLE 0x00000004
+
+#define QLOGIC_IB_I_SDMAINT 0x8000000000000000ULL
+#define QLOGIC_IB_I_SDMADISABLED 0x4000000000000000ULL
+#define QLOGIC_IB_I_ERROR 0x0000000080000000ULL
+#define QLOGIC_IB_I_SPIOSENT 0x0000000040000000ULL
+#define QLOGIC_IB_I_SPIOBUFAVAIL 0x0000000020000000ULL
+#define QLOGIC_IB_I_GPIO 0x0000000010000000ULL
+
+/* masks for sanity checking interrupts and errors */
+#define QLOGIC_IB_I_BITSEXTANT \
+ (QLOGIC_IB_I_SDMAINT | QLOGIC_IB_I_SDMADISABLED | \
+ (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT) | \
+ (QLOGIC_IB_I_RCVAVAIL_MASK << \
+ QLOGIC_IB_I_RCVAVAIL_SHIFT) | \
+ QLOGIC_IB_I_ERROR | QLOGIC_IB_I_SPIOSENT | \
+ QLOGIC_IB_I_SPIOBUFAVAIL | QLOGIC_IB_I_GPIO | \
+ QLOGIC_IB_I_SERDESTRIMDONE)
+
+#define IB_HWE_BITSEXTANT \
+ (HWE_MASK(RXEMemParityErr) | \
+ HWE_MASK(TXEMemParityErr) | \
+ (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << \
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) | \
+ QLOGIC_IB_HWE_PCIE1PLLFAILED | \
+ QLOGIC_IB_HWE_PCIE0PLLFAILED | \
+ QLOGIC_IB_HWE_PCIEPOISONEDTLP | \
+ QLOGIC_IB_HWE_PCIECPLTIMEOUT | \
+ QLOGIC_IB_HWE_PCIEBUSPARITYXTLH | \
+ QLOGIC_IB_HWE_PCIEBUSPARITYXADM | \
+ QLOGIC_IB_HWE_PCIEBUSPARITYRADM | \
+ HWE_MASK(PowerOnBISTFailed) | \
+ QLOGIC_IB_HWE_COREPLL_FBSLIP | \
+ QLOGIC_IB_HWE_COREPLL_RFSLIP | \
+ QLOGIC_IB_HWE_SERDESPLLFAILED | \
+ HWE_MASK(IBCBusToSPCParityErr) | \
+ HWE_MASK(IBCBusFromSPCParityErr) | \
+ QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR | \
+ QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR | \
+ QLOGIC_IB_HWE_SDMAMEMREADERR | \
+ QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED | \
+ QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT | \
+ QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT | \
+ QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT | \
+ QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT | \
+ QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR | \
+ QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR | \
+ QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR | \
+ QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR)
+
+#define IB_E_BITSEXTANT \
+ (ERR_MASK(RcvFormatErr) | ERR_MASK(RcvVCRCErr) | \
+ ERR_MASK(RcvICRCErr) | ERR_MASK(RcvMinPktLenErr) | \
+ ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvLongPktLenErr) | \
+ ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvUnexpectedCharErr) | \
+ ERR_MASK(RcvUnsupportedVLErr) | ERR_MASK(RcvEBPErr) | \
+ ERR_MASK(RcvIBFlowErr) | ERR_MASK(RcvBadVersionErr) | \
+ ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | \
+ ERR_MASK(RcvBadTidErr) | ERR_MASK(RcvHdrLenErr) | \
+ ERR_MASK(RcvHdrErr) | ERR_MASK(RcvIBLostLinkErr) | \
+ ERR_MASK(SendSpecialTriggerErr) | \
+ ERR_MASK(SDmaDisabledErr) | ERR_MASK(SendMinPktLenErr) | \
+ ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnderRunErr) | \
+ ERR_MASK(SendPktLenErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(SendPioArmLaunchErr) | \
+ ERR_MASK(SendUnexpectedPktNumErr) | \
+ ERR_MASK(SendUnsupportedVLErr) | ERR_MASK(SendBufMisuseErr) | \
+ ERR_MASK(SDmaGenMismatchErr) | ERR_MASK(SDmaOutOfBoundErr) | \
+ ERR_MASK(SDmaTailOutOfBoundErr) | ERR_MASK(SDmaBaseErr) | \
+ ERR_MASK(SDma1stDescErr) | ERR_MASK(SDmaRpyTagErr) | \
+ ERR_MASK(SDmaDwEnErr) | ERR_MASK(SDmaMissingDwErr) | \
+ ERR_MASK(SDmaUnexpDataErr) | \
+ ERR_MASK(IBStatusChanged) | ERR_MASK(InvalidAddrErr) | \
+ ERR_MASK(ResetNegated) | ERR_MASK(HardwareErr) | \
+ ERR_MASK(SDmaDescAddrMisalignErr) | \
+ ERR_MASK(InvalidEEPCmd))
+
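+/*
+ * Sketch of how the *_BITSEXTANT masks are used (the interrupt and error
+ * handlers below do the real checks): any set bit outside the mask is a
+ * cause we don't know about and gets reported, e.g.
+ *
+ *    if (istat & ~QLOGIC_IB_I_BITSEXTANT)
+ *            qib_dev_err(dd, "interrupt with unknown interrupts %Lx set\n",
+ *                        istat & ~QLOGIC_IB_I_BITSEXTANT);
+ */
+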
+/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
+#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK 0x00000000000000ffULL
+#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT 0
+#define QLOGIC_IB_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL
+#define QLOGIC_IB_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL
+#define QLOGIC_IB_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL
+#define QLOGIC_IB_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL
+#define QLOGIC_IB_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL
+#define QLOGIC_IB_HWE_COREPLL_FBSLIP 0x0080000000000000ULL
+#define QLOGIC_IB_HWE_COREPLL_RFSLIP 0x0100000000000000ULL
+#define QLOGIC_IB_HWE_PCIE1PLLFAILED 0x0400000000000000ULL
+#define QLOGIC_IB_HWE_PCIE0PLLFAILED 0x0800000000000000ULL
+#define QLOGIC_IB_HWE_SERDESPLLFAILED 0x1000000000000000ULL
+/* specific to this chip */
+#define QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR 0x0000000000000040ULL
+#define QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR 0x0000000000000080ULL
+#define QLOGIC_IB_HWE_SDMAMEMREADERR 0x0000000010000000ULL
+#define QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED 0x2000000000000000ULL
+#define QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT 0x0100000000000000ULL
+#define QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT 0x0200000000000000ULL
+#define QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT 0x0400000000000000ULL
+#define QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT 0x0800000000000000ULL
+#define QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR 0x0000008000000000ULL
+#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
+#define QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL
+#define QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL
+
+#define IBA7220_IBCC_LINKCMD_SHIFT 19
+
+/* kr_ibcddrctrl bits */
+#define IBA7220_IBC_DLIDLMC_MASK 0xFFFFFFFFUL
+#define IBA7220_IBC_DLIDLMC_SHIFT 32
+
+#define IBA7220_IBC_HRTBT_MASK (SYM_RMASK(IBCDDRCtrl, HRTBT_AUTO) | \
+ SYM_RMASK(IBCDDRCtrl, HRTBT_ENB))
+#define IBA7220_IBC_HRTBT_SHIFT SYM_LSB(IBCDDRCtrl, HRTBT_ENB)
+
+#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8)
+#define IBA7220_IBC_LREV_MASK 1
+#define IBA7220_IBC_LREV_SHIFT 8
+#define IBA7220_IBC_RXPOL_MASK 1
+#define IBA7220_IBC_RXPOL_SHIFT 7
+#define IBA7220_IBC_WIDTH_SHIFT 5
+#define IBA7220_IBC_WIDTH_MASK 0x3
+#define IBA7220_IBC_WIDTH_1X_ONLY (0 << IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_WIDTH_4X_ONLY (1 << IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_WIDTH_AUTONEG (2 << IBA7220_IBC_WIDTH_SHIFT)
+#define IBA7220_IBC_SPEED_AUTONEG (1 << 1)
+#define IBA7220_IBC_SPEED_SDR (1 << 2)
+#define IBA7220_IBC_SPEED_DDR (1 << 3)
+#define IBA7220_IBC_SPEED_AUTONEG_MASK (0x7 << 1)
+#define IBA7220_IBC_IBTA_1_2_MASK (1)
+
+/* kr_ibcddrstatus */
+/* link latency shift is 0, don't bother defining */
+#define IBA7220_DDRSTAT_LINKLAT_MASK 0x3ffffff
+
+/* kr_extstatus bits */
+#define QLOGIC_IB_EXTS_FREQSEL 0x2
+#define QLOGIC_IB_EXTS_SERDESSEL 0x4
+#define QLOGIC_IB_EXTS_MEMBIST_ENDTEST 0x0000000000004000
+#define QLOGIC_IB_EXTS_MEMBIST_DISABLED 0x0000000000008000
+
+/* kr_xgxsconfig bits */
+#define QLOGIC_IB_XGXS_RESET 0x5ULL
+#define QLOGIC_IB_XGXS_FC_SAFE (1ULL << 63)
+
+/* kr_rcvpktledcnt */
+#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */
+#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */
+
+#define _QIB_GPIO_SDA_NUM 1
+#define _QIB_GPIO_SCL_NUM 0
+#define QIB_TWSI_EEPROM_DEV 0xA2 /* All Production 7220 cards. */
+#define QIB_TWSI_TEMP_DEV 0x98
+
+/* HW counter clock is at 4nsec */
+#define QIB_7220_PSXMITWAIT_CHECK_RATE 4000
+
+#define IBA7220_R_INTRAVAIL_SHIFT 17
+#define IBA7220_R_PKEY_DIS_SHIFT 34
+#define IBA7220_R_TAILUPD_SHIFT 35
+#define IBA7220_R_CTXTCFG_SHIFT 36
+
+#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
+
+/*
+ * The size bits give us 2^N, in KB units; 0 marks the entry invalid,
+ * and 7 is reserved. We currently use only 2KB and 4KB.
+ */
+#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */
+#define IBA7220_TID_SZ_2K (1UL << IBA7220_TID_SZ_SHIFT) /* 2KB */
+#define IBA7220_TID_SZ_4K (2UL << IBA7220_TID_SZ_SHIFT) /* 4KB */
+#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */
+#define PBC_7220_VL15_SEND (1ULL << 63) /* pbc; VL15, no credit check */
+#define PBC_7220_VL15_SEND_CTRL (1ULL << 31) /* control version of same */
+
+#define AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */
+
+/* packet rate matching delay multiplier */
+static u8 rate_to_delay[2][2] = {
+ /* 1x, 4x */
+ { 8, 2 }, /* SDR */
+ { 4, 1 } /* DDR */
+};
+
+static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = {
+ [IB_RATE_2_5_GBPS] = 8,
+ [IB_RATE_5_GBPS] = 4,
+ [IB_RATE_10_GBPS] = 2,
+ [IB_RATE_20_GBPS] = 1
+};
+
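+/*
+ * Illustrative indexing (a sketch): rate_to_delay[DDR ? 1 : 0][4X ? 1 : 0],
+ * e.g. rate_to_delay[speed == QIB_IB_DDR][width == IB_WIDTH_4X], so a
+ * DDR 4x link gets the smallest multiplier (1) and an SDR 1x link the
+ * largest (8), consistent with the per-rate ib_rate_to_delay entries above.
+ */
+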
+#define IBA7220_LINKSPEED_SHIFT SYM_LSB(IBCStatus, LinkSpeedActive)
+#define IBA7220_LINKWIDTH_SHIFT SYM_LSB(IBCStatus, LinkWidthActive)
+
+/* link training states, from IBC */
+#define IB_7220_LT_STATE_DISABLED 0x00
+#define IB_7220_LT_STATE_LINKUP 0x01
+#define IB_7220_LT_STATE_POLLACTIVE 0x02
+#define IB_7220_LT_STATE_POLLQUIET 0x03
+#define IB_7220_LT_STATE_SLEEPDELAY 0x04
+#define IB_7220_LT_STATE_SLEEPQUIET 0x05
+#define IB_7220_LT_STATE_CFGDEBOUNCE 0x08
+#define IB_7220_LT_STATE_CFGRCVFCFG 0x09
+#define IB_7220_LT_STATE_CFGWAITRMT 0x0a
+#define IB_7220_LT_STATE_CFGIDLE 0x0b
+#define IB_7220_LT_STATE_RECOVERRETRAIN 0x0c
+#define IB_7220_LT_STATE_RECOVERWAITRMT 0x0e
+#define IB_7220_LT_STATE_RECOVERIDLE 0x0f
+
+/* link state machine states from IBC */
+#define IB_7220_L_STATE_DOWN 0x0
+#define IB_7220_L_STATE_INIT 0x1
+#define IB_7220_L_STATE_ARM 0x2
+#define IB_7220_L_STATE_ACTIVE 0x3
+#define IB_7220_L_STATE_ACT_DEFER 0x4
+
+static const u8 qib_7220_physportstate[0x20] = {
+ [IB_7220_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
+ [IB_7220_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
+ [IB_7220_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
+ [IB_7220_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
+ [IB_7220_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
+ [IB_7220_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
+ [IB_7220_LT_STATE_CFGDEBOUNCE] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7220_LT_STATE_CFGRCVFCFG] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7220_LT_STATE_CFGWAITRMT] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7220_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7220_LT_STATE_RECOVERRETRAIN] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_7220_LT_STATE_RECOVERWAITRMT] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_7220_LT_STATE_RECOVERIDLE] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
+};
+
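+/*
+ * Illustrative lookup (a sketch): the IBC link training state from
+ * IBCStatus indexes the table above to yield the IBTA physical port
+ * state, e.g.
+ *
+ *    ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState);
+ *    physstate = qib_7220_physportstate[ibclt];
+ */
+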
+int qib_special_trigger;
+module_param_named(special_trigger, qib_special_trigger, int, S_IRUGO);
+MODULE_PARM_DESC(special_trigger, "Enable SpecialTrigger arm/launch");
+
+#define IBCBUSFRSPCPARITYERR HWE_MASK(IBCBusFromSPCParityErr)
+#define IBCBUSTOSPCPARITYERR HWE_MASK(IBCBusToSPCParityErr)
+
+#define SYM_MASK_BIT(regname, fldname, bit) ((u64) \
+ (1ULL << (SYM_LSB(regname, fldname) + (bit))))
+
+#define TXEMEMPARITYERR_PIOBUF \
+ SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 0)
+#define TXEMEMPARITYERR_PIOPBC \
+ SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 1)
+#define TXEMEMPARITYERR_PIOLAUNCHFIFO \
+ SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 2)
+
+#define RXEMEMPARITYERR_RCVBUF \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 0)
+#define RXEMEMPARITYERR_LOOKUPQ \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 1)
+#define RXEMEMPARITYERR_EXPTID \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 2)
+#define RXEMEMPARITYERR_EAGERTID \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 3)
+#define RXEMEMPARITYERR_FLAGBUF \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 4)
+#define RXEMEMPARITYERR_DATAINFO \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 5)
+#define RXEMEMPARITYERR_HDRINFO \
+ SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 6)
+
+/* 7220 specific hardware errors... */
+static const struct qib_hwerror_msgs qib_7220_hwerror_msgs[] = {
+ /* generic hardware errors */
+ QLOGIC_IB_HWE_MSG(IBCBUSFRSPCPARITYERR, "QIB2IB Parity"),
+ QLOGIC_IB_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2QIB Parity"),
+
+ QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOBUF,
+ "TXE PIOBUF Memory Parity"),
+ QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOPBC,
+ "TXE PIOPBC Memory Parity"),
+ QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOLAUNCHFIFO,
+ "TXE PIOLAUNCHFIFO Memory Parity"),
+
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_RCVBUF,
+ "RXE RCVBUF Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_LOOKUPQ,
+ "RXE LOOKUPQ Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EAGERTID,
+ "RXE EAGERTID Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EXPTID,
+ "RXE EXPTID Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_FLAGBUF,
+ "RXE FLAGBUF Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_DATAINFO,
+ "RXE DATAINFO Memory Parity"),
+ QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_HDRINFO,
+ "RXE HDRINFO Memory Parity"),
+
+ /* chip-specific hardware errors */
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEPOISONEDTLP,
+ "PCIe Poisoned TLP"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT,
+ "PCIe completion timeout"),
+ /*
+ * In practice, it's unlikely that we'll see PCIe PLL, bus parity,
+ * or memory parity error failures, because most likely we
+ * won't be able to talk to the core of the chip. Nonetheless, we
+ * might see them, if they are in parts of the PCIe core that aren't
+ * essential.
+ */
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE1PLLFAILED,
+ "PCIePLL1"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE0PLLFAILED,
+ "PCIePLL0"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXTLH,
+ "PCIe XTLH core parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXADM,
+ "PCIe ADM TX core parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYRADM,
+ "PCIe ADM RX core parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SERDESPLLFAILED,
+ "SerDes PLL"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR,
+ "PCIe cpl data queue"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR,
+ "PCIe cpl header queue"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SDMAMEMREADERR,
+ "Send DMA memory read"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED,
+ "uC PLL clock not locked"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT,
+ "PCIe serdes Q0 no clock"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT,
+ "PCIe serdes Q1 no clock"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT,
+ "PCIe serdes Q2 no clock"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT,
+ "PCIe serdes Q3 no clock"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR,
+ "DDS RXEQ memory parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR,
+ "IB uC memory parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR,
+ "PCIe uC oct0 memory parity"),
+ QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR,
+ "PCIe uC oct1 memory parity"),
+};
+
+#define RXE_PARITY (RXEMEMPARITYERR_EAGERTID|RXEMEMPARITYERR_EXPTID)
+
+#define QLOGIC_IB_E_PKTERRS (\
+ ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(SendDroppedDataPktErr) | \
+ ERR_MASK(RcvVCRCErr) | \
+ ERR_MASK(RcvICRCErr) | \
+ ERR_MASK(RcvShortPktLenErr) | \
+ ERR_MASK(RcvEBPErr))
+
+/* Convenience for decoding Send DMA errors */
+#define QLOGIC_IB_E_SDMAERRS ( \
+ ERR_MASK(SDmaGenMismatchErr) | \
+ ERR_MASK(SDmaOutOfBoundErr) | \
+ ERR_MASK(SDmaTailOutOfBoundErr) | ERR_MASK(SDmaBaseErr) | \
+ ERR_MASK(SDma1stDescErr) | ERR_MASK(SDmaRpyTagErr) | \
+ ERR_MASK(SDmaDwEnErr) | ERR_MASK(SDmaMissingDwErr) | \
+ ERR_MASK(SDmaUnexpDataErr) | \
+ ERR_MASK(SDmaDescAddrMisalignErr) | \
+ ERR_MASK(SDmaDisabledErr) | \
+ ERR_MASK(SendBufMisuseErr))
+
+/* These are all rcv-related errors which we want to count for stats */
+#define E_SUM_PKTERRS \
+ (ERR_MASK(RcvHdrLenErr) | ERR_MASK(RcvBadTidErr) | \
+ ERR_MASK(RcvBadVersionErr) | ERR_MASK(RcvHdrErr) | \
+ ERR_MASK(RcvLongPktLenErr) | ERR_MASK(RcvShortPktLenErr) | \
+ ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \
+ ERR_MASK(RcvFormatErr) | ERR_MASK(RcvUnsupportedVLErr) | \
+ ERR_MASK(RcvUnexpectedCharErr) | ERR_MASK(RcvEBPErr))
+
+/* These are all send-related errors which we want to count for stats */
+#define E_SUM_ERRS \
+ (ERR_MASK(SendPioArmLaunchErr) | ERR_MASK(SendUnexpectedPktNumErr) | \
+ ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnsupportedVLErr) | \
+ ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(InvalidAddrErr))
+
+/*
+ * This is similar to E_SUM_ERRS, but we can't ignore armlaunch errors and
+ * we don't ignore errors unrelated to freeze and cancelling buffers.
+ * Armlaunch can't be ignored because more could occur while we are still
+ * cleaning up, and those need to be cancelled as they happen.
+ */
+#define E_SPKT_ERRS_IGNORE \
+ (ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendMinPktLenErr) | \
+ ERR_MASK(SendPktLenErr))
+
+/*
+ * These are errors that can occur when the link changes state while
+ * a packet is being sent or received. This doesn't cover things
+ * like EBP or VCRC errors, which can result from the link changing
+ * state under the sender, so that we receive a "known bad" packet.
+ */
+#define E_SUM_LINK_PKTERRS \
+ (ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+ ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \
+ ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \
+ ERR_MASK(RcvUnexpectedCharErr))
+
+static void autoneg_7220_work(struct work_struct *);
+static u32 __iomem *qib_7220_getsendbuf(struct qib_pportdata *, u64, u32 *);
+
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer so it can be re-used;
+ * we don't need to force an update of pioavail here.
+ */
+static void qib_disarm_7220_senderrbufs(struct qib_pportdata *ppd)
+{
+ unsigned long sbuf[3];
+ struct qib_devdata *dd = ppd->dd;
+
+ /*
+ * It's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling.
+ */
+ /* read these before writing errorclear */
+ sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror);
+ sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1);
+ sbuf[2] = qib_read_kreg64(dd, kr_sendbuffererror + 2);
+
+ if (sbuf[0] || sbuf[1] || sbuf[2])
+ qib_disarm_piobufs_set(dd, sbuf,
+ dd->piobcnt2k + dd->piobcnt4k);
+}
+
+static void qib_7220_txe_recover(struct qib_devdata *dd)
+{
+ qib_devinfo(dd->pcidev, "Recovering from TXE PIO parity error\n");
+ qib_disarm_7220_senderrbufs(dd->pport);
+}
+
+/*
+ * This is called with interrupts disabled and sdma_lock held.
+ */
+static void qib_7220_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 set_sendctrl = 0;
+ u64 clr_sendctrl = 0;
+
+ if (op & QIB_SDMA_SENDCTRL_OP_ENABLE)
+ set_sendctrl |= SYM_MASK(SendCtrl, SDmaEnable);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl, SDmaEnable);
+
+ if (op & QIB_SDMA_SENDCTRL_OP_INTENABLE)
+ set_sendctrl |= SYM_MASK(SendCtrl, SDmaIntEnable);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl, SDmaIntEnable);
+
+ if (op & QIB_SDMA_SENDCTRL_OP_HALT)
+ set_sendctrl |= SYM_MASK(SendCtrl, SDmaHalt);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl, SDmaHalt);
+
+ spin_lock(&dd->sendctrl_lock);
+
+ dd->sendctrl |= set_sendctrl;
+ dd->sendctrl &= ~clr_sendctrl;
+
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ spin_unlock(&dd->sendctrl_lock);
+}
+
+static void qib_decode_7220_sdma_errs(struct qib_pportdata *ppd,
+ u64 err, char *buf, size_t blen)
+{
+ static const struct {
+ u64 err;
+ const char *msg;
+ } errs[] = {
+ { ERR_MASK(SDmaGenMismatchErr),
+ "SDmaGenMismatch" },
+ { ERR_MASK(SDmaOutOfBoundErr),
+ "SDmaOutOfBound" },
+ { ERR_MASK(SDmaTailOutOfBoundErr),
+ "SDmaTailOutOfBound" },
+ { ERR_MASK(SDmaBaseErr),
+ "SDmaBase" },
+ { ERR_MASK(SDma1stDescErr),
+ "SDma1stDesc" },
+ { ERR_MASK(SDmaRpyTagErr),
+ "SDmaRpyTag" },
+ { ERR_MASK(SDmaDwEnErr),
+ "SDmaDwEn" },
+ { ERR_MASK(SDmaMissingDwErr),
+ "SDmaMissingDw" },
+ { ERR_MASK(SDmaUnexpDataErr),
+ "SDmaUnexpData" },
+ { ERR_MASK(SDmaDescAddrMisalignErr),
+ "SDmaDescAddrMisalign" },
+ { ERR_MASK(SendBufMisuseErr),
+ "SendBufMisuse" },
+ { ERR_MASK(SDmaDisabledErr),
+ "SDmaDisabled" },
+ };
+ int i;
+ size_t bidx = 0;
+
+ for (i = 0; i < ARRAY_SIZE(errs); i++) {
+ if (err & errs[i].err)
+ bidx += scnprintf(buf + bidx, blen - bidx,
+ "%s ", errs[i].msg);
+ }
+}
+
+/*
+ * This is called as part of link-down cleanup to disarm and flush
+ * all send buffers, so that SMP packets can be sent.
+ */
+static void qib_7220_sdma_hw_clean_up(struct qib_pportdata *ppd)
+{
+ /* This will trigger the Abort interrupt */
+ sendctrl_7220_mod(ppd, QIB_SENDCTRL_DISARM_ALL | QIB_SENDCTRL_FLUSH |
+ QIB_SENDCTRL_AVAIL_BLIP);
+ ppd->dd->upd_pio_shadow = 1; /* update our idea of what's busy */
+}
+
+static void qib_sdma_7220_setlengen(struct qib_pportdata *ppd)
+{
+ /*
+ * Set SendDmaLenGen, then clear and set the MSB of the generation
+ * count, to enable generation checking and load the internal
+ * generation counter.
+ */
+ qib_write_kreg(ppd->dd, kr_senddmalengen, ppd->sdma_descq_cnt);
+ qib_write_kreg(ppd->dd, kr_senddmalengen,
+ ppd->sdma_descq_cnt |
+ (1ULL << QIB_7220_SendDmaLenGen_Generation_MSB));
+}
+
+static void qib_7220_sdma_hw_start_up(struct qib_pportdata *ppd)
+{
+ qib_sdma_7220_setlengen(ppd);
+ qib_sdma_update_7220_tail(ppd, 0); /* Set SendDmaTail */
+ ppd->sdma_head_dma[0] = 0;
+}
+
+#define DISABLES_SDMA ( \
+ ERR_MASK(SDmaDisabledErr) | \
+ ERR_MASK(SDmaBaseErr) | \
+ ERR_MASK(SDmaTailOutOfBoundErr) | \
+ ERR_MASK(SDmaOutOfBoundErr) | \
+ ERR_MASK(SDma1stDescErr) | \
+ ERR_MASK(SDmaRpyTagErr) | \
+ ERR_MASK(SDmaGenMismatchErr) | \
+ ERR_MASK(SDmaDescAddrMisalignErr) | \
+ ERR_MASK(SDmaMissingDwErr) | \
+ ERR_MASK(SDmaDwEnErr))
+
+static void sdma_7220_errors(struct qib_pportdata *ppd, u64 errs)
+{
+ unsigned long flags;
+ struct qib_devdata *dd = ppd->dd;
+ char *msg;
+
+ errs &= QLOGIC_IB_E_SDMAERRS;
+
+ msg = dd->cspec->sdmamsgbuf;
+ qib_decode_7220_sdma_errs(ppd, errs, msg, sizeof dd->cspec->sdmamsgbuf);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ if (errs & ERR_MASK(SendBufMisuseErr)) {
+ unsigned long sbuf[3];
+
+ sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror);
+ sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1);
+ sbuf[2] = qib_read_kreg64(dd, kr_sendbuffererror + 2);
+
+ qib_dev_err(ppd->dd,
+ "IB%u:%u SendBufMisuse: %04lx %016lx %016lx\n",
+ ppd->dd->unit, ppd->port, sbuf[2], sbuf[1],
+ sbuf[0]);
+ }
+
+ if (errs & ERR_MASK(SDmaUnexpDataErr))
+ qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", ppd->dd->unit,
+ ppd->port);
+
+ switch (ppd->sdma_state.current_state) {
+ case qib_sdma_state_s00_hw_down:
+ /* not expecting any interrupts */
+ break;
+
+ case qib_sdma_state_s10_hw_start_up_wait:
+ /* handled in intr path */
+ break;
+
+ case qib_sdma_state_s20_idle:
+ /* not expecting any interrupts */
+ break;
+
+ case qib_sdma_state_s30_sw_clean_up_wait:
+ /* not expecting any interrupts */
+ break;
+
+ case qib_sdma_state_s40_hw_clean_up_wait:
+ if (errs & ERR_MASK(SDmaDisabledErr))
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e50_hw_cleaned);
+ break;
+
+ case qib_sdma_state_s50_hw_halt_wait:
+ /* handled in intr path */
+ break;
+
+ case qib_sdma_state_s99_running:
+ if (errs & DISABLES_SDMA)
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e7220_err_halted);
+ break;
+ }
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not depending on "normal packet errors" vs everything
+ * else. Return 1 if there are "real" errors, otherwise 0 if only
+ * packet errors, so the caller can decide what to print with the string.
+ */
+static int qib_decode_7220_err(struct qib_devdata *dd, char *buf, size_t blen,
+ u64 err)
+{
+ int iserr = 1;
+
+ *buf = '\0';
+ if (err & QLOGIC_IB_E_PKTERRS) {
+ if (!(err & ~QLOGIC_IB_E_PKTERRS))
+ iserr = 0;
+ if ((err & ERR_MASK(RcvICRCErr)) &&
+ !(err & (ERR_MASK(RcvVCRCErr) | ERR_MASK(RcvEBPErr))))
+ strlcat(buf, "CRC ", blen);
+ if (!iserr)
+ goto done;
+ }
+ if (err & ERR_MASK(RcvHdrLenErr))
+ strlcat(buf, "rhdrlen ", blen);
+ if (err & ERR_MASK(RcvBadTidErr))
+ strlcat(buf, "rbadtid ", blen);
+ if (err & ERR_MASK(RcvBadVersionErr))
+ strlcat(buf, "rbadversion ", blen);
+ if (err & ERR_MASK(RcvHdrErr))
+ strlcat(buf, "rhdr ", blen);
+ if (err & ERR_MASK(SendSpecialTriggerErr))
+ strlcat(buf, "sendspecialtrigger ", blen);
+ if (err & ERR_MASK(RcvLongPktLenErr))
+ strlcat(buf, "rlongpktlen ", blen);
+ if (err & ERR_MASK(RcvMaxPktLenErr))
+ strlcat(buf, "rmaxpktlen ", blen);
+ if (err & ERR_MASK(RcvMinPktLenErr))
+ strlcat(buf, "rminpktlen ", blen);
+ if (err & ERR_MASK(SendMinPktLenErr))
+ strlcat(buf, "sminpktlen ", blen);
+ if (err & ERR_MASK(RcvFormatErr))
+ strlcat(buf, "rformaterr ", blen);
+ if (err & ERR_MASK(RcvUnsupportedVLErr))
+ strlcat(buf, "runsupvl ", blen);
+ if (err & ERR_MASK(RcvUnexpectedCharErr))
+ strlcat(buf, "runexpchar ", blen);
+ if (err & ERR_MASK(RcvIBFlowErr))
+ strlcat(buf, "ribflow ", blen);
+ if (err & ERR_MASK(SendUnderRunErr))
+ strlcat(buf, "sunderrun ", blen);
+ if (err & ERR_MASK(SendPioArmLaunchErr))
+ strlcat(buf, "spioarmlaunch ", blen);
+ if (err & ERR_MASK(SendUnexpectedPktNumErr))
+ strlcat(buf, "sunexperrpktnum ", blen);
+ if (err & ERR_MASK(SendDroppedSmpPktErr))
+ strlcat(buf, "sdroppedsmppkt ", blen);
+ if (err & ERR_MASK(SendMaxPktLenErr))
+ strlcat(buf, "smaxpktlen ", blen);
+ if (err & ERR_MASK(SendUnsupportedVLErr))
+ strlcat(buf, "sunsupVL ", blen);
+ if (err & ERR_MASK(InvalidAddrErr))
+ strlcat(buf, "invalidaddr ", blen);
+ if (err & ERR_MASK(RcvEgrFullErr))
+ strlcat(buf, "rcvegrfull ", blen);
+ if (err & ERR_MASK(RcvHdrFullErr))
+ strlcat(buf, "rcvhdrfull ", blen);
+ if (err & ERR_MASK(IBStatusChanged))
+ strlcat(buf, "ibcstatuschg ", blen);
+ if (err & ERR_MASK(RcvIBLostLinkErr))
+ strlcat(buf, "riblostlink ", blen);
+ if (err & ERR_MASK(HardwareErr))
+ strlcat(buf, "hardware ", blen);
+ if (err & ERR_MASK(ResetNegated))
+ strlcat(buf, "reset ", blen);
+ if (err & QLOGIC_IB_E_SDMAERRS)
+ qib_decode_7220_sdma_errs(dd->pport, err, buf, blen);
+ if (err & ERR_MASK(InvalidEEPCmd))
+ strlcat(buf, "invalideepromcmd ", blen);
+done:
+ return iserr;
+}
+
+static void reenable_7220_chase(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+ ppd->cpspec->chase_timer.expires = 0;
+ qib_set_ib_7220_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN,
+ QLOGIC_IB_IBCC_LINKINITCMD_POLL);
+}
+
+static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst)
+{
+ u8 ibclt;
+ unsigned long tnow;
+
+ ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState);
+
+ /*
+ * Detect and handle the state chase issue, where we can
+ * get stuck if we are unlucky on timing on both sides of
+ * the link. If we are, we disable, set a timer, and
+ * then re-enable.
+ */
+ switch (ibclt) {
+ case IB_7220_LT_STATE_CFGRCVFCFG:
+ case IB_7220_LT_STATE_CFGWAITRMT:
+ case IB_7220_LT_STATE_TXREVLANES:
+ case IB_7220_LT_STATE_CFGENH:
+ tnow = jiffies;
+ if (ppd->cpspec->chase_end &&
+ time_after(tnow, ppd->cpspec->chase_end)) {
+ ppd->cpspec->chase_end = 0;
+ qib_set_ib_7220_lstate(ppd,
+ QLOGIC_IB_IBCC_LINKCMD_DOWN,
+ QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+ ppd->cpspec->chase_timer.expires = jiffies +
+ QIB_CHASE_DIS_TIME;
+ add_timer(&ppd->cpspec->chase_timer);
+ } else if (!ppd->cpspec->chase_end)
+ ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME;
+ break;
+
+ default:
+ ppd->cpspec->chase_end = 0;
+ break;
+ }
+}
+
+static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
+{
+ char *msg;
+ u64 ignore_this_time = 0;
+ u64 iserr = 0;
+ int log_idx;
+ struct qib_pportdata *ppd = dd->pport;
+ u64 mask;
+
+ /* don't report errors that are masked */
+ errs &= dd->cspec->errormask;
+ msg = dd->cspec->emsgbuf;
+
+ /* do these first, they are most important */
+ if (errs & ERR_MASK(HardwareErr))
+ qib_7220_handle_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf);
+ else
+ for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx)
+ if (errs & dd->eep_st_masks[log_idx].errs_to_log)
+ qib_inc_eeprom_err(dd, log_idx, 1);
+
+ if (errs & QLOGIC_IB_E_SDMAERRS)
+ sdma_7220_errors(ppd, errs);
+
+ if (errs & ~IB_E_BITSEXTANT)
+ qib_dev_err(dd,
+ "error interrupt with unknown errors %llx set\n",
+ (unsigned long long) (errs & ~IB_E_BITSEXTANT));
+
+ if (errs & E_SUM_ERRS) {
+ qib_disarm_7220_senderrbufs(ppd);
+ if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /*
+ * This can happen when trying to bring the link
+ * up, but the IB link changes state at the "wrong"
+ * time. The IB logic then complains that the packet
+ * isn't valid. We don't want to confuse people, so
+ * we just don't print them, except at debug
+ */
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
+ } else if ((errs & E_SUM_LINK_PKTERRS) &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /*
+ * This can happen when SMA is trying to bring the link
+ * up, but the IB link changes state at the "wrong" time.
+ * The IB logic then complains that the packet isn't
+ * valid. We don't want to confuse people, so we just
+ * don't print them, except at debug
+ */
+ ignore_this_time = errs & E_SUM_LINK_PKTERRS;
+ }
+
+ qib_write_kreg(dd, kr_errclear, errs);
+
+ errs &= ~ignore_this_time;
+ if (!errs)
+ goto done;
+
+ /*
+ * The ones we mask off are handled specially below
+ * or above. Also mask SDMADISABLED by default as it
+ * is too chatty.
+ */
+ mask = ERR_MASK(IBStatusChanged) |
+ ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) |
+ ERR_MASK(HardwareErr) | ERR_MASK(SDmaDisabledErr);
+
+ qib_decode_7220_err(dd, msg, sizeof dd->cspec->emsgbuf, errs & ~mask);
+
+ if (errs & E_SUM_PKTERRS)
+ qib_stats.sps_rcverrs++;
+ if (errs & E_SUM_ERRS)
+ qib_stats.sps_txerrs++;
+ iserr = errs & ~(E_SUM_PKTERRS | QLOGIC_IB_E_PKTERRS |
+ ERR_MASK(SDmaDisabledErr));
+
+ if (errs & ERR_MASK(IBStatusChanged)) {
+ u64 ibcs;
+
+ ibcs = qib_read_kreg64(dd, kr_ibcstatus);
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ handle_7220_chase(ppd, ibcs);
+
+ /* Update our picture of width and speed from chip */
+ ppd->link_width_active =
+ ((ibcs >> IBA7220_LINKWIDTH_SHIFT) & 1) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ ppd->link_speed_active =
+ ((ibcs >> IBA7220_LINKSPEED_SHIFT) & 1) ?
+ QIB_IB_DDR : QIB_IB_SDR;
+
+ /*
+ * Since going into a recovery state causes the link state
+ * to go down and since recovery is transitory, it is better
+ * if we "miss" ever seeing the link training state go into
+ * recovery (i.e., ignore this transition for link state
+ * special handling purposes) without updating lastibcstat.
+ */
+ if (qib_7220_phys_portstate(ibcs) !=
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER)
+ qib_handle_e_ibstatuschanged(ppd, ibcs);
+ }
+
+ if (errs & ERR_MASK(ResetNegated)) {
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
+ dd->flags &= ~QIB_INITTED; /* needs re-init */
+ /* mark as having had error */
+ *dd->devstatusp |= QIB_STATUS_HWERROR;
+ *dd->pport->statusp &= ~QIB_STATUS_IB_CONF;
+ }
+
+ if (*msg && iserr)
+ qib_dev_porterr(dd, ppd->port, "%s error\n", msg);
+
+ if (ppd->state_wanted & ppd->lflags)
+ wake_up_interruptible(&ppd->state_wait);
+
+ /*
+ * If there were hdrq or egrfull errors, wake up any processes
+ * waiting in poll. We used to try to check which contexts had
+ * the overflow, but given the cost of that and the chip reads
+ * to support it, it's better to just wake everybody up if we
+ * get an overflow; waiters can poll again if it's not them.
+ */
+ if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) {
+ qib_handle_urcv(dd, ~0U);
+ if (errs & ERR_MASK(RcvEgrFullErr))
+ qib_stats.sps_buffull++;
+ else
+ qib_stats.sps_hdrfull++;
+ }
+done:
+ return;
+}
+
+/* enable/disable chip from delivering interrupts */
+static void qib_7220_set_intr_state(struct qib_devdata *dd, u32 enable)
+{
+ if (enable) {
+ if (dd->flags & QIB_BADINTR)
+ return;
+ qib_write_kreg(dd, kr_intmask, ~0ULL);
+ /* force re-interrupt of any pending interrupts. */
+ qib_write_kreg(dd, kr_intclear, 0ULL);
+ } else
+ qib_write_kreg(dd, kr_intmask, 0ULL);
+}
+
+/*
+ * Try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ * This is in chip-specific code because of all of the register accesses,
+ * even though the details are similar on most chips.
+ */
+static void qib_7220_clear_freeze(struct qib_devdata *dd)
+{
+ /* disable error interrupts, to avoid confusion */
+ qib_write_kreg(dd, kr_errmask, 0ULL);
+
+ /* also disable interrupts; errormask is sometimes overwritten */
+ qib_7220_set_intr_state(dd, 0);
+
+ qib_cancel_sends(dd->pport);
+
+ /* clear the freeze, and be sure chip saw it */
+ qib_write_kreg(dd, kr_control, dd->control);
+ qib_read_kreg32(dd, kr_scratch);
+
+ /* force in-memory update now we are out of freeze */
+ qib_force_pio_avail_update(dd);
+
+ /*
+ * force new interrupt if any hwerr, error or interrupt bits are
+ * still set, and clear "safe" send packet errors related to freeze
+ * and cancelling sends. Re-enable error interrupts before possible
+ * force of re-interrupt on pending interrupts.
+ */
+ qib_write_kreg(dd, kr_hwerrclear, 0ULL);
+ qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+ qib_7220_set_intr_state(dd, 1);
+}
+
+/**
+ * qib_7220_handle_hwerrors - display hardware errors.
+ * @dd: the qlogic_ib device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue. We reuse the same message buffer as
+ * handle_7220_errors() to avoid excessive stack usage.
+ */
+static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
+ size_t msgl)
+{
+ u64 hwerrs;
+ u32 bits, ctrl;
+ int isfatal = 0;
+ char *bitsmsg;
+ int log_idx;
+
+ hwerrs = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (!hwerrs)
+ goto bail;
+ if (hwerrs == ~0ULL) {
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
+ goto bail;
+ }
+ qib_stats.sps_hwerrs++;
+
+ /*
+ * Always clear the error status register, except MEMBISTFAIL,
+ * regardless of whether we continue or stop using the chip.
+ * We want that set so we know it failed, even across driver reload.
+ * We'll still ignore it in the hwerrmask. We do this partly for
+ * diagnostics, but also for support.
+ */
+ qib_write_kreg(dd, kr_hwerrclear,
+ hwerrs & ~HWE_MASK(PowerOnBISTFailed));
+
+ hwerrs &= dd->cspec->hwerrmask;
+
+ /* We log some errors to EEPROM, check if we have any of those. */
+ for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx)
+ if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log)
+ qib_inc_eeprom_err(dd, log_idx, 1);
+ if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC |
+ RXE_PARITY))
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
+
+ if (hwerrs & ~IB_HWE_BITSEXTANT)
+ qib_dev_err(dd,
+ "hwerror interrupt with unknown errors %llx set\n",
+ (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT));
+
+ if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR)
+ qib_sd7220_clr_ibpar(dd);
+
+ ctrl = qib_read_kreg32(dd, kr_control);
+ if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) {
+ /*
+ * Parity errors in send memory are recoverable by h/w
+ * just do housekeeping, exit freeze mode and continue.
+ */
+ if (hwerrs & (TXEMEMPARITYERR_PIOBUF |
+ TXEMEMPARITYERR_PIOPBC)) {
+ qib_7220_txe_recover(dd);
+ hwerrs &= ~(TXEMEMPARITYERR_PIOBUF |
+ TXEMEMPARITYERR_PIOPBC);
+ }
+ if (hwerrs)
+ isfatal = 1;
+ else
+ qib_7220_clear_freeze(dd);
+ }
+
+ *msg = '\0';
+
+ if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
+ isfatal = 1;
+ strlcat(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ qib_format_hwerrors(hwerrs, qib_7220_hwerror_msgs,
+ ARRAY_SIZE(qib_7220_hwerror_msgs), msg, msgl);
+
+ bitsmsg = dd->cspec->bitsmsgbuf;
+ if (hwerrs & (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK <<
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT)) {
+ bits = (u32) ((hwerrs >>
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) &
+ QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK);
+ snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf,
+ "[PCIe Mem Parity Errs %x] ", bits);
+ strlcat(msg, bitsmsg, msgl);
+ }
+
+#define _QIB_PLL_FAIL (QLOGIC_IB_HWE_COREPLL_FBSLIP | \
+ QLOGIC_IB_HWE_COREPLL_RFSLIP)
+
+ if (hwerrs & _QIB_PLL_FAIL) {
+ isfatal = 1;
+ snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf,
+ "[PLL failed (%llx), InfiniPath hardware unusable]",
+ (unsigned long long) hwerrs & _QIB_PLL_FAIL);
+ strlcat(msg, bitsmsg, msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->cspec->hwerrmask &= ~(hwerrs & _QIB_PLL_FAIL);
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ if (hwerrs & QLOGIC_IB_HWE_SERDESPLLFAILED) {
+ /*
+ * If it occurs, it is left masked since the external
+ * interface is unused.
+ */
+ dd->cspec->hwerrmask &= ~QLOGIC_IB_HWE_SERDESPLLFAILED;
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ qib_dev_err(dd, "%s hardware error\n", msg);
+
+ if (isfatal && !dd->diag_client) {
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
+ /*
+ * For /sys status file and user programs to print; if no
+ * trailing brace is copied, we'll know it was truncated.
+ */
+ if (dd->freezemsg)
+ snprintf(dd->freezemsg, dd->freezelen,
+ "{%s}", msg);
+ qib_disable_after_error(dd);
+ }
+bail:;
+}
+
+/**
+ * qib_7220_init_hwerrors - enable hardware errors
+ * @dd: the qlogic_ib device
+ *
+ * Now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask.
+ */
+static void qib_7220_init_hwerrors(struct qib_devdata *dd)
+{
+ u64 val;
+ u64 extsval;
+
+ extsval = qib_read_kreg64(dd, kr_extstatus);
+
+ if (!(extsval & (QLOGIC_IB_EXTS_MEMBIST_ENDTEST |
+ QLOGIC_IB_EXTS_MEMBIST_DISABLED)))
+ qib_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & QLOGIC_IB_EXTS_MEMBIST_DISABLED)
+ qib_devinfo(dd->pcidev, "MemBIST is disabled.\n");
+
+ val = ~0ULL; /* default to all hwerrors become interrupts, */
+
+ val &= ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR;
+ dd->cspec->hwerrmask = val;
+
+ qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed));
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+
+ /* clear all */
+ qib_write_kreg(dd, kr_errclear, ~0ULL);
+ /* enable errors that are masked, at least this first time. */
+ qib_write_kreg(dd, kr_errmask, ~0ULL);
+ dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask);
+ /* clear any interrupts up to this point (ints still not enabled) */
+ qib_write_kreg(dd, kr_intclear, ~0ULL);
+}
+
+/*
+ * Disable and enable the armlaunch error. Used for PIO bandwidth testing
+ * on chips that are count-based, rather than trigger-based. There is no
+ * reference counting, but that's also fine, given the intended use.
+ * Only chip-specific because it's all register accesses
+ */
+static void qib_set_7220_armlaunch(struct qib_devdata *dd, u32 enable)
+{
+ if (enable) {
+ qib_write_kreg(dd, kr_errclear, ERR_MASK(SendPioArmLaunchErr));
+ dd->cspec->errormask |= ERR_MASK(SendPioArmLaunchErr);
+ } else
+ dd->cspec->errormask &= ~ERR_MASK(SendPioArmLaunchErr);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+}
+
+/*
+ * Formerly took parameter <which> in pre-shifted,
+ * pre-merged form with LinkCmd and LinkInitCmd
+ * together, and assumed that zero meant NOP.
+ */
+static void qib_set_ib_7220_lstate(struct qib_pportdata *ppd, u16 linkcmd,
+ u16 linitcmd)
+{
+ u64 mod_wd;
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) {
+ /*
+ * If we are told to disable, note that so link-recovery
+ * code does not attempt to bring us back up.
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) {
+ /*
+ * Any other linkinitcmd will lead to LINKDOWN and then
+ * to INIT (if all is well), so clear flag to let
+ * link-recovery code attempt to bring us back up.
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+
+ mod_wd = (linkcmd << IBA7220_IBCC_LINKCMD_SHIFT) |
+ (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+
+ qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl | mod_wd);
+ /* write to chip to prevent back-to-back writes of ibc reg */
+ qib_write_kreg(dd, kr_scratch, 0);
+}
+
+/*
+ * All detailed interaction with the SerDes has been moved to qib_sd7220.c
+ *
+ * The portion of IBA7220-specific bringup_serdes() that actually deals with
+ * registers and memory within the SerDes itself is qib_sd7220_init().
+ */
+
+/**
+ * qib_7220_bringup_serdes - bring up the serdes
+ * @ppd: physical port on the qlogic_ib device
+ */
+static int qib_7220_bringup_serdes(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 val, prev_val, guid, ibc;
+ int ret = 0;
+
+ /* Put IBC in reset, sends disabled */
+ dd->control &= ~QLOGIC_IB_C_LINKENABLE;
+ qib_write_kreg(dd, kr_control, 0ULL);
+
+ if (qib_compat_ddr_negotiate) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap = read_7220_creg32(dd, cr_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap =
+ read_7220_creg32(dd, cr_iblinkerrrecov);
+ }
+
+ /* flowcontrolwatermark is in units of KBytes */
+ ibc = 0x5ULL << SYM_LSB(IBCCtrl, FlowCtrlWaterMark);
+ /*
+ * How often flow control is sent. More or less in usecs; balance against
+ * watermark value, so that in theory senders always get a flow
+ * control update in time to not let the IB link go idle.
+ */
+ ibc |= 0x3ULL << SYM_LSB(IBCCtrl, FlowCtrlPeriod);
+ /* max error tolerance */
+ ibc |= 0xfULL << SYM_LSB(IBCCtrl, PhyerrThreshold);
+ /* use "real" buffer space for */
+ ibc |= 4ULL << SYM_LSB(IBCCtrl, CreditScale);
+ /* IB credit flow control. */
+ ibc |= 0xfULL << SYM_LSB(IBCCtrl, OverrunThreshold);
+ /*
+ * set initial max size pkt IBC will send, including ICRC; it's the
+ * PIO buffer size in dwords, less 1; also see qib_set_mtu()
+ */
+ ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << SYM_LSB(IBCCtrl, MaxPktLen);
+ ppd->cpspec->ibcctrl = ibc; /* without linkcmd or linkinitcmd! */
+
+ /* initially come up waiting for TS1, without sending anything. */
+ val = ppd->cpspec->ibcctrl | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
+ QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+ qib_write_kreg(dd, kr_ibcctrl, val);
+
+ if (!ppd->cpspec->ibcddrctrl) {
+ /* not on re-init after reset */
+ ppd->cpspec->ibcddrctrl = qib_read_kreg64(dd, kr_ibcddrctrl);
+
+ if (ppd->link_speed_enabled == (QIB_IB_SDR | QIB_IB_DDR))
+ ppd->cpspec->ibcddrctrl |=
+ IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ else
+ ppd->cpspec->ibcddrctrl |=
+ ppd->link_speed_enabled == QIB_IB_DDR ?
+ IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR;
+ if ((ppd->link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) ==
+ (IB_WIDTH_1X | IB_WIDTH_4X))
+ ppd->cpspec->ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG;
+ else
+ ppd->cpspec->ibcddrctrl |=
+ ppd->link_width_enabled == IB_WIDTH_4X ?
+ IBA7220_IBC_WIDTH_4X_ONLY :
+ IBA7220_IBC_WIDTH_1X_ONLY;
+
+ /* always enable these on driver reload, not sticky */
+ ppd->cpspec->ibcddrctrl |=
+ IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT;
+ ppd->cpspec->ibcddrctrl |=
+ IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
+
+ /* enable automatic lane reversal detection for receive */
+ ppd->cpspec->ibcddrctrl |= IBA7220_IBC_LANE_REV_SUPPORTED;
+ } else
+ /* write to chip to prevent back-to-back writes of ibc reg */
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ qib_write_kreg(dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ qib_write_kreg(dd, kr_ncmodectrl, 0ULL);
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ ret = qib_sd7220_init(dd);
+
+ val = qib_read_kreg64(dd, kr_xgxs_cfg);
+ prev_val = val;
+ val |= QLOGIC_IB_XGXS_FC_SAFE;
+ if (val != prev_val) {
+ qib_write_kreg(dd, kr_xgxs_cfg, val);
+ qib_read_kreg32(dd, kr_scratch);
+ }
+ if (val & QLOGIC_IB_XGXS_RESET)
+ val &= ~QLOGIC_IB_XGXS_RESET;
+ if (val != prev_val)
+ qib_write_kreg(dd, kr_xgxs_cfg, val);
+
+ /* first time through, set port guid */
+ if (!ppd->guid)
+ ppd->guid = dd->base_guid;
+ guid = be64_to_cpu(ppd->guid);
+
+ qib_write_kreg(dd, kr_hrtbt_guid, guid);
+ if (!ret) {
+ dd->control |= QLOGIC_IB_C_LINKENABLE;
+ qib_write_kreg(dd, kr_control, dd->control);
+ } else
+ /* write to chip to prevent back-to-back writes of ibc reg */
+ qib_write_kreg(dd, kr_scratch, 0);
+ return ret;
+}
+
+/**
+ * qib_7220_quiet_serdes - set serdes to txidle
+ * @ppd: physical port of the qlogic_ib device
+ * Called when driver is being unloaded
+ */
+static void qib_7220_quiet_serdes(struct qib_pportdata *ppd)
+{
+ u64 val;
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ /* disable IBC */
+ dd->control &= ~QLOGIC_IB_C_LINKENABLE;
+ qib_write_kreg(dd, kr_control,
+ dd->control | QLOGIC_IB_C_FREEZEMODE);
+
+ ppd->cpspec->chase_end = 0;
+ if (ppd->cpspec->chase_timer.data) /* if initted */
+ del_timer_sync(&ppd->cpspec->chase_timer);
+
+ if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta ||
+ ppd->cpspec->ibdeltainprog) {
+ u64 diagc;
+
+ /* enable counter writes */
+ diagc = qib_read_kreg64(dd, kr_hwdiagctrl);
+ qib_write_kreg(dd, kr_hwdiagctrl,
+ diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable));
+
+ if (ppd->cpspec->ibsymdelta || ppd->cpspec->ibdeltainprog) {
+ val = read_7220_creg32(dd, cr_ibsymbolerr);
+ if (ppd->cpspec->ibdeltainprog)
+ val -= val - ppd->cpspec->ibsymsnap;
+ val -= ppd->cpspec->ibsymdelta;
+ write_7220_creg(dd, cr_ibsymbolerr, val);
+ }
+ if (ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) {
+ val = read_7220_creg32(dd, cr_iblinkerrrecov);
+ if (ppd->cpspec->ibdeltainprog)
+ val -= val - ppd->cpspec->iblnkerrsnap;
+ val -= ppd->cpspec->iblnkerrdelta;
+ write_7220_creg(dd, cr_iblinkerrrecov, val);
+ }
+
+ /* and disable counter writes */
+ qib_write_kreg(dd, kr_hwdiagctrl, diagc);
+ }
+ qib_set_ib_7220_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ wake_up(&ppd->cpspec->autoneg_wait);
+ cancel_delayed_work_sync(&ppd->cpspec->autoneg_work);
+
+ shutdown_7220_relock_poll(ppd->dd);
+ val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg);
+ val |= QLOGIC_IB_XGXS_RESET;
+ qib_write_kreg(ppd->dd, kr_xgxs_cfg, val);
+}
+
+/**
+ * qib_setup_7220_setextled - set the state of the two external LEDs
+ * @ppd: port data for the qlogic_ib device
+ * @on: whether the link is up or not
+ *
+ * If @on is true, the exact combination of LEDs lit is determined by
+ * looking at the ibcstatus.
+ *
+ * These LEDs indicate the physical and logical state of the IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state).
+ *
+ * Note: We try to match the Mellanox HCA LED behavior as best
+ * we can. Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate. That's
+ * visible overhead, so not something we will do.
+ *
+ */
+static void qib_setup_7220_setextled(struct qib_pportdata *ppd, u32 on)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 extctl, ledblink = 0, val, lst, ltst;
+ unsigned long flags;
+
+ /*
+ * The diags use the LED to indicate diag info, so we leave
+ * the external LED alone when the diags are running.
+ */
+ if (dd->diag_client)
+ return;
+
+ if (ppd->led_override) {
+ ltst = (ppd->led_override & QIB_LED_PHYS) ?
+ IB_PHYSPORTSTATE_LINKUP : IB_PHYSPORTSTATE_DISABLED,
+ lst = (ppd->led_override & QIB_LED_LOG) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ } else if (on) {
+ val = qib_read_kreg64(dd, kr_ibcstatus);
+ ltst = qib_7220_phys_portstate(val);
+ lst = qib_7220_iblink_state(val);
+ } else {
+ ltst = 0;
+ lst = 0;
+ }
+
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ extctl = dd->cspec->extctrl & ~(SYM_MASK(EXTCtrl, LEDPriPortGreenOn) |
+ SYM_MASK(EXTCtrl, LEDPriPortYellowOn));
+ if (ltst == IB_PHYSPORTSTATE_LINKUP) {
+ extctl |= SYM_MASK(EXTCtrl, LEDPriPortGreenOn);
+ /*
+ * counts are in chip clock (4ns) periods.
+ * This is about 66.6 ms on,
+ * 187.5 ms off, with packets rcvd.
+ */
+ ledblink = ((66600 * 1000UL / 4) << IBA7220_LEDBLINK_ON_SHIFT)
+ | ((187500 * 1000UL / 4) << IBA7220_LEDBLINK_OFF_SHIFT);
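+ /*
+ * Worked out (illustrative): 66600 * 1000 / 4 = 16.65 million 4 ns
+ * chip clocks, i.e. 66.6 ms on; 187500 * 1000 / 4 = 46.875 million
+ * clocks, i.e. 187.5 ms off.
+ */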
+ }
+ if (lst == IB_PORT_ACTIVE)
+ extctl |= SYM_MASK(EXTCtrl, LEDPriPortYellowOn);
+ dd->cspec->extctrl = extctl;
+ qib_write_kreg(dd, kr_extctrl, extctl);
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+
+ if (ledblink) /* blink the LED on packet receive */
+ qib_write_kreg(dd, kr_rcvpktledcnt, ledblink);
+}
+
+static void qib_7220_free_irq(struct qib_devdata *dd)
+{
+ if (dd->cspec->irq) {
+ free_irq(dd->cspec->irq, dd);
+ dd->cspec->irq = 0;
+ }
+ qib_nomsi(dd);
+}
+
+/*
+ * qib_setup_7220_cleanup - clean up any chip-specific stuff
+ * @dd: the qlogic_ib device
+ *
+ * This is called during driver unload.
+ *
+ */
+static void qib_setup_7220_cleanup(struct qib_devdata *dd)
+{
+ qib_7220_free_irq(dd);
+ kfree(dd->cspec->cntrs);
+ kfree(dd->cspec->portcntrs);
+}
+
+/*
+ * This is only called for SDmaInt.
+ * SDmaDisabled is handled on the error path.
+ */
+static void sdma_7220_intr(struct qib_pportdata *ppd, u64 istat)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ switch (ppd->sdma_state.current_state) {
+ case qib_sdma_state_s00_hw_down:
+ break;
+
+ case qib_sdma_state_s10_hw_start_up_wait:
+ __qib_sdma_process_event(ppd, qib_sdma_event_e20_hw_started);
+ break;
+
+ case qib_sdma_state_s20_idle:
+ break;
+
+ case qib_sdma_state_s30_sw_clean_up_wait:
+ break;
+
+ case qib_sdma_state_s40_hw_clean_up_wait:
+ break;
+
+ case qib_sdma_state_s50_hw_halt_wait:
+ __qib_sdma_process_event(ppd, qib_sdma_event_e60_hw_halted);
+ break;
+
+ case qib_sdma_state_s99_running:
+ /* too chatty to print here */
+ __qib_sdma_intr(ppd);
+ break;
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+static void qib_wantpiobuf_7220_intr(struct qib_devdata *dd, u32 needint)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if (needint) {
+ if (!(dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd)))
+ goto done;
+ /*
+ * Blip the availupd off; the next write turns it back on, so
+ * we ensure an avail update, regardless of threshold or
+ * buffers becoming free, whenever we want an interrupt.
+ */
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl &
+ ~SYM_MASK(SendCtrl, SendBufAvailUpd));
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ dd->sendctrl |= SYM_MASK(SendCtrl, SendIntBufAvail);
+ } else
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, SendIntBufAvail);
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+done:
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+}
+
+/*
+ * Handle errors and unusual events first, separate function
+ * to improve cache hits for fast path interrupt handling.
+ */
+static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat)
+{
+ if (unlikely(istat & ~QLOGIC_IB_I_BITSEXTANT))
+ qib_dev_err(dd,
+ "interrupt with unknown interrupts %Lx set\n",
+ istat & ~QLOGIC_IB_I_BITSEXTANT);
+
+ if (istat & QLOGIC_IB_I_GPIO) {
+ u32 gpiostatus;
+
+ /*
+ * Boards for this chip currently don't use GPIO interrupts,
+ * so clear by writing GPIOstatus to GPIOclear, and complain
+ * to alert developer. To avoid endless repeats, clear
+ * the bits in the mask, since there is some kind of
+ * programming error or chip problem.
+ */
+ gpiostatus = qib_read_kreg32(dd, kr_gpio_status);
+ /*
+ * In theory, writing GPIOstatus to GPIOclear could
+ * have a bad side-effect on some diagnostic that wanted
+ * to poll for a status-change, but the various shadows
+ * make that problematic at best. Diags will just suppress
+ * all GPIO interrupts during such tests.
+ */
+ qib_write_kreg(dd, kr_gpio_clear, gpiostatus);
+
+ if (gpiostatus) {
+ const u32 mask = qib_read_kreg32(dd, kr_gpio_mask);
+ u32 gpio_irq = mask & gpiostatus;
+
+ /*
+ * A bit set in status and (chip) Mask register
+ * would cause an interrupt. Since we are not
+ * expecting any, report it. Also check that the
+ * chip reflects our shadow, report issues,
+ * and refresh from the shadow.
+ */
+ /*
+ * Clear any troublemakers, and update chip
+ * from shadow
+ */
+ dd->cspec->gpio_mask &= ~gpio_irq;
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ }
+ }
+
+ if (istat & QLOGIC_IB_I_ERROR) {
+ u64 estat;
+
+ qib_stats.sps_errints++;
+ estat = qib_read_kreg64(dd, kr_errstatus);
+ if (!estat)
+ qib_devinfo(dd->pcidev,
+ "error interrupt (%Lx), but no error bits set!\n",
+ istat);
+ else
+ handle_7220_errors(dd, estat);
+ }
+}
+
+static irqreturn_t qib_7220intr(int irq, void *data)
+{
+ struct qib_devdata *dd = data;
+ irqreturn_t ret;
+ u64 istat;
+ u64 ctxtrbits;
+ u64 rmask;
+ unsigned i;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) {
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ ret = IRQ_HANDLED;
+ goto bail;
+ }
+
+ istat = qib_read_kreg64(dd, kr_intstatus);
+
+ if (unlikely(!istat)) {
+ ret = IRQ_NONE; /* not our interrupt, or already handled */
+ goto bail;
+ }
+ if (unlikely(istat == -1)) {
+ qib_bad_intrstatus(dd);
+ /* don't know if it was our interrupt or not */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ this_cpu_inc(*dd->int_counter);
+ if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
+ QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
+ unlikely_7220_intr(dd, istat);
+
+ /*
+ * Clear the interrupt bits we found set, relatively early, so we
+ * "know" know the chip will have seen this by the time we process
+ * the queue, and will re-interrupt if necessary. The processor
+ * itself won't take the interrupt again until we return.
+ */
+ qib_write_kreg(dd, kr_intclear, istat);
+
+ /*
+ * Handle kernel receive queues before checking for pio buffers
+ * available since receives can overflow; piobuf waiters can afford
+ * a few extra cycles, since they were waiting anyway.
+ */
+ ctxtrbits = istat &
+ ((QLOGIC_IB_I_RCVAVAIL_MASK << QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+ (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT));
+ if (ctxtrbits) {
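+ /*
+ * rmask covers the RcvAvail and RcvUrg bits for ctxt 0;
+ * shifting it left by one per iteration selects the same
+ * pair of bits for each successive kernel ctxt.
+ */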
+ rmask = (1ULL << QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+ (1ULL << QLOGIC_IB_I_RCVURG_SHIFT);
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ if (ctxtrbits & rmask) {
+ ctxtrbits &= ~rmask;
+ qib_kreceive(dd->rcd[i], NULL, NULL);
+ }
+ rmask <<= 1;
+ }
+ if (ctxtrbits) {
+ ctxtrbits =
+ (ctxtrbits >> QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+ (ctxtrbits >> QLOGIC_IB_I_RCVURG_SHIFT);
+ qib_handle_urcv(dd, ctxtrbits);
+ }
+ }
+
+ /* only call for SDmaInt */
+ if (istat & QLOGIC_IB_I_SDMAINT)
+ sdma_7220_intr(dd->pport, istat);
+
+ if ((istat & QLOGIC_IB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED))
+ qib_ib_piobufavail(dd);
+
+ ret = IRQ_HANDLED;
+bail:
+ return ret;
+}
+
+/*
+ * Set up our chip-specific interrupt handler.
+ * The interrupt type has already been set up, so
+ * we just need to do the registration and error checking.
+ * If we are using MSI interrupts, we may fall back to
+ * INTx later, if the interrupt handler doesn't get called
+ * within 1/2 second (see verify_interrupt()).
+ */
+static void qib_setup_7220_interrupt(struct qib_devdata *dd)
+{
+ if (!dd->cspec->irq)
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
+ else {
+ int ret = request_irq(dd->cspec->irq, qib_7220intr,
+ dd->msi_lo ? 0 : IRQF_SHARED,
+ QIB_DRV_NAME, dd);
+
+ if (ret)
+ qib_dev_err(dd,
+ "Couldn't setup %s interrupt (irq=%d): %d\n",
+ dd->msi_lo ? "MSI" : "INTx",
+ dd->cspec->irq, ret);
+ }
+}
+
+/**
+ * qib_7220_boardname - fill in the board name
+ * @dd: the qlogic_ib device
+ *
+ * info is based on the board revision register
+ */
+static void qib_7220_boardname(struct qib_devdata *dd)
+{
+ char *n;
+ u32 boardid, namelen;
+
+ boardid = SYM_FIELD(dd->revision, Revision,
+ BoardID);
+
+ switch (boardid) {
+ case 1:
+ n = "InfiniPath_QLE7240";
+ break;
+ case 2:
+ n = "InfiniPath_QLE7280";
+ break;
+ default:
+ qib_dev_err(dd, "Unknown 7220 board with ID %u\n", boardid);
+ n = "Unknown_InfiniPath_7220";
+ break;
+ }
+
+ namelen = strlen(n) + 1;
+ dd->boardname = kmalloc(namelen, GFP_KERNEL);
+ if (!dd->boardname)
+ qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
+ else
+ snprintf(dd->boardname, namelen, "%s", n);
+
+ if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
+ qib_dev_err(dd,
+ "Unsupported InfiniPath hardware revision %u.%u!\n",
+ dd->majrev, dd->minrev);
+
+ snprintf(dd->boardversion, sizeof(dd->boardversion),
+ "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
+ QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname,
+ (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch),
+ dd->majrev, dd->minrev,
+ (unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
+}
+
+/*
+ * This routine sleeps, so it can only be called from user context, not
+ * from interrupt context.
+ */
+static int qib_setup_7220_reset(struct qib_devdata *dd)
+{
+ u64 val;
+ int i;
+ int ret;
+ u16 cmdval;
+ u8 int_line, clinesz;
+ unsigned long flags;
+
+ qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz);
+
+ /* Use dev_err so it shows up in logs, etc. */
+ qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit);
+
+ /* no interrupts till re-initted */
+ qib_7220_set_intr_state(dd, 0);
+
+ dd->pport->cpspec->ibdeltainprog = 0;
+ dd->pport->cpspec->ibsymdelta = 0;
+ dd->pport->cpspec->iblnkerrdelta = 0;
+
+ /*
+ * Keep chip from being accessed until we are ready. Use
+ * writeq() directly, to allow the write even though QIB_PRESENT
+ * isn't set.
+ */
+ dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
+ val = dd->control | QLOGIC_IB_C_RESET;
+ writeq(val, &dd->kregbase[kr_control]);
+ mb(); /* prevent compiler reordering around actual reset */
+
+ for (i = 1; i <= 5; i++) {
+ /*
+ * Allow MBIST, etc. to complete; longer on each retry.
+ * We sometimes get machine checks from bus timeout if no
+ * response, so for now, make it *really* long.
+ */
+ msleep(1000 + (1 + i) * 2000);
+
+ qib_pcie_reenable(dd, cmdval, int_line, clinesz);
+
+ /*
+ * Use readq directly, so we don't need to mark it as PRESENT
+ * until we get a successful indication that all is well.
+ */
+ val = readq(&dd->kregbase[kr_revision]);
+ if (val == dd->revision) {
+ dd->flags |= QIB_PRESENT; /* it's back */
+ ret = qib_reinit_intr(dd);
+ goto bail;
+ }
+ }
+ ret = 0; /* failed */
+
+bail:
+ if (ret) {
+ if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
+
+ /* hold IBC in reset, no sends, etc till later */
+ qib_write_kreg(dd, kr_control, 0ULL);
+
+ /* clear the reset error, init error/hwerror mask */
+ qib_7220_init_hwerrors(dd);
+
+ /* do setup similar to speed or link-width changes */
+ if (dd->pport->cpspec->ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK)
+ dd->cspec->presets_needed = 1;
+ spin_lock_irqsave(&dd->pport->lflags_lock, flags);
+ dd->pport->lflags |= QIBL_IB_FORCE_NOTIFY;
+ dd->pport->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&dd->pport->lflags_lock, flags);
+ }
+
+ return ret;
+}
+
+/**
+ * qib_7220_put_tid - write a TID to the chip
+ * @dd: the qlogic_ib device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; tidinvalid if freeing
+ */
+static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ if (pa != dd->tidinvalid) {
+ u64 chippa = pa >> IBA7220_TID_PA_SHIFT;
+
+ /* paranoia checks */
+ if (pa != (chippa << IBA7220_TID_PA_SHIFT)) {
+ qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n",
+ pa);
+ return;
+ }
+ if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
+ return;
+ }
+
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ chippa |= dd->tidtemplate;
+ else /* for now, always full 4KB page */
+ chippa |= IBA7220_TID_SZ_4K;
+ pa = chippa;
+ }
+ writeq(pa, tidptr);
+ mmiowb();
+}
+
+/**
+ * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager
+ * @dd: the qlogic_ib device
+ * @rcd: the ctxt data
+ *
+ * clear all TID entries for a ctxt, expected and eager.
+ * Used from qib_close(). On this chip, TIDs are only 32 bits,
+ * not 64, but they are still on 64 bit boundaries, so tidbase
+ * is declared as u64 * for the pointer math, even though we write 32 bits
+ */
+static void qib_7220_clear_tids(struct qib_devdata *dd,
+ struct qib_ctxtdata *rcd)
+{
+ u64 __iomem *tidbase;
+ unsigned long tidinv;
+ u32 ctxt;
+ int i;
+
+ if (!dd->kregbase || !rcd)
+ return;
+
+ ctxt = rcd->ctxt;
+
+ tidinv = dd->tidinvalid;
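+ /* first invalidate every expected-TID entry for this ctxt */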
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->kregbase) +
+ dd->rcvtidbase +
+ ctxt * dd->rcvtidcnt * sizeof(*tidbase));
+
+ for (i = 0; i < dd->rcvtidcnt; i++)
+ qib_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ tidinv);
+
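+ /* then invalidate every eager-TID entry for this ctxt */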
+ tidbase = (u64 __iomem *)
+ ((char __iomem *)(dd->kregbase) +
+ dd->rcvegrbase +
+ rcd->rcvegr_tid_base * sizeof(*tidbase));
+
+ for (i = 0; i < rcd->rcvegrcnt; i++)
+ qib_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ tidinv);
+}
+
+/**
+ * qib_7220_tidtemplate - setup constants for TID updates
+ * @dd: the qlogic_ib device
+ *
+ * We set up values that we use a lot, to avoid recalculating them each time
+ */
+static void qib_7220_tidtemplate(struct qib_devdata *dd)
+{
+ if (dd->rcvegrbufsize == 2048)
+ dd->tidtemplate = IBA7220_TID_SZ_2K;
+ else if (dd->rcvegrbufsize == 4096)
+ dd->tidtemplate = IBA7220_TID_SZ_4K;
+ dd->tidinvalid = 0;
+}
+
+/**
+ * qib_7220_get_base_info - set chip-specific flags for user code
+ * @rcd: the qlogic_ib ctxt
+ * @kinfo: qib_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int qib_7220_get_base_info(struct qib_ctxtdata *rcd,
+ struct qib_base_info *kinfo)
+{
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_PCIE |
+ QIB_RUNTIME_NODMA_RTAIL | QIB_RUNTIME_SDMA;
+
+ if (rcd->dd->flags & QIB_USE_SPCL_TRIG)
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_SPECIAL_TRIGGER;
+
+ return 0;
+}
+
+static struct qib_message_header *
+qib_7220_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
+{
+ u32 offset = qib_hdrget_offset(rhf_addr);
+
+ return (struct qib_message_header *)
+ (rhf_addr - dd->rhf_offset + offset);
+}
+
+static void qib_7220_config_ctxts(struct qib_devdata *dd)
+{
+ unsigned long flags;
+ u32 nchipctxts;
+
+ nchipctxts = qib_read_kreg32(dd, kr_portcnt);
+ dd->cspec->numctxts = nchipctxts;
+ if (qib_n_krcv_queues > 1) {
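+ /*
+ * qpn_mask selects the QPN bits used to spread receive
+ * traffic across the kernel ctxts; RcvQPMapEnable is set
+ * below whenever qpn_mask is nonzero.
+ */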
+ dd->qpn_mask = 0x3e;
+ dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
+ if (dd->first_user_ctxt > nchipctxts)
+ dd->first_user_ctxt = nchipctxts;
+ } else
+ dd->first_user_ctxt = dd->num_pports;
+ dd->n_krcv_queues = dd->first_user_ctxt;
+
+ if (!qib_cfgctxts) {
+ int nctxts = dd->first_user_ctxt + num_online_cpus();
+
+ if (nctxts <= 5)
+ dd->ctxtcnt = 5;
+ else if (nctxts <= 9)
+ dd->ctxtcnt = 9;
+ else if (nctxts <= nchipctxts)
+ dd->ctxtcnt = nchipctxts;
+ } else if (qib_cfgctxts <= nchipctxts)
+ dd->ctxtcnt = qib_cfgctxts;
+ if (!dd->ctxtcnt) /* none of the above, set to max */
+ dd->ctxtcnt = nchipctxts;
+
+ /*
+ * Chip can be configured for 5, 9, or 17 ctxts, and choice
+ * affects number of eager TIDs per ctxt (1K, 2K, 4K).
+ * Lock to be paranoid about later motion, etc.
+ */
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+ if (dd->ctxtcnt > 9)
+ dd->rcvctrl |= 2ULL << IBA7220_R_CTXTCFG_SHIFT;
+ else if (dd->ctxtcnt > 5)
+ dd->rcvctrl |= 1ULL << IBA7220_R_CTXTCFG_SHIFT;
+ /* else configure for default 5 receive ctxts */
+ if (dd->qpn_mask)
+ dd->rcvctrl |= 1ULL << QIB_7220_RcvCtrl_RcvQPMapEnable_LSB;
+ qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+
+ /* kr_rcvegrcnt changes based on the number of contexts enabled */
+ dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt);
+ dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, IBA7220_KRCVEGRCNT);
+}
+
+static int qib_7220_get_ib_cfg(struct qib_pportdata *ppd, int which)
+{
+ int lsb, ret = 0;
+ u64 maskr; /* right-justified mask */
+
+ switch (which) {
+ case QIB_IB_CFG_LWID_ENB: /* Get allowed Link-width */
+ ret = ppd->link_width_enabled;
+ goto done;
+
+ case QIB_IB_CFG_LWID: /* Get currently active Link-width */
+ ret = ppd->link_width_active;
+ goto done;
+
+ case QIB_IB_CFG_SPD_ENB: /* Get allowed Link speeds */
+ ret = ppd->link_speed_enabled;
+ goto done;
+
+ case QIB_IB_CFG_SPD: /* Get current Link spd */
+ ret = ppd->link_speed_active;
+ goto done;
+
+ case QIB_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */
+ lsb = IBA7220_IBC_RXPOL_SHIFT;
+ maskr = IBA7220_IBC_RXPOL_MASK;
+ break;
+
+ case QIB_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */
+ lsb = IBA7220_IBC_LREV_SHIFT;
+ maskr = IBA7220_IBC_LREV_MASK;
+ break;
+
+ case QIB_IB_CFG_LINKLATENCY:
+ ret = qib_read_kreg64(ppd->dd, kr_ibcddrstatus)
+ & IBA7220_DDRSTAT_LINKLAT_MASK;
+ goto done;
+
+ case QIB_IB_CFG_OP_VLS:
+ ret = ppd->vls_operational;
+ goto done;
+
+ case QIB_IB_CFG_VL_HIGH_CAP:
+ ret = 0;
+ goto done;
+
+ case QIB_IB_CFG_VL_LOW_CAP:
+ ret = 0;
+ goto done;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ ret = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl,
+ OverrunThreshold);
+ goto done;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ ret = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl,
+ PhyerrThreshold);
+ goto done;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ ret = (ppd->cpspec->ibcctrl &
+ SYM_MASK(IBCCtrl, LinkDownDefaultState)) ?
+ IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL;
+ goto done;
+
+ case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */
+ lsb = IBA7220_IBC_HRTBT_SHIFT;
+ maskr = IBA7220_IBC_HRTBT_MASK;
+ break;
+
+ case QIB_IB_CFG_PMA_TICKS:
+ /*
+ * 0x00 = 10x link transfer rate, or 4 nsec for 2.5 Gbps.
+ * Since the clock is always 250MHz, the value is 1 or 0.
+ */
+ ret = (ppd->link_speed_active == QIB_IB_DDR);
+ goto done;
+
+ default:
+ ret = -EINVAL;
+ goto done;
+ }
+ ret = (int)((ppd->cpspec->ibcddrctrl >> lsb) & maskr);
+done:
+ return ret;
+}
+
+static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 maskr; /* right-justified mask */
+ int lsb, ret = 0, setforce = 0;
+ u16 lcmd, licmd;
+ unsigned long flags;
+ u32 tmp = 0;
+
+ switch (which) {
+ case QIB_IB_CFG_LIDLMC:
+ /*
+ * Set LID and LMC. Combined to avoid possible hazard;
+ * caller puts LMC in 16 MSbits, DLID in 16 LSbits of val.
+ */
+ lsb = IBA7220_IBC_DLIDLMC_SHIFT;
+ maskr = IBA7220_IBC_DLIDLMC_MASK;
+ break;
+
+ case QIB_IB_CFG_LWID_ENB: /* set allowed Link-width */
+ /*
+ * As with speed, only write the actual register if
+ * the link is currently down; otherwise the change takes
+ * effect on the next link change.
+ */
+ ppd->link_width_enabled = val;
+ if (!(ppd->lflags & QIBL_LINKDOWN))
+ goto bail;
+ /*
+ * We set the QIBL_IB_FORCE_NOTIFY bit so updown
+ * will get called, because we want to update
+ * link_width_active, and the change may not take
+ * effect for some time (if we are in POLL), so this
+ * flag will force the updown routine to be called
+ * on the next ibstatuschange down interrupt, even
+ * if it's not a down->up transition.
+ */
+ val--; /* convert from IB to chip */
+ maskr = IBA7220_IBC_WIDTH_MASK;
+ lsb = IBA7220_IBC_WIDTH_SHIFT;
+ setforce = 1;
+ break;
+
+ case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */
+ /*
+ * If we turn off IB1.2, we need to preset SerDes defaults,
+ * but not right now. Set a flag for the next time
+ * we command the link down. As with width, only write the
+ * actual register if the link is currently down; otherwise
+ * it takes effect on the next link change. Since the setting
+ * is being explicitly requested (via MAD or sysfs), clear
+ * autoneg failure status if speed autoneg is enabled.
+ */
+ ppd->link_speed_enabled = val;
+ if ((ppd->cpspec->ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK) &&
+ !(val & (val - 1)))
+ dd->cspec->presets_needed = 1;
+ if (!(ppd->lflags & QIBL_LINKDOWN))
+ goto bail;
+ /*
+ * We set the QIBL_IB_FORCE_NOTIFY bit so updown
+ * will get called, because we want to update
+ * link_speed_active, and the change may not take
+ * effect for some time (if we are in POLL), so this
+ * flag will force the updown routine to be called
+ * on the next ibstatuschange down interrupt, even
+ * if it's not a down->up transition.
+ */
+ if (val == (QIB_IB_SDR | QIB_IB_DDR)) {
+ val = IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else
+ val = val == QIB_IB_DDR ?
+ IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR;
+ maskr = IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ /* IBTA 1.2 mode + speed bits are contiguous */
+ lsb = SYM_LSB(IBCDDRCtrl, IB_ENHANCED_MODE);
+ setforce = 1;
+ break;
+
+ case QIB_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */
+ lsb = IBA7220_IBC_RXPOL_SHIFT;
+ maskr = IBA7220_IBC_RXPOL_MASK;
+ break;
+
+ case QIB_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */
+ lsb = IBA7220_IBC_LREV_SHIFT;
+ maskr = IBA7220_IBC_LREV_MASK;
+ break;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ maskr = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl,
+ OverrunThreshold);
+ if (maskr != val) {
+ ppd->cpspec->ibcctrl &=
+ ~SYM_MASK(IBCCtrl, OverrunThreshold);
+ ppd->cpspec->ibcctrl |= (u64) val <<
+ SYM_LSB(IBCCtrl, OverrunThreshold);
+ qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ maskr = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl,
+ PhyerrThreshold);
+ if (maskr != val) {
+ ppd->cpspec->ibcctrl &=
+ ~SYM_MASK(IBCCtrl, PhyerrThreshold);
+ ppd->cpspec->ibcctrl |= (u64) val <<
+ SYM_LSB(IBCCtrl, PhyerrThreshold);
+ qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_PKEYS: /* update pkeys */
+ maskr = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) |
+ ((u64) ppd->pkeys[2] << 32) |
+ ((u64) ppd->pkeys[3] << 48);
+ qib_write_kreg(dd, kr_partitionkey, maskr);
+ goto bail;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ if (val == IB_LINKINITCMD_POLL)
+ ppd->cpspec->ibcctrl &=
+ ~SYM_MASK(IBCCtrl, LinkDownDefaultState);
+ else /* SLEEP */
+ ppd->cpspec->ibcctrl |=
+ SYM_MASK(IBCCtrl, LinkDownDefaultState);
+ qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ goto bail;
+
+ case QIB_IB_CFG_MTU: /* update the MTU in IBC */
+ /*
+ * Update our housekeeping variables, and set IBC max
+ * size, same as init code; max IBC is max we allow in
+ * buffer, less the qword pbc, plus 1 for ICRC, in dwords.
+ * Set even if it's unchanged; print debug message only
+ * on changes.
+ */
+ val = (ppd->ibmaxlen >> 2) + 1;
+ ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, MaxPktLen);
+ ppd->cpspec->ibcctrl |= (u64)val << SYM_LSB(IBCCtrl, MaxPktLen);
+ qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ goto bail;
+
+ case QIB_IB_CFG_LSTATE: /* set the IB link state */
+ switch (val & 0xffff0000) {
+ case IB_LINKCMD_DOWN:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN;
+ if (!ppd->cpspec->ibdeltainprog &&
+ qib_compat_ddr_negotiate) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap =
+ read_7220_creg32(dd, cr_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap =
+ read_7220_creg32(dd, cr_iblinkerrrecov);
+ }
+ break;
+
+ case IB_LINKCMD_ARMED:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED;
+ break;
+
+ case IB_LINKCMD_ACTIVE:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE;
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16);
+ goto bail;
+ }
+ switch (val & 0xffff) {
+ case IB_LINKINITCMD_NOP:
+ licmd = 0;
+ break;
+
+ case IB_LINKINITCMD_POLL:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL;
+ break;
+
+ case IB_LINKINITCMD_SLEEP:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP;
+ break;
+
+ case IB_LINKINITCMD_DISABLE:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE;
+ ppd->cpspec->chase_end = 0;
+ /*
+ * Stop state chase counter and timer, if running;
+ * wait for pending timer, but don't clear .data (ppd)!
+ */
+ if (ppd->cpspec->chase_timer.expires) {
+ del_timer_sync(&ppd->cpspec->chase_timer);
+ ppd->cpspec->chase_timer.expires = 0;
+ }
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkinitcmd req 0x%x\n",
+ val & 0xffff);
+ goto bail;
+ }
+ qib_set_ib_7220_lstate(ppd, lcmd, licmd);
+
+ maskr = IBA7220_IBC_WIDTH_MASK;
+ lsb = IBA7220_IBC_WIDTH_SHIFT;
+ tmp = (ppd->cpspec->ibcddrctrl >> lsb) & maskr;
+ /*
+ * If the width active on the chip does not match the
+ * width in the shadow register, write the new active
+ * width to the chip.
+ * We don't have to worry about speed as the speed is taken
+ * care of by set_7220_ibspeed_fast called by ib_updown.
+ */
+ if (ppd->link_width_enabled-1 != tmp) {
+ ppd->cpspec->ibcddrctrl &= ~(maskr << lsb);
+ ppd->cpspec->ibcddrctrl |=
+ (((u64)(ppd->link_width_enabled-1) & maskr) <<
+ lsb);
+ qib_write_kreg(dd, kr_ibcddrctrl,
+ ppd->cpspec->ibcddrctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_FORCE_NOTIFY;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
+ if (val > IBA7220_IBC_HRTBT_MASK) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ lsb = IBA7220_IBC_HRTBT_SHIFT;
+ maskr = IBA7220_IBC_HRTBT_MASK;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto bail;
+ }
+ ppd->cpspec->ibcddrctrl &= ~(maskr << lsb);
+ ppd->cpspec->ibcddrctrl |= (((u64) val & maskr) << lsb);
+ qib_write_kreg(dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ if (setforce) {
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_FORCE_NOTIFY;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+bail:
+ return ret;
+}
+
+static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
+{
+ int ret = 0;
+ u64 val, ddr;
+
+ if (!strncmp(what, "ibc", 3)) {
+ ppd->cpspec->ibcctrl |= SYM_MASK(IBCCtrl, Loopback);
+ val = 0; /* disable heart beat, so link will come up */
+ qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n",
+ ppd->dd->unit, ppd->port);
+ } else if (!strncmp(what, "off", 3)) {
+ ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
+ /* enable heart beat again */
+ val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
+ } else
+ ret = -EINVAL;
+ if (!ret) {
+ qib_write_kreg(ppd->dd, kr_ibcctrl, ppd->cpspec->ibcctrl);
+ ddr = ppd->cpspec->ibcddrctrl & ~(IBA7220_IBC_HRTBT_MASK
+ << IBA7220_IBC_HRTBT_SHIFT);
+ ppd->cpspec->ibcddrctrl = ddr | val;
+ qib_write_kreg(ppd->dd, kr_ibcddrctrl,
+ ppd->cpspec->ibcddrctrl);
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+ }
+ return ret;
+}
+
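+/*
+ * Update the chip's copy of the receive header queue head for this
+ * ctxt, and the eager buffer index head as well when updegr is set.
+ */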
+static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd,
+ u32 updegr, u32 egrhd, u32 npkts)
+{
+ if (updegr)
+ qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
+}
+
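+/* Return nonzero if the receive header queue for this ctxt is empty */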
+static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd)
+{
+ u32 head, tail;
+
+ head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt);
+ if (rcd->rcvhdrtail_kvaddr)
+ tail = qib_get_rcvhdrtail(rcd);
+ else
+ tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt);
+ return head == tail;
+}
+
+/*
+ * Modify the RCVCTRL register in chip-specific way. This
+ * is a function because bit positions and (future) register
+ * locations are chip-specific, but the needed operations are
+ * generic. <op> is a bit-mask because we often want to
+ * do multiple modifications.
+ */
+static void rcvctrl_7220_mod(struct qib_pportdata *ppd, unsigned int op,
+ int ctxt)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 mask, val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+ if (op & QIB_RCVCTRL_TAILUPD_ENB)
+ dd->rcvctrl |= (1ULL << IBA7220_R_TAILUPD_SHIFT);
+ if (op & QIB_RCVCTRL_TAILUPD_DIS)
+ dd->rcvctrl &= ~(1ULL << IBA7220_R_TAILUPD_SHIFT);
+ if (op & QIB_RCVCTRL_PKEY_ENB)
+ dd->rcvctrl &= ~(1ULL << IBA7220_R_PKEY_DIS_SHIFT);
+ if (op & QIB_RCVCTRL_PKEY_DIS)
+ dd->rcvctrl |= (1ULL << IBA7220_R_PKEY_DIS_SHIFT);
+ if (ctxt < 0)
+ mask = (1ULL << dd->ctxtcnt) - 1;
+ else
+ mask = (1ULL << ctxt);
+ if (op & QIB_RCVCTRL_CTXT_ENB) {
+ /* always done for specific ctxt */
+ dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, PortEnable));
+ if (!(dd->flags & QIB_NODMA_RTAIL))
+ dd->rcvctrl |= 1ULL << IBA7220_R_TAILUPD_SHIFT;
+ /* Write these registers before the context is enabled. */
+ qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt,
+ dd->rcd[ctxt]->rcvhdrqtailaddr_phys);
+ qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt,
+ dd->rcd[ctxt]->rcvhdrq_phys);
+ dd->rcd[ctxt]->seq_cnt = 1;
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS)
+ dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, PortEnable));
+ if (op & QIB_RCVCTRL_INTRAVAIL_ENB)
+ dd->rcvctrl |= (mask << IBA7220_R_INTRAVAIL_SHIFT);
+ if (op & QIB_RCVCTRL_INTRAVAIL_DIS)
+ dd->rcvctrl &= ~(mask << IBA7220_R_INTRAVAIL_SHIFT);
+ qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+ if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && dd->rhdrhead_intr_off) {
+ /* arm rcv interrupt */
+ val = qib_read_ureg32(dd, ur_rcvhdrhead, ctxt) |
+ dd->rhdrhead_intr_off;
+ qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+ }
+ if (op & QIB_RCVCTRL_CTXT_ENB) {
+ /*
+ * Init the context registers also; if we were
+ * disabled, tail and head should both be zero
+ * already from the enable, but since we don't
+ * know, we have to do it explicitly.
+ */
+ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
+ qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
+
+ val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt);
+ dd->rcd[ctxt]->head = val;
+ /* If kctxt, interrupt on next receive. */
+ if (ctxt < dd->first_user_ctxt)
+ val |= dd->rhdrhead_intr_off;
+ qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS) {
+ if (ctxt >= 0) {
+ qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, 0);
+ qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, 0);
+ } else {
+ unsigned i;
+
+ for (i = 0; i < dd->cfgctxts; i++) {
+ qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr,
+ i, 0);
+ qib_write_kreg_ctxt(dd, kr_rcvhdraddr, i, 0);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+}
+
+/*
+ * Modify the SENDCTRL register in chip-specific way. This
+ * is a function because there may be multiple such registers
+ * with slightly different layouts. To start, we assume the
+ * "canonical" register layout of the first chips.
+ * The chip requires no back-to-back sendctrl writes, so we
+ * write the scratch register after writing sendctrl.
+ */
+static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 tmp_dd_sendctrl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+
+ /* First the ones that are "sticky", saved in shadow */
+ if (op & QIB_SENDCTRL_CLEAR)
+ dd->sendctrl = 0;
+ if (op & QIB_SENDCTRL_SEND_DIS)
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, SPioEnable);
+ else if (op & QIB_SENDCTRL_SEND_ENB) {
+ dd->sendctrl |= SYM_MASK(SendCtrl, SPioEnable);
+ if (dd->flags & QIB_USE_SPCL_TRIG)
+ dd->sendctrl |= SYM_MASK(SendCtrl,
+ SSpecialTriggerEn);
+ }
+ if (op & QIB_SENDCTRL_AVAIL_DIS)
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd);
+ else if (op & QIB_SENDCTRL_AVAIL_ENB)
+ dd->sendctrl |= SYM_MASK(SendCtrl, SendBufAvailUpd);
+
+ if (op & QIB_SENDCTRL_DISARM_ALL) {
+ u32 i, last;
+
+ tmp_dd_sendctrl = dd->sendctrl;
+ /*
+ * disarm any that are not yet launched, disabling sends
+ * and updates until done.
+ */
+ last = dd->piobcnt2k + dd->piobcnt4k;
+ tmp_dd_sendctrl &=
+ ~(SYM_MASK(SendCtrl, SPioEnable) |
+ SYM_MASK(SendCtrl, SendBufAvailUpd));
+ for (i = 0; i < last; i++) {
+ qib_write_kreg(dd, kr_sendctrl,
+ tmp_dd_sendctrl |
+ SYM_MASK(SendCtrl, Disarm) | i);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ }
+
+ tmp_dd_sendctrl = dd->sendctrl;
+
+ if (op & QIB_SENDCTRL_FLUSH)
+ tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Abort);
+ if (op & QIB_SENDCTRL_DISARM)
+ tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) |
+ ((op & QIB_7220_SendCtrl_DisarmPIOBuf_RMASK) <<
+ SYM_LSB(SendCtrl, DisarmPIOBuf));
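+ /*
+ * For a blip, drop SendBufAvailUpd for this write only;
+ * dd->sendctrl (with the bit still set) is written back below.
+ */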
+ if ((op & QIB_SENDCTRL_AVAIL_BLIP) &&
+ (dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd)))
+ tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd);
+
+ qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ if (op & QIB_SENDCTRL_AVAIL_BLIP) {
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+
+ if (op & QIB_SENDCTRL_FLUSH) {
+ u32 v;
+ /*
+ * ensure writes have hit chip, then do a few
+ * more reads, to allow DMA of pioavail registers
+ * to occur, so in-memory copy is in sync with
+ * the chip. Not always safe to sleep.
+ */
+ v = qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_scratch, v);
+ v = qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_scratch, v);
+ qib_read_kreg32(dd, kr_scratch);
+ }
+}
+
+/**
+ * qib_portcntr_7220 - read a per-port counter
+ * @ppd: the qlogic_ib port data
+ * @reg: the counter to read
+ */
+static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg)
+{
+ u64 ret = 0ULL;
+ struct qib_devdata *dd = ppd->dd;
+ u16 creg;
+ /* 0xffff for unimplemented or synthesized counters */
+ static const u16 xlator[] = {
+ [QIBPORTCNTR_PKTSEND] = cr_pktsend,
+ [QIBPORTCNTR_WORDSEND] = cr_wordsend,
+ [QIBPORTCNTR_PSXMITDATA] = cr_psxmitdatacount,
+ [QIBPORTCNTR_PSXMITPKTS] = cr_psxmitpktscount,
+ [QIBPORTCNTR_PSXMITWAIT] = cr_psxmitwaitcount,
+ [QIBPORTCNTR_SENDSTALL] = cr_sendstall,
+ [QIBPORTCNTR_PKTRCV] = cr_pktrcv,
+ [QIBPORTCNTR_PSRCVDATA] = cr_psrcvdatacount,
+ [QIBPORTCNTR_PSRCVPKTS] = cr_psrcvpktscount,
+ [QIBPORTCNTR_RCVEBP] = cr_rcvebp,
+ [QIBPORTCNTR_RCVOVFL] = cr_rcvovfl,
+ [QIBPORTCNTR_WORDRCV] = cr_wordrcv,
+ [QIBPORTCNTR_RXDROPPKT] = cr_rxdroppkt,
+ [QIBPORTCNTR_RXLOCALPHYERR] = cr_rxotherlocalphyerr,
+ [QIBPORTCNTR_RXVLERR] = cr_rxvlerr,
+ [QIBPORTCNTR_ERRICRC] = cr_erricrc,
+ [QIBPORTCNTR_ERRVCRC] = cr_errvcrc,
+ [QIBPORTCNTR_ERRLPCRC] = cr_errlpcrc,
+ [QIBPORTCNTR_BADFORMAT] = cr_badformat,
+ [QIBPORTCNTR_ERR_RLEN] = cr_err_rlen,
+ [QIBPORTCNTR_IBSYMBOLERR] = cr_ibsymbolerr,
+ [QIBPORTCNTR_INVALIDRLEN] = cr_invalidrlen,
+ [QIBPORTCNTR_UNSUPVL] = cr_txunsupvl,
+ [QIBPORTCNTR_EXCESSBUFOVFL] = cr_excessbufferovfl,
+ [QIBPORTCNTR_ERRLINK] = cr_errlink,
+ [QIBPORTCNTR_IBLINKDOWN] = cr_iblinkdown,
+ [QIBPORTCNTR_IBLINKERRRECOV] = cr_iblinkerrrecov,
+ [QIBPORTCNTR_LLI] = cr_locallinkintegrityerr,
+ [QIBPORTCNTR_PSINTERVAL] = cr_psinterval,
+ [QIBPORTCNTR_PSSTART] = cr_psstart,
+ [QIBPORTCNTR_PSSTAT] = cr_psstat,
+ [QIBPORTCNTR_VL15PKTDROP] = cr_vl15droppedpkt,
+ [QIBPORTCNTR_ERRPKEY] = cr_errpkey,
+ [QIBPORTCNTR_KHDROVFL] = 0xffff,
+ };
+
+ if (reg >= ARRAY_SIZE(xlator)) {
+ qib_devinfo(ppd->dd->pcidev,
+ "Unimplemented portcounter %u\n", reg);
+ goto done;
+ }
+ creg = xlator[reg];
+
+ if (reg == QIBPORTCNTR_KHDROVFL) {
+ int i;
+
+ /* sum over all kernel contexts */
+ for (i = 0; i < dd->first_user_ctxt; i++)
+ ret += read_7220_creg32(dd, cr_portovfl + i);
+ }
+ if (creg == 0xffff)
+ goto done;
+
+ /*
+ * only the fast-incrementing counters are 64-bit; use 32-bit reads to
+ * avoid two independent reads when on Opteron
+ */
+ if ((creg == cr_wordsend || creg == cr_wordrcv ||
+ creg == cr_pktsend || creg == cr_pktrcv))
+ ret = read_7220_creg(dd, creg);
+ else
+ ret = read_7220_creg32(dd, creg);
+ if (creg == cr_ibsymbolerr) {
+ if (dd->pport->cpspec->ibdeltainprog)
+ ret -= ret - ppd->cpspec->ibsymsnap;
+ ret -= dd->pport->cpspec->ibsymdelta;
+ } else if (creg == cr_iblinkerrrecov) {
+ if (dd->pport->cpspec->ibdeltainprog)
+ ret -= ret - ppd->cpspec->iblnkerrsnap;
+ ret -= dd->pport->cpspec->iblnkerrdelta;
+ }
+done:
+ return ret;
+}
+
+/*
+ * Device counter names (not port-specific), one line per stat,
+ * single string. Used by utilities like ipathstats to print the stats
+ * in a way which works for different versions of drivers, without changing
+ * the utility. Names need to be 12 chars or less (w/o newline), for proper
+ * display by the utility.
+ * Non-error counters are first.
+ * Start of "error" counters is indicated by a leading "E " on the first
+ * "error" counter, and doesn't count in label length.
+ * The EgrOvfl list needs to be last so we truncate them at the configured
+ * context count for the device.
+ * cntr7220indices contains the corresponding register indices.
+ */
+static const char cntr7220names[] =
+ "Interrupts\n"
+ "HostBusStall\n"
+ "E RxTIDFull\n"
+ "RxTIDInvalid\n"
+ "Ctxt0EgrOvfl\n"
+ "Ctxt1EgrOvfl\n"
+ "Ctxt2EgrOvfl\n"
+ "Ctxt3EgrOvfl\n"
+ "Ctxt4EgrOvfl\n"
+ "Ctxt5EgrOvfl\n"
+ "Ctxt6EgrOvfl\n"
+ "Ctxt7EgrOvfl\n"
+ "Ctxt8EgrOvfl\n"
+ "Ctxt9EgrOvfl\n"
+ "Ctx10EgrOvfl\n"
+ "Ctx11EgrOvfl\n"
+ "Ctx12EgrOvfl\n"
+ "Ctx13EgrOvfl\n"
+ "Ctx14EgrOvfl\n"
+ "Ctx15EgrOvfl\n"
+ "Ctx16EgrOvfl\n";
+
+static const size_t cntr7220indices[] = {
+ cr_lbint,
+ cr_lbflowstall,
+ cr_errtidfull,
+ cr_errtidvalid,
+ cr_portovfl + 0,
+ cr_portovfl + 1,
+ cr_portovfl + 2,
+ cr_portovfl + 3,
+ cr_portovfl + 4,
+ cr_portovfl + 5,
+ cr_portovfl + 6,
+ cr_portovfl + 7,
+ cr_portovfl + 8,
+ cr_portovfl + 9,
+ cr_portovfl + 10,
+ cr_portovfl + 11,
+ cr_portovfl + 12,
+ cr_portovfl + 13,
+ cr_portovfl + 14,
+ cr_portovfl + 15,
+ cr_portovfl + 16,
+};
+
+/*
+ * same as cntr7220names and cntr7220indices, but for port-specific counters.
+ * portcntr7220indices is somewhat complicated by some registers needing
+ * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG
+ */
+static const char portcntr7220names[] =
+ "TxPkt\n"
+ "TxFlowPkt\n"
+ "TxWords\n"
+ "RxPkt\n"
+ "RxFlowPkt\n"
+ "RxWords\n"
+ "TxFlowStall\n"
+ "TxDmaDesc\n" /* 7220 and 7322-only */
+ "E RxDlidFltr\n" /* 7220 and 7322-only */
+ "IBStatusChng\n"
+ "IBLinkDown\n"
+ "IBLnkRecov\n"
+ "IBRxLinkErr\n"
+ "IBSymbolErr\n"
+ "RxLLIErr\n"
+ "RxBadFormat\n"
+ "RxBadLen\n"
+ "RxBufOvrfl\n"
+ "RxEBP\n"
+ "RxFlowCtlErr\n"
+ "RxICRCerr\n"
+ "RxLPCRCerr\n"
+ "RxVCRCerr\n"
+ "RxInvalLen\n"
+ "RxInvalPKey\n"
+ "RxPktDropped\n"
+ "TxBadLength\n"
+ "TxDropped\n"
+ "TxInvalLen\n"
+ "TxUnderrun\n"
+ "TxUnsupVL\n"
+ "RxLclPhyErr\n" /* 7220 and 7322-only */
+ "RxVL15Drop\n" /* 7220 and 7322-only */
+ "RxVlErr\n" /* 7220 and 7322-only */
+ "XcessBufOvfl\n" /* 7220 and 7322-only */
+ ;
+
+#define _PORT_VIRT_FLAG 0x8000 /* "virtual", need adjustments */
+static const size_t portcntr7220indices[] = {
+ QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG,
+ cr_pktsendflow,
+ QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG,
+ cr_pktrcvflowctrl,
+ QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG,
+ cr_txsdmadesc,
+ cr_rxdlidfltr,
+ cr_ibstatuschange,
+ QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_LLI | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG,
+ cr_rcvflowctrl_err,
+ QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG,
+ cr_invalidslen,
+ cr_senddropped,
+ cr_errslen,
+ cr_sendunderrun,
+ cr_txunsupvl,
+ QIBPORTCNTR_RXLOCALPHYERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_VL15PKTDROP | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RXVLERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_EXCESSBUFOVFL | _PORT_VIRT_FLAG,
+};
+
+/* do all the setup to make the counter reads efficient later */
+static void init_7220_cntrnames(struct qib_devdata *dd)
+{
+ int i, j = 0;
+ char *s;
+
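+ /*
+ * Count the device counter names; j tracks the per-ctxt
+ * EgrOvfl entries so the list is truncated at the
+ * configured ctxt count.
+ */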
+ for (i = 0, s = (char *)cntr7220names; s && j <= dd->cfgctxts;
+ i++) {
+ /* we always have at least one counter before the egrovfl */
+ if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12))
+ j = 1;
+ s = strchr(s + 1, '\n');
+ if (s && j)
+ j++;
+ }
+ dd->cspec->ncntrs = i;
+ if (!s)
+ /* full list; size is without terminating null */
+ dd->cspec->cntrnamelen = sizeof(cntr7220names) - 1;
+ else
+ dd->cspec->cntrnamelen = 1 + s - cntr7220names;
+ dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
+ * sizeof(u64), GFP_KERNEL);
+ if (!dd->cspec->cntrs)
+ qib_dev_err(dd, "Failed allocation for counters\n");
+
+ for (i = 0, s = (char *)portcntr7220names; s; i++)
+ s = strchr(s + 1, '\n');
+ dd->cspec->nportcntrs = i - 1;
+ dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1;
+ dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
+ * sizeof(u64), GFP_KERNEL);
+ if (!dd->cspec->portcntrs)
+ qib_dev_err(dd, "Failed allocation for portcounters\n");
+}
+
+static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
+ u64 **cntrp)
+{
+ u32 ret;
+
+ if (!dd->cspec->cntrs) {
+ ret = 0;
+ goto done;
+ }
+
+ if (namep) {
+ *namep = (char *)cntr7220names;
+ ret = dd->cspec->cntrnamelen;
+ if (pos >= ret)
+ ret = 0; /* final read after getting everything */
+ } else {
+ u64 *cntr = dd->cspec->cntrs;
+ int i;
+
+ ret = dd->cspec->ncntrs * sizeof(u64);
+ if (!cntr || pos >= ret) {
+ /* everything read, or couldn't get memory */
+ ret = 0;
+ goto done;
+ }
+
+ *cntrp = cntr;
+ for (i = 0; i < dd->cspec->ncntrs; i++)
+ *cntr++ = read_7220_creg32(dd, cntr7220indices[i]);
+ }
+done:
+ return ret;
+}
+
+static u32 qib_read_7220portcntrs(struct qib_devdata *dd, loff_t pos, u32 port,
+ char **namep, u64 **cntrp)
+{
+ u32 ret;
+
+ if (!dd->cspec->portcntrs) {
+ ret = 0;
+ goto done;
+ }
+ if (namep) {
+ *namep = (char *)portcntr7220names;
+ ret = dd->cspec->portcntrnamelen;
+ if (pos >= ret)
+ ret = 0; /* final read after getting everything */
+ } else {
+ u64 *cntr = dd->cspec->portcntrs;
+ struct qib_pportdata *ppd = &dd->pport[port];
+ int i;
+
+ ret = dd->cspec->nportcntrs * sizeof(u64);
+ if (!cntr || pos >= ret) {
+ /* everything read, or couldn't get memory */
+ ret = 0;
+ goto done;
+ }
+ *cntrp = cntr;
+ for (i = 0; i < dd->cspec->nportcntrs; i++) {
+ if (portcntr7220indices[i] & _PORT_VIRT_FLAG)
+ *cntr++ = qib_portcntr_7220(ppd,
+ portcntr7220indices[i] &
+ ~_PORT_VIRT_FLAG);
+ else
+ *cntr++ = read_7220_creg32(dd,
+ portcntr7220indices[i]);
+ }
+ }
+done:
+ return ret;
+}
+
+/**
+ * qib_get_7220_faststats - get word counters from chip before they overflow
+ * @opaque: contains a pointer to the qlogic_ib device qib_devdata
+ *
+ * This needs more work; in particular, a decision on whether we really
+ * need traffic_wds handled the way it is.
+ * Called from add_timer.
+ */
+static void qib_get_7220_faststats(unsigned long opaque)
+{
+ struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ struct qib_pportdata *ppd = dd->pport;
+ unsigned long flags;
+ u64 traffic_wds;
+
+ /*
+ * don't access the chip while running diags, or memory diags can
+ * fail
+ */
+ if (!(dd->flags & QIB_INITTED) || dd->diag_client)
+ /* but re-arm the timer, for diags case; won't hurt otherwise */
+ goto done;
+
+ /*
+ * We now try to maintain an activity timer, based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ traffic_wds = qib_portcntr_7220(ppd, cr_wordsend) +
+ qib_portcntr_7220(ppd, cr_wordrcv);
+ spin_lock_irqsave(&dd->eep_st_lock, flags);
+ traffic_wds -= dd->traffic_wds;
+ dd->traffic_wds += traffic_wds;
+ if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(5, &dd->active_time); /* S/B #define */
+ spin_unlock_irqrestore(&dd->eep_st_lock, flags);
+done:
+ mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
+}
+
+/*
+ * If we are using MSI, try to fall back to INTx.
+ */
+static int qib_7220_intr_fallback(struct qib_devdata *dd)
+{
+ if (!dd->msi_lo)
+ return 0;
+
+ qib_devinfo(dd->pcidev,
+ "MSI interrupt not detected, trying INTx interrupts\n");
+ qib_7220_free_irq(dd);
+ qib_enable_intx(dd->pcidev);
+ /*
+ * Some newer kernels require free_irq before disable_msi,
+ * and the irq can change during the disable and INTx enable,
+ * so we therefore need to use the pcidev->irq value,
+ * not our saved MSI value.
+ */
+ dd->cspec->irq = dd->pcidev->irq;
+ qib_setup_7220_interrupt(dd);
+ return 1;
+}
+
+/*
+ * Reset the XGXS (between serdes and IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well.
+ */
+static void qib_7220_xgxs_reset(struct qib_pportdata *ppd)
+{
+ u64 val, prev_val;
+ struct qib_devdata *dd = ppd->dd;
+
+ prev_val = qib_read_kreg64(dd, kr_xgxs_cfg);
+ val = prev_val | QLOGIC_IB_XGXS_RESET;
+ prev_val &= ~QLOGIC_IB_XGXS_RESET; /* be sure */
+ qib_write_kreg(dd, kr_control,
+ dd->control & ~QLOGIC_IB_C_LINKENABLE);
+ qib_write_kreg(dd, kr_xgxs_cfg, val);
+ qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_xgxs_cfg, prev_val);
+ qib_write_kreg(dd, kr_control, dd->control);
+}
+
+/*
+ * For this chip, we want to use the same buffer every time
+ * when we are trying to bring the link up (they are always VL15
+ * packets). At that link state the packet should always go out immediately
+ * (or at least be discarded at the tx interface if the link is down).
+ * If it doesn't, and the buffer isn't available, that means some other
+ * sender has gotten ahead of us, and is preventing our packet from going
+ * out. In that case, we flush all packets, and try again. If that still
+ * fails, we fail the request, and hope things work the next time around.
+ *
+ * We don't need very complicated heuristics on whether the packet had
+ * time to go out or not, since even at SDR 1X, it goes out in very short
+ * time periods, covered by the chip reads done here and as part of the
+ * flush.
+ */
+static u32 __iomem *get_7220_link_buf(struct qib_pportdata *ppd, u32 *bnum)
+{
+ u32 __iomem *buf;
+ u32 lbuf = ppd->dd->cspec->lastbuf_for_pio;
+ int do_cleanup;
+ unsigned long flags;
+
+ /*
+ * always blip to get avail list updated, since it's almost
+ * always needed, and is fairly cheap.
+ */
+ sendctrl_7220_mod(ppd->dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */
+ buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf);
+ if (buf)
+ goto done;
+
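+ /*
+ * No buffer: if the SDMA engine is idle, take it through
+ * hw_down; otherwise clean up the engine and retry the
+ * buffer once after an extra chip flush.
+ */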
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (ppd->sdma_state.current_state == qib_sdma_state_s20_idle &&
+ ppd->sdma_state.current_state != qib_sdma_state_s00_hw_down) {
+ __qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down);
+ do_cleanup = 0;
+ } else {
+ do_cleanup = 1;
+ qib_7220_sdma_hw_clean_up(ppd);
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ if (do_cleanup) {
+ qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */
+ buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf);
+ }
+done:
+ return buf;
+}
+
+/*
+ * This code for non-IBTA-compliant IB speed negotiation is only known to
+ * work for the SDR to DDR transition, and only between an HCA and a switch
+ * with recent firmware. It is based on observed heuristics, rather than
+ * actual knowledge of the non-compliant speed negotiation.
+ * It has a number of hard-coded fields, since the hope is to rewrite this
+ * when a spec is available on how the negotiation is intended to work.
+ */
+static void autoneg_7220_sendpkt(struct qib_pportdata *ppd, u32 *hdr,
+ u32 dcnt, u32 *data)
+{
+ int i;
+ u64 pbc;
+ u32 __iomem *piobuf;
+ u32 pnum;
+ struct qib_devdata *dd = ppd->dd;
+
+ i = 0;
+ pbc = 7 + dcnt + 1; /* 7 dword header, dcnt dwords of data, 1 for icrc */
+ pbc |= PBC_7220_VL15_SEND;
+ while (!(piobuf = get_7220_link_buf(ppd, &pnum))) {
+ if (i++ > 5)
+ return;
+ udelay(2);
+ }
+ sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_DISARM_BUF(pnum));
+ writeq(pbc, piobuf);
+ qib_flush_wc();
+ qib_pio_copy(piobuf + 2, hdr, 7);
+ qib_pio_copy(piobuf + 9, data, dcnt);
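+ /*
+ * On boards using the special send trigger, the magic
+ * write below at the buffer's trigger offset launches
+ * the packet.
+ */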
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023;
+
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf + spcl_off);
+ }
+ qib_flush_wc();
+ qib_sendbuf_done(dd, pnum);
+}
+
+/*
+ * _start packet gets sent twice at start, _done gets sent twice at end
+ */
+static void autoneg_7220_send(struct qib_pportdata *ppd, int which)
+{
+ struct qib_devdata *dd = ppd->dd;
+ static u32 swapped;
+ u32 dw, i, hcnt, dcnt, *data;
+ static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba };
+ static u32 madpayload_start[0x40] = {
+ 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+ 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */
+ };
+ static u32 madpayload_done[0x40] = {
+ 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+ 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x40000001, 0x1388, 0x15e, /* rest 0's */
+ };
+
+ dcnt = ARRAY_SIZE(madpayload_start);
+ hcnt = ARRAY_SIZE(hdr);
+ if (!swapped) {
+ /* for maintainability, do it at runtime */
+ for (i = 0; i < hcnt; i++) {
+ dw = (__force u32) cpu_to_be32(hdr[i]);
+ hdr[i] = dw;
+ }
+ for (i = 0; i < dcnt; i++) {
+ dw = (__force u32) cpu_to_be32(madpayload_start[i]);
+ madpayload_start[i] = dw;
+ dw = (__force u32) cpu_to_be32(madpayload_done[i]);
+ madpayload_done[i] = dw;
+ }
+ swapped = 1;
+ }
+
+ data = which ? madpayload_done : madpayload_start;
+
+ autoneg_7220_sendpkt(ppd, hdr, dcnt, data);
+ qib_read_kreg64(dd, kr_scratch);
+ udelay(2);
+ autoneg_7220_sendpkt(ppd, hdr, dcnt, data);
+ qib_read_kreg64(dd, kr_scratch);
+ udelay(2);
+}
+
+/*
+ * Do the absolute minimum to cause an IB speed change, and make it
+ * ready, but don't actually trigger the change. The caller will
+ * do that when ready (if link is in Polling training state, it will
+ * happen immediately, otherwise when link next goes down)
+ *
+ * This routine should only be used as part of the DDR autonegotiation
+ * code for devices that are not compliant with IB 1.2 (or code that
+ * fixes things up for same).
+ *
+ * When the link has gone down and autoneg is enabled, or autoneg has
+ * failed and we give up until the next time we set both speeds, we
+ * then want IBTA enabled as well as "use max enabled speed".
+ */
+static void set_7220_ibspeed_fast(struct qib_pportdata *ppd, u32 speed)
+{
+ ppd->cpspec->ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK);
+
+ if (speed == (QIB_IB_SDR | QIB_IB_DDR))
+ ppd->cpspec->ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK |
+ IBA7220_IBC_IBTA_1_2_MASK;
+ else
+ ppd->cpspec->ibcddrctrl |= speed == QIB_IB_DDR ?
+ IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR;
+
+ qib_write_kreg(ppd->dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl);
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+}
+
+/*
+ * This routine is only used when we are not talking to another
+ * IB 1.2-compliant device that we think can do DDR.
+ * (This includes all existing switch chips as of Oct 2007.)
+ * 1.2-compliant devices go directly to DDR prior to reaching INIT
+ */
+static void try_7220_autoneg(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+
+ /*
+ * Required for older non-IB1.2 DDR switches. Newer
+ * non-IB-compliant switches don't need it, but so far,
+ * aren't bothered by it either. "Magic constant".
+ */
+ qib_write_kreg(ppd->dd, kr_ncmodectrl, 0x3b9dc07);
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_AUTONEG_INPROG;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ autoneg_7220_send(ppd, 0);
+ set_7220_ibspeed_fast(ppd, QIB_IB_DDR);
+
+ toggle_7220_rclkrls(ppd->dd);
+ /* 2 msec is minimum length of a poll cycle */
+ queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work,
+ msecs_to_jiffies(2));
+}
+
+/*
+ * Handle the empirically determined mechanism for auto-negotiation
+ * of DDR speed with switches.
+ */
+static void autoneg_7220_work(struct work_struct *work)
+{
+ struct qib_pportdata *ppd;
+ struct qib_devdata *dd;
+ u64 startms;
+ u32 i;
+ unsigned long flags;
+
+ ppd = &container_of(work, struct qib_chippport_specific,
+ autoneg_work.work)->pportdata;
+ dd = ppd->dd;
+
+ startms = jiffies_to_msecs(jiffies);
+
+ /*
+ * Busy wait for this first part; it should be at most a
+ * few hundred usec, since we scheduled ourselves for 2msec.
+ */
+ for (i = 0; i < 25; i++) {
+ if (SYM_FIELD(ppd->lastibcstat, IBCStatus, LinkTrainingState)
+ == IB_7220_LT_STATE_POLLQUIET) {
+ qib_set_linkstate(ppd, QIB_IB_LINKDOWN_DISABLE);
+ break;
+ }
+ udelay(100);
+ }
+
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ goto done; /* we got there early or told to stop */
+
+ /* we expect this to time out */
+ if (wait_event_timeout(ppd->cpspec->autoneg_wait,
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(90)))
+ goto done;
+
+ toggle_7220_rclkrls(dd);
+
+ /* we expect this to time out */
+ if (wait_event_timeout(ppd->cpspec->autoneg_wait,
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(1700)))
+ goto done;
+
+ set_7220_ibspeed_fast(ppd, QIB_IB_SDR);
+ toggle_7220_rclkrls(dd);
+
+ /*
+ * Wait up to 250 msec for link to train and get to INIT at DDR;
+ * this should terminate early.
+ */
+ wait_event_timeout(ppd->cpspec->autoneg_wait,
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(250));
+done:
+ if (ppd->lflags & QIBL_IB_AUTONEG_INPROG) {
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
+ if (dd->cspec->autoneg_tries == AUTONEG_TRIES) {
+ ppd->lflags |= QIBL_IB_AUTONEG_FAILED;
+ dd->cspec->autoneg_tries = 0;
+ }
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ set_7220_ibspeed_fast(ppd, ppd->link_speed_enabled);
+ }
+}
+
+static u32 qib_7220_iblink_state(u64 ibcs)
+{
+ u32 state = (u32)SYM_FIELD(ibcs, IBCStatus, LinkState);
+
+ switch (state) {
+ case IB_7220_L_STATE_INIT:
+ state = IB_PORT_INIT;
+ break;
+ case IB_7220_L_STATE_ARM:
+ state = IB_PORT_ARMED;
+ break;
+ case IB_7220_L_STATE_ACTIVE:
+ /* fall through */
+ case IB_7220_L_STATE_ACT_DEFER:
+ state = IB_PORT_ACTIVE;
+ break;
+ default: /* fall through */
+ case IB_7220_L_STATE_DOWN:
+ state = IB_PORT_DOWN;
+ break;
+ }
+ return state;
+}
+
+/* returns the IBTA port state, rather than the IBC link training state */
+static u8 qib_7220_phys_portstate(u64 ibcs)
+{
+ u8 state = (u8)SYM_FIELD(ibcs, IBCStatus, LinkTrainingState);
+ return qib_7220_physportstate[state];
+}
+
+static int qib_7220_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
+{
+ int ret = 0, symadj = 0;
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+
+ if (!ibup) {
+ /*
+ * When the link goes down we don't want AEQ running, so it
+ * won't interfere with IBC training, etc., and we need
+ * to go back to the static SerDes preset values.
+ */
+ if (!(ppd->lflags & (QIBL_IB_AUTONEG_FAILED |
+ QIBL_IB_AUTONEG_INPROG)))
+ set_7220_ibspeed_fast(ppd, ppd->link_speed_enabled);
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+ qib_sd7220_presets(dd);
+ qib_cancel_sends(ppd); /* initial disarm, etc. */
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (__qib_sdma_running(ppd))
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e70_go_idle);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ /* this might be better in qib_sd7220_presets() */
+ set_7220_relock_poll(dd, ibup);
+ } else {
+ if (qib_compat_ddr_negotiate &&
+ !(ppd->lflags & (QIBL_IB_AUTONEG_FAILED |
+ QIBL_IB_AUTONEG_INPROG)) &&
+ ppd->link_speed_active == QIB_IB_SDR &&
+ (ppd->link_speed_enabled & (QIB_IB_DDR | QIB_IB_SDR)) ==
+ (QIB_IB_DDR | QIB_IB_SDR) &&
+ dd->cspec->autoneg_tries < AUTONEG_TRIES) {
+ /* we are SDR, and DDR auto-negotiation enabled */
+ ++dd->cspec->autoneg_tries;
+ if (!ppd->cpspec->ibdeltainprog) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap = read_7220_creg32(dd,
+ cr_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap = read_7220_creg32(dd,
+ cr_iblinkerrrecov);
+ }
+ try_7220_autoneg(ppd);
+ ret = 1; /* no other IB status change processing */
+ } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) &&
+ ppd->link_speed_active == QIB_IB_SDR) {
+ autoneg_7220_send(ppd, 1);
+ set_7220_ibspeed_fast(ppd, QIB_IB_DDR);
+ udelay(2);
+ toggle_7220_rclkrls(dd);
+ ret = 1; /* no other IB status change processing */
+ } else {
+ if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) &&
+ (ppd->link_speed_active & QIB_IB_DDR)) {
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~(QIBL_IB_AUTONEG_INPROG |
+ QIBL_IB_AUTONEG_FAILED);
+ spin_unlock_irqrestore(&ppd->lflags_lock,
+ flags);
+ dd->cspec->autoneg_tries = 0;
+ /* re-enable SDR, for next link down */
+ set_7220_ibspeed_fast(ppd,
+ ppd->link_speed_enabled);
+ wake_up(&ppd->cpspec->autoneg_wait);
+ symadj = 1;
+ } else if (ppd->lflags & QIBL_IB_AUTONEG_FAILED) {
+ /*
+ * Clear autoneg failure flag, and do setup
+ * so we'll try next time link goes down and
+ * back to INIT (possibly connected to a
+ * different device).
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&ppd->lflags_lock,
+ flags);
+ ppd->cpspec->ibcddrctrl |=
+ IBA7220_IBC_IBTA_1_2_MASK;
+ qib_write_kreg(dd, kr_ncmodectrl, 0);
+ symadj = 1;
+ }
+ }
+
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ symadj = 1;
+
+ if (!ret) {
+ ppd->delay_mult = rate_to_delay
+ [(ibcs >> IBA7220_LINKSPEED_SHIFT) & 1]
+ [(ibcs >> IBA7220_LINKWIDTH_SHIFT) & 1];
+
+ set_7220_relock_poll(dd, ibup);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /*
+ * Unlike 7322, the 7220 needs this, due to lack of
+ * interrupt in some cases when we have sdma active
+ * when the link goes down.
+ */
+ if (ppd->sdma_state.current_state !=
+ qib_sdma_state_s20_idle)
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e00_go_hw_down);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+
+ if (symadj) {
+ if (ppd->cpspec->ibdeltainprog) {
+ ppd->cpspec->ibdeltainprog = 0;
+ ppd->cpspec->ibsymdelta += read_7220_creg32(ppd->dd,
+ cr_ibsymbolerr) - ppd->cpspec->ibsymsnap;
+ ppd->cpspec->iblnkerrdelta += read_7220_creg32(ppd->dd,
+ cr_iblinkerrrecov) - ppd->cpspec->iblnkerrsnap;
+ }
+ } else if (!ibup && qib_compat_ddr_negotiate &&
+ !ppd->cpspec->ibdeltainprog &&
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap = read_7220_creg32(ppd->dd,
+ cr_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap = read_7220_creg32(ppd->dd,
+ cr_iblinkerrrecov);
+ }
+
+ if (!ret)
+ qib_setup_7220_setextled(ppd, ibup);
+ return ret;
+}
+
+/*
+ * Does read/modify/write to appropriate registers to
+ * set output and direction bits selected by mask.
+ * These are in their canonical positions (e.g. lsb of
+ * dir will end up in D48 of extctrl on existing chips).
+ * Returns contents of GP Inputs.
+ */
+static int gpio_7220_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask)
+{
+ u64 read_val, new_out;
+ unsigned long flags;
+
+ if (mask) {
+ /* some bits being written, lock access to GPIO */
+ dir &= mask;
+ out &= mask;
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe));
+ dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe));
+ new_out = (dd->cspec->gpio_out & ~mask) | out;
+
+ qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl);
+ qib_write_kreg(dd, kr_gpio_out, new_out);
+ dd->cspec->gpio_out = new_out;
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+ }
+ /*
+ * It is unlikely that a read at this time would get valid
+ * data on a pin whose direction line was set in the same
+ * call to this function. We include the read here because
+ * that allows us to potentially combine a change on one pin with
+ * a read on another, and because the old code did something like
+ * this.
+ */
+ read_val = qib_read_kreg64(dd, kr_extstatus);
+ return SYM_FIELD(read_val, EXTStatus, GPIOIn);
+}
+
+/*
+ * Read fundamental info we need to use the chip. These are
+ * the registers that describe chip capabilities, and are
+ * saved in shadow registers.
+ */
+static void get_7220_chip_params(struct qib_devdata *dd)
+{
+ u64 val;
+ u32 piobufs;
+ int mtu;
+
+ dd->uregbase = qib_read_kreg32(dd, kr_userregbase);
+
+ dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt);
+ dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase);
+ dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase);
+ dd->palign = qib_read_kreg32(dd, kr_palign);
+ dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase);
+ dd->pio2k_bufbase = dd->piobufbase & 0xffffffff;
+
+ val = qib_read_kreg64(dd, kr_sendpiosize);
+ dd->piosize2k = val & ~0U;
+ dd->piosize4k = val >> 32;
+
+ mtu = ib_mtu_enum_to_int(qib_ibmtu);
+ if (mtu == -1)
+ mtu = QIB_DEFAULT_MTU;
+ dd->pport->ibmtu = (u32)mtu;
+
+ val = qib_read_kreg64(dd, kr_sendpiobufcnt);
+ dd->piobcnt2k = val & ~0U;
+ dd->piobcnt4k = val >> 32;
+ /* these may be adjusted in init_chip_wc_pat() */
+ dd->pio2kbase = (u32 __iomem *)
+ ((char __iomem *) dd->kregbase + dd->pio2k_bufbase);
+ if (dd->piobcnt4k) {
+ dd->pio4kbase = (u32 __iomem *)
+ ((char __iomem *) dd->kregbase +
+ (dd->piobufbase >> 32));
+ /*
+ * 4K buffers take 2 pages; we use roundup just to be
+ * paranoid; we calculate it once here, rather than on
+ * every buf allocate.
+ */
+ dd->align4k = ALIGN(dd->piosize4k, dd->palign);
+ }
+
+ piobufs = dd->piobcnt4k + dd->piobcnt2k;
+
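+ /*
+ * Each 64-bit pioavail register holds 2 bits per send buffer,
+ * so one register covers 32 buffers.
+ */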
+ dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) /
+ (sizeof(u64) * BITS_PER_BYTE / 2);
+}
+
+/*
+ * The chip base addresses in cspec and cpspec have to be set
+ * after possible init_chip_wc_pat(), rather than in
+ * get_7220_chip_params(), so this is split out as a separate function.
+ */
+static void set_7220_baseaddrs(struct qib_devdata *dd)
+{
+ u32 cregbase;
+ /* init after possible re-map in init_chip_wc_pat() */
+ cregbase = qib_read_kreg32(dd, kr_counterregbase);
+ dd->cspec->cregbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase + cregbase);
+
+ dd->egrtidbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase + dd->rcvegrbase);
+}
+
+
+#define SENDCTRL_SHADOWED (SYM_MASK(SendCtrl, SendIntBufAvail) | \
+ SYM_MASK(SendCtrl, SPioEnable) | \
+ SYM_MASK(SendCtrl, SSpecialTriggerEn) | \
+ SYM_MASK(SendCtrl, SendBufAvailUpd) | \
+ SYM_MASK(SendCtrl, AvailUpdThld) | \
+ SYM_MASK(SendCtrl, SDmaEnable) | \
+ SYM_MASK(SendCtrl, SDmaIntEnable) | \
+ SYM_MASK(SendCtrl, SDmaHalt) | \
+ SYM_MASK(SendCtrl, SDmaSingleDescriptor))
+
+static int sendctrl_hook(struct qib_devdata *dd,
+ const struct diag_observer *op,
+ u32 offs, u64 *data, u64 mask, int only_32)
+{
+ unsigned long flags;
+ unsigned idx = offs / sizeof(u64);
+ u64 local_data, all_bits;
+
+ if (idx != kr_sendctrl) {
+ qib_dev_err(dd, "SendCtrl Hook called with offs %X, %s-bit\n",
+ offs, only_32 ? "32" : "64");
+ return 0;
+ }
+
+ all_bits = ~0ULL;
+ if (only_32)
+ all_bits >>= 32;
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if ((mask & all_bits) != all_bits) {
+ /*
+ * At least some mask bits are zero, so we need
+ * to read. The judgement call is whether from
+ * reg or shadow. First-cut: read reg, and complain
+ * if any bits which should be shadowed are different
+ * from their shadowed value.
+ */
+ if (only_32)
+ local_data = (u64)qib_read_kreg32(dd, idx);
+ else
+ local_data = qib_read_kreg64(dd, idx);
+ qib_dev_err(dd, "Sendctrl -> %X, Shad -> %X\n",
+ (u32)local_data, (u32)dd->sendctrl);
+ if ((local_data & SENDCTRL_SHADOWED) !=
+ (dd->sendctrl & SENDCTRL_SHADOWED))
+ qib_dev_err(dd, "Sendctrl read: %X shadow is %X\n",
+ (u32)local_data, (u32) dd->sendctrl);
+ *data = (local_data & ~mask) | (*data & mask);
+ }
+ if (mask) {
+ /*
+ * At least some mask bits are one, so we need
+ * to write, but only shadow some bits.
+ */
+ u64 sval, tval; /* Shadowed, transient */
+
+ /*
+ * New shadow val is bits we don't want to touch,
+ * ORed with bits we do, that are intended for shadow.
+ */
+ sval = (dd->sendctrl & ~mask);
+ sval |= *data & SENDCTRL_SHADOWED & mask;
+ dd->sendctrl = sval;
+ tval = sval | (*data & ~SENDCTRL_SHADOWED & mask);
+ qib_dev_err(dd, "Sendctrl <- %X, Shad <- %X\n",
+ (u32)tval, (u32)sval);
+ qib_write_kreg(dd, kr_sendctrl, tval);
+		qib_write_kreg(dd, kr_scratch, 0ULL);
+ }
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+
+ return only_32 ? 4 : 8;
+}
+
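+/*
+ * Observe exactly the SendCtrl register: the observed range begins and
+ * ends at the same byte offset.
+ */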
+static const struct diag_observer sendctrl_observer = {
+ sendctrl_hook, kr_sendctrl * sizeof(u64),
+ kr_sendctrl * sizeof(u64)
+};
+
+/*
+ * write the final few registers that depend on some of the
+ * init setup. Done late in init, just before bringing up
+ * the serdes.
+ */
+static int qib_late_7220_initreg(struct qib_devdata *dd)
+{
+ int ret = 0;
+ u64 val;
+
+ qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize);
+ qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize);
+ qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt);
+ qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
+ val = qib_read_kreg64(dd, kr_sendpioavailaddr);
+ if (val != dd->pioavailregs_phys) {
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
+ ret = -EINVAL;
+ }
+ qib_register_observer(dd, &sendctrl_observer);
+ return ret;
+}
+
+static int qib_init_7220_variables(struct qib_devdata *dd)
+{
+ struct qib_chippport_specific *cpspec;
+ struct qib_pportdata *ppd;
+ int ret = 0;
+ u32 sbufs, updthresh;
+
+ cpspec = (struct qib_chippport_specific *)(dd + 1);
+ ppd = &cpspec->pportdata;
+ dd->pport = ppd;
+ dd->num_pports = 1;
+
+ dd->cspec = (struct qib_chip_specific *)(cpspec + dd->num_pports);
+ ppd->cpspec = cpspec;
+
+ spin_lock_init(&dd->cspec->sdepb_lock);
+ spin_lock_init(&dd->cspec->rcvmod_lock);
+ spin_lock_init(&dd->cspec->gpio_lock);
+
+ /* we haven't yet set QIB_PRESENT, so use read directly */
+ dd->revision = readq(&dd->kregbase[kr_revision]);
+
+ if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
+ ret = -ENODEV;
+ goto bail;
+ }
+ dd->flags |= QIB_PRESENT; /* now register routines work */
+
+ dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R,
+ ChipRevMajor);
+ dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R,
+ ChipRevMinor);
+
+ get_7220_chip_params(dd);
+ qib_7220_boardname(dd);
+
+ /*
+ * GPIO bits for TWSI data and clock,
+ * used for serial EEPROM.
+ */
+ dd->gpio_sda_num = _QIB_GPIO_SDA_NUM;
+ dd->gpio_scl_num = _QIB_GPIO_SCL_NUM;
+ dd->twsi_eeprom_dev = QIB_TWSI_EEPROM_DEV;
+
+ dd->flags |= QIB_HAS_INTX | QIB_HAS_LINK_LATENCY |
+ QIB_NODMA_RTAIL | QIB_HAS_THRESH_UPDATE;
+ dd->flags |= qib_special_trigger ?
+ QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA;
+
+ /*
+	 * EEPROM error log 0 is TXE parity errors, 1 is RXE parity,
+	 * 2 is miscellaneous, and 3 is reserved for future use.
+ */
+ dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr);
+
+ dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr);
+
+ dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
+
+ init_waitqueue_head(&cpspec->autoneg_wait);
+ INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
+
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
+ ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
+
+ ppd->link_width_enabled = ppd->link_width_supported;
+ ppd->link_speed_enabled = ppd->link_speed_supported;
+ /*
+	 * Set the initial values to reasonable defaults; they will be set
+	 * for real when the link comes up.
+ */
+ ppd->link_width_active = IB_WIDTH_4X;
+ ppd->link_speed_active = QIB_IB_SDR;
+ ppd->delay_mult = rate_to_delay[0][1];
+ ppd->vls_supported = IB_VL_VL0;
+ ppd->vls_operational = ppd->vls_supported;
+
+ if (!qib_mini_init)
+ qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP);
+
+ init_timer(&ppd->cpspec->chase_timer);
+ ppd->cpspec->chase_timer.function = reenable_7220_chase;
+ ppd->cpspec->chase_timer.data = (unsigned long)ppd;
+
+ qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
+
+ dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
+ dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
+ dd->rhf_offset =
+ dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
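+	/* rhf_offset points at the last 64 bits of each rcvhdrq entry */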
+
+ /* we always allocate at least 2048 bytes for eager buffers */
+ ret = ib_mtu_enum_to_int(qib_ibmtu);
+ dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+ BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+ dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
+
+ qib_7220_tidtemplate(dd);
+
+ /*
+ * We can request a receive interrupt for 1 or
+ * more packets from current offset. For now, we set this
+ * up for a single packet.
+ */
+ dd->rhdrhead_intr_off = 1ULL << 32;
+
+ /* setup the stats timer; the add_timer is done at end of init */
+ init_timer(&dd->stats_timer);
+ dd->stats_timer.function = qib_get_7220_faststats;
+ dd->stats_timer.data = (unsigned long) dd;
+ dd->stats_timer.expires = jiffies + ACTIVITY_TIMER * HZ;
+
+ /*
+ * Control[4] has been added to change the arbitration within
+ * the SDMA engine between favoring data fetches over descriptor
+ * fetches. qib_sdma_fetch_arb==0 gives data fetches priority.
+ */
+ if (qib_sdma_fetch_arb)
+ dd->control |= 1 << 4;
+
+ dd->ureg_align = 0x10000; /* 64KB alignment */
+
+ dd->piosize2kmax_dwords = (dd->piosize2k >> 2)-1;
+ qib_7220_config_ctxts(dd);
+ qib_set_ctxtcnt(dd); /* needed for PAT setup */
+
+ if (qib_wc_pat) {
+ ret = init_chip_wc_pat(dd, 0);
+ if (ret)
+ goto bail;
+ }
+ set_7220_baseaddrs(dd); /* set chip access pointers now */
+
+ ret = 0;
+ if (qib_mini_init)
+ goto bail;
+
+ ret = qib_create_ctxts(dd);
+ init_7220_cntrnames(dd);
+
+	/*
+	 * Use all of the 4KB buffers for kernel SDMA (zero if SDMA is not
+	 * enabled). Reserve the update threshold amount, or 3, whichever is
+	 * greater, for other kernel use such as sending SMI, MAD, and ACKs;
+	 * if SDMA is not enabled, all of the 4K buffers go to the kernel.
+	 * Reserving fewer than the update threshold could mean a long wait
+	 * for an update. Coded this way because we sometimes change the
+	 * update threshold for various reasons, and we want this to remain
+	 * robust.
+	 */
+ updthresh = 8U; /* update threshold */
+ if (dd->flags & QIB_HAS_SEND_DMA) {
+ dd->cspec->sdmabufcnt = dd->piobcnt4k;
+ sbufs = updthresh > 3 ? updthresh : 3;
+ } else {
+ dd->cspec->sdmabufcnt = 0;
+ sbufs = dd->piobcnt4k;
+ }
+
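+	/*
+	 * Partition the send buffers: the highest-numbered buffers go to
+	 * SDMA, the sbufs just below them are reserved for other kernel
+	 * use, and the remainder is divided among the user contexts.
+	 */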
+ dd->cspec->lastbuf_for_pio = dd->piobcnt2k + dd->piobcnt4k -
+ dd->cspec->sdmabufcnt;
+ dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
+ dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+ dd->last_pio = dd->cspec->lastbuf_for_pio;
+ dd->pbufsctxt = dd->lastctxt_piobuf /
+ (dd->cfgctxts - dd->first_user_ctxt);
+
+ /*
+	 * If we are at 16 user contexts, we will have only 7 sbufs
+	 * per context, so drop the update threshold to match. We
+	 * want to update before we actually run out; at low pbufs/ctxt,
+	 * give ourselves some margin.
+ */
+ if ((dd->pbufsctxt - 2) < updthresh)
+ updthresh = dd->pbufsctxt - 2;
+
+ dd->cspec->updthresh_dflt = updthresh;
+ dd->cspec->updthresh = updthresh;
+
+ /* before full enable, no interrupts, no locking needed */
+ dd->sendctrl |= (updthresh & SYM_RMASK(SendCtrl, AvailUpdThld))
+ << SYM_LSB(SendCtrl, AvailUpdThld);
+
+ dd->psxmitwait_supported = 1;
+ dd->psxmitwait_check_rate = QIB_7220_PSXMITWAIT_CHECK_RATE;
+bail:
+ return ret;
+}
+
+static u32 __iomem *qib_7220_getsendbuf(struct qib_pportdata *ppd, u64 pbc,
+ u32 *pbufnum)
+{
+ u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK;
+ struct qib_devdata *dd = ppd->dd;
+ u32 __iomem *buf;
+
+ if (((pbc >> 32) & PBC_7220_VL15_SEND_CTRL) &&
+ !(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKACTIVE)))
+ buf = get_7220_link_buf(ppd, pbufnum);
+ else {
+ if ((plen + 1) > dd->piosize2kmax_dwords)
+ first = dd->piobcnt2k;
+ else
+ first = 0;
+ /* try 4k if all 2k busy, so same last for both sizes */
+ last = dd->cspec->lastbuf_for_pio;
+ buf = qib_getsendbuf_range(dd, pbufnum, first, last);
+ }
+ return buf;
+}
+
+/* these 2 "counters" are really control registers, and are always RW */
+static void qib_set_cntr_7220_sample(struct qib_pportdata *ppd, u32 intv,
+ u32 start)
+{
+ write_7220_creg(ppd->dd, cr_psinterval, intv);
+ write_7220_creg(ppd->dd, cr_psstart, start);
+}
+
+/*
+ * NOTE: no real attempt is made to generalize the SDMA stuff.
+ * At some point "soon" we will have a new, more generalized
+ * SDMA interface, and then we'll clean this up.
+ */
+
+/* Must be called with sdma_lock held, or before init finished */
+static void qib_sdma_update_7220_tail(struct qib_pportdata *ppd, u16 tail)
+{
+ /* Commit writes to memory and advance the tail on the chip */
+ wmb();
+ ppd->sdma_descq_tail = tail;
+ qib_write_kreg(ppd->dd, kr_senddmatail, tail);
+}
+
+static void qib_sdma_set_7220_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
+{
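+	/* No-op on the 7220; present only to fill the f_sdma_set_desc_cnt hook. */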
+}
+
+static struct sdma_set_state_action sdma_7220_action_table[] = {
+ [qib_sdma_state_s00_hw_down] = {
+ .op_enable = 0,
+ .op_intenable = 0,
+ .op_halt = 0,
+ .go_s99_running_tofalse = 1,
+ },
+ [qib_sdma_state_s10_hw_start_up_wait] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ },
+ [qib_sdma_state_s20_idle] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ },
+ [qib_sdma_state_s30_sw_clean_up_wait] = {
+ .op_enable = 0,
+ .op_intenable = 1,
+ .op_halt = 0,
+ },
+ [qib_sdma_state_s40_hw_clean_up_wait] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ },
+ [qib_sdma_state_s50_hw_halt_wait] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ },
+ [qib_sdma_state_s99_running] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 0,
+ .go_s99_running_totrue = 1,
+ },
+};
+
+static void qib_7220_sdma_init_early(struct qib_pportdata *ppd)
+{
+ ppd->sdma_state.set_state_action = sdma_7220_action_table;
+}
+
+static int init_sdma_7220_regs(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ unsigned i, n;
+ u64 senddmabufmask[3] = { 0 };
+
+ /* Set SendDmaBase */
+ qib_write_kreg(dd, kr_senddmabase, ppd->sdma_descq_phys);
+ qib_sdma_7220_setlengen(ppd);
+ qib_sdma_update_7220_tail(ppd, 0); /* Set SendDmaTail */
+ /* Set SendDmaHeadAddr */
+ qib_write_kreg(dd, kr_senddmaheadaddr, ppd->sdma_head_phys);
+
+ /*
+ * Reserve all the former "kernel" piobufs, using high number range
+ * so we get as many 4K buffers as possible
+ */
+ n = dd->piobcnt2k + dd->piobcnt4k;
+ i = n - dd->cspec->sdmabufcnt;
+
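+	/* one bit per buffer across the three 64-bit SendDmaBufMask registers */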
+ for (; i < n; ++i) {
+ unsigned word = i / 64;
+ unsigned bit = i & 63;
+
+ BUG_ON(word >= 3);
+ senddmabufmask[word] |= 1ULL << bit;
+ }
+ qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]);
+ qib_write_kreg(dd, kr_senddmabufmask1, senddmabufmask[1]);
+ qib_write_kreg(dd, kr_senddmabufmask2, senddmabufmask[2]);
+
+ ppd->sdma_state.first_sendbuf = i;
+ ppd->sdma_state.last_sendbuf = n;
+
+ return 0;
+}
+
+/* sdma_lock must be held */
+static u16 qib_sdma_7220_gethead(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int sane;
+ int use_dmahead;
+ u16 swhead;
+ u16 swtail;
+ u16 cnt;
+ u16 hwhead;
+
+ use_dmahead = __qib_sdma_running(ppd) &&
+ (dd->flags & QIB_HAS_SDMA_TIMEOUT);
+retry:
+ hwhead = use_dmahead ?
+ (u16)le64_to_cpu(*ppd->sdma_head_dma) :
+ (u16)qib_read_kreg32(dd, kr_senddmahead);
+
+ swhead = ppd->sdma_descq_head;
+ swtail = ppd->sdma_descq_tail;
+ cnt = ppd->sdma_descq_cnt;
+
+ if (swhead < swtail) {
+ /* not wrapped */
+ sane = (hwhead >= swhead) & (hwhead <= swtail);
+ } else if (swhead > swtail) {
+ /* wrapped around */
+ sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
+ (hwhead <= swtail);
+ } else {
+ /* empty */
+ sane = (hwhead == swhead);
+ }
+
+ if (unlikely(!sane)) {
+ if (use_dmahead) {
+ /* try one more time, directly from the register */
+ use_dmahead = 0;
+ goto retry;
+ }
+ /* assume no progress */
+ hwhead = swhead;
+ }
+
+ return hwhead;
+}
+
+static int qib_sdma_7220_busy(struct qib_pportdata *ppd)
+{
+ u64 hwstatus = qib_read_kreg64(ppd->dd, kr_senddmastatus);
+
+ return (hwstatus & SYM_MASK(SendDmaStatus, ScoreBoardDrainInProg)) ||
+ (hwstatus & SYM_MASK(SendDmaStatus, AbortInProg)) ||
+ (hwstatus & SYM_MASK(SendDmaStatus, InternalSDmaEnable)) ||
+ !(hwstatus & SYM_MASK(SendDmaStatus, ScbEmpty));
+}
+
+/*
+ * Compute the amount of delay before sending the next packet if the
+ * port's send rate differs from the static rate set for the QP.
+ * Since the delay affects this packet but the amount of the delay is
+ * based on the length of the previous packet, use the last delay computed
+ * and save the delay count for this packet to be used next time
+ * we get here.
+ */
+static u32 qib_7220_setpbc_control(struct qib_pportdata *ppd, u32 plen,
+ u8 srate, u8 vl)
+{
+ u8 snd_mult = ppd->delay_mult;
+ u8 rcv_mult = ib_rate_to_delay[srate];
+ u32 ret = ppd->cpspec->last_delay_mult;
+
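+	/*
+	 * Delay only when the receiver's static rate is slower (larger delay
+	 * multiplier) than our send rate; scale by packet length, then halve
+	 * and round up.
+	 */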
+ ppd->cpspec->last_delay_mult = (rcv_mult > snd_mult) ?
+ (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
+
+ /* Indicate VL15, if necessary */
+ if (vl == 15)
+ ret |= PBC_7220_VL15_SEND_CTRL;
+ return ret;
+}
+
+static void qib_7220_initvl15_bufs(struct qib_devdata *dd)
+{
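+	/* No-op: no VL15 buffer setup is needed on the 7220. */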
+}
+
+static void qib_7220_init_ctxt(struct qib_ctxtdata *rcd)
+{
+ if (!rcd->ctxt) {
+ rcd->rcvegrcnt = IBA7220_KRCVEGRCNT;
+ rcd->rcvegr_tid_base = 0;
+ } else {
+ rcd->rcvegrcnt = rcd->dd->cspec->rcvegrcnt;
+ rcd->rcvegr_tid_base = IBA7220_KRCVEGRCNT +
+ (rcd->ctxt - 1) * rcd->rcvegrcnt;
+ }
+}
+
+static void qib_7220_txchk_change(struct qib_devdata *dd, u32 start,
+ u32 len, u32 which, struct qib_ctxtdata *rcd)
+{
+ int i;
+ unsigned long flags;
+
+ switch (which) {
+ case TXCHK_CHG_TYPE_KERN:
+ /* see if we need to raise avail update threshold */
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ for (i = dd->first_user_ctxt;
+ dd->cspec->updthresh != dd->cspec->updthresh_dflt
+ && i < dd->cfgctxts; i++)
+ if (dd->rcd[i] && dd->rcd[i]->subctxt_cnt &&
+ ((dd->rcd[i]->piocnt / dd->rcd[i]->subctxt_cnt) - 1)
+ < dd->cspec->updthresh_dflt)
+ break;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ if (i == dd->cfgctxts) {
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ dd->cspec->updthresh = dd->cspec->updthresh_dflt;
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld);
+ dd->sendctrl |= (dd->cspec->updthresh &
+ SYM_RMASK(SendCtrl, AvailUpdThld)) <<
+ SYM_LSB(SendCtrl, AvailUpdThld);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ }
+ break;
+ case TXCHK_CHG_TYPE_USER:
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if (rcd && rcd->subctxt_cnt && ((rcd->piocnt
+ / rcd->subctxt_cnt) - 1) < dd->cspec->updthresh) {
+ dd->cspec->updthresh = (rcd->piocnt /
+ rcd->subctxt_cnt) - 1;
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld);
+ dd->sendctrl |= (dd->cspec->updthresh &
+ SYM_RMASK(SendCtrl, AvailUpdThld))
+ << SYM_LSB(SendCtrl, AvailUpdThld);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ } else
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ break;
+ }
+}
+
+static void writescratch(struct qib_devdata *dd, u32 val)
+{
+ qib_write_kreg(dd, kr_scratch, val);
+}
+
+#define VALID_TS_RD_REG_MASK 0xBF
+/**
+ * qib_7220_tempsense_rd - read register of temp sensor via TWSI
+ * @dd: the qlogic_ib device
+ * @regnum: register to read from
+ *
+ * Returns reg contents (0..255) or < 0 for error.
+ */
+static int qib_7220_tempsense_rd(struct qib_devdata *dd, int regnum)
+{
+ int ret;
+ u8 rdata;
+
+ if (regnum > 7) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /* return a bogus value for (the one) register we do not have */
+ if (!((1 << regnum) & VALID_TS_RD_REG_MASK)) {
+ ret = 0;
+ goto bail;
+ }
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (ret)
+ goto bail;
+
+ ret = qib_twsi_blk_rd(dd, QIB_TWSI_TEMP_DEV, regnum, &rdata, 1);
+ if (!ret)
+ ret = rdata;
+
+ mutex_unlock(&dd->eep_lock);
+
+ /*
+ * There are three possibilities here:
+ * ret is actual value (0..255)
+ * ret is -ENXIO or -EINVAL from twsi code or this file
+ * ret is -EINTR from mutex_lock_interruptible.
+ */
+bail:
+ return ret;
+}
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
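+	/* nothing to do for DCA events on the 7220 */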
+ return 0;
+}
+#endif
+
+/* Dummy function, as 7220 boards never disable EEPROM Write */
+static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
+{
+ return 1;
+}
+
+/**
+ * qib_init_iba7220_funcs - set up the chip-specific function pointers
+ * @pdev: the pci_dev for the qlogic_ib device
+ * @ent: pci_device_id struct for this dev
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct qib_devdata *dd;
+ int ret;
+ u32 boardid, minwidth;
+
+ dd = qib_alloc_devdata(pdev, sizeof(struct qib_chip_specific) +
+ sizeof(struct qib_chippport_specific));
+ if (IS_ERR(dd))
+ goto bail;
+
+ dd->f_bringup_serdes = qib_7220_bringup_serdes;
+ dd->f_cleanup = qib_setup_7220_cleanup;
+ dd->f_clear_tids = qib_7220_clear_tids;
+ dd->f_free_irq = qib_7220_free_irq;
+ dd->f_get_base_info = qib_7220_get_base_info;
+ dd->f_get_msgheader = qib_7220_get_msgheader;
+ dd->f_getsendbuf = qib_7220_getsendbuf;
+ dd->f_gpio_mod = gpio_7220_mod;
+ dd->f_eeprom_wen = qib_7220_eeprom_wen;
+ dd->f_hdrqempty = qib_7220_hdrqempty;
+ dd->f_ib_updown = qib_7220_ib_updown;
+ dd->f_init_ctxt = qib_7220_init_ctxt;
+ dd->f_initvl15_bufs = qib_7220_initvl15_bufs;
+ dd->f_intr_fallback = qib_7220_intr_fallback;
+ dd->f_late_initreg = qib_late_7220_initreg;
+ dd->f_setpbc_control = qib_7220_setpbc_control;
+ dd->f_portcntr = qib_portcntr_7220;
+ dd->f_put_tid = qib_7220_put_tid;
+ dd->f_quiet_serdes = qib_7220_quiet_serdes;
+ dd->f_rcvctrl = rcvctrl_7220_mod;
+ dd->f_read_cntrs = qib_read_7220cntrs;
+ dd->f_read_portcntrs = qib_read_7220portcntrs;
+ dd->f_reset = qib_setup_7220_reset;
+ dd->f_init_sdma_regs = init_sdma_7220_regs;
+ dd->f_sdma_busy = qib_sdma_7220_busy;
+ dd->f_sdma_gethead = qib_sdma_7220_gethead;
+ dd->f_sdma_sendctrl = qib_7220_sdma_sendctrl;
+ dd->f_sdma_set_desc_cnt = qib_sdma_set_7220_desc_cnt;
+ dd->f_sdma_update_tail = qib_sdma_update_7220_tail;
+ dd->f_sdma_hw_clean_up = qib_7220_sdma_hw_clean_up;
+ dd->f_sdma_hw_start_up = qib_7220_sdma_hw_start_up;
+ dd->f_sdma_init_early = qib_7220_sdma_init_early;
+ dd->f_sendctrl = sendctrl_7220_mod;
+ dd->f_set_armlaunch = qib_set_7220_armlaunch;
+ dd->f_set_cntr_sample = qib_set_cntr_7220_sample;
+ dd->f_iblink_state = qib_7220_iblink_state;
+ dd->f_ibphys_portstate = qib_7220_phys_portstate;
+ dd->f_get_ib_cfg = qib_7220_get_ib_cfg;
+ dd->f_set_ib_cfg = qib_7220_set_ib_cfg;
+ dd->f_set_ib_loopback = qib_7220_set_loopback;
+ dd->f_set_intr_state = qib_7220_set_intr_state;
+ dd->f_setextled = qib_setup_7220_setextled;
+ dd->f_txchk_change = qib_7220_txchk_change;
+ dd->f_update_usrhead = qib_update_7220_usrhead;
+ dd->f_wantpiobuf_intr = qib_wantpiobuf_7220_intr;
+ dd->f_xgxs_reset = qib_7220_xgxs_reset;
+ dd->f_writescratch = writescratch;
+ dd->f_tempsense_rd = qib_7220_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_7220_notify_dca;
+#endif
+ /*
+ * Do remaining pcie setup and save pcie values in dd.
+ * Any error printing is already done by the init code.
+ * On return, we have the chip mapped, but chip registers
+ * are not set up until start of qib_init_7220_variables.
+ */
+ ret = qib_pcie_ddinit(dd, pdev, ent);
+ if (ret < 0)
+ goto bail_free;
+
+ /* initialize chip-specific variables */
+ ret = qib_init_7220_variables(dd);
+ if (ret)
+ goto bail_cleanup;
+
+ if (qib_mini_init)
+ goto bail;
+
+ boardid = SYM_FIELD(dd->revision, Revision,
+ BoardID);
+ switch (boardid) {
+ case 0:
+ case 2:
+ case 10:
+ case 12:
+ minwidth = 16; /* x16 capable boards */
+ break;
+ default:
+ minwidth = 8; /* x8 capable boards */
+ break;
+ }
+ if (qib_pcie_params(dd, minwidth, NULL, NULL))
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
+
+ /* save IRQ for possible later use */
+ dd->cspec->irq = pdev->irq;
+
+ if (qib_read_kreg64(dd, kr_hwerrstatus) &
+ QLOGIC_IB_HWE_SERDESPLLFAILED)
+ qib_write_kreg(dd, kr_hwerrclear,
+ QLOGIC_IB_HWE_SERDESPLLFAILED);
+
+ /* setup interrupt handler (interrupt type handled above) */
+ qib_setup_7220_interrupt(dd);
+ qib_7220_init_hwerrors(dd);
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ qib_write_kreg(dd, kr_hwdiagctrl, 0);
+
+ goto bail;
+
+bail_cleanup:
+ qib_pcie_ddcleanup(dd);
+bail_free:
+ qib_free_devdata(dd);
+ dd = ERR_PTR(ret);
+bail:
+ return dd;
+}
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
new file mode 100644
index 00000000000..a7eb32517a0
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -0,0 +1,8554 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains all of the code that is specific to the
+ * InfiniPath 7322 chip
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_smi.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
+
+#include "qib.h"
+#include "qib_7322_regs.h"
+#include "qib_qsfp.h"
+
+#include "qib_mad.h"
+#include "qib_verbs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME " " fmt
+
+static void qib_setup_7322_setextled(struct qib_pportdata *, u32);
+static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t);
+static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op);
+static irqreturn_t qib_7322intr(int irq, void *data);
+static irqreturn_t qib_7322bufavail(int irq, void *data);
+static irqreturn_t sdma_intr(int irq, void *data);
+static irqreturn_t sdma_idle_intr(int irq, void *data);
+static irqreturn_t sdma_progress_intr(int irq, void *data);
+static irqreturn_t sdma_cleanup_intr(int irq, void *data);
+static void qib_7322_txchk_change(struct qib_devdata *, u32, u32, u32,
+ struct qib_ctxtdata *rcd);
+static u8 qib_7322_phys_portstate(u64);
+static u32 qib_7322_iblink_state(u64);
+static void qib_set_ib_7322_lstate(struct qib_pportdata *ppd, u16 linkcmd,
+ u16 linitcmd);
+static void force_h1(struct qib_pportdata *);
+static void adj_tx_serdes(struct qib_pportdata *);
+static u32 qib_7322_setpbc_control(struct qib_pportdata *, u32, u8, u8);
+static void qib_7322_mini_pcs_reset(struct qib_pportdata *);
+
+static u32 ahb_mod(struct qib_devdata *, int, int, int, u32, u32);
+static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
+static void serdes_7322_los_enable(struct qib_pportdata *, int);
+static int serdes_7322_init_old(struct qib_pportdata *);
+static int serdes_7322_init_new(struct qib_pportdata *);
+static void dump_sdma_7322_state(struct qib_pportdata *);
+
+#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
+
+/* LE2 serdes values for different cases */
+#define LE2_DEFAULT 5
+#define LE2_5m 4
+#define LE2_QME 0
+
+/* Below is special-purpose, so only really works for the IB SerDes blocks. */
+#define IBSD(hw_pidx) (hw_pidx + 2)
+
+/* these are variables for documentation and experimentation purposes */
+static const unsigned rcv_int_timeout = 375;
+static const unsigned rcv_int_count = 16;
+static const unsigned sdma_idle_cnt = 64;
+
+/* Time to stop altering Rx Equalization parameters, after link up. */
+#define RXEQ_DISABLE_MSECS 2500
+
+/*
+ * Number of VLs we are configured to use (to allow for more
+ * credits per vl, etc.)
+ */
+ushort qib_num_cfg_vls = 2;
+module_param_named(num_vls, qib_num_cfg_vls, ushort, S_IRUGO);
+MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
+
+static ushort qib_chase = 1;
+module_param_named(chase, qib_chase, ushort, S_IRUGO);
+MODULE_PARM_DESC(chase, "Enable state chase handling");
+
+static ushort qib_long_atten = 10; /* 10 dB ~= 5m length */
+module_param_named(long_attenuation, qib_long_atten, ushort, S_IRUGO);
+MODULE_PARM_DESC(long_attenuation, \
+ "attenuation cutoff (dB) for long copper cable setup");
+
+static ushort qib_singleport;
+module_param_named(singleport, qib_singleport, ushort, S_IRUGO);
+MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space");
+
+static ushort qib_krcvq01_no_msi;
+module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO);
+MODULE_PARM_DESC(krcvq01_no_msi, "No MSI for kctx < 2");
+
+/*
+ * Receive header queue sizes
+ */
+static unsigned qib_rcvhdrcnt;
+module_param_named(rcvhdrcnt, qib_rcvhdrcnt, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrcnt, "receive header count");
+
+static unsigned qib_rcvhdrsize;
+module_param_named(rcvhdrsize, qib_rcvhdrsize, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrsize, "receive header size in 32-bit words");
+
+static unsigned qib_rcvhdrentsize;
+module_param_named(rcvhdrentsize, qib_rcvhdrentsize, uint, S_IRUGO);
+MODULE_PARM_DESC(rcvhdrentsize, "receive header entry size in 32-bit words");
+
+#define MAX_ATTEN_LEN 64 /* plenty for any real system */
+/* for read back, default index is ~5m copper cable */
+static char txselect_list[MAX_ATTEN_LEN] = "10";
+static struct kparam_string kp_txselect = {
+ .string = txselect_list,
+ .maxlen = MAX_ATTEN_LEN
+};
+static int setup_txselect(const char *, struct kernel_param *);
+module_param_call(txselect, setup_txselect, param_get_string,
+ &kp_txselect, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(txselect, \
+ "Tx serdes indices (for no QSFP or invalid QSFP data)");
+
+#define BOARD_QME7342 5
+#define BOARD_QMH7342 6
+#define IS_QMH(dd) (SYM_FIELD((dd)->revision, Revision, BoardID) == \
+ BOARD_QMH7342)
+#define IS_QME(dd) (SYM_FIELD((dd)->revision, Revision, BoardID) == \
+ BOARD_QME7342)
+
+#define KREG_IDX(regname) (QIB_7322_##regname##_OFFS / sizeof(u64))
+
+#define KREG_IBPORT_IDX(regname) ((QIB_7322_##regname##_0_OFFS / sizeof(u64)))
+
+#define MASK_ACROSS(lsb, msb) \
+ (((1ULL << ((msb) + 1 - (lsb))) - 1) << (lsb))
+
+#define SYM_RMASK(regname, fldname) ((u64) \
+ QIB_7322_##regname##_##fldname##_RMASK)
+
+#define SYM_MASK(regname, fldname) ((u64) \
+ QIB_7322_##regname##_##fldname##_RMASK << \
+ QIB_7322_##regname##_##fldname##_LSB)
+
+#define SYM_FIELD(value, regname, fldname) ((u64) \
+ (((value) >> SYM_LSB(regname, fldname)) & \
+ SYM_RMASK(regname, fldname)))
+
+/* useful for things like LaFifoEmpty_0...7, TxCreditOK_0...7, etc. */
+#define SYM_FIELD_ACROSS(value, regname, fldname, nbits) \
+ (((value) >> SYM_LSB(regname, fldname)) & MASK_ACROSS(0, nbits))
+
+#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask)
+#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask)
+#define ERR_MASK_N(fldname) SYM_MASK(ErrMask_0, fldname##Mask)
+#define INT_MASK(fldname) SYM_MASK(IntMask, fldname##IntMask)
+#define INT_MASK_P(fldname, port) SYM_MASK(IntMask, fldname##IntMask##_##port)
+/* Below because most, but not all, fields of IntMask have that full suffix */
+#define INT_MASK_PM(fldname, port) SYM_MASK(IntMask, fldname##Mask##_##port)
+
+
+#define SYM_LSB(regname, fldname) (QIB_7322_##regname##_##fldname##_LSB)
+
+/*
+ * The size bits give us 2^N, in KB units. 0 marks the entry as invalid,
+ * and 7 is reserved. We currently use only 2KB and 4KB.
+ */
+#define IBA7322_TID_SZ_SHIFT QIB_7322_RcvTIDArray0_RT_BufSize_LSB
+#define IBA7322_TID_SZ_2K (1UL<<IBA7322_TID_SZ_SHIFT) /* 2KB */
+#define IBA7322_TID_SZ_4K (2UL<<IBA7322_TID_SZ_SHIFT) /* 4KB */
+#define IBA7322_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */
+
+#define SendIBSLIDAssignMask \
+ QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_RMASK
+#define SendIBSLMCMask \
+ QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_RMASK
+
+#define ExtLED_IB1_YEL SYM_MASK(EXTCtrl, LEDPort0YellowOn)
+#define ExtLED_IB1_GRN SYM_MASK(EXTCtrl, LEDPort0GreenOn)
+#define ExtLED_IB2_YEL SYM_MASK(EXTCtrl, LEDPort1YellowOn)
+#define ExtLED_IB2_GRN SYM_MASK(EXTCtrl, LEDPort1GreenOn)
+#define ExtLED_IB1_MASK (ExtLED_IB1_YEL | ExtLED_IB1_GRN)
+#define ExtLED_IB2_MASK (ExtLED_IB2_YEL | ExtLED_IB2_GRN)
+
+#define _QIB_GPIO_SDA_NUM 1
+#define _QIB_GPIO_SCL_NUM 0
+#define QIB_EEPROM_WEN_NUM 14
+#define QIB_TWSI_EEPROM_DEV 0xA2 /* All Production 7322 cards. */
+
+/* HW counter clock is at 4nsec */
+#define QIB_7322_PSXMITWAIT_CHECK_RATE 4000
+
+/* full speed IB port 1 only */
+#define PORT_SPD_CAP (QIB_IB_SDR | QIB_IB_DDR | QIB_IB_QDR)
+#define PORT_SPD_CAP_SHIFT 3
+
+/* full speed featuremask, both ports */
+#define DUAL_PORT_CAP (PORT_SPD_CAP | (PORT_SPD_CAP << PORT_SPD_CAP_SHIFT))
+
+/*
+ * This file contains almost all the chip-specific register information and
+ * access functions for the QLogic InfiniPath 7322 PCI-Express chip.
+ */
+
+/* Use defines to tie machine-generated names to lower-case names */
+#define kr_contextcnt KREG_IDX(ContextCnt)
+#define kr_control KREG_IDX(Control)
+#define kr_counterregbase KREG_IDX(CntrRegBase)
+#define kr_errclear KREG_IDX(ErrClear)
+#define kr_errmask KREG_IDX(ErrMask)
+#define kr_errstatus KREG_IDX(ErrStatus)
+#define kr_extctrl KREG_IDX(EXTCtrl)
+#define kr_extstatus KREG_IDX(EXTStatus)
+#define kr_gpio_clear KREG_IDX(GPIOClear)
+#define kr_gpio_mask KREG_IDX(GPIOMask)
+#define kr_gpio_out KREG_IDX(GPIOOut)
+#define kr_gpio_status KREG_IDX(GPIOStatus)
+#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl)
+#define kr_debugportval KREG_IDX(DebugPortValueReg)
+#define kr_fmask KREG_IDX(feature_mask)
+#define kr_act_fmask KREG_IDX(active_feature_mask)
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_intclear KREG_IDX(IntClear)
+#define kr_intmask KREG_IDX(IntMask)
+#define kr_intredirect KREG_IDX(IntRedirect0)
+#define kr_intstatus KREG_IDX(IntStatus)
+#define kr_pagealign KREG_IDX(PageAlign)
+#define kr_rcvavailtimeout KREG_IDX(RcvAvailTimeOut0)
+#define kr_rcvctrl KREG_IDX(RcvCtrl) /* Common, but chip also has per-port */
+#define kr_rcvegrbase KREG_IDX(RcvEgrBase)
+#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt)
+#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt)
+#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize)
+#define kr_rcvhdrsize KREG_IDX(RcvHdrSize)
+#define kr_rcvtidbase KREG_IDX(RcvTIDBase)
+#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt)
+#define kr_revision KREG_IDX(Revision)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_sendbuffererror KREG_IDX(SendBufErr0) /* and base for 1 and 2 */
+#define kr_sendcheckmask KREG_IDX(SendCheckMask0) /* and 1, 2 */
+#define kr_sendctrl KREG_IDX(SendCtrl)
+#define kr_sendgrhcheckmask KREG_IDX(SendGRHCheckMask0) /* and 1, 2 */
+#define kr_sendibpktmask KREG_IDX(SendIBPacketMask0) /* and 1, 2 */
+#define kr_sendpioavailaddr KREG_IDX(SendBufAvailAddr)
+#define kr_sendpiobufbase KREG_IDX(SendBufBase)
+#define kr_sendpiobufcnt KREG_IDX(SendBufCnt)
+#define kr_sendpiosize KREG_IDX(SendBufSize)
+#define kr_sendregbase KREG_IDX(SendRegBase)
+#define kr_sendbufavail0 KREG_IDX(SendBufAvail0)
+#define kr_userregbase KREG_IDX(UserRegBase)
+#define kr_intgranted KREG_IDX(Int_Granted)
+#define kr_vecclr_wo_int KREG_IDX(vec_clr_without_int)
+#define kr_intblocked KREG_IDX(IntBlocked)
+#define kr_r_access KREG_IDX(SPC_JTAG_ACCESS_REG)
+
+/*
+ * per-port kernel registers. Access only with qib_read_kreg_port()
+ * or qib_write_kreg_port()
+ */
+#define krp_errclear KREG_IBPORT_IDX(ErrClear)
+#define krp_errmask KREG_IBPORT_IDX(ErrMask)
+#define krp_errstatus KREG_IBPORT_IDX(ErrStatus)
+#define krp_highprio_0 KREG_IBPORT_IDX(HighPriority0)
+#define krp_highprio_limit KREG_IBPORT_IDX(HighPriorityLimit)
+#define krp_hrtbt_guid KREG_IBPORT_IDX(HRTBT_GUID)
+#define krp_ib_pcsconfig KREG_IBPORT_IDX(IBPCSConfig)
+#define krp_ibcctrl_a KREG_IBPORT_IDX(IBCCtrlA)
+#define krp_ibcctrl_b KREG_IBPORT_IDX(IBCCtrlB)
+#define krp_ibcctrl_c KREG_IBPORT_IDX(IBCCtrlC)
+#define krp_ibcstatus_a KREG_IBPORT_IDX(IBCStatusA)
+#define krp_ibcstatus_b KREG_IBPORT_IDX(IBCStatusB)
+#define krp_txestatus KREG_IBPORT_IDX(TXEStatus)
+#define krp_lowprio_0 KREG_IBPORT_IDX(LowPriority0)
+#define krp_ncmodectrl KREG_IBPORT_IDX(IBNCModeCtrl)
+#define krp_partitionkey KREG_IBPORT_IDX(RcvPartitionKey)
+#define krp_psinterval KREG_IBPORT_IDX(PSInterval)
+#define krp_psstart KREG_IBPORT_IDX(PSStart)
+#define krp_psstat KREG_IBPORT_IDX(PSStat)
+#define krp_rcvbthqp KREG_IBPORT_IDX(RcvBTHQP)
+#define krp_rcvctrl KREG_IBPORT_IDX(RcvCtrl)
+#define krp_rcvpktledcnt KREG_IBPORT_IDX(RcvPktLEDCnt)
+#define krp_rcvqpmaptable KREG_IBPORT_IDX(RcvQPMapTableA)
+#define krp_rxcreditvl0 KREG_IBPORT_IDX(RxCreditVL0)
+#define krp_rxcreditvl15 (KREG_IBPORT_IDX(RxCreditVL0)+15)
+#define krp_sendcheckcontrol KREG_IBPORT_IDX(SendCheckControl)
+#define krp_sendctrl KREG_IBPORT_IDX(SendCtrl)
+#define krp_senddmabase KREG_IBPORT_IDX(SendDmaBase)
+#define krp_senddmabufmask0 KREG_IBPORT_IDX(SendDmaBufMask0)
+#define krp_senddmabufmask1 (KREG_IBPORT_IDX(SendDmaBufMask0) + 1)
+#define krp_senddmabufmask2 (KREG_IBPORT_IDX(SendDmaBufMask0) + 2)
+#define krp_senddmabuf_use0 KREG_IBPORT_IDX(SendDmaBufUsed0)
+#define krp_senddmabuf_use1 (KREG_IBPORT_IDX(SendDmaBufUsed0) + 1)
+#define krp_senddmabuf_use2 (KREG_IBPORT_IDX(SendDmaBufUsed0) + 2)
+#define krp_senddmadesccnt KREG_IBPORT_IDX(SendDmaDescCnt)
+#define krp_senddmahead KREG_IBPORT_IDX(SendDmaHead)
+#define krp_senddmaheadaddr KREG_IBPORT_IDX(SendDmaHeadAddr)
+#define krp_senddmaidlecnt KREG_IBPORT_IDX(SendDmaIdleCnt)
+#define krp_senddmalengen KREG_IBPORT_IDX(SendDmaLenGen)
+#define krp_senddmaprioritythld KREG_IBPORT_IDX(SendDmaPriorityThld)
+#define krp_senddmareloadcnt KREG_IBPORT_IDX(SendDmaReloadCnt)
+#define krp_senddmastatus KREG_IBPORT_IDX(SendDmaStatus)
+#define krp_senddmatail KREG_IBPORT_IDX(SendDmaTail)
+#define krp_sendhdrsymptom KREG_IBPORT_IDX(SendHdrErrSymptom)
+#define krp_sendslid KREG_IBPORT_IDX(SendIBSLIDAssign)
+#define krp_sendslidmask KREG_IBPORT_IDX(SendIBSLIDMask)
+#define krp_ibsdtestiftx KREG_IBPORT_IDX(IB_SDTEST_IF_TX)
+#define krp_adapt_dis_timer KREG_IBPORT_IDX(ADAPT_DISABLE_TIMER_THRESHOLD)
+#define krp_tx_deemph_override KREG_IBPORT_IDX(IBSD_TX_DEEMPHASIS_OVERRIDE)
+#define krp_serdesctrl KREG_IBPORT_IDX(IBSerdesCtrl)
+
+/*
+ * Per-context kernel registers. Access only with qib_read_kreg_ctxt()
+ * or qib_write_kreg_ctxt()
+ */
+#define krc_rcvhdraddr KREG_IDX(RcvHdrAddr0)
+#define krc_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0)
+
+/*
+ * TID Flow table, per context. Reduces the
+ * number of hdrq updates to one per flow (or on errors).
+ * Contexts 0 and 1 share the same memory, but have distinct
+ * addresses. Since for now we never use expected sends
+ * on kernel contexts, we don't worry about that (we initialize
+ * those entries for ctxt 0/1 twice on driver load, for example).
+ */
+#define NUM_TIDFLOWS_CTXT 0x20 /* 0x20 per context; have to hardcode */
+#define ur_rcvflowtable (KREG_IDX(RcvTIDFlowTable0) - KREG_IDX(RcvHdrTail0))
+
+/* these are the error bits in the tid flows, and are W1C */
+#define TIDFLOW_ERRBITS ( \
+ (SYM_MASK(RcvTIDFlowTable0, GenMismatch) << \
+ SYM_LSB(RcvTIDFlowTable0, GenMismatch)) | \
+ (SYM_MASK(RcvTIDFlowTable0, SeqMismatch) << \
+ SYM_LSB(RcvTIDFlowTable0, SeqMismatch)))
+
+/* Most (not all) counters are per IB port.
+ * Requires that LBIntCnt be at offset 0 in the group.
+ */
+#define CREG_IDX(regname) \
+((QIB_7322_##regname##_0_OFFS - QIB_7322_LBIntCnt_OFFS) / sizeof(u64))
+
+#define crp_badformat CREG_IDX(RxVersionErrCnt)
+#define crp_err_rlen CREG_IDX(RxLenErrCnt)
+#define crp_erricrc CREG_IDX(RxICRCErrCnt)
+#define crp_errlink CREG_IDX(RxLinkMalformCnt)
+#define crp_errlpcrc CREG_IDX(RxLPCRCErrCnt)
+#define crp_errpkey CREG_IDX(RxPKeyMismatchCnt)
+#define crp_errvcrc CREG_IDX(RxVCRCErrCnt)
+#define crp_excessbufferovfl CREG_IDX(ExcessBufferOvflCnt)
+#define crp_iblinkdown CREG_IDX(IBLinkDownedCnt)
+#define crp_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt)
+#define crp_ibstatuschange CREG_IDX(IBStatusChangeCnt)
+#define crp_ibsymbolerr CREG_IDX(IBSymbolErrCnt)
+#define crp_invalidrlen CREG_IDX(RxMaxMinLenErrCnt)
+#define crp_locallinkintegrityerr CREG_IDX(LocalLinkIntegrityErrCnt)
+#define crp_pktrcv CREG_IDX(RxDataPktCnt)
+#define crp_pktrcvflowctrl CREG_IDX(RxFlowPktCnt)
+#define crp_pktsend CREG_IDX(TxDataPktCnt)
+#define crp_pktsendflow CREG_IDX(TxFlowPktCnt)
+#define crp_psrcvdatacount CREG_IDX(PSRcvDataCount)
+#define crp_psrcvpktscount CREG_IDX(PSRcvPktsCount)
+#define crp_psxmitdatacount CREG_IDX(PSXmitDataCount)
+#define crp_psxmitpktscount CREG_IDX(PSXmitPktsCount)
+#define crp_psxmitwaitcount CREG_IDX(PSXmitWaitCount)
+#define crp_rcvebp CREG_IDX(RxEBPCnt)
+#define crp_rcvflowctrlviol CREG_IDX(RxFlowCtrlViolCnt)
+#define crp_rcvovfl CREG_IDX(RxBufOvflCnt)
+#define crp_rxdlidfltr CREG_IDX(RxDlidFltrCnt)
+#define crp_rxdroppkt CREG_IDX(RxDroppedPktCnt)
+#define crp_rxotherlocalphyerr CREG_IDX(RxOtherLocalPhyErrCnt)
+#define crp_rxqpinvalidctxt CREG_IDX(RxQPInvalidContextCnt)
+#define crp_rxvlerr CREG_IDX(RxVlErrCnt)
+#define crp_sendstall CREG_IDX(TxFlowStallCnt)
+#define crp_txdroppedpkt CREG_IDX(TxDroppedPktCnt)
+#define crp_txhdrerr CREG_IDX(TxHeadersErrCnt)
+#define crp_txlenerr CREG_IDX(TxLenErrCnt)
+#define crp_txminmaxlenerr CREG_IDX(TxMaxMinLenErrCnt)
+#define crp_txsdmadesc CREG_IDX(TxSDmaDescCnt)
+#define crp_txunderrun CREG_IDX(TxUnderrunCnt)
+#define crp_txunsupvl CREG_IDX(TxUnsupVLErrCnt)
+#define crp_vl15droppedpkt CREG_IDX(RxVL15DroppedPktCnt)
+#define crp_wordrcv CREG_IDX(RxDwordCnt)
+#define crp_wordsend CREG_IDX(TxDwordCnt)
+#define crp_tx_creditstalls CREG_IDX(TxCreditUpToDateTimeOut)
+
+/* these are the (few) counters that are not port-specific */
+#define CREG_DEVIDX(regname) ((QIB_7322_##regname##_OFFS - \
+ QIB_7322_LBIntCnt_OFFS) / sizeof(u64))
+#define cr_base_egrovfl CREG_DEVIDX(RxP0HdrEgrOvflCnt)
+#define cr_lbint CREG_DEVIDX(LBIntCnt)
+#define cr_lbstall CREG_DEVIDX(LBFlowStallCnt)
+#define cr_pcieretrydiag CREG_DEVIDX(PcieRetryBufDiagQwordCnt)
+#define cr_rxtidflowdrop CREG_DEVIDX(RxTidFlowDropCnt)
+#define cr_tidfull CREG_DEVIDX(RxTIDFullErrCnt)
+#define cr_tidinvalid CREG_DEVIDX(RxTIDValidErrCnt)
+
+/* no chip register for # of IB ports supported, so define */
+#define NUM_IB_PORTS 2
+
+/* 1 VL15 buffer per hardware IB port, no register for this, so define */
+#define NUM_VL15_BUFS NUM_IB_PORTS
+
+/*
+ * Contexts 0 and 1 are special; there is no chip register that
+ * defines this value, so we have to define it here.
+ * These are all allocated to either context 0 or 1 in a single-port
+ * hardware configuration; otherwise each context gets half.
+ */
+#define KCTXT0_EGRCNT 2048
+
+/* values for vl and port fields in PBC, 7322-specific */
+#define PBC_PORT_SEL_LSB 26
+#define PBC_PORT_SEL_RMASK 1
+#define PBC_VL_NUM_LSB 27
+#define PBC_VL_NUM_RMASK 7
+#define PBC_7322_VL15_SEND (1ULL << 63) /* pbc; VL15, no credit check */
+#define PBC_7322_VL15_SEND_CTRL (1ULL << 31) /* control version of same */
+
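+/*
+ * Map IB static rates to delay multipliers: the slower the rate, the
+ * larger the multiplier (2.5 Gb/s -> 16 down to 40 Gb/s -> 1; rates not
+ * listed default to 0, i.e. no extra delay).
+ */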
+static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = {
+ [IB_RATE_2_5_GBPS] = 16,
+ [IB_RATE_5_GBPS] = 8,
+ [IB_RATE_10_GBPS] = 4,
+ [IB_RATE_20_GBPS] = 2,
+ [IB_RATE_30_GBPS] = 2,
+ [IB_RATE_40_GBPS] = 1
+};
+
+#define IBA7322_LINKSPEED_SHIFT SYM_LSB(IBCStatusA_0, LinkSpeedActive)
+#define IBA7322_LINKWIDTH_SHIFT SYM_LSB(IBCStatusA_0, LinkWidthActive)
+
+/* link training states, from IBC */
+#define IB_7322_LT_STATE_DISABLED 0x00
+#define IB_7322_LT_STATE_LINKUP 0x01
+#define IB_7322_LT_STATE_POLLACTIVE 0x02
+#define IB_7322_LT_STATE_POLLQUIET 0x03
+#define IB_7322_LT_STATE_SLEEPDELAY 0x04
+#define IB_7322_LT_STATE_SLEEPQUIET 0x05
+#define IB_7322_LT_STATE_CFGDEBOUNCE 0x08
+#define IB_7322_LT_STATE_CFGRCVFCFG 0x09
+#define IB_7322_LT_STATE_CFGWAITRMT 0x0a
+#define IB_7322_LT_STATE_CFGIDLE 0x0b
+#define IB_7322_LT_STATE_RECOVERRETRAIN 0x0c
+#define IB_7322_LT_STATE_TXREVLANES 0x0d
+#define IB_7322_LT_STATE_RECOVERWAITRMT 0x0e
+#define IB_7322_LT_STATE_RECOVERIDLE 0x0f
+#define IB_7322_LT_STATE_CFGENH 0x10
+#define IB_7322_LT_STATE_CFGTEST 0x11
+#define IB_7322_LT_STATE_CFGWAITRMTTEST 0x12
+#define IB_7322_LT_STATE_CFGWAITENH 0x13
+
+/* link state machine states from IBC */
+#define IB_7322_L_STATE_DOWN 0x0
+#define IB_7322_L_STATE_INIT 0x1
+#define IB_7322_L_STATE_ARM 0x2
+#define IB_7322_L_STATE_ACTIVE 0x3
+#define IB_7322_L_STATE_ACT_DEFER 0x4
+
+static const u8 qib_7322_physportstate[0x20] = {
+ [IB_7322_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
+ [IB_7322_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
+ [IB_7322_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
+ [IB_7322_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
+ [IB_7322_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
+ [IB_7322_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
+ [IB_7322_LT_STATE_CFGDEBOUNCE] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7322_LT_STATE_CFGRCVFCFG] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7322_LT_STATE_CFGWAITRMT] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7322_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_IDLE,
+ [IB_7322_LT_STATE_RECOVERRETRAIN] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_7322_LT_STATE_RECOVERWAITRMT] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_7322_LT_STATE_RECOVERIDLE] =
+ IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
+ [IB_7322_LT_STATE_CFGENH] = IB_PHYSPORTSTATE_CFG_ENH,
+ [IB_7322_LT_STATE_CFGTEST] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7322_LT_STATE_CFGWAITRMTTEST] =
+ IB_PHYSPORTSTATE_CFG_TRAIN,
+ [IB_7322_LT_STATE_CFGWAITENH] =
+ IB_PHYSPORTSTATE_CFG_WAIT_ENH,
+ [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
+ [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
+};
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+struct qib_irq_notify {
+ int rcv;
+ void *arg;
+ struct irq_affinity_notify notify;
+};
+#endif
+
+struct qib_chip_specific {
+ u64 __iomem *cregbase;
+ u64 *cntrs;
+ spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */
+ spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */
+ u64 main_int_mask; /* clear bits which have dedicated handlers */
+ u64 int_enable_mask; /* for per port interrupts in single port mode */
+ u64 errormask;
+ u64 hwerrmask;
+ u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */
+ u64 gpio_mask; /* shadow the gpio mask register */
+ u64 extctrl; /* shadow the gpio output enable, etc... */
+ u32 ncntrs;
+ u32 nportcntrs;
+ u32 cntrnamelen;
+ u32 portcntrnamelen;
+ u32 numctxts;
+ u32 rcvegrcnt;
+ u32 updthresh; /* current AvailUpdThld */
+ u32 updthresh_dflt; /* default AvailUpdThld */
+ u32 r1;
+ int irq;
+ u32 num_msix_entries;
+ u32 sdmabufcnt;
+ u32 lastbuf_for_pio;
+ u32 stay_in_freeze;
+ u32 recovery_ports_initted;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ u32 dca_ctrl;
+ int rhdr_cpu[18];
+ int sdma_cpu[2];
+ u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */
+#endif
+ struct qib_msix_entry *msix_entries;
+ unsigned long *sendchkenable;
+ unsigned long *sendgrhchk;
+ unsigned long *sendibchk;
+ u32 rcvavail_timeout[18];
+ char emsgbuf[128]; /* for device error interrupt msg buffer */
+};
+
+/* Table of Tx Emphasis entries in "human readable" form. */
+struct txdds_ent {
+ u8 amp;
+ u8 pre;
+ u8 main;
+ u8 post;
+};
+
+struct vendor_txdds_ent {
+ u8 oui[QSFP_VOUI_LEN];
+ u8 *partnum;
+ struct txdds_ent sdr;
+ struct txdds_ent ddr;
+ struct txdds_ent qdr;
+};
+
+static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
+
+#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
+#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */
+#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */
+#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
+
+#define H1_FORCE_VAL 8
+#define H1_FORCE_QME 1 /* may be overridden via setup_txselect() */
+#define H1_FORCE_QMH 7 /* may be overridden via setup_txselect() */
+
+/* The static and dynamic registers are paired, and the pairs indexed by spd */
+#define krp_static_adapt_dis(spd) (KREG_IBPORT_IDX(ADAPT_DISABLE_STATIC_SDR) \
+ + ((spd) * 2))
+
+#define QDR_DFE_DISABLE_DELAY 4000 /* msec after LINKUP */
+#define QDR_STATIC_ADAPT_DOWN 0xf0f0f0f0ULL /* link down, H1-H4 QDR adapts */
+#define QDR_STATIC_ADAPT_DOWN_R1 0ULL /* r1 link down, H1-H4 QDR adapts */
+#define QDR_STATIC_ADAPT_INIT 0xffffffffffULL /* up, disable H0,H1-8, LE */
+#define QDR_STATIC_ADAPT_INIT_R1 0xf0ffffffffULL /* r1 up, disable H0,H1-8 */
+
+struct qib_chippport_specific {
+ u64 __iomem *kpregbase;
+ u64 __iomem *cpregbase;
+ u64 *portcntrs;
+ struct qib_pportdata *ppd;
+ wait_queue_head_t autoneg_wait;
+ struct delayed_work autoneg_work;
+ struct delayed_work ipg_work;
+ struct timer_list chase_timer;
+ /*
+ * these 5 fields are used to establish deltas for IB symbol
+ * errors and linkrecovery errors. They can be reported on
+ * some chips during link negotiation prior to INIT, and with
+ * DDR when faking DDR negotiations with non-IBTA switches.
+ * The chip counters are adjusted at driver unload if there is
+ * a non-zero delta.
+ */
+ u64 ibdeltainprog;
+ u64 ibsymdelta;
+ u64 ibsymsnap;
+ u64 iblnkerrdelta;
+ u64 iblnkerrsnap;
+ u64 iblnkdownsnap;
+ u64 iblnkdowndelta;
+ u64 ibmalfdelta;
+ u64 ibmalfsnap;
+ u64 ibcctrl_a; /* krp_ibcctrl_a shadow */
+ u64 ibcctrl_b; /* krp_ibcctrl_b shadow */
+ unsigned long qdr_dfe_time;
+ unsigned long chase_end;
+ u32 autoneg_tries;
+ u32 recovery_init;
+ u32 qdr_dfe_on;
+ u32 qdr_reforce;
+ /*
+ * Per-bay per-channel rcv QMH H1 values and Tx values for QDR.
+ * entry zero is unused, to simplify indexing
+ */
+ u8 h1_val;
+ u8 no_eep; /* txselect table index to use if no qsfp info */
+ u8 ipg_tries;
+ u8 ibmalfusesnap;
+ struct qib_qsfp_data qsfp_data;
+ char epmsgbuf[192]; /* for port error interrupt msg buffer */
+ char sdmamsgbuf[192]; /* for per-port sdma error messages */
+};
+
+static struct {
+ const char *name;
+ irq_handler_t handler;
+ int lsb;
+ int port; /* 0 if not port-specific, else port # */
+ int dca;
+} irq_table[] = {
+ { "", qib_7322intr, -1, 0, 0 },
+ { " (buf avail)", qib_7322bufavail,
+ SYM_LSB(IntStatus, SendBufAvail), 0, 0},
+ { " (sdma 0)", sdma_intr,
+ SYM_LSB(IntStatus, SDmaInt_0), 1, 1 },
+ { " (sdma 1)", sdma_intr,
+ SYM_LSB(IntStatus, SDmaInt_1), 2, 1 },
+ { " (sdmaI 0)", sdma_idle_intr,
+ SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1},
+ { " (sdmaI 1)", sdma_idle_intr,
+ SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1},
+ { " (sdmaP 0)", sdma_progress_intr,
+ SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 },
+ { " (sdmaP 1)", sdma_progress_intr,
+ SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 },
+ { " (sdmaC 0)", sdma_cleanup_intr,
+ SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 },
+ { " (sdmaC 1)", sdma_cleanup_intr,
+ SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0},
+};
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static const struct dca_reg_map {
+ int shadow_inx;
+ int lsb;
+ u64 mask;
+ u16 regno;
+} dca_rcvhdr_reg_map[] = {
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH),
+ ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH),
+ ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH),
+ ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH),
+ ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) },
+ { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH),
+ ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) },
+ { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH),
+ ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) },
+};
+#endif
+
+/* ibcctrl bits */
+#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
+/* cycle through TS1/TS2 till OK */
+#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2
+/* wait for TS1, then go on */
+#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3
+#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16
+
+#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */
+#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */
+#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */
+
+#define BLOB_7322_IBCHG 0x101
+
+static inline void qib_write_kreg(const struct qib_devdata *dd,
+ const u32 regno, u64 value);
+static inline u32 qib_read_kreg32(const struct qib_devdata *, const u32);
+static void write_7322_initregs(struct qib_devdata *);
+static void write_7322_init_portregs(struct qib_pportdata *);
+static void setup_7322_link_recovery(struct qib_pportdata *, u32);
+static void check_7322_rxe_status(struct qib_pportdata *);
+static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static void qib_setup_dca(struct qib_devdata *dd);
+static void setup_dca_notifier(struct qib_devdata *dd,
+ struct qib_msix_entry *m);
+static void reset_dca_notifier(struct qib_devdata *dd,
+ struct qib_msix_entry *m);
+#endif
+
+/**
+ * qib_read_ureg32 - read 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @ctxt: context number
+ *
+ * Return the contents of a register that is virtualized to be per context.
+ * Returns 0 if the chip is not present or mapped (not distinguishable
+ * from valid contents at runtime; we may add a separate error variable
+ * at some point).
+ */
+static inline u32 qib_read_ureg32(const struct qib_devdata *dd,
+ enum qib_ureg regno, int ctxt)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readl(regno + (u64 __iomem *)(
+ (dd->ureg_align * ctxt) + (dd->userbase ?
+ (char __iomem *)dd->userbase :
+ (char __iomem *)dd->kregbase + dd->uregbase)));
+}
+
+/**
+ * qib_read_ureg - read virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @ctxt: context number
+ *
+ * Return the contents of a register that is virtualized to be per context.
+ * Returns 0 if the chip is not present or mapped (not distinguishable
+ * from valid contents at runtime; we may add a separate error variable
+ * at some point).
+ */
+static inline u64 qib_read_ureg(const struct qib_devdata *dd,
+ enum qib_ureg regno, int ctxt)
+{
+
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readq(regno + (u64 __iomem *)(
+ (dd->ureg_align * ctxt) + (dd->userbase ?
+ (char __iomem *)dd->userbase :
+ (char __iomem *)dd->kregbase + dd->uregbase)));
+}
+
+/**
+ * qib_write_ureg - write virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @value: value
+ * @ctxt: context
+ *
+ * Write the contents of a register that is virtualized to be per context.
+ */
+static inline void qib_write_ureg(const struct qib_devdata *dd,
+ enum qib_ureg regno, u64 value, int ctxt)
+{
+ u64 __iomem *ubase;
+ if (dd->userbase)
+ ubase = (u64 __iomem *)
+ ((char __iomem *) dd->userbase +
+ dd->ureg_align * ctxt);
+ else
+ ubase = (u64 __iomem *)
+ (dd->uregbase +
+ (char __iomem *) dd->kregbase +
+ dd->ureg_align * ctxt);
+
+ if (dd->kregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &ubase[regno]);
+}
+
+static inline u32 qib_read_kreg32(const struct qib_devdata *dd,
+ const u32 regno)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return -1;
+ return readl((u32 __iomem *) &dd->kregbase[regno]);
+}
+
+static inline u64 qib_read_kreg64(const struct qib_devdata *dd,
+ const u32 regno)
+{
+ if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+ return -1;
+ return readq(&dd->kregbase[regno]);
+}
+
+static inline void qib_write_kreg(const struct qib_devdata *dd,
+ const u32 regno, u64 value)
+{
+ if (dd->kregbase && (dd->flags & QIB_PRESENT))
+ writeq(value, &dd->kregbase[regno]);
+}
+
+/*
+ * Not many sanity checks for the port-specific kernel register routines,
+ * since they are only used when it's known to be safe.
+ */
+static inline u64 qib_read_kreg_port(const struct qib_pportdata *ppd,
+ const u16 regno)
+{
+ if (!ppd->cpspec->kpregbase || !(ppd->dd->flags & QIB_PRESENT))
+ return 0ULL;
+ return readq(&ppd->cpspec->kpregbase[regno]);
+}
+
+static inline void qib_write_kreg_port(const struct qib_pportdata *ppd,
+ const u16 regno, u64 value)
+{
+ if (ppd->cpspec && ppd->dd && ppd->cpspec->kpregbase &&
+ (ppd->dd->flags & QIB_PRESENT))
+ writeq(value, &ppd->cpspec->kpregbase[regno]);
+}
+
+/**
+ * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register
+ * @dd: the qlogic_ib device
+ * @regno: the register number to write
+ * @ctxt: the context containing the register
+ * @value: the value to write
+ */
+static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd,
+ const u16 regno, unsigned ctxt,
+ u64 value)
+{
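+ /*
+ * Per-context kernel registers are laid out consecutively, so the
+ * context number is simply added to the base register number.
+ */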
+ qib_write_kreg(dd, regno + ctxt, value);
+}
+
+static inline u64 read_7322_creg(const struct qib_devdata *dd, u16 regno)
+{
+ if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readq(&dd->cspec->cregbase[regno]);
+}
+
+static inline u32 read_7322_creg32(const struct qib_devdata *dd, u16 regno)
+{
+ if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT))
+ return 0;
+ return readl(&dd->cspec->cregbase[regno]);
+}
+
+static inline void write_7322_creg_port(const struct qib_pportdata *ppd,
+ u16 regno, u64 value)
+{
+ if (ppd->cpspec && ppd->cpspec->cpregbase &&
+ (ppd->dd->flags & QIB_PRESENT))
+ writeq(value, &ppd->cpspec->cpregbase[regno]);
+}
+
+static inline u64 read_7322_creg_port(const struct qib_pportdata *ppd,
+ u16 regno)
+{
+ if (!ppd->cpspec || !ppd->cpspec->cpregbase ||
+ !(ppd->dd->flags & QIB_PRESENT))
+ return 0;
+ return readq(&ppd->cpspec->cpregbase[regno]);
+}
+
+static inline u32 read_7322_creg32_port(const struct qib_pportdata *ppd,
+ u16 regno)
+{
+ if (!ppd->cpspec || !ppd->cpspec->cpregbase ||
+ !(ppd->dd->flags & QIB_PRESENT))
+ return 0;
+ return readl(&ppd->cpspec->cpregbase[regno]);
+}
+
+/* bits in Control register */
+#define QLOGIC_IB_C_RESET SYM_MASK(Control, SyncReset)
+#define QLOGIC_IB_C_SDMAFETCHPRIOEN SYM_MASK(Control, SDmaDescFetchPriorityEn)
+
+/* bits in general interrupt regs */
+#define QIB_I_RCVURG_LSB SYM_LSB(IntMask, RcvUrg0IntMask)
+#define QIB_I_RCVURG_RMASK MASK_ACROSS(0, 17)
+#define QIB_I_RCVURG_MASK (QIB_I_RCVURG_RMASK << QIB_I_RCVURG_LSB)
+#define QIB_I_RCVAVAIL_LSB SYM_LSB(IntMask, RcvAvail0IntMask)
+#define QIB_I_RCVAVAIL_RMASK MASK_ACROSS(0, 17)
+#define QIB_I_RCVAVAIL_MASK (QIB_I_RCVAVAIL_RMASK << QIB_I_RCVAVAIL_LSB)
+#define QIB_I_C_ERROR INT_MASK(Err)
+
+#define QIB_I_SPIOSENT (INT_MASK_P(SendDone, 0) | INT_MASK_P(SendDone, 1))
+#define QIB_I_SPIOBUFAVAIL INT_MASK(SendBufAvail)
+#define QIB_I_GPIO INT_MASK(AssertGPIO)
+#define QIB_I_P_SDMAINT(pidx) \
+ (INT_MASK_P(SDma, pidx) | INT_MASK_P(SDmaIdle, pidx) | \
+ INT_MASK_P(SDmaProgress, pidx) | \
+ INT_MASK_PM(SDmaCleanupDone, pidx))
+
+/* Interrupt bits that are "per port" */
+#define QIB_I_P_BITSEXTANT(pidx) \
+ (INT_MASK_P(Err, pidx) | INT_MASK_P(SendDone, pidx) | \
+ INT_MASK_P(SDma, pidx) | INT_MASK_P(SDmaIdle, pidx) | \
+ INT_MASK_P(SDmaProgress, pidx) | \
+ INT_MASK_PM(SDmaCleanupDone, pidx))
+
+/* Interrupt bits that are common to a device */
+/* currently unused: QIB_I_SPIOSENT */
+#define QIB_I_C_BITSEXTANT \
+ (QIB_I_RCVURG_MASK | QIB_I_RCVAVAIL_MASK | \
+ QIB_I_SPIOSENT | \
+ QIB_I_C_ERROR | QIB_I_SPIOBUFAVAIL | QIB_I_GPIO)
+
+#define QIB_I_BITSEXTANT (QIB_I_C_BITSEXTANT | \
+ QIB_I_P_BITSEXTANT(0) | QIB_I_P_BITSEXTANT(1))
+
+/*
+ * Error bits that are "per port".
+ */
+#define QIB_E_P_IBSTATUSCHANGED ERR_MASK_N(IBStatusChanged)
+#define QIB_E_P_SHDR ERR_MASK_N(SHeadersErr)
+#define QIB_E_P_VL15_BUF_MISUSE ERR_MASK_N(VL15BufMisuseErr)
+#define QIB_E_P_SND_BUF_MISUSE ERR_MASK_N(SendBufMisuseErr)
+#define QIB_E_P_SUNSUPVL ERR_MASK_N(SendUnsupportedVLErr)
+#define QIB_E_P_SUNEXP_PKTNUM ERR_MASK_N(SendUnexpectedPktNumErr)
+#define QIB_E_P_SDROP_DATA ERR_MASK_N(SendDroppedDataPktErr)
+#define QIB_E_P_SDROP_SMP ERR_MASK_N(SendDroppedSmpPktErr)
+#define QIB_E_P_SPKTLEN ERR_MASK_N(SendPktLenErr)
+#define QIB_E_P_SUNDERRUN ERR_MASK_N(SendUnderRunErr)
+#define QIB_E_P_SMAXPKTLEN ERR_MASK_N(SendMaxPktLenErr)
+#define QIB_E_P_SMINPKTLEN ERR_MASK_N(SendMinPktLenErr)
+#define QIB_E_P_RIBLOSTLINK ERR_MASK_N(RcvIBLostLinkErr)
+#define QIB_E_P_RHDR ERR_MASK_N(RcvHdrErr)
+#define QIB_E_P_RHDRLEN ERR_MASK_N(RcvHdrLenErr)
+#define QIB_E_P_RBADTID ERR_MASK_N(RcvBadTidErr)
+#define QIB_E_P_RBADVERSION ERR_MASK_N(RcvBadVersionErr)
+#define QIB_E_P_RIBFLOW ERR_MASK_N(RcvIBFlowErr)
+#define QIB_E_P_REBP ERR_MASK_N(RcvEBPErr)
+#define QIB_E_P_RUNSUPVL ERR_MASK_N(RcvUnsupportedVLErr)
+#define QIB_E_P_RUNEXPCHAR ERR_MASK_N(RcvUnexpectedCharErr)
+#define QIB_E_P_RSHORTPKTLEN ERR_MASK_N(RcvShortPktLenErr)
+#define QIB_E_P_RLONGPKTLEN ERR_MASK_N(RcvLongPktLenErr)
+#define QIB_E_P_RMAXPKTLEN ERR_MASK_N(RcvMaxPktLenErr)
+#define QIB_E_P_RMINPKTLEN ERR_MASK_N(RcvMinPktLenErr)
+#define QIB_E_P_RICRC ERR_MASK_N(RcvICRCErr)
+#define QIB_E_P_RVCRC ERR_MASK_N(RcvVCRCErr)
+#define QIB_E_P_RFORMATERR ERR_MASK_N(RcvFormatErr)
+
+#define QIB_E_P_SDMA1STDESC ERR_MASK_N(SDma1stDescErr)
+#define QIB_E_P_SDMABASE ERR_MASK_N(SDmaBaseErr)
+#define QIB_E_P_SDMADESCADDRMISALIGN ERR_MASK_N(SDmaDescAddrMisalignErr)
+#define QIB_E_P_SDMADWEN ERR_MASK_N(SDmaDwEnErr)
+#define QIB_E_P_SDMAGENMISMATCH ERR_MASK_N(SDmaGenMismatchErr)
+#define QIB_E_P_SDMAHALT ERR_MASK_N(SDmaHaltErr)
+#define QIB_E_P_SDMAMISSINGDW ERR_MASK_N(SDmaMissingDwErr)
+#define QIB_E_P_SDMAOUTOFBOUND ERR_MASK_N(SDmaOutOfBoundErr)
+#define QIB_E_P_SDMARPYTAG ERR_MASK_N(SDmaRpyTagErr)
+#define QIB_E_P_SDMATAILOUTOFBOUND ERR_MASK_N(SDmaTailOutOfBoundErr)
+#define QIB_E_P_SDMAUNEXPDATA ERR_MASK_N(SDmaUnexpDataErr)
+
+/* Error bits that are common to a device */
+#define QIB_E_RESET ERR_MASK(ResetNegated)
+#define QIB_E_HARDWARE ERR_MASK(HardwareErr)
+#define QIB_E_INVALIDADDR ERR_MASK(InvalidAddrErr)
+
+/*
+ * Per-chip (rather than per-port) errors. Most either do nothing
+ * but trigger a print (because they self-recover, or always occur
+ * in tandem with other errors that handle the issue), or indicate
+ * errors with no recovery; either way we want to know that they
+ * happened.
+ */
+#define QIB_E_SBUF_VL15_MISUSE ERR_MASK(SBufVL15MisUseErr)
+#define QIB_E_BADEEP ERR_MASK(InvalidEEPCmd)
+#define QIB_E_VLMISMATCH ERR_MASK(SendVLMismatchErr)
+#define QIB_E_ARMLAUNCH ERR_MASK(SendArmLaunchErr)
+#define QIB_E_SPCLTRIG ERR_MASK(SendSpecialTriggerErr)
+#define QIB_E_RRCVHDRFULL ERR_MASK(RcvHdrFullErr)
+#define QIB_E_RRCVEGRFULL ERR_MASK(RcvEgrFullErr)
+#define QIB_E_RCVCTXTSHARE ERR_MASK(RcvContextShareErr)
+
+/* SDMA chip errors (not per port)
+ * QIB_E_SDMA_BUF_DUP needs no special handling, because we will also get
+ * the SDMAHALT error immediately, so we just print the dup error via the
+ * E_AUTO mechanism. This is true of most of the per-port fatal errors
+ * as well, but since this is port-independent, by definition, it's
+ * handled a bit differently. SDMA_VL15 and SDMA_WRONG_PORT are per
+ * packet send errors, and so are handled in the same manner as other
+ * per-packet errors.
+ */
+#define QIB_E_SDMA_VL15 ERR_MASK(SDmaVL15Err)
+#define QIB_E_SDMA_WRONG_PORT ERR_MASK(SDmaWrongPortErr)
+#define QIB_E_SDMA_BUF_DUP ERR_MASK(SDmaBufMaskDuplicateErr)
+
+/*
+ * Below is functionally equivalent to the legacy QLOGIC_IB_E_PKTERRS;
+ * it is used to print "common" packet errors.
+ */
+#define QIB_E_P_PKTERRS (QIB_E_P_SPKTLEN |\
+ QIB_E_P_SDROP_DATA | QIB_E_P_RVCRC |\
+ QIB_E_P_RICRC | QIB_E_P_RSHORTPKTLEN |\
+ QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SHDR | \
+ QIB_E_P_REBP)
+
+/* Error bits that are packet-related (Receive, per-port) */
+#define QIB_E_P_RPKTERRS (\
+ QIB_E_P_RHDRLEN | QIB_E_P_RBADTID | \
+ QIB_E_P_RBADVERSION | QIB_E_P_RHDR | \
+ QIB_E_P_RLONGPKTLEN | QIB_E_P_RSHORTPKTLEN |\
+ QIB_E_P_RMAXPKTLEN | QIB_E_P_RMINPKTLEN | \
+ QIB_E_P_RFORMATERR | QIB_E_P_RUNSUPVL | \
+ QIB_E_P_RUNEXPCHAR | QIB_E_P_RIBFLOW | QIB_E_P_REBP)
+
+/*
+ * Error bits that are Send-related (per port)
+ * (ARMLAUNCH excluded from E_SPKTERRS because it gets special handling).
+ * All of these potentially need to have a buffer disarmed
+ */
+#define QIB_E_P_SPKTERRS (\
+ QIB_E_P_SUNEXP_PKTNUM |\
+ QIB_E_P_SDROP_DATA | QIB_E_P_SDROP_SMP |\
+ QIB_E_P_SMAXPKTLEN |\
+ QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SHDR | \
+ QIB_E_P_SMINPKTLEN | QIB_E_P_SPKTLEN | \
+ QIB_E_P_SND_BUF_MISUSE | QIB_E_P_SUNSUPVL)
+
+#define QIB_E_SPKTERRS ( \
+ QIB_E_SBUF_VL15_MISUSE | QIB_E_VLMISMATCH | \
+ ERR_MASK_N(SendUnsupportedVLErr) | \
+ QIB_E_SPCLTRIG | QIB_E_SDMA_VL15 | QIB_E_SDMA_WRONG_PORT)
+
+#define QIB_E_P_SDMAERRS ( \
+ QIB_E_P_SDMAHALT | \
+ QIB_E_P_SDMADESCADDRMISALIGN | \
+ QIB_E_P_SDMAUNEXPDATA | \
+ QIB_E_P_SDMAMISSINGDW | \
+ QIB_E_P_SDMADWEN | \
+ QIB_E_P_SDMARPYTAG | \
+ QIB_E_P_SDMA1STDESC | \
+ QIB_E_P_SDMABASE | \
+ QIB_E_P_SDMATAILOUTOFBOUND | \
+ QIB_E_P_SDMAOUTOFBOUND | \
+ QIB_E_P_SDMAGENMISMATCH)
+
+/*
+ * This sets some bits more than once, but makes it more obvious which
+ * bits are not handled under other categories, and the repeat definition
+ * is not a problem.
+ */
+#define QIB_E_P_BITSEXTANT ( \
+ QIB_E_P_SPKTERRS | QIB_E_P_PKTERRS | QIB_E_P_RPKTERRS | \
+ QIB_E_P_RIBLOSTLINK | QIB_E_P_IBSTATUSCHANGED | \
+ QIB_E_P_SND_BUF_MISUSE | QIB_E_P_SUNDERRUN | \
+ QIB_E_P_SHDR | QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SDMAERRS \
+ )
+
+/*
+ * These are errors that can occur when the link changes state while
+ * a packet is being sent or received. This doesn't cover things like
+ * EBP or VCRC that can be the result of the sender seeing the link
+ * change state, so that we receive a "known bad" packet.
+ * All of these are "per port", so they have been renamed accordingly:
+ */
+#define QIB_E_P_LINK_PKTERRS (\
+ QIB_E_P_SDROP_DATA | QIB_E_P_SDROP_SMP |\
+ QIB_E_P_SMINPKTLEN | QIB_E_P_SPKTLEN |\
+ QIB_E_P_RSHORTPKTLEN | QIB_E_P_RMINPKTLEN |\
+ QIB_E_P_RUNEXPCHAR)
+
+/*
+ * This sets some bits more than once, but makes it more obvious which
+ * bits are not handled under other categories (such as QIB_E_SPKTERRS),
+ * and the repeat definition is not a problem.
+ */
+#define QIB_E_C_BITSEXTANT (\
+ QIB_E_HARDWARE | QIB_E_INVALIDADDR | QIB_E_BADEEP |\
+ QIB_E_ARMLAUNCH | QIB_E_VLMISMATCH | QIB_E_RRCVHDRFULL |\
+ QIB_E_RRCVEGRFULL | QIB_E_RESET | QIB_E_SBUF_VL15_MISUSE)
+
+/* Likewise Neuter E_SPKT_ERRS_IGNORE */
+#define E_SPKT_ERRS_IGNORE 0
+
+#define QIB_EXTS_MEMBIST_DISABLED \
+ SYM_MASK(EXTStatus, MemBISTDisabled)
+#define QIB_EXTS_MEMBIST_ENDTEST \
+ SYM_MASK(EXTStatus, MemBISTEndTest)
+
+#define QIB_E_SPIOARMLAUNCH \
+ ERR_MASK(SendArmLaunchErr)
+
+#define IBA7322_IBCC_LINKINITCMD_MASK SYM_RMASK(IBCCtrlA_0, LinkInitCmd)
+#define IBA7322_IBCC_LINKCMD_SHIFT SYM_LSB(IBCCtrlA_0, LinkCmd)
+
+/*
+ * IBTA_1_2 is set when multiple speeds are enabled (normal),
+ * and also if forced QDR (only QDR enabled). It's enabled for the
+ * forced QDR case so that scrambling will be enabled by the TS3
+ * exchange, when supported by both sides of the link.
+ */
+#define IBA7322_IBC_IBTA_1_2_MASK SYM_MASK(IBCCtrlB_0, IB_ENHANCED_MODE)
+#define IBA7322_IBC_MAX_SPEED_MASK SYM_MASK(IBCCtrlB_0, SD_SPEED)
+#define IBA7322_IBC_SPEED_QDR SYM_MASK(IBCCtrlB_0, SD_SPEED_QDR)
+#define IBA7322_IBC_SPEED_DDR SYM_MASK(IBCCtrlB_0, SD_SPEED_DDR)
+#define IBA7322_IBC_SPEED_SDR SYM_MASK(IBCCtrlB_0, SD_SPEED_SDR)
+#define IBA7322_IBC_SPEED_MASK (SYM_MASK(IBCCtrlB_0, SD_SPEED_SDR) | \
+ SYM_MASK(IBCCtrlB_0, SD_SPEED_DDR) | SYM_MASK(IBCCtrlB_0, SD_SPEED_QDR))
+#define IBA7322_IBC_SPEED_LSB SYM_LSB(IBCCtrlB_0, SD_SPEED_SDR)
+
+#define IBA7322_LEDBLINK_OFF_SHIFT SYM_LSB(RcvPktLEDCnt_0, OFFperiod)
+#define IBA7322_LEDBLINK_ON_SHIFT SYM_LSB(RcvPktLEDCnt_0, ONperiod)
+
+#define IBA7322_IBC_WIDTH_AUTONEG SYM_MASK(IBCCtrlB_0, IB_NUM_CHANNELS)
+#define IBA7322_IBC_WIDTH_4X_ONLY (1<<SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS))
+#define IBA7322_IBC_WIDTH_1X_ONLY (0<<SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS))
+
+#define IBA7322_IBC_RXPOL_MASK SYM_MASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP)
+#define IBA7322_IBC_RXPOL_LSB SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP)
+#define IBA7322_IBC_HRTBT_MASK (SYM_MASK(IBCCtrlB_0, HRTBT_AUTO) | \
+ SYM_MASK(IBCCtrlB_0, HRTBT_ENB))
+#define IBA7322_IBC_HRTBT_RMASK (IBA7322_IBC_HRTBT_MASK >> \
+ SYM_LSB(IBCCtrlB_0, HRTBT_ENB))
+#define IBA7322_IBC_HRTBT_LSB SYM_LSB(IBCCtrlB_0, HRTBT_ENB)
+
+#define IBA7322_REDIRECT_VEC_PER_REG 12
+
+#define IBA7322_SENDCHK_PKEY SYM_MASK(SendCheckControl_0, PKey_En)
+#define IBA7322_SENDCHK_BTHQP SYM_MASK(SendCheckControl_0, BTHQP_En)
+#define IBA7322_SENDCHK_SLID SYM_MASK(SendCheckControl_0, SLID_En)
+#define IBA7322_SENDCHK_RAW_IPV6 SYM_MASK(SendCheckControl_0, RawIPV6_En)
+#define IBA7322_SENDCHK_MINSZ SYM_MASK(SendCheckControl_0, PacketTooSmall_En)
+
+#define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */
+
+#define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \
+ .msg = #fldname , .sz = sizeof(#fldname) }
+#define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \
+ fldname##Mask##_##port), .msg = #fldname , .sz = sizeof(#fldname) }
+static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = {
+ HWE_AUTO_P(IBSerdesPClkNotDetect, 1),
+ HWE_AUTO_P(IBSerdesPClkNotDetect, 0),
+ HWE_AUTO(PCIESerdesPClkNotDetect),
+ HWE_AUTO(PowerOnBISTFailed),
+ HWE_AUTO(TempsenseTholdReached),
+ HWE_AUTO(MemoryErr),
+ HWE_AUTO(PCIeBusParityErr),
+ HWE_AUTO(PcieCplTimeout),
+ HWE_AUTO(PciePoisonedTLP),
+ HWE_AUTO_P(SDmaMemReadErr, 1),
+ HWE_AUTO_P(SDmaMemReadErr, 0),
+ HWE_AUTO_P(IBCBusFromSPCParityErr, 1),
+ HWE_AUTO_P(IBCBusToSPCParityErr, 1),
+ HWE_AUTO_P(IBCBusFromSPCParityErr, 0),
+ HWE_AUTO(statusValidNoEop),
+ HWE_AUTO(LATriggered),
+ { .mask = 0, .sz = 0 }
+};
+
+#define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \
+ .msg = #fldname, .sz = sizeof(#fldname) }
+#define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \
+ .msg = #fldname, .sz = sizeof(#fldname) }
+static const struct qib_hwerror_msgs qib_7322error_msgs[] = {
+ E_AUTO(RcvEgrFullErr),
+ E_AUTO(RcvHdrFullErr),
+ E_AUTO(ResetNegated),
+ E_AUTO(HardwareErr),
+ E_AUTO(InvalidAddrErr),
+ E_AUTO(SDmaVL15Err),
+ E_AUTO(SBufVL15MisUseErr),
+ E_AUTO(InvalidEEPCmd),
+ E_AUTO(RcvContextShareErr),
+ E_AUTO(SendVLMismatchErr),
+ E_AUTO(SendArmLaunchErr),
+ E_AUTO(SendSpecialTriggerErr),
+ E_AUTO(SDmaWrongPortErr),
+ E_AUTO(SDmaBufMaskDuplicateErr),
+ { .mask = 0, .sz = 0 }
+};
+
+static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = {
+ E_P_AUTO(IBStatusChanged),
+ E_P_AUTO(SHeadersErr),
+ E_P_AUTO(VL15BufMisuseErr),
+ /*
+ * SDmaHaltErr is not really an error, so make that clearer.
+ */
+ {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted",
+ .sz = 11},
+ E_P_AUTO(SDmaDescAddrMisalignErr),
+ E_P_AUTO(SDmaUnexpDataErr),
+ E_P_AUTO(SDmaMissingDwErr),
+ E_P_AUTO(SDmaDwEnErr),
+ E_P_AUTO(SDmaRpyTagErr),
+ E_P_AUTO(SDma1stDescErr),
+ E_P_AUTO(SDmaBaseErr),
+ E_P_AUTO(SDmaTailOutOfBoundErr),
+ E_P_AUTO(SDmaOutOfBoundErr),
+ E_P_AUTO(SDmaGenMismatchErr),
+ E_P_AUTO(SendBufMisuseErr),
+ E_P_AUTO(SendUnsupportedVLErr),
+ E_P_AUTO(SendUnexpectedPktNumErr),
+ E_P_AUTO(SendDroppedDataPktErr),
+ E_P_AUTO(SendDroppedSmpPktErr),
+ E_P_AUTO(SendPktLenErr),
+ E_P_AUTO(SendUnderRunErr),
+ E_P_AUTO(SendMaxPktLenErr),
+ E_P_AUTO(SendMinPktLenErr),
+ E_P_AUTO(RcvIBLostLinkErr),
+ E_P_AUTO(RcvHdrErr),
+ E_P_AUTO(RcvHdrLenErr),
+ E_P_AUTO(RcvBadTidErr),
+ E_P_AUTO(RcvBadVersionErr),
+ E_P_AUTO(RcvIBFlowErr),
+ E_P_AUTO(RcvEBPErr),
+ E_P_AUTO(RcvUnsupportedVLErr),
+ E_P_AUTO(RcvUnexpectedCharErr),
+ E_P_AUTO(RcvShortPktLenErr),
+ E_P_AUTO(RcvLongPktLenErr),
+ E_P_AUTO(RcvMaxPktLenErr),
+ E_P_AUTO(RcvMinPktLenErr),
+ E_P_AUTO(RcvICRCErr),
+ E_P_AUTO(RcvVCRCErr),
+ E_P_AUTO(RcvFormatErr),
+ { .mask = 0, .sz = 0 }
+};
+
+/*
+ * Below generates "auto-message" for interrupts not specific to any port or
+ * context
+ */
+#define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \
+ .msg = #fldname, .sz = sizeof(#fldname) }
+/* Below generates "auto-message" for interrupts specific to a port */
+#define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\
+ SYM_LSB(IntMask, fldname##Mask##_0), \
+ SYM_LSB(IntMask, fldname##Mask##_1)), \
+ .msg = #fldname "_P", .sz = sizeof(#fldname "_P") }
+/* For some reason, the SerDesTrimDone bits are reversed */
+#define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\
+ SYM_LSB(IntMask, fldname##Mask##_1), \
+ SYM_LSB(IntMask, fldname##Mask##_0)), \
+ .msg = #fldname "_P", .sz = sizeof(#fldname "_P") }
+/*
+ * Below generates "auto-message" for interrupts specific to a context,
+ * with ctxt-number appended
+ */
+#define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\
+ SYM_LSB(IntMask, fldname##0IntMask), \
+ SYM_LSB(IntMask, fldname##17IntMask)), \
+ .msg = #fldname "_C", .sz = sizeof(#fldname "_C") }
+
+static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = {
+ INTR_AUTO_P(SDmaInt),
+ INTR_AUTO_P(SDmaProgressInt),
+ INTR_AUTO_P(SDmaIdleInt),
+ INTR_AUTO_P(SDmaCleanupDone),
+ INTR_AUTO_C(RcvUrg),
+ INTR_AUTO_P(ErrInt),
+ INTR_AUTO(ErrInt), /* non-port-specific errs */
+ INTR_AUTO(AssertGPIOInt),
+ INTR_AUTO_P(SendDoneInt),
+ INTR_AUTO(SendBufAvailInt),
+ INTR_AUTO_C(RcvAvail),
+ { .mask = 0, .sz = 0 }
+};
+
+#define TXSYMPTOM_AUTO_P(fldname) \
+ { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \
+ .msg = #fldname, .sz = sizeof(#fldname) }
+static const struct qib_hwerror_msgs hdrchk_msgs[] = {
+ TXSYMPTOM_AUTO_P(NonKeyPacket),
+ TXSYMPTOM_AUTO_P(GRHFail),
+ TXSYMPTOM_AUTO_P(PkeyFail),
+ TXSYMPTOM_AUTO_P(QPFail),
+ TXSYMPTOM_AUTO_P(SLIDFail),
+ TXSYMPTOM_AUTO_P(RawIPV6),
+ TXSYMPTOM_AUTO_P(PacketTooSmall),
+ { .mask = 0, .sz = 0 }
+};
+
+#define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */
+
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer so it can be re-used;
+ * we don't need to force an update of pioavail here.
+ */
+static void qib_disarm_7322_senderrbufs(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 i;
+ int any;
+ u32 piobcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS;
+ u32 regcnt = (piobcnt + BITS_PER_LONG - 1) / BITS_PER_LONG;
+ unsigned long sbuf[4];
+
+ /*
+ * It's possible that sendbuffererror could have bits set; might
+ * have already done this as a result of hardware error handling.
+ */
+ any = 0;
+ for (i = 0; i < regcnt; ++i) {
+ sbuf[i] = qib_read_kreg64(dd, kr_sendbuffererror + i);
+ if (sbuf[i]) {
+ any = 1;
+ qib_write_kreg(dd, kr_sendbuffererror + i, sbuf[i]);
+ }
+ }
+
+ if (any)
+ qib_disarm_piobufs_set(dd, sbuf, piobcnt);
+}
+
+/* No txe_recover yet, if ever */
+
+/* No decode__errors yet */
+static void err_decode(char *msg, size_t len, u64 errs,
+ const struct qib_hwerror_msgs *msp)
+{
+ u64 these, lmask;
+ int took, multi, n = 0;
+
+ while (errs && msp && msp->mask) {
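+ /* multi is non-zero when this mask covers more than one bit */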
+ multi = (msp->mask & (msp->mask - 1));
+ while (errs & msp->mask) {
+ these = (errs & msp->mask);
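+ /* lmask isolates the lowest set bit of these */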
+ lmask = (these & (these - 1)) ^ these;
+ if (len) {
+ if (n++) {
+ /* separate the strings */
+ *msg++ = ',';
+ len--;
+ }
+ BUG_ON(!msp->sz);
+ /* msp->sz counts the nul */
+ took = min_t(size_t, msp->sz - (size_t)1, len);
+ memcpy(msg, msp->msg, took);
+ len -= took;
+ msg += took;
+ if (len)
+ *msg = '\0';
+ }
+ errs &= ~lmask;
+ if (len && multi) {
+ /* More than one bit this mask */
+ int idx = -1;
+
+ while (lmask & msp->mask) {
+ ++idx;
+ lmask >>= 1;
+ }
+ took = scnprintf(msg, len, "_%d", idx);
+ len -= took;
+ msg += took;
+ }
+ }
+ ++msp;
+ }
+ /* If some bits are left, show in hex. */
+ if (len && errs)
+ snprintf(msg, len, "%sMORE:%llX", n ? "," : "",
+ (unsigned long long) errs);
+}
+
+/* only called if r1 set */
+static void flush_fifo(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 __iomem *piobuf;
+ u32 bufn;
+ u32 *hdr;
+ u64 pbc;
+ const unsigned hdrwords = 7;
+ static struct qib_ib_header ibhdr = {
+ .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH),
+ .lrh[1] = IB_LID_PERMISSIVE,
+ .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC),
+ .lrh[3] = IB_LID_PERMISSIVE,
+ .u.oth.bth[0] = cpu_to_be32(
+ (IB_OPCODE_UD_SEND_ONLY << 24) | QIB_DEFAULT_P_KEY),
+ .u.oth.bth[1] = cpu_to_be32(0),
+ .u.oth.bth[2] = cpu_to_be32(0),
+ .u.oth.u.ud.deth[0] = cpu_to_be32(0),
+ .u.oth.u.ud.deth[1] = cpu_to_be32(0),
+ };
+
+ /*
+ * Send a dummy VL15 packet to flush the launch FIFO.
+ * This will not actually be sent since the TxeBypassIbc bit is set.
+ */
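+ /*
+ * The PBC word selects a VL15 send on this port and gives the
+ * packet length (header words plus CRC).
+ */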
+ pbc = PBC_7322_VL15_SEND |
+ (((u64)ppd->hw_pidx) << (PBC_PORT_SEL_LSB + 32)) |
+ (hdrwords + SIZE_OF_CRC);
+ piobuf = qib_7322_getsendbuf(ppd, pbc, &bufn);
+ if (!piobuf)
+ return;
+ writeq(pbc, piobuf);
+ hdr = (u32 *) &ibhdr;
+ if (dd->flags & QIB_PIO_FLUSH_WC) {
+ qib_flush_wc();
+ qib_pio_copy(piobuf + 2, hdr, hdrwords - 1);
+ qib_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords + 1);
+ qib_flush_wc();
+ } else
+ qib_pio_copy(piobuf + 2, hdr, hdrwords);
+ qib_sendbuf_done(dd, bufn);
+}
+
+/*
+ * This is called with interrupts disabled and sdma_lock held.
+ */
+static void qib_7322_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 set_sendctrl = 0;
+ u64 clr_sendctrl = 0;
+
+ if (op & QIB_SDMA_SENDCTRL_OP_ENABLE)
+ set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaEnable);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaEnable);
+
+ if (op & QIB_SDMA_SENDCTRL_OP_INTENABLE)
+ set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaIntEnable);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaIntEnable);
+
+ if (op & QIB_SDMA_SENDCTRL_OP_HALT)
+ set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaHalt);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaHalt);
+
+ if (op & QIB_SDMA_SENDCTRL_OP_DRAIN)
+ set_sendctrl |= SYM_MASK(SendCtrl_0, TxeBypassIbc) |
+ SYM_MASK(SendCtrl_0, TxeAbortIbc) |
+ SYM_MASK(SendCtrl_0, TxeDrainRmFifo);
+ else
+ clr_sendctrl |= SYM_MASK(SendCtrl_0, TxeBypassIbc) |
+ SYM_MASK(SendCtrl_0, TxeAbortIbc) |
+ SYM_MASK(SendCtrl_0, TxeDrainRmFifo);
+
+ spin_lock(&dd->sendctrl_lock);
+
+ /* If we are draining everything, block sends first */
+ if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) {
+ ppd->p_sendctrl &= ~SYM_MASK(SendCtrl_0, SendEnable);
+ qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ ppd->p_sendctrl |= set_sendctrl;
+ ppd->p_sendctrl &= ~clr_sendctrl;
+
+ if (op & QIB_SDMA_SENDCTRL_OP_CLEANUP)
+ qib_write_kreg_port(ppd, krp_sendctrl,
+ ppd->p_sendctrl |
+ SYM_MASK(SendCtrl_0, SDmaCleanup));
+ else
+ qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) {
+ ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, SendEnable);
+ qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ spin_unlock(&dd->sendctrl_lock);
+
+ if ((op & QIB_SDMA_SENDCTRL_OP_DRAIN) && ppd->dd->cspec->r1)
+ flush_fifo(ppd);
+}
+
+static void qib_7322_sdma_hw_clean_up(struct qib_pportdata *ppd)
+{
+ __qib_sdma_process_event(ppd, qib_sdma_event_e50_hw_cleaned);
+}
+
+static void qib_sdma_7322_setlengen(struct qib_pportdata *ppd)
+{
+ /*
+ * Set SendDmaLenGen and clear and set
+ * the MSB of the generation count to enable generation checking
+ * and load the internal generation counter.
+ */
+ qib_write_kreg_port(ppd, krp_senddmalengen, ppd->sdma_descq_cnt);
+ qib_write_kreg_port(ppd, krp_senddmalengen,
+ ppd->sdma_descq_cnt |
+ (1ULL << QIB_7322_SendDmaLenGen_0_Generation_MSB));
+}
+
+/*
+ * Must be called with sdma_lock held, or before init finished.
+ */
+static void qib_sdma_update_7322_tail(struct qib_pportdata *ppd, u16 tail)
+{
+ /* Commit writes to memory and advance the tail on the chip */
+ wmb();
+ ppd->sdma_descq_tail = tail;
+ qib_write_kreg_port(ppd, krp_senddmatail, tail);
+}
+
+/*
+ * This is called with interrupts disabled and sdma_lock held.
+ */
+static void qib_7322_sdma_hw_start_up(struct qib_pportdata *ppd)
+{
+ /*
+ * Drain all FIFOs.
+ * The hardware doesn't require this but we do it so that verbs
+ * and user applications don't wait for link active to send stale
+ * data.
+ */
+ sendctrl_7322_mod(ppd, QIB_SENDCTRL_FLUSH);
+
+ qib_sdma_7322_setlengen(ppd);
+ qib_sdma_update_7322_tail(ppd, 0); /* Set SendDmaTail */
+ ppd->sdma_head_dma[0] = 0;
+ qib_7322_sdma_sendctrl(ppd,
+ ppd->sdma_state.current_op | QIB_SDMA_SENDCTRL_OP_CLEANUP);
+}
+
+#define DISABLES_SDMA ( \
+ QIB_E_P_SDMAHALT | \
+ QIB_E_P_SDMADESCADDRMISALIGN | \
+ QIB_E_P_SDMAMISSINGDW | \
+ QIB_E_P_SDMADWEN | \
+ QIB_E_P_SDMARPYTAG | \
+ QIB_E_P_SDMA1STDESC | \
+ QIB_E_P_SDMABASE | \
+ QIB_E_P_SDMATAILOUTOFBOUND | \
+ QIB_E_P_SDMAOUTOFBOUND | \
+ QIB_E_P_SDMAGENMISMATCH)
+
+static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
+{
+ unsigned long flags;
+ struct qib_devdata *dd = ppd->dd;
+
+ errs &= QIB_E_P_SDMAERRS;
+ err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf),
+ errs, qib_7322p_error_msgs);
+
+ if (errs & QIB_E_P_SDMAUNEXPDATA)
+ qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit,
+ ppd->port);
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ if (errs != QIB_E_P_SDMAHALT) {
+ /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
+ qib_dev_porterr(dd, ppd->port,
+ "SDMA %s 0x%016llx %s\n",
+ qib_sdma_state_names[ppd->sdma_state.current_state],
+ errs, ppd->cpspec->sdmamsgbuf);
+ dump_sdma_7322_state(ppd);
+ }
+
+ switch (ppd->sdma_state.current_state) {
+ case qib_sdma_state_s00_hw_down:
+ break;
+
+ case qib_sdma_state_s10_hw_start_up_wait:
+ if (errs & QIB_E_P_SDMAHALT)
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e20_hw_started);
+ break;
+
+ case qib_sdma_state_s20_idle:
+ break;
+
+ case qib_sdma_state_s30_sw_clean_up_wait:
+ break;
+
+ case qib_sdma_state_s40_hw_clean_up_wait:
+ if (errs & QIB_E_P_SDMAHALT)
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e50_hw_cleaned);
+ break;
+
+ case qib_sdma_state_s50_hw_halt_wait:
+ if (errs & QIB_E_P_SDMAHALT)
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e60_hw_halted);
+ break;
+
+ case qib_sdma_state_s99_running:
+ __qib_sdma_process_event(ppd, qib_sdma_event_e7322_err_halted);
+ __qib_sdma_process_event(ppd, qib_sdma_event_e60_hw_halted);
+ break;
+ }
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+/*
+ * handle per-device errors (not per-port errors)
+ */
+static noinline void handle_7322_errors(struct qib_devdata *dd)
+{
+ char *msg;
+ u64 iserr = 0;
+ u64 errs;
+ u64 mask;
+ int log_idx;
+
+ qib_stats.sps_errints++;
+ errs = qib_read_kreg64(dd, kr_errstatus);
+ if (!errs) {
+ qib_devinfo(dd->pcidev,
+ "device error interrupt, but no error bits set!\n");
+ goto done;
+ }
+
+ /* don't report errors that are masked */
+ errs &= dd->cspec->errormask;
+ msg = dd->cspec->emsgbuf;
+
+ /* do these first, they are most important */
+ if (errs & QIB_E_HARDWARE) {
+ *msg = '\0';
+ qib_7322_handle_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf);
+ } else
+ for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx)
+ if (errs & dd->eep_st_masks[log_idx].errs_to_log)
+ qib_inc_eeprom_err(dd, log_idx, 1);
+
+ if (errs & QIB_E_SPKTERRS) {
+ qib_disarm_7322_senderrbufs(dd->pport);
+ qib_stats.sps_txerrs++;
+ } else if (errs & QIB_E_INVALIDADDR)
+ qib_stats.sps_txerrs++;
+ else if (errs & QIB_E_ARMLAUNCH) {
+ qib_stats.sps_txerrs++;
+ qib_disarm_7322_senderrbufs(dd->pport);
+ }
+ qib_write_kreg(dd, kr_errclear, errs);
+
+ /*
+ * The ones we mask off are handled specially below
+ * or above. Also mask SDMADISABLED by default as it
+ * is too chatty.
+ */
+ mask = QIB_E_HARDWARE;
+ *msg = '\0';
+
+ err_decode(msg, sizeof dd->cspec->emsgbuf, errs & ~mask,
+ qib_7322error_msgs);
+
+ /*
+ * Getting reset is a tragedy for all ports. Mark the device
+ * _and_ the ports as "offline" in a way meaningful to each.
+ */
+ if (errs & QIB_E_RESET) {
+ int pidx;
+
+ qib_dev_err(dd,
+ "Got reset, requires re-init (unload and reload driver)\n");
+ dd->flags &= ~QIB_INITTED; /* needs re-init */
+ /* mark as having had error */
+ *dd->devstatusp |= QIB_STATUS_HWERROR;
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ if (dd->pport[pidx].link_speed_supported)
+ *dd->pport[pidx].statusp &= ~QIB_STATUS_IB_CONF;
+ }
+
+ if (*msg && iserr)
+ qib_dev_err(dd, "%s error\n", msg);
+
+ /*
+ * If there were hdrq or egrfull errors, wake up any processes
+ * waiting in poll. We used to try to check which contexts had
+ * the overflow, but given the cost of that and the chip reads
+ * to support it, it's better to just wake everybody up if we
+ * get an overflow; waiters can poll again if it's not them.
+ */
+ if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) {
+ qib_handle_urcv(dd, ~0U);
+ if (errs & ERR_MASK(RcvEgrFullErr))
+ qib_stats.sps_buffull++;
+ else
+ qib_stats.sps_hdrfull++;
+ }
+
+done:
+ return;
+}
+
+static void qib_error_tasklet(unsigned long data)
+{
+ struct qib_devdata *dd = (struct qib_devdata *)data;
+
+ handle_7322_errors(dd);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+}
+
+static void reenable_chase(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+
+ ppd->cpspec->chase_timer.expires = 0;
+ qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN,
+ QLOGIC_IB_IBCC_LINKINITCMD_POLL);
+}
+
+static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow,
+ u8 ibclt)
+{
+ ppd->cpspec->chase_end = 0;
+
+ if (!qib_chase)
+ return;
+
+ qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN,
+ QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+ ppd->cpspec->chase_timer.expires = jiffies + QIB_CHASE_DIS_TIME;
+ add_timer(&ppd->cpspec->chase_timer);
+}
+
+static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
+{
+ u8 ibclt;
+ unsigned long tnow;
+
+ ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState);
+
+ /*
+ * Detect and handle the state chase issue, where we can
+ * get stuck if we are unlucky on timing on both sides of
+ * the link. If we are, we disable, set a timer, and
+ * then re-enable.
+ */
+ switch (ibclt) {
+ case IB_7322_LT_STATE_CFGRCVFCFG:
+ case IB_7322_LT_STATE_CFGWAITRMT:
+ case IB_7322_LT_STATE_TXREVLANES:
+ case IB_7322_LT_STATE_CFGENH:
+ tnow = jiffies;
+ if (ppd->cpspec->chase_end &&
+ time_after(tnow, ppd->cpspec->chase_end))
+ disable_chase(ppd, tnow, ibclt);
+ else if (!ppd->cpspec->chase_end)
+ ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME;
+ break;
+ default:
+ ppd->cpspec->chase_end = 0;
+ break;
+ }
+
+ if (((ibclt >= IB_7322_LT_STATE_CFGTEST &&
+ ibclt <= IB_7322_LT_STATE_CFGWAITENH) ||
+ ibclt == IB_7322_LT_STATE_LINKUP) &&
+ (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) {
+ force_h1(ppd);
+ ppd->cpspec->qdr_reforce = 1;
+ if (!ppd->dd->cspec->r1)
+ serdes_7322_los_enable(ppd, 0);
+ } else if (ppd->cpspec->qdr_reforce &&
+ (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) &&
+ (ibclt == IB_7322_LT_STATE_CFGENH ||
+ ibclt == IB_7322_LT_STATE_CFGIDLE ||
+ ibclt == IB_7322_LT_STATE_LINKUP))
+ force_h1(ppd);
+
+ if ((IS_QMH(ppd->dd) || IS_QME(ppd->dd)) &&
+ ppd->link_speed_enabled == QIB_IB_QDR &&
+ (ibclt == IB_7322_LT_STATE_CFGTEST ||
+ ibclt == IB_7322_LT_STATE_CFGENH ||
+ (ibclt >= IB_7322_LT_STATE_POLLACTIVE &&
+ ibclt <= IB_7322_LT_STATE_SLEEPQUIET)))
+ adj_tx_serdes(ppd);
+
+ if (ibclt != IB_7322_LT_STATE_LINKUP) {
+ u8 ltstate = qib_7322_phys_portstate(ibcst);
+ u8 pibclt = (u8)SYM_FIELD(ppd->lastibcstat, IBCStatusA_0,
+ LinkTrainingState);
+ if (!ppd->dd->cspec->r1 &&
+ pibclt == IB_7322_LT_STATE_LINKUP &&
+ ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE)
+ /* If the link went down (but not into recovery),
+ * turn LOS back on */
+ serdes_7322_los_enable(ppd, 1);
+ if (!ppd->cpspec->qdr_dfe_on &&
+ ibclt <= IB_7322_LT_STATE_SLEEPQUIET) {
+ ppd->cpspec->qdr_dfe_on = 1;
+ ppd->cpspec->qdr_dfe_time = 0;
+ /* On link down, reenable QDR adaptation */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 :
+ QDR_STATIC_ADAPT_DOWN);
+ pr_info(
+ "IB%u:%u re-enabled QDR adaptation ibclt %x\n",
+ ppd->dd->unit, ppd->port, ibclt);
+ }
+ }
+}
+
+static int qib_7322_set_ib_cfg(struct qib_pportdata *, int, u32);
+
+/*
+ * This is per-port error handling. It will likely get its own
+ * MSIx interrupt (one for each port, although just a single
+ * handler).
+ */
+static noinline void handle_7322_p_errors(struct qib_pportdata *ppd)
+{
+ char *msg;
+ u64 ignore_this_time = 0, iserr = 0, errs, fmask;
+ struct qib_devdata *dd = ppd->dd;
+
+ /* do this as soon as possible */
+ fmask = qib_read_kreg64(dd, kr_act_fmask);
+ if (!fmask)
+ check_7322_rxe_status(ppd);
+
+ errs = qib_read_kreg_port(ppd, krp_errstatus);
+ if (!errs)
+ qib_devinfo(dd->pcidev,
+ "Port%d error interrupt, but no error bits set!\n",
+ ppd->port);
+ if (!fmask)
+ errs &= ~QIB_E_P_IBSTATUSCHANGED;
+ if (!errs)
+ goto done;
+
+ msg = ppd->cpspec->epmsgbuf;
+ *msg = '\0';
+
+ if (errs & ~QIB_E_P_BITSEXTANT) {
+ err_decode(msg, sizeof ppd->cpspec->epmsgbuf,
+ errs & ~QIB_E_P_BITSEXTANT, qib_7322p_error_msgs);
+ if (!*msg)
+ snprintf(msg, sizeof ppd->cpspec->epmsgbuf,
+ "no others");
+ qib_dev_porterr(dd, ppd->port,
+ "error interrupt with unknown errors 0x%016Lx set (and %s)\n",
+ (errs & ~QIB_E_P_BITSEXTANT), msg);
+ *msg = '\0';
+ }
+
+ if (errs & QIB_E_P_SHDR) {
+ u64 symptom;
+
+ /* determine cause, then write to clear */
+ symptom = qib_read_kreg_port(ppd, krp_sendhdrsymptom);
+ qib_write_kreg_port(ppd, krp_sendhdrsymptom, 0);
+ err_decode(msg, sizeof ppd->cpspec->epmsgbuf, symptom,
+ hdrchk_msgs);
+ *msg = '\0';
+ /* senderrbuf cleared in SPKTERRS below */
+ }
+
+ if (errs & QIB_E_P_SPKTERRS) {
+ if ((errs & QIB_E_P_LINK_PKTERRS) &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /*
+ * This can happen when trying to bring the link
+ * up, but the IB link changes state at the "wrong"
+ * time. The IB logic then complains that the packet
+ * isn't valid. We don't want to confuse people, so
+ * we just don't print them, except at debug
+ */
+ err_decode(msg, sizeof ppd->cpspec->epmsgbuf,
+ (errs & QIB_E_P_LINK_PKTERRS),
+ qib_7322p_error_msgs);
+ *msg = '\0';
+ ignore_this_time = errs & QIB_E_P_LINK_PKTERRS;
+ }
+ qib_disarm_7322_senderrbufs(ppd);
+ } else if ((errs & QIB_E_P_LINK_PKTERRS) &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /*
+ * This can happen when SMA is trying to bring the link
+ * up, but the IB link changes state at the "wrong" time.
+ * The IB logic then complains that the packet isn't
+ * valid. We don't want to confuse people, so we just
+ * don't print them, except at debug
+ */
+ err_decode(msg, sizeof ppd->cpspec->epmsgbuf, errs,
+ qib_7322p_error_msgs);
+ ignore_this_time = errs & QIB_E_P_LINK_PKTERRS;
+ *msg = '\0';
+ }
+
+ qib_write_kreg_port(ppd, krp_errclear, errs);
+
+ errs &= ~ignore_this_time;
+ if (!errs)
+ goto done;
+
+ if (errs & QIB_E_P_RPKTERRS)
+ qib_stats.sps_rcverrs++;
+ if (errs & QIB_E_P_SPKTERRS)
+ qib_stats.sps_txerrs++;
+
+ iserr = errs & ~(QIB_E_P_RPKTERRS | QIB_E_P_PKTERRS);
+
+ if (errs & QIB_E_P_SDMAERRS)
+ sdma_7322_p_errors(ppd, errs);
+
+ if (errs & QIB_E_P_IBSTATUSCHANGED) {
+ u64 ibcs;
+ u8 ltstate;
+
+ ibcs = qib_read_kreg_port(ppd, krp_ibcstatus_a);
+ ltstate = qib_7322_phys_portstate(ibcs);
+
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ handle_serdes_issues(ppd, ibcs);
+ if (!(ppd->cpspec->ibcctrl_a &
+ SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn))) {
+ /*
+ * We got our interrupt, so init code should be
+ * happy and not try alternatives. Now squelch
+ * other "chatter" from link-negotiation (pre Init)
+ */
+ ppd->cpspec->ibcctrl_a |=
+ SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ }
+
+ /* Update our picture of width and speed from chip */
+ ppd->link_width_active =
+ (ibcs & SYM_MASK(IBCStatusA_0, LinkWidthActive)) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ ppd->link_speed_active = (ibcs & SYM_MASK(IBCStatusA_0,
+ LinkSpeedQDR)) ? QIB_IB_QDR : (ibcs &
+ SYM_MASK(IBCStatusA_0, LinkSpeedActive)) ?
+ QIB_IB_DDR : QIB_IB_SDR;
+
+ if ((ppd->lflags & QIBL_IB_LINK_DISABLED) && ltstate !=
+ IB_PHYSPORTSTATE_DISABLED)
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+ else
+ /*
+ * Since going into a recovery state causes the link
+ * state to go down and since recovery is transitory,
+ * it is better if we "miss" ever seeing the link
+ * training state go into recovery (i.e., ignore this
+ * transition for link state special handling purposes)
+ * without updating lastibcstat.
+ */
+ if (ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT &&
+ ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE)
+ qib_handle_e_ibstatuschanged(ppd, ibcs);
+ }
+ if (*msg && iserr)
+ qib_dev_porterr(dd, ppd->port, "%s error\n", msg);
+
+ if (ppd->state_wanted & ppd->lflags)
+ wake_up_interruptible(&ppd->state_wait);
+done:
+ return;
+}
+
+/* enable/disable chip from delivering interrupts */
+static void qib_7322_set_intr_state(struct qib_devdata *dd, u32 enable)
+{
+ if (enable) {
+ if (dd->flags & QIB_BADINTR)
+ return;
+ qib_write_kreg(dd, kr_intmask, dd->cspec->int_enable_mask);
+ /* cause any pending enabled interrupts to be re-delivered */
+ qib_write_kreg(dd, kr_intclear, 0ULL);
+ if (dd->cspec->num_msix_entries) {
+ /* and same for MSIx */
+ u64 val = qib_read_kreg64(dd, kr_intgranted);
+ if (val)
+ qib_write_kreg(dd, kr_intgranted, val);
+ }
+ } else
+ qib_write_kreg(dd, kr_intmask, 0ULL);
+}
+
+/*
+ * Try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ * This is in chip-specific code because of all of the register accesses,
+ * even though the details are similar on most chips.
+ */
+static void qib_7322_clear_freeze(struct qib_devdata *dd)
+{
+ int pidx;
+
+ /* disable error interrupts, to avoid confusion */
+ qib_write_kreg(dd, kr_errmask, 0ULL);
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ if (dd->pport[pidx].link_speed_supported)
+ qib_write_kreg_port(dd->pport + pidx, krp_errmask,
+ 0ULL);
+
+ /* also disable interrupts; errormask is sometimes overwritten */
+ qib_7322_set_intr_state(dd, 0);
+
+ /* clear the freeze, and be sure chip saw it */
+ qib_write_kreg(dd, kr_control, dd->control);
+ qib_read_kreg32(dd, kr_scratch);
+
+ /*
+ * Force new interrupt if any hwerr, error or interrupt bits are
+ * still set, and clear "safe" send packet errors related to freeze
+ * and cancelling sends. Re-enable error interrupts before possible
+ * force of re-interrupt on pending interrupts.
+ */
+ qib_write_kreg(dd, kr_hwerrclear, 0ULL);
+ qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE);
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+ /* We need to purge per-port errs and reset mask, too */
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ if (!dd->pport[pidx].link_speed_supported)
+ continue;
+ qib_write_kreg_port(dd->pport + pidx, krp_errclear, ~0Ull);
+ qib_write_kreg_port(dd->pport + pidx, krp_errmask, ~0Ull);
+ }
+ qib_7322_set_intr_state(dd, 1);
+}
+
+/* no error handling to speak of */
+/**
+ * qib_7322_handle_hwerrors - display hardware errors.
+ * @dd: the qlogic_ib device
+ * @msg: the output buffer
+ * @msgl: the size of the output buffer
+ *
+ * Most hardware errors are catastrophic, but for right now we'll
+ * print them and continue. We reuse the same message buffer as
+ * qib_handle_errors() to avoid excessive stack usage.
+ */
+static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
+ size_t msgl)
+{
+ u64 hwerrs;
+ u32 ctrl;
+ int isfatal = 0;
+
+ hwerrs = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (!hwerrs)
+ goto bail;
+ if (hwerrs == ~0ULL) {
+ qib_dev_err(dd,
+ "Read of hardware error status failed (all bits set); ignoring\n");
+ goto bail;
+ }
+ qib_stats.sps_hwerrs++;
+
+ /* Always clear the error status register, except BIST fail */
+ qib_write_kreg(dd, kr_hwerrclear, hwerrs &
+ ~HWE_MASK(PowerOnBISTFailed));
+
+ hwerrs &= dd->cspec->hwerrmask;
+
+ /* no EEPROM logging, yet */
+
+ if (hwerrs)
+ qib_devinfo(dd->pcidev,
+ "Hardware error: hwerr=0x%llx (cleared)\n",
+ (unsigned long long) hwerrs);
+
+ ctrl = qib_read_kreg32(dd, kr_control);
+ if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) {
+ /*
+ * No recovery yet...
+ */
+ if ((hwerrs & ~HWE_MASK(LATriggered)) ||
+ dd->cspec->stay_in_freeze) {
+ /*
+ * If any bits are set that we aren't ignoring, only make the
+ * complaint once, in case it's stuck or recurring and we get
+ * here multiple times.
+ * Force link down, so the switch knows, and so the LEDs are
+ * turned off.
+ */
+ if (dd->flags & QIB_INITTED)
+ isfatal = 1;
+ } else
+ qib_7322_clear_freeze(dd);
+ }
+
+ if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
+ isfatal = 1;
+ strlcpy(msg,
+ "[Memory BIST test failed, InfiniPath hardware unusable]",
+ msgl);
+ /* ignore from now on, so disable until driver reloaded */
+ dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+ }
+
+ err_decode(msg, msgl, hwerrs, qib_7322_hwerror_msgs);
+
+ /* Ignore esoteric PLL failures et al. */
+
+ qib_dev_err(dd, "%s hardware error\n", msg);
+
+ if (hwerrs &
+ (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
+ int pidx = 0;
+ int err;
+ unsigned long flags;
+ struct qib_pportdata *ppd = dd->pport;
+ for (; pidx < dd->num_pports; ++pidx, ppd++) {
+ err = 0;
+ if (pidx == 0 && (hwerrs &
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
+ err++;
+ if (pidx == 1 && (hwerrs &
+ SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
+ err++;
+ if (err) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ dump_sdma_7322_state(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+ }
+
+ if (isfatal && !dd->diag_client) {
+ qib_dev_err(dd,
+ "Fatal Hardware Error, no longer usable, SN %.16s\n",
+ dd->serial);
+ /*
+ * for /sys status file and user programs to print; if no
+ * trailing brace is copied, we'll know it was truncated.
+ */
+ if (dd->freezemsg)
+ snprintf(dd->freezemsg, dd->freezelen,
+ "{%s}", msg);
+ qib_disable_after_error(dd);
+ }
+bail:;
+}
+
+/**
+ * qib_7322_init_hwerrors - enable hardware errors
+ * @dd: the qlogic_ib device
+ *
+ * Now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask.
+ */
+static void qib_7322_init_hwerrors(struct qib_devdata *dd)
+{
+ int pidx;
+ u64 extsval;
+
+ extsval = qib_read_kreg64(dd, kr_extstatus);
+ if (!(extsval & (QIB_EXTS_MEMBIST_DISABLED |
+ QIB_EXTS_MEMBIST_ENDTEST)))
+ qib_dev_err(dd, "MemBIST did not complete!\n");
+
+ /* never clear BIST failure, so reported on each driver load */
+ qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed));
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+
+ /* clear all */
+ qib_write_kreg(dd, kr_errclear, ~0ULL);
+ /* enable errors that are masked, at least this first time. */
+ qib_write_kreg(dd, kr_errmask, ~0ULL);
+ dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ if (dd->pport[pidx].link_speed_supported)
+ qib_write_kreg_port(dd->pport + pidx, krp_errmask,
+ ~0ULL);
+}
+
+/*
+ * Disable and enable the armlaunch error. Used for PIO bandwidth testing
+ * on chips that are count-based, rather than trigger-based. There is no
+ * reference counting, but that's also fine, given the intended use.
+ * Only chip-specific because it's all register accesses
+ */
+static void qib_set_7322_armlaunch(struct qib_devdata *dd, u32 enable)
+{
+ if (enable) {
+ qib_write_kreg(dd, kr_errclear, QIB_E_SPIOARMLAUNCH);
+ dd->cspec->errormask |= QIB_E_SPIOARMLAUNCH;
+ } else
+ dd->cspec->errormask &= ~QIB_E_SPIOARMLAUNCH;
+ qib_write_kreg(dd, kr_errmask, dd->cspec->errormask);
+}
+
+/*
+ * Formerly took parameter <which> in pre-shifted,
+ * pre-merged form with LinkCmd and LinkInitCmd
+ * together, and assumed that zero was a NOP.
+ */
+static void qib_set_ib_7322_lstate(struct qib_pportdata *ppd, u16 linkcmd,
+ u16 linitcmd)
+{
+ u64 mod_wd;
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) {
+ /*
+ * If we are told to disable, note that so link-recovery
+ * code does not attempt to bring us back up.
+ * Also reset everything that we can, so we start
+ * completely clean when re-enabled (before we
+ * actually issue the disable to the IBC)
+ */
+ qib_7322_mini_pcs_reset(ppd);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) {
+ /*
+ * Any other linkinitcmd will lead to LINKDOWN and then
+ * to INIT (if all is well), so clear flag to let
+ * link-recovery code attempt to bring us back up.
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ /*
+ * Clear status change interrupt reduction so the
+ * new state is seen.
+ */
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn);
+ }
+
+ mod_wd = (linkcmd << IBA7322_IBCC_LINKCMD_SHIFT) |
+ (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a |
+ mod_wd);
+ /* write to chip to prevent back-to-back writes of ibc reg */
+ qib_write_kreg(dd, kr_scratch, 0);
+
+}
+
+/*
+ * The total RCV buffer memory is 64KB, used for both ports, and is
+ * in units of 64 bytes (same as IB flow control credit unit).
+ * The consumedVL units in the same registers are in 32-byte units!
+ * So, a VL15 packet needs 4.50 IB credits, and 9 rx buffer chunks,
+ * and we can therefore allocate just 9 IB credits for 2 VL15 packets
+ * in krp_rxcreditvl15, rather than 10.
+ */
+#define RCV_BUF_UNITSZ 64
+#define NUM_RCV_BUF_UNITS(dd) ((64 * 1024) / (RCV_BUF_UNITSZ * dd->num_pports))
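+/*
+ * For example, with both ports in use this is 512 units of 64 bytes
+ * per port; VL15's two 288-byte packets round up to 9 units, leaving
+ * 503 units for set_vls() to split among the data VLs.
+ */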
+
+static void set_vls(struct qib_pportdata *ppd)
+{
+ int i, numvls, totcred, cred_vl, vl0extra;
+ struct qib_devdata *dd = ppd->dd;
+ u64 val;
+
+ numvls = qib_num_vls(ppd->vls_operational);
+
+ /*
+ * Set up per-VL credits. Below is kluge based on these assumptions:
+ * 1) port is disabled at the time early_init is called.
+ * 2) give VL15 9 credits, for two max-plausible packets.
+ * 3) Give VL0-N the rest, with any rounding excess used for VL0
+ */
+ /* 2 VL15 packets @ 288 bytes each (including IB headers) */
+ totcred = NUM_RCV_BUF_UNITS(dd);
+ cred_vl = (2 * 288 + RCV_BUF_UNITSZ - 1) / RCV_BUF_UNITSZ;
+ totcred -= cred_vl;
+ qib_write_kreg_port(ppd, krp_rxcreditvl15, (u64) cred_vl);
+ cred_vl = totcred / numvls;
+ vl0extra = totcred - cred_vl * numvls;
+ qib_write_kreg_port(ppd, krp_rxcreditvl0, cred_vl + vl0extra);
+ for (i = 1; i < numvls; i++)
+ qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, cred_vl);
+ for (; i < 8; i++) /* no buffer space for other VLs */
+ qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, 0);
+
+ /* Notify IBC that credits need to be recalculated */
+ val = qib_read_kreg_port(ppd, krp_ibsdtestiftx);
+ val |= SYM_MASK(IB_SDTEST_IF_TX_0, CREDIT_CHANGE);
+ qib_write_kreg_port(ppd, krp_ibsdtestiftx, val);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ val &= ~SYM_MASK(IB_SDTEST_IF_TX_0, CREDIT_CHANGE);
+ qib_write_kreg_port(ppd, krp_ibsdtestiftx, val);
+
+ for (i = 0; i < numvls; i++)
+ val = qib_read_kreg_port(ppd, krp_rxcreditvl0 + i);
+ val = qib_read_kreg_port(ppd, krp_rxcreditvl15);
+
+ /* Change the number of operational VLs */
+ ppd->cpspec->ibcctrl_a = (ppd->cpspec->ibcctrl_a &
+ ~SYM_MASK(IBCCtrlA_0, NumVLane)) |
+ ((u64)(numvls - 1) << SYM_LSB(IBCCtrlA_0, NumVLane));
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+}
+
+/*
+ * The code that deals with actual SerDes is in serdes_7322_init().
+ * Compared to the code for iba7220, it is minimal.
+ */
+static int serdes_7322_init(struct qib_pportdata *ppd);
+
+/**
+ * qib_7322_bringup_serdes - bring up the serdes
+ * @ppd: physical port on the qlogic_ib device
+ */
+static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 val, guid, ibc;
+ unsigned long flags;
+ int ret = 0;
+
+ /*
+ * SerDes model not in Pd, but still need to
+ * set up much of IBCCtrl and IBCDDRCtrl; move elsewhere
+ * eventually.
+ */
+ /* Put IBC in reset, sends disabled (should be in reset already) */
+ ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, IBLinkEn);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+
+ /* ensure previous Tx parameters are not still forced */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
+ if (qib_compat_ddr_negotiate) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
+ crp_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap = read_7322_creg32_port(ppd,
+ crp_iblinkerrrecov);
+ }
+
+ /* flowcontrolwatermark is in units of KBytes */
+ ibc = 0x5ULL << SYM_LSB(IBCCtrlA_0, FlowCtrlWaterMark);
+ /*
+ * Flow control is sent this often, even if no changes in
+ * buffer space occur. Units are 128ns for this chip.
+ * Set to 3usec.
+ */
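+ /* 24 * 128 ns = 3.07 usec */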
+ ibc |= 24ULL << SYM_LSB(IBCCtrlA_0, FlowCtrlPeriod);
+ /* max error tolerance */
+ ibc |= 0xfULL << SYM_LSB(IBCCtrlA_0, PhyerrThreshold);
+ /* IB credit flow control. */
+ ibc |= 0xfULL << SYM_LSB(IBCCtrlA_0, OverrunThreshold);
+ /*
+ * set initial max size pkt IBC will send, including ICRC; it's the
+ * PIO buffer size in dwords, less 1; also see qib_set_mtu()
+ */
+ ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) <<
+ SYM_LSB(IBCCtrlA_0, MaxPktLen);
+ ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */
+
+ /*
+ * Reset the PCS interface to the serdes (and also ibc, which is still
+ * in reset from above). Writes new value of ibcctrl_a as last step.
+ */
+ qib_7322_mini_pcs_reset(ppd);
+
+ if (!ppd->cpspec->ibcctrl_b) {
+ unsigned lse = ppd->link_speed_enabled;
+
+ /*
+ * Not on re-init after reset, establish shadow
+ * and force initial config.
+ */
+ ppd->cpspec->ibcctrl_b = qib_read_kreg_port(ppd,
+ krp_ibcctrl_b);
+ ppd->cpspec->ibcctrl_b &= ~(IBA7322_IBC_SPEED_QDR |
+ IBA7322_IBC_SPEED_DDR |
+ IBA7322_IBC_SPEED_SDR |
+ IBA7322_IBC_WIDTH_AUTONEG |
+ SYM_MASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED));
+ if (lse & (lse - 1)) /* Multiple speeds enabled */
+ ppd->cpspec->ibcctrl_b |=
+ (lse << IBA7322_IBC_SPEED_LSB) |
+ IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK;
+ else
+ ppd->cpspec->ibcctrl_b |= (lse == QIB_IB_QDR) ?
+ IBA7322_IBC_SPEED_QDR |
+ IBA7322_IBC_IBTA_1_2_MASK :
+ (lse == QIB_IB_DDR) ?
+ IBA7322_IBC_SPEED_DDR :
+ IBA7322_IBC_SPEED_SDR;
+ if ((ppd->link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) ==
+ (IB_WIDTH_1X | IB_WIDTH_4X))
+ ppd->cpspec->ibcctrl_b |= IBA7322_IBC_WIDTH_AUTONEG;
+ else
+ ppd->cpspec->ibcctrl_b |=
+ ppd->link_width_enabled == IB_WIDTH_4X ?
+ IBA7322_IBC_WIDTH_4X_ONLY :
+ IBA7322_IBC_WIDTH_1X_ONLY;
+
+ /* always enable these on driver reload, not sticky */
+ ppd->cpspec->ibcctrl_b |= (IBA7322_IBC_RXPOL_MASK |
+ IBA7322_IBC_HRTBT_MASK);
+ }
+ qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b);
+
+ /* setup so we have more time at CFGTEST to change H1 */
+ val = qib_read_kreg_port(ppd, krp_ibcctrl_c);
+ val &= ~SYM_MASK(IBCCtrlC_0, IB_FRONT_PORCH);
+ val |= 0xfULL << SYM_LSB(IBCCtrlC_0, IB_FRONT_PORCH);
+ qib_write_kreg_port(ppd, krp_ibcctrl_c, val);
+
+ serdes_7322_init(ppd);
+
+ guid = be64_to_cpu(ppd->guid);
+ if (!guid) {
+ if (dd->base_guid)
+ guid = be64_to_cpu(dd->base_guid) + ppd->port - 1;
+ ppd->guid = cpu_to_be64(guid);
+ }
+
+ qib_write_kreg_port(ppd, krp_hrtbt_guid, guid);
+ /* write to chip to prevent back-to-back writes of ibc reg */
+ qib_write_kreg(dd, kr_scratch, 0);
+
+ /* Enable port */
+ ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn);
+ set_vls(ppd);
+
+ /* initially come up DISABLED, without sending anything. */
+ val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
+ QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, val);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ /* clear the linkinit cmds */
+ ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd);
+
+ /* be paranoid against later code motion, etc. */
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+ ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable);
+ qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl);
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+
+ /* Also enable IBSTATUSCHG interrupt. */
+ val = qib_read_kreg_port(ppd, krp_errmask);
+ qib_write_kreg_port(ppd, krp_errmask,
+ val | ERR_MASK_N(IBStatusChanged));
+
+ /* Always zero until we start messing with SerDes for real */
+ return ret;
+}
+
+/**
+ * qib_7322_mini_quiet_serdes - set serdes to txidle
+ * @ppd: physical port on the qlogic_ib device
+ * Called when driver is being unloaded
+ */
+static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd)
+{
+ u64 val;
+ unsigned long flags;
+
+ qib_set_ib_7322_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ wake_up(&ppd->cpspec->autoneg_wait);
+ cancel_delayed_work_sync(&ppd->cpspec->autoneg_work);
+ if (ppd->dd->cspec->r1)
+ cancel_delayed_work_sync(&ppd->cpspec->ipg_work);
+
+ ppd->cpspec->chase_end = 0;
+ if (ppd->cpspec->chase_timer.data) /* if initted */
+ del_timer_sync(&ppd->cpspec->chase_timer);
+
+ /*
+ * Despite the name, actually disables IBC as well. Do it when
+ * we are as sure as possible that no more packets can be
+ * received, following the down and the PCS reset.
+ * The actual disabling happens in qib_7322_mini_pcs_reset(),
+ * along with the PCS being reset.
+ */
+ ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, IBLinkEn);
+ qib_7322_mini_pcs_reset(ppd);
+
+ /*
+ * Update the adjusted counters so the adjustment persists
+ * across driver reload.
+ */
+ if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta ||
+ ppd->cpspec->ibdeltainprog || ppd->cpspec->iblnkdowndelta) {
+ struct qib_devdata *dd = ppd->dd;
+ u64 diagc;
+
+ /* enable counter writes */
+ diagc = qib_read_kreg64(dd, kr_hwdiagctrl);
+ qib_write_kreg(dd, kr_hwdiagctrl,
+ diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable));
+
+ if (ppd->cpspec->ibsymdelta || ppd->cpspec->ibdeltainprog) {
+ val = read_7322_creg32_port(ppd, crp_ibsymbolerr);
+ if (ppd->cpspec->ibdeltainprog)
+ val -= val - ppd->cpspec->ibsymsnap;
+ val -= ppd->cpspec->ibsymdelta;
+ write_7322_creg_port(ppd, crp_ibsymbolerr, val);
+ }
+ if (ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) {
+ val = read_7322_creg32_port(ppd, crp_iblinkerrrecov);
+ if (ppd->cpspec->ibdeltainprog)
+ val -= val - ppd->cpspec->iblnkerrsnap;
+ val -= ppd->cpspec->iblnkerrdelta;
+ write_7322_creg_port(ppd, crp_iblinkerrrecov, val);
+ }
+ if (ppd->cpspec->iblnkdowndelta) {
+ val = read_7322_creg32_port(ppd, crp_iblinkdown);
+ val += ppd->cpspec->iblnkdowndelta;
+ write_7322_creg_port(ppd, crp_iblinkdown, val);
+ }
+ /*
+ * No need to save ibmalfdelta since IB perfcounters
+ * are cleared on driver reload.
+ */
+
+ /* and disable counter writes */
+ qib_write_kreg(dd, kr_hwdiagctrl, diagc);
+ }
+}
+
+/**
+ * qib_setup_7322_setextled - set the state of the two external LEDs
+ * @ppd: physical port on the qlogic_ib device
+ * @on: whether the link is up or not
+ *
+ * The exact combo of LEDs if on is true is determined by looking
+ * at the ibcstatus.
+ *
+ * These LEDs indicate the physical and logical state of the IB link.
+ * For this chip (at least with recommended board pinouts), LED1
+ * is Yellow (logical state) and LED2 is Green (physical state).
+ *
+ * Note: We try to match the Mellanox HCA LED behavior as best
+ * we can. Green indicates physical link state is OK (something is
+ * plugged in, and we can train).
+ * Amber indicates the link is logically up (ACTIVE).
+ * Mellanox further blinks the amber LED to indicate data packet
+ * activity, but we have no hardware support for that, so it would
+ * require waking up every 10-20 msecs and checking the counters
+ * on the chip, and then turning the LED off if appropriate. That's
+ * visible overhead, so not something we will do.
+ */
+static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 extctl, ledblink = 0, val;
+ unsigned long flags;
+ int yel, grn;
+
+ /*
+ * The diags use the LED to indicate diag info, so we leave
+ * the external LED alone when the diags are running.
+ */
+ if (dd->diag_client)
+ return;
+
+	/* Allow override of LED display, e.g. for locating the system in a rack */
+ if (ppd->led_override) {
+ grn = (ppd->led_override & QIB_LED_PHYS);
+ yel = (ppd->led_override & QIB_LED_LOG);
+ } else if (on) {
+ val = qib_read_kreg_port(ppd, krp_ibcstatus_a);
+ grn = qib_7322_phys_portstate(val) ==
+ IB_PHYSPORTSTATE_LINKUP;
+ yel = qib_7322_iblink_state(val) == IB_PORT_ACTIVE;
+ } else {
+ grn = 0;
+ yel = 0;
+ }
+
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ extctl = dd->cspec->extctrl & (ppd->port == 1 ?
+ ~ExtLED_IB1_MASK : ~ExtLED_IB2_MASK);
+ if (grn) {
+ extctl |= ppd->port == 1 ? ExtLED_IB1_GRN : ExtLED_IB2_GRN;
+ /*
+ * Counts are in chip clock (4ns) periods.
+		 * This is roughly 1/15 sec (66.6 ms) on,
+ * 3/16 sec (187.5 ms) off, with packets rcvd.
+ */
+ ledblink = ((66600 * 1000UL / 4) << IBA7322_LEDBLINK_ON_SHIFT) |
+ ((187500 * 1000UL / 4) << IBA7322_LEDBLINK_OFF_SHIFT);
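+		/*
+		 * e.g. 66600 * 1000 ns / 4 ns per chip clock = 16,650,000
+		 * counts placed in the ON field.
+		 */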
+ }
+ if (yel)
+ extctl |= ppd->port == 1 ? ExtLED_IB1_YEL : ExtLED_IB2_YEL;
+ dd->cspec->extctrl = extctl;
+ qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl);
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+
+ if (ledblink) /* blink the LED on packet receive */
+ qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink);
+}
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+ switch (event) {
+ case DCA_PROVIDER_ADD:
+ if (dd->flags & QIB_DCA_ENABLED)
+ break;
+ if (!dca_add_requester(&dd->pcidev->dev)) {
+ qib_devinfo(dd->pcidev, "DCA enabled\n");
+ dd->flags |= QIB_DCA_ENABLED;
+ qib_setup_dca(dd);
+ }
+ break;
+ case DCA_PROVIDER_REMOVE:
+ if (dd->flags & QIB_DCA_ENABLED) {
+ dca_remove_requester(&dd->pcidev->dev);
+ dd->flags &= ~QIB_DCA_ENABLED;
+ dd->cspec->dca_ctrl = 0;
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA),
+ dd->cspec->dca_ctrl);
+ }
+ break;
+ }
+ return 0;
+}
+
+static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu)
+{
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_chip_specific *cspec = dd->cspec;
+
+ if (!(dd->flags & QIB_DCA_ENABLED))
+ return;
+ if (cspec->rhdr_cpu[rcd->ctxt] != cpu) {
+ const struct dca_reg_map *rmp;
+
+ cspec->rhdr_cpu[rcd->ctxt] = cpu;
+ rmp = &dca_rcvhdr_reg_map[rcd->ctxt];
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask;
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |=
+ (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb;
+ qib_devinfo(dd->pcidev,
+ "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu,
+ (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+ qib_write_kreg(dd, rmp->regno,
+ cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
+ cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+ }
+}
+
+static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_chip_specific *cspec = dd->cspec;
+ unsigned pidx = ppd->port - 1;
+
+ if (!(dd->flags & QIB_DCA_ENABLED))
+ return;
+ if (cspec->sdma_cpu[pidx] != cpu) {
+ cspec->sdma_cpu[pidx] = cpu;
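+		/*
+		 * Shadow index 4 (DCACtrlF) holds both the last RcvHdrq
+		 * fields and the SendDma0/1 OPH fields; only this port's
+		 * SDMA field is changed here.
+		 */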
+ cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ?
+ SYM_MASK(DCACtrlF, SendDma1DCAOPH) :
+ SYM_MASK(DCACtrlF, SendDma0DCAOPH));
+ cspec->dca_rcvhdr_ctrl[4] |=
+ (u64) dca3_get_tag(&dd->pcidev->dev, cpu) <<
+ (ppd->hw_pidx ?
+ SYM_LSB(DCACtrlF, SendDma1DCAOPH) :
+ SYM_LSB(DCACtrlF, SendDma0DCAOPH));
+ qib_devinfo(dd->pcidev,
+ "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu,
+ (long long) cspec->dca_rcvhdr_ctrl[4]);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlF),
+ cspec->dca_rcvhdr_ctrl[4]);
+ cspec->dca_ctrl |= ppd->hw_pidx ?
+ SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) :
+ SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable);
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
+ }
+}
+
+static void qib_setup_dca(struct qib_devdata *dd)
+{
+ struct qib_chip_specific *cspec = dd->cspec;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++)
+ cspec->rhdr_cpu[i] = -1;
+ for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+ cspec->sdma_cpu[i] = -1;
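+	/*
+	 * Program a transfer count of 1 in every RcvHdrq DCA field; the
+	 * per-queue CPU tags are filled in later by qib_update_rhdrq_dca().
+	 */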
+ cspec->dca_rcvhdr_ctrl[0] =
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[1] =
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[2] =
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[3] =
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt));
+ cspec->dca_rcvhdr_ctrl[4] =
+ (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) |
+ (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt));
+ for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
+ qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i,
+ cspec->dca_rcvhdr_ctrl[i]);
+ for (i = 0; i < cspec->num_msix_entries; i++)
+ setup_dca_notifier(dd, &cspec->msix_entries[i]);
+}
+
+static void qib_irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct qib_irq_notify *n =
+ container_of(notify, struct qib_irq_notify, notify);
+ int cpu = cpumask_first(mask);
+
+ if (n->rcv) {
+ struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+ qib_update_rhdrq_dca(rcd, cpu);
+ } else {
+ struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+ qib_update_sdma_dca(ppd, cpu);
+ }
+}
+
+static void qib_irq_notifier_release(struct kref *ref)
+{
+ struct qib_irq_notify *n =
+ container_of(ref, struct qib_irq_notify, notify.kref);
+ struct qib_devdata *dd;
+
+ if (n->rcv) {
+ struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
+ dd = rcd->dd;
+ } else {
+ struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
+ dd = ppd->dd;
+ }
+ qib_devinfo(dd->pcidev,
+ "release on HCA notify 0x%p n 0x%p\n", ref, n);
+ kfree(n);
+}
+#endif
+
+/*
+ * Disable MSIx interrupt if enabled, call generic MSIx code
+ * to cleanup, and clear pending MSIx interrupts.
+ * Used for fallback to INTx, after reset, and when MSIx setup fails.
+ */
+static void qib_7322_nomsix(struct qib_devdata *dd)
+{
+ u64 intgranted;
+ int n;
+
+ dd->cspec->main_int_mask = ~0ULL;
+ n = dd->cspec->num_msix_entries;
+ if (n) {
+ int i;
+
+ dd->cspec->num_msix_entries = 0;
+ for (i = 0; i < n; i++) {
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
+#endif
+ irq_set_affinity_hint(
+ dd->cspec->msix_entries[i].msix.vector, NULL);
+ free_cpumask_var(dd->cspec->msix_entries[i].mask);
+ free_irq(dd->cspec->msix_entries[i].msix.vector,
+ dd->cspec->msix_entries[i].arg);
+ }
+ qib_nomsix(dd);
+ }
+ /* make sure no MSIx interrupts are left pending */
+ intgranted = qib_read_kreg64(dd, kr_intgranted);
+ if (intgranted)
+ qib_write_kreg(dd, kr_intgranted, intgranted);
+}
+
+static void qib_7322_free_irq(struct qib_devdata *dd)
+{
+ if (dd->cspec->irq) {
+ free_irq(dd->cspec->irq, dd);
+ dd->cspec->irq = 0;
+ }
+ qib_7322_nomsix(dd);
+}
+
+static void qib_setup_7322_cleanup(struct qib_devdata *dd)
+{
+ int i;
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ if (dd->flags & QIB_DCA_ENABLED) {
+ dca_remove_requester(&dd->pcidev->dev);
+ dd->flags &= ~QIB_DCA_ENABLED;
+ dd->cspec->dca_ctrl = 0;
+ qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl);
+ }
+#endif
+
+ qib_7322_free_irq(dd);
+ kfree(dd->cspec->cntrs);
+ kfree(dd->cspec->sendchkenable);
+ kfree(dd->cspec->sendgrhchk);
+ kfree(dd->cspec->sendibchk);
+ kfree(dd->cspec->msix_entries);
+ for (i = 0; i < dd->num_pports; i++) {
+ unsigned long flags;
+ u32 mask = QSFP_GPIO_MOD_PRS_N |
+ (QSFP_GPIO_MOD_PRS_N << QSFP_GPIO_PORT2_SHIFT);
+
+ kfree(dd->pport[i].cpspec->portcntrs);
+ if (dd->flags & QIB_HAS_QSFP) {
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ dd->cspec->gpio_mask &= ~mask;
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+ qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
+ }
+ if (dd->pport[i].ibport_data.smi_ah)
+ ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah);
+ }
+}
+
+/* handle SDMA interrupts */
+static void sdma_7322_intr(struct qib_devdata *dd, u64 istat)
+{
+ struct qib_pportdata *ppd0 = &dd->pport[0];
+ struct qib_pportdata *ppd1 = &dd->pport[1];
+ u64 intr0 = istat & (INT_MASK_P(SDma, 0) |
+ INT_MASK_P(SDmaIdle, 0) | INT_MASK_P(SDmaProgress, 0));
+ u64 intr1 = istat & (INT_MASK_P(SDma, 1) |
+ INT_MASK_P(SDmaIdle, 1) | INT_MASK_P(SDmaProgress, 1));
+
+ if (intr0)
+ qib_sdma_intr(ppd0);
+ if (intr1)
+ qib_sdma_intr(ppd1);
+
+ if (istat & INT_MASK_PM(SDmaCleanupDone, 0))
+ qib_sdma_process_event(ppd0, qib_sdma_event_e20_hw_started);
+ if (istat & INT_MASK_PM(SDmaCleanupDone, 1))
+ qib_sdma_process_event(ppd1, qib_sdma_event_e20_hw_started);
+}
+
+/*
+ * Set or clear the Send buffer available interrupt enable bit.
+ */
+static void qib_wantpiobuf_7322_intr(struct qib_devdata *dd, u32 needint)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if (needint)
+ dd->sendctrl |= SYM_MASK(SendCtrl, SendIntBufAvail);
+ else
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, SendIntBufAvail);
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
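+	/* chip disallows back-to-back sendctrl writes; the scratch write flushes it */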
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+}
+
+/*
+ * Somehow got an interrupt with reserved bits set in interrupt status.
+ * Print a message so we know it happened, then clear them.
+ * keep mainline interrupt handler cache-friendly
+ */
+static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat)
+{
+ u64 kills;
+
+ kills = istat & ~QIB_I_BITSEXTANT;
+ qib_dev_err(dd,
+		"Clearing reserved interrupt(s) 0x%016llx\n",
+		(unsigned long long) kills);
+ qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills));
+}
+
+/* keep mainline interrupt handler cache-friendly */
+static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd)
+{
+ u32 gpiostatus;
+ int handled = 0;
+ int pidx;
+
+ /*
+ * Boards for this chip currently don't use GPIO interrupts,
+ * so clear by writing GPIOstatus to GPIOclear, and complain
+ * to developer. To avoid endless repeats, clear
+ * the bits in the mask, since there is some kind of
+ * programming error or chip problem.
+ */
+ gpiostatus = qib_read_kreg32(dd, kr_gpio_status);
+ /*
+ * In theory, writing GPIOstatus to GPIOclear could
+ * have a bad side-effect on some diagnostic that wanted
+ * to poll for a status-change, but the various shadows
+ * make that problematic at best. Diags will just suppress
+ * all GPIO interrupts during such tests.
+ */
+ qib_write_kreg(dd, kr_gpio_clear, gpiostatus);
+ /*
+ * Check for QSFP MOD_PRS changes
+ * only works for single port if IB1 != pidx1
+ */
+ for (pidx = 0; pidx < dd->num_pports && (dd->flags & QIB_HAS_QSFP);
+ ++pidx) {
+ struct qib_pportdata *ppd;
+ struct qib_qsfp_data *qd;
+ u32 mask;
+ if (!dd->pport[pidx].link_speed_supported)
+ continue;
+ mask = QSFP_GPIO_MOD_PRS_N;
+ ppd = dd->pport + pidx;
+ mask <<= (QSFP_GPIO_PORT2_SHIFT * ppd->hw_pidx);
+ if (gpiostatus & dd->cspec->gpio_mask & mask) {
+ u64 pins;
+ qd = &ppd->cpspec->qsfp_data;
+ gpiostatus &= ~mask;
+ pins = qib_read_kreg64(dd, kr_extstatus);
+ pins >>= SYM_LSB(EXTStatus, GPIOIn);
+ if (!(pins & mask)) {
+ ++handled;
+ qd->t_insert = jiffies;
+ queue_work(ib_wq, &qd->work);
+ }
+ }
+ }
+
+ if (gpiostatus && !handled) {
+ const u32 mask = qib_read_kreg32(dd, kr_gpio_mask);
+ u32 gpio_irq = mask & gpiostatus;
+
+ /*
+ * Clear any troublemakers, and update chip from shadow
+ */
+ dd->cspec->gpio_mask &= ~gpio_irq;
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ }
+}
+
+/*
+ * Handle errors and unusual events first, separate function
+ * to improve cache hits for fast path interrupt handling.
+ */
+static noinline void unlikely_7322_intr(struct qib_devdata *dd, u64 istat)
+{
+ if (istat & ~QIB_I_BITSEXTANT)
+ unknown_7322_ibits(dd, istat);
+ if (istat & QIB_I_GPIO)
+ unknown_7322_gpio_intr(dd);
+ if (istat & QIB_I_C_ERROR) {
+ qib_write_kreg(dd, kr_errmask, 0ULL);
+ tasklet_schedule(&dd->error_tasklet);
+ }
+ if (istat & INT_MASK_P(Err, 0) && dd->rcd[0])
+ handle_7322_p_errors(dd->rcd[0]->ppd);
+ if (istat & INT_MASK_P(Err, 1) && dd->rcd[1])
+ handle_7322_p_errors(dd->rcd[1]->ppd);
+}
+
+/*
+ * Dynamically adjust the rcv int timeout for a context based on incoming
+ * packet rate.
+ */
+static void adjust_rcv_timeout(struct qib_ctxtdata *rcd, int npkts)
+{
+ struct qib_devdata *dd = rcd->dd;
+ u32 timeout = dd->cspec->rcvavail_timeout[rcd->ctxt];
+
+ /*
+ * Dynamically adjust idle timeout on chip
+ * based on number of packets processed.
+ */
+ if (npkts < rcv_int_count && timeout > 2)
+ timeout >>= 1;
+ else if (npkts >= rcv_int_count && timeout < rcv_int_timeout)
+ timeout = min(timeout << 1, rcv_int_timeout);
+ else
+ return;
+
+ dd->cspec->rcvavail_timeout[rcd->ctxt] = timeout;
+ qib_write_kreg(dd, kr_rcvavailtimeout + rcd->ctxt, timeout);
+}
+
+/*
+ * This is the main interrupt handler.
+ * It will normally only be used for low frequency interrupts but may
+ * have to handle all interrupts if INTx is enabled or fewer than normal
+ * MSIx interrupts were allocated.
+ * This routine should ignore the interrupt bits for any of the
+ * dedicated MSIx handlers.
+ */
+static irqreturn_t qib_7322intr(int irq, void *data)
+{
+ struct qib_devdata *dd = data;
+ irqreturn_t ret;
+ u64 istat;
+ u64 ctxtrbits;
+ u64 rmask;
+ unsigned i;
+ u32 npkts;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) {
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ ret = IRQ_HANDLED;
+ goto bail;
+ }
+
+ istat = qib_read_kreg64(dd, kr_intstatus);
+
+ if (unlikely(istat == ~0ULL)) {
+ qib_bad_intrstatus(dd);
+ qib_dev_err(dd, "Interrupt status all f's, skipping\n");
+ /* don't know if it was our interrupt or not */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ istat &= dd->cspec->main_int_mask;
+ if (unlikely(!istat)) {
+ /* already handled, or shared and not us */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* handle "errors" of various kinds first, device ahead of port */
+ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
+ QIB_I_C_ERROR | INT_MASK_P(Err, 0) |
+ INT_MASK_P(Err, 1))))
+ unlikely_7322_intr(dd, istat);
+
+ /*
+ * Clear the interrupt bits we found set, relatively early, so we
+	 * "know" the chip will have seen this by the time we process
+ * the queue, and will re-interrupt if necessary. The processor
+ * itself won't take the interrupt again until we return.
+ */
+ qib_write_kreg(dd, kr_intclear, istat);
+
+ /*
+ * Handle kernel receive queues before checking for pio buffers
+ * available since receives can overflow; piobuf waiters can afford
+ * a few extra cycles, since they were waiting anyway.
+ */
+ ctxtrbits = istat & (QIB_I_RCVAVAIL_MASK | QIB_I_RCVURG_MASK);
+ if (ctxtrbits) {
+ rmask = (1ULL << QIB_I_RCVAVAIL_LSB) |
+ (1ULL << QIB_I_RCVURG_LSB);
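+		/*
+		 * rmask starts at the RcvAvail/RcvUrg bits for context 0 and
+		 * is shifted left one bit per kernel context below.
+		 */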
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ if (ctxtrbits & rmask) {
+ ctxtrbits &= ~rmask;
+ if (dd->rcd[i])
+ qib_kreceive(dd->rcd[i], NULL, &npkts);
+ }
+ rmask <<= 1;
+ }
+ if (ctxtrbits) {
+ ctxtrbits = (ctxtrbits >> QIB_I_RCVAVAIL_LSB) |
+ (ctxtrbits >> QIB_I_RCVURG_LSB);
+ qib_handle_urcv(dd, ctxtrbits);
+ }
+ }
+
+ if (istat & (QIB_I_P_SDMAINT(0) | QIB_I_P_SDMAINT(1)))
+ sdma_7322_intr(dd, istat);
+
+ if ((istat & QIB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED))
+ qib_ib_piobufavail(dd);
+
+ ret = IRQ_HANDLED;
+bail:
+ return ret;
+}
+
+/*
+ * Dedicated receive packet available interrupt handler.
+ */
+static irqreturn_t qib_7322pintr(int irq, void *data)
+{
+ struct qib_ctxtdata *rcd = data;
+ struct qib_devdata *dd = rcd->dd;
+ u32 npkts;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* Clear the interrupt bit we expect to be set. */
+ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
+ (1ULL << QIB_I_RCVURG_LSB)) << rcd->ctxt);
+
+ qib_kreceive(rcd, NULL, &npkts);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Dedicated Send buffer available interrupt handler.
+ */
+static irqreturn_t qib_7322bufavail(int irq, void *data)
+{
+ struct qib_devdata *dd = data;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* Clear the interrupt bit we expect to be set. */
+ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
+
+ /* qib_ib_piobufavail() will clear the want PIO interrupt if needed */
+ if (dd->flags & QIB_INITTED)
+ qib_ib_piobufavail(dd);
+ else
+ qib_wantpiobuf_7322_intr(dd, 0);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Dedicated Send DMA interrupt handler.
+ */
+static irqreturn_t sdma_intr(int irq, void *data)
+{
+ struct qib_pportdata *ppd = data;
+ struct qib_devdata *dd = ppd->dd;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* Clear the interrupt bit we expect to be set. */
+ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
+ INT_MASK_P(SDma, 1) : INT_MASK_P(SDma, 0));
+ qib_sdma_intr(ppd);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Dedicated Send DMA idle interrupt handler.
+ */
+static irqreturn_t sdma_idle_intr(int irq, void *data)
+{
+ struct qib_pportdata *ppd = data;
+ struct qib_devdata *dd = ppd->dd;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* Clear the interrupt bit we expect to be set. */
+ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
+ INT_MASK_P(SDmaIdle, 1) : INT_MASK_P(SDmaIdle, 0));
+ qib_sdma_intr(ppd);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Dedicated Send DMA progress interrupt handler.
+ */
+static irqreturn_t sdma_progress_intr(int irq, void *data)
+{
+ struct qib_pportdata *ppd = data;
+ struct qib_devdata *dd = ppd->dd;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* Clear the interrupt bit we expect to be set. */
+ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
+ INT_MASK_P(SDmaProgress, 1) :
+ INT_MASK_P(SDmaProgress, 0));
+ qib_sdma_intr(ppd);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Dedicated Send DMA cleanup interrupt handler.
+ */
+static irqreturn_t sdma_cleanup_intr(int irq, void *data)
+{
+ struct qib_pportdata *ppd = data;
+ struct qib_devdata *dd = ppd->dd;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ return IRQ_HANDLED;
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* Clear the interrupt bit we expect to be set. */
+ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
+ INT_MASK_PM(SDmaCleanupDone, 1) :
+ INT_MASK_PM(SDmaCleanupDone, 0));
+ qib_sdma_process_event(ppd, qib_sdma_event_e20_hw_started);
+
+ return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+ if (!m->dca)
+ return;
+ qib_devinfo(dd->pcidev,
+ "Disabling notifier on HCA %d irq %d\n",
+ dd->unit,
+ m->msix.vector);
+ irq_set_affinity_notifier(
+ m->msix.vector,
+ NULL);
+ m->notifier = NULL;
+}
+
+static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
+{
+ struct qib_irq_notify *n;
+
+ if (!m->dca)
+ return;
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
+ if (n) {
+ int ret;
+
+ m->notifier = n;
+ n->notify.irq = m->msix.vector;
+ n->notify.notify = qib_irq_notifier_notify;
+ n->notify.release = qib_irq_notifier_release;
+ n->arg = m->arg;
+ n->rcv = m->rcv;
+ qib_devinfo(dd->pcidev,
+ "set notifier irq %d rcv %d notify %p\n",
+ n->notify.irq, n->rcv, &n->notify);
+ ret = irq_set_affinity_notifier(
+ n->notify.irq,
+ &n->notify);
+ if (ret) {
+ m->notifier = NULL;
+ kfree(n);
+ }
+ }
+}
+
+#endif
+
+/*
+ * Set up our chip-specific interrupt handler.
+ * The interrupt type has already been setup, so
+ * we just need to do the registration and error checking.
+ * If we are using MSIx interrupts, we may fall back to
+ * INTx later, if the interrupt handler doesn't get called
+ * within 1/2 second (see verify_interrupt()).
+ */
+static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
+{
+ int ret, i, msixnum;
+ u64 redirect[6];
+ u64 mask;
+ const struct cpumask *local_mask;
+ int firstcpu, secondcpu = 0, currrcvcpu = 0;
+
+ if (!dd->num_pports)
+ return;
+
+ if (clearpend) {
+ /*
+ * if not switching interrupt types, be sure interrupts are
+ * disabled, and then clear anything pending at this point,
+ * because we are starting clean.
+ */
+ qib_7322_set_intr_state(dd, 0);
+
+ /* clear the reset error, init error/hwerror mask */
+ qib_7322_init_hwerrors(dd);
+
+ /* clear any interrupt bits that might be set */
+ qib_write_kreg(dd, kr_intclear, ~0ULL);
+
+ /* make sure no pending MSIx intr, and clear diag reg */
+ qib_write_kreg(dd, kr_intgranted, ~0ULL);
+ qib_write_kreg(dd, kr_vecclr_wo_int, ~0ULL);
+ }
+
+ if (!dd->cspec->num_msix_entries) {
+ /* Try to get INTx interrupt */
+try_intx:
+ if (!dd->pcidev->irq) {
+ qib_dev_err(dd,
+ "irq is 0, BIOS error? Interrupts won't work\n");
+ goto bail;
+ }
+ ret = request_irq(dd->pcidev->irq, qib_7322intr,
+ IRQF_SHARED, QIB_DRV_NAME, dd);
+ if (ret) {
+ qib_dev_err(dd,
+ "Couldn't setup INTx interrupt (irq=%d): %d\n",
+ dd->pcidev->irq, ret);
+ goto bail;
+ }
+ dd->cspec->irq = dd->pcidev->irq;
+ dd->cspec->main_int_mask = ~0ULL;
+ goto bail;
+ }
+
+ /* Try to get MSIx interrupts */
+ memset(redirect, 0, sizeof redirect);
+ mask = ~0ULL;
+ msixnum = 0;
+ local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+ firstcpu = cpumask_first(local_mask);
+ if (firstcpu >= nr_cpu_ids ||
+ cpumask_weight(local_mask) == num_online_cpus()) {
+ local_mask = topology_core_cpumask(0);
+ firstcpu = cpumask_first(local_mask);
+ }
+ if (firstcpu < nr_cpu_ids) {
+ secondcpu = cpumask_next(firstcpu, local_mask);
+ if (secondcpu >= nr_cpu_ids)
+ secondcpu = firstcpu;
+ currrcvcpu = secondcpu;
+ }
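+	/*
+	 * Receive-context vectors are spread across the CPUs local to the
+	 * PCIe bus, starting at the second CPU; all other vectors get an
+	 * affinity hint for the first CPU (see below).
+	 */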
+ for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) {
+ irq_handler_t handler;
+ void *arg;
+ u64 val;
+ int lsb, reg, sh;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ int dca = 0;
+#endif
+
+ dd->cspec->msix_entries[msixnum].
+ name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
+ = '\0';
+ if (i < ARRAY_SIZE(irq_table)) {
+ if (irq_table[i].port) {
+ /* skip if for a non-configured port */
+ if (irq_table[i].port > dd->num_pports)
+ continue;
+ arg = dd->pport + irq_table[i].port - 1;
+ } else
+ arg = dd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca = irq_table[i].dca;
+#endif
+ lsb = irq_table[i].lsb;
+ handler = irq_table[i].handler;
+ snprintf(dd->cspec->msix_entries[msixnum].name,
+ sizeof(dd->cspec->msix_entries[msixnum].name)
+ - 1,
+ QIB_DRV_NAME "%d%s", dd->unit,
+ irq_table[i].name);
+ } else {
+ unsigned ctxt;
+
+ ctxt = i - ARRAY_SIZE(irq_table);
+ /* per krcvq context receive interrupt */
+ arg = dd->rcd[ctxt];
+ if (!arg)
+ continue;
+ if (qib_krcvq01_no_msi && ctxt < 2)
+ continue;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca = 1;
+#endif
+ lsb = QIB_I_RCVAVAIL_LSB + ctxt;
+ handler = qib_7322pintr;
+ snprintf(dd->cspec->msix_entries[msixnum].name,
+ sizeof(dd->cspec->msix_entries[msixnum].name)
+ - 1,
+ QIB_DRV_NAME "%d (kctx)", dd->unit);
+ }
+ ret = request_irq(
+ dd->cspec->msix_entries[msixnum].msix.vector,
+ handler, 0, dd->cspec->msix_entries[msixnum].name,
+ arg);
+ if (ret) {
+ /*
+ * Shouldn't happen since the enable said we could
+ * have as many as we are trying to setup here.
+ */
+ qib_dev_err(dd,
+ "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n",
+ msixnum,
+ dd->cspec->msix_entries[msixnum].msix.vector,
+ ret);
+ qib_7322_nomsix(dd);
+ goto try_intx;
+ }
+ dd->cspec->msix_entries[msixnum].arg = arg;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->cspec->msix_entries[msixnum].dca = dca;
+ dd->cspec->msix_entries[msixnum].rcv =
+ handler == qib_7322pintr;
+#endif
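+		/*
+		 * Route this source to the new MSIx vector: drop its bit
+		 * from the fallback mask handled by qib_7322intr() and
+		 * record the vector number in the matching IntRedirect
+		 * register field.
+		 */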
+ if (lsb >= 0) {
+ reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
+ sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
+ SYM_LSB(IntRedirect0, vec1);
+ mask &= ~(1ULL << lsb);
+ redirect[reg] |= ((u64) msixnum) << sh;
+ }
+ val = qib_read_kreg64(dd, 2 * msixnum + 1 +
+ (QIB_7322_MsixTable_OFFS / sizeof(u64)));
+ if (firstcpu < nr_cpu_ids &&
+ zalloc_cpumask_var(
+ &dd->cspec->msix_entries[msixnum].mask,
+ GFP_KERNEL)) {
+ if (handler == qib_7322pintr) {
+ cpumask_set_cpu(currrcvcpu,
+ dd->cspec->msix_entries[msixnum].mask);
+ currrcvcpu = cpumask_next(currrcvcpu,
+ local_mask);
+ if (currrcvcpu >= nr_cpu_ids)
+ currrcvcpu = secondcpu;
+ } else {
+ cpumask_set_cpu(firstcpu,
+ dd->cspec->msix_entries[msixnum].mask);
+ }
+ irq_set_affinity_hint(
+ dd->cspec->msix_entries[msixnum].msix.vector,
+ dd->cspec->msix_entries[msixnum].mask);
+ }
+ msixnum++;
+ }
+ /* Initialize the vector mapping */
+ for (i = 0; i < ARRAY_SIZE(redirect); i++)
+ qib_write_kreg(dd, kr_intredirect + i, redirect[i]);
+ dd->cspec->main_int_mask = mask;
+ tasklet_init(&dd->error_tasklet, qib_error_tasklet,
+ (unsigned long)dd);
+bail:;
+}
+
+/**
+ * qib_7322_boardname - fill in the board name and note features
+ * @dd: the qlogic_ib device
+ *
+ * info will be based on the board revision register
+ */
+static unsigned qib_7322_boardname(struct qib_devdata *dd)
+{
+ /* Will need enumeration of board-types here */
+ char *n;
+ u32 boardid, namelen;
+ unsigned features = DUAL_PORT_CAP;
+
+ boardid = SYM_FIELD(dd->revision, Revision, BoardID);
+
+ switch (boardid) {
+ case 0:
+ n = "InfiniPath_QLE7342_Emulation";
+ break;
+ case 1:
+ n = "InfiniPath_QLE7340";
+ dd->flags |= QIB_HAS_QSFP;
+ features = PORT_SPD_CAP;
+ break;
+ case 2:
+ n = "InfiniPath_QLE7342";
+ dd->flags |= QIB_HAS_QSFP;
+ break;
+ case 3:
+ n = "InfiniPath_QMI7342";
+ break;
+ case 4:
+ n = "InfiniPath_Unsupported7342";
+ qib_dev_err(dd, "Unsupported version of QMH7342\n");
+ features = 0;
+ break;
+ case BOARD_QMH7342:
+ n = "InfiniPath_QMH7342";
+ features = 0x24;
+ break;
+ case BOARD_QME7342:
+ n = "InfiniPath_QME7342";
+ break;
+ case 8:
+ n = "InfiniPath_QME7362";
+ dd->flags |= QIB_HAS_QSFP;
+ break;
+ case 15:
+ n = "InfiniPath_QLE7342_TEST";
+ dd->flags |= QIB_HAS_QSFP;
+ break;
+ default:
+ n = "InfiniPath_QLE73xy_UNKNOWN";
+ qib_dev_err(dd, "Unknown 7322 board type %u\n", boardid);
+ break;
+ }
+ dd->board_atten = 1; /* index into txdds_Xdr */
+
+ namelen = strlen(n) + 1;
+ dd->boardname = kmalloc(namelen, GFP_KERNEL);
+ if (!dd->boardname)
+ qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
+ else
+ snprintf(dd->boardname, namelen, "%s", n);
+
+ snprintf(dd->boardversion, sizeof(dd->boardversion),
+ "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
+ QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname,
+ (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch),
+ dd->majrev, dd->minrev,
+ (unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
+
+ if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
+ qib_devinfo(dd->pcidev,
+ "IB%u: Forced to single port mode by module parameter\n",
+ dd->unit);
+ features &= PORT_SPD_CAP;
+ }
+
+ return features;
+}
+
+/*
+ * This routine sleeps, so it can only be called from user context, not
+ * from interrupt context.
+ */
+static int qib_do_7322_reset(struct qib_devdata *dd)
+{
+ u64 val;
+ u64 *msix_vecsave;
+ int i, msix_entries, ret = 1;
+ u16 cmdval;
+ u8 int_line, clinesz;
+ unsigned long flags;
+
+ /* Use dev_err so it shows up in logs, etc. */
+ qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit);
+
+ qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz);
+
+ msix_entries = dd->cspec->num_msix_entries;
+
+ /* no interrupts till re-initted */
+ qib_7322_set_intr_state(dd, 0);
+
+ if (msix_entries) {
+ qib_7322_nomsix(dd);
+ /* can be up to 512 bytes, too big for stack */
+ msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries *
+ sizeof(u64), GFP_KERNEL);
+ if (!msix_vecsave)
+ qib_dev_err(dd, "No mem to save MSIx data\n");
+ } else
+ msix_vecsave = NULL;
+
+ /*
+ * Core PCI (as of 2.6.18) doesn't save or rewrite the full vector
+ * info that is set up by the BIOS, so we have to save and restore
+ * it ourselves. There is some risk something could change it,
+ * after we save it, but since we have disabled the MSIx, it
+ * shouldn't be touched...
+ */
+ for (i = 0; i < msix_entries; i++) {
+ u64 vecaddr, vecdata;
+ vecaddr = qib_read_kreg64(dd, 2 * i +
+ (QIB_7322_MsixTable_OFFS / sizeof(u64)));
+ vecdata = qib_read_kreg64(dd, 1 + 2 * i +
+ (QIB_7322_MsixTable_OFFS / sizeof(u64)));
+ if (msix_vecsave) {
+ msix_vecsave[2 * i] = vecaddr;
+ /* save it without the masked bit set */
+ msix_vecsave[1 + 2 * i] = vecdata & ~0x100000000ULL;
+ }
+ }
+
+ dd->pport->cpspec->ibdeltainprog = 0;
+ dd->pport->cpspec->ibsymdelta = 0;
+ dd->pport->cpspec->iblnkerrdelta = 0;
+ dd->pport->cpspec->ibmalfdelta = 0;
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
+
+ /*
+ * Keep chip from being accessed until we are ready. Use
+ * writeq() directly, to allow the write even though QIB_PRESENT
+ * isn't set.
+ */
+ dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR);
+ dd->flags |= QIB_DOING_RESET;
+ val = dd->control | QLOGIC_IB_C_RESET;
+ writeq(val, &dd->kregbase[kr_control]);
+
+ for (i = 1; i <= 5; i++) {
+ /*
+ * Allow MBIST, etc. to complete; longer on each retry.
+ * We sometimes get machine checks from bus timeout if no
+ * response, so for now, make it *really* long.
+ */
+ msleep(1000 + (1 + i) * 3000);
+
+ qib_pcie_reenable(dd, cmdval, int_line, clinesz);
+
+ /*
+ * Use readq directly, so we don't need to mark it as PRESENT
+ * until we get a successful indication that all is well.
+ */
+ val = readq(&dd->kregbase[kr_revision]);
+ if (val == dd->revision)
+ break;
+ if (i == 5) {
+ qib_dev_err(dd,
+ "Failed to initialize after reset, unusable\n");
+ ret = 0;
+ goto bail;
+ }
+ }
+
+ dd->flags |= QIB_PRESENT; /* it's back */
+
+ if (msix_entries) {
+ /* restore the MSIx vector address and data if saved above */
+ for (i = 0; i < msix_entries; i++) {
+ dd->cspec->msix_entries[i].msix.entry = i;
+ if (!msix_vecsave || !msix_vecsave[2 * i])
+ continue;
+ qib_write_kreg(dd, 2 * i +
+ (QIB_7322_MsixTable_OFFS / sizeof(u64)),
+ msix_vecsave[2 * i]);
+ qib_write_kreg(dd, 1 + 2 * i +
+ (QIB_7322_MsixTable_OFFS / sizeof(u64)),
+ msix_vecsave[1 + 2 * i]);
+ }
+ }
+
+ /* initialize the remaining registers. */
+ for (i = 0; i < dd->num_pports; ++i)
+ write_7322_init_portregs(&dd->pport[i]);
+ write_7322_initregs(dd);
+
+ if (qib_pcie_params(dd, dd->lbus_width,
+ &dd->cspec->num_msix_entries,
+ dd->cspec->msix_entries))
+ qib_dev_err(dd,
+ "Reset failed to setup PCIe or interrupts; continuing anyway\n");
+
+ qib_setup_7322_interrupt(dd, 1);
+
+ for (i = 0; i < dd->num_pports; ++i) {
+ struct qib_pportdata *ppd = &dd->pport[i];
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_FORCE_NOTIFY;
+ ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+
+bail:
+ dd->flags &= ~QIB_DOING_RESET; /* OK or not, no longer resetting */
+ kfree(msix_vecsave);
+ return ret;
+}
+
+/**
+ * qib_7322_put_tid - write a TID to the chip
+ * @dd: the qlogic_ib device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; tidinvalid if freeing
+ */
+static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
+ u32 type, unsigned long pa)
+{
+ if (!(dd->flags & QIB_PRESENT))
+ return;
+ if (pa != dd->tidinvalid) {
+ u64 chippa = pa >> IBA7322_TID_PA_SHIFT;
+
+ /* paranoia checks */
+ if (pa != (chippa << IBA7322_TID_PA_SHIFT)) {
+ qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n",
+ pa);
+ return;
+ }
+ if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) {
+ qib_dev_err(dd,
+ "Physical page address 0x%lx larger than supported\n",
+ pa);
+ return;
+ }
+
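+		/* the buffer-size code occupies the bits above the 2KB-aligned address */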
+ if (type == RCVHQ_RCV_TYPE_EAGER)
+ chippa |= dd->tidtemplate;
+ else /* for now, always full 4KB page */
+ chippa |= IBA7322_TID_SZ_4K;
+ pa = chippa;
+ }
+ writeq(pa, tidptr);
+ mmiowb();
+}
+
+/**
+ * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager
+ * @dd: the qlogic_ib device
+ * @rcd: the context data
+ *
+ * clear all TID entries for a ctxt, expected and eager.
+ * Used from qib_close().
+ */
+static void qib_7322_clear_tids(struct qib_devdata *dd,
+ struct qib_ctxtdata *rcd)
+{
+ u64 __iomem *tidbase;
+ unsigned long tidinv;
+ u32 ctxt;
+ int i;
+
+ if (!dd->kregbase || !rcd)
+ return;
+
+ ctxt = rcd->ctxt;
+
+ tidinv = dd->tidinvalid;
+ tidbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase +
+ dd->rcvtidbase +
+ ctxt * dd->rcvtidcnt * sizeof(*tidbase));
+
+ for (i = 0; i < dd->rcvtidcnt; i++)
+ qib_7322_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+ tidinv);
+
+ tidbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase +
+ dd->rcvegrbase +
+ rcd->rcvegr_tid_base * sizeof(*tidbase));
+
+ for (i = 0; i < rcd->rcvegrcnt; i++)
+ qib_7322_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+ tidinv);
+}
+
+/**
+ * qib_7322_tidtemplate - setup constants for TID updates
+ * @dd: the qlogic_ib device
+ *
+ * We set up values used frequently, to avoid recalculating them each time
+ */
+static void qib_7322_tidtemplate(struct qib_devdata *dd)
+{
+ /*
+ * For now, we always allocate 4KB buffers (at init) so we can
+ * receive max size packets. We may want a module parameter to
+ * specify 2KB or 4KB and/or make it per port instead of per device
+ * for those who want to reduce memory footprint. Note that the
+ * rcvhdrentsize size must be large enough to hold the largest
+ * IB header (currently 96 bytes) that we expect to handle (plus of
+ * course the 2 dwords of RHF).
+ */
+ if (dd->rcvegrbufsize == 2048)
+ dd->tidtemplate = IBA7322_TID_SZ_2K;
+ else if (dd->rcvegrbufsize == 4096)
+ dd->tidtemplate = IBA7322_TID_SZ_4K;
+ dd->tidinvalid = 0;
+}
+
+/**
+ * qib_7322_get_base_info - set chip-specific flags for user code
+ * @rcd: the qlogic_ib ctxt
+ * @kinfo: qib_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+
+static int qib_7322_get_base_info(struct qib_ctxtdata *rcd,
+ struct qib_base_info *kinfo)
+{
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_CTXT_MSB_IN_QP |
+ QIB_RUNTIME_PCIE | QIB_RUNTIME_NODMA_RTAIL |
+ QIB_RUNTIME_HDRSUPP | QIB_RUNTIME_SDMA;
+ if (rcd->dd->cspec->r1)
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_RCHK;
+ if (rcd->dd->flags & QIB_USE_SPCL_TRIG)
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_SPECIAL_TRIGGER;
+
+ return 0;
+}
+
+static struct qib_message_header *
+qib_7322_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
+{
+ u32 offset = qib_hdrget_offset(rhf_addr);
+
+ return (struct qib_message_header *)
+ (rhf_addr - dd->rhf_offset + offset);
+}
+
+/*
+ * Configure number of contexts.
+ */
+static void qib_7322_config_ctxts(struct qib_devdata *dd)
+{
+ unsigned long flags;
+ u32 nchipctxts;
+
+ nchipctxts = qib_read_kreg32(dd, kr_contextcnt);
+ dd->cspec->numctxts = nchipctxts;
+ if (qib_n_krcv_queues > 1 && dd->num_pports) {
+ dd->first_user_ctxt = NUM_IB_PORTS +
+ (qib_n_krcv_queues - 1) * dd->num_pports;
+ if (dd->first_user_ctxt > nchipctxts)
+ dd->first_user_ctxt = nchipctxts;
+ dd->n_krcv_queues = dd->first_user_ctxt / dd->num_pports;
+ } else {
+ dd->first_user_ctxt = NUM_IB_PORTS;
+ dd->n_krcv_queues = 1;
+ }
+
+ if (!qib_cfgctxts) {
+ int nctxts = dd->first_user_ctxt + num_online_cpus();
+
+ if (nctxts <= 6)
+ dd->ctxtcnt = 6;
+ else if (nctxts <= 10)
+ dd->ctxtcnt = 10;
+ else if (nctxts <= nchipctxts)
+ dd->ctxtcnt = nchipctxts;
+ } else if (qib_cfgctxts < dd->num_pports)
+ dd->ctxtcnt = dd->num_pports;
+ else if (qib_cfgctxts <= nchipctxts)
+ dd->ctxtcnt = qib_cfgctxts;
+ if (!dd->ctxtcnt) /* none of the above, set to max */
+ dd->ctxtcnt = nchipctxts;
+
+ /*
+ * Chip can be configured for 6, 10, or 18 ctxts, and choice
+ * affects number of eager TIDs per ctxt (1K, 2K, 4K).
+ * Lock to be paranoid about later motion, etc.
+ */
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
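+	/* ContextCfg encoding: 0 = 6 contexts, 1 = 10, 2 = 18 */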
+ if (dd->ctxtcnt > 10)
+ dd->rcvctrl |= 2ULL << SYM_LSB(RcvCtrl, ContextCfg);
+ else if (dd->ctxtcnt > 6)
+ dd->rcvctrl |= 1ULL << SYM_LSB(RcvCtrl, ContextCfg);
+ /* else configure for default 6 receive ctxts */
+
+ /* The XRC opcode is 5. */
+ dd->rcvctrl |= 5ULL << SYM_LSB(RcvCtrl, XrcTypeCode);
+
+ /*
+ * RcvCtrl *must* be written here so that the
+ * chip understands how to change rcvegrcnt below.
+ */
+ qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+
+ /* kr_rcvegrcnt changes based on the number of contexts enabled */
+ dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt);
+ if (qib_rcvhdrcnt)
+ dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt);
+ else
+ dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt,
+ dd->num_pports > 1 ? 1024U : 2048U);
+}
+
+static int qib_7322_get_ib_cfg(struct qib_pportdata *ppd, int which)
+{
+
+ int lsb, ret = 0;
+ u64 maskr; /* right-justified mask */
+
+ switch (which) {
+
+ case QIB_IB_CFG_LWID_ENB: /* Get allowed Link-width */
+ ret = ppd->link_width_enabled;
+ goto done;
+
+ case QIB_IB_CFG_LWID: /* Get currently active Link-width */
+ ret = ppd->link_width_active;
+ goto done;
+
+ case QIB_IB_CFG_SPD_ENB: /* Get allowed Link speeds */
+ ret = ppd->link_speed_enabled;
+ goto done;
+
+ case QIB_IB_CFG_SPD: /* Get current Link spd */
+ ret = ppd->link_speed_active;
+ goto done;
+
+ case QIB_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP);
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP);
+ break;
+
+ case QIB_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_LANE_REV_SUPPORTED);
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED);
+ break;
+
+ case QIB_IB_CFG_LINKLATENCY:
+ ret = qib_read_kreg_port(ppd, krp_ibcstatus_b) &
+ SYM_MASK(IBCStatusB_0, LinkRoundTripLatency);
+ goto done;
+
+ case QIB_IB_CFG_OP_VLS:
+ ret = ppd->vls_operational;
+ goto done;
+
+ case QIB_IB_CFG_VL_HIGH_CAP:
+ ret = 16;
+ goto done;
+
+ case QIB_IB_CFG_VL_LOW_CAP:
+ ret = 16;
+ goto done;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ ret = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0,
+ OverrunThreshold);
+ goto done;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ ret = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0,
+ PhyerrThreshold);
+ goto done;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ ret = (ppd->cpspec->ibcctrl_a &
+ SYM_MASK(IBCCtrlA_0, LinkDownDefaultState)) ?
+ IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL;
+ goto done;
+
+ case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */
+ lsb = IBA7322_IBC_HRTBT_LSB;
+ maskr = IBA7322_IBC_HRTBT_RMASK; /* OR of AUTO and ENB */
+ break;
+
+ case QIB_IB_CFG_PMA_TICKS:
+ /*
+ * 0x00 = 10x link transfer rate or 4 nsec. for 2.5Gbs
+ * Since the clock is always 250MHz, the value is 3, 1 or 0.
+ */
+ if (ppd->link_speed_active == QIB_IB_QDR)
+ ret = 3;
+ else if (ppd->link_speed_active == QIB_IB_DDR)
+ ret = 1;
+ else
+ ret = 0;
+ goto done;
+
+ default:
+ ret = -EINVAL;
+ goto done;
+ }
+ ret = (int)((ppd->cpspec->ibcctrl_b >> lsb) & maskr);
+done:
+ return ret;
+}
+
+/*
+ * Below again cribbed liberally from older version. Do not lean
+ * heavily on it.
+ */
+#define IBA7322_IBC_DLIDLMC_SHIFT QIB_7322_IBCCtrlB_0_IB_DLID_LSB
+#define IBA7322_IBC_DLIDLMC_MASK (QIB_7322_IBCCtrlB_0_IB_DLID_RMASK \
+ | (QIB_7322_IBCCtrlB_0_IB_DLID_MASK_RMASK << 16))
+
+static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 maskr; /* right-justified mask */
+ int lsb, ret = 0;
+ u16 lcmd, licmd;
+ unsigned long flags;
+
+ switch (which) {
+ case QIB_IB_CFG_LIDLMC:
+ /*
+ * Set LID and LMC. Combined to avoid possible hazard
+ * caller puts LMC in 16MSbits, DLID in 16LSbits of val
+ */
+ lsb = IBA7322_IBC_DLIDLMC_SHIFT;
+ maskr = IBA7322_IBC_DLIDLMC_MASK;
+ /*
+ * For header-checking, the SLID in the packet will
+ * be masked with SendIBSLMCMask, and compared
+ * with SendIBSLIDAssignMask. Make sure we do not
+ * set any bits not covered by the mask, or we get
+ * false-positives.
+ */
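+		/*
+		 * The value in the upper 16 bits acts as the LMC mask here:
+		 * ANDing it with the LID clears the low (path) bits before
+		 * programming the SLID the chip checks against.
+		 */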
+ qib_write_kreg_port(ppd, krp_sendslid,
+ val & (val >> 16) & SendIBSLIDAssignMask);
+ qib_write_kreg_port(ppd, krp_sendslidmask,
+ (val >> 16) & SendIBSLMCMask);
+ break;
+
+ case QIB_IB_CFG_LWID_ENB: /* set allowed Link-width */
+ ppd->link_width_enabled = val;
+ /* convert IB value to chip register value */
+ if (val == IB_WIDTH_1X)
+ val = 0;
+ else if (val == IB_WIDTH_4X)
+ val = 1;
+ else
+ val = 3;
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_NUM_CHANNELS);
+ lsb = SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS);
+ break;
+
+ case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */
+ /*
+ * As with width, only write the actual register if the
+ * link is currently down, otherwise takes effect on next
+ * link change. Since setting is being explicitly requested
+ * (via MAD or sysfs), clear autoneg failure status if speed
+ * autoneg is enabled.
+ */
+ ppd->link_speed_enabled = val;
+ val <<= IBA7322_IBC_SPEED_LSB;
+ maskr = IBA7322_IBC_SPEED_MASK | IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK;
+ if (val & (val - 1)) {
+			/* Multiple speeds enabled */
+ val |= IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK;
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else if (val & IBA7322_IBC_SPEED_QDR)
+ val |= IBA7322_IBC_IBTA_1_2_MASK;
+ /* IBTA 1.2 mode + min/max + speed bits are contiguous */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_ENHANCED_MODE);
+ break;
+
+ case QIB_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP);
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP);
+ break;
+
+ case QIB_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_LANE_REV_SUPPORTED);
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED);
+ break;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ maskr = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0,
+ OverrunThreshold);
+ if (maskr != val) {
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, OverrunThreshold);
+ ppd->cpspec->ibcctrl_a |= (u64) val <<
+ SYM_LSB(IBCCtrlA_0, OverrunThreshold);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ maskr = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0,
+ PhyerrThreshold);
+ if (maskr != val) {
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, PhyerrThreshold);
+ ppd->cpspec->ibcctrl_a |= (u64) val <<
+ SYM_LSB(IBCCtrlA_0, PhyerrThreshold);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_PKEYS: /* update pkeys */
+ maskr = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) |
+ ((u64) ppd->pkeys[2] << 32) |
+ ((u64) ppd->pkeys[3] << 48);
+ qib_write_kreg_port(ppd, krp_partitionkey, maskr);
+ goto bail;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ if (val == IB_LINKINITCMD_POLL)
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, LinkDownDefaultState);
+ else /* SLEEP */
+ ppd->cpspec->ibcctrl_a |=
+ SYM_MASK(IBCCtrlA_0, LinkDownDefaultState);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ goto bail;
+
+ case QIB_IB_CFG_MTU: /* update the MTU in IBC */
+ /*
+ * Update our housekeeping variables, and set IBC max
+ * size, same as init code; max IBC is max we allow in
+ * buffer, less the qword pbc, plus 1 for ICRC, in dwords
+ * Set even if it's unchanged, print debug message only
+ * on changes.
+ */
+ val = (ppd->ibmaxlen >> 2) + 1;
+ ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, MaxPktLen);
+ ppd->cpspec->ibcctrl_a |= (u64)val <<
+ SYM_LSB(IBCCtrlA_0, MaxPktLen);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ goto bail;
+
+ case QIB_IB_CFG_LSTATE: /* set the IB link state */
+ switch (val & 0xffff0000) {
+ case IB_LINKCMD_DOWN:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN;
+ ppd->cpspec->ibmalfusesnap = 1;
+ ppd->cpspec->ibmalfsnap = read_7322_creg32_port(ppd,
+ crp_errlink);
+ if (!ppd->cpspec->ibdeltainprog &&
+ qib_compat_ddr_negotiate) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap =
+ read_7322_creg32_port(ppd,
+ crp_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap =
+ read_7322_creg32_port(ppd,
+ crp_iblinkerrrecov);
+ }
+ break;
+
+ case IB_LINKCMD_ARMED:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED;
+ if (ppd->cpspec->ibmalfusesnap) {
+ ppd->cpspec->ibmalfusesnap = 0;
+ ppd->cpspec->ibmalfdelta +=
+ read_7322_creg32_port(ppd,
+ crp_errlink) -
+ ppd->cpspec->ibmalfsnap;
+ }
+ break;
+
+ case IB_LINKCMD_ACTIVE:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE;
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16);
+ goto bail;
+ }
+ switch (val & 0xffff) {
+ case IB_LINKINITCMD_NOP:
+ licmd = 0;
+ break;
+
+ case IB_LINKINITCMD_POLL:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL;
+ break;
+
+ case IB_LINKINITCMD_SLEEP:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP;
+ break;
+
+ case IB_LINKINITCMD_DISABLE:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE;
+ ppd->cpspec->chase_end = 0;
+ /*
+			 * Stop the state-chase counter and timer, if running;
+			 * wait for a pending timer, but don't clear .data (ppd)!
+ */
+ if (ppd->cpspec->chase_timer.expires) {
+ del_timer_sync(&ppd->cpspec->chase_timer);
+ ppd->cpspec->chase_timer.expires = 0;
+ }
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkinitcmd req 0x%x\n",
+ val & 0xffff);
+ goto bail;
+ }
+ qib_set_ib_7322_lstate(ppd, lcmd, licmd);
+ goto bail;
+
+ case QIB_IB_CFG_OP_VLS:
+ if (ppd->vls_operational != val) {
+ ppd->vls_operational = val;
+ set_vls(ppd);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_VL_HIGH_LIMIT:
+ qib_write_kreg_port(ppd, krp_highprio_limit, val);
+ goto bail;
+
+ case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
+ if (val > 3) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ lsb = IBA7322_IBC_HRTBT_LSB;
+ maskr = IBA7322_IBC_HRTBT_RMASK; /* OR of AUTO and ENB */
+ break;
+
+ case QIB_IB_CFG_PORT:
+ /* val is the port number of the switch we are connected to. */
+ if (ppd->dd->cspec->r1) {
+ cancel_delayed_work(&ppd->cpspec->ipg_work);
+ ppd->cpspec->ipg_tries = 0;
+ }
+ goto bail;
+
+ default:
+ ret = -EINVAL;
+ goto bail;
+ }
+ ppd->cpspec->ibcctrl_b &= ~(maskr << lsb);
+ ppd->cpspec->ibcctrl_b |= (((u64) val & maskr) << lsb);
+ qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b);
+ qib_write_kreg(dd, kr_scratch, 0);
+bail:
+ return ret;
+}
+
+static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what)
+{
+ int ret = 0;
+ u64 val, ctrlb;
+
+ /* only IBC loopback, may add serdes and xgxs loopbacks later */
+ if (!strncmp(what, "ibc", 3)) {
+ ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0,
+ Loopback);
+ val = 0; /* disable heart beat, so link will come up */
+ qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n",
+ ppd->dd->unit, ppd->port);
+ } else if (!strncmp(what, "off", 3)) {
+ ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0,
+ Loopback);
+ /* enable heart beat again */
+ val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB;
+ qib_devinfo(ppd->dd->pcidev,
+ "Disabling IB%u:%u IBC loopback (normal)\n",
+ ppd->dd->unit, ppd->port);
+ } else
+ ret = -EINVAL;
+ if (!ret) {
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ ctrlb = ppd->cpspec->ibcctrl_b & ~(IBA7322_IBC_HRTBT_MASK
+ << IBA7322_IBC_HRTBT_LSB);
+ ppd->cpspec->ibcctrl_b = ctrlb | val;
+ qib_write_kreg_port(ppd, krp_ibcctrl_b,
+ ppd->cpspec->ibcctrl_b);
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+ }
+ return ret;
+}
+
+static void get_vl_weights(struct qib_pportdata *ppd, unsigned regno,
+ struct ib_vl_weight_elem *vl)
+{
+ unsigned i;
+
+ for (i = 0; i < 16; i++, regno++, vl++) {
+ u32 val = qib_read_kreg_port(ppd, regno);
+
+ vl->vl = (val >> SYM_LSB(LowPriority0_0, VirtualLane)) &
+ SYM_RMASK(LowPriority0_0, VirtualLane);
+ vl->weight = (val >> SYM_LSB(LowPriority0_0, Weight)) &
+ SYM_RMASK(LowPriority0_0, Weight);
+ }
+}
+
+static void set_vl_weights(struct qib_pportdata *ppd, unsigned regno,
+ struct ib_vl_weight_elem *vl)
+{
+ unsigned i;
+
+ for (i = 0; i < 16; i++, regno++, vl++) {
+ u64 val;
+
+ val = ((vl->vl & SYM_RMASK(LowPriority0_0, VirtualLane)) <<
+ SYM_LSB(LowPriority0_0, VirtualLane)) |
+ ((vl->weight & SYM_RMASK(LowPriority0_0, Weight)) <<
+ SYM_LSB(LowPriority0_0, Weight));
+ qib_write_kreg_port(ppd, regno, val);
+ }
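+	/* turn on the VL arbiter if it is not already enabled */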
+ if (!(ppd->p_sendctrl & SYM_MASK(SendCtrl_0, IBVLArbiterEn))) {
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, IBVLArbiterEn);
+ qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ }
+}
+
+static int qib_7322_get_ib_table(struct qib_pportdata *ppd, int which, void *t)
+{
+ switch (which) {
+ case QIB_IB_TBL_VL_HIGH_ARB:
+ get_vl_weights(ppd, krp_highprio_0, t);
+ break;
+
+ case QIB_IB_TBL_VL_LOW_ARB:
+ get_vl_weights(ppd, krp_lowprio_0, t);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int qib_7322_set_ib_table(struct qib_pportdata *ppd, int which, void *t)
+{
+ switch (which) {
+ case QIB_IB_TBL_VL_HIGH_ARB:
+ set_vl_weights(ppd, krp_highprio_0, t);
+ break;
+
+ case QIB_IB_TBL_VL_LOW_ARB:
+ set_vl_weights(ppd, krp_lowprio_0, t);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd,
+ u32 updegr, u32 egrhd, u32 npkts)
+{
+ /*
+ * Need to write timeout register before updating rcvhdrhead to ensure
+ * that the timer is enabled on reception of a packet.
+ */
+ if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT)
+ adjust_rcv_timeout(rcd, npkts);
+ if (updegr)
+ qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
+}
+
+static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd)
+{
+ u32 head, tail;
+
+ head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt);
+ if (rcd->rcvhdrtail_kvaddr)
+ tail = qib_get_rcvhdrtail(rcd);
+ else
+ tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt);
+ return head == tail;
+}
+
+#define RCVCTRL_COMMON_MODS (QIB_RCVCTRL_CTXT_ENB | \
+ QIB_RCVCTRL_CTXT_DIS | \
+ QIB_RCVCTRL_TIDFLOW_ENB | \
+ QIB_RCVCTRL_TIDFLOW_DIS | \
+ QIB_RCVCTRL_TAILUPD_ENB | \
+ QIB_RCVCTRL_TAILUPD_DIS | \
+ QIB_RCVCTRL_INTRAVAIL_ENB | \
+ QIB_RCVCTRL_INTRAVAIL_DIS | \
+ QIB_RCVCTRL_BP_ENB | \
+ QIB_RCVCTRL_BP_DIS)
+
+#define RCVCTRL_PORT_MODS (QIB_RCVCTRL_CTXT_ENB | \
+ QIB_RCVCTRL_CTXT_DIS | \
+ QIB_RCVCTRL_PKEY_DIS | \
+ QIB_RCVCTRL_PKEY_ENB)
+
+/*
+ * Modify the RCVCTRL register in chip-specific way. This
+ * is a function because bit positions and (future) register
+ * locations are chip-specific, but the needed operations are
+ * generic. <op> is a bit-mask because we often want to
+ * do multiple modifications.
+ */
+static void rcvctrl_7322_mod(struct qib_pportdata *ppd, unsigned int op,
+ int ctxt)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_ctxtdata *rcd;
+ u64 mask, val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+
+ if (op & QIB_RCVCTRL_TIDFLOW_ENB)
+ dd->rcvctrl |= SYM_MASK(RcvCtrl, TidFlowEnable);
+ if (op & QIB_RCVCTRL_TIDFLOW_DIS)
+ dd->rcvctrl &= ~SYM_MASK(RcvCtrl, TidFlowEnable);
+ if (op & QIB_RCVCTRL_TAILUPD_ENB)
+ dd->rcvctrl |= SYM_MASK(RcvCtrl, TailUpd);
+ if (op & QIB_RCVCTRL_TAILUPD_DIS)
+ dd->rcvctrl &= ~SYM_MASK(RcvCtrl, TailUpd);
+ if (op & QIB_RCVCTRL_PKEY_ENB)
+ ppd->p_rcvctrl &= ~SYM_MASK(RcvCtrl_0, RcvPartitionKeyDisable);
+ if (op & QIB_RCVCTRL_PKEY_DIS)
+ ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvPartitionKeyDisable);
+ if (ctxt < 0) {
+ mask = (1ULL << dd->ctxtcnt) - 1;
+ rcd = NULL;
+ } else {
+ mask = (1ULL << ctxt);
+ rcd = dd->rcd[ctxt];
+ }
+ if ((op & QIB_RCVCTRL_CTXT_ENB) && rcd) {
+ ppd->p_rcvctrl |=
+ (mask << SYM_LSB(RcvCtrl_0, ContextEnableKernel));
+ if (!(dd->flags & QIB_NODMA_RTAIL)) {
+ op |= QIB_RCVCTRL_TAILUPD_ENB; /* need reg write */
+ dd->rcvctrl |= SYM_MASK(RcvCtrl, TailUpd);
+ }
+ /* Write these registers before the context is enabled. */
+ qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, ctxt,
+ rcd->rcvhdrqtailaddr_phys);
+ qib_write_kreg_ctxt(dd, krc_rcvhdraddr, ctxt,
+ rcd->rcvhdrq_phys);
+ rcd->seq_cnt = 1;
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS)
+ ppd->p_rcvctrl &=
+ ~(mask << SYM_LSB(RcvCtrl_0, ContextEnableKernel));
+ if (op & QIB_RCVCTRL_BP_ENB)
+ dd->rcvctrl |= mask << SYM_LSB(RcvCtrl, dontDropRHQFull);
+ if (op & QIB_RCVCTRL_BP_DIS)
+ dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, dontDropRHQFull));
+ if (op & QIB_RCVCTRL_INTRAVAIL_ENB)
+ dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, IntrAvail));
+ if (op & QIB_RCVCTRL_INTRAVAIL_DIS)
+ dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, IntrAvail));
+ /*
+ * Decide which registers to write depending on the ops enabled.
+ * Special case is "flush" (no bits set at all)
+ * which needs to write both.
+ */
+ if (op == 0 || (op & RCVCTRL_COMMON_MODS))
+ qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+ if (op == 0 || (op & RCVCTRL_PORT_MODS))
+ qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl);
+ if ((op & QIB_RCVCTRL_CTXT_ENB) && dd->rcd[ctxt]) {
+ /*
+ * Init the context registers also; if we were
+ * disabled, tail and head should both be zero
+ * already from the enable, but since we don't
+ * know, we have to do it explicitly.
+ */
+ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
+ qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
+
+ /* be sure enabling write seen; hd/tl should be 0 */
+ (void) qib_read_kreg32(dd, kr_scratch);
+ val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt);
+ dd->rcd[ctxt]->head = val;
+ /* If kctxt, interrupt on next receive. */
+ if (ctxt < dd->first_user_ctxt)
+ val |= dd->rhdrhead_intr_off;
+ qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+ } else if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) &&
+ dd->rcd[ctxt] && dd->rhdrhead_intr_off) {
+ /* arm rcv interrupt */
+ val = dd->rcd[ctxt]->head | dd->rhdrhead_intr_off;
+ qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS) {
+ unsigned f;
+
+ /* Now that the context is disabled, clear these registers. */
+ if (ctxt >= 0) {
+ qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, ctxt, 0);
+ qib_write_kreg_ctxt(dd, krc_rcvhdraddr, ctxt, 0);
+ for (f = 0; f < NUM_TIDFLOWS_CTXT; f++)
+ qib_write_ureg(dd, ur_rcvflowtable + f,
+ TIDFLOW_ERRBITS, ctxt);
+ } else {
+ unsigned i;
+
+ for (i = 0; i < dd->cfgctxts; i++) {
+ qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr,
+ i, 0);
+ qib_write_kreg_ctxt(dd, krc_rcvhdraddr, i, 0);
+ for (f = 0; f < NUM_TIDFLOWS_CTXT; f++)
+ qib_write_ureg(dd, ur_rcvflowtable + f,
+ TIDFLOW_ERRBITS, i);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+}
+
+/*
+ * Modify the SENDCTRL register in a chip-specific way. This is
+ * one of those cases where there are multiple such registers with
+ * slightly different layouts.
+ * The chip doesn't allow back-to-back sendctrl writes, so write
+ * the scratch register after writing sendctrl.
+ *
+ * Which register is written depends on the operation.
+ * Most operate on the common register, while
+ * SEND_ENB and SEND_DIS operate on the per-port ones.
+ * SEND_ENB is included in common because it can change SPCL_TRIG.
+ */
+#define SENDCTRL_COMMON_MODS (\
+ QIB_SENDCTRL_CLEAR | \
+ QIB_SENDCTRL_AVAIL_DIS | \
+ QIB_SENDCTRL_AVAIL_ENB | \
+ QIB_SENDCTRL_AVAIL_BLIP | \
+ QIB_SENDCTRL_DISARM | \
+ QIB_SENDCTRL_DISARM_ALL | \
+ QIB_SENDCTRL_SEND_ENB)
+
+#define SENDCTRL_PORT_MODS (\
+ QIB_SENDCTRL_CLEAR | \
+ QIB_SENDCTRL_SEND_ENB | \
+ QIB_SENDCTRL_SEND_DIS | \
+ QIB_SENDCTRL_FLUSH)
+
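+/*
+ * Illustrative example: a caller enabling sends on a port while
+ * re-enabling buffer-available updates would pass
+ * op = QIB_SENDCTRL_SEND_ENB | QIB_SENDCTRL_AVAIL_ENB, which per the
+ * masks above writes both the common and the per-port register, each
+ * followed by the required scratch write.
+ */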
+static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 tmp_dd_sendctrl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+
+ /* First the dd ones that are "sticky", saved in shadow */
+ if (op & QIB_SENDCTRL_CLEAR)
+ dd->sendctrl = 0;
+ if (op & QIB_SENDCTRL_AVAIL_DIS)
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd);
+ else if (op & QIB_SENDCTRL_AVAIL_ENB) {
+ dd->sendctrl |= SYM_MASK(SendCtrl, SendBufAvailUpd);
+ if (dd->flags & QIB_USE_SPCL_TRIG)
+ dd->sendctrl |= SYM_MASK(SendCtrl, SpecialTriggerEn);
+ }
+
+ /* Then the ppd ones that are "sticky", saved in shadow */
+ if (op & QIB_SENDCTRL_SEND_DIS)
+ ppd->p_sendctrl &= ~SYM_MASK(SendCtrl_0, SendEnable);
+ else if (op & QIB_SENDCTRL_SEND_ENB)
+ ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, SendEnable);
+
+ if (op & QIB_SENDCTRL_DISARM_ALL) {
+ u32 i, last;
+
+ tmp_dd_sendctrl = dd->sendctrl;
+ last = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS;
+ /*
+ * Disarm any buffers that are not yet launched,
+ * disabling updates until done.
+ */
+ tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd);
+ for (i = 0; i < last; i++) {
+ qib_write_kreg(dd, kr_sendctrl,
+ tmp_dd_sendctrl |
+ SYM_MASK(SendCtrl, Disarm) | i);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+ }
+
+ if (op & QIB_SENDCTRL_FLUSH) {
+ u64 tmp_ppd_sendctrl = ppd->p_sendctrl;
+
+ /*
+ * Now drain all the fifos. The Abort bit should never be
+ * needed, so for now, at least, we don't use it.
+ */
+ tmp_ppd_sendctrl |=
+ SYM_MASK(SendCtrl_0, TxeDrainRmFifo) |
+ SYM_MASK(SendCtrl_0, TxeDrainLaFifo) |
+ SYM_MASK(SendCtrl_0, TxeBypassIbc);
+ qib_write_kreg_port(ppd, krp_sendctrl, tmp_ppd_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ tmp_dd_sendctrl = dd->sendctrl;
+
+ if (op & QIB_SENDCTRL_DISARM)
+ tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) |
+ ((op & QIB_7322_SendCtrl_DisarmSendBuf_RMASK) <<
+ SYM_LSB(SendCtrl, DisarmSendBuf));
+ if ((op & QIB_SENDCTRL_AVAIL_BLIP) &&
+ (dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd)))
+ tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd);
+
+ if (op == 0 || (op & SENDCTRL_COMMON_MODS)) {
+ qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ if (op == 0 || (op & SENDCTRL_PORT_MODS)) {
+ qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ if (op & QIB_SENDCTRL_AVAIL_BLIP) {
+ qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+ qib_write_kreg(dd, kr_scratch, 0);
+ }
+
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+
+ if (op & QIB_SENDCTRL_FLUSH) {
+ u32 v;
+ /*
+ * ensure writes have hit chip, then do a few
+ * more reads, to allow DMA of pioavail registers
+ * to occur, so in-memory copy is in sync with
+ * the chip. Not always safe to sleep.
+ */
+ v = qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_scratch, v);
+ v = qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg(dd, kr_scratch, v);
+ qib_read_kreg32(dd, kr_scratch);
+ }
+}
+
+#define _PORT_VIRT_FLAG 0x8000U /* "virtual", need adjustments */
+#define _PORT_64BIT_FLAG 0x10000U /* not "virtual", but 64bit */
+#define _PORT_CNTR_IDXMASK 0x7fffU /* mask off flags above */
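+/*
+ * The counter tables below pack a register index in the low 15 bits
+ * (_PORT_CNTR_IDXMASK); _PORT_64BIT_FLAG selects the 64-bit read
+ * accessor, and _PORT_VIRT_FLAG routes the read through
+ * qib_portcntr_7322() so the needed adjustments can be applied.
+ */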
+
+/**
+ * qib_portcntr_7322 - read a per-port chip counter
+ * @ppd: the qlogic_ib pport
+ * @reg: the counter to read (not a chip offset)
+ */
+static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 ret = 0ULL;
+ u16 creg;
+ /* 0xffff for unimplemented or synthesized counters */
+ static const u32 xlator[] = {
+ [QIBPORTCNTR_PKTSEND] = crp_pktsend | _PORT_64BIT_FLAG,
+ [QIBPORTCNTR_WORDSEND] = crp_wordsend | _PORT_64BIT_FLAG,
+ [QIBPORTCNTR_PSXMITDATA] = crp_psxmitdatacount,
+ [QIBPORTCNTR_PSXMITPKTS] = crp_psxmitpktscount,
+ [QIBPORTCNTR_PSXMITWAIT] = crp_psxmitwaitcount,
+ [QIBPORTCNTR_SENDSTALL] = crp_sendstall,
+ [QIBPORTCNTR_PKTRCV] = crp_pktrcv | _PORT_64BIT_FLAG,
+ [QIBPORTCNTR_PSRCVDATA] = crp_psrcvdatacount,
+ [QIBPORTCNTR_PSRCVPKTS] = crp_psrcvpktscount,
+ [QIBPORTCNTR_RCVEBP] = crp_rcvebp,
+ [QIBPORTCNTR_RCVOVFL] = crp_rcvovfl,
+ [QIBPORTCNTR_WORDRCV] = crp_wordrcv | _PORT_64BIT_FLAG,
+ [QIBPORTCNTR_RXDROPPKT] = 0xffff, /* not needed for 7322 */
+ [QIBPORTCNTR_RXLOCALPHYERR] = crp_rxotherlocalphyerr,
+ [QIBPORTCNTR_RXVLERR] = crp_rxvlerr,
+ [QIBPORTCNTR_ERRICRC] = crp_erricrc,
+ [QIBPORTCNTR_ERRVCRC] = crp_errvcrc,
+ [QIBPORTCNTR_ERRLPCRC] = crp_errlpcrc,
+ [QIBPORTCNTR_BADFORMAT] = crp_badformat,
+ [QIBPORTCNTR_ERR_RLEN] = crp_err_rlen,
+ [QIBPORTCNTR_IBSYMBOLERR] = crp_ibsymbolerr,
+ [QIBPORTCNTR_INVALIDRLEN] = crp_invalidrlen,
+ [QIBPORTCNTR_UNSUPVL] = crp_txunsupvl,
+ [QIBPORTCNTR_EXCESSBUFOVFL] = crp_excessbufferovfl,
+ [QIBPORTCNTR_ERRLINK] = crp_errlink,
+ [QIBPORTCNTR_IBLINKDOWN] = crp_iblinkdown,
+ [QIBPORTCNTR_IBLINKERRRECOV] = crp_iblinkerrrecov,
+ [QIBPORTCNTR_LLI] = crp_locallinkintegrityerr,
+ [QIBPORTCNTR_VL15PKTDROP] = crp_vl15droppedpkt,
+ [QIBPORTCNTR_ERRPKEY] = crp_errpkey,
+ /*
+ * the next 3 aren't really counters, but were implemented
+ * as counters in older chips, so still get accessed as
+ * though they were counters from this code.
+ */
+ [QIBPORTCNTR_PSINTERVAL] = krp_psinterval,
+ [QIBPORTCNTR_PSSTART] = krp_psstart,
+ [QIBPORTCNTR_PSSTAT] = krp_psstat,
+ /* pseudo-counter, summed for all ports */
+ [QIBPORTCNTR_KHDROVFL] = 0xffff,
+ };
+
+ if (reg >= ARRAY_SIZE(xlator)) {
+ qib_devinfo(ppd->dd->pcidev,
+ "Unimplemented portcounter %u\n", reg);
+ goto done;
+ }
+ creg = xlator[reg] & _PORT_CNTR_IDXMASK;
+
+ /* handle non-counters and special cases first */
+ if (reg == QIBPORTCNTR_KHDROVFL) {
+ int i;
+
+ /* sum over all kernel contexts (skip if mini_init) */
+ for (i = 0; dd->rcd && i < dd->first_user_ctxt; i++) {
+ struct qib_ctxtdata *rcd = dd->rcd[i];
+
+ if (!rcd || rcd->ppd != ppd)
+ continue;
+ ret += read_7322_creg32(dd, cr_base_egrovfl + i);
+ }
+ goto done;
+ } else if (reg == QIBPORTCNTR_RXDROPPKT) {
+ /*
+ * Used as part of the synthesis of port_rcv_errors
+ * in the verbs code for IBTA counters. Not needed for 7322,
+ * because all the errors are already counted by other cntrs.
+ */
+ goto done;
+ } else if (reg == QIBPORTCNTR_PSINTERVAL ||
+ reg == QIBPORTCNTR_PSSTART || reg == QIBPORTCNTR_PSSTAT) {
+ /* were counters in older chips, now per-port kernel regs */
+ ret = qib_read_kreg_port(ppd, creg);
+ goto done;
+ }
+
+ /*
+ * Only fast increment counters are 64 bits; use 32 bit reads to
+ * avoid two independent reads when on Opteron.
+ */
+ if (xlator[reg] & _PORT_64BIT_FLAG)
+ ret = read_7322_creg_port(ppd, creg);
+ else
+ ret = read_7322_creg32_port(ppd, creg);
+ if (creg == crp_ibsymbolerr) {
+ if (ppd->cpspec->ibdeltainprog)
+ ret -= ret - ppd->cpspec->ibsymsnap;
+ ret -= ppd->cpspec->ibsymdelta;
+ } else if (creg == crp_iblinkerrrecov) {
+ if (ppd->cpspec->ibdeltainprog)
+ ret -= ret - ppd->cpspec->iblnkerrsnap;
+ ret -= ppd->cpspec->iblnkerrdelta;
+ } else if (creg == crp_errlink)
+ ret -= ppd->cpspec->ibmalfdelta;
+ else if (creg == crp_iblinkdown)
+ ret += ppd->cpspec->iblnkdowndelta;
+done:
+ return ret;
+}
+
+/*
+ * Device counter names (not port-specific), one line per stat,
+ * single string. Used by utilities like ipathstats to print the stats
+ * in a way which works for different versions of drivers, without changing
+ * the utility. Names need to be 12 chars or less (w/o newline), for proper
+ * display by utility.
+ * Non-error counters are first.
+ * Start of "error" conters is indicated by a leading "E " on the first
+ * "error" counter, and doesn't count in label length.
+ * The EgrOvfl list needs to be last so we truncate them at the configured
+ * context count for the device.
+ * cntr7322indices contains the corresponding register indices.
+ */
+static const char cntr7322names[] =
+ "Interrupts\n"
+ "HostBusStall\n"
+ "E RxTIDFull\n"
+ "RxTIDInvalid\n"
+ "RxTIDFloDrop\n" /* 7322 only */
+ "Ctxt0EgrOvfl\n"
+ "Ctxt1EgrOvfl\n"
+ "Ctxt2EgrOvfl\n"
+ "Ctxt3EgrOvfl\n"
+ "Ctxt4EgrOvfl\n"
+ "Ctxt5EgrOvfl\n"
+ "Ctxt6EgrOvfl\n"
+ "Ctxt7EgrOvfl\n"
+ "Ctxt8EgrOvfl\n"
+ "Ctxt9EgrOvfl\n"
+ "Ctx10EgrOvfl\n"
+ "Ctx11EgrOvfl\n"
+ "Ctx12EgrOvfl\n"
+ "Ctx13EgrOvfl\n"
+ "Ctx14EgrOvfl\n"
+ "Ctx15EgrOvfl\n"
+ "Ctx16EgrOvfl\n"
+ "Ctx17EgrOvfl\n"
+ ;
+
+static const u32 cntr7322indices[] = {
+ cr_lbint | _PORT_64BIT_FLAG,
+ cr_lbstall | _PORT_64BIT_FLAG,
+ cr_tidfull,
+ cr_tidinvalid,
+ cr_rxtidflowdrop,
+ cr_base_egrovfl + 0,
+ cr_base_egrovfl + 1,
+ cr_base_egrovfl + 2,
+ cr_base_egrovfl + 3,
+ cr_base_egrovfl + 4,
+ cr_base_egrovfl + 5,
+ cr_base_egrovfl + 6,
+ cr_base_egrovfl + 7,
+ cr_base_egrovfl + 8,
+ cr_base_egrovfl + 9,
+ cr_base_egrovfl + 10,
+ cr_base_egrovfl + 11,
+ cr_base_egrovfl + 12,
+ cr_base_egrovfl + 13,
+ cr_base_egrovfl + 14,
+ cr_base_egrovfl + 15,
+ cr_base_egrovfl + 16,
+ cr_base_egrovfl + 17,
+};
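+/*
+ * Note: cntr7322names and cntr7322indices must stay in one-to-one
+ * correspondence; the read code below uses the same index into both.
+ */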
+
+/*
+ * same as cntr7322names and cntr7322indices, but for port-specific counters.
+ * portcntr7322indices is somewhat complicated by some registers needing
+ * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG
+ */
+static const char portcntr7322names[] =
+ "TxPkt\n"
+ "TxFlowPkt\n"
+ "TxWords\n"
+ "RxPkt\n"
+ "RxFlowPkt\n"
+ "RxWords\n"
+ "TxFlowStall\n"
+ "TxDmaDesc\n" /* 7220 and 7322-only */
+ "E RxDlidFltr\n" /* 7220 and 7322-only */
+ "IBStatusChng\n"
+ "IBLinkDown\n"
+ "IBLnkRecov\n"
+ "IBRxLinkErr\n"
+ "IBSymbolErr\n"
+ "RxLLIErr\n"
+ "RxBadFormat\n"
+ "RxBadLen\n"
+ "RxBufOvrfl\n"
+ "RxEBP\n"
+ "RxFlowCtlErr\n"
+ "RxICRCerr\n"
+ "RxLPCRCerr\n"
+ "RxVCRCerr\n"
+ "RxInvalLen\n"
+ "RxInvalPKey\n"
+ "RxPktDropped\n"
+ "TxBadLength\n"
+ "TxDropped\n"
+ "TxInvalLen\n"
+ "TxUnderrun\n"
+ "TxUnsupVL\n"
+ "RxLclPhyErr\n" /* 7220 and 7322-only from here down */
+ "RxVL15Drop\n"
+ "RxVlErr\n"
+ "XcessBufOvfl\n"
+ "RxQPBadCtxt\n" /* 7322-only from here down */
+ "TXBadHeader\n"
+ ;
+
+static const u32 portcntr7322indices[] = {
+ QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG,
+ crp_pktsendflow,
+ QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG,
+ crp_pktrcvflowctrl,
+ QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG,
+ crp_txsdmadesc | _PORT_64BIT_FLAG,
+ crp_rxdlidfltr,
+ crp_ibstatuschange,
+ QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_LLI | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG,
+ crp_rcvflowctrlviol,
+ QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG,
+ crp_txminmaxlenerr,
+ crp_txdroppedpkt,
+ crp_txlenerr,
+ crp_txunderrun,
+ crp_txunsupvl,
+ QIBPORTCNTR_RXLOCALPHYERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_VL15PKTDROP | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_RXVLERR | _PORT_VIRT_FLAG,
+ QIBPORTCNTR_EXCESSBUFOVFL | _PORT_VIRT_FLAG,
+ crp_rxqpinvalidctxt,
+ crp_txhdrerr,
+};
+
+/* do all the setup to make the counter reads efficient later */
+static void init_7322_cntrnames(struct qib_devdata *dd)
+{
+ int i, j = 0;
+ char *s;
+
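+	/*
+	 * Count the device counter names; j tracks how many EgrOvfl names
+	 * have been seen, so the list is truncated at the configured
+	 * context count.
+	 */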
+ for (i = 0, s = (char *)cntr7322names; s && j <= dd->cfgctxts;
+ i++) {
+ /* we always have at least one counter before the egrovfl */
+ if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12))
+ j = 1;
+ s = strchr(s + 1, '\n');
+ if (s && j)
+ j++;
+ }
+ dd->cspec->ncntrs = i;
+ if (!s)
+ /* full list; size is without terminating null */
+ dd->cspec->cntrnamelen = sizeof(cntr7322names) - 1;
+ else
+ dd->cspec->cntrnamelen = 1 + s - cntr7322names;
+ dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
+ * sizeof(u64), GFP_KERNEL);
+ if (!dd->cspec->cntrs)
+ qib_dev_err(dd, "Failed allocation for counters\n");
+
+ for (i = 0, s = (char *)portcntr7322names; s; i++)
+ s = strchr(s + 1, '\n');
+ dd->cspec->nportcntrs = i - 1;
+ dd->cspec->portcntrnamelen = sizeof(portcntr7322names) - 1;
+ for (i = 0; i < dd->num_pports; ++i) {
+ dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
+ * sizeof(u64), GFP_KERNEL);
+ if (!dd->pport[i].cpspec->portcntrs)
+ qib_dev_err(dd,
+ "Failed allocation for portcounters\n");
+ }
+}
+
+static u32 qib_read_7322cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
+ u64 **cntrp)
+{
+ u32 ret;
+
+ if (namep) {
+ ret = dd->cspec->cntrnamelen;
+ if (pos >= ret)
+ ret = 0; /* final read after getting everything */
+ else
+ *namep = (char *) cntr7322names;
+ } else {
+ u64 *cntr = dd->cspec->cntrs;
+ int i;
+
+ ret = dd->cspec->ncntrs * sizeof(u64);
+ if (!cntr || pos >= ret) {
+ /* everything read, or couldn't get memory */
+ ret = 0;
+ goto done;
+ }
+ *cntrp = cntr;
+ for (i = 0; i < dd->cspec->ncntrs; i++)
+ if (cntr7322indices[i] & _PORT_64BIT_FLAG)
+ *cntr++ = read_7322_creg(dd,
+ cntr7322indices[i] &
+ _PORT_CNTR_IDXMASK);
+ else
+ *cntr++ = read_7322_creg32(dd,
+ cntr7322indices[i]);
+ }
+done:
+ return ret;
+}
+
+static u32 qib_read_7322portcntrs(struct qib_devdata *dd, loff_t pos, u32 port,
+ char **namep, u64 **cntrp)
+{
+ u32 ret;
+
+ if (namep) {
+ ret = dd->cspec->portcntrnamelen;
+ if (pos >= ret)
+ ret = 0; /* final read after getting everything */
+ else
+ *namep = (char *)portcntr7322names;
+ } else {
+ struct qib_pportdata *ppd = &dd->pport[port];
+ u64 *cntr = ppd->cpspec->portcntrs;
+ int i;
+
+ ret = dd->cspec->nportcntrs * sizeof(u64);
+ if (!cntr || pos >= ret) {
+ /* everything read, or couldn't get memory */
+ ret = 0;
+ goto done;
+ }
+ *cntrp = cntr;
+ for (i = 0; i < dd->cspec->nportcntrs; i++) {
+ if (portcntr7322indices[i] & _PORT_VIRT_FLAG)
+ *cntr++ = qib_portcntr_7322(ppd,
+ portcntr7322indices[i] &
+ _PORT_CNTR_IDXMASK);
+ else if (portcntr7322indices[i] & _PORT_64BIT_FLAG)
+ *cntr++ = read_7322_creg_port(ppd,
+ portcntr7322indices[i] &
+ _PORT_CNTR_IDXMASK);
+ else
+ *cntr++ = read_7322_creg32_port(ppd,
+ portcntr7322indices[i]);
+ }
+ }
+done:
+ return ret;
+}
+
+/**
+ * qib_get_7322_faststats - get word counters from chip before they overflow
+ * @opaque: contains a pointer to the qlogic_ib device qib_devdata
+ *
+ * VESTIGIAL: the IBA7322 has no "small fast counters", so the only
+ * real purpose of this function is to maintain the notion of
+ * "active time", which in turn is only logged into the eeprom,
+ * which we don't have, yet, for 7322-based boards.
+ *
+ * called from add_timer
+ */
+static void qib_get_7322_faststats(unsigned long opaque)
+{
+ struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ struct qib_pportdata *ppd;
+ unsigned long flags;
+ u64 traffic_wds;
+ int pidx;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ /*
+ * If the port isn't enabled, or isn't operational, or if
+ * diags are running (which can cause memory diags to fail),
+ * skip this port this time.
+ */
+ if (!ppd->link_speed_supported || !(dd->flags & QIB_INITTED)
+ || dd->diag_client)
+ continue;
+
+ /*
+ * Maintain an activity timer, based on traffic
+ * exceeding a threshold, so we need to check the word-counts
+ * even if they are 64-bit.
+ */
+ traffic_wds = qib_portcntr_7322(ppd, QIBPORTCNTR_WORDRCV) +
+ qib_portcntr_7322(ppd, QIBPORTCNTR_WORDSEND);
+ spin_lock_irqsave(&ppd->dd->eep_st_lock, flags);
+ traffic_wds -= ppd->dd->traffic_wds;
+ ppd->dd->traffic_wds += traffic_wds;
+ if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD)
+ atomic_add(ACTIVITY_TIMER, &ppd->dd->active_time);
+ spin_unlock_irqrestore(&ppd->dd->eep_st_lock, flags);
+ if (ppd->cpspec->qdr_dfe_on && (ppd->link_speed_active &
+ QIB_IB_QDR) &&
+ (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED |
+ QIBL_LINKACTIVE)) &&
+ ppd->cpspec->qdr_dfe_time &&
+ time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) {
+ ppd->cpspec->qdr_dfe_on = 0;
+
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_INIT_R1 :
+ QDR_STATIC_ADAPT_INIT);
+ force_h1(ppd);
+ }
+ }
+ mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
+}
+
+/*
+ * If we were using MSIx, try to fall back to INTx.
+ */
+static int qib_7322_intr_fallback(struct qib_devdata *dd)
+{
+ if (!dd->cspec->num_msix_entries)
+ return 0; /* already using INTx */
+
+ qib_devinfo(dd->pcidev,
+ "MSIx interrupt not detected, trying INTx interrupts\n");
+ qib_7322_nomsix(dd);
+ qib_enable_intx(dd->pcidev);
+ qib_setup_7322_interrupt(dd, 0);
+ return 1;
+}
+
+/*
+ * Reset the XGXS (between serdes and IBC). Slightly less intrusive
+ * than resetting the IBC or external link state, and useful in some
+ * cases to cause some retraining. To do this right, we reset IBC
+ * as well, then return to the previous state (which may still be in reset).
+ * NOTE: some callers of this "know" this writes the current value
+ * of cpspec->ibcctrl_a as part of its operation, so if that changes,
+ * check all callers.
+ */
+static void qib_7322_mini_pcs_reset(struct qib_pportdata *ppd)
+{
+ u64 val;
+ struct qib_devdata *dd = ppd->dd;
+ const u64 reset_bits = SYM_MASK(IBPCSConfig_0, xcv_rreset) |
+ SYM_MASK(IBPCSConfig_0, xcv_treset) |
+ SYM_MASK(IBPCSConfig_0, tx_rx_reset);
+
+ val = qib_read_kreg_port(ppd, krp_ib_pcsconfig);
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask & ~HWE_MASK(statusValidNoEop));
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a &
+ ~SYM_MASK(IBCCtrlA_0, IBLinkEn));
+
+ qib_write_kreg_port(ppd, krp_ib_pcsconfig, val | reset_bits);
+ qib_read_kreg32(dd, kr_scratch);
+ qib_write_kreg_port(ppd, krp_ib_pcsconfig, val & ~reset_bits);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ qib_write_kreg(dd, kr_hwerrclear,
+ SYM_MASK(HwErrClear, statusValidNoEopClear));
+ qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+}
+
+/*
+ * This code for non-IBTA-compliant IB speed negotiation is only known to
+ * work for the SDR to DDR transition, and only between an HCA and a switch
+ * with recent firmware. It is based on observed heuristics, rather than
+ * actual knowledge of the non-compliant speed negotiation.
+ * It has a number of hard-coded fields, since the hope is to rewrite this
+ * when a spec is available on how the negotiation is intended to work.
+ */
+static void autoneg_7322_sendpkt(struct qib_pportdata *ppd, u32 *hdr,
+ u32 dcnt, u32 *data)
+{
+ int i;
+ u64 pbc;
+ u32 __iomem *piobuf;
+ u32 pnum, control, len;
+ struct qib_devdata *dd = ppd->dd;
+
+ i = 0;
+ len = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */
+ control = qib_7322_setpbc_control(ppd, len, 0, 15);
+ pbc = ((u64) control << 32) | len;
+ while (!(piobuf = qib_7322_getsendbuf(ppd, pbc, &pnum))) {
+ if (i++ > 15)
+ return;
+ udelay(2);
+ }
+ /* disable header check on this packet, since it can't be valid */
+ dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_DIS1, NULL);
+ writeq(pbc, piobuf);
+ qib_flush_wc();
+ qib_pio_copy(piobuf + 2, hdr, 7);
+ qib_pio_copy(piobuf + 9, data, dcnt);
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023;
+
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf + spcl_off);
+ }
+ qib_flush_wc();
+ qib_sendbuf_done(dd, pnum);
+ /* and re-enable hdr check */
+ dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
+}
+
+/*
+ * _start packet gets sent twice at start, _done gets sent twice at end
+ */
+static void qib_autoneg_7322_send(struct qib_pportdata *ppd, int which)
+{
+ struct qib_devdata *dd = ppd->dd;
+ static u32 swapped;
+ u32 dw, i, hcnt, dcnt, *data;
+ static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba };
+ static u32 madpayload_start[0x40] = {
+ 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+ 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */
+ };
+ static u32 madpayload_done[0x40] = {
+ 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0,
+ 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x40000001, 0x1388, 0x15e, /* rest 0's */
+ };
+
+ dcnt = ARRAY_SIZE(madpayload_start);
+ hcnt = ARRAY_SIZE(hdr);
+ if (!swapped) {
+ /* for maintainability, do it at runtime */
+ for (i = 0; i < hcnt; i++) {
+ dw = (__force u32) cpu_to_be32(hdr[i]);
+ hdr[i] = dw;
+ }
+ for (i = 0; i < dcnt; i++) {
+ dw = (__force u32) cpu_to_be32(madpayload_start[i]);
+ madpayload_start[i] = dw;
+ dw = (__force u32) cpu_to_be32(madpayload_done[i]);
+ madpayload_done[i] = dw;
+ }
+ swapped = 1;
+ }
+
+ data = which ? madpayload_done : madpayload_start;
+
+ autoneg_7322_sendpkt(ppd, hdr, dcnt, data);
+ qib_read_kreg64(dd, kr_scratch);
+ udelay(2);
+ autoneg_7322_sendpkt(ppd, hdr, dcnt, data);
+ qib_read_kreg64(dd, kr_scratch);
+ udelay(2);
+}
+
+/*
+ * Do the absolute minimum to cause an IB speed change, and make it
+ * ready, but don't actually trigger the change. The caller will
+ * do that when ready (if link is in Polling training state, it will
+ * happen immediately, otherwise when link next goes down)
+ *
+ * This routine should only be used as part of the DDR autonegotiation
+ * code for devices that are not compliant with IB 1.2 (or code that
+ * fixes things up for same).
+ *
+ * When the link has gone down, and either autoneg is enabled or autoneg
+ * has failed and we have given up until next time, we set both speeds,
+ * and then we want IBTA enabled as well as "use max enabled speed".
+ */
+static void set_7322_ibspeed_fast(struct qib_pportdata *ppd, u32 speed)
+{
+ u64 newctrlb;
+ newctrlb = ppd->cpspec->ibcctrl_b & ~(IBA7322_IBC_SPEED_MASK |
+ IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK);
+
+ if (speed & (speed - 1)) /* multiple speeds */
+ newctrlb |= (speed << IBA7322_IBC_SPEED_LSB) |
+ IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK;
+ else
+ newctrlb |= speed == QIB_IB_QDR ?
+ IBA7322_IBC_SPEED_QDR | IBA7322_IBC_IBTA_1_2_MASK :
+ ((speed == QIB_IB_DDR ?
+ IBA7322_IBC_SPEED_DDR : IBA7322_IBC_SPEED_SDR));
+
+ if (newctrlb == ppd->cpspec->ibcctrl_b)
+ return;
+
+ ppd->cpspec->ibcctrl_b = newctrlb;
+ qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b);
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+}
+
+/*
+ * This routine is only used when we are not talking to another
+ * IB 1.2-compliant device that we think can do DDR.
+ * (This includes all existing switch chips as of Oct 2007.)
+ * 1.2-compliant devices go directly to DDR prior to reaching INIT
+ */
+static void try_7322_autoneg(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_AUTONEG_INPROG;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ qib_autoneg_7322_send(ppd, 0);
+ set_7322_ibspeed_fast(ppd, QIB_IB_DDR);
+ qib_7322_mini_pcs_reset(ppd);
+ /* 2 msec is minimum length of a poll cycle */
+ queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work,
+ msecs_to_jiffies(2));
+}
+
+/*
+ * Handle the empirically determined mechanism for auto-negotiation
+ * of DDR speed with switches.
+ */
+static void autoneg_7322_work(struct work_struct *work)
+{
+ struct qib_pportdata *ppd;
+ struct qib_devdata *dd;
+ u64 startms;
+ u32 i;
+ unsigned long flags;
+
+ ppd = container_of(work, struct qib_chippport_specific,
+ autoneg_work.work)->ppd;
+ dd = ppd->dd;
+
+ startms = jiffies_to_msecs(jiffies);
+
+ /*
+ * Busy-wait for this first part; it should take at most a
+ * few hundred usec, since we scheduled ourselves for 2 msec.
+ */
+ for (i = 0; i < 25; i++) {
+ if (SYM_FIELD(ppd->lastibcstat, IBCStatusA_0, LinkState)
+ == IB_7322_LT_STATE_POLLQUIET) {
+ qib_set_linkstate(ppd, QIB_IB_LINKDOWN_DISABLE);
+ break;
+ }
+ udelay(100);
+ }
+
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ goto done; /* we got there early or told to stop */
+
+ /* we expect this to timeout */
+ if (wait_event_timeout(ppd->cpspec->autoneg_wait,
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(90)))
+ goto done;
+ qib_7322_mini_pcs_reset(ppd);
+
+ /* we expect this to timeout */
+ if (wait_event_timeout(ppd->cpspec->autoneg_wait,
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(1700)))
+ goto done;
+ qib_7322_mini_pcs_reset(ppd);
+
+ set_7322_ibspeed_fast(ppd, QIB_IB_SDR);
+
+ /*
+ * Wait up to 250 msec for link to train and get to INIT at DDR;
+ * this should terminate early.
+ */
+ wait_event_timeout(ppd->cpspec->autoneg_wait,
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG),
+ msecs_to_jiffies(250));
+done:
+ if (ppd->lflags & QIBL_IB_AUTONEG_INPROG) {
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG;
+ if (ppd->cpspec->autoneg_tries == AUTONEG_TRIES) {
+ ppd->lflags |= QIBL_IB_AUTONEG_FAILED;
+ ppd->cpspec->autoneg_tries = 0;
+ }
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled);
+ }
+}
+
+/*
+ * This routine is used to request that the IPG be set in the QLogic switch.
+ * Only called if r1.
+ */
+static void try_7322_ipg(struct qib_pportdata *ppd)
+{
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ struct ib_mad_send_buf *send_buf;
+ struct ib_mad_agent *agent;
+ struct ib_smp *smp;
+ unsigned delay;
+ int ret;
+
+ agent = ibp->send_agent;
+ if (!agent)
+ goto retry;
+
+ send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ goto retry;
+
+ if (!ibp->smi_ah) {
+ struct ib_ah *ah;
+
+ ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE));
+ if (IS_ERR(ah))
+ ret = PTR_ERR(ah);
+ else {
+ send_buf->ah = ah;
+ ibp->smi_ah = to_iah(ah);
+ ret = 0;
+ }
+ } else {
+ send_buf->ah = &ibp->smi_ah->ibah;
+ ret = 0;
+ }
+
+ smp = send_buf->mad;
+ smp->base_version = IB_MGMT_BASE_VERSION;
+ smp->mgmt_class = IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
+ smp->class_version = 1;
+ smp->method = IB_MGMT_METHOD_SEND;
+ smp->hop_cnt = 1;
+ smp->attr_id = QIB_VENDOR_IPG;
+ smp->attr_mod = 0;
+
+ if (!ret)
+ ret = ib_post_send_mad(send_buf, NULL);
+ if (ret)
+ ib_free_send_mad(send_buf);
+retry:
+ delay = 2 << ppd->cpspec->ipg_tries;
+ queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work,
+ msecs_to_jiffies(delay));
+}
+
+/*
+ * Timeout handler for setting IPG.
+ * Only called if r1.
+ */
+static void ipg_7322_work(struct work_struct *work)
+{
+ struct qib_pportdata *ppd;
+
+ ppd = container_of(work, struct qib_chippport_specific,
+ ipg_work.work)->ppd;
+ if ((ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE))
+ && ++ppd->cpspec->ipg_tries <= 10)
+ try_7322_ipg(ppd);
+}
+
+static u32 qib_7322_iblink_state(u64 ibcs)
+{
+ u32 state = (u32)SYM_FIELD(ibcs, IBCStatusA_0, LinkState);
+
+ switch (state) {
+ case IB_7322_L_STATE_INIT:
+ state = IB_PORT_INIT;
+ break;
+ case IB_7322_L_STATE_ARM:
+ state = IB_PORT_ARMED;
+ break;
+ case IB_7322_L_STATE_ACTIVE:
+ /* fall through */
+ case IB_7322_L_STATE_ACT_DEFER:
+ state = IB_PORT_ACTIVE;
+ break;
+ default: /* fall through */
+ case IB_7322_L_STATE_DOWN:
+ state = IB_PORT_DOWN;
+ break;
+ }
+ return state;
+}
+
+/* returns the IBTA port state, rather than the IBC link training state */
+static u8 qib_7322_phys_portstate(u64 ibcs)
+{
+ u8 state = (u8)SYM_FIELD(ibcs, IBCStatusA_0, LinkTrainingState);
+ return qib_7322_physportstate[state];
+}
+
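+/*
+ * Handle a chip-reported IB link up/down transition: refresh the cached
+ * link width and speed, drive the DDR autonegotiation state machine,
+ * and maintain the symbol-error/link-recovery snapshots and deltas.
+ * Returns 1 if no further IB status-change processing is needed.
+ */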
+static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
+{
+ int ret = 0, symadj = 0;
+ unsigned long flags;
+ int mult;
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+
+ /* Update our picture of width and speed from chip */
+ if (ibcs & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) {
+ ppd->link_speed_active = QIB_IB_QDR;
+ mult = 4;
+ } else if (ibcs & SYM_MASK(IBCStatusA_0, LinkSpeedActive)) {
+ ppd->link_speed_active = QIB_IB_DDR;
+ mult = 2;
+ } else {
+ ppd->link_speed_active = QIB_IB_SDR;
+ mult = 1;
+ }
+ if (ibcs & SYM_MASK(IBCStatusA_0, LinkWidthActive)) {
+ ppd->link_width_active = IB_WIDTH_4X;
+ mult *= 4;
+ } else
+ ppd->link_width_active = IB_WIDTH_1X;
+ ppd->delay_mult = ib_rate_to_delay[mult_to_ib_rate(mult)];
+
+ if (!ibup) {
+ u64 clr;
+
+ /* Link went down. */
+ /* do IPG MAD again after linkdown, even if last time failed */
+ ppd->cpspec->ipg_tries = 0;
+ clr = qib_read_kreg_port(ppd, krp_ibcstatus_b) &
+ (SYM_MASK(IBCStatusB_0, heartbeat_timed_out) |
+ SYM_MASK(IBCStatusB_0, heartbeat_crosstalk));
+ if (clr)
+ qib_write_kreg_port(ppd, krp_ibcstatus_b, clr);
+ if (!(ppd->lflags & (QIBL_IB_AUTONEG_FAILED |
+ QIBL_IB_AUTONEG_INPROG)))
+ set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled);
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+ struct qib_qsfp_data *qd =
+ &ppd->cpspec->qsfp_data;
+ /* unlock the Tx settings, speed may change */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+ qib_cancel_sends(ppd);
+ /* on link down, ensure sane pcs state */
+ qib_7322_mini_pcs_reset(ppd);
+ /* schedule the qsfp refresh which should turn the link
+ off */
+ if (ppd->dd->flags & QIB_HAS_QSFP) {
+ qd->t_insert = jiffies;
+ queue_work(ib_wq, &qd->work);
+ }
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (__qib_sdma_running(ppd))
+ __qib_sdma_process_event(ppd,
+ qib_sdma_event_e70_go_idle);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ clr = read_7322_creg32_port(ppd, crp_iblinkdown);
+ if (clr == ppd->cpspec->iblnkdownsnap)
+ ppd->cpspec->iblnkdowndelta++;
+ } else {
+ if (qib_compat_ddr_negotiate &&
+ !(ppd->lflags & (QIBL_IB_AUTONEG_FAILED |
+ QIBL_IB_AUTONEG_INPROG)) &&
+ ppd->link_speed_active == QIB_IB_SDR &&
+ (ppd->link_speed_enabled & QIB_IB_DDR)
+ && ppd->cpspec->autoneg_tries < AUTONEG_TRIES) {
+ /* we are SDR, and auto-negotiation enabled */
+ ++ppd->cpspec->autoneg_tries;
+ if (!ppd->cpspec->ibdeltainprog) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymdelta +=
+ read_7322_creg32_port(ppd,
+ crp_ibsymbolerr) -
+ ppd->cpspec->ibsymsnap;
+ ppd->cpspec->iblnkerrdelta +=
+ read_7322_creg32_port(ppd,
+ crp_iblinkerrrecov) -
+ ppd->cpspec->iblnkerrsnap;
+ }
+ try_7322_autoneg(ppd);
+ ret = 1; /* no other IB status change processing */
+ } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) &&
+ ppd->link_speed_active == QIB_IB_SDR) {
+ qib_autoneg_7322_send(ppd, 1);
+ set_7322_ibspeed_fast(ppd, QIB_IB_DDR);
+ qib_7322_mini_pcs_reset(ppd);
+ udelay(2);
+ ret = 1; /* no other IB status change processing */
+ } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) &&
+ (ppd->link_speed_active & QIB_IB_DDR)) {
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~(QIBL_IB_AUTONEG_INPROG |
+ QIBL_IB_AUTONEG_FAILED);
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ ppd->cpspec->autoneg_tries = 0;
+ /* re-enable SDR, for next link down */
+ set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled);
+ wake_up(&ppd->cpspec->autoneg_wait);
+ symadj = 1;
+ } else if (ppd->lflags & QIBL_IB_AUTONEG_FAILED) {
+ /*
+ * Clear autoneg failure flag, and do setup
+ * so we'll try next time link goes down and
+ * back to INIT (possibly connected to a
+ * different device).
+ */
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ ppd->cpspec->ibcctrl_b |= IBA7322_IBC_IBTA_1_2_MASK;
+ symadj = 1;
+ }
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+ symadj = 1;
+ if (ppd->dd->cspec->r1 && ppd->cpspec->ipg_tries <= 10)
+ try_7322_ipg(ppd);
+ if (!ppd->cpspec->recovery_init)
+ setup_7322_link_recovery(ppd, 0);
+ ppd->cpspec->qdr_dfe_time = jiffies +
+ msecs_to_jiffies(QDR_DFE_DISABLE_DELAY);
+ }
+ ppd->cpspec->ibmalfusesnap = 0;
+ ppd->cpspec->ibmalfsnap = read_7322_creg32_port(ppd,
+ crp_errlink);
+ }
+ if (symadj) {
+ ppd->cpspec->iblnkdownsnap =
+ read_7322_creg32_port(ppd, crp_iblinkdown);
+ if (ppd->cpspec->ibdeltainprog) {
+ ppd->cpspec->ibdeltainprog = 0;
+ ppd->cpspec->ibsymdelta += read_7322_creg32_port(ppd,
+ crp_ibsymbolerr) - ppd->cpspec->ibsymsnap;
+ ppd->cpspec->iblnkerrdelta += read_7322_creg32_port(ppd,
+ crp_iblinkerrrecov) - ppd->cpspec->iblnkerrsnap;
+ }
+ } else if (!ibup && qib_compat_ddr_negotiate &&
+ !ppd->cpspec->ibdeltainprog &&
+ !(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
+ crp_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap = read_7322_creg32_port(ppd,
+ crp_iblinkerrrecov);
+ }
+
+ if (!ret)
+ qib_setup_7322_setextled(ppd, ibup);
+ return ret;
+}
+
+/*
+ * Does read/modify/write to appropriate registers to
+ * set output and direction bits selected by mask.
+ * These are in their canonical positions (e.g. lsb of
+ * dir will end up in D48 of extctrl on existing chips).
+ * Returns contents of GP Inputs.
+ */
+static int gpio_7322_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask)
+{
+ u64 read_val, new_out;
+ unsigned long flags;
+
+ if (mask) {
+ /* some bits being written, lock access to GPIO */
+ dir &= mask;
+ out &= mask;
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe));
+ dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe));
+ new_out = (dd->cspec->gpio_out & ~mask) | out;
+
+ qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl);
+ qib_write_kreg(dd, kr_gpio_out, new_out);
+ dd->cspec->gpio_out = new_out;
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+ }
+ /*
+ * It is unlikely that a read at this time would get valid
+ * data on a pin whose direction line was set in the same
+ * call to this function. We include the read here because
+ * that allows us to potentially combine a change on one pin with
+ * a read on another, and because the old code did something like
+ * this.
+ */
+ read_val = qib_read_kreg64(dd, kr_extstatus);
+ return SYM_FIELD(read_val, EXTStatus, GPIOIn);
+}
+
+/* Enable writes to config EEPROM, if possible. Returns previous state */
+static int qib_7322_eeprom_wen(struct qib_devdata *dd, int wen)
+{
+ int prev_wen;
+ u32 mask;
+
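+	/*
+	 * The write-enable GPIO is active low: the inverted read gives the
+	 * previous state, and we drive the pin low to enable writes, high
+	 * to disable them.
+	 */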
+ mask = 1 << QIB_EEPROM_WEN_NUM;
+ prev_wen = ~gpio_7322_mod(dd, 0, 0, 0) >> QIB_EEPROM_WEN_NUM;
+ gpio_7322_mod(dd, wen ? 0 : mask, mask, mask);
+
+ return prev_wen & 1;
+}
+
+/*
+ * Read fundamental info we need to use the chip. These are
+ * the registers that describe chip capabilities, and are
+ * saved in shadow registers.
+ */
+static void get_7322_chip_params(struct qib_devdata *dd)
+{
+ u64 val;
+ u32 piobufs;
+ int mtu;
+
+ dd->palign = qib_read_kreg32(dd, kr_pagealign);
+
+ dd->uregbase = qib_read_kreg32(dd, kr_userregbase);
+
+ dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt);
+ dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase);
+ dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase);
+ dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase);
+ dd->pio2k_bufbase = dd->piobufbase & 0xffffffff;
+
+ val = qib_read_kreg64(dd, kr_sendpiobufcnt);
+ dd->piobcnt2k = val & ~0U;
+ dd->piobcnt4k = val >> 32;
+ val = qib_read_kreg64(dd, kr_sendpiosize);
+ dd->piosize2k = val & ~0U;
+ dd->piosize4k = val >> 32;
+
+ mtu = ib_mtu_enum_to_int(qib_ibmtu);
+ if (mtu == -1)
+ mtu = QIB_DEFAULT_MTU;
+ dd->pport[0].ibmtu = (u32)mtu;
+ dd->pport[1].ibmtu = (u32)mtu;
+
+ /* these may be adjusted in init_chip_wc_pat() */
+ dd->pio2kbase = (u32 __iomem *)
+ ((char __iomem *) dd->kregbase + dd->pio2k_bufbase);
+ dd->pio4kbase = (u32 __iomem *)
+ ((char __iomem *) dd->kregbase +
+ (dd->piobufbase >> 32));
+ /*
+ * 4K buffers take 2 pages; we use roundup just to be
+ * paranoid; we calculate it once here, rather than on
+ * every buffer allocation.
+ */
+ dd->align4k = ALIGN(dd->piosize4k, dd->palign);
+
+ piobufs = dd->piobcnt4k + dd->piobcnt2k + NUM_VL15_BUFS;
+
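+	/*
+	 * Each 64-bit pioavail register holds 2 bits of status for 32 send
+	 * buffers, so round the buffer count up to a multiple of 32.
+	 */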
+ dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) /
+ (sizeof(u64) * BITS_PER_BYTE / 2);
+}
+
+/*
+ * The chip base addresses in cspec and cpspec have to be set
+ * after possible init_chip_wc_pat(), rather than in
+ * get_7322_chip_params(), so split out as separate function
+ */
+static void qib_7322_set_baseaddrs(struct qib_devdata *dd)
+{
+ u32 cregbase;
+ cregbase = qib_read_kreg32(dd, kr_counterregbase);
+
+ dd->cspec->cregbase = (u64 __iomem *)(cregbase +
+ (char __iomem *)dd->kregbase);
+
+ dd->egrtidbase = (u64 __iomem *)
+ ((char __iomem *) dd->kregbase + dd->rcvegrbase);
+
+ /* port registers are defined as relative to base of chip */
+ dd->pport[0].cpspec->kpregbase =
+ (u64 __iomem *)((char __iomem *)dd->kregbase);
+ dd->pport[1].cpspec->kpregbase =
+ (u64 __iomem *)(dd->palign +
+ (char __iomem *)dd->kregbase);
+ dd->pport[0].cpspec->cpregbase =
+ (u64 __iomem *)(qib_read_kreg_port(&dd->pport[0],
+ kr_counterregbase) + (char __iomem *)dd->kregbase);
+ dd->pport[1].cpspec->cpregbase =
+ (u64 __iomem *)(qib_read_kreg_port(&dd->pport[1],
+ kr_counterregbase) + (char __iomem *)dd->kregbase);
+}
+
+/*
+ * This is a fairly special-purpose observer, so we only support
+ * the port-specific parts of SendCtrl
+ */
+
+#define SENDCTRL_SHADOWED (SYM_MASK(SendCtrl_0, SendEnable) | \
+ SYM_MASK(SendCtrl_0, SDmaEnable) | \
+ SYM_MASK(SendCtrl_0, SDmaIntEnable) | \
+ SYM_MASK(SendCtrl_0, SDmaSingleDescriptor) | \
+ SYM_MASK(SendCtrl_0, SDmaHalt) | \
+ SYM_MASK(SendCtrl_0, IBVLArbiterEn) | \
+ SYM_MASK(SendCtrl_0, ForceCreditUpToDate))
+
+static int sendctrl_hook(struct qib_devdata *dd,
+ const struct diag_observer *op, u32 offs,
+ u64 *data, u64 mask, int only_32)
+{
+ unsigned long flags;
+ unsigned idx;
+ unsigned pidx;
+ struct qib_pportdata *ppd = NULL;
+ u64 local_data, all_bits;
+
+ /*
+ * The fixed correspondence between Physical ports and pports is
+ * severed. We need to hunt for the ppd that corresponds
+ * to the offset we got. And we have to do that without admitting
+ * we know the stride, apparently.
+ */
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ u64 __iomem *psptr;
+ u32 psoffs;
+
+ ppd = dd->pport + pidx;
+ if (!ppd->cpspec->kpregbase)
+ continue;
+
+ psptr = ppd->cpspec->kpregbase + krp_sendctrl;
+ psoffs = (u32) (psptr - dd->kregbase) * sizeof(*psptr);
+ if (psoffs == offs)
+ break;
+ }
+
+ /* If pport is not being managed by driver, just avoid shadows. */
+ if (pidx >= dd->num_pports)
+ ppd = NULL;
+
+ /* In any case, "idx" is flat index in kreg space */
+ idx = offs / sizeof(u64);
+
+ all_bits = ~0ULL;
+ if (only_32)
+ all_bits >>= 32;
+
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if (!ppd || (mask & all_bits) != all_bits) {
+ /*
+ * At least some mask bits are zero, so we need
+ * to read. The judgement call is whether from
+ * reg or shadow. First-cut: read reg, and complain
+ * if any bits which should be shadowed are different
+ * from their shadowed value.
+ */
+ if (only_32)
+ local_data = (u64)qib_read_kreg32(dd, idx);
+ else
+ local_data = qib_read_kreg64(dd, idx);
+ *data = (local_data & ~mask) | (*data & mask);
+ }
+ if (mask) {
+ /*
+ * At least some mask bits are one, so we need
+ * to write, but only shadow some bits.
+ */
+ u64 sval, tval; /* Shadowed, transient */
+
+ /*
+ * New shadow val is bits we don't want to touch,
+ * ORed with bits we do, that are intended for shadow.
+ */
+ if (ppd) {
+ sval = ppd->p_sendctrl & ~mask;
+ sval |= *data & SENDCTRL_SHADOWED & mask;
+ ppd->p_sendctrl = sval;
+ } else
+ sval = *data & SENDCTRL_SHADOWED & mask;
+ tval = sval | (*data & ~SENDCTRL_SHADOWED & mask);
+ qib_write_kreg(dd, idx, tval);
+ qib_write_kreg(dd, kr_scratch, 0Ull);
+ }
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ return only_32 ? 4 : 8;
+}
+
+static const struct diag_observer sendctrl_0_observer = {
+ sendctrl_hook, KREG_IDX(SendCtrl_0) * sizeof(u64),
+ KREG_IDX(SendCtrl_0) * sizeof(u64)
+};
+
+static const struct diag_observer sendctrl_1_observer = {
+ sendctrl_hook, KREG_IDX(SendCtrl_1) * sizeof(u64),
+ KREG_IDX(SendCtrl_1) * sizeof(u64)
+};
+
+static ushort sdma_fetch_prio = 8;
+module_param_named(sdma_fetch_prio, sdma_fetch_prio, ushort, S_IRUGO);
+MODULE_PARM_DESC(sdma_fetch_prio, "SDMA descriptor fetch priority");
+
+/* Besides logging QSFP events, we set appropriate TxDDS values */
+static void init_txdds_table(struct qib_pportdata *ppd, int override);
+
+static void qsfp_7322_event(struct work_struct *work)
+{
+ struct qib_qsfp_data *qd;
+ struct qib_pportdata *ppd;
+ unsigned long pwrup;
+ unsigned long flags;
+ int ret;
+ u32 le2;
+
+ qd = container_of(work, struct qib_qsfp_data, work);
+ ppd = qd->ppd;
+ pwrup = qd->t_insert +
+ msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC);
+
+ /* Delay for 20 msecs to allow ModPrs resistor to setup */
+ mdelay(QSFP_MODPRS_LAG_MSEC);
+
+ if (!qib_qsfp_mod_present(ppd)) {
+ ppd->cpspec->qsfp_data.modpresent = 0;
+ /* Set the physical link to disabled */
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_LINKV;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else {
+ /*
+ * Some QSFPs not only do not respond until the full power-up
+ * time, but may behave badly if we try. So hold off responding
+ * to insertion.
+ */
+ while (1) {
+ if (time_is_before_jiffies(pwrup))
+ break;
+ msleep(20);
+ }
+
+ ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
+
+ /*
+ * Need to change LE2 back to defaults if we couldn't
+ * read the cable type (to handle cable swaps), so do this
+ * even on failure to read cable information. We don't
+ * get here for QME, so IS_QME check not needed here.
+ */
+ if (!ret && !ppd->dd->cspec->r1) {
+ if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
+ le2 = LE2_QME;
+ else if (qd->cache.atten[1] >= qib_long_atten &&
+ QSFP_IS_CU(qd->cache.tech))
+ le2 = LE2_5m;
+ else
+ le2 = LE2_DEFAULT;
+ } else
+ le2 = LE2_DEFAULT;
+ ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
+ /*
+ * We always change parameters, since we can choose
+ * values for cables without eeproms, and the cable may have
+ * changed from a cable with full or partial eeprom content
+ * to one with partial or no content.
+ */
+ init_txdds_table(ppd, 0);
+ /* The physical link is being re-enabled only when the
+ * previous state was DISABLED and the VALID bit is not
+ * set. This should only happen when the cable has been
+ * physically pulled. */
+ if (!ppd->cpspec->qsfp_data.modpresent &&
+ (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) {
+ ppd->cpspec->qsfp_data.modpresent = 1;
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_LINKV;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+ }
+}
+
+/*
+ * There is little we can do but complain to the user if QSFP
+ * initialization fails.
+ */
+static void qib_init_7322_qsfp(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+ struct qib_qsfp_data *qd = &ppd->cpspec->qsfp_data;
+ struct qib_devdata *dd = ppd->dd;
+ u64 mod_prs_bit = QSFP_GPIO_MOD_PRS_N;
+
+ mod_prs_bit <<= (QSFP_GPIO_PORT2_SHIFT * ppd->hw_pidx);
+ qd->ppd = ppd;
+ qib_qsfp_init(qd, qsfp_7322_event);
+ spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+ dd->cspec->extctrl |= (mod_prs_bit << SYM_LSB(EXTCtrl, GPIOInvert));
+ dd->cspec->gpio_mask |= mod_prs_bit;
+ qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl);
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+}
+
+/*
+ * Called at device initialization time, and also if the txselect
+ * module parameter is changed. This is used for cables that don't
+ * have valid QSFP EEPROMs (not present, or attenuation is zero).
+ * We initialize to the default, then if there is a specific
+ * unit,port match, we use that (and set it immediately, for the
+ * current speed, if the link is at INIT or better).
+ * String format is "default# unit#,port#=# ... u,p=#", separators must
+ * be a SPACE character. A newline terminates. The u,p=# tuples may
+ * optionally have "u,p=#,#", where the final # is the H1 value
+ * The last specific match is used (actually, all are used, but last
+ * one is the one that winds up set); if none at all, fall back on default.
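+ * Illustrative example (hypothetical values): "2 1,1=5 1,2=6,10\n"
+ * sets the default index to 2, entry 5 for unit 1 port 1, and entry 6
+ * with an H1 value of 10 for unit 1 port 2.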
+ */
+static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
+{
+ char *nxt, *str;
+ u32 pidx, unit, port, deflt, h1;
+ unsigned long val;
+ int any = 0, seth1;
+ int txdds_size;
+
+ str = txselect_list;
+
+ /* default number is validated in setup_txselect() */
+ deflt = simple_strtoul(str, &nxt, 0);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ dd->pport[pidx].cpspec->no_eep = deflt;
+
+ txdds_size = TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ;
+ if (IS_QME(dd) || IS_QMH(dd))
+ txdds_size += TXDDS_MFG_SZ;
+
+ while (*nxt && nxt[1]) {
+ str = ++nxt;
+ unit = simple_strtoul(str, &nxt, 0);
+ if (nxt == str || !*nxt || *nxt != ',') {
+ while (*nxt && *nxt++ != ' ') /* skip to next, if any */
+ ;
+ continue;
+ }
+ str = ++nxt;
+ port = simple_strtoul(str, &nxt, 0);
+ if (nxt == str || *nxt != '=') {
+ while (*nxt && *nxt++ != ' ') /* skip to next, if any */
+ ;
+ continue;
+ }
+ str = ++nxt;
+ val = simple_strtoul(str, &nxt, 0);
+ if (nxt == str) {
+ while (*nxt && *nxt++ != ' ') /* skip to next, if any */
+ ;
+ continue;
+ }
+ if (val >= txdds_size)
+ continue;
+ seth1 = 0;
+ h1 = 0; /* gcc thinks it might be used uninitted */
+ if (*nxt == ',' && nxt[1]) {
+ str = ++nxt;
+ h1 = (u32)simple_strtoul(str, &nxt, 0);
+ if (nxt == str)
+ while (*nxt && *nxt++ != ' ') /* skip */
+ ;
+ else
+ seth1 = 1;
+ }
+ for (pidx = 0; dd->unit == unit && pidx < dd->num_pports;
+ ++pidx) {
+ struct qib_pportdata *ppd = &dd->pport[pidx];
+
+ if (ppd->port != port || !ppd->link_speed_supported)
+ continue;
+ ppd->cpspec->no_eep = val;
+ if (seth1)
+ ppd->cpspec->h1_val = h1;
+ /* now change the IBC and serdes, overriding generic */
+ init_txdds_table(ppd, 1);
+ /* Re-enable the physical state machine on mezz boards
+ * now that the correct settings have been set.
+ * QSFP boards are handled by the QSFP event handler. */
+ if (IS_QMH(dd) || IS_QME(dd))
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
+ any++;
+ }
+ if (*nxt == '\n')
+ break; /* done */
+ }
+ if (change && !any) {
+ /* no specific setting, use the default.
+ * Change the IBC and serdes, but since it's
+ * general, don't override specific settings.
+ */
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ if (dd->pport[pidx].link_speed_supported)
+ init_txdds_table(&dd->pport[pidx], 0);
+ }
+}
+
+/* handle the txselect parameter changing */
+static int setup_txselect(const char *str, struct kernel_param *kp)
+{
+ struct qib_devdata *dd;
+ unsigned long val;
+ char *n;
+ if (strlen(str) >= MAX_ATTEN_LEN) {
+ pr_info("txselect_values string too long\n");
+ return -ENOSPC;
+ }
+ val = simple_strtoul(str, &n, 0);
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ TXDDS_MFG_SZ)) {
+ pr_info("txselect_values must start with a number < %d\n",
+ TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
+ return -EINVAL;
+ }
+ strcpy(txselect_list, str);
+
+ list_for_each_entry(dd, &qib_dev_list, list)
+ if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
+ set_no_qsfp_atten(dd, 1);
+ return 0;
+}
+
+/*
+ * Write the final few registers that depend on some of the
+ * init setup. Done late in init, just before bringing up
+ * the serdes.
+ */
+static int qib_late_7322_initreg(struct qib_devdata *dd)
+{
+ int ret = 0, n;
+ u64 val;
+
+ qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize);
+ qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize);
+ qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt);
+ qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
+ val = qib_read_kreg64(dd, kr_sendpioavailaddr);
+ if (val != dd->pioavailregs_phys) {
+ qib_dev_err(dd,
+ "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n",
+ (unsigned long) dd->pioavailregs_phys,
+ (unsigned long long) val);
+ ret = -EINVAL;
+ }
+
+ n = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS;
+ qib_7322_txchk_change(dd, 0, n, TXCHK_CHG_TYPE_KERN, NULL);
+ /* driver sends get pkey, lid, etc. checking also, to catch bugs */
+ qib_7322_txchk_change(dd, 0, n, TXCHK_CHG_TYPE_ENAB1, NULL);
+
+ qib_register_observer(dd, &sendctrl_0_observer);
+ qib_register_observer(dd, &sendctrl_1_observer);
+
+ dd->control &= ~QLOGIC_IB_C_SDMAFETCHPRIOEN;
+ qib_write_kreg(dd, kr_control, dd->control);
+ /*
+ * Set SendDmaFetchPriority and init Tx params, including
+ * QSFP handler on boards that have QSFP.
+ * First set our default attenuation entry for cables that
+ * don't have valid attenuation.
+ */
+ set_no_qsfp_atten(dd, 0);
+ for (n = 0; n < dd->num_pports; ++n) {
+ struct qib_pportdata *ppd = dd->pport + n;
+
+ qib_write_kreg_port(ppd, krp_senddmaprioritythld,
+ sdma_fetch_prio & 0xf);
+ /* Initialize qsfp if present on board. */
+ if (dd->flags & QIB_HAS_QSFP)
+ qib_init_7322_qsfp(ppd);
+ }
+ dd->control |= QLOGIC_IB_C_SDMAFETCHPRIOEN;
+ qib_write_kreg(dd, kr_control, dd->control);
+
+ return ret;
+}
+
+/* per IB port errors. */
+#define SENDCTRL_PIBP (MASK_ACROSS(0, 1) | MASK_ACROSS(3, 3) | \
+ MASK_ACROSS(8, 15))
+#define RCVCTRL_PIBP (MASK_ACROSS(0, 17) | MASK_ACROSS(39, 41))
+#define ERRS_PIBP (MASK_ACROSS(57, 58) | MASK_ACROSS(54, 54) | \
+ MASK_ACROSS(36, 49) | MASK_ACROSS(29, 34) | MASK_ACROSS(14, 17) | \
+ MASK_ACROSS(0, 11))
+
+/*
+ * Write the initialization per-port registers that need to be done at
+ * driver load and after reset completes (i.e., that aren't done as part
+ * of other init procedures called from qib_init.c).
+ * Some of these should be redundant on reset, but play safe.
+ */
+static void write_7322_init_portregs(struct qib_pportdata *ppd)
+{
+ u64 val;
+ int i;
+
+ if (!ppd->link_speed_supported) {
+ /* no buffer credits for this port */
+ for (i = 1; i < 8; i++)
+ qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, 0);
+ qib_write_kreg_port(ppd, krp_ibcctrl_b, 0);
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+ return;
+ }
+
+ /*
+ * Set the number of supported virtual lanes in IBC,
+ * for flow control packet handling on unsupported VLs
+ */
+ val = qib_read_kreg_port(ppd, krp_ibsdtestiftx);
+ val &= ~SYM_MASK(IB_SDTEST_IF_TX_0, VL_CAP);
+ val |= (u64)(ppd->vls_supported - 1) <<
+ SYM_LSB(IB_SDTEST_IF_TX_0, VL_CAP);
+ qib_write_kreg_port(ppd, krp_ibsdtestiftx, val);
+
+ qib_write_kreg_port(ppd, krp_rcvbthqp, QIB_KD_QP);
+
+ /* enable tx header checking */
+ qib_write_kreg_port(ppd, krp_sendcheckcontrol, IBA7322_SENDCHK_PKEY |
+ IBA7322_SENDCHK_BTHQP | IBA7322_SENDCHK_SLID |
+ IBA7322_SENDCHK_RAW_IPV6 | IBA7322_SENDCHK_MINSZ);
+
+ qib_write_kreg_port(ppd, krp_ncmodectrl,
+ SYM_MASK(IBNCModeCtrl_0, ScrambleCapLocal));
+
+ /*
+ * Unconditionally clear the bufmask bits. If SDMA is
+ * enabled, we'll set them appropriately later.
+ */
+ qib_write_kreg_port(ppd, krp_senddmabufmask0, 0);
+ qib_write_kreg_port(ppd, krp_senddmabufmask1, 0);
+ qib_write_kreg_port(ppd, krp_senddmabufmask2, 0);
+ if (ppd->dd->cspec->r1)
+ ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, ForceCreditUpToDate);
+}
+
+/*
+ * Write the initialization per-device registers that need to be done at
+ * driver load and after reset completes (i.e., that aren't done as part
+ * of other init procedures called from qib_init.c). Also write per-port
+ * registers that are affected by overall device config, such as QP mapping.
+ * Some of these should be redundant on reset, but play safe.
+ */
+static void write_7322_initregs(struct qib_devdata *dd)
+{
+ struct qib_pportdata *ppd;
+ int i, pidx;
+ u64 val;
+
+ /* Set Multicast QPs received by port 2 to map to context one. */
+ qib_write_kreg(dd, KREG_IDX(RcvQPMulticastContext_1), 1);
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ unsigned n, regno;
+ unsigned long flags;
+
+ if (dd->n_krcv_queues < 2 ||
+ !dd->pport[pidx].link_speed_supported)
+ continue;
+
+ ppd = &dd->pport[pidx];
+
+ /* be paranoid against later code motion, etc. */
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+ ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvQPMapEnable);
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+
+ /* Initialize QP to context mapping */
+ regno = krp_rcvqpmaptable;
+ val = 0;
+ if (dd->num_pports > 1)
+ n = dd->first_user_ctxt / dd->num_pports;
+ else
+ n = dd->first_user_ctxt - 1;
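+		/*
+		 * Each rcvqpmaptable register packs six 5-bit context numbers;
+		 * a register is written after every sixth entry, and the final
+		 * partial register is written after the loop.
+		 */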
+ for (i = 0; i < 32; ) {
+ unsigned ctxt;
+
+ if (dd->num_pports > 1)
+ ctxt = (i % n) * dd->num_pports + pidx;
+ else if (i % n)
+ ctxt = (i % n) + 1;
+ else
+ ctxt = ppd->hw_pidx;
+ val |= ctxt << (5 * (i % 6));
+ i++;
+ if (i % 6 == 0) {
+ qib_write_kreg_port(ppd, regno, val);
+ val = 0;
+ regno++;
+ }
+ }
+ qib_write_kreg_port(ppd, regno, val);
+ }
+
+ /*
+ * Set up interrupt mitigation for kernel contexts, but
+ * not user contexts (user contexts use interrupts when
+ * stalled waiting for any packet, so want those interrupts
+ * right away).
+ */
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ dd->cspec->rcvavail_timeout[i] = rcv_int_timeout;
+ qib_write_kreg(dd, kr_rcvavailtimeout + i, rcv_int_timeout);
+ }
+
+ /*
+ * Initialize the (disabled) rcvflow tables. Application code
+ * will set up each flow as it uses the flow.
+ * Doesn't clear any of the error bits that might be set.
+ */
+ val = TIDFLOW_ERRBITS; /* these are W1C */
+ for (i = 0; i < dd->cfgctxts; i++) {
+ int flow;
+ for (flow = 0; flow < NUM_TIDFLOWS_CTXT; flow++)
+ qib_write_ureg(dd, ur_rcvflowtable+flow, val, i);
+ }
+
+ /*
+ * Dual-port cards init to dual-port recovery, single-port cards to
+ * the one port. Dual-port cards may later adjust to 1 port,
+ * and then back to dual port if both ports are connected.
+ */
+ if (dd->num_pports)
+ setup_7322_link_recovery(dd->pport, dd->num_pports > 1);
+}
+
+static int qib_init_7322_variables(struct qib_devdata *dd)
+{
+ struct qib_pportdata *ppd;
+ unsigned features, pidx, sbufcnt;
+ int ret, mtu;
+ u32 sbufs, updthresh;
+
+ /* pport structs are contiguous, allocated after devdata */
+ ppd = (struct qib_pportdata *)(dd + 1);
+ dd->pport = ppd;
+ ppd[0].dd = dd;
+ ppd[1].dd = dd;
+
+ dd->cspec = (struct qib_chip_specific *)(ppd + 2);
+
+ ppd[0].cpspec = (struct qib_chippport_specific *)(dd->cspec + 1);
+ ppd[1].cpspec = &ppd[0].cpspec[1];
+ ppd[0].cpspec->ppd = &ppd[0]; /* for autoneg_7322_work() */
+ ppd[1].cpspec->ppd = &ppd[1]; /* for autoneg_7322_work() */
+
+ spin_lock_init(&dd->cspec->rcvmod_lock);
+ spin_lock_init(&dd->cspec->gpio_lock);
+
+ /* we haven't yet set QIB_PRESENT, so use read directly */
+ dd->revision = readq(&dd->kregbase[kr_revision]);
+
+ if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
+ qib_dev_err(dd,
+ "Revision register read failure, giving up initialization\n");
+ ret = -ENODEV;
+ goto bail;
+ }
+ dd->flags |= QIB_PRESENT; /* now register routines work */
+
+ dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, ChipRevMajor);
+ dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, ChipRevMinor);
+ dd->cspec->r1 = dd->minrev == 1;
+
+ get_7322_chip_params(dd);
+ features = qib_7322_boardname(dd);
+
+ /* now that piobcnt2k and 4k set, we can allocate these */
+ sbufcnt = dd->piobcnt2k + dd->piobcnt4k +
+ NUM_VL15_BUFS + BITS_PER_LONG - 1;
+ sbufcnt /= BITS_PER_LONG;
+ dd->cspec->sendchkenable = kmalloc(sbufcnt *
+ sizeof(*dd->cspec->sendchkenable), GFP_KERNEL);
+ dd->cspec->sendgrhchk = kmalloc(sbufcnt *
+ sizeof(*dd->cspec->sendgrhchk), GFP_KERNEL);
+ dd->cspec->sendibchk = kmalloc(sbufcnt *
+ sizeof(*dd->cspec->sendibchk), GFP_KERNEL);
+ if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
+ !dd->cspec->sendibchk) {
+ qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n");
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ppd = dd->pport;
+
+ /*
+ * GPIO bits for TWSI data and clock,
+ * used for serial EEPROM.
+ */
+ dd->gpio_sda_num = _QIB_GPIO_SDA_NUM;
+ dd->gpio_scl_num = _QIB_GPIO_SCL_NUM;
+ dd->twsi_eeprom_dev = QIB_TWSI_EEPROM_DEV;
+
+ dd->flags |= QIB_HAS_INTX | QIB_HAS_LINK_LATENCY |
+ QIB_NODMA_RTAIL | QIB_HAS_VLSUPP | QIB_HAS_HDRSUPP |
+ QIB_HAS_THRESH_UPDATE |
+ (sdma_idle_cnt ? QIB_HAS_SDMA_TIMEOUT : 0);
+ dd->flags |= qib_special_trigger ?
+ QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA;
+
+ /*
+ * Set up initial values. These may change when PAT is enabled, but
+ * we need these to do initial chip register accesses.
+ */
+ qib_7322_set_baseaddrs(dd);
+
+ mtu = ib_mtu_enum_to_int(qib_ibmtu);
+ if (mtu == -1)
+ mtu = QIB_DEFAULT_MTU;
+
+ dd->cspec->int_enable_mask = QIB_I_BITSEXTANT;
+ /* all hwerrors become interrupts, unless special purposed */
+ dd->cspec->hwerrmask = ~0ULL;
+ /* link_recovery setup causes these errors, so ignore them,
+ * other than clearing them when they occur */
+ dd->cspec->hwerrmask &=
+ ~(SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_0) |
+ SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_1) |
+ HWE_MASK(LATriggered));
+
+ for (pidx = 0; pidx < NUM_IB_PORTS; ++pidx) {
+ struct qib_chippport_specific *cp = ppd->cpspec;
+ ppd->link_speed_supported = features & PORT_SPD_CAP;
+ features >>= PORT_SPD_CAP_SHIFT;
+ if (!ppd->link_speed_supported) {
+ /* single port mode (7340, or configured) */
+ dd->skip_kctxt_mask |= 1 << pidx;
+ if (pidx == 0) {
+ /* Make sure port is disabled. */
+ qib_write_kreg_port(ppd, krp_rcvctrl, 0);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, 0);
+ ppd[0] = ppd[1];
+ dd->cspec->hwerrmask &= ~(SYM_MASK(HwErrMask,
+ IBSerdesPClkNotDetectMask_0)
+ | SYM_MASK(HwErrMask,
+ SDmaMemReadErrMask_0));
+ dd->cspec->int_enable_mask &= ~(
+ SYM_MASK(IntMask, SDmaCleanupDoneMask_0) |
+ SYM_MASK(IntMask, SDmaIdleIntMask_0) |
+ SYM_MASK(IntMask, SDmaProgressIntMask_0) |
+ SYM_MASK(IntMask, SDmaIntMask_0) |
+ SYM_MASK(IntMask, ErrIntMask_0) |
+ SYM_MASK(IntMask, SendDoneIntMask_0));
+ } else {
+ /* Make sure port is disabled. */
+ qib_write_kreg_port(ppd, krp_rcvctrl, 0);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, 0);
+ dd->cspec->hwerrmask &= ~(SYM_MASK(HwErrMask,
+ IBSerdesPClkNotDetectMask_1)
+ | SYM_MASK(HwErrMask,
+ SDmaMemReadErrMask_1));
+ dd->cspec->int_enable_mask &= ~(
+ SYM_MASK(IntMask, SDmaCleanupDoneMask_1) |
+ SYM_MASK(IntMask, SDmaIdleIntMask_1) |
+ SYM_MASK(IntMask, SDmaProgressIntMask_1) |
+ SYM_MASK(IntMask, SDmaIntMask_1) |
+ SYM_MASK(IntMask, ErrIntMask_1) |
+ SYM_MASK(IntMask, SendDoneIntMask_1));
+ }
+ continue;
+ }
+
+ dd->num_pports++;
+ ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ if (ret) {
+ dd->num_pports--;
+ goto bail;
+ }
+
+ ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+ ppd->link_width_enabled = IB_WIDTH_4X;
+ ppd->link_speed_enabled = ppd->link_speed_supported;
+ /*
+ * Set the initial values to reasonable default, will be set
+ * for real when link is up.
+ */
+ ppd->link_width_active = IB_WIDTH_4X;
+ ppd->link_speed_active = QIB_IB_SDR;
+ ppd->delay_mult = ib_rate_to_delay[IB_RATE_10_GBPS];
+ switch (qib_num_cfg_vls) {
+ case 1:
+ ppd->vls_supported = IB_VL_VL0;
+ break;
+ case 2:
+ ppd->vls_supported = IB_VL_VL0_1;
+ break;
+ default:
+ qib_devinfo(dd->pcidev,
+ "Invalid num_vls %u, using 4 VLs\n",
+ qib_num_cfg_vls);
+ qib_num_cfg_vls = 4;
+ /* fall through */
+ case 4:
+ ppd->vls_supported = IB_VL_VL0_3;
+ break;
+ case 8:
+ if (mtu <= 2048)
+ ppd->vls_supported = IB_VL_VL0_7;
+ else {
+ qib_devinfo(dd->pcidev,
+ "Invalid num_vls %u for MTU %d "
+ ", using 4 VLs\n",
+ qib_num_cfg_vls, mtu);
+ ppd->vls_supported = IB_VL_VL0_3;
+ qib_num_cfg_vls = 4;
+ }
+ break;
+ }
+ ppd->vls_operational = ppd->vls_supported;
+
+ init_waitqueue_head(&cp->autoneg_wait);
+ INIT_DELAYED_WORK(&cp->autoneg_work,
+ autoneg_7322_work);
+ if (ppd->dd->cspec->r1)
+ INIT_DELAYED_WORK(&cp->ipg_work, ipg_7322_work);
+
+ /*
+ * For Mez and similar cards, no qsfp info, so do
+ * the "cable info" setup here. Can be overridden
+ * in adapter-specific routines.
+ */
+ if (!(dd->flags & QIB_HAS_QSFP)) {
+ if (!IS_QMH(dd) && !IS_QME(dd))
+ qib_devinfo(dd->pcidev,
+ "IB%u:%u: Unknown mezzanine card type\n",
+ dd->unit, ppd->port);
+ cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME;
+ /*
+ * Choose center value as default tx serdes setting
+ * until changed through module parameter.
+ */
+ ppd->cpspec->no_eep = IS_QMH(dd) ?
+ TXDDS_TABLE_SZ + 2 : TXDDS_TABLE_SZ + 4;
+ } else
+ cp->h1_val = H1_FORCE_VAL;
+
+ /* Avoid writes to chip for mini_init */
+ if (!qib_mini_init)
+ write_7322_init_portregs(ppd);
+
+ init_timer(&cp->chase_timer);
+ cp->chase_timer.function = reenable_chase;
+ cp->chase_timer.data = (unsigned long)ppd;
+
+ ppd++;
+ }
+
+ dd->rcvhdrentsize = qib_rcvhdrentsize ?
+ qib_rcvhdrentsize : QIB_RCVHDR_ENTSIZE;
+ dd->rcvhdrsize = qib_rcvhdrsize ?
+ qib_rcvhdrsize : QIB_DFLT_RCVHDRSIZE;
+ dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
+
+ /* we always allocate at least 2048 bytes for eager buffers */
+ dd->rcvegrbufsize = max(mtu, 2048);
+ BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+ dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
+
+ qib_7322_tidtemplate(dd);
+
+ /*
+ * We can request a receive interrupt for 1 or
+ * more packets from current offset.
+ */
+ dd->rhdrhead_intr_off =
+ (u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT;
+
+ /* set up the stats timer; the add_timer is done at end of init */
+ init_timer(&dd->stats_timer);
+ dd->stats_timer.function = qib_get_7322_faststats;
+ dd->stats_timer.data = (unsigned long) dd;
+
+ dd->ureg_align = 0x10000; /* 64KB alignment */
+
+ dd->piosize2kmax_dwords = dd->piosize2k >> 2;
+
+ qib_7322_config_ctxts(dd);
+ qib_set_ctxtcnt(dd);
+
+ if (qib_wc_pat) {
+ resource_size_t vl15off;
+ /*
+ * We do not set WC on the VL15 buffers to avoid
+ * a rare problem with unaligned writes from
+ * interrupt-flushed store buffers, so we need
+ * to map those separately here. We can't solve
+ * this for the rarely used mtrr case.
+ */
+ ret = init_chip_wc_pat(dd, 0);
+ if (ret)
+ goto bail;
+
+ /* vl15 buffers start just after the 4k buffers */
+ vl15off = dd->physaddr + (dd->piobufbase >> 32) +
+ dd->piobcnt4k * dd->align4k;
+ dd->piovl15base = ioremap_nocache(vl15off,
+ NUM_VL15_BUFS * dd->align4k);
+ if (!dd->piovl15base) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ qib_7322_set_baseaddrs(dd); /* set chip access pointers now */
+
+ ret = 0;
+ if (qib_mini_init)
+ goto bail;
+ if (!dd->num_pports) {
+ qib_dev_err(dd, "No ports enabled, giving up initialization\n");
+ goto bail; /* no error, so the cause can still be examined */
+ }
+
+ write_7322_initregs(dd);
+ ret = qib_create_ctxts(dd);
+ init_7322_cntrnames(dd);
+
+ updthresh = 8U; /* update threshold */
+
+ /*
+ * Use all of the 4KB buffers for kernel SDMA, or none if SDMA is
+ * disabled. Reserve the greater of the update threshold or 3
+ * buffers for other kernel use, such as sending SMI, MAD, and
+ * ACK packets; if SDMA is not enabled, all the 4KB buffers go to
+ * the kernel instead.
+ * If the reservation were less than the update threshold, we could
+ * wait a long time for an update. Coded this way because we
+ * sometimes change the update threshold for various reasons,
+ * and we want this to remain robust.
+ */
+ if (dd->flags & QIB_HAS_SEND_DMA) {
+ dd->cspec->sdmabufcnt = dd->piobcnt4k;
+ sbufs = updthresh > 3 ? updthresh : 3;
+ } else {
+ dd->cspec->sdmabufcnt = 0;
+ sbufs = dd->piobcnt4k;
+ }
+ dd->cspec->lastbuf_for_pio = dd->piobcnt2k + dd->piobcnt4k -
+ dd->cspec->sdmabufcnt;
+ dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
+ dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+ dd->last_pio = dd->cspec->lastbuf_for_pio;
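+ /* PIO bufs available per user context, after kernel and SDMA reservations */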
+ dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ?
+ dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0;
+
+ /*
+ * If we have 16 user contexts, we will have 7 sbufs
+ * per context, so reduce the update threshold to match. We
+ * want the update before we actually run out, so at low
+ * pbufs/ctxt give ourselves some margin.
+ */
+ if (dd->pbufsctxt >= 2 && dd->pbufsctxt - 2 < updthresh)
+ updthresh = dd->pbufsctxt - 2;
+ dd->cspec->updthresh_dflt = updthresh;
+ dd->cspec->updthresh = updthresh;
+
+ /* before full enable, no interrupts, no locking needed */
+ dd->sendctrl |= ((updthresh & SYM_RMASK(SendCtrl, AvailUpdThld))
+ << SYM_LSB(SendCtrl, AvailUpdThld)) |
+ SYM_MASK(SendCtrl, SendBufAvailPad64Byte);
+
+ dd->psxmitwait_supported = 1;
+ dd->psxmitwait_check_rate = QIB_7322_PSXMITWAIT_CHECK_RATE;
+bail:
+ if (!dd->ctxtcnt)
+ dd->ctxtcnt = 1; /* for other initialization code */
+
+ return ret;
+}
+
+static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *ppd, u64 pbc,
+ u32 *pbufnum)
+{
+ u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK;
+ struct qib_devdata *dd = ppd->dd;
+
+ /* last is same for 2k and 4k, because we use 4k if all 2k busy */
+ if (pbc & PBC_7322_VL15_SEND) {
+ first = dd->piobcnt2k + dd->piobcnt4k + ppd->hw_pidx;
+ last = first;
+ } else {
+ if ((plen + 1) > dd->piosize2kmax_dwords)
+ first = dd->piobcnt2k;
+ else
+ first = 0;
+ last = dd->cspec->lastbuf_for_pio;
+ }
+ return qib_getsendbuf_range(dd, pbufnum, first, last);
+}
+
+static void qib_set_cntr_7322_sample(struct qib_pportdata *ppd, u32 intv,
+ u32 start)
+{
+ qib_write_kreg_port(ppd, krp_psinterval, intv);
+ qib_write_kreg_port(ppd, krp_psstart, start);
+}
+
+/*
+ * Must be called with sdma_lock held, or before init finished.
+ */
+static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
+{
+ qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
+}
+
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+static void dump_sdma_7322_state(struct qib_pportdata *ppd)
+{
+ u64 reg, reg1, reg2;
+
+ reg = qib_read_kreg_port(ppd, krp_senddmastatus);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmastatus: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_sendctrl);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sendctrl: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmabase);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmabase: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+
+ /* get bufuse bits, clear them, and print them again if non-zero */
+ reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use1, reg1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+ qib_write_kreg_port(ppd, krp_senddmabuf_use2, reg2);
+ /* 0 and 1 should always be zero, so print as short form */
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+ reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+ reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+ reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+ /* 0 and 1 should always be zero, so print as short form */
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+ reg, reg1, reg2);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmatail);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmatail: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmahead);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmahead: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmaheadaddr: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmalengen);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmalengen: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmadesccnt: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmaidlecnt: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmapriorityhld: 0x%016llx\n", reg);
+
+ reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA senddmareloadcnt: 0x%016llx\n", reg);
+
+ dump_sdma_state(ppd);
+}
+
+static struct sdma_set_state_action sdma_7322_action_table[] = {
+ [qib_sdma_state_s00_hw_down] = {
+ .go_s99_running_tofalse = 1,
+ .op_enable = 0,
+ .op_intenable = 0,
+ .op_halt = 0,
+ .op_drain = 0,
+ },
+ [qib_sdma_state_s10_hw_start_up_wait] = {
+ .op_enable = 0,
+ .op_intenable = 1,
+ .op_halt = 1,
+ .op_drain = 0,
+ },
+ [qib_sdma_state_s20_idle] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ .op_drain = 0,
+ },
+ [qib_sdma_state_s30_sw_clean_up_wait] = {
+ .op_enable = 0,
+ .op_intenable = 1,
+ .op_halt = 1,
+ .op_drain = 0,
+ },
+ [qib_sdma_state_s40_hw_clean_up_wait] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ .op_drain = 0,
+ },
+ [qib_sdma_state_s50_hw_halt_wait] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 1,
+ .op_drain = 1,
+ },
+ [qib_sdma_state_s99_running] = {
+ .op_enable = 1,
+ .op_intenable = 1,
+ .op_halt = 0,
+ .op_drain = 0,
+ .go_s99_running_totrue = 1,
+ },
+};
+
+static void qib_7322_sdma_init_early(struct qib_pportdata *ppd)
+{
+ ppd->sdma_state.set_state_action = sdma_7322_action_table;
+}
+
+static int init_sdma_7322_regs(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ unsigned lastbuf, erstbuf;
+ u64 senddmabufmask[3] = { 0 };
+ int n, ret = 0;
+
+ qib_write_kreg_port(ppd, krp_senddmabase, ppd->sdma_descq_phys);
+ qib_sdma_7322_setlengen(ppd);
+ qib_sdma_update_7322_tail(ppd, 0); /* Set SendDmaTail */
+ qib_write_kreg_port(ppd, krp_senddmareloadcnt, sdma_idle_cnt);
+ qib_write_kreg_port(ppd, krp_senddmadesccnt, 0);
+ qib_write_kreg_port(ppd, krp_senddmaheadaddr, ppd->sdma_head_phys);
+
+ if (dd->num_pports)
+ n = dd->cspec->sdmabufcnt / dd->num_pports; /* no remainder */
+ else
+ n = dd->cspec->sdmabufcnt; /* failsafe for init */
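+ /* SDMA uses the last sdmabufcnt PIO buffers; dual-port cards split
+ * them, with port 1 taking the first half and port 2 the second */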
+ erstbuf = (dd->piobcnt2k + dd->piobcnt4k) -
+ ((dd->num_pports == 1 || ppd->port == 2) ? n :
+ dd->cspec->sdmabufcnt);
+ lastbuf = erstbuf + n;
+
+ ppd->sdma_state.first_sendbuf = erstbuf;
+ ppd->sdma_state.last_sendbuf = lastbuf;
+ for (; erstbuf < lastbuf; ++erstbuf) {
+ unsigned word = erstbuf / BITS_PER_LONG;
+ unsigned bit = erstbuf & (BITS_PER_LONG - 1);
+
+ BUG_ON(word >= 3);
+ senddmabufmask[word] |= 1ULL << bit;
+ }
+ qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]);
+ qib_write_kreg_port(ppd, krp_senddmabufmask1, senddmabufmask[1]);
+ qib_write_kreg_port(ppd, krp_senddmabufmask2, senddmabufmask[2]);
+ return ret;
+}
+
+/* sdma_lock must be held */
+static u16 qib_sdma_7322_gethead(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int sane;
+ int use_dmahead;
+ u16 swhead;
+ u16 swtail;
+ u16 cnt;
+ u16 hwhead;
+
+ use_dmahead = __qib_sdma_running(ppd) &&
+ (dd->flags & QIB_HAS_SDMA_TIMEOUT);
+retry:
+ hwhead = use_dmahead ?
+ (u16) le64_to_cpu(*ppd->sdma_head_dma) :
+ (u16) qib_read_kreg_port(ppd, krp_senddmahead);
+
+ swhead = ppd->sdma_descq_head;
+ swtail = ppd->sdma_descq_tail;
+ cnt = ppd->sdma_descq_cnt;
+
+ if (swhead < swtail)
+ /* not wrapped */
+ sane = (hwhead >= swhead) & (hwhead <= swtail);
+ else if (swhead > swtail)
+ /* wrapped around */
+ sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
+ (hwhead <= swtail);
+ else
+ /* empty */
+ sane = (hwhead == swhead);
+
+ if (unlikely(!sane)) {
+ if (use_dmahead) {
+ /* try one more time, directly from the register */
+ use_dmahead = 0;
+ goto retry;
+ }
+ /* proceed as if no progress */
+ hwhead = swhead;
+ }
+
+ return hwhead;
+}
+
+static int qib_sdma_7322_busy(struct qib_pportdata *ppd)
+{
+ u64 hwstatus = qib_read_kreg_port(ppd, krp_senddmastatus);
+
+ return (hwstatus & SYM_MASK(SendDmaStatus_0, ScoreBoardDrainInProg)) ||
+ (hwstatus & SYM_MASK(SendDmaStatus_0, HaltInProg)) ||
+ !(hwstatus & SYM_MASK(SendDmaStatus_0, InternalSDmaHalt)) ||
+ !(hwstatus & SYM_MASK(SendDmaStatus_0, ScbEmpty));
+}
+
+/*
+ * Compute the amount of delay before sending the next packet if the
+ * port's send rate differs from the static rate set for the QP.
+ * The delay affects the next packet, and the amount of the delay is
+ * based on the length of this packet.
+ */
+static u32 qib_7322_setpbc_control(struct qib_pportdata *ppd, u32 plen,
+ u8 srate, u8 vl)
+{
+ u8 snd_mult = ppd->delay_mult;
+ u8 rcv_mult = ib_rate_to_delay[srate];
+ u32 ret;
+
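+ /* delay only when the QP's static rate (rcv_mult) is slower than
+ * the port's rate; the delay scales with the packet length */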
+ ret = rcv_mult > snd_mult ? ((plen + 1) >> 1) * snd_mult : 0;
+
+ /* Indicate VL15, else set the VL in the control word */
+ if (vl == 15)
+ ret |= PBC_7322_VL15_SEND_CTRL;
+ else
+ ret |= vl << PBC_VL_NUM_LSB;
+ ret |= ((u32)(ppd->hw_pidx)) << PBC_PORT_SEL_LSB;
+
+ return ret;
+}
+
+/*
+ * Enable the per-port VL15 send buffers for use.
+ * They follow the rest of the buffers, without a config parameter.
+ * This was in initregs, but that is done before the shadow
+ * is set up, and this has to be done after the shadow is
+ * set up.
+ */
+static void qib_7322_initvl15_bufs(struct qib_devdata *dd)
+{
+ unsigned vl15bufs;
+
+ vl15bufs = dd->piobcnt2k + dd->piobcnt4k;
+ qib_chg_pioavailkernel(dd, vl15bufs, NUM_VL15_BUFS,
+ TXCHK_CHG_TYPE_KERN, NULL);
+}
+
+static void qib_7322_init_ctxt(struct qib_ctxtdata *rcd)
+{
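+ /* kernel contexts (one per IB port) split KCTXT0_EGRCNT; user
+ * contexts each get cspec->rcvegrcnt, placed after the kernel TIDs */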
+ if (rcd->ctxt < NUM_IB_PORTS) {
+ if (rcd->dd->num_pports > 1) {
+ rcd->rcvegrcnt = KCTXT0_EGRCNT / 2;
+ rcd->rcvegr_tid_base = rcd->ctxt ? rcd->rcvegrcnt : 0;
+ } else {
+ rcd->rcvegrcnt = KCTXT0_EGRCNT;
+ rcd->rcvegr_tid_base = 0;
+ }
+ } else {
+ rcd->rcvegrcnt = rcd->dd->cspec->rcvegrcnt;
+ rcd->rcvegr_tid_base = KCTXT0_EGRCNT +
+ (rcd->ctxt - NUM_IB_PORTS) * rcd->rcvegrcnt;
+ }
+}
+
+#define QTXSLEEPS 5000
+static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start,
+ u32 len, u32 which, struct qib_ctxtdata *rcd)
+{
+ int i;
+ const int last = start + len - 1;
+ const int lastr = last / BITS_PER_LONG;
+ u32 sleeps = 0;
+ int wait = rcd != NULL;
+ unsigned long flags;
+
+ while (wait) {
+ unsigned long shadow;
+ int cstart, previ = -1;
+
+ /*
+ * When flipping from kernel to user, we can't change
+ * the checking type if the buffer is allocated to the
+ * driver. It's OK in the other direction, because that
+ * happens at close, and we have just disarmed all the
+ * buffers. All the kernel-to-kernel changes are also
+ * OK.
+ */
+ for (cstart = start; cstart <= last; cstart++) {
+ i = ((2 * cstart) + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT)
+ / BITS_PER_LONG;
+ if (i != previ) {
+ shadow = (unsigned long)
+ le64_to_cpu(dd->pioavailregs_dma[i]);
+ previ = i;
+ }
+ if (test_bit(((2 * cstart) +
+ QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT)
+ % BITS_PER_LONG, &shadow))
+ break;
+ }
+
+ if (cstart > last)
+ break;
+
+ if (sleeps == QTXSLEEPS)
+ break;
+ /* make sure we see an updated copy next time around */
+ sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ sleeps++;
+ msleep(20);
+ }
+
+ switch (which) {
+ case TXCHK_CHG_TYPE_DIS1:
+ /*
+ * disable checking on a range; used by diags; just
+ * one buffer, but still written generically
+ */
+ for (i = start; i <= last; i++)
+ clear_bit(i, dd->cspec->sendchkenable);
+ break;
+
+ case TXCHK_CHG_TYPE_ENAB1:
+ /*
+ * (re)enable checking on a range; used by diags; just
+ * one buffer, but still written generically; read
+ * scratch to be sure buffer actually triggered, not
+ * just flushed from processor.
+ */
+ qib_read_kreg32(dd, kr_scratch);
+ for (i = start; i <= last; i++)
+ set_bit(i, dd->cspec->sendchkenable);
+ break;
+
+ case TXCHK_CHG_TYPE_KERN:
+ /* usable by kernel */
+ for (i = start; i <= last; i++) {
+ set_bit(i, dd->cspec->sendibchk);
+ clear_bit(i, dd->cspec->sendgrhchk);
+ }
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /* see if we need to raise avail update threshold */
+ for (i = dd->first_user_ctxt;
+ dd->cspec->updthresh != dd->cspec->updthresh_dflt
+ && i < dd->cfgctxts; i++)
+ if (dd->rcd[i] && dd->rcd[i]->subctxt_cnt &&
+ ((dd->rcd[i]->piocnt / dd->rcd[i]->subctxt_cnt) - 1)
+ < dd->cspec->updthresh_dflt)
+ break;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ if (i == dd->cfgctxts) {
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ dd->cspec->updthresh = dd->cspec->updthresh_dflt;
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld);
+ dd->sendctrl |= (dd->cspec->updthresh &
+ SYM_RMASK(SendCtrl, AvailUpdThld)) <<
+ SYM_LSB(SendCtrl, AvailUpdThld);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ }
+ break;
+
+ case TXCHK_CHG_TYPE_USER:
+ /* for user process */
+ for (i = start; i <= last; i++) {
+ clear_bit(i, dd->cspec->sendibchk);
+ set_bit(i, dd->cspec->sendgrhchk);
+ }
+ spin_lock_irqsave(&dd->sendctrl_lock, flags);
+ if (rcd && rcd->subctxt_cnt && ((rcd->piocnt
+ / rcd->subctxt_cnt) - 1) < dd->cspec->updthresh) {
+ dd->cspec->updthresh = (rcd->piocnt /
+ rcd->subctxt_cnt) - 1;
+ dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld);
+ dd->sendctrl |= (dd->cspec->updthresh &
+ SYM_RMASK(SendCtrl, AvailUpdThld))
+ << SYM_LSB(SendCtrl, AvailUpdThld);
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+ } else
+ spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+ break;
+
+ default:
+ break;
+ }
+
+ for (i = start / BITS_PER_LONG; which >= 2 && i <= lastr; ++i)
+ qib_write_kreg(dd, kr_sendcheckmask + i,
+ dd->cspec->sendchkenable[i]);
+
+ for (i = start / BITS_PER_LONG; which < 2 && i <= lastr; ++i) {
+ qib_write_kreg(dd, kr_sendgrhcheckmask + i,
+ dd->cspec->sendgrhchk[i]);
+ qib_write_kreg(dd, kr_sendibpktmask + i,
+ dd->cspec->sendibchk[i]);
+ }
+
+ /*
+ * Be sure whatever we did was seen by the chip and acted upon,
+ * before we return. Mostly important for which >= 2.
+ */
+ qib_read_kreg32(dd, kr_scratch);
+}
+
+
+/* useful for trigger analyzers, etc. */
+static void writescratch(struct qib_devdata *dd, u32 val)
+{
+ qib_write_kreg(dd, kr_scratch, val);
+}
+
+/* Dummy for now, use chip regs soon */
+static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum)
+{
+ return -ENXIO;
+}
+
+/**
+ * qib_init_iba7322_funcs - set up the chip-specific function pointers
+ * @pdev: the pci_dev for the qlogic_ib device
+ * @ent: pci_device_id struct for this dev
+ *
+ * Also allocates, inits, and returns the devdata struct for this
+ * device instance
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
+ */
+struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct qib_devdata *dd;
+ int ret, i;
+ u32 tabsize, actual_cnt = 0;
+
+ dd = qib_alloc_devdata(pdev,
+ NUM_IB_PORTS * sizeof(struct qib_pportdata) +
+ sizeof(struct qib_chip_specific) +
+ NUM_IB_PORTS * sizeof(struct qib_chippport_specific));
+ if (IS_ERR(dd))
+ goto bail;
+
+ dd->f_bringup_serdes = qib_7322_bringup_serdes;
+ dd->f_cleanup = qib_setup_7322_cleanup;
+ dd->f_clear_tids = qib_7322_clear_tids;
+ dd->f_free_irq = qib_7322_free_irq;
+ dd->f_get_base_info = qib_7322_get_base_info;
+ dd->f_get_msgheader = qib_7322_get_msgheader;
+ dd->f_getsendbuf = qib_7322_getsendbuf;
+ dd->f_gpio_mod = gpio_7322_mod;
+ dd->f_eeprom_wen = qib_7322_eeprom_wen;
+ dd->f_hdrqempty = qib_7322_hdrqempty;
+ dd->f_ib_updown = qib_7322_ib_updown;
+ dd->f_init_ctxt = qib_7322_init_ctxt;
+ dd->f_initvl15_bufs = qib_7322_initvl15_bufs;
+ dd->f_intr_fallback = qib_7322_intr_fallback;
+ dd->f_late_initreg = qib_late_7322_initreg;
+ dd->f_setpbc_control = qib_7322_setpbc_control;
+ dd->f_portcntr = qib_portcntr_7322;
+ dd->f_put_tid = qib_7322_put_tid;
+ dd->f_quiet_serdes = qib_7322_mini_quiet_serdes;
+ dd->f_rcvctrl = rcvctrl_7322_mod;
+ dd->f_read_cntrs = qib_read_7322cntrs;
+ dd->f_read_portcntrs = qib_read_7322portcntrs;
+ dd->f_reset = qib_do_7322_reset;
+ dd->f_init_sdma_regs = init_sdma_7322_regs;
+ dd->f_sdma_busy = qib_sdma_7322_busy;
+ dd->f_sdma_gethead = qib_sdma_7322_gethead;
+ dd->f_sdma_sendctrl = qib_7322_sdma_sendctrl;
+ dd->f_sdma_set_desc_cnt = qib_sdma_set_7322_desc_cnt;
+ dd->f_sdma_update_tail = qib_sdma_update_7322_tail;
+ dd->f_sendctrl = sendctrl_7322_mod;
+ dd->f_set_armlaunch = qib_set_7322_armlaunch;
+ dd->f_set_cntr_sample = qib_set_cntr_7322_sample;
+ dd->f_iblink_state = qib_7322_iblink_state;
+ dd->f_ibphys_portstate = qib_7322_phys_portstate;
+ dd->f_get_ib_cfg = qib_7322_get_ib_cfg;
+ dd->f_set_ib_cfg = qib_7322_set_ib_cfg;
+ dd->f_set_ib_loopback = qib_7322_set_loopback;
+ dd->f_get_ib_table = qib_7322_get_ib_table;
+ dd->f_set_ib_table = qib_7322_set_ib_table;
+ dd->f_set_intr_state = qib_7322_set_intr_state;
+ dd->f_setextled = qib_setup_7322_setextled;
+ dd->f_txchk_change = qib_7322_txchk_change;
+ dd->f_update_usrhead = qib_update_7322_usrhead;
+ dd->f_wantpiobuf_intr = qib_wantpiobuf_7322_intr;
+ dd->f_xgxs_reset = qib_7322_mini_pcs_reset;
+ dd->f_sdma_hw_clean_up = qib_7322_sdma_hw_clean_up;
+ dd->f_sdma_hw_start_up = qib_7322_sdma_hw_start_up;
+ dd->f_sdma_init_early = qib_7322_sdma_init_early;
+ dd->f_writescratch = writescratch;
+ dd->f_tempsense_rd = qib_7322_tempsense_rd;
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dd->f_notify_dca = qib_7322_notify_dca;
+#endif
+ /*
+ * Do remaining PCIe setup and save PCIe values in dd.
+ * Any error printing is already done by the init code.
+ * On return, we have the chip mapped, but chip registers
+ * are not set up until start of qib_init_7322_variables.
+ */
+ ret = qib_pcie_ddinit(dd, pdev, ent);
+ if (ret < 0)
+ goto bail_free;
+
+ /* initialize chip-specific variables */
+ ret = qib_init_7322_variables(dd);
+ if (ret)
+ goto bail_cleanup;
+
+ if (qib_mini_init || !dd->num_pports)
+ goto bail;
+
+ /*
+ * Determine number of vectors we want; depends on port count
+ * and number of configured kernel receive queues actually used.
+ * Should also depend on whether sdma is enabled or not, but
+ * that's such a rare testing case it's not worth worrying about.
+ */
+ tabsize = dd->first_user_ctxt + ARRAY_SIZE(irq_table);
+ for (i = 0; i < tabsize; i++)
+ if ((i < ARRAY_SIZE(irq_table) &&
+ irq_table[i].port <= dd->num_pports) ||
+ (i >= ARRAY_SIZE(irq_table) &&
+ dd->rcd[i - ARRAY_SIZE(irq_table)]))
+ actual_cnt++;
+ /* reduce by ctxts < 2 */
+ if (qib_krcvq01_no_msi)
+ actual_cnt -= dd->num_pports;
+
+ tabsize = actual_cnt;
+ dd->cspec->msix_entries = kzalloc(tabsize *
+ sizeof(struct qib_msix_entry), GFP_KERNEL);
+ if (!dd->cspec->msix_entries) {
+ qib_dev_err(dd, "No memory for MSIx table\n");
+ tabsize = 0;
+ }
+ for (i = 0; i < tabsize; i++)
+ dd->cspec->msix_entries[i].msix.entry = i;
+
+ if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
+ qib_dev_err(dd,
+ "Failed to setup PCIe or interrupts; continuing anyway\n");
+ /* may be less than we wanted, if not enough available */
+ dd->cspec->num_msix_entries = tabsize;
+
+ /* setup interrupt handler */
+ qib_setup_7322_interrupt(dd, 1);
+
+ /* clear diagctrl register, in case diags were running and crashed */
+ qib_write_kreg(dd, kr_hwdiagctrl, 0);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ if (!dca_add_requester(&pdev->dev)) {
+ qib_devinfo(dd->pcidev, "DCA enabled\n");
+ dd->flags |= QIB_DCA_ENABLED;
+ qib_setup_dca(dd);
+ }
+#endif
+ goto bail;
+
+bail_cleanup:
+ qib_pcie_ddcleanup(dd);
+bail_free:
+ qib_free_devdata(dd);
+ dd = ERR_PTR(ret);
+bail:
+ return dd;
+}
+
+/*
+ * Set the table entry at the specified index from the specified table.
+ * There are 3 * TXDDS_TABLE_SZ entries in all per port, with the first
+ * TXDDS_TABLE_SZ for SDR, the next for DDR, and the last for QDR.
+ * 'idx' below addresses the correct entry, while its 4 LSBs select the
+ * corresponding entry (one of TXDDS_TABLE_SZ) from the selected table.
+ */
+#define DDS_ENT_AMP_LSB 14
+#define DDS_ENT_MAIN_LSB 9
+#define DDS_ENT_POST_LSB 5
+#define DDS_ENT_PRE_XTRA_LSB 3
+#define DDS_ENT_PRE_LSB 0
+
+/*
+ * Set one entry in the TxDDS table for the specified port.
+ * ridx picks one of the entries, while tp points
+ * to the appropriate table entry.
+ */
+static void set_txdds(struct qib_pportdata *ppd, int ridx,
+ const struct txdds_ent *tp)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 pack_ent;
+ int regidx;
+
+ /* Get correct offset in chip-space, and in source table */
+ regidx = KREG_IBPORT_IDX(IBSD_DDS_MAP_TABLE) + ridx;
+ /*
+ * We do not use qib_write_kreg_port() because it was intended
+ * only for registers in the lower "port specific" pages.
+ * So do index calculation by hand.
+ */
+ if (ppd->hw_pidx)
+ regidx += (dd->palign / sizeof(u64));
+
+ pack_ent = tp->amp << DDS_ENT_AMP_LSB;
+ pack_ent |= tp->main << DDS_ENT_MAIN_LSB;
+ pack_ent |= tp->pre << DDS_ENT_PRE_LSB;
+ pack_ent |= tp->post << DDS_ENT_POST_LSB;
+ qib_write_kreg(dd, regidx, pack_ent);
+ /* Prevent back-to-back writes by hitting scratch */
+ qib_write_kreg(ppd->dd, kr_scratch, 0);
+}
+
+static const struct vendor_txdds_ent vendor_txdds[] = {
+ { /* Amphenol 1m 30awg NoEq */
+ { 0x41, 0x50, 0x48 }, "584470002 ",
+ { 10, 0, 0, 5 }, { 10, 0, 0, 9 }, { 7, 1, 0, 13 },
+ },
+ { /* Amphenol 3m 28awg NoEq */
+ { 0x41, 0x50, 0x48 }, "584470004 ",
+ { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 0, 1, 7, 15 },
+ },
+ { /* Finisar 3m OM2 Optical */
+ { 0x00, 0x90, 0x65 }, "FCBG410QB1C03-QL",
+ { 0, 0, 0, 3 }, { 0, 0, 0, 4 }, { 0, 0, 0, 13 },
+ },
+ { /* Finisar 30m OM2 Optical */
+ { 0x00, 0x90, 0x65 }, "FCBG410QB1C30-QL",
+ { 0, 0, 0, 1 }, { 0, 0, 0, 5 }, { 0, 0, 0, 11 },
+ },
+ { /* Finisar Default OM2 Optical */
+ { 0x00, 0x90, 0x65 }, NULL,
+ { 0, 0, 0, 2 }, { 0, 0, 0, 5 }, { 0, 0, 0, 12 },
+ },
+ { /* Gore 1m 30awg NoEq */
+ { 0x00, 0x21, 0x77 }, "QSN3300-1 ",
+ { 0, 0, 0, 6 }, { 0, 0, 0, 9 }, { 0, 1, 0, 15 },
+ },
+ { /* Gore 2m 30awg NoEq */
+ { 0x00, 0x21, 0x77 }, "QSN3300-2 ",
+ { 0, 0, 0, 8 }, { 0, 0, 0, 10 }, { 0, 1, 7, 15 },
+ },
+ { /* Gore 1m 28awg NoEq */
+ { 0x00, 0x21, 0x77 }, "QSN3800-1 ",
+ { 0, 0, 0, 6 }, { 0, 0, 0, 8 }, { 0, 1, 0, 15 },
+ },
+ { /* Gore 3m 28awg NoEq */
+ { 0x00, 0x21, 0x77 }, "QSN3800-3 ",
+ { 0, 0, 0, 9 }, { 0, 0, 0, 13 }, { 0, 1, 7, 15 },
+ },
+ { /* Gore 5m 24awg Eq */
+ { 0x00, 0x21, 0x77 }, "QSN7000-5 ",
+ { 0, 0, 0, 7 }, { 0, 0, 0, 9 }, { 0, 1, 3, 15 },
+ },
+ { /* Gore 7m 24awg Eq */
+ { 0x00, 0x21, 0x77 }, "QSN7000-7 ",
+ { 0, 0, 0, 9 }, { 0, 0, 0, 11 }, { 0, 2, 6, 15 },
+ },
+ { /* Gore 5m 26awg Eq */
+ { 0x00, 0x21, 0x77 }, "QSN7600-5 ",
+ { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 0, 1, 9, 13 },
+ },
+ { /* Gore 7m 26awg Eq */
+ { 0x00, 0x21, 0x77 }, "QSN7600-7 ",
+ { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 10, 1, 8, 15 },
+ },
+ { /* Intersil 12m 24awg Active */
+ { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP1224",
+ { 0, 0, 0, 2 }, { 0, 0, 0, 5 }, { 0, 3, 0, 9 },
+ },
+ { /* Intersil 10m 28awg Active */
+ { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP1028",
+ { 0, 0, 0, 6 }, { 0, 0, 0, 4 }, { 0, 2, 0, 2 },
+ },
+ { /* Intersil 7m 30awg Active */
+ { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP0730",
+ { 0, 0, 0, 6 }, { 0, 0, 0, 4 }, { 0, 1, 0, 3 },
+ },
+ { /* Intersil 5m 32awg Active */
+ { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP0532",
+ { 0, 0, 0, 6 }, { 0, 0, 0, 6 }, { 0, 2, 0, 8 },
+ },
+ { /* Intersil Default Active */
+ { 0x00, 0x30, 0xB4 }, NULL,
+ { 0, 0, 0, 6 }, { 0, 0, 0, 5 }, { 0, 2, 0, 5 },
+ },
+ { /* Luxtera 20m Active Optical */
+ { 0x00, 0x25, 0x63 }, NULL,
+ { 0, 0, 0, 5 }, { 0, 0, 0, 8 }, { 0, 2, 0, 12 },
+ },
+ { /* Molex 1M Cu loopback */
+ { 0x00, 0x09, 0x3A }, "74763-0025 ",
+ { 2, 2, 6, 15 }, { 2, 2, 6, 15 }, { 2, 2, 6, 15 },
+ },
+ { /* Molex 2m 28awg NoEq */
+ { 0x00, 0x09, 0x3A }, "74757-2201 ",
+ { 0, 0, 0, 6 }, { 0, 0, 0, 9 }, { 0, 1, 1, 15 },
+ },
+};
+
+static const struct txdds_ent txdds_sdr[TXDDS_TABLE_SZ] = {
+ /* amp, pre, main, post */
+ { 2, 2, 15, 6 }, /* Loopback */
+ { 0, 0, 0, 1 }, /* 2 dB */
+ { 0, 0, 0, 2 }, /* 3 dB */
+ { 0, 0, 0, 3 }, /* 4 dB */
+ { 0, 0, 0, 4 }, /* 5 dB */
+ { 0, 0, 0, 5 }, /* 6 dB */
+ { 0, 0, 0, 6 }, /* 7 dB */
+ { 0, 0, 0, 7 }, /* 8 dB */
+ { 0, 0, 0, 8 }, /* 9 dB */
+ { 0, 0, 0, 9 }, /* 10 dB */
+ { 0, 0, 0, 10 }, /* 11 dB */
+ { 0, 0, 0, 11 }, /* 12 dB */
+ { 0, 0, 0, 12 }, /* 13 dB */
+ { 0, 0, 0, 13 }, /* 14 dB */
+ { 0, 0, 0, 14 }, /* 15 dB */
+ { 0, 0, 0, 15 }, /* 16 dB */
+};
+
+static const struct txdds_ent txdds_ddr[TXDDS_TABLE_SZ] = {
+ /* amp, pre, main, post */
+ { 2, 2, 15, 6 }, /* Loopback */
+ { 0, 0, 0, 8 }, /* 2 dB */
+ { 0, 0, 0, 8 }, /* 3 dB */
+ { 0, 0, 0, 9 }, /* 4 dB */
+ { 0, 0, 0, 9 }, /* 5 dB */
+ { 0, 0, 0, 10 }, /* 6 dB */
+ { 0, 0, 0, 10 }, /* 7 dB */
+ { 0, 0, 0, 11 }, /* 8 dB */
+ { 0, 0, 0, 11 }, /* 9 dB */
+ { 0, 0, 0, 12 }, /* 10 dB */
+ { 0, 0, 0, 12 }, /* 11 dB */
+ { 0, 0, 0, 13 }, /* 12 dB */
+ { 0, 0, 0, 13 }, /* 13 dB */
+ { 0, 0, 0, 14 }, /* 14 dB */
+ { 0, 0, 0, 14 }, /* 15 dB */
+ { 0, 0, 0, 15 }, /* 16 dB */
+};
+
+static const struct txdds_ent txdds_qdr[TXDDS_TABLE_SZ] = {
+ /* amp, pre, main, post */
+ { 2, 2, 15, 6 }, /* Loopback */
+ { 0, 1, 0, 7 }, /* 2 dB (also QMH7342) */
+ { 0, 1, 0, 9 }, /* 3 dB (also QMH7342) */
+ { 0, 1, 0, 11 }, /* 4 dB */
+ { 0, 1, 0, 13 }, /* 5 dB */
+ { 0, 1, 0, 15 }, /* 6 dB */
+ { 0, 1, 3, 15 }, /* 7 dB */
+ { 0, 1, 7, 15 }, /* 8 dB */
+ { 0, 1, 7, 15 }, /* 9 dB */
+ { 0, 1, 8, 15 }, /* 10 dB */
+ { 0, 1, 9, 15 }, /* 11 dB */
+ { 0, 1, 10, 15 }, /* 12 dB */
+ { 0, 2, 6, 15 }, /* 13 dB */
+ { 0, 2, 7, 15 }, /* 14 dB */
+ { 0, 2, 8, 15 }, /* 15 dB */
+ { 0, 2, 9, 15 }, /* 16 dB */
+};
+
+/*
+ * extra entries for use with txselect, for indices >= TXDDS_TABLE_SZ.
+ * These are mostly used for mez cards going through connectors
+ * and backplane traces, but can be used to add other "unusual"
+ * table values as well.
+ */
+static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = {
+ /* amp, pre, main, post */
+ { 0, 0, 0, 1 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 1 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 2 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 3 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 4 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
+};
+
+static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
+ /* amp, pre, main, post */
+ { 0, 0, 0, 7 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 7 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 8 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 9 }, /* QMH7342 backplane settings */
+ { 0, 0, 0, 10 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
+};
+
+static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
+ /* amp, pre, main, post */
+ { 0, 1, 0, 4 }, /* QMH7342 backplane settings */
+ { 0, 1, 0, 5 }, /* QMH7342 backplane settings */
+ { 0, 1, 0, 6 }, /* QMH7342 backplane settings */
+ { 0, 1, 0, 8 }, /* QMH7342 backplane settings */
+ { 0, 1, 0, 10 }, /* QMH7342 backplane settings */
+ { 0, 1, 0, 12 }, /* QMH7342 backplane settings */
+ { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
+ { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
+ { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
+};
+
+static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
+ /* amp, pre, main, post */
+ { 0, 0, 0, 0 }, /* QME7342 mfg settings */
+ { 0, 0, 0, 6 }, /* QME7342 P2 mfg settings */
+};
+
+static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds,
+ unsigned atten)
+{
+ /*
+ * The attenuation table starts at 2dB for entry 1,
+ * with entry 0 being the loopback entry.
+ */
+ if (atten <= 2)
+ atten = 1;
+ else if (atten > TXDDS_TABLE_SZ)
+ atten = TXDDS_TABLE_SZ - 1;
+ else
+ atten--;
+ return txdds + atten;
+}
+
+/*
+ * If override is set, the module parameter txselect has a value
+ * for this specific port, so use it, rather than our normal mechanism.
+ */
+static void find_best_ent(struct qib_pportdata *ppd,
+ const struct txdds_ent **sdr_dds,
+ const struct txdds_ent **ddr_dds,
+ const struct txdds_ent **qdr_dds, int override)
+{
+ struct qib_qsfp_cache *qd = &ppd->cpspec->qsfp_data.cache;
+ int idx;
+
+ /* Search table of known cables */
+ for (idx = 0; !override && idx < ARRAY_SIZE(vendor_txdds); ++idx) {
+ const struct vendor_txdds_ent *v = vendor_txdds + idx;
+
+ if (!memcmp(v->oui, qd->oui, QSFP_VOUI_LEN) &&
+ (!v->partnum ||
+ !memcmp(v->partnum, qd->partnum, QSFP_PN_LEN))) {
+ *sdr_dds = &v->sdr;
+ *ddr_dds = &v->ddr;
+ *qdr_dds = &v->qdr;
+ return;
+ }
+ }
+
+ /* Active cables don't have attenuation so we only set SERDES
+ * settings to account for the attenuation of the board traces. */
+ if (!override && QSFP_IS_ACTIVE(qd->tech)) {
+ *sdr_dds = txdds_sdr + ppd->dd->board_atten;
+ *ddr_dds = txdds_ddr + ppd->dd->board_atten;
+ *qdr_dds = txdds_qdr + ppd->dd->board_atten;
+ return;
+ }
+
+ if (!override && QSFP_HAS_ATTEN(qd->tech) && (qd->atten[0] ||
+ qd->atten[1])) {
+ *sdr_dds = get_atten_table(txdds_sdr, qd->atten[0]);
+ *ddr_dds = get_atten_table(txdds_ddr, qd->atten[0]);
+ *qdr_dds = get_atten_table(txdds_qdr, qd->atten[1]);
+ return;
+ } else if (ppd->cpspec->no_eep < TXDDS_TABLE_SZ) {
+ /*
+ * If we have no (or incomplete) data from the cable
+ * EEPROM, or no QSFP, or override is set, use the
+ * module parameter value to index into the attenuation
+ * table.
+ */
+ idx = ppd->cpspec->no_eep;
+ *sdr_dds = &txdds_sdr[idx];
+ *ddr_dds = &txdds_ddr[idx];
+ *qdr_dds = &txdds_qdr[idx];
+ } else if (ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)) {
+ /* similar to above, but index into the "extra" table. */
+ idx = ppd->cpspec->no_eep - TXDDS_TABLE_SZ;
+ *sdr_dds = &txdds_extra_sdr[idx];
+ *ddr_dds = &txdds_extra_ddr[idx];
+ *qdr_dds = &txdds_extra_qdr[idx];
+ } else if ((IS_QME(ppd->dd) || IS_QMH(ppd->dd)) &&
+ ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ TXDDS_MFG_SZ)) {
+ idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
+ pr_info("IB%u:%u use idx %u into txdds_mfg\n",
+ ppd->dd->unit, ppd->port, idx);
+ *sdr_dds = &txdds_extra_mfg[idx];
+ *ddr_dds = &txdds_extra_mfg[idx];
+ *qdr_dds = &txdds_extra_mfg[idx];
+ } else {
+ /* this shouldn't happen, it's range checked */
+ *sdr_dds = txdds_sdr + qib_long_atten;
+ *ddr_dds = txdds_ddr + qib_long_atten;
+ *qdr_dds = txdds_qdr + qib_long_atten;
+ }
+}
+
+static void init_txdds_table(struct qib_pportdata *ppd, int override)
+{
+ const struct txdds_ent *sdr_dds, *ddr_dds, *qdr_dds;
+ struct txdds_ent *dds;
+ int idx;
+ int single_ent = 0;
+
+ find_best_ent(ppd, &sdr_dds, &ddr_dds, &qdr_dds, override);
+
+ /* for mez cards or override, use the selected value for all entries */
+ if (!(ppd->dd->flags & QIB_HAS_QSFP) || override)
+ single_ent = 1;
+
+ /* Fill in the first entry of each speed table with the best entry found. */
+ set_txdds(ppd, 0, sdr_dds);
+ set_txdds(ppd, TXDDS_TABLE_SZ, ddr_dds);
+ set_txdds(ppd, 2 * TXDDS_TABLE_SZ, qdr_dds);
+ if (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED |
+ QIBL_LINKACTIVE)) {
+ dds = (struct txdds_ent *)(ppd->link_speed_active ==
+ QIB_IB_QDR ? qdr_dds :
+ (ppd->link_speed_active ==
+ QIB_IB_DDR ? ddr_dds : sdr_dds));
+ write_tx_serdes_param(ppd, dds);
+ }
+
+ /* Fill in the remaining entries with the default table values. */
+ for (idx = 1; idx < ARRAY_SIZE(txdds_sdr); ++idx) {
+ set_txdds(ppd, idx, single_ent ? sdr_dds : txdds_sdr + idx);
+ set_txdds(ppd, idx + TXDDS_TABLE_SZ,
+ single_ent ? ddr_dds : txdds_ddr + idx);
+ set_txdds(ppd, idx + 2 * TXDDS_TABLE_SZ,
+ single_ent ? qdr_dds : txdds_qdr + idx);
+ }
+}
+
+#define KR_AHB_ACC KREG_IDX(ahb_access_ctrl)
+#define KR_AHB_TRANS KREG_IDX(ahb_transaction_reg)
+#define AHB_TRANS_RDY SYM_MASK(ahb_transaction_reg, ahb_rdy)
+#define AHB_ADDR_LSB SYM_LSB(ahb_transaction_reg, ahb_address)
+#define AHB_DATA_LSB SYM_LSB(ahb_transaction_reg, ahb_data)
+#define AHB_WR SYM_MASK(ahb_transaction_reg, write_not_read)
+#define AHB_TRANS_TRIES 10
+
+/*
+ * The chan argument is 0=chan0, 1=chan1, 2=pll, 3=chan2, 4=chan3,
+ * 5=subsystem, which is why most calls use "chan + (chan >> 1)"
+ * for the channel argument.
+ */
+static u32 ahb_mod(struct qib_devdata *dd, int quad, int chan, int addr,
+ u32 data, u32 mask)
+{
+ u32 rd_data, wr_data, sz_mask;
+ u64 trans, acc, prev_acc;
+ u32 ret = 0xBAD0BAD;
+ int tries;
+
+ prev_acc = qib_read_kreg64(dd, KR_AHB_ACC);
+ /* From this point on, make sure we return access */
+ acc = (quad << 1) | 1;
+ qib_write_kreg(dd, KR_AHB_ACC, acc);
+
+ for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) {
+ trans = qib_read_kreg64(dd, KR_AHB_TRANS);
+ if (trans & AHB_TRANS_RDY)
+ break;
+ }
+ if (tries >= AHB_TRANS_TRIES) {
+ qib_dev_err(dd, "No ahb_rdy in %d tries\n", AHB_TRANS_TRIES);
+ goto bail;
+ }
+
+ /* If mask is not all 1s, we need to read, but different SerDes
+ * entities have different sizes
+ */
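+ /* quad 1 data is 32 bits wide; all other quads use 16 bits */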
+ sz_mask = (1UL << ((quad == 1) ? 32 : 16)) - 1;
+ wr_data = data & mask & sz_mask;
+ if ((~mask & sz_mask) != 0) {
+ trans = ((chan << 6) | addr) << (AHB_ADDR_LSB + 1);
+ qib_write_kreg(dd, KR_AHB_TRANS, trans);
+
+ for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) {
+ trans = qib_read_kreg64(dd, KR_AHB_TRANS);
+ if (trans & AHB_TRANS_RDY)
+ break;
+ }
+ if (tries >= AHB_TRANS_TRIES) {
+ qib_dev_err(dd, "No Rd ahb_rdy in %d tries\n",
+ AHB_TRANS_TRIES);
+ goto bail;
+ }
+ /* Re-read in case host split reads and read data first */
+ trans = qib_read_kreg64(dd, KR_AHB_TRANS);
+ rd_data = (uint32_t)(trans >> AHB_DATA_LSB);
+ wr_data |= (rd_data & ~mask & sz_mask);
+ }
+
+ /* If mask is not zero, we need to write. */
+ if (mask & sz_mask) {
+ trans = ((chan << 6) | addr) << (AHB_ADDR_LSB + 1);
+ trans |= ((uint64_t)wr_data << AHB_DATA_LSB);
+ trans |= AHB_WR;
+ qib_write_kreg(dd, KR_AHB_TRANS, trans);
+
+ for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) {
+ trans = qib_read_kreg64(dd, KR_AHB_TRANS);
+ if (trans & AHB_TRANS_RDY)
+ break;
+ }
+ if (tries >= AHB_TRANS_TRIES) {
+ qib_dev_err(dd, "No Wr ahb_rdy in %d tries\n",
+ AHB_TRANS_TRIES);
+ goto bail;
+ }
+ }
+ ret = wr_data;
+bail:
+ qib_write_kreg(dd, KR_AHB_ACC, prev_acc);
+ return ret;
+}
+
+static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data,
+ unsigned mask)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int chan;
+ u32 rbc;
+
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), addr,
+ data, mask);
+ rbc = ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ addr, 0, 0);
+ }
+}
+
+static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable)
+{
+ u64 data = qib_read_kreg_port(ppd, krp_serdesctrl);
+ u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN);
+
+ if (enable && !state) {
+ pr_info("IB%u:%u Turning LOS on\n",
+ ppd->dd->unit, ppd->port);
+ data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+ } else if (!enable && state) {
+ pr_info("IB%u:%u Turning LOS off\n",
+ ppd->dd->unit, ppd->port);
+ data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+ }
+ qib_write_kreg_port(ppd, krp_serdesctrl, data);
+}
+
+static int serdes_7322_init(struct qib_pportdata *ppd)
+{
+ int ret = 0;
+ if (ppd->dd->cspec->r1)
+ ret = serdes_7322_init_old(ppd);
+ else
+ ret = serdes_7322_init_new(ppd);
+ return ret;
+}
+
+static int serdes_7322_init_old(struct qib_pportdata *ppd)
+{
+ u32 le_val;
+
+ /*
+ * Initialize the Tx DDS tables. Also done every QSFP event,
+ * for adapters with QSFP
+ */
+ init_txdds_table(ppd, 0);
+
+ /* ensure no tx overrides from earlier driver loads */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
+ /* Patch some SerDes defaults to "Better for IB" */
+ /* Timing Loop Bandwidth: cdr_timing[11:9] = 0 */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9));
+
+ /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */
+ ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11));
+ /* Enable LE2: rxle2en_r2a addr 13 bit [6] = 1 */
+ ibsd_wr_allchans(ppd, 13, (1 << 6), (1 << 6));
+
+ /* May be overridden in qsfp_7322_event */
+ le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT;
+ ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7));
+
+ /* enable LE1 adaptation for all but QME, which is disabled */
+ le_val = IS_QME(ppd->dd) ? 0 : 1;
+ ibsd_wr_allchans(ppd, 13, (le_val << 5), (1 << 5));
+
+ /* Clear cmode-override, may be set from older driver */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14);
+
+ /* Timing Recovery: rxtapsel addr 5 bits [9:8] = 0 */
+ ibsd_wr_allchans(ppd, 5, (0 << 8), BMASK(9, 8));
+
+ /* setup LoS params; these are subsystem, so chan == 5 */
+ /* LoS filter threshold_count on, ch 0-3, set to 8 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4));
+
+ /* LoS filter threshold_count off, ch 0-3, set to 4 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8));
+
+ /* LoS filter select enabled */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15);
+
+ /* LoS target data: SDR=4, DDR=2, QDR=1 */
+ ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */
+ ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */
+ ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */
+
+ serdes_7322_los_enable(ppd, 1);
+
+ /* rxbistena; set to 0 to avoid effects of it switching later */
+ ibsd_wr_allchans(ppd, 9, 0 << 15, 1 << 15);
+
+ /* Configure 4 DFE taps, and only they adapt */
+ ibsd_wr_allchans(ppd, 16, 0 << 0, BMASK(1, 0));
+
+ /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */
+ le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac;
+ ibsd_wr_allchans(ppd, 21, le_val, 0xfffe);
+
+ /*
+ * Set receive adaptation mode. SDR and DDR adaptation are
+ * always on, and QDR is initially enabled; later disabled.
+ */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN);
+ ppd->cpspec->qdr_dfe_on = 1;
+
+ /* FLoop LOS gate: PPM filter enabled */
+ ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10);
+
+ /* rx offset center enabled */
+ ibsd_wr_allchans(ppd, 12, 1 << 4, 1 << 4);
+
+ if (!ppd->dd->cspec->r1) {
+ ibsd_wr_allchans(ppd, 12, 1 << 12, 1 << 12);
+ ibsd_wr_allchans(ppd, 12, 2 << 8, 0x0f << 8);
+ }
+
+ /* Set the frequency loop bandwidth to 15 */
+ ibsd_wr_allchans(ppd, 2, 15 << 5, BMASK(8, 5));
+
+ return 0;
+}
+
+static int serdes_7322_init_new(struct qib_pportdata *ppd)
+{
+ unsigned long tend;
+ u32 le_val, rxcaldone;
+ int chan, chan_done = (1 << SERDES_CHANS) - 1;
+
+ /* Clear cmode-override, may be set from older driver */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14);
+
+ /* ensure no tx overrides from earlier driver loads */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
+ /* START OF LSI SUGGESTED SERDES BRINGUP */
+ /* Reset - Calibration Setup */
+ /* Stop DFE adaptation */
+ ibsd_wr_allchans(ppd, 1, 0, BMASK(9, 1));
+ /* Disable LE1 */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(5, 5));
+ /* Disable autoadapt for LE1 */
+ ibsd_wr_allchans(ppd, 1, 0, BMASK(15, 15));
+ /* Disable LE2 */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(6, 6));
+ /* Disable VGA */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
+ /* Disable AFE Offset Cancel */
+ ibsd_wr_allchans(ppd, 12, 0, BMASK(12, 12));
+ /* Disable Timing Loop */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(3, 3));
+ /* Disable Frequency Loop */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(4, 4));
+ /* Disable Baseline Wander Correction */
+ ibsd_wr_allchans(ppd, 13, 0, BMASK(13, 13));
+ /* Disable RX Calibration */
+ ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10));
+ /* Disable RX Offset Calibration */
+ ibsd_wr_allchans(ppd, 12, 0, BMASK(4, 4));
+ /* Select BB CDR */
+ ibsd_wr_allchans(ppd, 2, (1 << 15), BMASK(15, 15));
+ /* CDR Step Size */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(9, 8));
+ /* Enable phase Calibration */
+ ibsd_wr_allchans(ppd, 12, (1 << 5), BMASK(5, 5));
+ /* DFE Bandwidth [2:14-12] */
+ ibsd_wr_allchans(ppd, 2, (4 << 12), BMASK(14, 12));
+ /* DFE Config (4 taps only) */
+ ibsd_wr_allchans(ppd, 16, 0, BMASK(1, 0));
+ /* Gain Loop Bandwidth */
+ if (!ppd->dd->cspec->r1) {
+ ibsd_wr_allchans(ppd, 12, 1 << 12, BMASK(12, 12));
+ ibsd_wr_allchans(ppd, 12, 2 << 8, BMASK(11, 8));
+ } else {
+ ibsd_wr_allchans(ppd, 19, (3 << 11), BMASK(13, 11));
+ }
+ /* Baseline Wander Correction Gain [13:4-0] (leave as default) */
+ /* Baseline Wander Correction Gain [3:7-5] (leave as default) */
+ /* Data Rate Select [5:7-6] (leave as default) */
+ /* RX Parallel Word Width [3:10-8] (leave as default) */
+
+ /* RX RESET */
+ /* Single- or Multi-channel reset */
+ /* RX Analog reset */
+ /* RX Digital reset */
+ ibsd_wr_allchans(ppd, 0, 0, BMASK(15, 13));
+ msleep(20);
+ /* RX Analog reset */
+ ibsd_wr_allchans(ppd, 0, (1 << 14), BMASK(14, 14));
+ msleep(20);
+ /* RX Digital reset */
+ ibsd_wr_allchans(ppd, 0, (1 << 13), BMASK(13, 13));
+ msleep(20);
+
+ /* setup LoS params; these are subsystem, so chan == 5 */
+ /* LoS filter threshold_count on, ch 0-3, set to 8 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4));
+
+ /* LoS filter threshold_count off, ch 0-3, set to 4 */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0));
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8));
+
+ /* LoS filter select enabled */
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15);
+
+ /* LoS target data: SDR=4, DDR=2, QDR=1 */
+ ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */
+ ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */
+ ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */
+
+ /* Turn on LOS on initial SERDES init */
+ serdes_7322_los_enable(ppd, 1);
+ /* FLoop LOS gate: PPM filter enabled */
+ ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10);
+
+ /* RX LATCH CALIBRATION */
+ /* Enable Eyefinder Phase Calibration latch */
+ ibsd_wr_allchans(ppd, 15, 1, BMASK(0, 0));
+ /* Enable RX Offset Calibration latch */
+ ibsd_wr_allchans(ppd, 12, (1 << 4), BMASK(4, 4));
+ msleep(20);
+ /* Start Calibration */
+ ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10));
+ tend = jiffies + msecs_to_jiffies(500);
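+ /* poll each channel's calibration-done bit (reg 25, bit 9) until
+ * all channels report done or the 500 msec window expires */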
+ while (chan_done && !time_is_before_jiffies(tend)) {
+ msleep(20);
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
+ (chan + (chan >> 1)),
+ 25, 0, 0);
+ if ((~rxcaldone & (u32)BMASK(9, 9)) == 0 &&
+ (~chan_done & (1 << chan)) == 0)
+ chan_done &= ~(1 << chan);
+ }
+ }
+ if (chan_done) {
+ pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n",
+ IBSD(ppd->hw_pidx), chan_done);
+ } else {
+ for (chan = 0; chan < SERDES_CHANS; ++chan) {
+ rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
+ (chan + (chan >> 1)),
+ 25, 0, 0);
+ if ((~rxcaldone & (u32)BMASK(10, 10)) == 0)
+ pr_info("Serdes %d chan %d calibration failed\n",
+ IBSD(ppd->hw_pidx), chan);
+ }
+ }
+
+ /* Turn off Calibration */
+ ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10));
+ msleep(20);
+
+ /* BRING RX UP */
+ /* Set LE2 value (May be overridden in qsfp_7322_event) */
+ le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT;
+ ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7));
+ /* Set LE2 Loop bandwidth */
+ ibsd_wr_allchans(ppd, 3, (7 << 5), BMASK(7, 5));
+ /* Enable LE2 */
+ ibsd_wr_allchans(ppd, 13, (1 << 6), BMASK(6, 6));
+ msleep(20);
+ /* Enable H0 only */
+ ibsd_wr_allchans(ppd, 1, 1, BMASK(9, 1));
+ /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */
+ le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac;
+ ibsd_wr_allchans(ppd, 21, le_val, 0xfffe);
+ /* Enable VGA */
+ ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
+ msleep(20);
+ /* Set Frequency Loop Bandwidth */
+ ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5));
+ /* Enable Frequency Loop */
+ ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4));
+ /* Set Timing Loop Bandwidth */
+ ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9));
+ /* Enable Timing Loop */
+ ibsd_wr_allchans(ppd, 2, (1 << 3), BMASK(3, 3));
+ msleep(50);
+ /* Enable DFE
+ * Set receive adaptation mode. SDR and DDR adaptation are
+ * always on, and QDR is initially enabled; later disabled.
+ */
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL);
+ qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
+ ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN);
+ ppd->cpspec->qdr_dfe_on = 1;
+ /* Disable LE1 */
+ ibsd_wr_allchans(ppd, 13, (0 << 5), (1 << 5));
+ /* Disable auto adapt for LE1 */
+ ibsd_wr_allchans(ppd, 1, (0 << 15), BMASK(15, 15));
+ msleep(20);
+ /* Enable AFE Offset Cancel */
+ ibsd_wr_allchans(ppd, 12, (1 << 12), BMASK(12, 12));
+ /* Enable Baseline Wander Correction */
+ ibsd_wr_allchans(ppd, 12, (1 << 13), BMASK(13, 13));
+ /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */
+ ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11));
+ /* VGA output common mode */
+ ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2));
+
+ /*
+ * Initialize the Tx DDS tables. Also done every QSFP event,
+ * for adapters with QSFP
+ */
+ init_txdds_table(ppd, 0);
+
+ return 0;
+}
+
+/* start adjust QMH serdes parameters */
+
+static void set_man_code(struct qib_pportdata *ppd, int chan, int code)
+{
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 9, code << 9, 0x3f << 9);
+}
+
+static void set_man_mode_h1(struct qib_pportdata *ppd, int chan,
+ int enable, u32 tapenable)
+{
+ if (enable)
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 1, 3 << 10, 0x1f << 10);
+ else
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 1, 0, 0x1f << 10);
+}
+
+/* Set clock to 1, 0, 1, 0 */
+static void clock_man(struct qib_pportdata *ppd, int chan)
+{
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 4, 0x4000, 0x4000);
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 4, 0, 0x4000);
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 4, 0x4000, 0x4000);
+ ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)),
+ 4, 0, 0x4000);
+}
+
+/*
+ * Write the current Tx serdes pre, post, main, and amp settings into
+ * the serdes. The caller must pass settings appropriate for the
+ * current speed, or not care whether they match it.
+ */
+static void write_tx_serdes_param(struct qib_pportdata *ppd,
+ struct txdds_ent *txdds)
+{
+ u64 deemph;
+
+ deemph = qib_read_kreg_port(ppd, krp_tx_deemph_override);
+ /* field names for amp, main, post, pre, respectively */
+ deemph &= ~(SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txampcntl_d2a) |
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txc0_ena) |
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txcp1_ena) |
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txcn1_ena));
+
+ deemph |= SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ tx_override_deemphasis_select);
+ deemph |= (txdds->amp & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txampcntl_d2a)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txampcntl_d2a);
+ deemph |= (txdds->main & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txc0_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txc0_ena);
+ deemph |= (txdds->post & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txcp1_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txcp1_ena);
+ deemph |= (txdds->pre & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txcn1_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ txcn1_ena);
+ qib_write_kreg_port(ppd, krp_tx_deemph_override, deemph);
+}
+
+/*
+ * Set the parameters for mez cards on link bounce, so they are
+ * always exactly what was requested. Similar logic to init_txdds
+ * but does just the serdes.
+ */
+static void adj_tx_serdes(struct qib_pportdata *ppd)
+{
+ const struct txdds_ent *sdr_dds, *ddr_dds, *qdr_dds;
+ struct txdds_ent *dds;
+
+ find_best_ent(ppd, &sdr_dds, &ddr_dds, &qdr_dds, 1);
+ dds = (struct txdds_ent *)(ppd->link_speed_active == QIB_IB_QDR ?
+ qdr_dds : (ppd->link_speed_active == QIB_IB_DDR ?
+ ddr_dds : sdr_dds));
+ write_tx_serdes_param(ppd, dds);
+}
+
+/* set QDR forced value for H1, if needed */
+static void force_h1(struct qib_pportdata *ppd)
+{
+ int chan;
+
+ ppd->cpspec->qdr_reforce = 0;
+ if (!ppd->dd->cspec->r1)
+ return;
+
+ for (chan = 0; chan < SERDES_CHANS; chan++) {
+ set_man_mode_h1(ppd, chan, 1, 0);
+ set_man_code(ppd, chan, ppd->cpspec->h1_val);
+ clock_man(ppd, chan);
+ set_man_mode_h1(ppd, chan, 0, 0);
+ }
+}
+
+#define SJA_EN SYM_MASK(SPC_JTAG_ACCESS_REG, SPC_JTAG_ACCESS_EN)
+#define BISTEN_LSB SYM_LSB(SPC_JTAG_ACCESS_REG, bist_en)
+
+#define R_OPCODE_LSB 3
+#define R_OP_NOP 0
+#define R_OP_SHIFT 2
+#define R_OP_UPDATE 3
+#define R_TDI_LSB 2
+#define R_TDO_LSB 1
+#define R_RDY 1
+
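+/* Take ownership of the JTAG-style access register by enabling SPC JTAG access. */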
+static int qib_r_grab(struct qib_devdata *dd)
+{
+ u64 val;
+ val = SJA_EN;
+ qib_write_kreg(dd, kr_r_access, val);
+ qib_read_kreg32(dd, kr_scratch);
+ return 0;
+}
+
+/* qib_r_wait_for_rdy() not only waits for the ready bit, it
+ * returns the current state of R_TDO
+ */
+static int qib_r_wait_for_rdy(struct qib_devdata *dd)
+{
+ u64 val;
+ int timeout;
+ for (timeout = 0; timeout < 100 ; ++timeout) {
+ val = qib_read_kreg32(dd, kr_r_access);
+ if (val & R_RDY)
+ return (val >> R_TDO_LSB) & 1;
+ }
+ return -1;
+}
+
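+/*
+ * Shift "len" bits through the selected BIST chain: optionally drive TDI
+ * from "inp" and capture TDO into "outp", one bit per iteration.  Returns
+ * the number of bits shifted, or negative if the ready bit never came back.
+ */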
+static int qib_r_shift(struct qib_devdata *dd, int bisten,
+ int len, u8 *inp, u8 *outp)
+{
+ u64 valbase, val;
+ int ret, pos;
+
+ valbase = SJA_EN | (bisten << BISTEN_LSB) |
+ (R_OP_SHIFT << R_OPCODE_LSB);
+ ret = qib_r_wait_for_rdy(dd);
+ if (ret < 0)
+ goto bail;
+ for (pos = 0; pos < len; ++pos) {
+ val = valbase;
+ if (outp) {
+ outp[pos >> 3] &= ~(1 << (pos & 7));
+ outp[pos >> 3] |= (ret << (pos & 7));
+ }
+ if (inp) {
+ int tdi = inp[pos >> 3] >> (pos & 7);
+ val |= ((tdi & 1) << R_TDI_LSB);
+ }
+ qib_write_kreg(dd, kr_r_access, val);
+ qib_read_kreg32(dd, kr_scratch);
+ ret = qib_r_wait_for_rdy(dd);
+ if (ret < 0)
+ break;
+ }
+ /* Restore to NOP between operations. */
+ val = SJA_EN | (bisten << BISTEN_LSB);
+ qib_write_kreg(dd, kr_r_access, val);
+ qib_read_kreg32(dd, kr_scratch);
+ ret = qib_r_wait_for_rdy(dd);
+
+ if (ret >= 0)
+ ret = pos;
+bail:
+ return ret;
+}
+
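+/* Latch the previously shifted chain contents with an UPDATE operation. */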
+static int qib_r_update(struct qib_devdata *dd, int bisten)
+{
+ u64 val;
+ int ret;
+
+ val = SJA_EN | (bisten << BISTEN_LSB) | (R_OP_UPDATE << R_OPCODE_LSB);
+ ret = qib_r_wait_for_rdy(dd);
+ if (ret >= 0) {
+ qib_write_kreg(dd, kr_r_access, val);
+ qib_read_kreg32(dd, kr_scratch);
+ }
+ return ret;
+}
+
+#define BISTEN_PORT_SEL 15
+#define LEN_PORT_SEL 625
+#define BISTEN_AT 17
+#define LEN_AT 156
+#define BISTEN_ETM 16
+#define LEN_ETM 632
+
+#define BIT2BYTE(x) (((x) + BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+
+/* these are common for all IB port use cases. */
+static u8 reset_at[BIT2BYTE(LEN_AT)] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00,
+};
+static u8 reset_atetm[BIT2BYTE(LEN_ETM)] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x80, 0xe3, 0x81, 0x73, 0x3c, 0x70, 0x8e,
+ 0x07, 0xce, 0xf1, 0xc0, 0x39, 0x1e, 0x38, 0xc7, 0x03, 0xe7,
+ 0x78, 0xe0, 0x1c, 0x0f, 0x9c, 0x7f, 0x80, 0x73, 0x0f, 0x70,
+ 0xde, 0x01, 0xce, 0x39, 0xc0, 0xf9, 0x06, 0x38, 0xd7, 0x00,
+ 0xe7, 0x19, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+};
+static u8 at[BIT2BYTE(LEN_AT)] = {
+ 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00,
+};
+
+/* used for IB1 or IB2, only one in use */
+static u8 atetm_1port[BIT2BYTE(LEN_ETM)] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x10, 0xf2, 0x80, 0x83, 0x1e, 0x38, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x50, 0xf4, 0x41, 0x00, 0x18, 0x78, 0xc8, 0x03,
+ 0x07, 0x7b, 0xa0, 0x3e, 0x00, 0x02, 0x00, 0x00, 0x18, 0x00,
+ 0x18, 0x00, 0x00, 0x00, 0x00, 0x4b, 0x00, 0x00, 0x00,
+};
+
+/* used when both IB1 and IB2 are in use */
+static u8 atetm_2port[BIT2BYTE(LEN_ETM)] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79,
+ 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xf8, 0x80, 0x83, 0x1e, 0x38, 0xe0, 0x03, 0x05,
+ 0x7b, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+ 0xa2, 0x0f, 0x50, 0xf4, 0x41, 0x00, 0x18, 0x78, 0xd1, 0x07,
+ 0x02, 0x7c, 0x80, 0x3e, 0x00, 0x02, 0x00, 0x00, 0x3e, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
+};
+
+/* used when only IB1 is in use */
+static u8 portsel_port1[BIT2BYTE(LEN_PORT_SEL)] = {
+ 0x32, 0x65, 0xa4, 0x7b, 0x10, 0x98, 0xdc, 0xfe, 0x13, 0x13,
+ 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x73, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13,
+ 0x13, 0x78, 0x78, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13,
+ 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x74, 0x32,
+ 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
+ 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
+ 0x14, 0x14, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* used when only IB2 is in use */
+static u8 portsel_port2[BIT2BYTE(LEN_PORT_SEL)] = {
+ 0x32, 0x65, 0xa4, 0x7b, 0x10, 0x98, 0xdc, 0xfe, 0x39, 0x39,
+ 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x73, 0x32, 0x32, 0x32,
+ 0x32, 0x32, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
+ 0x39, 0x78, 0x78, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x74, 0x32,
+ 0x32, 0x32, 0x32, 0x32, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a,
+ 0x3a, 0x3a, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
+};
+
+/* used when both IB1 and IB2 are in use */
+static u8 portsel_2port[BIT2BYTE(LEN_PORT_SEL)] = {
+ 0x32, 0xba, 0x54, 0x76, 0x10, 0x98, 0xdc, 0xfe, 0x13, 0x13,
+ 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x73, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13,
+ 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13,
+ 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x74, 0x32,
+ 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x14, 0x14, 0x14, 0x3a,
+ 0x3a, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14,
+ 0x14, 0x14, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * Do setup to properly handle IB link recovery; if port is zero, we
+ * are initializing to cover both ports; otherwise we are initializing
+ * to cover a single port card, or the port has reached INIT and we may
+ * need to switch coverage types.
+ */
+static void setup_7322_link_recovery(struct qib_pportdata *ppd, u32 both)
+{
+ u8 *portsel, *etm;
+ struct qib_devdata *dd = ppd->dd;
+
+ if (!ppd->dd->cspec->r1)
+ return;
+ if (!both) {
+ dd->cspec->recovery_ports_initted++;
+ ppd->cpspec->recovery_init = 1;
+ }
+ if (!both && dd->cspec->recovery_ports_initted == 1) {
+ portsel = ppd->port == 1 ? portsel_port1 : portsel_port2;
+ etm = atetm_1port;
+ } else {
+ portsel = portsel_2port;
+ etm = atetm_2port;
+ }
+
+ if (qib_r_grab(dd) < 0 ||
+ qib_r_shift(dd, BISTEN_ETM, LEN_ETM, reset_atetm, NULL) < 0 ||
+ qib_r_update(dd, BISTEN_ETM) < 0 ||
+ qib_r_shift(dd, BISTEN_AT, LEN_AT, reset_at, NULL) < 0 ||
+ qib_r_update(dd, BISTEN_AT) < 0 ||
+ qib_r_shift(dd, BISTEN_PORT_SEL, LEN_PORT_SEL,
+ portsel, NULL) < 0 ||
+ qib_r_update(dd, BISTEN_PORT_SEL) < 0 ||
+ qib_r_shift(dd, BISTEN_AT, LEN_AT, at, NULL) < 0 ||
+ qib_r_update(dd, BISTEN_AT) < 0 ||
+ qib_r_shift(dd, BISTEN_ETM, LEN_ETM, etm, NULL) < 0 ||
+ qib_r_update(dd, BISTEN_ETM) < 0)
+ qib_dev_err(dd, "Failed IB link recovery setup\n");
+}
+
+static void check_7322_rxe_status(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u64 fmask;
+
+ if (dd->cspec->recovery_ports_initted != 1)
+ return; /* rest doesn't apply to dualport */
+ qib_write_kreg(dd, kr_control, dd->control |
+ SYM_MASK(Control, FreezeMode));
+ (void)qib_read_kreg64(dd, kr_scratch);
+ udelay(3); /* ibcreset asserted 400ns, be sure that's over */
+ fmask = qib_read_kreg64(dd, kr_act_fmask);
+ if (!fmask) {
+ /*
+ * Require a powercycle before we'll work again; make sure
+ * we get no more interrupts, and don't turn off freeze.
+ */
+ ppd->dd->cspec->stay_in_freeze = 1;
+ qib_7322_set_intr_state(ppd->dd, 0);
+ qib_write_kreg(dd, kr_fmask, 0ULL);
+ qib_dev_err(dd, "HCA unusable until powercycled\n");
+ return; /* eventually reset */
+ }
+
+ qib_write_kreg(ppd->dd, kr_hwerrclear,
+ SYM_MASK(HwErrClear, IBSerdesPClkNotDetectClear_1));
+
+ /* don't do the full clear_freeze(), not needed for this */
+ qib_write_kreg(dd, kr_control, dd->control);
+ qib_read_kreg32(dd, kr_scratch);
+ /* take IBC out of reset */
+ if (ppd->link_speed_supported) {
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_read_kreg32(dd, kr_scratch);
+ if (ppd->lflags & QIBL_IB_LINK_DISABLED)
+ qib_set_ib_7322_lstate(ppd, 0,
+ QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
new file mode 100644
index 00000000000..8d3c78ddc90
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -0,0 +1,1857 @@
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <linux/idr.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+#include <linux/dca.h>
+#endif
+
+#include "qib.h"
+#include "qib_common.h"
+#include "qib_mad.h"
+#ifdef CONFIG_DEBUG_FS
+#include "qib_debugfs.h"
+#include "qib_verbs.h"
+#endif
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
+/*
+ * min buffers we want to have per context, after driver
+ */
+#define QIB_MIN_USER_CTXT_BUFCNT 7
+
+#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF
+#define QLOGIC_IB_R_SOFTWARE_SHIFT 24
+#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)
+
+/*
+ * Number of ctxts we are configured to use (to allow for more pio
+ * buffers per ctxt, etc.) Zero means use chip value.
+ */
+ushort qib_cfgctxts;
+module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
+MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
+
+unsigned qib_numa_aware;
+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
+MODULE_PARM_DESC(numa_aware,
+ "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
+
+/*
+ * If set, do not write to any regs if avoidable, hack to allow
+ * check for deranged default register values.
+ */
+ushort qib_mini_init;
+module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
+MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");
+
+unsigned qib_n_krcv_queues;
+module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
+MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
+
+unsigned qib_cc_table_size;
+module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
+/*
+ * qib_wc_pat parameter:
+ * 0 is WC via MTRR
+ * 1 is WC via PAT
+ * If PAT initialization fails, code reverts to MTRR
+ */
+unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
+module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
+MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
+
+static void verify_interrupt(unsigned long);
+
+static struct idr qib_unit_table;
+u32 qib_cpulist_count;
+unsigned long *qib_cpulist;
+
+/* set number of contexts we'll actually use */
+void qib_set_ctxtcnt(struct qib_devdata *dd)
+{
+ if (!qib_cfgctxts) {
+ dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
+ if (dd->cfgctxts > dd->ctxtcnt)
+ dd->cfgctxts = dd->ctxtcnt;
+ } else if (qib_cfgctxts < dd->num_pports)
+ dd->cfgctxts = dd->ctxtcnt;
+ else if (qib_cfgctxts <= dd->ctxtcnt)
+ dd->cfgctxts = qib_cfgctxts;
+ else
+ dd->cfgctxts = dd->ctxtcnt;
+ dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
+ dd->cfgctxts - dd->first_user_ctxt;
+}
+
+/*
+ * Common code for creating the receive context array.
+ */
+int qib_create_ctxts(struct qib_devdata *dd)
+{
+ unsigned i;
+ int local_node_id = pcibus_to_node(dd->pcidev->bus);
+
+ if (local_node_id < 0)
+ local_node_id = numa_node_id();
+ dd->assigned_node_id = local_node_id;
+
+ /*
+ * Allocate full ctxtcnt array, rather than just cfgctxts, because
+ * cleanup iterates across all possible ctxts.
+ */
+ dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
+ if (!dd->rcd) {
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata array, failing\n");
+ return -ENOMEM;
+ }
+
+ /* create (one or more) kctxt */
+ for (i = 0; i < dd->first_user_ctxt; ++i) {
+ struct qib_pportdata *ppd;
+ struct qib_ctxtdata *rcd;
+
+ if (dd->skip_kctxt_mask & (1 << i))
+ continue;
+
+ ppd = dd->pport + (i % dd->num_pports);
+
+ rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
+ if (!rcd) {
+ qib_dev_err(dd,
+ "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
+ kfree(dd->rcd);
+ dd->rcd = NULL;
+ return -ENOMEM;
+ }
+ rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
+ rcd->seq_cnt = 1;
+ }
+ return 0;
+}
+
+/*
+ * Common code for user and kernel context setup.
+ */
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
+ int node_id)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_ctxtdata *rcd;
+
+ rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
+ if (rcd) {
+ INIT_LIST_HEAD(&rcd->qp_wait_list);
+ rcd->node_id = node_id;
+ rcd->ppd = ppd;
+ rcd->dd = dd;
+ rcd->cnt = 1;
+ rcd->ctxt = ctxt;
+ dd->rcd[ctxt] = rcd;
+#ifdef CONFIG_DEBUG_FS
+ if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
+ rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+ GFP_KERNEL, node_id);
+ if (!rcd->opstats) {
+ kfree(rcd);
+ qib_dev_err(dd,
+ "Unable to allocate per ctxt stats buffer\n");
+ return NULL;
+ }
+ }
+#endif
+ dd->f_init_ctxt(rcd);
+
+ /*
+ * To avoid wasting a lot of memory, we allocate 32KB chunks
+ * of physically contiguous memory, advance through it until
+ * used up and then allocate more. Of course, we need
+ * memory to store those extra pointers, now. 32KB seems to
+ * be the most that is "safe" under memory pressure
+ * (creating large files and then copying them over
+ * NFS while doing lots of MPI jobs). The OOM killer can
+ * get invoked, even though we say we can sleep and this can
+ * cause significant system problems....
+ */
+ rcd->rcvegrbuf_size = 0x8000;
+ rcd->rcvegrbufs_perchunk =
+ rcd->rcvegrbuf_size / dd->rcvegrbufsize;
+ rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
+ rcd->rcvegrbufs_perchunk - 1) /
+ rcd->rcvegrbufs_perchunk;
+ BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
+ rcd->rcvegrbufs_perchunk_shift =
+ ilog2(rcd->rcvegrbufs_perchunk);
+ }
+ return rcd;
+}
+
+/*
+ * Common code for initializing the physical port structure.
+ */
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+ u8 hw_pidx, u8 port)
+{
+ int size;
+ ppd->dd = dd;
+ ppd->hw_pidx = hw_pidx;
+ ppd->port = port; /* IB port number, not index */
+
+ spin_lock_init(&ppd->sdma_lock);
+ spin_lock_init(&ppd->lflags_lock);
+ spin_lock_init(&ppd->cc_shadow_lock);
+ init_waitqueue_head(&ppd->state_wait);
+
+ init_timer(&ppd->symerr_clear_timer);
+ ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
+ ppd->symerr_clear_timer.data = (unsigned long)ppd;
+
+ ppd->qib_wq = NULL;
+ ppd->ibport_data.pmastats =
+ alloc_percpu(struct qib_pma_counters);
+ if (!ppd->ibport_data.pmastats)
+ return -ENOMEM;
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
+ goto bail;
+
+ ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
+ IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
+
+ ppd->cc_max_table_entries =
+ ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
+
+ size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
+ * IB_CCT_ENTRIES;
+ ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion control table for port %d!\n",
+ port);
+ goto bail;
+ }
+
+ size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
+ ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries) {
+ qib_dev_err(dd,
+ "failed to allocate congestion setting list for port %d!\n",
+ port);
+ goto bail_1;
+ }
+
+ size = sizeof(struct cc_table_shadow);
+ ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->ccti_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow ccti list for port %d!\n",
+ port);
+ goto bail_2;
+ }
+
+ size = sizeof(struct ib_cc_congestion_setting_attr);
+ ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
+ if (!ppd->congestion_entries_shadow) {
+ qib_dev_err(dd,
+ "failed to allocate shadow congestion setting list for port %d!\n",
+ port);
+ goto bail_3;
+ }
+
+ return 0;
+
+bail_3:
+ kfree(ppd->ccti_entries_shadow);
+ ppd->ccti_entries_shadow = NULL;
+bail_2:
+ kfree(ppd->congestion_entries);
+ ppd->congestion_entries = NULL;
+bail_1:
+ kfree(ppd->ccti_entries);
+ ppd->ccti_entries = NULL;
+bail:
+ /* User is intentionally disabling the congestion control agent */
+ if (!qib_cc_table_size)
+ return 0;
+
+ if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
+ qib_cc_table_size = 0;
+ qib_dev_err(dd,
+ "Congestion Control table size %d less than minimum %d for port %d\n",
+ qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
+ }
+
+ qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
+ port);
+ return 0;
+}
+
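+/*
+ * Allocate the DMA'ed page the chip updates with PIO buffer availability,
+ * and carve out the device/port status words and freeze message buffer
+ * that share the same page.
+ */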
+static int init_pioavailregs(struct qib_devdata *dd)
+{
+ int ret, pidx;
+ u64 *status_page;
+
+ dd->pioavailregs_dma = dma_alloc_coherent(
+ &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
+ GFP_KERNEL);
+ if (!dd->pioavailregs_dma) {
+ qib_dev_err(dd,
+ "failed to allocate PIOavail reg area in memory\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * We really want L2 cache aligned, but for current CPUs of
+ * interest, they are the same.
+ */
+ status_page = (u64 *)
+ ((char *) dd->pioavailregs_dma +
+ ((2 * L1_CACHE_BYTES +
+ dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
+ /* device status comes first, for backwards compatibility */
+ dd->devstatusp = status_page;
+ *status_page++ = 0;
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ dd->pport[pidx].statusp = status_page;
+ *status_page++ = 0;
+ }
+
+ /*
+ * Setup buffer to hold freeze and other messages, accessible to
+ * apps, following statusp. This is per-unit, not per port.
+ */
+ dd->freezemsg = (char *) status_page;
+ *dd->freezemsg = 0;
+ /* length of msg buffer is "whatever is left" */
+ ret = (char *) status_page - (char *) dd->pioavailregs_dma;
+ dd->freezelen = PAGE_SIZE - ret;
+
+ ret = 0;
+
+done:
+ return ret;
+}
+
+/**
+ * init_shadow_tids - allocate the shadow TID array
+ * @dd: the qlogic_ib device
+ *
+ * allocate the shadow TID array, so we can qib_munlock previous
+ * entries. It may make more sense to move the pageshadow to the
+ * ctxt data structure, so we only allocate memory for ctxts actually
+ * in use, since we are at 8k per ctxt now.
+ * We don't want failures here to prevent use of the driver/chip,
+ * so no return value.
+ */
+static void init_shadow_tids(struct qib_devdata *dd)
+{
+ struct page **pages;
+ dma_addr_t *addrs;
+
+ pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
+ if (!pages) {
+ qib_dev_err(dd,
+ "failed to allocate shadow page * array, no expected sends!\n");
+ goto bail;
+ }
+
+ addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
+ if (!addrs) {
+ qib_dev_err(dd,
+ "failed to allocate shadow dma handle array, no expected sends!\n");
+ goto bail_free;
+ }
+
+ dd->pageshadow = pages;
+ dd->physshadow = addrs;
+ return;
+
+bail_free:
+ vfree(pages);
+bail:
+ dd->pageshadow = NULL;
+}
+
+/*
+ * Do initialization for device that is only needed on
+ * first detect, not on resets.
+ */
+static int loadtime_init(struct qib_devdata *dd)
+{
+ int ret = 0;
+
+ if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
+ QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
+ qib_dev_err(dd,
+ "Driver only handles version %d, chip swversion is %d (%llx), failng\n",
+ QIB_CHIP_SWVERSION,
+ (int)(dd->revision >>
+ QLOGIC_IB_R_SOFTWARE_SHIFT) &
+ QLOGIC_IB_R_SOFTWARE_MASK,
+ (unsigned long long) dd->revision);
+ ret = -ENOSYS;
+ goto done;
+ }
+
+ if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK)
+ qib_devinfo(dd->pcidev, "%s", dd->boardversion);
+
+ spin_lock_init(&dd->pioavail_lock);
+ spin_lock_init(&dd->sendctrl_lock);
+ spin_lock_init(&dd->uctxt_lock);
+ spin_lock_init(&dd->qib_diag_trans_lock);
+ spin_lock_init(&dd->eep_st_lock);
+ mutex_init(&dd->eep_lock);
+
+ if (qib_mini_init)
+ goto done;
+
+ ret = init_pioavailregs(dd);
+ init_shadow_tids(dd);
+
+ qib_get_eeprom_info(dd);
+
+ /* setup time (don't start yet) to verify we got interrupt */
+ init_timer(&dd->intrchk_timer);
+ dd->intrchk_timer.function = verify_interrupt;
+ dd->intrchk_timer.data = (unsigned long) dd;
+
+ ret = qib_cq_init(dd);
+done:
+ return ret;
+}
+
+/**
+ * init_after_reset - re-initialize after a reset
+ * @dd: the qlogic_ib device
+ *
+ * sanity check at least some of the values after reset, and
+ * ensure no receive or transmit (explicitly, in case reset
+ * failed)
+ */
+static int init_after_reset(struct qib_devdata *dd)
+{
+ int i;
+
+ /*
+ * Ensure chip does no sends or receives, tail updates, or
+ * pioavail updates while we re-initialize. This is mostly
+ * for the driver data structures, not chip registers.
+ */
+ for (i = 0; i < dd->num_pports; ++i) {
+ /*
+ * ctxt == -1 means "all contexts". Only really safe for
+ * _dis_abling things, as here.
+ */
+ dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS |
+ QIB_RCVCTRL_INTRAVAIL_DIS |
+ QIB_RCVCTRL_TAILUPD_DIS, -1);
+ /* Redundant across ports for some, but no big deal. */
+ dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS |
+ QIB_SENDCTRL_AVAIL_DIS);
+ }
+
+ return 0;
+}
+
+static void enable_chip(struct qib_devdata *dd)
+{
+ u64 rcvmask;
+ int i;
+
+ /*
+ * Enable PIO send, and update of PIOavail regs to memory.
+ */
+ for (i = 0; i < dd->num_pports; ++i)
+ dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB |
+ QIB_SENDCTRL_AVAIL_ENB);
+ /*
+ * Enable kernel ctxts' receive and receive interrupt.
+ * Other ctxts done as user opens and inits them.
+ */
+ rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB;
+ rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ?
+ QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB;
+ for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ struct qib_ctxtdata *rcd = dd->rcd[i];
+
+ if (rcd)
+ dd->f_rcvctrl(rcd->ppd, rcvmask, i);
+ }
+}
+
+static void verify_interrupt(unsigned long opaque)
+{
+ struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ u64 int_counter;
+
+ if (!dd)
+ return; /* being torn down */
+
+ /*
+ * If we don't have a lid or any interrupts, let the user know and
+ * don't bother checking again.
+ */
+ int_counter = qib_int_counter(dd) - dd->z_int_counter;
+ if (int_counter == 0) {
+ if (!dd->f_intr_fallback(dd))
+ dev_err(&dd->pcidev->dev,
+ "No interrupts detected, not usable.\n");
+ else /* re-arm the timer to see if fallback works */
+ mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
+ }
+}
+
+static void init_piobuf_state(struct qib_devdata *dd)
+{
+ int i, pidx;
+ u32 uctxts;
+
+ /*
+ * Ensure all buffers are free, and fifos empty. Buffers
+ * are common, so only do once for port 0.
+ *
+ * After enable and qib_chg_pioavailkernel so we can safely
+ * enable pioavail updates and PIOENABLE. After this, packets
+ * are ready and able to go out.
+ */
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH);
+
+ /*
+ * If not all sendbufs are used, add the one to each of the lower
+ * numbered contexts. pbufsctxt and lastctxt_piobuf are
+ * calculated in chip-specific code because it may cause some
+ * chip-specific adjustments to be made.
+ */
+ uctxts = dd->cfgctxts - dd->first_user_ctxt;
+ dd->ctxts_extrabuf = dd->pbufsctxt ?
+ dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0;
+
+ /*
+ * Set up the shadow copies of the piobufavail registers,
+ * which we compare against the chip registers for now, and
+ * the in memory DMA'ed copies of the registers.
+ * By now pioavail updates to memory should have occurred, so
+ * copy them into our working/shadow registers; this is in
+ * case something went wrong with abort, but mostly to get the
+ * initial values of the generation bit correct.
+ */
+ for (i = 0; i < dd->pioavregs; i++) {
+ __le64 tmp;
+
+ tmp = dd->pioavailregs_dma[i];
+ /*
+ * Don't need to worry about pioavailkernel here
+ * because we will call qib_chg_pioavailkernel() later
+ * in initialization, to busy out buffers as needed.
+ */
+ dd->pioavailshadow[i] = le64_to_cpu(tmp);
+ }
+ while (i < ARRAY_SIZE(dd->pioavailshadow))
+ dd->pioavailshadow[i++] = 0; /* for debugging sanity */
+
+ /* after pioavailshadow is setup */
+ qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k,
+ TXCHK_CHG_TYPE_KERN, NULL);
+ dd->f_initvl15_bufs(dd);
+}
+
+/**
+ * qib_create_workqueues - create per port workqueues
+ * @dd: the qlogic_ib device
+ */
+static int qib_create_workqueues(struct qib_devdata *dd)
+{
+ int pidx;
+ struct qib_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (!ppd->qib_wq) {
+ char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */
+ snprintf(wq_name, sizeof(wq_name), "qib%d_%d",
+ dd->unit, pidx);
+ ppd->qib_wq =
+ create_singlethread_workqueue(wq_name);
+ if (!ppd->qib_wq)
+ goto wq_error;
+ }
+ }
+ return 0;
+wq_error:
+ pr_err("create_singlethread_workqueue failed for port %d\n",
+ pidx + 1);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (ppd->qib_wq) {
+ destroy_workqueue(ppd->qib_wq);
+ ppd->qib_wq = NULL;
+ }
+ }
+ return -ENOMEM;
+}
+
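+/* Free the per-port percpu PMA counters allocated in qib_init_pportdata(). */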
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+ free_percpu(ppd->ibport_data.pmastats);
+ ppd->ibport_data.pmastats = NULL;
+}
+
+/**
+ * qib_init - do the actual initialization sequence on the chip
+ * @dd: the qlogic_ib device
+ * @reinit: reinitializing, so don't allocate new memory
+ *
+ * Do the actual initialization sequence on the chip. This is done
+ * both from the init routine called from the PCI infrastructure, and
+ * when we reset the chip, or detect that it was reset internally,
+ * or it's administratively re-enabled.
+ *
+ * Memory allocation here and in called routines is only done in
+ * the first case (reinit == 0). We have to be careful, because even
+ * without memory allocation, we need to re-write all the chip registers
+ * TIDs, etc. after the reset or enable has completed.
+ */
+int qib_init(struct qib_devdata *dd, int reinit)
+{
+ int ret = 0, pidx, lastfail = 0;
+ u32 portok = 0;
+ unsigned i;
+ struct qib_ctxtdata *rcd;
+ struct qib_pportdata *ppd;
+ unsigned long flags;
+
+ /* Set linkstate to unknown, so we can watch for a transition. */
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED |
+ QIBL_LINKDOWN | QIBL_LINKINIT |
+ QIBL_LINKV);
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ }
+
+ if (reinit)
+ ret = init_after_reset(dd);
+ else
+ ret = loadtime_init(dd);
+ if (ret)
+ goto done;
+
+ /* Bypass most chip-init, to get to device creation */
+ if (qib_mini_init)
+ return 0;
+
+ ret = dd->f_late_initreg(dd);
+ if (ret)
+ goto done;
+
+ /* dd->rcd can be NULL if early init failed */
+ for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ /*
+ * Set up the (kernel) rcvhdr queue and egr TIDs. If doing
+ * re-init, the simplest way to handle this is to free
+ * existing, and re-allocate.
+ * Need to re-create rest of ctxt 0 ctxtdata as well.
+ */
+ rcd = dd->rcd[i];
+ if (!rcd)
+ continue;
+
+ lastfail = qib_create_rcvhdrq(dd, rcd);
+ if (!lastfail)
+ lastfail = qib_setup_eagerbufs(rcd);
+ if (lastfail) {
+ qib_dev_err(dd,
+ "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+ continue;
+ }
+ }
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ int mtu;
+ if (lastfail)
+ ret = lastfail;
+ ppd = dd->pport + pidx;
+ mtu = ib_mtu_enum_to_int(qib_ibmtu);
+ if (mtu == -1) {
+ mtu = QIB_DEFAULT_MTU;
+ qib_ibmtu = 0; /* don't leave invalid value */
+ }
+ /* set max we can ever have for this driver load */
+ ppd->init_ibmaxlen = min(mtu > 2048 ?
+ dd->piosize4k : dd->piosize2k,
+ dd->rcvegrbufsize +
+ (dd->rcvhdrentsize << 2));
+ /*
+ * Have to initialize ibmaxlen, but this will normally
+ * change immediately in qib_set_mtu().
+ */
+ ppd->ibmaxlen = ppd->init_ibmaxlen;
+ qib_set_mtu(ppd, mtu);
+
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_IB_LINK_DISABLED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+
+ lastfail = dd->f_bringup_serdes(ppd);
+ if (lastfail) {
+ qib_devinfo(dd->pcidev,
+ "Failed to bringup IB port %u\n", ppd->port);
+ lastfail = -ENETDOWN;
+ continue;
+ }
+
+ portok++;
+ }
+
+ if (!portok) {
+ /* none of the ports initialized */
+ if (!ret && lastfail)
+ ret = lastfail;
+ else if (!ret)
+ ret = -ENETDOWN;
+ /* but continue on, so we can debug cause */
+ }
+
+ enable_chip(dd);
+
+ init_piobuf_state(dd);
+
+done:
+ if (!ret) {
+ /* chip is OK for user apps; mark it as initialized */
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ /*
+ * Set status even if port serdes is not initialized
+ * so that diags will work.
+ */
+ *ppd->statusp |= QIB_STATUS_CHIP_PRESENT |
+ QIB_STATUS_INITTED;
+ if (!ppd->link_speed_enabled)
+ continue;
+ if (dd->flags & QIB_HAS_SEND_DMA)
+ ret = qib_setup_sdma(ppd);
+ init_timer(&ppd->hol_timer);
+ ppd->hol_timer.function = qib_hol_event;
+ ppd->hol_timer.data = (unsigned long)ppd;
+ ppd->hol_state = QIB_HOL_UP;
+ }
+
+ /* now we can enable all interrupts from the chip */
+ dd->f_set_intr_state(dd, 1);
+
+ /*
+ * Setup to verify we get an interrupt, and fallback
+ * to an alternate if necessary and possible.
+ */
+ mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
+ /* start stats retrieval timer */
+ mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
+ }
+
+ /* if ret is non-zero, we probably should do some cleanup here... */
+ return ret;
+}
+
+/*
+ * These next two routines are placeholders in case we don't have per-arch
+ * code for controlling write combining. If explicit control of write
+ * combining is not available, performance will probably be awful.
+ */
+
+int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd)
+{
+ return -EOPNOTSUPP;
+}
+
+void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd)
+{
+}
+
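+/* Look up a unit number in the idr; qib_lookup() below takes qib_devs_lock. */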
+static inline struct qib_devdata *__qib_lookup(int unit)
+{
+ return idr_find(&qib_unit_table, unit);
+}
+
+struct qib_devdata *qib_lookup(int unit)
+{
+ struct qib_devdata *dd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ dd = __qib_lookup(unit);
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+
+ return dd;
+}
+
+/*
+ * Stop the timers during unit shutdown, or after an error late
+ * in initialization.
+ */
+static void qib_stop_timers(struct qib_devdata *dd)
+{
+ struct qib_pportdata *ppd;
+ int pidx;
+
+ if (dd->stats_timer.data) {
+ del_timer_sync(&dd->stats_timer);
+ dd->stats_timer.data = 0;
+ }
+ if (dd->intrchk_timer.data) {
+ del_timer_sync(&dd->intrchk_timer);
+ dd->intrchk_timer.data = 0;
+ }
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ if (ppd->hol_timer.data)
+ del_timer_sync(&ppd->hol_timer);
+ if (ppd->led_override_timer.data) {
+ del_timer_sync(&ppd->led_override_timer);
+ atomic_set(&ppd->led_override_timer_active, 0);
+ }
+ if (ppd->symerr_clear_timer.data)
+ del_timer_sync(&ppd->symerr_clear_timer);
+ }
+}
+
+/**
+ * qib_shutdown_device - shut down a device
+ * @dd: the qlogic_ib device
+ *
+ * This is called to make the device quiet when we are about to
+ * unload the driver, and also when the device is administratively
+ * disabled. It does not free any data structures.
+ * Everything it does has to be setup again by qib_init(dd, 1)
+ */
+static void qib_shutdown_device(struct qib_devdata *dd)
+{
+ struct qib_pportdata *ppd;
+ unsigned pidx;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ spin_lock_irq(&ppd->lflags_lock);
+ ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT |
+ QIBL_LINKARMED | QIBL_LINKACTIVE |
+ QIBL_LINKV);
+ spin_unlock_irq(&ppd->lflags_lock);
+ *ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY);
+ }
+ dd->flags &= ~QIB_INITTED;
+
+ /* mask interrupts, but not errors */
+ dd->f_set_intr_state(dd, 0);
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS |
+ QIB_RCVCTRL_CTXT_DIS |
+ QIB_RCVCTRL_INTRAVAIL_DIS |
+ QIB_RCVCTRL_PKEY_ENB, -1);
+ /*
+ * Gracefully stop all sends allowing any in progress to
+ * trickle out first.
+ */
+ dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR);
+ }
+
+ /*
+ * Enough for anything that's going to trickle out to have actually
+ * done so.
+ */
+ udelay(20);
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+ dd->f_setextled(ppd, 0); /* make sure LEDs are off */
+
+ if (dd->flags & QIB_HAS_SEND_DMA)
+ qib_teardown_sdma(ppd);
+
+ dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS |
+ QIB_SENDCTRL_SEND_DIS);
+ /*
+ * Clear SerdesEnable.
+ * We can't count on interrupts since we are stopping.
+ */
+ dd->f_quiet_serdes(ppd);
+
+ if (ppd->qib_wq) {
+ destroy_workqueue(ppd->qib_wq);
+ ppd->qib_wq = NULL;
+ }
+ qib_free_pportdata(ppd);
+ }
+
+ qib_update_eeprom_log(dd);
+}
+
+/**
+ * qib_free_ctxtdata - free a context's allocated data
+ * @dd: the qlogic_ib device
+ * @rcd: the ctxtdata structure
+ *
+ * free up any allocated data for a context
+ * This should not touch anything that would affect a simultaneous
+ * re-allocation of context data, because it is called after qib_mutex
+ * is released (and can be called from reinit as well).
+ * It should never change any chip state, or global driver state.
+ */
+void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
+{
+ if (!rcd)
+ return;
+
+ if (rcd->rcvhdrq) {
+ dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
+ rcd->rcvhdrq, rcd->rcvhdrq_phys);
+ rcd->rcvhdrq = NULL;
+ if (rcd->rcvhdrtail_kvaddr) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ rcd->rcvhdrtail_kvaddr,
+ rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrtail_kvaddr = NULL;
+ }
+ }
+ if (rcd->rcvegrbuf) {
+ unsigned e;
+
+ for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
+ void *base = rcd->rcvegrbuf[e];
+ size_t size = rcd->rcvegrbuf_size;
+
+ dma_free_coherent(&dd->pcidev->dev, size,
+ base, rcd->rcvegrbuf_phys[e]);
+ }
+ kfree(rcd->rcvegrbuf);
+ rcd->rcvegrbuf = NULL;
+ kfree(rcd->rcvegrbuf_phys);
+ rcd->rcvegrbuf_phys = NULL;
+ rcd->rcvegrbuf_chunks = 0;
+ }
+
+ kfree(rcd->tid_pg_list);
+ vfree(rcd->user_event_mask);
+ vfree(rcd->subctxt_uregbase);
+ vfree(rcd->subctxt_rcvegrbuf);
+ vfree(rcd->subctxt_rcvhdr_base);
+#ifdef CONFIG_DEBUG_FS
+ kfree(rcd->opstats);
+ rcd->opstats = NULL;
+#endif
+ kfree(rcd);
+}
+
+/*
+ * Perform a PIO buffer bandwidth write test, to verify proper system
+ * configuration. Even when all the setup calls work, occasionally
+ * BIOS or other issues can prevent write combining from working, or
+ * can cause other bandwidth problems to the chip.
+ *
+ * This test simply writes the same buffer over and over again, and
+ * measures close to the peak bandwidth to the chip (not testing
+ * data bandwidth to the wire). On chips that use an address-based
+ * trigger to send packets to the wire, this is easy. On chips that
+ * use a count to trigger, we want to make sure that the packet doesn't
+ * go out on the wire, or trigger flow control checks.
+ */
+static void qib_verify_pioperf(struct qib_devdata *dd)
+{
+ u32 pbnum, cnt, lcnt;
+ u32 __iomem *piobuf;
+ u32 *addr;
+ u64 msecs, emsecs;
+
+ piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum);
+ if (!piobuf) {
+ qib_devinfo(dd->pcidev,
+ "No PIObufs for checking perf, skipping\n");
+ return;
+ }
+
+ /*
+ * Enough to give us a reasonable test, less than piobuf size, and
+ * likely multiple of store buffer length.
+ */
+ cnt = 1024;
+
+ addr = vmalloc(cnt);
+ if (!addr) {
+ qib_devinfo(dd->pcidev,
+ "Couldn't get memory for checking PIO perf,"
+ " skipping\n");
+ goto done;
+ }
+
+ preempt_disable(); /* we want reasonably accurate elapsed time */
+ msecs = 1 + jiffies_to_msecs(jiffies);
+ for (lcnt = 0; lcnt < 10000U; lcnt++) {
+ /* wait until we cross msec boundary */
+ if (jiffies_to_msecs(jiffies) >= msecs)
+ break;
+ udelay(1);
+ }
+
+ dd->f_set_armlaunch(dd, 0);
+
+ /*
+ * length 0, no dwords actually sent
+ */
+ writeq(0, piobuf);
+ qib_flush_wc();
+
+ /*
+ * This is only roughly accurate, since even with preempt we
+ * still take interrupts that could take a while. Running for
+ * >= 5 msec seems to get us "close enough" to accurate values.
+ */
+ msecs = jiffies_to_msecs(jiffies);
+ for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
+ qib_pio_copy(piobuf + 64, addr, cnt >> 2);
+ emsecs = jiffies_to_msecs(jiffies) - msecs;
+ }
+
+ /* 1 GiB/sec, slightly over IB SDR line rate */
+ if (lcnt < (emsecs * 1024U))
+ qib_dev_err(dd,
+ "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n",
+ lcnt / (u32) emsecs);
+
+ preempt_enable();
+
+ vfree(addr);
+
+done:
+ /* disarm piobuf, so it's available again */
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum));
+ qib_sendbuf_done(dd, pbnum);
+ dd->f_set_armlaunch(dd, 1);
+}
+
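+/*
+ * Release the per-unit data structure: drop it from the unit idr and
+ * device list, then free it via the verbs allocator that created it.
+ */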
+void qib_free_devdata(struct qib_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ idr_remove(&qib_unit_table, dd->unit);
+ list_del(&dd->list);
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_exit(&dd->verbs_dev);
+#endif
+ free_percpu(dd->int_counter);
+ ib_dealloc_device(&dd->verbs_dev.ibdev);
+}
+
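+/* Sum the per-cpu interrupt counters for this device. */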
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+ int cpu;
+ u64 int_counter = 0;
+
+ for_each_possible_cpu(cpu)
+ int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+ return int_counter;
+}
+
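+/* Total the interrupt counts across all active units. */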
+u64 qib_sps_ints(void)
+{
+ unsigned long flags;
+ struct qib_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ sps_ints += qib_int_counter(dd);
+ }
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ return sps_ints;
+}
+
+/*
+ * Allocate our primary per-unit data structure. Must be done via verbs
+ * allocator, because the verbs cleanup process both does cleanup and
+ * free of the data structure.
+ * "extra" is for chip-specific data.
+ *
+ * Use the idr mechanism to get a unit number for this unit.
+ */
+struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
+{
+ unsigned long flags;
+ struct qib_devdata *dd;
+ int ret;
+
+ dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
+ if (!dd)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&dd->list);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irqsave(&qib_devs_lock, flags);
+
+ ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT);
+ if (ret >= 0) {
+ dd->unit = ret;
+ list_add(&dd->list, &qib_dev_list);
+ }
+
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ idr_preload_end();
+
+ if (ret < 0) {
+ qib_early_err(&pdev->dev,
+ "Could not allocate unit ID: error %d\n", -ret);
+ goto bail;
+ }
+ dd->int_counter = alloc_percpu(u64);
+ if (!dd->int_counter) {
+ ret = -ENOMEM;
+ qib_early_err(&pdev->dev,
+ "Could not allocate per-cpu int_counter\n");
+ goto bail;
+ }
+
+ if (!qib_cpulist_count) {
+ u32 count = num_online_cpus();
+ qib_cpulist = kzalloc(BITS_TO_LONGS(count) *
+ sizeof(long), GFP_KERNEL);
+ if (qib_cpulist)
+ qib_cpulist_count = count;
+ else
+ qib_early_err(&pdev->dev,
+ "Could not alloc cpulist info, cpu affinity might be wrong\n");
+ }
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
+ return dd;
+bail:
+ if (!list_empty(&dd->list))
+ list_del_init(&dd->list);
+ ib_dealloc_device(&dd->verbs_dev.ibdev);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Called from freeze mode handlers, and from PCI error
+ * reporting code. Should be paranoid about state of
+ * system and data structures.
+ */
+void qib_disable_after_error(struct qib_devdata *dd)
+{
+ if (dd->flags & QIB_INITTED) {
+ u32 pidx;
+
+ dd->flags &= ~QIB_INITTED;
+ if (dd->pport)
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ struct qib_pportdata *ppd;
+
+ ppd = dd->pport + pidx;
+ if (dd->flags & QIB_PRESENT) {
+ qib_set_linkstate(ppd,
+ QIB_IB_LINKDOWN_DISABLE);
+ dd->f_setextled(ppd, 0);
+ }
+ *ppd->statusp &= ~QIB_STATUS_IB_READY;
+ }
+ }
+
+ /*
+ * Mark as having had an error for driver, and also
+ * for /sys and status word mapped to user programs.
+ * This marks unit as not usable, until reset.
+ */
+ if (dd->devstatusp)
+ *dd->devstatusp |= QIB_STATUS_HWERROR;
+}
+
+static void qib_remove_one(struct pci_dev *);
+static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
+
+#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
+#define PFX QIB_DRV_NAME ": "
+
+static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
+ { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
+ { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
+ { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
+
+static struct pci_driver qib_driver = {
+ .name = QIB_DRV_NAME,
+ .probe = qib_init_one,
+ .remove = qib_remove_one,
+ .id_table = qib_pci_tbl,
+ .err_handler = &qib_pci_err_handler,
+};
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+
+static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+ .notifier_call = qib_notify_dca,
+ .next = NULL,
+ .priority = 0
+};
+
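+/* Forward a DCA notifier event to the chip-specific handler for one unit. */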
+static int qib_notify_dca_device(struct device *device, void *data)
+{
+ struct qib_devdata *dd = dev_get_drvdata(device);
+ unsigned long event = *(unsigned long *)data;
+
+ return dd->f_notify_dca(dd, event);
+}
+
+static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
+ void *p)
+{
+ int rval;
+
+ rval = driver_for_each_device(&qib_driver.driver, NULL,
+ &event, qib_notify_dca_device);
+ return rval ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+#endif
+
+/*
+ * Do all the generic driver unit- and chip-independent memory
+ * allocation and initialization.
+ */
+static int __init qib_ib_init(void)
+{
+ int ret;
+
+ ret = qib_dev_init();
+ if (ret)
+ goto bail;
+
+ /*
+ * These must be called before the driver is registered with
+ * the PCI subsystem.
+ */
+ idr_init(&qib_unit_table);
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_register_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_init();
+#endif
+ ret = pci_register_driver(&qib_driver);
+ if (ret < 0) {
+ pr_err("Unable to register driver: error %d\n", -ret);
+ goto bail_dev;
+ }
+
+ /* not fatal if it doesn't work */
+ if (qib_init_qibfs())
+ pr_err("Unable to register ipathfs\n");
+ goto bail; /* all OK */
+
+bail_dev:
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_exit();
+#endif
+ idr_destroy(&qib_unit_table);
+ qib_dev_cleanup();
+bail:
+ return ret;
+}
+
+module_init(qib_ib_init);
+
+/*
+ * Do the non-unit driver cleanup, memory free, etc. at unload.
+ */
+static void __exit qib_ib_cleanup(void)
+{
+ int ret;
+
+ ret = qib_exit_qibfs();
+ if (ret)
+ pr_err(
+ "Unable to cleanup counter filesystem: error %d\n",
+ -ret);
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+ dca_unregister_notify(&dca_notifier);
+#endif
+ pci_unregister_driver(&qib_driver);
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_exit();
+#endif
+
+ qib_cpulist_count = 0;
+ kfree(qib_cpulist);
+
+ idr_destroy(&qib_unit_table);
+ qib_dev_cleanup();
+}
+
+module_exit(qib_ib_cleanup);
+
+/* this can only be called after a successful initialization */
+static void cleanup_device_data(struct qib_devdata *dd)
+{
+ int ctxt;
+ int pidx;
+ struct qib_ctxtdata **tmp;
+ unsigned long flags;
+
+ /* users can't do anything more with chip */
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ if (dd->pport[pidx].statusp)
+ *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
+
+ spin_lock(&dd->pport[pidx].cc_shadow_lock);
+
+ kfree(dd->pport[pidx].congestion_entries);
+ dd->pport[pidx].congestion_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries);
+ dd->pport[pidx].ccti_entries = NULL;
+ kfree(dd->pport[pidx].ccti_entries_shadow);
+ dd->pport[pidx].ccti_entries_shadow = NULL;
+ kfree(dd->pport[pidx].congestion_entries_shadow);
+ dd->pport[pidx].congestion_entries_shadow = NULL;
+
+ spin_unlock(&dd->pport[pidx].cc_shadow_lock);
+ }
+
+ if (!qib_wc_pat)
+ qib_disable_wc(dd);
+
+ if (dd->pioavailregs_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *) dd->pioavailregs_dma,
+ dd->pioavailregs_phys);
+ dd->pioavailregs_dma = NULL;
+ }
+
+ if (dd->pageshadow) {
+ struct page **tmpp = dd->pageshadow;
+ dma_addr_t *tmpd = dd->physshadow;
+ int i;
+
+ for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
+ int ctxt_tidbase = ctxt * dd->rcvtidcnt;
+ int maxtid = ctxt_tidbase + dd->rcvtidcnt;
+
+ for (i = ctxt_tidbase; i < maxtid; i++) {
+ if (!tmpp[i])
+ continue;
+ pci_unmap_page(dd->pcidev, tmpd[i],
+ PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ qib_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ }
+ }
+
+ dd->pageshadow = NULL;
+ vfree(tmpp);
+ dd->physshadow = NULL;
+ vfree(tmpd);
+ }
+
+ /*
+ * Free any resources still in use (usually just kernel contexts)
+ * at unload; we do this for ctxtcnt, because that's what we allocate.
+ * We acquire lock to be really paranoid that rcd isn't being
+ * accessed from some interrupt-related code (that should not happen,
+ * but best to be sure).
+ */
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ tmp = dd->rcd;
+ dd->rcd = NULL;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) {
+ struct qib_ctxtdata *rcd = tmp[ctxt];
+
+ tmp[ctxt] = NULL; /* debugging paranoia */
+ qib_free_ctxtdata(dd, rcd);
+ }
+ kfree(tmp);
+ kfree(dd->boardname);
+ qib_cq_exit(dd);
+}
+
+/*
+ * Clean up on unit shutdown, or error during unit load after
+ * successful initialization.
+ */
+static void qib_postinit_cleanup(struct qib_devdata *dd)
+{
+ /*
+ * Clean up chip-specific stuff.
+ * We check for NULL here, because it's outside
+ * the kregbase check, and we need to call it
+ * after the free_irq. Thus it's possible that
+ * the function pointers were never initialized.
+ */
+ if (dd->f_cleanup)
+ dd->f_cleanup(dd);
+
+ qib_pcie_ddcleanup(dd);
+
+ cleanup_device_data(dd);
+
+ qib_free_devdata(dd);
+}
+
+static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int ret, j, pidx, initfail;
+ struct qib_devdata *dd = NULL;
+
+ ret = qib_pcie_init(pdev, ent);
+ if (ret)
+ goto bail;
+
+ /*
+ * Do device-specific initialization, function table setup, dd
+ * allocation, etc.
+ */
+ switch (ent->device) {
+ case PCI_DEVICE_ID_QLOGIC_IB_6120:
+#ifdef CONFIG_PCI_MSI
+ dd = qib_init_iba6120_funcs(pdev, ent);
+#else
+ qib_early_err(&pdev->dev,
+ "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n",
+ ent->device);
+ dd = ERR_PTR(-ENODEV);
+#endif
+ break;
+
+ case PCI_DEVICE_ID_QLOGIC_IB_7220:
+ dd = qib_init_iba7220_funcs(pdev, ent);
+ break;
+
+ case PCI_DEVICE_ID_QLOGIC_IB_7322:
+ dd = qib_init_iba7322_funcs(pdev, ent);
+ break;
+
+ default:
+ qib_early_err(&pdev->dev,
+ "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ }
+
+ if (IS_ERR(dd))
+ ret = PTR_ERR(dd);
+ if (ret)
+ goto bail; /* error already printed */
+
+ ret = qib_create_workqueues(dd);
+ if (ret)
+ goto bail;
+
+ /* do the generic initialization */
+ initfail = qib_init(dd, 0);
+
+ ret = qib_register_ib_device(dd);
+
+ /*
+ * Now ready for use. This should be cleared whenever we
+ * detect a reset, or initiate one. If earlier failure,
+ * we still create devices, so diags, etc. can be used
+ * to determine cause of problem.
+ */
+ if (!qib_mini_init && !initfail && !ret)
+ dd->flags |= QIB_INITTED;
+
+ j = qib_device_create(dd);
+ if (j)
+ qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
+ j = qibfs_add(dd);
+ if (j)
+ qib_dev_err(dd, "Failed filesystem setup for counters: %d\n",
+ -j);
+
+ if (qib_mini_init || initfail || ret) {
+ qib_stop_timers(dd);
+ flush_workqueue(ib_wq);
+ for (pidx = 0; pidx < dd->num_pports; ++pidx)
+ dd->f_quiet_serdes(dd->pport + pidx);
+ if (qib_mini_init)
+ goto bail;
+ if (!j) {
+ (void) qibfs_remove(dd);
+ qib_device_remove(dd);
+ }
+ if (!ret)
+ qib_unregister_ib_device(dd);
+ qib_postinit_cleanup(dd);
+ if (initfail)
+ ret = initfail;
+ goto bail;
+ }
+
+ if (!qib_wc_pat) {
+ ret = qib_enable_wc(dd);
+ if (ret) {
+ qib_dev_err(dd,
+ "Write combining not enabled (err %d): performance may be poor\n",
+ -ret);
+ ret = 0;
+ }
+ }
+
+ qib_verify_pioperf(dd);
+bail:
+ return ret;
+}
+
+static void qib_remove_one(struct pci_dev *pdev)
+{
+ struct qib_devdata *dd = pci_get_drvdata(pdev);
+ int ret;
+
+ /* unregister from IB core */
+ qib_unregister_ib_device(dd);
+
+ /*
+ * Disable the IB link, disable interrupts on the device,
+ * clear dma engines, etc.
+ */
+ if (!qib_mini_init)
+ qib_shutdown_device(dd);
+
+ qib_stop_timers(dd);
+
+ /* wait until all of our (qsfp) queue_work() calls complete */
+ flush_workqueue(ib_wq);
+
+ ret = qibfs_remove(dd);
+ if (ret)
+ qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n",
+ -ret);
+
+ qib_device_remove(dd);
+
+ qib_postinit_cleanup(dd);
+}
+
+/**
+ * qib_create_rcvhdrq - create a receive header queue
+ * @dd: the qlogic_ib device
+ * @rcd: the context data
+ *
+ * This must be contiguous memory (from an i/o perspective), and must be
+ * DMA'able (which means for some systems, it will go through an IOMMU,
+ * or be forced into a low address range).
+ */
+int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
+{
+ unsigned amt;
+ int old_node_id;
+
+ if (!rcd->rcvhdrq) {
+ dma_addr_t phys_hdrqtail;
+ gfp_t gfp_flags;
+
+ amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE);
+ gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
+ GFP_USER : GFP_KERNEL;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
+ rcd->rcvhdrq = dma_alloc_coherent(
+ &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
+ gfp_flags | __GFP_COMP);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
+
+ if (!rcd->rcvhdrq) {
+ qib_dev_err(dd,
+ "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+ amt, rcd->ctxt);
+ goto bail;
+ }
+
+ if (rcd->ctxt >= dd->first_user_ctxt) {
+ rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
+ if (!rcd->user_event_mask)
+ goto bail_free_hdrq;
+ }
+
+ if (!(dd->flags & QIB_NODMA_RTAIL)) {
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
+ rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
+ &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
+ if (!rcd->rcvhdrtail_kvaddr)
+ goto bail_free;
+ rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
+ }
+
+ rcd->rcvhdrq_size = amt;
+ }
+
+ /* clear for security and sanity on each use */
+ memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
+ if (rcd->rcvhdrtail_kvaddr)
+ memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+ return 0;
+
+bail_free:
+ qib_dev_err(dd,
+ "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+ rcd->ctxt);
+ vfree(rcd->user_event_mask);
+ rcd->user_event_mask = NULL;
+bail_free_hdrq:
+ dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
+ rcd->rcvhdrq_phys);
+ rcd->rcvhdrq = NULL;
+bail:
+ return -ENOMEM;
+}
+
+/**
+ * qib_setup_eagerbufs - allocate eager buffers, both kernel and user contexts
+ * @rcd: the context we are setting up.
+ *
+ * Allocate the eager TID buffers and program them into the chip.
+ * They are no longer completely contiguous; we do multiple allocation
+ * calls. Otherwise we get the OOM code involved, by asking for too
+ * much per call, with disastrous results on some kernels.
+ */
+int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
+ size_t size;
+ gfp_t gfp_flags;
+ int old_node_id;
+
+ /*
+ * GFP_USER, but without GFP_FS, so buffer cache can be
+ * coalesced (we hope); otherwise, even at order 4,
+ * heavy filesystem activity makes these fail, and we can
+ * use compound pages.
+ */
+ gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
+
+ egrcnt = rcd->rcvegrcnt;
+ egroff = rcd->rcvegr_tid_base;
+ egrsize = dd->rcvegrbufsize;
+
+ chunk = rcd->rcvegrbuf_chunks;
+ egrperchunk = rcd->rcvegrbufs_perchunk;
+ size = rcd->rcvegrbuf_size;
+ if (!rcd->rcvegrbuf) {
+ rcd->rcvegrbuf =
+ kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
+ GFP_KERNEL, rcd->node_id);
+ if (!rcd->rcvegrbuf)
+ goto bail;
+ }
+ if (!rcd->rcvegrbuf_phys) {
+ rcd->rcvegrbuf_phys =
+ kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
+ GFP_KERNEL, rcd->node_id);
+ if (!rcd->rcvegrbuf_phys)
+ goto bail_rcvegrbuf;
+ }
+ for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
+ if (rcd->rcvegrbuf[e])
+ continue;
+
+ old_node_id = dev_to_node(&dd->pcidev->dev);
+ set_dev_node(&dd->pcidev->dev, rcd->node_id);
+ rcd->rcvegrbuf[e] =
+ dma_alloc_coherent(&dd->pcidev->dev, size,
+ &rcd->rcvegrbuf_phys[e],
+ gfp_flags);
+ set_dev_node(&dd->pcidev->dev, old_node_id);
+ if (!rcd->rcvegrbuf[e])
+ goto bail_rcvegrbuf_phys;
+ }
+
+ rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0];
+
+ for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) {
+ dma_addr_t pa = rcd->rcvegrbuf_phys[chunk];
+ unsigned i;
+
+ /* clear for security and sanity on each use */
+ memset(rcd->rcvegrbuf[chunk], 0, size);
+
+ for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
+ dd->f_put_tid(dd, e + egroff +
+ (u64 __iomem *)
+ ((char __iomem *)
+ dd->kregbase +
+ dd->rcvegrbase),
+ RCVHQ_RCV_TYPE_EAGER, pa);
+ pa += egrsize;
+ }
+ cond_resched(); /* don't hog the cpu */
+ }
+
+ return 0;
+
+bail_rcvegrbuf_phys:
+ for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++)
+ dma_free_coherent(&dd->pcidev->dev, size,
+ rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]);
+ kfree(rcd->rcvegrbuf_phys);
+ rcd->rcvegrbuf_phys = NULL;
+bail_rcvegrbuf:
+ kfree(rcd->rcvegrbuf);
+ rcd->rcvegrbuf = NULL;
+bail:
+ return -ENOMEM;
+}
+
+/*
+ * Note: Changes to this routine should be mirrored
+ * for the diagnostics routine qib_remap_ioaddr32().
+ * There is also related code for VL15 buffers in qib_init_7322_variables().
+ * The teardown code that unmaps is in qib_pcie_ddcleanup()
+ */
+int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
+{
+ u64 __iomem *qib_kregbase = NULL;
+ void __iomem *qib_piobase = NULL;
+ u64 __iomem *qib_userbase = NULL;
+ u64 qib_kreglen;
+ u64 qib_pio2koffset = dd->piobufbase & 0xffffffff;
+ u64 qib_pio4koffset = dd->piobufbase >> 32;
+ u64 qib_pio2klen = dd->piobcnt2k * dd->palign;
+ u64 qib_pio4klen = dd->piobcnt4k * dd->align4k;
+ u64 qib_physaddr = dd->physaddr;
+ u64 qib_piolen;
+ u64 qib_userlen = 0;
+
+ /*
+ * Free the old mapping because the kernel will try to reuse the
+ * old mapping and not create a new mapping with the
+ * write combining attribute.
+ */
+ iounmap(dd->kregbase);
+ dd->kregbase = NULL;
+
+ /*
+ * Assumes chip address space looks like:
+ * - kregs + sregs + cregs + uregs (in any order)
+ * - piobufs (2K and 4K bufs in either order)
+ * or:
+ * - kregs + sregs + cregs (in any order)
+ * - piobufs (2K and 4K bufs in either order)
+ * - uregs
+ */
+ if (dd->piobcnt4k == 0) {
+ qib_kreglen = qib_pio2koffset;
+ qib_piolen = qib_pio2klen;
+ } else if (qib_pio2koffset < qib_pio4koffset) {
+ qib_kreglen = qib_pio2koffset;
+ qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen;
+ } else {
+ qib_kreglen = qib_pio4koffset;
+ qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen;
+ }
+ qib_piolen += vl15buflen;
+ /* Map just the configured ports (not all hw ports) */
+ if (dd->uregbase > qib_kreglen)
+ qib_userlen = dd->ureg_align * dd->cfgctxts;
+
+ /* Sanity checks passed, now create the new mappings */
+ qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen);
+ if (!qib_kregbase)
+ goto bail;
+
+ qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen);
+ if (!qib_piobase)
+ goto bail_kregbase;
+
+ if (qib_userlen) {
+ qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase,
+ qib_userlen);
+ if (!qib_userbase)
+ goto bail_piobase;
+ }
+
+ dd->kregbase = qib_kregbase;
+ dd->kregend = (u64 __iomem *)
+ ((char __iomem *) qib_kregbase + qib_kreglen);
+ dd->piobase = qib_piobase;
+ dd->pio2kbase = (void __iomem *)
+ (((char __iomem *) dd->piobase) +
+ qib_pio2koffset - qib_kreglen);
+ if (dd->piobcnt4k)
+ dd->pio4kbase = (void __iomem *)
+ (((char __iomem *) dd->piobase) +
+ qib_pio4koffset - qib_kreglen);
+ if (qib_userlen)
+ /* ureg will now be accessed relative to dd->userbase */
+ dd->userbase = qib_userbase;
+ return 0;
+
+bail_piobase:
+ iounmap(qib_piobase);
+bail_kregbase:
+ iounmap(qib_kregbase);
+bail:
+ return -ENOMEM;
+}
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
new file mode 100644
index 00000000000..f4918f2165e
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+/**
+ * qib_format_hwmsg - format a single hwerror message
+ * @msg: message buffer
+ * @msgl: length of message buffer
+ * @hwmsg: message to add to message buffer
+ */
+static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+ strlcat(msg, "[", msgl);
+ strlcat(msg, hwmsg, msgl);
+ strlcat(msg, "]", msgl);
+}
+
+/**
+ * qib_format_hwerrors - format hardware error messages for display
+ * @hwerrs: hardware errors bit vector
+ * @hwerrmsgs: hardware error descriptions
+ * @nhwerrmsgs: number of hwerrmsgs
+ * @msg: message buffer
+ * @msgl: message buffer length
+ */
+void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs,
+ size_t nhwerrmsgs, char *msg, size_t msgl)
+{
+ int i;
+
+ for (i = 0; i < nhwerrmsgs; i++)
+ if (hwerrs & hwerrmsgs[i].mask)
+ qib_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+}
+
+static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev)
+{
+ struct ib_event event;
+ struct qib_devdata *dd = ppd->dd;
+
+ event.device = &dd->verbs_dev.ibdev;
+ event.element.port_num = ppd->port;
+ event.event = ev;
+ ib_dispatch_event(&event);
+}
+
+void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs)
+{
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+ u32 lstate;
+ u8 ltstate;
+ enum ib_event_type ev = 0;
+
+ lstate = dd->f_iblink_state(ibcs); /* linkstate */
+ ltstate = dd->f_ibphys_portstate(ibcs);
+
+ /*
+ * If linkstate transitions into INIT from any of the various down
+ * states, or if it transitions from any of the up (INIT or better)
+ * states into any of the down states (except link recovery), then
+ * call the chip-specific code to take appropriate actions.
+ *
+ * ppd->lflags could be 0 if this is the first time the interrupt
+ * handler has been called but the link is already up.
+ */
+ if (lstate >= IB_PORT_INIT &&
+ (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) &&
+ ltstate == IB_PHYSPORTSTATE_LINKUP) {
+ /* transitioned to UP */
+ if (dd->f_ib_updown(ppd, 1, ibcs))
+ goto skip_ibchange; /* chip-code handled */
+ } else if (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED |
+ QIBL_LINKACTIVE | QIBL_IB_FORCE_NOTIFY)) {
+ if (ltstate != IB_PHYSPORTSTATE_LINKUP &&
+ ltstate <= IB_PHYSPORTSTATE_CFG_TRAIN &&
+ dd->f_ib_updown(ppd, 0, ibcs))
+ goto skip_ibchange; /* chip-code handled */
+ qib_set_uevent_bits(ppd, _QIB_EVENT_LINKDOWN_BIT);
+ }
+
+ if (lstate != IB_PORT_DOWN) {
+ /* lstate is INIT, ARMED, or ACTIVE */
+ if (lstate != IB_PORT_ACTIVE) {
+ *ppd->statusp &= ~QIB_STATUS_IB_READY;
+ if (ppd->lflags & QIBL_LINKACTIVE)
+ ev = IB_EVENT_PORT_ERR;
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ if (lstate == IB_PORT_ARMED) {
+ ppd->lflags |= QIBL_LINKARMED | QIBL_LINKV;
+ ppd->lflags &= ~(QIBL_LINKINIT |
+ QIBL_LINKDOWN | QIBL_LINKACTIVE);
+ } else {
+ ppd->lflags |= QIBL_LINKINIT | QIBL_LINKV;
+ ppd->lflags &= ~(QIBL_LINKARMED |
+ QIBL_LINKDOWN | QIBL_LINKACTIVE);
+ }
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ /* start a 75msec timer to clear symbol errors */
+ mod_timer(&ppd->symerr_clear_timer,
+ jiffies + msecs_to_jiffies(75));
+ } else if (ltstate == IB_PHYSPORTSTATE_LINKUP &&
+ !(ppd->lflags & QIBL_LINKACTIVE)) {
+ /* active, but not active deferred */
+ qib_hol_up(ppd); /* useful only for 6120 now */
+ *ppd->statusp |=
+ QIB_STATUS_IB_READY | QIB_STATUS_IB_CONF;
+ qib_clear_symerror_on_linkup((unsigned long)ppd);
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_LINKACTIVE | QIBL_LINKV;
+ ppd->lflags &= ~(QIBL_LINKINIT |
+ QIBL_LINKDOWN | QIBL_LINKARMED);
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ if (dd->flags & QIB_HAS_SEND_DMA)
+ qib_sdma_process_event(ppd,
+ qib_sdma_event_e30_go_running);
+ ev = IB_EVENT_PORT_ACTIVE;
+ dd->f_setextled(ppd, 1);
+ }
+ } else { /* down */
+ if (ppd->lflags & QIBL_LINKACTIVE)
+ ev = IB_EVENT_PORT_ERR;
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags |= QIBL_LINKDOWN | QIBL_LINKV;
+ ppd->lflags &= ~(QIBL_LINKINIT |
+ QIBL_LINKACTIVE | QIBL_LINKARMED);
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ *ppd->statusp &= ~QIB_STATUS_IB_READY;
+ }
+
+skip_ibchange:
+ ppd->lastibcstat = ibcs;
+ if (ev)
+ signal_ib_event(ppd, ev);
+ return;
+}
+
+void qib_clear_symerror_on_linkup(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+
+ if (ppd->lflags & QIBL_LINKACTIVE)
+ return;
+
+ ppd->ibport_data.z_symbol_error_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
+}
+
+/*
+ * Handle receive interrupts for user ctxts; this means a user
+ * process was waiting for a packet to arrive, and didn't want
+ * to poll.
+ */
+void qib_handle_urcv(struct qib_devdata *dd, u64 ctxtr)
+{
+ struct qib_ctxtdata *rcd;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ for (i = dd->first_user_ctxt; dd->rcd && i < dd->cfgctxts; i++) {
+ if (!(ctxtr & (1ULL << i)))
+ continue;
+ rcd = dd->rcd[i];
+ if (!rcd || !rcd->cnt)
+ continue;
+
+ if (test_and_clear_bit(QIB_CTXT_WAITING_RCV, &rcd->flag)) {
+ wake_up_interruptible(&rcd->wait);
+ dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_DIS,
+ rcd->ctxt);
+ } else if (test_and_clear_bit(QIB_CTXT_WAITING_URG,
+ &rcd->flag)) {
+ rcd->urgent++;
+ wake_up_interruptible(&rcd->wait);
+ }
+ }
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+}
+
+void qib_bad_intrstatus(struct qib_devdata *dd)
+{
+ static int allbits;
+
+ /* separate routine, for better optimization of qib_intr() */
+
+ /*
+ * We print the message and disable interrupts, in hope of
+ * having a better chance of debugging the problem.
+ */
+ qib_dev_err(dd,
+ "Read of chip interrupt status failed disabling interrupts\n");
+ if (allbits++) {
+ /* disable interrupt delivery, something is very wrong */
+ if (allbits == 2)
+ dd->f_set_intr_state(dd, 0);
+ if (allbits == 3) {
+ qib_dev_err(dd,
+ "2nd bad interrupt status, unregistering interrupts\n");
+ dd->flags |= QIB_BADINTR;
+ dd->flags &= ~QIB_INITTED;
+ dd->f_free_irq(dd);
+ }
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
new file mode 100644
index 00000000000..3b9afccaaad
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "qib.h"
+
+/**
+ * qib_alloc_lkey - allocate an lkey
+ * @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count as required.
+ *
+ * Sets the lkey field of mr for non-dma regions.
+ *
+ */
+
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
+{
+ unsigned long flags;
+ u32 r;
+ u32 n;
+ int ret = 0;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
+
+ spin_lock_irqsave(&rkt->lock, flags);
+
+ /* special case for dma_mr lkey == 0 */
+ if (dma_region) {
+ struct qib_mregion *tmr;
+
+ tmr = rcu_access_pointer(dev->dma_mr);
+ if (!tmr) {
+ qib_get_mr(mr);
+ rcu_assign_pointer(dev->dma_mr, mr);
+ mr->lkey_published = 1;
+ }
+ goto success;
+ }
+
+ /* Find the next available LKEY */
+ r = rkt->next;
+ n = r;
+ for (;;) {
+ if (rkt->table[r] == NULL)
+ break;
+ r = (r + 1) & (rkt->max - 1);
+ if (r == n)
+ goto bail;
+ }
+ rkt->next = (r + 1) & (rkt->max - 1);
+ /*
+ * Make sure lkey is never zero which is reserved to indicate an
+ * unrestricted LKEY.
+ */
+ rkt->gen++;
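+ /* lkey layout: table index in the high bits, generation counter above the low byte */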
+ mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
+ ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
+ << 8);
+ if (mr->lkey == 0) {
+ mr->lkey |= 1 << 8;
+ rkt->gen++;
+ }
+ qib_get_mr(mr);
+ rcu_assign_pointer(rkt->table[r], mr);
+ mr->lkey_published = 1;
+success:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+out:
+ return ret;
+bail:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = -ENOMEM;
+ goto out;
+}
+
+/**
+ * qib_free_lkey - free an lkey
+ * @mr: mr to free from tables
+ */
+void qib_free_lkey(struct qib_mregion *mr)
+{
+ unsigned long flags;
+ u32 lkey = mr->lkey;
+ u32 r;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
+
+ spin_lock_irqsave(&rkt->lock, flags);
+ if (!mr->lkey_published)
+ goto out;
+ if (lkey == 0)
+ rcu_assign_pointer(dev->dma_mr, NULL);
+ else {
+ r = lkey >> (32 - ib_qib_lkey_table_size);
+ rcu_assign_pointer(rkt->table[r], NULL);
+ }
+ qib_put_mr(mr);
+ mr->lkey_published = 0;
+out:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+}
+
+/**
+ * qib_lkey_ok - check IB SGE for validity and initialize
+ * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
+ * @isge: outgoing internal SGE
+ * @sge: SGE to check
+ * @acc: access flags
+ *
+ * Returns 1 if valid and successful, otherwise returns 0.
+ *
+ * Increments the reference count upon success.
+ *
+ * Check the IB SGE for validity and initialize our internal version
+ * of it.
+ */
+int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
+ struct qib_sge *isge, struct ib_sge *sge, int acc)
+{
+ struct qib_mregion *mr;
+ unsigned n, m;
+ size_t off;
+
+ /*
+ * We use LKEY == zero for kernel virtual addresses
+ * (see qib_get_dma_mr and qib_dma.c).
+ */
+ rcu_read_lock();
+ if (sge->lkey == 0) {
+ struct qib_ibdev *dev = to_idev(pd->ibpd.device);
+
+ if (pd->user)
+ goto bail;
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
+ goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
+
+ isge->mr = mr;
+ isge->vaddr = (void *) sge->addr;
+ isge->length = sge->length;
+ isge->sge_length = sge->length;
+ isge->m = 0;
+ isge->n = 0;
+ goto ok;
+ }
+ mr = rcu_dereference(
+ rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+ goto bail;
+
+ off = sge->addr - mr->user_base;
+ if (unlikely(sge->addr < mr->user_base ||
+ off + sge->length > mr->length ||
+ (mr->access_flags & acc) != acc))
+ goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
+
+ off += mr->offset;
+ if (mr->page_shift) {
+ /*
+ * Page sizes are a uniform power of 2, so no loop is necessary;
+ * entries_spanned_by_off is the number of times the loop below
+ * would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off/QIB_SEGSZ;
+ n = entries_spanned_by_off%QIB_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ }
+ isge->mr = mr;
+ isge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ isge->length = mr->map[m]->segs[n].length - off;
+ isge->sge_length = sge->length;
+ isge->m = m;
+ isge->n = n;
+ok:
+ return 1;
+bail:
+ rcu_read_unlock();
+ return 0;
+}
+
+/**
+ * qib_rkey_ok - check the IB virtual address, length, and RKEY
+ * @qp: qp for validation
+ * @sge: SGE state
+ * @len: length of data
+ * @vaddr: virtual address to place data
+ * @rkey: rkey to check
+ * @acc: access flags
+ *
+ * Returns 1 if successful, otherwise 0.
+ *
+ * Increments the reference count upon success.
+ */
+int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+ u32 len, u64 vaddr, u32 rkey, int acc)
+{
+ struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+ struct qib_mregion *mr;
+ unsigned n, m;
+ size_t off;
+
+ /*
+ * We use RKEY == zero for kernel virtual addresses
+ * (see qib_get_dma_mr and qib_dma.c).
+ */
+ rcu_read_lock();
+ if (rkey == 0) {
+ struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+ struct qib_ibdev *dev = to_idev(pd->ibpd.device);
+
+ if (pd->user)
+ goto bail;
+ mr = rcu_dereference(dev->dma_mr);
+ if (!mr)
+ goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
+
+ sge->mr = mr;
+ sge->vaddr = (void *) vaddr;
+ sge->length = len;
+ sge->sge_length = len;
+ sge->m = 0;
+ sge->n = 0;
+ goto ok;
+ }
+
+ mr = rcu_dereference(
+ rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+ if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+ goto bail;
+
+ off = vaddr - mr->iova;
+ if (unlikely(vaddr < mr->iova || off + len > mr->length ||
+ (mr->access_flags & acc) == 0))
+ goto bail;
+ if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+ goto bail;
+ rcu_read_unlock();
+
+ off += mr->offset;
+ if (mr->page_shift) {
+ /*
+ * Page sizes are a uniform power of 2, so no loop is necessary;
+ * entries_spanned_by_off is the number of times the loop below
+ * would have executed.
+ */
+ size_t entries_spanned_by_off;
+
+ entries_spanned_by_off = off >> mr->page_shift;
+ off -= (entries_spanned_by_off << mr->page_shift);
+ m = entries_spanned_by_off/QIB_SEGSZ;
+ n = entries_spanned_by_off%QIB_SEGSZ;
+ } else {
+ m = 0;
+ n = 0;
+ while (off >= mr->map[m]->segs[n].length) {
+ off -= mr->map[m]->segs[n].length;
+ n++;
+ if (n >= QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ }
+ sge->mr = mr;
+ sge->vaddr = mr->map[m]->segs[n].vaddr + off;
+ sge->length = mr->map[m]->segs[n].length - off;
+ sge->sge_length = len;
+ sge->m = m;
+ sge->n = n;
+ok:
+ return 1;
+bail:
+ rcu_read_unlock();
+ return 0;
+}
+
+/*
+ * Initialize the memory region specified by the work request.
+ */
+int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
+{
+ struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+ struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+ struct qib_mregion *mr;
+ u32 rkey = wr->wr.fast_reg.rkey;
+ unsigned i, n, m;
+ int ret = -EINVAL;
+ unsigned long flags;
+ u64 *page_list;
+ size_t ps;
+
+ spin_lock_irqsave(&rkt->lock, flags);
+ if (pd->user || rkey == 0)
+ goto bail;
+
+ mr = rcu_dereference_protected(
+ rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
+ lockdep_is_held(&rkt->lock));
+ if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
+ goto bail;
+
+ if (wr->wr.fast_reg.page_list_len > mr->max_segs)
+ goto bail;
+
+ ps = 1UL << wr->wr.fast_reg.page_shift;
+ if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
+ goto bail;
+
+ mr->user_base = wr->wr.fast_reg.iova_start;
+ mr->iova = wr->wr.fast_reg.iova_start;
+ mr->lkey = rkey;
+ mr->length = wr->wr.fast_reg.length;
+ mr->access_flags = wr->wr.fast_reg.access_flags;
+ page_list = wr->wr.fast_reg.page_list->page_list;
+ m = 0;
+ n = 0;
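+ /* copy the page addresses into the region's segment array */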
+ for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+ mr->map[m]->segs[n].vaddr = (void *) page_list[i];
+ mr->map[m]->segs[n].length = ps;
+ if (++n == QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+
+ ret = 0;
+bail:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
new file mode 100644
index 00000000000..22c720e5740
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -0,0 +1,2533 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+static int reply(struct ib_smp *smp)
+{
+ /*
+ * The verbs framework will handle the directed/LID route
+ * packet changes.
+ */
+ smp->method = IB_MGMT_METHOD_GET_RESP;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ smp->status |= IB_SMP_DIRECTION;
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int reply_failure(struct ib_smp *smp)
+{
+ /*
+ * The verbs framework will handle the directed/LID route
+ * packet changes.
+ */
+ smp->method = IB_MGMT_METHOD_GET_RESP;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ smp->status |= IB_SMP_DIRECTION;
+ return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
+}
+
+static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
+{
+ struct ib_mad_send_buf *send_buf;
+ struct ib_mad_agent *agent;
+ struct ib_smp *smp;
+ int ret;
+ unsigned long flags;
+ unsigned long timeout;
+
+ agent = ibp->send_agent;
+ if (!agent)
+ return;
+
+ /* o14-3.2.1 */
+ if (!(ppd_from_ibp(ibp)->lflags & QIBL_LINKACTIVE))
+ return;
+
+ /* o14-2 */
+ if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+ return;
+
+ send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR,
+ IB_MGMT_MAD_DATA, GFP_ATOMIC);
+ if (IS_ERR(send_buf))
+ return;
+
+ smp = send_buf->mad;
+ smp->base_version = IB_MGMT_BASE_VERSION;
+ smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
+ smp->class_version = 1;
+ smp->method = IB_MGMT_METHOD_TRAP;
+ ibp->tid++;
+ smp->tid = cpu_to_be64(ibp->tid);
+ smp->attr_id = IB_SMP_ATTR_NOTICE;
+ /* o14-1: smp->mkey = 0; */
+ memcpy(smp->data, data, len);
+
+ spin_lock_irqsave(&ibp->lock, flags);
+ if (!ibp->sm_ah) {
+ if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+ struct ib_ah *ah;
+
+ ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
+ if (IS_ERR(ah))
+ ret = PTR_ERR(ah);
+ else {
+ send_buf->ah = ah;
+ ibp->sm_ah = to_iah(ah);
+ ret = 0;
+ }
+ } else
+ ret = -EINVAL;
+ } else {
+ send_buf->ah = &ibp->sm_ah->ibah;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+ if (!ret)
+ ret = ib_post_send_mad(send_buf, NULL);
+ if (!ret) {
+ /* 4.096 usec * 2^subnet_timeout, converted to usec */
+ timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
+ ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+ } else {
+ ib_free_send_mad(send_buf);
+ ibp->trap_timeout = 0;
+ }
+}
+
+/*
+ * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ */
+void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
+ u32 qp1, u32 qp2, __be16 lid1, __be16 lid2)
+{
+ struct ib_mad_notice_attr data;
+
+ if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
+ ibp->pkey_violations++;
+ else
+ ibp->qkey_violations++;
+ ibp->n_pkt_drops++;
+
+ /* Send violation trap */
+ data.generic_type = IB_NOTICE_TYPE_SECURITY;
+ data.prod_type_msb = 0;
+ data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ data.trap_num = trap_num;
+ data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
+ data.toggle_count = 0;
+ memset(&data.details, 0, sizeof data.details);
+ data.details.ntc_257_258.lid1 = lid1;
+ data.details.ntc_257_258.lid2 = lid2;
+ data.details.ntc_257_258.key = cpu_to_be32(key);
+ data.details.ntc_257_258.sl_qp1 = cpu_to_be32((sl << 28) | qp1);
+ data.details.ntc_257_258.qp2 = cpu_to_be32(qp2);
+
+ qib_send_trap(ibp, &data, sizeof data);
+}
+
+/*
+ * Send a bad M_Key trap (ch. 14.3.9).
+ */
+static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp)
+{
+ struct ib_mad_notice_attr data;
+
+ /* Send violation trap */
+ data.generic_type = IB_NOTICE_TYPE_SECURITY;
+ data.prod_type_msb = 0;
+ data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ data.trap_num = IB_NOTICE_TRAP_BAD_MKEY;
+ data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
+ data.toggle_count = 0;
+ memset(&data.details, 0, sizeof data.details);
+ data.details.ntc_256.lid = data.issuer_lid;
+ data.details.ntc_256.method = smp->method;
+ data.details.ntc_256.attr_id = smp->attr_id;
+ data.details.ntc_256.attr_mod = smp->attr_mod;
+ data.details.ntc_256.mkey = smp->mkey;
+ if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ u8 hop_cnt;
+
+ data.details.ntc_256.dr_slid = smp->dr_slid;
+ data.details.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
+ hop_cnt = smp->hop_cnt;
+ if (hop_cnt > ARRAY_SIZE(data.details.ntc_256.dr_rtn_path)) {
+ data.details.ntc_256.dr_trunc_hop |=
+ IB_NOTICE_TRAP_DR_TRUNC;
+ hop_cnt = ARRAY_SIZE(data.details.ntc_256.dr_rtn_path);
+ }
+ data.details.ntc_256.dr_trunc_hop |= hop_cnt;
+ memcpy(data.details.ntc_256.dr_rtn_path, smp->return_path,
+ hop_cnt);
+ }
+
+ qib_send_trap(ibp, &data, sizeof data);
+}
+
+/*
+ * Send a Port Capability Mask Changed trap (ch. 14.3.11).
+ */
+void qib_cap_mask_chg(struct qib_ibport *ibp)
+{
+ struct ib_mad_notice_attr data;
+
+ data.generic_type = IB_NOTICE_TYPE_INFO;
+ data.prod_type_msb = 0;
+ data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ data.trap_num = IB_NOTICE_TRAP_CAP_MASK_CHG;
+ data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
+ data.toggle_count = 0;
+ memset(&data.details, 0, sizeof data.details);
+ data.details.ntc_144.lid = data.issuer_lid;
+ data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
+
+ qib_send_trap(ibp, &data, sizeof data);
+}
+
+/*
+ * Send a System Image GUID Changed trap (ch. 14.3.12).
+ */
+void qib_sys_guid_chg(struct qib_ibport *ibp)
+{
+ struct ib_mad_notice_attr data;
+
+ data.generic_type = IB_NOTICE_TYPE_INFO;
+ data.prod_type_msb = 0;
+ data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ data.trap_num = IB_NOTICE_TRAP_SYS_GUID_CHG;
+ data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
+ data.toggle_count = 0;
+ memset(&data.details, 0, sizeof data.details);
+ data.details.ntc_145.lid = data.issuer_lid;
+ data.details.ntc_145.new_sys_guid = ib_qib_sys_image_guid;
+
+ qib_send_trap(ibp, &data, sizeof data);
+}
+
+/*
+ * Send a Node Description Changed trap (ch. 14.3.13).
+ */
+void qib_node_desc_chg(struct qib_ibport *ibp)
+{
+ struct ib_mad_notice_attr data;
+
+ data.generic_type = IB_NOTICE_TYPE_INFO;
+ data.prod_type_msb = 0;
+ data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ data.trap_num = IB_NOTICE_TRAP_CAP_MASK_CHG;
+ data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
+ data.toggle_count = 0;
+ memset(&data.details, 0, sizeof data.details);
+ data.details.ntc_144.lid = data.issuer_lid;
+ data.details.ntc_144.local_changes = 1;
+ data.details.ntc_144.change_flags = IB_NOTICE_TRAP_NODE_DESC_CHG;
+
+ qib_send_trap(ibp, &data, sizeof data);
+}
+
+static int subn_get_nodedescription(struct ib_smp *smp,
+ struct ib_device *ibdev)
+{
+ if (smp->attr_mod)
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+
+ return reply(smp);
+}
+
+static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ u32 vendor, majrev, minrev;
+ unsigned pidx = port - 1; /* IB numbers ports from 1, hdw from 0 */
+
+ /* GUID 0 is illegal */
+ if (smp->attr_mod || pidx >= dd->num_pports ||
+ dd->pport[pidx].guid == 0)
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ nip->port_guid = dd->pport[pidx].guid;
+
+ nip->base_version = 1;
+ nip->class_version = 1;
+ nip->node_type = 1; /* channel adapter */
+ nip->num_ports = ibdev->phys_port_cnt;
+ /* This is already in network order */
+ nip->sys_guid = ib_qib_sys_image_guid;
+ nip->node_guid = dd->pport->guid; /* Use first-port GUID as node */
+ nip->partition_cap = cpu_to_be16(qib_get_npkeys(dd));
+ nip->device_id = cpu_to_be16(dd->deviceid);
+ majrev = dd->majrev;
+ minrev = dd->minrev;
+ nip->revision = cpu_to_be32((majrev << 16) | minrev);
+ nip->local_port_num = port;
+ vendor = dd->vendorid;
+ nip->vendor_id[0] = QIB_SRC_OUI_1;
+ nip->vendor_id[1] = QIB_SRC_OUI_2;
+ nip->vendor_id[2] = QIB_SRC_OUI_3;
+
+ return reply(smp);
+}
+
+static int subn_get_guidinfo(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
+ __be64 *p = (__be64 *) smp->data;
+ unsigned pidx = port - 1; /* IB numbers ports from 1, hdw from 0 */
+
+ /* 32 blocks of 8 64-bit GUIDs per block */
+
+ memset(smp->data, 0, sizeof(smp->data));
+
+ if (startgx == 0 && pidx < dd->num_pports) {
+ struct qib_pportdata *ppd = dd->pport + pidx;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ __be64 g = ppd->guid;
+ unsigned i;
+
+ /* GUID 0 is illegal */
+ if (g == 0)
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else {
+ /* The first is a copy of the read-only HW GUID. */
+ p[0] = g;
+ for (i = 1; i < QIB_GUIDS_PER_PORT; i++)
+ p[i] = ibp->guids[i - 1];
+ }
+ } else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return reply(smp);
+}
+
+static void set_link_width_enabled(struct qib_pportdata *ppd, u32 w)
+{
+ (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LWID_ENB, w);
+}
+
+static void set_link_speed_enabled(struct qib_pportdata *ppd, u32 s)
+{
+ (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_SPD_ENB, s);
+}
+
+static int get_overrunthreshold(struct qib_pportdata *ppd)
+{
+ return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OVERRUN_THRESH);
+}
+
+/**
+ * set_overrunthreshold - set the overrun threshold
+ * @ppd: the physical port data
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_overrunthreshold(struct qib_pportdata *ppd, unsigned n)
+{
+ (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OVERRUN_THRESH,
+ (u32)n);
+ return 0;
+}
+
+static int get_phyerrthreshold(struct qib_pportdata *ppd)
+{
+ return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PHYERR_THRESH);
+}
+
+/**
+ * set_phyerrthreshold - set the physical error threshold
+ * @ppd: the physical port data
+ * @n: the new threshold
+ *
+ * Note that this will only take effect when the link state changes.
+ */
+static int set_phyerrthreshold(struct qib_pportdata *ppd, unsigned n)
+{
+ (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PHYERR_THRESH,
+ (u32)n);
+ return 0;
+}
+
+/**
+ * get_linkdowndefaultstate - get the default linkdown state
+ * @ppd: the physical port data
+ *
+ * Returns zero if the default is POLL, 1 if the default is SLEEP.
+ */
+static int get_linkdowndefaultstate(struct qib_pportdata *ppd)
+{
+ return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT) ==
+ IB_LINKINITCMD_SLEEP;
+}
+
+static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
+{
+ int valid_mkey = 0;
+ int ret = 0;
+
+ /* Is the mkey in the process of expiring? */
+ if (ibp->mkey_lease_timeout &&
+ time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+ /* Clear timeout and mkey protection field. */
+ ibp->mkey_lease_timeout = 0;
+ ibp->mkeyprot = 0;
+ }
+
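+ /* the mkey is valid if checking is disabled, no mkey is set, or it matches */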
+ if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 ||
+ ibp->mkey == smp->mkey)
+ valid_mkey = 1;
+
+ /* Unset lease timeout on any valid Get/Set/TrapRepress */
+ if (valid_mkey && ibp->mkey_lease_timeout &&
+ (smp->method == IB_MGMT_METHOD_GET ||
+ smp->method == IB_MGMT_METHOD_SET ||
+ smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
+ ibp->mkey_lease_timeout = 0;
+
+ if (!valid_mkey) {
+ switch (smp->method) {
+ case IB_MGMT_METHOD_GET:
+ /* Bad mkey not a violation below level 2 */
+ if (ibp->mkeyprot < 2)
+ break;
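+ /* FALLTHROUGH */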
+ case IB_MGMT_METHOD_SET:
+ case IB_MGMT_METHOD_TRAP_REPRESS:
+ if (ibp->mkey_violations != 0xFFFF)
+ ++ibp->mkey_violations;
+ if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
+ ibp->mkey_lease_timeout = jiffies +
+ ibp->mkey_lease_period * HZ;
+ /* Generate a trap notice. */
+ qib_bad_mkey(ibp, smp);
+ ret = 1;
+ }
+ }
+
+ return ret;
+}
+
+static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct qib_devdata *dd;
+ struct qib_pportdata *ppd;
+ struct qib_ibport *ibp;
+ struct ib_port_info *pip = (struct ib_port_info *)smp->data;
+ u8 mtu;
+ int ret;
+ u32 state;
+ u32 port_num = be32_to_cpu(smp->attr_mod);
+
+ if (port_num == 0)
+ port_num = port;
+ else {
+ if (port_num > ibdev->phys_port_cnt) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ ret = reply(smp);
+ goto bail;
+ }
+ if (port_num != port) {
+ ibp = to_iport(ibdev, port_num);
+ ret = check_mkey(ibp, smp, 0);
+ if (ret) {
+ ret = IB_MAD_RESULT_FAILURE;
+ goto bail;
+ }
+ }
+ }
+
+ dd = dd_from_ibdev(ibdev);
+ /* IB numbers ports from 1, hdw from 0 */
+ ppd = dd->pport + (port_num - 1);
+ ibp = &ppd->ibport_data;
+
+ /* Clear all fields. Only set the non-zero fields. */
+ memset(smp->data, 0, sizeof(smp->data));
+
+ /* Only return the mkey if the protection field allows it. */
+ if (!(smp->method == IB_MGMT_METHOD_GET &&
+ ibp->mkey != smp->mkey &&
+ ibp->mkeyprot == 1))
+ pip->mkey = ibp->mkey;
+ pip->gid_prefix = ibp->gid_prefix;
+ pip->lid = cpu_to_be16(ppd->lid);
+ pip->sm_lid = cpu_to_be16(ibp->sm_lid);
+ pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
+ /* pip->diag_code; */
+ pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+ pip->local_port_num = port;
+ pip->link_width_enabled = ppd->link_width_enabled;
+ pip->link_width_supported = ppd->link_width_supported;
+ pip->link_width_active = ppd->link_width_active;
+ state = dd->f_iblink_state(ppd->lastibcstat);
+ pip->linkspeed_portstate = ppd->link_speed_supported << 4 | state;
+
+ pip->portphysstate_linkdown =
+ (dd->f_ibphys_portstate(ppd->lastibcstat) << 4) |
+ (get_linkdowndefaultstate(ppd) ? 1 : 2);
+ pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+ pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) |
+ ppd->link_speed_enabled;
+ switch (ppd->ibmtu) {
+ default: /* something is wrong; fall through */
+ case 4096:
+ mtu = IB_MTU_4096;
+ break;
+ case 2048:
+ mtu = IB_MTU_2048;
+ break;
+ case 1024:
+ mtu = IB_MTU_1024;
+ break;
+ case 512:
+ mtu = IB_MTU_512;
+ break;
+ case 256:
+ mtu = IB_MTU_256;
+ break;
+ }
+ pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl;
+ pip->vlcap_inittype = ppd->vls_supported << 4; /* InitType = 0 */
+ pip->vl_high_limit = ibp->vl_high_limit;
+ pip->vl_arb_high_cap =
+ dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP);
+ pip->vl_arb_low_cap =
+ dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_LOW_CAP);
+ /* InitTypeReply = 0 */
+ pip->inittypereply_mtucap = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
+ /* HCAs ignore VLStallCount and HOQLife */
+ /* pip->vlstallcnt_hoqlife; */
+ pip->operationalvl_pei_peo_fpi_fpo =
+ dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4;
+ pip->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+ /* P_KeyViolations are counted by hardware. */
+ pip->pkey_violations = cpu_to_be16(ibp->pkey_violations);
+ pip->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+ /* Only the hardware GUID is supported for now */
+ pip->guid_cap = QIB_GUIDS_PER_PORT;
+ pip->clientrereg_resv_subnetto = ibp->subnet_timeout;
+ /* 32.768 usec. response time (guessing) */
+ pip->resv_resptimevalue = 3;
+ pip->localphyerrors_overrunerrors =
+ (get_phyerrthreshold(ppd) << 4) |
+ get_overrunthreshold(ppd);
+ /* pip->max_credit_hint; */
+ if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+ u32 v;
+
+ v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY);
+ pip->link_roundtrip_latency[0] = v >> 16;
+ pip->link_roundtrip_latency[1] = v >> 8;
+ pip->link_roundtrip_latency[2] = v;
+ }
+
+ ret = reply(smp);
+
+bail:
+ return ret;
+}
+
+/**
+ * get_pkeys - return the PKEY table
+ * @dd: the qlogic_ib device
+ * @port: the IB port number
+ * @pkeys: the pkey table is placed here
+ */
+static int get_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
+{
+ struct qib_pportdata *ppd = dd->pport + port - 1;
+ /*
+ * always a kernel context, no locking needed.
+ * If we get here with ppd setup, no need to check
+ * that pd is valid.
+ */
+ struct qib_ctxtdata *rcd = dd->rcd[ppd->hw_pidx];
+
+ memcpy(pkeys, rcd->pkeys, sizeof(rcd->pkeys));
+
+ return 0;
+}
+
+static int subn_get_pkeytable(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
+ u16 *p = (u16 *) smp->data;
+ __be16 *q = (__be16 *) smp->data;
+
+ /* 64 blocks of 32 16-bit P_Key entries */
+
+ memset(smp->data, 0, sizeof(smp->data));
+ if (startpx == 0) {
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ unsigned i, n = qib_get_npkeys(dd);
+
+ get_pkeys(dd, port, p);
+
+ for (i = 0; i < n; i++)
+ q[i] = cpu_to_be16(p[i]);
+ } else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return reply(smp);
+}
+
+static int subn_set_guidinfo(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ u32 startgx = 8 * be32_to_cpu(smp->attr_mod);
+ __be64 *p = (__be64 *) smp->data;
+ unsigned pidx = port - 1; /* IB numbers ports from 1, hdw from 0 */
+
+ /* 32 blocks of 8 64-bit GUIDs per block */
+
+ if (startgx == 0 && pidx < dd->num_pports) {
+ struct qib_pportdata *ppd = dd->pport + pidx;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ unsigned i;
+
+ /* The first entry is read-only. */
+ for (i = 1; i < QIB_GUIDS_PER_PORT; i++)
+ ibp->guids[i - 1] = p[i];
+ } else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ /* The only GUID we support is the first read-only entry. */
+ return subn_get_guidinfo(smp, ibdev, port);
+}
+
+/**
+ * subn_set_portinfo - set port information
+ * @smp: the incoming SM packet
+ * @ibdev: the infiniband device
+ * @port: the port on the device
+ *
+ * Set Portinfo (see ch. 14.2.5.6).
+ */
+static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct ib_port_info *pip = (struct ib_port_info *)smp->data;
+ struct ib_event event;
+ struct qib_devdata *dd;
+ struct qib_pportdata *ppd;
+ struct qib_ibport *ibp;
+ u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80);
+ unsigned long flags;
+ u16 lid, smlid;
+ u8 lwe;
+ u8 lse;
+ u8 state;
+ u8 vls;
+ u8 msl;
+ u16 lstate;
+ int ret, ore, mtu;
+ u32 port_num = be32_to_cpu(smp->attr_mod);
+
+ if (port_num == 0)
+ port_num = port;
+ else {
+ if (port_num > ibdev->phys_port_cnt)
+ goto err;
+ /* Port attributes can only be set on the receiving port */
+ if (port_num != port)
+ goto get_only;
+ }
+
+ dd = dd_from_ibdev(ibdev);
+ /* IB numbers ports from 1, hdw from 0 */
+ ppd = dd->pport + (port_num - 1);
+ ibp = &ppd->ibport_data;
+ event.device = ibdev;
+ event.element.port_num = port;
+
+ ibp->mkey = pip->mkey;
+ ibp->gid_prefix = pip->gid_prefix;
+ ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
+
+ lid = be16_to_cpu(pip->lid);
+ /* Must be a valid unicast LID address. */
+ if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
+ if (ppd->lid != lid)
+ qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT);
+ if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7))
+ qib_set_uevent_bits(ppd, _QIB_EVENT_LMC_CHANGE_BIT);
+ qib_set_lid(ppd, lid, pip->mkeyprot_resv_lmc & 7);
+ event.event = IB_EVENT_LID_CHANGE;
+ ib_dispatch_event(&event);
+ }
+
+ smlid = be16_to_cpu(pip->sm_lid);
+ msl = pip->neighbormtu_mastersmsl & 0xF;
+ /* Must be a valid unicast LID address. */
+ if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+ spin_lock_irqsave(&ibp->lock, flags);
+ if (ibp->sm_ah) {
+ if (smlid != ibp->sm_lid)
+ ibp->sm_ah->attr.dlid = smlid;
+ if (msl != ibp->sm_sl)
+ ibp->sm_ah->attr.sl = msl;
+ }
+ spin_unlock_irqrestore(&ibp->lock, flags);
+ if (smlid != ibp->sm_lid)
+ ibp->sm_lid = smlid;
+ if (msl != ibp->sm_sl)
+ ibp->sm_sl = msl;
+ event.event = IB_EVENT_SM_CHANGE;
+ ib_dispatch_event(&event);
+ }
+
+ /* Allow 1x or 4x to be set (see 14.2.6.6). */
+ lwe = pip->link_width_enabled;
+ if (lwe) {
+ if (lwe == 0xFF)
+ set_link_width_enabled(ppd, ppd->link_width_supported);
+ else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (lwe != ppd->link_width_enabled)
+ set_link_width_enabled(ppd, lwe);
+ }
+
+ lse = pip->linkspeedactive_enabled & 0xF;
+ if (lse) {
+ /*
+ * The IB 1.2 spec. only allows link speed values
+ * 1, 3, 5, 7, 15. 1.2.1 extended to allow specific
+ * speeds.
+ */
+ if (lse == 15)
+ set_link_speed_enabled(ppd,
+ ppd->link_speed_supported);
+ else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else if (lse != ppd->link_speed_enabled)
+ set_link_speed_enabled(ppd, lse);
+ }
+
+ /* Set link down default state. */
+ switch (pip->portphysstate_linkdown & 0xF) {
+ case 0: /* NOP */
+ break;
+ case 1: /* SLEEP */
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT,
+ IB_LINKINITCMD_SLEEP);
+ break;
+ case 2: /* POLL */
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT,
+ IB_LINKINITCMD_POLL);
+ break;
+ default:
+ smp->status |= IB_SMP_INVALID_FIELD;
+ }
+
+ ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
+ ibp->vl_high_limit = pip->vl_high_limit;
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT,
+ ibp->vl_high_limit);
+
+ mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
+ if (mtu == -1)
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ qib_set_mtu(ppd, mtu);
+
+ /* Set operational VLs */
+ vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF;
+ if (vls) {
+ if (vls > ppd->vls_supported)
+ smp->status |= IB_SMP_INVALID_FIELD;
+ else
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls);
+ }
+
+ if (pip->mkey_violations == 0)
+ ibp->mkey_violations = 0;
+
+ if (pip->pkey_violations == 0)
+ ibp->pkey_violations = 0;
+
+ if (pip->qkey_violations == 0)
+ ibp->qkey_violations = 0;
+
+ ore = pip->localphyerrors_overrunerrors;
+ if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ if (set_overrunthreshold(ppd, (ore & 0xF)))
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
+
+ /*
+ * Do the port state change now that the other link parameters
+ * have been set.
+ * Changing the port physical state only makes sense if the link
+ * is down or is being set to down.
+ */
+ state = pip->linkspeed_portstate & 0xF;
+ lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
+ if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ /*
+ * Only state changes of DOWN, ARM, and ACTIVE are valid
+ * and must be in the correct state to take effect (see 7.2.6).
+ */
+ switch (state) {
+ case IB_PORT_NOP:
+ if (lstate == 0)
+ break;
+ /* FALLTHROUGH */
+ case IB_PORT_DOWN:
+ if (lstate == 0)
+ lstate = QIB_IB_LINKDOWN_ONLY;
+ else if (lstate == 1)
+ lstate = QIB_IB_LINKDOWN_SLEEP;
+ else if (lstate == 2)
+ lstate = QIB_IB_LINKDOWN;
+ else if (lstate == 3)
+ lstate = QIB_IB_LINKDOWN_DISABLE;
+ else {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ break;
+ }
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_LINKV;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ qib_set_linkstate(ppd, lstate);
+ /*
+ * Don't send a reply if the response would be sent
+ * through the disabled port.
+ */
+ if (lstate == QIB_IB_LINKDOWN_DISABLE && smp->hop_cnt) {
+ ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ goto done;
+ }
+ qib_wait_linkstate(ppd, QIBL_LINKV, 10);
+ break;
+ case IB_PORT_ARMED:
+ qib_set_linkstate(ppd, QIB_IB_LINKARM);
+ break;
+ case IB_PORT_ACTIVE:
+ qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
+ break;
+ default:
+ smp->status |= IB_SMP_INVALID_FIELD;
+ }
+
+ if (clientrereg) {
+ event.event = IB_EVENT_CLIENT_REREGISTER;
+ ib_dispatch_event(&event);
+ }
+
+ ret = subn_get_portinfo(smp, ibdev, port);
+
+ /* restore re-reg bit per o14-12.2.1 */
+ pip->clientrereg_resv_subnetto |= clientrereg;
+
+ goto get_only;
+
+err:
+ smp->status |= IB_SMP_INVALID_FIELD;
+get_only:
+ ret = subn_get_portinfo(smp, ibdev, port);
+done:
+ return ret;
+}
+
+/**
+ * rm_pkey - decrement the reference count for the given PKEY
+ * @ppd: the physical port data
+ * @key: the PKEY to remove
+ *
+ * Return true if this was the last reference and the hardware table entry
+ * needs to be changed.
+ */
+static int rm_pkey(struct qib_pportdata *ppd, u16 key)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (ppd->pkeys[i] != key)
+ continue;
+ if (atomic_dec_and_test(&ppd->pkeyrefs[i])) {
+ ppd->pkeys[i] = 0;
+ ret = 1;
+ goto bail;
+ }
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * add_pkey - add the given PKEY to the hardware table
+ * @ppd: the physical port data
+ * @key: the PKEY
+ *
+ * Return an error code if unable to add the entry, zero if no change,
+ * or 1 if the hardware PKEY register needs to be updated.
+ */
+static int add_pkey(struct qib_pportdata *ppd, u16 key)
+{
+ int i;
+ u16 lkey = key & 0x7FFF;
+ int any = 0;
+ int ret;
+
+ if (lkey == 0x7FFF) {
+ ret = 0;
+ goto bail;
+ }
+
+ /* Look for an empty slot or a matching PKEY. */
+ for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (!ppd->pkeys[i]) {
+ any++;
+ continue;
+ }
+ /* If it matches exactly, try to increment the ref count */
+ if (ppd->pkeys[i] == key) {
+ if (atomic_inc_return(&ppd->pkeyrefs[i]) > 1) {
+ ret = 0;
+ goto bail;
+ }
+ /* Lost the race. Look for an empty slot below. */
+ atomic_dec(&ppd->pkeyrefs[i]);
+ any++;
+ }
+ /*
+ * It makes no sense to have both the limited and unlimited
+ * PKEY set at the same time since the unlimited one will
+ * disable the limited one.
+ */
+ if ((ppd->pkeys[i] & 0x7FFF) == lkey) {
+ ret = -EEXIST;
+ goto bail;
+ }
+ }
+ if (!any) {
+ ret = -EBUSY;
+ goto bail;
+ }
+ for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+ if (!ppd->pkeys[i] &&
+ atomic_inc_return(&ppd->pkeyrefs[i]) == 1) {
+ /* for qibstats, etc. */
+ ppd->pkeys[i] = key;
+ ret = 1;
+ goto bail;
+ }
+ }
+ ret = -EBUSY;
+
+bail:
+ return ret;
+}
+
+/**
+ * set_pkeys - set the PKEY table for ctxt 0
+ * @dd: the qlogic_ib device
+ * @port: the IB port number
+ * @pkeys: the PKEY table
+ */
+static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
+{
+ struct qib_pportdata *ppd;
+ struct qib_ctxtdata *rcd;
+ int i;
+ int changed = 0;
+
+ /*
+ * IB port one/two always maps to context zero/one,
+ * always a kernel context, no locking needed
+ * If we get here with ppd setup, no need to check
+ * that rcd is valid.
+ */
+ ppd = dd->pport + (port - 1);
+ rcd = dd->rcd[ppd->hw_pidx];
+
+ for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
+ u16 key = pkeys[i];
+ u16 okey = rcd->pkeys[i];
+
+ if (key == okey)
+ continue;
+ /*
+ * The value of this PKEY table entry is changing.
+ * Remove the old entry in the hardware's array of PKEYs.
+ */
+ if (okey & 0x7FFF)
+ changed |= rm_pkey(ppd, okey);
+ if (key & 0x7FFF) {
+ int ret = add_pkey(ppd, key);
+
+ if (ret < 0)
+ key = 0;
+ else
+ changed |= ret;
+ }
+ rcd->pkeys[i] = key;
+ }
+ if (changed) {
+ struct ib_event event;
+
+ (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
+
+ event.event = IB_EVENT_PKEY_CHANGE;
+ event.device = &dd->verbs_dev.ibdev;
+ event.element.port_num = port;
+ ib_dispatch_event(&event);
+ }
+ return 0;
+}
+
+static int subn_set_pkeytable(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
+ __be16 *p = (__be16 *) smp->data;
+ u16 *q = (u16 *) smp->data;
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ unsigned i, n = qib_get_npkeys(dd);
+
+ for (i = 0; i < n; i++)
+ q[i] = be16_to_cpu(p[i]);
+
+ if (startpx != 0 || set_pkeys(dd, port, q) != 0)
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return subn_get_pkeytable(smp, ibdev, port);
+}
+
+static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ u8 *p = (u8 *) smp->data;
+ unsigned i;
+
+ memset(smp->data, 0, sizeof(smp->data));
+
+ if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP))
+ smp->status |= IB_SMP_UNSUP_METHOD;
+ else
+ for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2)
+ *p++ = (ibp->sl_to_vl[i] << 4) | ibp->sl_to_vl[i + 1];
+
+ return reply(smp);
+}
+
+static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ u8 *p = (u8 *) smp->data;
+ unsigned i;
+
+ if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) {
+ smp->status |= IB_SMP_UNSUP_METHOD;
+ return reply(smp);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2, p++) {
+ ibp->sl_to_vl[i] = *p >> 4;
+ ibp->sl_to_vl[i + 1] = *p & 0xF;
+ }
+ qib_set_uevent_bits(ppd_from_ibp(to_iport(ibdev, port)),
+ _QIB_EVENT_SL2VL_CHANGE_BIT);
+
+ return subn_get_sl_to_vl(smp, ibdev, port);
+}
+
+static int subn_get_vl_arb(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ unsigned which = be32_to_cpu(smp->attr_mod) >> 16;
+ struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
+
+ memset(smp->data, 0, sizeof(smp->data));
+
+ if (ppd->vls_supported == IB_VL_VL0)
+ smp->status |= IB_SMP_UNSUP_METHOD;
+ else if (which == IB_VLARB_LOWPRI_0_31)
+ (void) ppd->dd->f_get_ib_table(ppd, QIB_IB_TBL_VL_LOW_ARB,
+ smp->data);
+ else if (which == IB_VLARB_HIGHPRI_0_31)
+ (void) ppd->dd->f_get_ib_table(ppd, QIB_IB_TBL_VL_HIGH_ARB,
+ smp->data);
+ else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return reply(smp);
+}
+
+static int subn_set_vl_arb(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ unsigned which = be32_to_cpu(smp->attr_mod) >> 16;
+ struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
+
+ if (ppd->vls_supported == IB_VL_VL0)
+ smp->status |= IB_SMP_UNSUP_METHOD;
+ else if (which == IB_VLARB_LOWPRI_0_31)
+ (void) ppd->dd->f_set_ib_table(ppd, QIB_IB_TBL_VL_LOW_ARB,
+ smp->data);
+ else if (which == IB_VLARB_HIGHPRI_0_31)
+ (void) ppd->dd->f_set_ib_table(ppd, QIB_IB_TBL_VL_HIGH_ARB,
+ smp->data);
+ else
+ smp->status |= IB_SMP_INVALID_FIELD;
+
+ return subn_get_vl_arb(smp, ibdev, port);
+}
+
+static int subn_trap_repress(struct ib_smp *smp, struct ib_device *ibdev,
+ u8 port)
+{
+ /*
+ * For now, we only send the trap once so no need to process this.
+ * o13-6, o13-7,
+ * o14-3.a4 The SMA shall not send any message in response to a valid
+ * SubnTrapRepress() message.
+ */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+}
+
+static int pma_get_classportinfo(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev)
+{
+ struct ib_class_port_info *p =
+ (struct ib_class_port_info *)pmp->data;
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ if (pmp->mad_hdr.attr_mod != 0)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+
+ /* Note that AllPortSelect is not valid */
+ p->base_version = 1;
+ p->class_version = 1;
+ p->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ /*
+ * Set the most significant bit of CM2 to indicate support for
+ * congestion statistics
+ */
+ p->reserved[0] = dd->psxmitwait_supported << 7;
+ /*
+ * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
+ */
+ p->resp_time_value = 18;
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portsamplescontrol *p =
+ (struct ib_pma_portsamplescontrol *)pmp->data;
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ unsigned long flags;
+ u8 port_select = p->port_select;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ p->port_select = port_select;
+ if (pmp->mad_hdr.attr_mod != 0 || port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+ goto bail;
+ }
+ spin_lock_irqsave(&ibp->lock, flags);
+ p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS);
+ p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
+ p->counter_width = 4; /* 32 bit counters */
+ p->counter_mask0_9 = COUNTER_MASK0_9;
+ p->sample_start = cpu_to_be32(ibp->pma_sample_start);
+ p->sample_interval = cpu_to_be32(ibp->pma_sample_interval);
+ p->tag = cpu_to_be16(ibp->pma_tag);
+ p->counter_select[0] = ibp->pma_counter_select[0];
+ p->counter_select[1] = ibp->pma_counter_select[1];
+ p->counter_select[2] = ibp->pma_counter_select[2];
+ p->counter_select[3] = ibp->pma_counter_select[3];
+ p->counter_select[4] = ibp->pma_counter_select[4];
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+bail:
+ return reply((struct ib_smp *) pmp);
+}
+
+static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portsamplescontrol *p =
+ (struct ib_pma_portsamplescontrol *)pmp->data;
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ unsigned long flags;
+ u8 status, xmit_flags;
+ int ret;
+
+ if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&ibp->lock, flags);
+
+ /* Port Sampling code owns the PS* HW counters */
+ xmit_flags = ppd->cong_stats.flags;
+ ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_SAMPLE;
+ status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
+ if (status == IB_PMA_SAMPLE_STATUS_DONE ||
+ (status == IB_PMA_SAMPLE_STATUS_RUNNING &&
+ xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) {
+ ibp->pma_sample_start = be32_to_cpu(p->sample_start);
+ ibp->pma_sample_interval = be32_to_cpu(p->sample_interval);
+ ibp->pma_tag = be16_to_cpu(p->tag);
+ ibp->pma_counter_select[0] = p->counter_select[0];
+ ibp->pma_counter_select[1] = p->counter_select[1];
+ ibp->pma_counter_select[2] = p->counter_select[2];
+ ibp->pma_counter_select[3] = p->counter_select[3];
+ ibp->pma_counter_select[4] = p->counter_select[4];
+ dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval,
+ ibp->pma_sample_start);
+ }
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+ ret = pma_get_portsamplescontrol(pmp, ibdev, port);
+
+bail:
+ return ret;
+}
+
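+/*
+ * Translate a PMA counter selector into the matching PS* hardware
+ * sample counter, read via the chip-specific f_portcntr hook.
+ * Unknown selectors read as zero.
+ */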
+static u64 get_counter(struct qib_ibport *ibp, struct qib_pportdata *ppd,
+ __be16 sel)
+{
+ u64 ret;
+
+ switch (sel) {
+ case IB_PMA_PORT_XMIT_DATA:
+ ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITDATA);
+ break;
+ case IB_PMA_PORT_RCV_DATA:
+ ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSRCVDATA);
+ break;
+ case IB_PMA_PORT_XMIT_PKTS:
+ ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITPKTS);
+ break;
+ case IB_PMA_PORT_RCV_PKTS:
+ ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSRCVPKTS);
+ break;
+ case IB_PMA_PORT_XMIT_WAIT:
+ ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITWAIT);
+ break;
+ default:
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/* This function assumes that the xmit_wait lock is already held */
+static u64 xmit_wait_get_value_delta(struct qib_pportdata *ppd)
+{
+ u32 delta;
+
+ delta = get_counter(&ppd->ibport_data, ppd,
+ IB_PMA_PORT_XMIT_WAIT);
+ return ppd->cong_stats.counter + delta;
+}
+
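+/*
+ * Snapshot all five PS* hardware sample counters into the counter
+ * cache so their values can still be reported after the sampling
+ * hardware is re-armed.
+ */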
+static void cache_hw_sample_counters(struct qib_pportdata *ppd)
+{
+ struct qib_ibport *ibp = &ppd->ibport_data;
+
+ ppd->cong_stats.counter_cache.psxmitdata =
+ get_counter(ibp, ppd, IB_PMA_PORT_XMIT_DATA);
+ ppd->cong_stats.counter_cache.psrcvdata =
+ get_counter(ibp, ppd, IB_PMA_PORT_RCV_DATA);
+ ppd->cong_stats.counter_cache.psxmitpkts =
+ get_counter(ibp, ppd, IB_PMA_PORT_XMIT_PKTS);
+ ppd->cong_stats.counter_cache.psrcvpkts =
+ get_counter(ibp, ppd, IB_PMA_PORT_RCV_PKTS);
+ ppd->cong_stats.counter_cache.psxmitwait =
+ get_counter(ibp, ppd, IB_PMA_PORT_XMIT_WAIT);
+}
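+/* Return the cached value of one PS* sample counter by selector. */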
+
+static u64 get_cache_hw_sample_counters(struct qib_pportdata *ppd,
+ __be16 sel)
+{
+ u64 ret;
+
+ switch (sel) {
+ case IB_PMA_PORT_XMIT_DATA:
+ ret = ppd->cong_stats.counter_cache.psxmitdata;
+ break;
+ case IB_PMA_PORT_RCV_DATA:
+ ret = ppd->cong_stats.counter_cache.psrcvdata;
+ break;
+ case IB_PMA_PORT_XMIT_PKTS:
+ ret = ppd->cong_stats.counter_cache.psxmitpkts;
+ break;
+ case IB_PMA_PORT_RCV_PKTS:
+ ret = ppd->cong_stats.counter_cache.psrcvpkts;
+ break;
+ case IB_PMA_PORT_XMIT_WAIT:
+ ret = ppd->cong_stats.counter_cache.psxmitwait;
+ break;
+ default:
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portsamplesresult *p =
+ (struct ib_pma_portsamplesresult *)pmp->data;
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ unsigned long flags;
+ u8 status;
+ int i;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+ spin_lock_irqsave(&ibp->lock, flags);
+ p->tag = cpu_to_be16(ibp->pma_tag);
+ if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
+ p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
+ else {
+ status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
+ p->sample_status = cpu_to_be16(status);
+ if (status == IB_PMA_SAMPLE_STATUS_DONE) {
+ cache_hw_sample_counters(ppd);
+ ppd->cong_stats.counter =
+ xmit_wait_get_value_delta(ppd);
+ dd->f_set_cntr_sample(ppd,
+ QIB_CONG_TIMER_PSINTERVAL, 0);
+ ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+ p->counter[i] = cpu_to_be32(
+ get_cache_hw_sample_counters(
+ ppd, ibp->pma_counter_select[i]));
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portsamplesresult_ext *p =
+ (struct ib_pma_portsamplesresult_ext *)pmp->data;
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ unsigned long flags;
+ u8 status;
+ int i;
+
+ /* Port Sampling code owns the PS* HW counters */
+ memset(pmp->data, 0, sizeof(pmp->data));
+ spin_lock_irqsave(&ibp->lock, flags);
+ p->tag = cpu_to_be16(ibp->pma_tag);
+ if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
+ p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
+ else {
+ status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
+ p->sample_status = cpu_to_be16(status);
+ /* 64 bits */
+ p->extended_width = cpu_to_be32(0x80000000);
+ if (status == IB_PMA_SAMPLE_STATUS_DONE) {
+ cache_hw_sample_counters(ppd);
+ ppd->cong_stats.counter =
+ xmit_wait_get_value_delta(ppd);
+ dd->f_set_cntr_sample(ppd,
+ QIB_CONG_TIMER_PSINTERVAL, 0);
+ ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+ p->counter[i] = cpu_to_be64(
+ get_cache_hw_sample_counters(
+ ppd, ibp->pma_counter_select[i]));
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int pma_get_portcounters(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_verbs_counters cntrs;
+ u8 port_select = p->port_select;
+
+ qib_get_counters(ppd, &cntrs);
+
+ /* Adjust counters for any resets done. */
+ cntrs.symbol_error_counter -= ibp->z_symbol_error_counter;
+ cntrs.link_error_recovery_counter -=
+ ibp->z_link_error_recovery_counter;
+ cntrs.link_downed_counter -= ibp->z_link_downed_counter;
+ cntrs.port_rcv_errors -= ibp->z_port_rcv_errors;
+ cntrs.port_rcv_remphys_errors -= ibp->z_port_rcv_remphys_errors;
+ cntrs.port_xmit_discards -= ibp->z_port_xmit_discards;
+ cntrs.port_xmit_data -= ibp->z_port_xmit_data;
+ cntrs.port_rcv_data -= ibp->z_port_rcv_data;
+ cntrs.port_xmit_packets -= ibp->z_port_xmit_packets;
+ cntrs.port_rcv_packets -= ibp->z_port_rcv_packets;
+ cntrs.local_link_integrity_errors -=
+ ibp->z_local_link_integrity_errors;
+ cntrs.excessive_buffer_overrun_errors -=
+ ibp->z_excessive_buffer_overrun_errors;
+ cntrs.vl15_dropped -= ibp->z_vl15_dropped;
+ cntrs.vl15_dropped += ibp->n_vl15_dropped;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ p->port_select = port_select;
+ if (pmp->mad_hdr.attr_mod != 0 || port_select != port)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+
+ if (cntrs.symbol_error_counter > 0xFFFFUL)
+ p->symbol_error_counter = cpu_to_be16(0xFFFF);
+ else
+ p->symbol_error_counter =
+ cpu_to_be16((u16)cntrs.symbol_error_counter);
+ if (cntrs.link_error_recovery_counter > 0xFFUL)
+ p->link_error_recovery_counter = 0xFF;
+ else
+ p->link_error_recovery_counter =
+ (u8)cntrs.link_error_recovery_counter;
+ if (cntrs.link_downed_counter > 0xFFUL)
+ p->link_downed_counter = 0xFF;
+ else
+ p->link_downed_counter = (u8)cntrs.link_downed_counter;
+ if (cntrs.port_rcv_errors > 0xFFFFUL)
+ p->port_rcv_errors = cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_errors =
+ cpu_to_be16((u16) cntrs.port_rcv_errors);
+ if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
+ p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_remphys_errors =
+ cpu_to_be16((u16)cntrs.port_rcv_remphys_errors);
+ if (cntrs.port_xmit_discards > 0xFFFFUL)
+ p->port_xmit_discards = cpu_to_be16(0xFFFF);
+ else
+ p->port_xmit_discards =
+ cpu_to_be16((u16)cntrs.port_xmit_discards);
+ if (cntrs.local_link_integrity_errors > 0xFUL)
+ cntrs.local_link_integrity_errors = 0xFUL;
+ if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
+ cntrs.excessive_buffer_overrun_errors = 0xFUL;
+ p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
+ cntrs.excessive_buffer_overrun_errors;
+ if (cntrs.vl15_dropped > 0xFFFFUL)
+ p->vl15_dropped = cpu_to_be16(0xFFFF);
+ else
+ p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
+ if (cntrs.port_xmit_data > 0xFFFFFFFFUL)
+ p->port_xmit_data = cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data);
+ if (cntrs.port_rcv_data > 0xFFFFFFFFUL)
+ p->port_rcv_data = cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data);
+ if (cntrs.port_xmit_packets > 0xFFFFFFFFUL)
+ p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_xmit_packets =
+ cpu_to_be32((u32)cntrs.port_xmit_packets);
+ if (cntrs.port_rcv_packets > 0xFFFFFFFFUL)
+ p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF);
+ else
+ p->port_rcv_packets =
+ cpu_to_be32((u32) cntrs.port_rcv_packets);
+
+ return reply((struct ib_smp *) pmp);
+}
+
+static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ /* Congestion PMA packets start at offset 24 not 64 */
+ struct ib_pma_portcounters_cong *p =
+ (struct ib_pma_portcounters_cong *)pmp->reserved;
+ struct qib_verbs_counters cntrs;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = dd_from_ppd(ppd);
+ u32 port_select = be32_to_cpu(pmp->mad_hdr.attr_mod) & 0xFF;
+ u64 xmit_wait_counter;
+ unsigned long flags;
+
+ /*
+ * This check is performed only in the GET method because the
+ * SET method ends up calling this anyway.
+ */
+ if (!dd->psxmitwait_supported)
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+ if (port_select != port)
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+
+ qib_get_counters(ppd, &cntrs);
+ spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+ xmit_wait_counter = xmit_wait_get_value_delta(ppd);
+ spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+
+ /* Adjust counters for any resets done. */
+ cntrs.symbol_error_counter -= ibp->z_symbol_error_counter;
+ cntrs.link_error_recovery_counter -=
+ ibp->z_link_error_recovery_counter;
+ cntrs.link_downed_counter -= ibp->z_link_downed_counter;
+ cntrs.port_rcv_errors -= ibp->z_port_rcv_errors;
+ cntrs.port_rcv_remphys_errors -=
+ ibp->z_port_rcv_remphys_errors;
+ cntrs.port_xmit_discards -= ibp->z_port_xmit_discards;
+ cntrs.local_link_integrity_errors -=
+ ibp->z_local_link_integrity_errors;
+ cntrs.excessive_buffer_overrun_errors -=
+ ibp->z_excessive_buffer_overrun_errors;
+ cntrs.vl15_dropped -= ibp->z_vl15_dropped;
+ cntrs.vl15_dropped += ibp->n_vl15_dropped;
+ cntrs.port_xmit_data -= ibp->z_port_xmit_data;
+ cntrs.port_rcv_data -= ibp->z_port_rcv_data;
+ cntrs.port_xmit_packets -= ibp->z_port_xmit_packets;
+ cntrs.port_rcv_packets -= ibp->z_port_rcv_packets;
+
+ memset(pmp->reserved, 0, sizeof(pmp->reserved) +
+ sizeof(pmp->data));
+
+ /*
+ * Set top 3 bits to indicate interval in picoseconds in
+ * remaining bits.
+ */
+ p->port_check_rate =
+ cpu_to_be16((QIB_XMIT_RATE_PICO << 13) |
+ (dd->psxmitwait_check_rate &
+ ~(QIB_XMIT_RATE_PICO << 13)));
+ p->port_adr_events = cpu_to_be64(0);
+ p->port_xmit_wait = cpu_to_be64(xmit_wait_counter);
+ p->port_xmit_data = cpu_to_be64(cntrs.port_xmit_data);
+ p->port_rcv_data = cpu_to_be64(cntrs.port_rcv_data);
+ p->port_xmit_packets =
+ cpu_to_be64(cntrs.port_xmit_packets);
+ p->port_rcv_packets =
+ cpu_to_be64(cntrs.port_rcv_packets);
+ if (cntrs.symbol_error_counter > 0xFFFFUL)
+ p->symbol_error_counter = cpu_to_be16(0xFFFF);
+ else
+ p->symbol_error_counter =
+ cpu_to_be16(
+ (u16)cntrs.symbol_error_counter);
+ if (cntrs.link_error_recovery_counter > 0xFFUL)
+ p->link_error_recovery_counter = 0xFF;
+ else
+ p->link_error_recovery_counter =
+ (u8)cntrs.link_error_recovery_counter;
+ if (cntrs.link_downed_counter > 0xFFUL)
+ p->link_downed_counter = 0xFF;
+ else
+ p->link_downed_counter =
+ (u8)cntrs.link_downed_counter;
+ if (cntrs.port_rcv_errors > 0xFFFFUL)
+ p->port_rcv_errors = cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_errors =
+ cpu_to_be16((u16) cntrs.port_rcv_errors);
+ if (cntrs.port_rcv_remphys_errors > 0xFFFFUL)
+ p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+ else
+ p->port_rcv_remphys_errors =
+ cpu_to_be16(
+ (u16)cntrs.port_rcv_remphys_errors);
+ if (cntrs.port_xmit_discards > 0xFFFFUL)
+ p->port_xmit_discards = cpu_to_be16(0xFFFF);
+ else
+ p->port_xmit_discards =
+ cpu_to_be16((u16)cntrs.port_xmit_discards);
+ if (cntrs.local_link_integrity_errors > 0xFUL)
+ cntrs.local_link_integrity_errors = 0xFUL;
+ if (cntrs.excessive_buffer_overrun_errors > 0xFUL)
+ cntrs.excessive_buffer_overrun_errors = 0xFUL;
+ p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) |
+ cntrs.excessive_buffer_overrun_errors;
+ if (cntrs.vl15_dropped > 0xFFFFUL)
+ p->vl15_dropped = cpu_to_be16(0xFFFF);
+ else
+ p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped);
+
+ return reply((struct ib_smp *)pmp);
+}
+
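+/*
+ * Sum the per-CPU unicast/multicast packet counters into a single
+ * snapshot for reporting.
+ */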
+static void qib_snapshot_pmacounters(
+ struct qib_ibport *ibp,
+ struct qib_pma_counters *pmacounters)
+{
+ struct qib_pma_counters *p;
+ int cpu;
+
+ memset(pmacounters, 0, sizeof(*pmacounters));
+ for_each_possible_cpu(cpu) {
+ p = per_cpu_ptr(ibp->pmastats, cpu);
+ pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+ pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+ pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+ pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+ }
+}
+
+static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters_ext *p =
+ (struct ib_pma_portcounters_ext *)pmp->data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
+ u8 port_select = p->port_select;
+
+ memset(pmp->data, 0, sizeof(pmp->data));
+
+ p->port_select = port_select;
+ if (pmp->mad_hdr.attr_mod != 0 || port_select != port) {
+ pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+ goto bail;
+ }
+
+ qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
+
+ /* Adjust counters for any resets done. */
+ swords -= ibp->z_port_xmit_data;
+ rwords -= ibp->z_port_rcv_data;
+ spkts -= ibp->z_port_xmit_packets;
+ rpkts -= ibp->z_port_rcv_packets;
+
+ p->port_xmit_data = cpu_to_be64(swords);
+ p->port_rcv_data = cpu_to_be64(rwords);
+ p->port_xmit_packets = cpu_to_be64(spkts);
+ p->port_rcv_packets = cpu_to_be64(rpkts);
+
+ qib_snapshot_pmacounters(ibp, &pma);
+
+ p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+ - ibp->z_unicast_xmit);
+ p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+ - ibp->z_unicast_rcv);
+ p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+ - ibp->z_multicast_xmit);
+ p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+ - ibp->z_multicast_rcv);
+
+bail:
+ return reply((struct ib_smp *) pmp);
+}
+
+static int pma_set_portcounters(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_verbs_counters cntrs;
+
+ /*
+ * Since the HW doesn't support clearing counters, we save the
+ * current count and subtract it from future responses.
+ */
+ qib_get_counters(ppd, &cntrs);
+
+ if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
+ ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
+
+ if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY)
+ ibp->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+
+ if (p->counter_select & IB_PMA_SEL_LINK_DOWNED)
+ ibp->z_link_downed_counter = cntrs.link_downed_counter;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS)
+ ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS)
+ ibp->z_port_rcv_remphys_errors =
+ cntrs.port_rcv_remphys_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS)
+ ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
+
+ if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS)
+ ibp->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+
+ if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS)
+ ibp->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
+ ibp->n_vl15_dropped = 0;
+ ibp->z_vl15_dropped = cntrs.vl15_dropped;
+ }
+
+ if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA)
+ ibp->z_port_xmit_data = cntrs.port_xmit_data;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA)
+ ibp->z_port_rcv_data = cntrs.port_rcv_data;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS)
+ ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
+
+ if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS)
+ ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
+
+ return pma_get_portcounters(pmp, ibdev, port);
+}
+
+static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = dd_from_ppd(ppd);
+ struct qib_verbs_counters cntrs;
+ u32 counter_select = (be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24) & 0xFF;
+ int ret = 0;
+ unsigned long flags;
+
+ qib_get_counters(ppd, &cntrs);
+ /* Get counter values before we save them */
+ ret = pma_get_portcounters_cong(pmp, ibdev, port);
+
+ if (counter_select & IB_PMA_SEL_CONG_XMIT) {
+ spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+ ppd->cong_stats.counter = 0;
+ dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL,
+ 0x0);
+ spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+ }
+ if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) {
+ ibp->z_port_xmit_data = cntrs.port_xmit_data;
+ ibp->z_port_rcv_data = cntrs.port_rcv_data;
+ ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
+ ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
+ }
+ if (counter_select & IB_PMA_SEL_CONG_ALL) {
+ ibp->z_symbol_error_counter =
+ cntrs.symbol_error_counter;
+ ibp->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+ ibp->z_link_downed_counter =
+ cntrs.link_downed_counter;
+ ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
+ ibp->z_port_rcv_remphys_errors =
+ cntrs.port_rcv_remphys_errors;
+ ibp->z_port_xmit_discards =
+ cntrs.port_xmit_discards;
+ ibp->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+ ibp->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+ ibp->n_vl15_dropped = 0;
+ ibp->z_vl15_dropped = cntrs.vl15_dropped;
+ }
+
+ return ret;
+}
+
+static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+ pmp->data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
+
+ qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
+
+ if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
+ ibp->z_port_xmit_data = swords;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA)
+ ibp->z_port_rcv_data = rwords;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS)
+ ibp->z_port_xmit_packets = spkts;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
+ ibp->z_port_rcv_packets = rpkts;
+
+ qib_snapshot_pmacounters(ibp, &pma);
+
+ if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
+ ibp->z_unicast_xmit = pma.n_unicast_xmit;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
+ ibp->z_unicast_rcv = pma.n_unicast_rcv;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
+ ibp->z_multicast_xmit = pma.n_multicast_xmit;
+
+ if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
+ ibp->z_multicast_rcv = pma.n_multicast_rcv;
+
+ return pma_get_portcounters_ext(pmp, ibdev, port);
+}
+
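+/*
+ * Handle an incoming subnet management (SM class) MAD: validate the
+ * class version and M_Key, then dispatch by method and attribute.
+ */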
+static int process_subn(struct ib_device *ibdev, int mad_flags,
+ u8 port, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_smp *smp = (struct ib_smp *)out_mad;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int ret;
+
+ *out_mad = *in_mad;
+ if (smp->class_version != 1) {
+ smp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ ret = check_mkey(ibp, smp, mad_flags);
+ if (ret) {
+ u32 port_num = be32_to_cpu(smp->attr_mod);
+
+ /*
+ * If this is a get/set portinfo, we already check the
+ * M_Key if the MAD is for another port and the M_Key
+ * is OK on the receiving port. This check is needed
+ * to increment the error counters when the M_Key
+ * fails to match on *both* ports.
+ */
+ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+ (smp->method == IB_MGMT_METHOD_GET ||
+ smp->method == IB_MGMT_METHOD_SET) &&
+ port_num && port_num <= ibdev->phys_port_cnt &&
+ port != port_num)
+ (void) check_mkey(to_iport(ibdev, port_num), smp, 0);
+ ret = IB_MAD_RESULT_FAILURE;
+ goto bail;
+ }
+
+ switch (smp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (smp->attr_id) {
+ case IB_SMP_ATTR_NODE_DESC:
+ ret = subn_get_nodedescription(smp, ibdev);
+ goto bail;
+ case IB_SMP_ATTR_NODE_INFO:
+ ret = subn_get_nodeinfo(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_GUID_INFO:
+ ret = subn_get_guidinfo(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_PORT_INFO:
+ ret = subn_get_portinfo(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_PKEY_TABLE:
+ ret = subn_get_pkeytable(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_SL_TO_VL_TABLE:
+ ret = subn_get_sl_to_vl(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_VL_ARB_TABLE:
+ ret = subn_get_vl_arb(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_SM_INFO:
+ if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+ ret = IB_MAD_RESULT_SUCCESS |
+ IB_MAD_RESULT_CONSUMED;
+ goto bail;
+ }
+ if (ibp->port_cap_flags & IB_PORT_SM) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ /* FALLTHROUGH */
+ default:
+ smp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (smp->attr_id) {
+ case IB_SMP_ATTR_GUID_INFO:
+ ret = subn_set_guidinfo(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_PORT_INFO:
+ ret = subn_set_portinfo(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_PKEY_TABLE:
+ ret = subn_set_pkeytable(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_SL_TO_VL_TABLE:
+ ret = subn_set_sl_to_vl(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_VL_ARB_TABLE:
+ ret = subn_set_vl_arb(smp, ibdev, port);
+ goto bail;
+ case IB_SMP_ATTR_SM_INFO:
+ if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+ ret = IB_MAD_RESULT_SUCCESS |
+ IB_MAD_RESULT_CONSUMED;
+ goto bail;
+ }
+ if (ibp->port_cap_flags & IB_PORT_SM) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ /* FALLTHROUGH */
+ default:
+ smp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply(smp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_TRAP_REPRESS:
+ if (smp->attr_id == IB_SMP_ATTR_NOTICE)
+ ret = subn_trap_repress(smp, ibdev, port);
+ else {
+ smp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply(smp);
+ }
+ goto bail;
+
+ case IB_MGMT_METHOD_TRAP:
+ case IB_MGMT_METHOD_REPORT:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ case IB_MGMT_METHOD_SEND:
+ if (ib_get_smp_direction(smp) &&
+ smp->attr_id == QIB_VENDOR_IPG) {
+ ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PORT,
+ smp->data[0]);
+ ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+ } else
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ default:
+ smp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply(smp);
+ }
+
+bail:
+ return ret;
+}
+
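+/*
+ * Handle an incoming performance management (PMA class) MAD,
+ * dispatching Get/Set requests by attribute.
+ */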
+static int process_perf(struct ib_device *ibdev, u8 port,
+ struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
+ int ret;
+
+ *out_mad = *in_mad;
+ if (pmp->mad_hdr.class_version != 1) {
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ switch (pmp->mad_hdr.method) {
+ case IB_MGMT_METHOD_GET:
+ switch (pmp->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ ret = pma_get_classportinfo(pmp, ibdev);
+ goto bail;
+ case IB_PMA_PORT_SAMPLES_CONTROL:
+ ret = pma_get_portsamplescontrol(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_SAMPLES_RESULT:
+ ret = pma_get_portsamplesresult(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_SAMPLES_RESULT_EXT:
+ ret = pma_get_portsamplesresult_ext(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS:
+ ret = pma_get_portcounters(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ ret = pma_get_portcounters_ext(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS_CONG:
+ ret = pma_get_portcounters_cong(pmp, ibdev, port);
+ goto bail;
+ default:
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (pmp->mad_hdr.attr_id) {
+ case IB_PMA_PORT_SAMPLES_CONTROL:
+ ret = pma_set_portsamplescontrol(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS:
+ ret = pma_set_portcounters(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ ret = pma_set_portcounters_ext(pmp, ibdev, port);
+ goto bail;
+ case IB_PMA_PORT_COUNTERS_CONG:
+ ret = pma_set_portcounters_cong(pmp, ibdev, port);
+ goto bail;
+ default:
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) pmp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_TRAP:
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ default:
+ pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_smp *) pmp);
+ }
+
+bail:
+ return ret;
+}
+
+static int cc_get_classportinfo(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev)
+{
+ struct ib_cc_classportinfo_attr *p =
+ (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->base_version = 1;
+ p->class_version = 1;
+ p->cap_mask = 0;
+
+ /*
+ * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
+ */
+ p->resp_time_value = 18;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_info(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_info_attr *p =
+ (struct ib_cc_info_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ p->congestion_info = 0;
+ p->control_table_cap = ppd->cc_max_table_entries;
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ int i;
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct ib_cc_congestion_entry_shadow *entries;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ entries = ppd->congestion_entries_shadow->entries;
+ p->port_control = cpu_to_be16(
+ ppd->congestion_entries_shadow->port_control);
+ p->control_map = cpu_to_be16(
+ ppd->congestion_entries_shadow->control_map);
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ p->entries[i].ccti_increase = entries[i].ccti_increase;
+ p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
+ p->entries[i].trigger_threshold = entries[i].trigger_threshold;
+ p->entries[i].ccti_min = entries[i].ccti_min;
+ }
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 max_cct_block;
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ max_cct_block =
+ (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
+ max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
+
+ if (cct_block_index > max_cct_block) {
+ spin_unlock(&ppd->cc_shadow_lock);
+ goto bail;
+ }
+
+ ccp->attr_mod = cpu_to_be32(cct_block_index);
+
+ cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
+
+ cct_entry--;
+
+ p->ccti_limit = cpu_to_be16(cct_entry);
+
+ entries = &ppd->ccti_entries_shadow->
+ entries[IB_CCT_ENTRIES * cct_block_index];
+ cct_entry %= IB_CCT_ENTRIES;
+
+ for (i = 0; i <= cct_entry; i++)
+ p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_congestion_setting_attr *p =
+ (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int i;
+
+ ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
+
+ for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+ ppd->congestion_entries[i].ccti_increase =
+ p->entries[i].ccti_increase;
+
+ ppd->congestion_entries[i].ccti_timer =
+ be16_to_cpu(p->entries[i].ccti_timer);
+
+ ppd->congestion_entries[i].trigger_threshold =
+ p->entries[i].trigger_threshold;
+
+ ppd->congestion_entries[i].ccti_min =
+ p->entries[i].ccti_min;
+ }
+
+ return reply((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
+ struct ib_device *ibdev, u8 port)
+{
+ struct ib_cc_table_attr *p =
+ (struct ib_cc_table_attr *)ccp->mgmt_data;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+ u32 cct_entry;
+ struct ib_cc_table_entry_shadow *entries;
+ int i;
+
+ /* Is the table index more than what is supported? */
+ if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+ goto bail;
+
+ /* If this packet is the first in the sequence then
+ * zero the total table entry count.
+ */
+ if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
+ ppd->total_cct_entry = 0;
+
+ cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
+
+ /* ccti_limit is 0 to 63 */
+ ppd->total_cct_entry += (cct_entry + 1);
+
+ if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
+ goto bail;
+
+ ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
+
+ entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
+
+ for (i = 0; i <= cct_entry; i++)
+ entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
+
+ spin_lock(&ppd->cc_shadow_lock);
+
+ ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
+ memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
+ (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
+
+ ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
+ ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
+ memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
+ IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
+
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return reply((struct ib_smp *) ccp);
+
+bail:
+ return reply_failure((struct ib_smp *) ccp);
+}
+
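+/* CC key checking is not implemented; all CC MADs are accepted. */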
+static int check_cc_key(struct qib_ibport *ibp,
+ struct ib_cc_mad *ccp, int mad_flags)
+{
+ return 0;
+}
+
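+/*
+ * Handle an incoming congestion control (CC class) MAD, dispatching
+ * by method and attribute.
+ */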
+static int process_cc(struct ib_device *ibdev, int mad_flags,
+ u8 port, struct ib_mad *in_mad,
+ struct ib_mad *out_mad)
+{
+ struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ int ret;
+
+ *out_mad = *in_mad;
+
+ if (ccp->class_version != 2) {
+ ccp->status |= IB_SMP_UNSUP_VERSION;
+ ret = reply((struct ib_smp *)ccp);
+ goto bail;
+ }
+
+ ret = check_cc_key(ibp, ccp, mad_flags);
+ if (ret)
+ goto bail;
+
+ switch (ccp->method) {
+ case IB_MGMT_METHOD_GET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CLASSPORTINFO:
+ ret = cc_get_classportinfo(ccp, ibdev);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_INFO:
+ ret = cc_get_congestion_info(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_get_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_get_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_SET:
+ switch (ccp->attr_id) {
+ case IB_CC_ATTR_CA_CONGESTION_SETTING:
+ ret = cc_set_congestion_setting(ccp, ibdev, port);
+ goto bail;
+
+ case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+ ret = cc_set_congestion_control_table(ccp, ibdev, port);
+ goto bail;
+
+ default:
+ ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+ ret = reply((struct ib_smp *) ccp);
+ goto bail;
+ }
+
+ case IB_MGMT_METHOD_GET_RESP:
+ /*
+ * The ib_mad module will call us to process responses
+ * before checking for other consumers.
+ * Just tell the caller to process it normally.
+ */
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+
+ case IB_MGMT_METHOD_TRAP:
+ default:
+ ccp->status |= IB_SMP_UNSUP_METHOD;
+ ret = reply((struct ib_smp *) ccp);
+ }
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_process_mad - process an incoming MAD packet
+ * @ibdev: the infiniband device this packet came in on
+ * @mad_flags: MAD flags
+ * @port: the port number this packet came in on
+ * @in_wc: the work completion entry for this packet
+ * @in_grh: the global route header for this packet
+ * @in_mad: the incoming MAD
+ * @out_mad: any outgoing MAD reply
+ *
+ * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
+ * interested in processing.
+ *
+ * Note that the verbs framework has already done the MAD sanity checks,
+ * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ * MADs.
+ *
+ * This is called by the ib_mad module.
+ */
+int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ int ret;
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ switch (in_mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
+ goto bail;
+
+ case IB_MGMT_CLASS_PERF_MGMT:
+ ret = process_perf(ibdev, port, in_mad, out_mad);
+ goto bail;
+
+ case IB_MGMT_CLASS_CONG_MGMT:
+ if (!ppd->congestion_entries_shadow ||
+ !qib_cc_table_size) {
+ ret = IB_MAD_RESULT_SUCCESS;
+ goto bail;
+ }
+ ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+ goto bail;
+
+ default:
+ ret = IB_MAD_RESULT_SUCCESS;
+ }
+
+bail:
+ return ret;
+}
+
+static void send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
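+/*
+ * Per-second timer callback: once the PS* counters are back under
+ * timer control, accumulate PortXmitWait into the software counter
+ * and re-arm the hardware sampling interval, then reschedule.
+ */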
+static void xmit_wait_timer_func(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+ struct qib_devdata *dd = dd_from_ppd(ppd);
+ unsigned long flags;
+ u8 status;
+
+ spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+ if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) {
+ status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
+ if (status == IB_PMA_SAMPLE_STATUS_DONE) {
+ /* save counter cache */
+ cache_hw_sample_counters(ppd);
+ ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
+ } else
+ goto done;
+ }
+ ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd);
+ dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0);
+done:
+ spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+ mod_timer(&ppd->cong_stats.timer, jiffies + HZ);
+}
+
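+/*
+ * Register a send-only SMI MAD agent on each port and start the
+ * per-port congestion statistics (xmit_wait) timer.
+ */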
+int qib_create_agents(struct qib_ibdev *dev)
+{
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct ib_mad_agent *agent;
+ struct qib_ibport *ibp;
+ int p;
+ int ret;
+
+ for (p = 0; p < dd->num_pports; p++) {
+ ibp = &dd->pport[p].ibport_data;
+ agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
+ NULL, 0, send_handler,
+ NULL, NULL);
+ if (IS_ERR(agent)) {
+ ret = PTR_ERR(agent);
+ goto err;
+ }
+
+ /* Initialize xmit_wait structure */
+ dd->pport[p].cong_stats.counter = 0;
+ init_timer(&dd->pport[p].cong_stats.timer);
+ dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func;
+ dd->pport[p].cong_stats.timer.data =
+ (unsigned long)(&dd->pport[p]);
+ dd->pport[p].cong_stats.timer.expires = 0;
+ add_timer(&dd->pport[p].cong_stats.timer);
+
+ ibp->send_agent = agent;
+ }
+
+ return 0;
+
+err:
+ for (p = 0; p < dd->num_pports; p++) {
+ ibp = &dd->pport[p].ibport_data;
+ if (ibp->send_agent) {
+ agent = ibp->send_agent;
+ ibp->send_agent = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+ }
+
+ return ret;
+}
+
+void qib_free_agents(struct qib_ibdev *dev)
+{
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct ib_mad_agent *agent;
+ struct qib_ibport *ibp;
+ int p;
+
+ for (p = 0; p < dd->num_pports; p++) {
+ ibp = &dd->pport[p].ibport_data;
+ if (ibp->send_agent) {
+ agent = ibp->send_agent;
+ ibp->send_agent = NULL;
+ ib_unregister_mad_agent(agent);
+ }
+ if (ibp->sm_ah) {
+ ib_destroy_ah(&ibp->sm_ah->ibah);
+ ibp->sm_ah = NULL;
+ }
+ if (dd->pport[p].cong_stats.timer.data)
+ del_timer_sync(&dd->pport[p].cong_stats.timer);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
new file mode 100644
index 00000000000..941d4d50d8e
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QIB_MAD_H
+#define _QIB_MAD_H
+
+#include <rdma/ib_pma.h>
+
+#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004)
+#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008)
+#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C)
+#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C)
+
+struct ib_node_info {
+ u8 base_version;
+ u8 class_version;
+ u8 node_type;
+ u8 num_ports;
+ __be64 sys_guid;
+ __be64 node_guid;
+ __be64 port_guid;
+ __be16 partition_cap;
+ __be16 device_id;
+ __be32 revision;
+ u8 local_port_num;
+ u8 vendor_id[3];
+} __packed;
+
+struct ib_mad_notice_attr {
+ u8 generic_type;
+ u8 prod_type_msb;
+ __be16 prod_type_lsb;
+ __be16 trap_num;
+ __be16 issuer_lid;
+ __be16 toggle_count;
+
+ union {
+ struct {
+ u8 details[54];
+ } raw_data;
+
+ struct {
+ __be16 reserved;
+ __be16 lid; /* where violation happened */
+ u8 port_num; /* where violation happened */
+ } __packed ntc_129_131;
+
+ struct {
+ __be16 reserved;
+ __be16 lid; /* LID where change occurred */
+ u8 reserved2;
+ u8 local_changes; /* low bit - local changes */
+ __be32 new_cap_mask; /* new capability mask */
+ u8 reserved3;
+ u8 change_flags; /* low 3 bits only */
+ } __packed ntc_144;
+
+ struct {
+ __be16 reserved;
+ __be16 lid; /* lid where sys guid changed */
+ __be16 reserved2;
+ __be64 new_sys_guid;
+ } __packed ntc_145;
+
+ struct {
+ __be16 reserved;
+ __be16 lid;
+ __be16 dr_slid;
+ u8 method;
+ u8 reserved2;
+ __be16 attr_id;
+ __be32 attr_mod;
+ __be64 mkey;
+ u8 reserved3;
+ u8 dr_trunc_hop;
+ u8 dr_rtn_path[30];
+ } __packed ntc_256;
+
+ struct {
+ __be16 reserved;
+ __be16 lid1;
+ __be16 lid2;
+ __be32 key;
+ __be32 sl_qp1; /* SL: high 4 bits */
+ __be32 qp2; /* high 8 bits reserved */
+ union ib_gid gid1;
+ union ib_gid gid2;
+ } __packed ntc_257_258;
+
+ } details;
+};
+
+/*
+ * Generic trap/notice types
+ */
+#define IB_NOTICE_TYPE_FATAL 0x80
+#define IB_NOTICE_TYPE_URGENT 0x81
+#define IB_NOTICE_TYPE_SECURITY 0x82
+#define IB_NOTICE_TYPE_SM 0x83
+#define IB_NOTICE_TYPE_INFO 0x84
+
+/*
+ * Generic trap/notice producers
+ */
+#define IB_NOTICE_PROD_CA cpu_to_be16(1)
+#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2)
+#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3)
+#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4)
+
+/*
+ * Generic trap/notice numbers
+ */
+#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129)
+#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130)
+#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131)
+#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144)
+#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145)
+#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256)
+#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257)
+#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258)
+
+/*
+ * Repress trap/notice flags
+ */
+#define IB_NOTICE_REPRESS_LLI_THRESH (1 << 0)
+#define IB_NOTICE_REPRESS_EBO_THRESH (1 << 1)
+#define IB_NOTICE_REPRESS_FLOW_UPDATE (1 << 2)
+#define IB_NOTICE_REPRESS_CAP_MASK_CHG (1 << 3)
+#define IB_NOTICE_REPRESS_SYS_GUID_CHG (1 << 4)
+#define IB_NOTICE_REPRESS_BAD_MKEY (1 << 5)
+#define IB_NOTICE_REPRESS_BAD_PKEY (1 << 6)
+#define IB_NOTICE_REPRESS_BAD_QKEY (1 << 7)
+
+/*
+ * Generic trap/notice other local changes flags (trap 144).
+ */
+#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */
+#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */
+#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01
+
+/*
+ * Generic trap/notice M_Key violation flags in dr_trunc_hop (trap 256).
+ */
+#define IB_NOTICE_TRAP_DR_NOTICE 0x80
+#define IB_NOTICE_TRAP_DR_TRUNC 0x40
+
+struct ib_vl_weight_elem {
+ u8 vl; /* Only low 4 bits, upper 4 bits reserved */
+ u8 weight;
+};
+
+#define IB_VLARB_LOWPRI_0_31 1
+#define IB_VLARB_LOWPRI_32_63 2
+#define IB_VLARB_HIGHPRI_0_31 3
+#define IB_VLARB_HIGHPRI_32_63 4
+
+#define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00)
+
+struct ib_pma_portcounters_cong {
+ u8 reserved;
+ u8 reserved1;
+ __be16 port_check_rate;
+ __be16 symbol_error_counter;
+ u8 link_error_recovery_counter;
+ u8 link_downed_counter;
+ __be16 port_rcv_errors;
+ __be16 port_rcv_remphys_errors;
+ __be16 port_rcv_switch_relay_errors;
+ __be16 port_xmit_discards;
+ u8 port_xmit_constraint_errors;
+ u8 port_rcv_constraint_errors;
+ u8 reserved2;
+ u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
+ __be16 reserved3;
+ __be16 vl15_dropped;
+ __be64 port_xmit_data;
+ __be64 port_rcv_data;
+ __be64 port_xmit_packets;
+ __be64 port_rcv_packets;
+ __be64 port_xmit_wait;
+ __be64 port_adr_events;
+} __packed;
+
+#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
+#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
+
+#define QIB_XMIT_RATE_UNSUPPORTED 0x0
+#define QIB_XMIT_RATE_PICO 0x7
+/* number of 4nsec cycles equaling 2secs */
+#define QIB_CONG_TIMER_PSINTERVAL 0x1DCD64EC
+
+#define IB_PMA_SEL_CONG_ALL 0x01
+#define IB_PMA_SEL_CONG_PORT_DATA 0x02
+#define IB_PMA_SEL_CONG_XMIT 0x04
+#define IB_PMA_SEL_CONG_ROUTING 0x08
+
+/*
+ * Congestion control class attributes
+ */
+#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
+#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
+#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
+#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
+#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
+#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
+#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
+#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
+#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
+#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
+
+/* generalizations for threshold values */
+#define IB_CC_THRESHOLD_NONE 0x0
+#define IB_CC_THRESHOLD_MIN 0x1
+#define IB_CC_THRESHOLD_MAX 0xf
+
+/* CCA MAD header constants */
+#define IB_CC_MAD_LOGDATA_LEN 32
+#define IB_CC_MAD_MGMTDATA_LEN 192
+
+struct ib_cc_mad {
+ u8 base_version;
+ u8 mgmt_class;
+ u8 class_version;
+ u8 method;
+ __be16 status;
+ __be16 class_specific;
+ __be64 tid;
+ __be16 attr_id;
+ __be16 resv;
+ __be32 attr_mod;
+ __be64 cckey;
+
+ /* For CongestionLog attribute only */
+ u8 log_data[IB_CC_MAD_LOGDATA_LEN];
+
+ u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
+} __packed;
+
+/*
+ * Congestion Control class portinfo capability mask bits
+ */
+#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
+#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
+#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
+#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
+
+struct ib_cc_classportinfo_attr {
+ u8 base_version;
+ u8 class_version;
+ __be16 cap_mask;
+ u8 reserved[3];
+ u8 resp_time_value; /* only lower 5 bits */
+ union ib_gid redirect_gid;
+ __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 redirect_lid;
+ __be16 redirect_pkey;
+ __be32 redirect_qp; /* only lower 24 bits */
+ __be32 redirect_qkey;
+ union ib_gid trap_gid;
+ __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
+ __be16 trap_lid;
+ __be16 trap_pkey;
+ __be32 trap_hl_qp; /* 8, 24 bits respectively */
+ __be32 trap_qkey;
+} __packed;
+
+/* Congestion control traps */
+#define IB_CC_TRAP_KEY_VIOLATION 0x0000
+
+struct ib_cc_trap_key_violation_attr {
+ __be16 source_lid;
+ u8 method;
+ u8 reserved1;
+ __be16 attrib_id;
+ __be32 attrib_mod;
+ __be32 qp;
+ __be64 cckey;
+ u8 sgid[16];
+ u8 padding[24];
+} __packed;
+
+/* Congestion info flags */
+#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
+#define IB_CC_TABLE_CAP_DEFAULT 31
+
+struct ib_cc_info_attr {
+ __be16 congestion_info;
+ u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
+} __packed;
+
+struct ib_cc_key_info_attr {
+ __be64 cckey;
+ u8 protect;
+ __be16 lease_period;
+ __be16 violations;
+} __packed;
+
+#define IB_CC_CL_CA_LOGEVENTS_LEN 208
+
+struct ib_cc_log_attr {
+ u8 log_type;
+ u8 congestion_flags;
+ __be16 threshold_event_counter;
+ __be16 threshold_congestion_event_map;
+ __be16 current_time_stamp;
+ u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
+} __packed;
+
+#define IB_CC_CLEC_SERVICETYPE_RC 0x0
+#define IB_CC_CLEC_SERVICETYPE_UC 0x1
+#define IB_CC_CLEC_SERVICETYPE_RD 0x2
+#define IB_CC_CLEC_SERVICETYPE_UD 0x3
+
+struct ib_cc_log_event {
+ u8 local_qp_cn_entry;
+ u8 remote_qp_number_cn_entry[3];
+ u8 sl_cn_entry:4;
+ u8 service_type_cn_entry:4;
+ __be32 remote_lid_cn_entry;
+ __be32 timestamp_cn_entry;
+} __packed;
+
+/* Sixteen congestion entries */
+#define IB_CC_CCS_ENTRIES 16
+
+/* Port control flags */
+#define IB_CC_CCS_PC_SL_BASED 0x01
+
+struct ib_cc_congestion_entry {
+ u8 ccti_increase;
+ __be16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_entry_shadow {
+ u8 ccti_increase;
+ u16 ccti_timer;
+ u8 trigger_threshold;
+ u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_setting_attr {
+ __be16 port_control;
+ __be16 control_map;
+ struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+struct ib_cc_congestion_setting_attr_shadow {
+ u16 port_control;
+ u16 control_map;
+ struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
+#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
+
+/* 64 Congestion Control table entries in a single MAD */
+#define IB_CCT_ENTRIES 64
+#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
+
+struct ib_cc_table_entry {
+ __be16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_entry_shadow {
+ u16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_attr {
+ __be16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+struct ib_cc_table_attr_shadow {
+ u16 ccti_limit; /* max CCTI for cc table */
+ struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+#define CC_TABLE_SHADOW_MAX \
+ (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
+
+struct cc_table_shadow {
+ u16 ccti_last_entry;
+ struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
+} __packed;
+
+/*
+ * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
+ * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
+ * We support 5 counters which only count the mandatory quantities.
+ */
+#define COUNTER_MASK(q, n) (q << ((9 - n) * 3))
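+/* e.g. COUNTER_MASK(1, 0) places a 1 in the 3-bit field at bits 29:27 */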
+#define COUNTER_MASK0_9 \
+ cpu_to_be32(COUNTER_MASK(1, 0) | \
+ COUNTER_MASK(1, 1) | \
+ COUNTER_MASK(1, 2) | \
+ COUNTER_MASK(1, 3) | \
+ COUNTER_MASK(1, 4))
+
+#endif /* _QIB_MAD_H */
diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c
new file mode 100644
index 00000000000..8b73a11d571
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_mmap.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <asm/pgtable.h>
+
+#include "qib_verbs.h"
+
+/**
+ * qib_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct qib_mmap_info
+ */
+void qib_release_mmap_info(struct kref *ref)
+{
+ struct qib_mmap_info *ip =
+ container_of(ref, struct qib_mmap_info, ref);
+ struct qib_ibdev *dev = to_idev(ip->context->device);
+
+ spin_lock_irq(&dev->pending_lock);
+ list_del(&ip->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+
+ vfree(ip->obj);
+ kfree(ip);
+}
+
+/*
+ * open and close keep track of how many times the CQ is mapped,
+ * to avoid releasing it.
+ */
+static void qib_vma_open(struct vm_area_struct *vma)
+{
+ struct qib_mmap_info *ip = vma->vm_private_data;
+
+ kref_get(&ip->ref);
+}
+
+static void qib_vma_close(struct vm_area_struct *vma)
+{
+ struct qib_mmap_info *ip = vma->vm_private_data;
+
+ kref_put(&ip->ref, qib_release_mmap_info);
+}
+
+static struct vm_operations_struct qib_vm_ops = {
+ .open = qib_vma_open,
+ .close = qib_vma_close,
+};
+
+/**
+ * qib_mmap - create a new mmap region
+ * @context: the IB user context of the process making the mmap() call
+ * @vma: the VMA to be initialized
+ * Return zero if the mmap is OK. Otherwise, return an errno.
+ */
+int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct qib_ibdev *dev = to_idev(context->device);
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct qib_mmap_info *ip, *pp;
+ int ret = -EINVAL;
+
+ /*
+ * Search the device's list of objects waiting for a mmap call.
+ * Normally, this list is very short since a call to create a
+ * CQ, QP, or SRQ is soon followed by a call to mmap().
+ */
+ spin_lock_irq(&dev->pending_lock);
+ list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+ pending_mmaps) {
+ /* Only the creator is allowed to mmap the object */
+ if (context != ip->context || (__u64) offset != ip->offset)
+ continue;
+ /* Don't allow a mmap larger than the object. */
+ if (size > ip->size)
+ break;
+
+ list_del_init(&ip->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+
+ ret = remap_vmalloc_range(vma, ip->obj, 0);
+ if (ret)
+ goto done;
+ vma->vm_ops = &qib_vm_ops;
+ vma->vm_private_data = ip;
+ qib_vma_open(vma);
+ goto done;
+ }
+ spin_unlock_irq(&dev->pending_lock);
+done:
+ return ret;
+}
+
+/*
+ * Allocate information for qib_mmap
+ */
+struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev,
+ u32 size,
+ struct ib_ucontext *context,
+ void *obj)
+{
+ struct qib_mmap_info *ip;
+
+ ip = kmalloc(sizeof *ip, GFP_KERNEL);
+ if (!ip)
+ goto bail;
+
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ INIT_LIST_HEAD(&ip->pending_mmaps);
+ ip->size = size;
+ ip->context = context;
+ ip->obj = obj;
+ kref_init(&ip->ref);
+
+bail:
+ return ip;
+}
+
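+/*
+ * Refresh an existing mmap info entry with a new mmap offset, size,
+ * and backing object.
+ */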
+void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
+ u32 size, void *obj)
+{
+ size = PAGE_ALIGN(size);
+
+ spin_lock_irq(&dev->mmap_offset_lock);
+ if (dev->mmap_offset == 0)
+ dev->mmap_offset = PAGE_SIZE;
+ ip->offset = dev->mmap_offset;
+ dev->mmap_offset += size;
+ spin_unlock_irq(&dev->mmap_offset_lock);
+
+ ip->size = size;
+ ip->obj = obj;
+}
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
new file mode 100644
index 00000000000..9bbb55347cc
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_smi.h>
+
+#include "qib.h"
+
+/* Fast memory region */
+struct qib_fmr {
+ struct ib_fmr ibfmr;
+ struct qib_mregion mr; /* must be last */
+};
+
+static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+ return container_of(ibfmr, struct qib_fmr, ibfmr);
+}
+
+static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
+ int count)
+{
+ int m, i = 0;
+ int rval = 0;
+
+ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
+ for (; i < m; i++) {
+ mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL);
+ if (!mr->map[i])
+ goto bail;
+ }
+ mr->mapsz = m;
+ init_completion(&mr->comp);
+ /* the initial reference accounts for the pointer returned to the caller */
+ atomic_set(&mr->refcount, 1);
+ mr->pd = pd;
+ mr->max_segs = count;
+out:
+ return rval;
+bail:
+ while (i)
+ kfree(mr->map[--i]);
+ rval = -ENOMEM;
+ goto out;
+}
+
+static void deinit_qib_mregion(struct qib_mregion *mr)
+{
+ int i = mr->mapsz;
+
+ mr->mapsz = 0;
+ while (i)
+ kfree(mr->map[--i]);
+}
+
+
+/**
+ * qib_get_dma_mr - get a DMA memory region
+ * @pd: protection domain for this memory region
+ * @acc: access flags
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see qib_dma.c).
+ */
+struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct qib_mr *mr = NULL;
+ struct ib_mr *ret;
+ int rval;
+
+ if (to_ipd(pd)->user) {
+ ret = ERR_PTR(-EPERM);
+ goto bail;
+ }
+
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ rval = init_qib_mregion(&mr->mr, pd, 0);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail;
+ }
+
+
+ rval = qib_alloc_lkey(&mr->mr, 1);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail_mregion;
+ }
+
+ mr->mr.access_flags = acc;
+ ret = &mr->ibmr;
+done:
+ return ret;
+
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
+bail:
+ kfree(mr);
+ goto done;
+}
+
+static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
+{
+ struct qib_mr *mr;
+ int rval = -ENOMEM;
+ int m;
+
+ /* Allocate struct plus pointers to first level page tables. */
+ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
+ mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
+ if (!mr)
+ goto bail;
+
+ rval = init_qib_mregion(&mr->mr, pd, count);
+ if (rval)
+ goto bail;
+ /*
+ * ib_reg_phys_mr() will initialize mr->ibmr except for
+ * lkey and rkey.
+ */
+ rval = qib_alloc_lkey(&mr->mr, 0);
+ if (rval)
+ goto bail_mregion;
+ mr->ibmr.lkey = mr->mr.lkey;
+ mr->ibmr.rkey = mr->mr.lkey;
+done:
+ return mr;
+
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
+bail:
+ kfree(mr);
+ mr = ERR_PTR(rval);
+ goto done;
+}
+
+/**
+ * qib_reg_phys_mr - register a physical memory region
+ * @pd: protection domain for this memory region
+ * @buffer_list: pointer to the list of physical buffers to register
+ * @num_phys_buf: the number of physical buffers to register
+ * @acc: access flags for this memory region
+ * @iova_start: the starting address passed over IB which maps to this MR
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start)
+{
+ struct qib_mr *mr;
+ int n, m, i;
+ struct ib_mr *ret;
+
+ mr = alloc_mr(num_phys_buf, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
+ goto bail;
+ }
+
+ mr->mr.user_base = *iova_start;
+ mr->mr.iova = *iova_start;
+ mr->mr.access_flags = acc;
+
+ m = 0;
+ n = 0;
+ for (i = 0; i < num_phys_buf; i++) {
+ mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
+ mr->mr.map[m]->segs[n].length = buffer_list[i].size;
+ mr->mr.length += buffer_list[i].size;
+ n++;
+ if (n == QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+
+ ret = &mr->ibmr;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_reg_user_mr - register a userspace memory region
+ * @pd: protection domain for this memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @virt_addr: the IB virtual address to associate with the memory region
+ * @mr_access_flags: access flags for this memory region
+ * @udata: unused by the QLogic_IB driver
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct qib_mr *mr;
+ struct ib_umem *umem;
+ struct scatterlist *sg;
+ int n, m, entry;
+ struct ib_mr *ret;
+
+ if (length == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ umem = ib_umem_get(pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(umem))
+ return (void *) umem;
+
+ n = umem->nmap;
+
+ mr = alloc_mr(n, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
+ ib_umem_release(umem);
+ goto bail;
+ }
+
+ mr->mr.user_base = start;
+ mr->mr.iova = virt_addr;
+ mr->mr.length = length;
+ mr->mr.offset = umem->offset;
+ mr->mr.access_flags = mr_access_flags;
+ mr->umem = umem;
+
+ if (is_power_of_2(umem->page_size))
+ mr->mr.page_shift = ilog2(umem->page_size);
+ m = 0;
+ n = 0;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ ret = &mr->ibmr;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_dereg_mr - unregister and free a memory region
+ * @ibmr: the memory region to free
+ *
+ * Returns 0 on success.
+ *
+ * Note that this is called to free MRs created by qib_get_dma_mr()
+ * or qib_reg_user_mr().
+ */
+int qib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct qib_mr *mr = to_imr(ibmr);
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&mr->mr);
+
+ qib_put_mr(&mr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&mr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&mr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&mr->mr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+ kfree(mr);
+out:
+ return ret;
+}
+
+/*
+ * Allocate a memory region usable with the
+ * IB_WR_FAST_REG_MR send work request.
+ *
+ * Return the memory region on success, otherwise return an errno.
+ */
+struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+{
+ struct qib_mr *mr;
+
+ mr = alloc_mr(max_page_list_len, pd);
+ if (IS_ERR(mr))
+ return (struct ib_mr *)mr;
+
+ return &mr->ibmr;
+}
+
+struct ib_fast_reg_page_list *
+qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
+{
+ unsigned size = page_list_len * sizeof(u64);
+ struct ib_fast_reg_page_list *pl;
+
+ if (size > PAGE_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ pl = kzalloc(sizeof *pl, GFP_KERNEL);
+ if (!pl)
+ return ERR_PTR(-ENOMEM);
+
+ pl->page_list = kzalloc(size, GFP_KERNEL);
+ if (!pl->page_list)
+ goto err_free;
+
+ return pl;
+
+err_free:
+ kfree(pl);
+ return ERR_PTR(-ENOMEM);
+}
+
+void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
+{
+ kfree(pl->page_list);
+ kfree(pl);
+}
+
+/**
+ * qib_alloc_fmr - allocate a fast memory region
+ * @pd: the protection domain for this memory region
+ * @mr_access_flags: access flags for this memory region
+ * @fmr_attr: fast memory region attributes
+ *
+ * Returns the memory region on success, otherwise returns an errno.
+ */
+struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct qib_fmr *fmr;
+ int m;
+ struct ib_fmr *ret;
+ int rval = -ENOMEM;
+
+ /* Allocate struct plus pointers to first level page tables. */
+ m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
+ fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
+ if (!fmr)
+ goto bail;
+
+ rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ if (rval)
+ goto bail;
+
+ /*
+ * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
+ * rkey.
+ */
+ rval = qib_alloc_lkey(&fmr->mr, 0);
+ if (rval)
+ goto bail_mregion;
+ fmr->ibfmr.rkey = fmr->mr.lkey;
+ fmr->ibfmr.lkey = fmr->mr.lkey;
+ /*
+ * Resources are allocated but no valid mapping (RKEY can't be
+ * used).
+ */
+ fmr->mr.access_flags = mr_access_flags;
+ fmr->mr.max_segs = fmr_attr->max_pages;
+ fmr->mr.page_shift = fmr_attr->page_shift;
+
+ ret = &fmr->ibfmr;
+done:
+ return ret;
+
+bail_mregion:
+ deinit_qib_mregion(&fmr->mr);
+bail:
+ kfree(fmr);
+ ret = ERR_PTR(rval);
+ goto done;
+}
+
+/**
+ * qib_map_phys_fmr - set up a fast memory region
+ * @ibmfr: the fast memory region to set up
+ * @page_list: the list of pages to associate with the fast memory region
+ * @list_len: the number of pages to associate with the fast memory region
+ * @iova: the virtual address of the start of the fast memory region
+ *
+ * This may be called from interrupt context.
+ */
+int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova)
+{
+ struct qib_fmr *fmr = to_ifmr(ibfmr);
+ struct qib_lkey_table *rkt;
+ unsigned long flags;
+ int m, n, i;
+ u32 ps;
+ int ret;
+
+ i = atomic_read(&fmr->mr.refcount);
+ if (i > 2)
+ return -EBUSY;
+
+ if (list_len > fmr->mr.max_segs) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ rkt = &to_idev(ibfmr->device)->lk_table;
+ spin_lock_irqsave(&rkt->lock, flags);
+ fmr->mr.user_base = iova;
+ fmr->mr.iova = iova;
+ ps = 1 << fmr->mr.page_shift;
+ fmr->mr.length = list_len * ps;
+ m = 0;
+ n = 0;
+ for (i = 0; i < list_len; i++) {
+ fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
+ fmr->mr.map[m]->segs[n].length = ps;
+ if (++n == QIB_SEGSZ) {
+ m++;
+ n = 0;
+ }
+ }
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_unmap_fmr - unmap fast memory regions
+ * @fmr_list: the list of fast memory regions to unmap
+ *
+ * Returns 0 on success.
+ */
+int qib_unmap_fmr(struct list_head *fmr_list)
+{
+ struct qib_fmr *fmr;
+ struct qib_lkey_table *rkt;
+ unsigned long flags;
+
+ list_for_each_entry(fmr, fmr_list, ibfmr.list) {
+ rkt = &to_idev(fmr->ibfmr.device)->lk_table;
+ spin_lock_irqsave(&rkt->lock, flags);
+ fmr->mr.user_base = 0;
+ fmr->mr.iova = 0;
+ fmr->mr.length = 0;
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ }
+ return 0;
+}
+
+/**
+ * qib_dealloc_fmr - deallocate a fast memory region
+ * @ibfmr: the fast memory region to deallocate
+ *
+ * Returns 0 on success.
+ */
+int qib_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+ struct qib_fmr *fmr = to_ifmr(ibfmr);
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&fmr->mr);
+ qib_put_mr(&fmr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&fmr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&fmr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&fmr->mr);
+ kfree(fmr);
+out:
+ return ret;
+}
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+ struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+ complete(&mr->comp);
+}
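+
+/*
+ * A minimal sketch of the expected release path (hedged: qib_put_mr(),
+ * defined elsewhere in the driver, is assumed to drop mr->refcount and
+ * queue this callback via call_rcu() when the last reference goes away):
+ *
+ *	if (unlikely(atomic_dec_and_test(&mr->refcount)))
+ *		call_rcu(&mr->list, mr_rcu_callback);
+ *
+ * This is what lets qib_dereg_mr() and qib_dealloc_fmr() above wait on
+ * mr->comp for outstanding users.
+ */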
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
new file mode 100644
index 00000000000..61a0046efb7
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -0,0 +1,715 @@
+/*
+ * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <linux/aer.h>
+#include <linux/module.h>
+
+#include "qib.h"
+
+/*
+ * This file contains PCIe utility routines that are common to the
+ * various QLogic InfiniPath adapters
+ */
+
+/*
+ * Code to adjust PCIe capabilities.
+ * To minimize the change footprint, we call it from qib_pcie_params,
+ * which every chip-specific file calls, even though qib_pcie_params
+ * is otherwise expected to have no side effects.
+ */
+static void qib_tune_pcie_caps(struct qib_devdata *);
+static void qib_tune_pcie_coalesce(struct qib_devdata *);
+
+/*
+ * Do all the common PCIe setup and initialization.
+ * devdata is not yet allocated, and is not allocated until after this
+ * routine returns success. Therefore qib_dev_err() can't be used for error
+ * printing.
+ */
+int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int ret;
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ /*
+ * This can happen (in theory) only if: we did a chip reset and
+ * then failed to reprogram the BAR, or the chip reset itself due
+ * to an internal error, and the driver was then unloaded and
+ * reloaded.
+ *
+ * Both reset cases set the BAR back to initial state. For
+ * the latter case, the AER sticky error bit at offset 0x718
+ * should be set, but the Linux kernel doesn't yet know
+ * about that, it appears. If the original BAR was retained
+ * in the kernel data structures, this may be OK.
+ */
+ qib_early_err(&pdev->dev, "pci enable failed: error %d\n",
+ -ret);
+ goto done;
+ }
+
+ ret = pci_request_regions(pdev, QIB_DRV_NAME);
+ if (ret) {
+ qib_devinfo(pdev, "pci_request_regions fails: err %d\n", -ret);
+ goto bail;
+ }
+
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (ret) {
+ /*
+ * If the 64 bit setup fails, try 32 bit. Some systems
+ * do not setup 64 bit maps on systems with 2GB or less
+ * memory installed.
+ */
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (ret) {
+ qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret);
+ goto bail;
+ }
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ } else
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (ret) {
+ qib_early_err(&pdev->dev,
+ "Unable to set DMA consistent mask: %d\n", ret);
+ goto bail;
+ }
+
+ pci_set_master(pdev);
+ ret = pci_enable_pcie_error_reporting(pdev);
+ if (ret) {
+ qib_early_err(&pdev->dev,
+ "Unable to enable pcie error reporting: %d\n",
+ ret);
+ ret = 0;
+ }
+ goto done;
+
+bail:
+ pci_disable_device(pdev);
+ pci_release_regions(pdev);
+done:
+ return ret;
+}
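+
+/*
+ * A hedged sketch of the expected probe-time ordering (qib_alloc_devdata()
+ * and the argument names here are assumptions; the chip-specific init code
+ * is expected to follow roughly this sequence):
+ *
+ *	ret = qib_pcie_init(pdev, ent);
+ *	if (ret)
+ *		return ret;
+ *	dd = qib_alloc_devdata(pdev, extra);
+ *	ret = qib_pcie_ddinit(dd, pdev, ent);
+ *	...
+ *	qib_pcie_params(dd, minwidth, &nvec, msix_table);
+ */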
+
+/*
+ * Do remaining PCIe setup, once dd is allocated, and save away
+ * fields required to re-initialize after a chip reset, or for
+ * various other purposes
+ */
+int qib_pcie_ddinit(struct qib_devdata *dd, struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ unsigned long len;
+ resource_size_t addr;
+
+ dd->pcidev = pdev;
+ pci_set_drvdata(pdev, dd);
+
+ addr = pci_resource_start(pdev, 0);
+ len = pci_resource_len(pdev, 0);
+
+#if defined(__powerpc__)
+ /* There isn't a generic way to specify writethrough mappings */
+ dd->kregbase = __ioremap(addr, len, _PAGE_NO_CACHE | _PAGE_WRITETHRU);
+#else
+ dd->kregbase = ioremap_nocache(addr, len);
+#endif
+
+ if (!dd->kregbase)
+ return -ENOMEM;
+
+ dd->kregend = (u64 __iomem *)((void __iomem *) dd->kregbase + len);
+ dd->physaddr = addr; /* used for io_remap, etc. */
+
+ /*
+ * Save BARs to rewrite after device reset. Save all 64 bits of
+ * BAR, just in case.
+ */
+ dd->pcibar0 = addr;
+ dd->pcibar1 = addr >> 32;
+ dd->deviceid = ent->device; /* save for later use */
+ dd->vendorid = ent->vendor;
+
+ return 0;
+}
+
+/*
+ * Do PCIe cleanup, after chip-specific cleanup, etc. Just prior
+ * to releasing the dd memory.
+ * void, because the core PCIe cleanup calls used here all return void
+ */
+void qib_pcie_ddcleanup(struct qib_devdata *dd)
+{
+ u64 __iomem *base = (void __iomem *) dd->kregbase;
+
+ dd->kregbase = NULL;
+ iounmap(base);
+ if (dd->piobase)
+ iounmap(dd->piobase);
+ if (dd->userbase)
+ iounmap(dd->userbase);
+ if (dd->piovl15base)
+ iounmap(dd->piovl15base);
+
+ pci_disable_device(dd->pcidev);
+ pci_release_regions(dd->pcidev);
+
+ pci_set_drvdata(dd->pcidev, NULL);
+}
+
+static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
+ struct qib_msix_entry *qib_msix_entry)
+{
+ int ret;
+ int nvec = *msixcnt;
+ struct msix_entry *msix_entry;
+ int i;
+
+ ret = pci_msix_vec_count(dd->pcidev);
+ if (ret < 0)
+ goto do_intx;
+
+ nvec = min(nvec, ret);
+
+ /* We can't pass the qib_msix_entry array to pci_enable_msix_range(),
+ * so use a temporary msix_entry array and copy the allocated
+ * vectors back into the qib_msix_entry array. */
+ msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL);
+ if (!msix_entry)
+ goto do_intx;
+
+ for (i = 0; i < nvec; i++)
+ msix_entry[i] = qib_msix_entry[i].msix;
+
+ ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
+ if (ret < 0)
+ goto free_msix_entry;
+ else
+ nvec = ret;
+
+ for (i = 0; i < nvec; i++)
+ qib_msix_entry[i].msix = msix_entry[i];
+
+ kfree(msix_entry);
+ *msixcnt = nvec;
+ return;
+
+free_msix_entry:
+ kfree(msix_entry);
+
+do_intx:
+ qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, "
+ "falling back to INTx\n", nvec, ret);
+ *msixcnt = 0;
+ qib_enable_intx(dd->pcidev);
+}
+
+/*
+ * We save the msi lo and hi values, so we can restore them after
+ * chip reset (the kernel PCI infrastructure doesn't yet handle that
+ * correctly).
+ */
+static int qib_msi_setup(struct qib_devdata *dd, int pos)
+{
+ struct pci_dev *pdev = dd->pcidev;
+ u16 control;
+ int ret;
+
+ ret = pci_enable_msi(pdev);
+ if (ret)
+ qib_dev_err(dd,
+ "pci_enable_msi failed: %d, interrupts may not work\n",
+ ret);
+ /* continue even if it fails, we may still be OK... */
+
+ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
+ &dd->msi_lo);
+ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
+ &dd->msi_hi);
+ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control);
+ /* now save the data (vector) info */
+ pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT)
+ ? 12 : 8),
+ &dd->msi_data);
+ return ret;
+}
+
+int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
+ struct qib_msix_entry *entry)
+{
+ u16 linkstat, speed;
+ int pos = 0, ret = 1;
+
+ if (!pci_is_pcie(dd->pcidev)) {
+ qib_dev_err(dd, "Can't find PCI Express capability!\n");
+ /* set up something... */
+ dd->lbus_width = 1;
+ dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
+ goto bail;
+ }
+
+ pos = dd->pcidev->msix_cap;
+ if (nent && *nent && pos) {
+ qib_msix_setup(dd, pos, nent, entry);
+ ret = 0; /* did it, either MSIx or INTx */
+ } else {
+ pos = dd->pcidev->msi_cap;
+ if (pos)
+ ret = qib_msi_setup(dd, pos);
+ else
+ qib_dev_err(dd, "No PCI MSI or MSIx capability!\n");
+ }
+ if (!pos)
+ qib_enable_intx(dd->pcidev);
+
+ pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
+ /*
+ * speed is bits 0-3, linkwidth is bits 4-8
+ * no defines for them in headers
+ */
+ speed = linkstat & 0xf;
+ linkstat >>= 4;
+ linkstat &= 0x1f;
+ dd->lbus_width = linkstat;
+
+ switch (speed) {
+ case 1:
+ dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
+ break;
+ case 2:
+ dd->lbus_speed = 5000; /* Gen2, 5GHz */
+ break;
+ default: /* not defined, assume gen1 */
+ dd->lbus_speed = 2500;
+ break;
+ }
+
+ /*
+ * Check against expected pcie width and complain if "wrong"
+ * on first initialization, not afterwards (i.e., reset).
+ */
+ if (minw && linkstat < minw)
+ qib_dev_err(dd,
+ "PCIe width %u (x%u HCA), performance reduced\n",
+ linkstat, minw);
+
+ qib_tune_pcie_caps(dd);
+
+ qib_tune_pcie_coalesce(dd);
+
+bail:
+ /* fill in string, even on errors */
+ snprintf(dd->lbus_info, sizeof(dd->lbus_info),
+ "PCIe,%uMHz,x%u\n", dd->lbus_speed, dd->lbus_width);
+ return ret;
+}
+
+/*
+ * Set up PCIe interrupt state again after a reset. I'd like to just call
+ * pci_enable_msi() again for MSI, but when I do that,
+ * the MSI enable bit doesn't get set in the command word, and
+ * we switch to a different interrupt vector, which is confusing,
+ * so I instead just do it all inline. Perhaps this can somehow be tied
+ * into the PCIe hotplug support at some point.
+ */
+int qib_reinit_intr(struct qib_devdata *dd)
+{
+ int pos;
+ u16 control;
+ int ret = 0;
+
+ /* If we aren't using MSI, don't restore it */
+ if (!dd->msi_lo)
+ goto bail;
+
+ pos = dd->pcidev->msi_cap;
+ if (!pos) {
+ qib_dev_err(dd,
+ "Can't find MSI capability, can't restore MSI settings\n");
+ ret = 0;
+ /* nothing special for MSIx, just MSI */
+ goto bail;
+ }
+ pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
+ dd->msi_lo);
+ pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
+ dd->msi_hi);
+ pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
+ if (!(control & PCI_MSI_FLAGS_ENABLE)) {
+ control |= PCI_MSI_FLAGS_ENABLE;
+ pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
+ control);
+ }
+ /* now rewrite the data (vector) info */
+ pci_write_config_word(dd->pcidev, pos +
+ ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
+ dd->msi_data);
+ ret = 1;
+bail:
+ if (!ret && (dd->flags & QIB_HAS_INTX)) {
+ qib_enable_intx(dd->pcidev);
+ ret = 1;
+ }
+
+ /* and now set the pci master bit again */
+ pci_set_master(dd->pcidev);
+
+ return ret;
+}
+
+/*
+ * Disable msi interrupt if enabled, and clear msi_lo.
+ * This is used primarily for the fallback to INTx, but
+ * is also used in reinit after reset, and during cleanup.
+ */
+void qib_nomsi(struct qib_devdata *dd)
+{
+ dd->msi_lo = 0;
+ pci_disable_msi(dd->pcidev);
+}
+
+/*
+ * Same as qib_nomsi(), but for MSIx.
+ */
+void qib_nomsix(struct qib_devdata *dd)
+{
+ pci_disable_msix(dd->pcidev);
+}
+
+/*
+ * Similar to pci_intx(pdev, 1), except that we make sure
+ * msi(x) is off.
+ */
+void qib_enable_intx(struct pci_dev *pdev)
+{
+ u16 cw, new;
+ int pos;
+
+ /* first, turn on INTx */
+ pci_read_config_word(pdev, PCI_COMMAND, &cw);
+ new = cw & ~PCI_COMMAND_INTX_DISABLE;
+ if (new != cw)
+ pci_write_config_word(pdev, PCI_COMMAND, new);
+
+ pos = pdev->msi_cap;
+ if (pos) {
+ /* then turn off MSI */
+ pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw);
+ new = cw & ~PCI_MSI_FLAGS_ENABLE;
+ if (new != cw)
+ pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new);
+ }
+ pos = pdev->msix_cap;
+ if (pos) {
+ /* then turn off MSIx */
+ pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw);
+ new = cw & ~PCI_MSIX_FLAGS_ENABLE;
+ if (new != cw)
+ pci_write_config_word(pdev, pos + PCI_MSIX_FLAGS, new);
+ }
+}
+
+/*
+ * These two routines are helper routines for the device reset code
+ * to move all the pcie code out of the chip-specific driver code.
+ */
+void qib_pcie_getcmd(struct qib_devdata *dd, u16 *cmd, u8 *iline, u8 *cline)
+{
+ pci_read_config_word(dd->pcidev, PCI_COMMAND, cmd);
+ pci_read_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline);
+ pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
+}
+
+void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
+{
+ int r;
+ r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ dd->pcibar0);
+ if (r)
+ qib_dev_err(dd, "rewrite of BAR0 failed: %d\n", r);
+ r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ dd->pcibar1);
+ if (r)
+ qib_dev_err(dd, "rewrite of BAR1 failed: %d\n", r);
+ /* now re-enable memory access, and restore cosmetic settings */
+ pci_write_config_word(dd->pcidev, PCI_COMMAND, cmd);
+ pci_write_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline);
+ pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
+ r = pci_enable_device(dd->pcidev);
+ if (r)
+ qib_dev_err(dd,
+ "pci_enable_device failed after reset: %d\n", r);
+}
+
+
+static int qib_pcie_coalesce;
+module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
+MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
+
+/*
+ * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
+ * chipsets. This is known to be unsafe for some revisions of some
+ * of these chipsets, with some BIOS settings, and enabling it on those
+ * systems may result in the system crashing, and/or data corruption.
+ */
+static void qib_tune_pcie_coalesce(struct qib_devdata *dd)
+{
+ int r;
+ struct pci_dev *parent;
+ u16 devid;
+ u32 mask, bits, val;
+
+ if (!qib_pcie_coalesce)
+ return;
+
+ /* Find out supported and configured values for parent (root) */
+ parent = dd->pcidev->bus->self;
+ if (parent->bus->parent) {
+ qib_devinfo(dd->pcidev, "Parent not root\n");
+ return;
+ }
+ if (!pci_is_pcie(parent))
+ return;
+ if (parent->vendor != 0x8086)
+ return;
+
+ /*
+ * - bit 12: Max_rdcmp_Imt_EN: need to set to 1
+ * - bit 11: COALESCE_FORCE: need to set to 0
+ * - bit 10: COALESCE_EN: need to set to 1
+ * (but with limitations on some chipsets)
+ *
+ * On the Intel 5000, 5100, and 7300 chipsets, there is also:
+ * - bits 25:24: COALESCE_MODE, need to set to 0
+ */
+ devid = parent->device;
+ if (devid >= 0x25e2 && devid <= 0x25fa) {
+ /* 5000 P/V/X/Z */
+ if (parent->revision <= 0xb2)
+ bits = 1U << 10;
+ else
+ bits = 7U << 10;
+ mask = (3U << 24) | (7U << 10);
+ } else if (devid >= 0x65e2 && devid <= 0x65fa) {
+ /* 5100 */
+ bits = 1U << 10;
+ mask = (3U << 24) | (7U << 10);
+ } else if (devid >= 0x4021 && devid <= 0x402e) {
+ /* 5400 */
+ bits = 7U << 10;
+ mask = 7U << 10;
+ } else if (devid >= 0x3604 && devid <= 0x360a) {
+ /* 7300 */
+ bits = 7U << 10;
+ mask = (3U << 24) | (7U << 10);
+ } else {
+ /* not one of the chipsets that we know about */
+ return;
+ }
+ pci_read_config_dword(parent, 0x48, &val);
+ val &= ~mask;
+ val |= bits;
+ r = pci_write_config_dword(parent, 0x48, val);
+}
+
+/*
+ * BIOS may not set PCIe bus-utilization parameters for best performance.
+ * Check and optionally adjust them to maximize our throughput.
+ */
+static int qib_pcie_caps;
+module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
+MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
+
+static void qib_tune_pcie_caps(struct qib_devdata *dd)
+{
+ struct pci_dev *parent;
+ u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
+ u16 rc_mrrs, ep_mrrs, max_mrrs;
+
+ /* Find out supported and configured values for parent (root) */
+ parent = dd->pcidev->bus->self;
+ if (!pci_is_root_bus(parent->bus)) {
+ qib_devinfo(dd->pcidev, "Parent not root\n");
+ return;
+ }
+
+ if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
+ return;
+
+ rc_mpss = parent->pcie_mpss;
+ rc_mps = ffs(pcie_get_mps(parent)) - 8;
+ /* Find out supported and configured values for endpoint (us) */
+ ep_mpss = dd->pcidev->pcie_mpss;
+ ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;
+
+ /* Find max payload supported by root, endpoint */
+ if (rc_mpss > ep_mpss)
+ rc_mpss = ep_mpss;
+
+ /* If Supported greater than limit in module param, limit it */
+ if (rc_mpss > (qib_pcie_caps & 7))
+ rc_mpss = qib_pcie_caps & 7;
+ /* If less than (allowed, supported), bump root payload */
+ if (rc_mpss > rc_mps) {
+ rc_mps = rc_mpss;
+ pcie_set_mps(parent, 128 << rc_mps);
+ }
+ /* If less than (allowed, supported), bump endpoint payload */
+ if (rc_mpss > ep_mps) {
+ ep_mps = rc_mpss;
+ pcie_set_mps(dd->pcidev, 128 << ep_mps);
+ }
+
+ /*
+ * Now the Read Request size.
+ * No field for max supported, but PCIe spec limits it to 4096,
+ * which is code '5' (log2(4096) - 7)
+ */
+ max_mrrs = 5;
+ if (max_mrrs > ((qib_pcie_caps >> 4) & 7))
+ max_mrrs = (qib_pcie_caps >> 4) & 7;
+
+ max_mrrs = 128 << max_mrrs;
+ rc_mrrs = pcie_get_readrq(parent);
+ ep_mrrs = pcie_get_readrq(dd->pcidev);
+
+ if (max_mrrs > rc_mrrs) {
+ rc_mrrs = max_mrrs;
+ pcie_set_readrq(parent, rc_mrrs);
+ }
+ if (max_mrrs > ep_mrrs) {
+ ep_mrrs = max_mrrs;
+ pcie_set_readrq(dd->pcidev, ep_mrrs);
+ }
+}
+/* End of PCIe capability tuning */
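+
+/*
+ * Worked example of the pcie_caps encoding used above: pcie_caps=0x51
+ * requests MaxPayload code 1 (128 << 1 = 256 bytes, bits 0..3) and
+ * MaxReadReq code 5 (128 << 5 = 4096 bytes, bits 4..7); the payload
+ * request is still limited by what root complex and endpoint support.
+ */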
+
+/*
+ * Everything from here through the qib_pci_err_handler definition is
+ * invoked via the PCI error-handling infrastructure, registered with
+ * the PCI core.
+ */
+static pci_ers_result_t
+qib_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct qib_devdata *dd = pci_get_drvdata(pdev);
+ pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;
+
+ switch (state) {
+ case pci_channel_io_normal:
+ qib_devinfo(pdev, "State Normal, ignoring\n");
+ break;
+
+ case pci_channel_io_frozen:
+ qib_devinfo(pdev, "State Frozen, requesting reset\n");
+ pci_disable_device(pdev);
+ ret = PCI_ERS_RESULT_NEED_RESET;
+ break;
+
+ case pci_channel_io_perm_failure:
+ qib_devinfo(pdev, "State Permanent Failure, disabling\n");
+ if (dd) {
+ /* no more register accesses! */
+ dd->flags &= ~QIB_PRESENT;
+ qib_disable_after_error(dd);
+ }
+ /* else early, or other problem */
+ ret = PCI_ERS_RESULT_DISCONNECT;
+ break;
+
+ default: /* shouldn't happen */
+ qib_devinfo(pdev, "QIB PCI errors detected (state %d)\n",
+ state);
+ break;
+ }
+ return ret;
+}
+
+static pci_ers_result_t
+qib_pci_mmio_enabled(struct pci_dev *pdev)
+{
+ u64 words = 0U;
+ struct qib_devdata *dd = pci_get_drvdata(pdev);
+ pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED;
+
+ if (dd && dd->pport) {
+ words = dd->f_portcntr(dd->pport, QIBPORTCNTR_WORDRCV);
+ if (words == ~0ULL)
+ ret = PCI_ERS_RESULT_NEED_RESET;
+ }
+ qib_devinfo(pdev,
+ "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ words, ret);
+ return ret;
+}
+
+static pci_ers_result_t
+qib_pci_slot_reset(struct pci_dev *pdev)
+{
+ qib_devinfo(pdev, "QIB slot_reset function called, ignored\n");
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static pci_ers_result_t
+qib_pci_link_reset(struct pci_dev *pdev)
+{
+ qib_devinfo(pdev, "QIB link_reset function called, ignored\n");
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static void
+qib_pci_resume(struct pci_dev *pdev)
+{
+ struct qib_devdata *dd = pci_get_drvdata(pdev);
+ qib_devinfo(pdev, "QIB resume function called\n");
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+ /*
+ * Running jobs will fail, since this reset is asynchronous,
+ * unlike a sysfs-requested reset, but it is better than doing
+ * nothing.
+ */
+ qib_init(dd, 1); /* same as re-init after reset */
+}
+
+const struct pci_error_handlers qib_pci_err_handler = {
+ .error_detected = qib_pci_error_detected,
+ .mmio_enabled = qib_pci_mmio_enabled,
+ .link_reset = qib_pci_link_reset,
+ .slot_reset = qib_pci_slot_reset,
+ .resume = qib_pci_resume,
+};
diff --git a/drivers/infiniband/hw/qib/qib_pio_copy.c b/drivers/infiniband/hw/qib/qib_pio_copy.c
new file mode 100644
index 00000000000..10b8c444dd3
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_pio_copy.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "qib.h"
+
+/**
+ * qib_pio_copy - copy data to MMIO space, in multiples of 32-bits
+ * @to: destination, in MMIO space (must be 64-bit aligned)
+ * @from: source (must be 64-bit aligned)
+ * @count: number of 32-bit quantities to copy
+ *
+ * Copy data from kernel space to MMIO space, in multiples of 32 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ */
+void qib_pio_copy(void __iomem *to, const void *from, size_t count)
+{
+#ifdef CONFIG_64BIT
+ u64 __iomem *dst = to;
+ const u64 *src = from;
+ const u64 *end = src + (count >> 1);
+
+ while (src < end)
+ __raw_writeq(*src++, dst++);
+ if (count & 1)
+ __raw_writel(*(const u32 *)src, dst);
+#else
+ u32 __iomem *dst = to;
+ const u32 *src = from;
+ const u32 *end = src + count;
+
+ while (src < end)
+ __raw_writel(*src++, dst++);
+#endif
+}
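+
+/*
+ * A minimal usage sketch (hedged: "piobuf", "hdr" and "hdrwords" are
+ * illustrative names, and qib_flush_wc() is assumed to be the driver's
+ * write-combining flush): copy a packet header into a chip PIO send
+ * buffer, then order it explicitly, since this routine does no barrier.
+ *
+ *	qib_pio_copy(piobuf, hdr, hdrwords);
+ *	qib_flush_wc();
+ */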
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
new file mode 100644
index 00000000000..7fcc150d603
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -0,0 +1,1384 @@
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/jhash.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/seq_file.h>
+#endif
+
+#include "qib.h"
+
+#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
+#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
+
+static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
+ struct qpn_map *map, unsigned off)
+{
+ return (map - qpt->map) * BITS_PER_PAGE + off;
+}
+
+static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
+ struct qpn_map *map, unsigned off,
+ unsigned n)
+{
+ if (qpt->mask) {
+ off++;
+ if (((off & qpt->mask) >> 1) >= n)
+ off = (off | qpt->mask) + 2;
+ } else
+ off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
+ return off;
+}
+
+/*
+ * Convert the AETH credit code into the number of credits.
+ */
+static u32 credit_table[31] = {
+ 0, /* 0 */
+ 1, /* 1 */
+ 2, /* 2 */
+ 3, /* 3 */
+ 4, /* 4 */
+ 6, /* 5 */
+ 8, /* 6 */
+ 12, /* 7 */
+ 16, /* 8 */
+ 24, /* 9 */
+ 32, /* A */
+ 48, /* B */
+ 64, /* C */
+ 96, /* D */
+ 128, /* E */
+ 192, /* F */
+ 256, /* 10 */
+ 384, /* 11 */
+ 512, /* 12 */
+ 768, /* 13 */
+ 1024, /* 14 */
+ 1536, /* 15 */
+ 2048, /* 16 */
+ 3072, /* 17 */
+ 4096, /* 18 */
+ 6144, /* 19 */
+ 8192, /* 1A */
+ 12288, /* 1B */
+ 16384, /* 1C */
+ 24576, /* 1D */
+ 32768 /* 1E */
+};
+
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+{
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+
+ /*
+ * Free the page if someone raced with us installing it.
+ */
+
+ spin_lock(&qpt->lock);
+ if (map->page)
+ free_page(page);
+ else
+ map->page = (void *)page;
+ spin_unlock(&qpt->lock);
+}
+
+/*
+ * Allocate the next available QPN or
+ * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
+ */
+static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
+ enum ib_qp_type type, u8 port)
+{
+ u32 i, offset, max_scan, qpn;
+ struct qpn_map *map;
+ u32 ret;
+
+ if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
+ unsigned n;
+
+ ret = type == IB_QPT_GSI;
+ n = 1 << (ret + 2 * (port - 1));
+ spin_lock(&qpt->lock);
+ if (qpt->flags & n)
+ ret = -EINVAL;
+ else
+ qpt->flags |= n;
+ spin_unlock(&qpt->lock);
+ goto bail;
+ }
+
+ qpn = qpt->last + 2;
+ if (qpn >= QPN_MAX)
+ qpn = 2;
+ if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
+ qpn = (qpn | qpt->mask) + 2;
+ offset = qpn & BITS_PER_PAGE_MASK;
+ map = &qpt->map[qpn / BITS_PER_PAGE];
+ max_scan = qpt->nmaps - !offset;
+ for (i = 0;;) {
+ if (unlikely(!map->page)) {
+ get_map_page(qpt, map);
+ if (unlikely(!map->page))
+ break;
+ }
+ do {
+ if (!test_and_set_bit(offset, map->page)) {
+ qpt->last = qpn;
+ ret = qpn;
+ goto bail;
+ }
+ offset = find_next_offset(qpt, map, offset,
+ dd->n_krcv_queues);
+ qpn = mk_qpn(qpt, map, offset);
+ /*
+ * This test differs from alloc_pidmap().
+ * If find_next_offset() does find a zero
+ * bit, we don't need to check for QPN
+ * wrapping around past our starting QPN.
+ * We just need to be sure we don't loop
+ * forever.
+ */
+ } while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
+ /*
+ * In order to keep the number of pages allocated to a
+ * minimum, we scan all the existing pages before increasing
+ * the size of the bitmap table.
+ */
+ if (++i > max_scan) {
+ if (qpt->nmaps == QPNMAP_ENTRIES)
+ break;
+ map = &qpt->map[qpt->nmaps++];
+ offset = 0;
+ } else if (map < &qpt->map[qpt->nmaps]) {
+ ++map;
+ offset = 0;
+ } else {
+ map = &qpt->map[0];
+ offset = 2;
+ }
+ qpn = mk_qpn(qpt, map, offset);
+ }
+
+ ret = -ENOMEM;
+
+bail:
+ return ret;
+}
+
+static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
+{
+ struct qpn_map *map;
+
+ map = qpt->map + qpn / BITS_PER_PAGE;
+ if (map->page)
+ clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
+}
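+
+/*
+ * Worked example of the bitmap layout above: with 4K pages, BITS_PER_PAGE
+ * is 32768, so QPN 70000 lives in map[2] (70000 / 32768) at bit offset
+ * 4464 (70000 & BITS_PER_PAGE_MASK), and mk_qpn() reverses that
+ * arithmetic.
+ */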
+
+static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
+{
+ return jhash_1word(qpn, dev->qp_rnd) &
+ (dev->qp_table_size - 1);
+}
+
+
+/*
+ * Put the QP into the hash table.
+ * The hash table holds a reference to the QP.
+ */
+static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
+{
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ unsigned long flags;
+ unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
+
+ atomic_inc(&qp->refcount);
+ spin_lock_irqsave(&dev->qpt_lock, flags);
+
+ if (qp->ibqp.qp_num == 0)
+ rcu_assign_pointer(ibp->qp0, qp);
+ else if (qp->ibqp.qp_num == 1)
+ rcu_assign_pointer(ibp->qp1, qp);
+ else {
+ qp->next = dev->qp_table[n];
+ rcu_assign_pointer(dev->qp_table[n], qp);
+ }
+
+ spin_unlock_irqrestore(&dev->qpt_lock, flags);
+}
+
+/*
+ * Remove the QP from the table so it can't be found asynchronously by
+ * the receive interrupt routine.
+ */
+static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
+{
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
+ unsigned long flags;
+ int removed = 1;
+
+ spin_lock_irqsave(&dev->qpt_lock, flags);
+
+ if (rcu_dereference_protected(ibp->qp0,
+ lockdep_is_held(&dev->qpt_lock)) == qp) {
+ rcu_assign_pointer(ibp->qp0, NULL);
+ } else if (rcu_dereference_protected(ibp->qp1,
+ lockdep_is_held(&dev->qpt_lock)) == qp) {
+ rcu_assign_pointer(ibp->qp1, NULL);
+ } else {
+ struct qib_qp *q;
+ struct qib_qp __rcu **qpp;
+
+ removed = 0;
+ qpp = &dev->qp_table[n];
+ for (; (q = rcu_dereference_protected(*qpp,
+ lockdep_is_held(&dev->qpt_lock))) != NULL;
+ qpp = &q->next)
+ if (q == qp) {
+ rcu_assign_pointer(*qpp,
+ rcu_dereference_protected(qp->next,
+ lockdep_is_held(&dev->qpt_lock)));
+ removed = 1;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&dev->qpt_lock, flags);
+ if (removed) {
+ synchronize_rcu();
+ atomic_dec(&qp->refcount);
+ }
+}
+
+/**
+ * qib_free_all_qps - check for QPs still in use
+ * @dd: the qlogic_ib device
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
+ */
+unsigned qib_free_all_qps(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ unsigned long flags;
+ struct qib_qp *qp;
+ unsigned n, qp_inuse = 0;
+
+ for (n = 0; n < dd->num_pports; n++) {
+ struct qib_ibport *ibp = &dd->pport[n].ibport_data;
+
+ if (!qib_mcast_tree_empty(ibp))
+ qp_inuse++;
+ rcu_read_lock();
+ if (rcu_dereference(ibp->qp0))
+ qp_inuse++;
+ if (rcu_dereference(ibp->qp1))
+ qp_inuse++;
+ rcu_read_unlock();
+ }
+
+ spin_lock_irqsave(&dev->qpt_lock, flags);
+ for (n = 0; n < dev->qp_table_size; n++) {
+ qp = rcu_dereference_protected(dev->qp_table[n],
+ lockdep_is_held(&dev->qpt_lock));
+ rcu_assign_pointer(dev->qp_table[n], NULL);
+
+ for (; qp; qp = rcu_dereference_protected(qp->next,
+ lockdep_is_held(&dev->qpt_lock)))
+ qp_inuse++;
+ }
+ spin_unlock_irqrestore(&dev->qpt_lock, flags);
+ synchronize_rcu();
+
+ return qp_inuse;
+}
+
+/**
+ * qib_lookup_qpn - return the QP with the given QPN
+ * @ibp: the IB port
+ * @qpn: the QP number to look up
+ *
+ * The caller is responsible for decrementing the QP reference count
+ * when done.
+ */
+struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
+{
+ struct qib_qp *qp = NULL;
+
+ rcu_read_lock();
+ if (unlikely(qpn <= 1)) {
+ if (qpn == 0)
+ qp = rcu_dereference(ibp->qp0);
+ else
+ qp = rcu_dereference(ibp->qp1);
+ if (qp)
+ atomic_inc(&qp->refcount);
+ } else {
+ struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
+ unsigned n = qpn_hash(dev, qpn);
+
+ for (qp = rcu_dereference(dev->qp_table[n]); qp;
+ qp = rcu_dereference(qp->next))
+ if (qp->ibqp.qp_num == qpn) {
+ atomic_inc(&qp->refcount);
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return qp;
+}
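+
+/*
+ * A minimal caller sketch (hedged: the receive path is expected to do the
+ * equivalent): look up the QP, use it, then drop the reference taken above.
+ *
+ *	qp = qib_lookup_qpn(ibp, qp_num);
+ *	if (qp) {
+ *		... process the packet ...
+ *		if (atomic_dec_and_test(&qp->refcount))
+ *			wake_up(&qp->wait);
+ *	}
+ */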
+
+/**
+ * qib_reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ */
+static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
+{
+ qp->remote_qpn = 0;
+ qp->qkey = 0;
+ qp->qp_access_flags = 0;
+ atomic_set(&qp->s_dma_busy, 0);
+ qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
+ qp->s_hdrwords = 0;
+ qp->s_wqe = NULL;
+ qp->s_draining = 0;
+ qp->s_next_psn = 0;
+ qp->s_last_psn = 0;
+ qp->s_sending_psn = 0;
+ qp->s_sending_hpsn = 0;
+ qp->s_psn = 0;
+ qp->r_psn = 0;
+ qp->r_msn = 0;
+ if (type == IB_QPT_RC) {
+ qp->s_state = IB_OPCODE_RC_SEND_LAST;
+ qp->r_state = IB_OPCODE_RC_SEND_LAST;
+ } else {
+ qp->s_state = IB_OPCODE_UC_SEND_LAST;
+ qp->r_state = IB_OPCODE_UC_SEND_LAST;
+ }
+ qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+ qp->r_nak_state = 0;
+ qp->r_aflags = 0;
+ qp->r_flags = 0;
+ qp->s_head = 0;
+ qp->s_tail = 0;
+ qp->s_cur = 0;
+ qp->s_acked = 0;
+ qp->s_last = 0;
+ qp->s_ssn = 1;
+ qp->s_lsn = 0;
+ qp->s_mig_state = IB_MIG_MIGRATED;
+ memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+ qp->r_head_ack_queue = 0;
+ qp->s_tail_ack_queue = 0;
+ qp->s_num_rd_atomic = 0;
+ if (qp->r_rq.wq) {
+ qp->r_rq.wq->head = 0;
+ qp->r_rq.wq->tail = 0;
+ }
+ qp->r_sge.num_sge = 0;
+}
+
+static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
+{
+ unsigned n;
+
+ if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+ qib_put_ss(&qp->s_rdma_read_sge);
+
+ qib_put_ss(&qp->r_sge);
+
+ if (clr_sends) {
+ while (qp->s_last != qp->s_head) {
+ struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
+ unsigned i;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ qib_put_mr(sge->mr);
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ }
+ if (qp->s_rdma_mr) {
+ qib_put_mr(qp->s_rdma_mr);
+ qp->s_rdma_mr = NULL;
+ }
+ }
+
+ if (qp->ibqp.qp_type != IB_QPT_RC)
+ return;
+
+ for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
+ struct qib_ack_entry *e = &qp->s_ack_queue[n];
+
+ if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
+ e->rdma_sge.mr) {
+ qib_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+ }
+}
+
+/**
+ * qib_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
+ * @err: the receive completion error to signal if a RWQE is active
+ *
+ * Flushes both send and receive work queues.
+ * Returns true if last WQE event should be generated.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
+ */
+int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct ib_wc wc;
+ int ret = 0;
+
+ if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
+ goto bail;
+
+ qp->state = IB_QPS_ERR;
+
+ if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ }
+
+ if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
+ qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
+
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
+ qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
+ list_del_init(&qp->iowait);
+ }
+ spin_unlock(&dev->pending_lock);
+
+ if (!(qp->s_flags & QIB_S_BUSY)) {
+ qp->s_hdrwords = 0;
+ if (qp->s_rdma_mr) {
+ qib_put_mr(qp->s_rdma_mr);
+ qp->s_rdma_mr = NULL;
+ }
+ if (qp->s_tx) {
+ qib_put_txreq(qp->s_tx);
+ qp->s_tx = NULL;
+ }
+ }
+
+ /* Schedule the sending tasklet to drain the send work queue. */
+ if (qp->s_last != qp->s_head)
+ qib_schedule_send(qp);
+
+ clear_mr_refs(qp, 0);
+
+ memset(&wc, 0, sizeof(wc));
+ wc.qp = &qp->ibqp;
+ wc.opcode = IB_WC_RECV;
+
+ if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
+ wc.wr_id = qp->r_wr_id;
+ wc.status = err;
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wc.status = IB_WC_WR_FLUSH_ERR;
+
+ if (qp->r_rq.wq) {
+ struct qib_rwq *wq;
+ u32 head;
+ u32 tail;
+
+ spin_lock(&qp->r_rq.lock);
+
+ /* sanity check pointers before trusting them */
+ wq = qp->r_rq.wq;
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ while (tail != head) {
+ wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+ if (++tail >= qp->r_rq.size)
+ tail = 0;
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ }
+ wq->tail = tail;
+
+ spin_unlock(&qp->r_rq.lock);
+ } else if (qp->ibqp.event_handler)
+ ret = 1;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_modify_qp - modify the attributes of a queue pair
+ * @ibqp: the queue pair whose attributes we're modifying
+ * @attr: the new attributes
+ * @attr_mask: the mask of attributes to modify
+ * @udata: user data for libibverbs.so
+ *
+ * Returns 0 on success, otherwise returns an errno.
+ */
+int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct qib_ibdev *dev = to_idev(ibqp->device);
+ struct qib_qp *qp = to_iqp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ struct ib_event ev;
+ int lastwqe = 0;
+ int mig = 0;
+ int ret;
+ u32 pmtu = 0; /* for gcc warning only */
+
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_lock);
+
+ cur_state = attr_mask & IB_QP_CUR_STATE ?
+ attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
+ goto inval;
+
+ if (attr_mask & IB_QP_AV) {
+ if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
+ goto inval;
+ if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
+ goto inval;
+ if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
+ goto inval;
+ if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
+ goto inval;
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ if (attr->min_rnr_timer > 31)
+ goto inval;
+
+ if (attr_mask & IB_QP_PORT)
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI ||
+ attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt)
+ goto inval;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ if (attr->dest_qp_num > QIB_QPN_MASK)
+ goto inval;
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ if (attr->retry_cnt > 7)
+ goto inval;
+
+ if (attr_mask & IB_QP_RNR_RETRY)
+ if (attr->rnr_retry > 7)
+ goto inval;
+
+ /*
+ * Don't allow invalid path_mtu values. It is OK to set it greater
+ * than the active mtu (or even the max_cap, if we have tuned that
+ * to a small mtu). We'll set qp->path_mtu to the lesser of the
+ * requested attribute mtu and the active mtu, for packetizing
+ * messages.
+ * Note that the QP port has to be set in INIT and MTU in RTR.
+ */
+ if (attr_mask & IB_QP_PATH_MTU) {
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int mtu, pidx = qp->port_num - 1;
+
+ mtu = ib_mtu_enum_to_int(attr->path_mtu);
+ if (mtu == -1)
+ goto inval;
+ if (mtu > dd->pport[pidx].ibmtu) {
+ switch (dd->pport[pidx].ibmtu) {
+ case 4096:
+ pmtu = IB_MTU_4096;
+ break;
+ case 2048:
+ pmtu = IB_MTU_2048;
+ break;
+ case 1024:
+ pmtu = IB_MTU_1024;
+ break;
+ case 512:
+ pmtu = IB_MTU_512;
+ break;
+ case 256:
+ pmtu = IB_MTU_256;
+ break;
+ default:
+ pmtu = IB_MTU_2048;
+ }
+ } else
+ pmtu = attr->path_mtu;
+ }
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE) {
+ if (attr->path_mig_state == IB_MIG_REARM) {
+ if (qp->s_mig_state == IB_MIG_ARMED)
+ goto inval;
+ if (new_state != IB_QPS_RTS)
+ goto inval;
+ } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
+ if (qp->s_mig_state == IB_MIG_REARM)
+ goto inval;
+ if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
+ goto inval;
+ if (qp->s_mig_state == IB_MIG_ARMED)
+ mig = 1;
+ } else
+ goto inval;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
+ goto inval;
+
+ switch (new_state) {
+ case IB_QPS_RESET:
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->iowait))
+ list_del_init(&qp->iowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irq(&qp->r_lock);
+ /* Stop the sending work queue and retry timer */
+ cancel_work_sync(&qp->s_work);
+ del_timer_sync(&qp->s_timer);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ if (qp->s_tx) {
+ qib_put_txreq(qp->s_tx);
+ qp->s_tx = NULL;
+ }
+ remove_qp(dev, qp);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_lock);
+ clear_mr_refs(qp, 1);
+ qib_reset_qp(qp, ibqp->qp_type);
+ }
+ break;
+
+ case IB_QPS_RTR:
+ /* Allow event to retrigger if QP set to RTR more than once */
+ qp->r_flags &= ~QIB_R_COMM_EST;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQD:
+ qp->s_draining = qp->s_last != qp->s_cur;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_SQE:
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ goto inval;
+ qp->state = new_state;
+ break;
+
+ case IB_QPS_ERR:
+ lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ break;
+
+ default:
+ qp->state = new_state;
+ break;
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX)
+ qp->s_pkey_index = attr->pkey_index;
+
+ if (attr_mask & IB_QP_PORT)
+ qp->port_num = attr->port_num;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ qp->remote_qpn = attr->dest_qp_num;
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
+ qp->s_psn = qp->s_next_psn;
+ qp->s_sending_psn = qp->s_next_psn;
+ qp->s_last_psn = qp->s_next_psn - 1;
+ qp->s_sending_hpsn = qp->s_last_psn;
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN)
+ qp->r_psn = attr->rq_psn & QIB_PSN_MASK;
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ qp->qp_access_flags = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_AV) {
+ qp->remote_ah_attr = attr->ah_attr;
+ qp->s_srate = attr->ah_attr.static_rate;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ qp->alt_ah_attr = attr->alt_ah_attr;
+ qp->s_alt_pkey_index = attr->alt_pkey_index;
+ }
+
+ if (attr_mask & IB_QP_PATH_MIG_STATE) {
+ qp->s_mig_state = attr->path_mig_state;
+ if (mig) {
+ qp->remote_ah_attr = qp->alt_ah_attr;
+ qp->port_num = qp->alt_ah_attr.port_num;
+ qp->s_pkey_index = qp->s_alt_pkey_index;
+ }
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ qp->path_mtu = pmtu;
+ qp->pmtu = ib_mtu_enum_to_int(pmtu);
+ }
+
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ qp->s_retry_cnt = attr->retry_cnt;
+ qp->s_retry = attr->retry_cnt;
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ qp->s_rnr_retry_cnt = attr->rnr_retry;
+ qp->s_rnr_retry = attr->rnr_retry;
+ }
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER)
+ qp->r_min_rnr_timer = attr->min_rnr_timer;
+
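+ /*
+ * Worked example of the conversion below (the IBTA local ACK timeout
+ * is 4.096 usec * 2^timeout): timeout == 14 gives
+ * 4096 * 2^14 / 1000 ~= 67108 usecs, roughly 67 msec before the
+ * retransmit timer fires.
+ */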
+ if (attr_mask & IB_QP_TIMEOUT) {
+ qp->timeout = attr->timeout;
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL);
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ qp->qkey = attr->qkey;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ qp->s_max_rd_atomic = attr->max_rd_atomic;
+
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irq(&qp->r_lock);
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ insert_qp(dev, qp);
+
+ if (lastwqe) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ if (mig) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_PATH_MIG;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ ret = 0;
+ goto bail;
+
+inval:
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irq(&qp->r_lock);
+ ret = -EINVAL;
+
+bail:
+ return ret;
+}
+
+int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+
+ attr->qp_state = qp->state;
+ attr->cur_qp_state = attr->qp_state;
+ attr->path_mtu = qp->path_mtu;
+ attr->path_mig_state = qp->s_mig_state;
+ attr->qkey = qp->qkey;
+ attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
+ attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
+ attr->dest_qp_num = qp->remote_qpn;
+ attr->qp_access_flags = qp->qp_access_flags;
+ attr->cap.max_send_wr = qp->s_size - 1;
+ attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
+ attr->cap.max_send_sge = qp->s_max_sge;
+ attr->cap.max_recv_sge = qp->r_rq.max_sge;
+ attr->cap.max_inline_data = 0;
+ attr->ah_attr = qp->remote_ah_attr;
+ attr->alt_ah_attr = qp->alt_ah_attr;
+ attr->pkey_index = qp->s_pkey_index;
+ attr->alt_pkey_index = qp->s_alt_pkey_index;
+ attr->en_sqd_async_notify = 0;
+ attr->sq_draining = qp->s_draining;
+ attr->max_rd_atomic = qp->s_max_rd_atomic;
+ attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
+ attr->min_rnr_timer = qp->r_min_rnr_timer;
+ attr->port_num = qp->port_num;
+ attr->timeout = qp->timeout;
+ attr->retry_cnt = qp->s_retry_cnt;
+ attr->rnr_retry = qp->s_rnr_retry_cnt;
+ attr->alt_port_num = qp->alt_ah_attr.port_num;
+ attr->alt_timeout = qp->alt_timeout;
+
+ init_attr->event_handler = qp->ibqp.event_handler;
+ init_attr->qp_context = qp->ibqp.qp_context;
+ init_attr->send_cq = qp->ibqp.send_cq;
+ init_attr->recv_cq = qp->ibqp.recv_cq;
+ init_attr->srq = qp->ibqp.srq;
+ init_attr->cap = attr->cap;
+ if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ else
+ init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->qp_type = qp->ibqp.qp_type;
+ init_attr->port_num = qp->port_num;
+ return 0;
+}
+
+/**
+ * qib_compute_aeth - compute the AETH (syndrome + MSN)
+ * @qp: the queue pair to compute the AETH for
+ *
+ * Returns the AETH.
+ */
+__be32 qib_compute_aeth(struct qib_qp *qp)
+{
+ u32 aeth = qp->r_msn & QIB_MSN_MASK;
+
+ if (qp->ibqp.srq) {
+ /*
+ * Shared receive queues don't generate credits.
+ * Set the credit field to the invalid value.
+ */
+ aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT;
+ } else {
+ u32 min, max, x;
+ u32 credits;
+ struct qib_rwq *wq = qp->r_rq.wq;
+ u32 head;
+ u32 tail;
+
+ /* sanity check pointers before trusting them */
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ /*
+ * Compute the number of credits available (RWQEs).
+ * XXX Not holding the r_rq.lock here so there is a small
+ * chance that the pair of reads are not atomic.
+ */
+ credits = head - tail;
+ if ((int)credits < 0)
+ credits += qp->r_rq.size;
+ /*
+ * Binary search the credit table to find the code to
+ * use.
+ */
+ min = 0;
+ max = 31;
+ for (;;) {
+ x = (min + max) / 2;
+ if (credit_table[x] == credits)
+ break;
+ if (credit_table[x] > credits)
+ max = x;
+ else if (min == x)
+ break;
+ else
+ min = x;
+ }
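+ /*
+ * x now indexes the largest credit_table entry that does not
+ * exceed the available RWQE count; it becomes the credit code
+ * carried in the AETH below.
+ */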
+ aeth |= x << QIB_AETH_CREDIT_SHIFT;
+ }
+ return cpu_to_be32(aeth);
+}
+
+/**
+ * qib_create_qp - create a queue pair for a device
+ * @ibpd: the protection domain whose device we create the queue pair for
+ * @init_attr: the attributes of the queue pair
+ * @udata: user data for libibverbs.so
+ *
+ * Returns the queue pair on success, otherwise returns an errno.
+ *
+ * Called by the ib_create_qp() core verbs function.
+ */
+struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct qib_qp *qp;
+ int err;
+ struct qib_swqe *swq = NULL;
+ struct qib_ibdev *dev;
+ struct qib_devdata *dd;
+ size_t sz;
+ size_t sg_list_sz;
+ struct ib_qp *ret;
+
+ if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
+ init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+ init_attr->create_flags) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ /* Check receive queue parameters if no SRQ is specified. */
+ if (!init_attr->srq) {
+ if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
+ init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ if (init_attr->cap.max_send_sge +
+ init_attr->cap.max_send_wr +
+ init_attr->cap.max_recv_sge +
+ init_attr->cap.max_recv_wr == 0) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ }
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ if (init_attr->port_num == 0 ||
+ init_attr->port_num > ibpd->device->phys_port_cnt) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ case IB_QPT_UC:
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ sz = sizeof(struct qib_sge) *
+ init_attr->cap.max_send_sge +
+ sizeof(struct qib_swqe);
+ swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+ if (swq == NULL) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+ sz = sizeof(*qp);
+ sg_list_sz = 0;
+ if (init_attr->srq) {
+ struct qib_srq *srq = to_isrq(init_attr->srq);
+
+ if (srq->rq.max_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (srq->rq.max_sge - 1);
+ } else if (init_attr->cap.max_recv_sge > 1)
+ sg_list_sz = sizeof(*qp->r_sg_list) *
+ (init_attr->cap.max_recv_sge - 1);
+ qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+ if (!qp) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_swq;
+ }
+ RCU_INIT_POINTER(qp->next, NULL);
+ qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+ if (!qp->s_hdr) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ qp->timeout_jiffies =
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL);
+ if (init_attr->srq)
+ sz = 0;
+ else {
+ qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+ qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+ sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+ sizeof(struct qib_rwqe);
+ qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz);
+ if (!qp->r_rq.wq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_qp;
+ }
+ }
+
+ /*
+ * ib_create_qp() will initialize qp->ibqp
+ * except for qp->ibqp.qp_num.
+ */
+ spin_lock_init(&qp->r_lock);
+ spin_lock_init(&qp->s_lock);
+ spin_lock_init(&qp->r_rq.lock);
+ atomic_set(&qp->refcount, 0);
+ init_waitqueue_head(&qp->wait);
+ init_waitqueue_head(&qp->wait_dma);
+ init_timer(&qp->s_timer);
+ qp->s_timer.data = (unsigned long)qp;
+ INIT_WORK(&qp->s_work, qib_do_send);
+ INIT_LIST_HEAD(&qp->iowait);
+ INIT_LIST_HEAD(&qp->rspwait);
+ qp->state = IB_QPS_RESET;
+ qp->s_wq = swq;
+ qp->s_size = init_attr->cap.max_send_wr + 1;
+ qp->s_max_sge = init_attr->cap.max_send_sge;
+ if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+ qp->s_flags = QIB_S_SIGNAL_REQ_WR;
+ dev = to_idev(ibpd->device);
+ dd = dd_from_dev(dev);
+ err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
+ init_attr->port_num);
+ if (err < 0) {
+ ret = ERR_PTR(err);
+ vfree(qp->r_rq.wq);
+ goto bail_qp;
+ }
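+ /* on success, alloc_qpn() returned the newly allocated QP number */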
+ qp->ibqp.qp_num = err;
+ qp->port_num = init_attr->port_num;
+ qib_reset_qp(qp, init_attr->qp_type);
+ break;
+
+ default:
+ /* Don't support raw QPs */
+ ret = ERR_PTR(-ENOSYS);
+ goto bail;
+ }
+
+ init_attr->cap.max_inline_data = 0;
+
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ if (!qp->r_rq.wq) {
+ __u64 offset = 0;
+
+ err = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else {
+ u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;
+
+ qp->ip = qib_create_mmap_info(dev, s,
+ ibpd->uobject->context,
+ qp->r_rq.wq);
+ if (!qp->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ err = ib_copy_to_udata(udata, &(qp->ip->offset),
+ sizeof(qp->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ }
+ }
+
+ spin_lock(&dev->n_qps_lock);
+ if (dev->n_qps_allocated == ib_qib_max_qps) {
+ spin_unlock(&dev->n_qps_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_qps_allocated++;
+ spin_unlock(&dev->n_qps_lock);
+
+ if (qp->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = &qp->ibqp;
+ goto bail;
+
+bail_ip:
+ if (qp->ip)
+ kref_put(&qp->ip->ref, qib_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
+ free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
+bail_qp:
+ kfree(qp->s_hdr);
+ kfree(qp);
+bail_swq:
+ vfree(swq);
+bail:
+ return ret;
+}
+
+/**
+ * qib_destroy_qp - destroy a queue pair
+ * @ibqp: the queue pair to destroy
+ *
+ * Returns 0 on success.
+ *
+ * Note that this can be called while the QP is actively sending or
+ * receiving!
+ */
+int qib_destroy_qp(struct ib_qp *ibqp)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_ibdev *dev = to_idev(ibqp->device);
+
+ /* Make sure HW and driver activity is stopped. */
+ spin_lock_irq(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+ spin_lock(&dev->pending_lock);
+ if (!list_empty(&qp->iowait))
+ list_del_init(&qp->iowait);
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
+ spin_unlock_irq(&qp->s_lock);
+ cancel_work_sync(&qp->s_work);
+ del_timer_sync(&qp->s_timer);
+ wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
+ if (qp->s_tx) {
+ qib_put_txreq(qp->s_tx);
+ qp->s_tx = NULL;
+ }
+ remove_qp(dev, qp);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+ clear_mr_refs(qp, 1);
+ } else
+ spin_unlock_irq(&qp->s_lock);
+
+ /* all user's cleaned up, mark it available */
+ free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
+ spin_lock(&dev->n_qps_lock);
+ dev->n_qps_allocated--;
+ spin_unlock(&dev->n_qps_lock);
+
+ if (qp->ip)
+ kref_put(&qp->ip->ref, qib_release_mmap_info);
+ else
+ vfree(qp->r_rq.wq);
+ vfree(qp->s_wq);
+ kfree(qp->s_hdr);
+ kfree(qp);
+ return 0;
+}
+
+/**
+ * qib_init_qpn_table - initialize the QP number table for a device
+ * @qpt: the QPN table
+ */
+void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
+{
+ spin_lock_init(&qpt->lock);
+ qpt->last = 1; /* start with QPN 2 */
+ qpt->nmaps = 1;
+ qpt->mask = dd->qpn_mask;
+}
+
+/**
+ * qib_free_qpn_table - free the QP number table for a device
+ * @qpt: the QPN table
+ */
+void qib_free_qpn_table(struct qib_qpn_table *qpt)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
+ if (qpt->map[i].page)
+ free_page((unsigned long) qpt->map[i].page);
+}
+
+/**
+ * qib_get_credit - process the flow control credit in an AETH
+ * @qp: the QP whose send credit state should be updated
+ * @aeth: the Acknowledge Extended Transport Header
+ *
+ * The QP s_lock should be held.
+ */
+void qib_get_credit(struct qib_qp *qp, u32 aeth)
+{
+ u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
+
+ /*
+ * If the credit is invalid, we can send
+ * as many packets as we like. Otherwise, we have to
+ * honor the credit field.
+ */
+ if (credit == QIB_AETH_CREDIT_INVAL) {
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
+ qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
+ if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+ qib_schedule_send(qp);
+ }
+ }
+ } else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
+ /* Compute new LSN (i.e., MSN + credit) */
+ credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
+ if (qib_cmp24(credit, qp->s_lsn) > 0) {
+ qp->s_lsn = credit;
+ if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+ qib_schedule_send(qp);
+ }
+ }
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter {
+ struct qib_ibdev *dev;
+ struct qib_qp *qp;
+ int n;
+};
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
+{
+ struct qib_qp_iter *iter;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ iter->dev = dev;
+ if (qib_qp_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+int qib_qp_iter_next(struct qib_qp_iter *iter)
+{
+ struct qib_ibdev *dev = iter->dev;
+ int n = iter->n;
+ int ret = 1;
+ struct qib_qp *pqp = iter->qp;
+ struct qib_qp *qp;
+
+ rcu_read_lock();
+ for (; n < dev->qp_table_size; n++) {
+ if (pqp)
+ qp = rcu_dereference(pqp->next);
+ else
+ qp = rcu_dereference(dev->qp_table[n]);
+ pqp = qp;
+ if (qp) {
+ if (iter->qp)
+ atomic_dec(&iter->qp->refcount);
+ atomic_inc(&qp->refcount);
+ rcu_read_unlock();
+ iter->qp = qp;
+ iter->n = n;
+ return 0;
+ }
+ }
+ rcu_read_unlock();
+ if (iter->qp)
+ atomic_dec(&iter->qp->refcount);
+ return ret;
+}
+
+static const char * const qp_type_str[] = {
+ "SMI", "GSI", "RC", "UC", "UD",
+};
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
+{
+ struct qib_swqe *wqe;
+ struct qib_qp *qp = iter->qp;
+
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ seq_printf(s,
+ "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
+ iter->n,
+ qp->ibqp.qp_num,
+ qp_type_str[qp->ibqp.qp_type],
+ qp->state,
+ wqe->wr.opcode,
+ qp->s_hdrwords,
+ qp->s_flags,
+ atomic_read(&qp->s_dma_busy),
+ !list_empty(&qp->iowait),
+ qp->timeout,
+ wqe->ssn,
+ qp->s_lsn,
+ qp->s_last_psn,
+ qp->s_psn, qp->s_next_psn,
+ qp->s_sending_psn, qp->s_sending_hpsn,
+ qp->s_last, qp->s_acked, qp->s_cur,
+ qp->s_tail, qp->s_head, qp->s_size,
+ qp->remote_qpn,
+ qp->remote_ah_attr.dlid);
+}
+
+#endif
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
new file mode 100644
index 00000000000..fa71b1e666c
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -0,0 +1,556 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+#include "qib_qsfp.h"
+
+/*
+ * QSFP support for ib_qib driver, using "Two Wire Serial Interface" driver
+ * in qib_twsi.c
+ */
+#define QSFP_MAX_RETRY 4
+
+static int qsfp_read(struct qib_pportdata *ppd, int addr, void *bp, int len)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 out, mask;
+ int ret, cnt, pass = 0;
+ int stuck = 0;
+ u8 *buff = bp;
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (ret)
+ goto no_unlock;
+
+ if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) {
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ /*
+ * We presume, if we are called at all, that this board has
+ * QSFP. This is on the same i2c chain as the legacy parts,
+ * but only responds if the module is selected via GPIO pins.
+ * Further, there are very long setup and hold requirements
+ * on MODSEL.
+ */
+ mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ if (ppd->hw_pidx) {
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+ out <<= QSFP_GPIO_PORT2_SHIFT;
+ }
+
+ dd->f_gpio_mod(dd, out, mask, mask);
+
+ /*
+ * Module could take up to 2 msec to respond to MOD_SEL, and there
+ * is no way to tell if it is ready, so we must wait.
+ */
+ msleep(2);
+
+ /* Make sure TWSI bus is in sane state. */
+ ret = qib_twsi_reset(dd);
+ if (ret) {
+ qib_dev_porterr(dd, ppd->port,
+ "QSFP interface Reset for read failed\n");
+ ret = -EIO;
+ stuck = 1;
+ goto deselect;
+ }
+
+ /* All QSFP modules are at A0 */
+
+ cnt = 0;
+ while (cnt < len) {
+ unsigned in_page;
+ int wlen = len - cnt;
+ in_page = addr % QSFP_PAGESIZE;
+ if ((in_page + wlen) > QSFP_PAGESIZE)
+ wlen = QSFP_PAGESIZE - in_page;
+ ret = qib_twsi_blk_rd(dd, QSFP_DEV, addr, buff + cnt, wlen);
+ /* Some QSFPs fail the first try. Retry as an experiment */
+ if (ret && cnt == 0 && ++pass < QSFP_MAX_RETRY)
+ continue;
+ if (ret) {
+ /* qib_twsi_blk_rd() 1 for error, else 0 */
+ ret = -EIO;
+ goto deselect;
+ }
+ addr += wlen;
+ cnt += wlen;
+ }
+ ret = cnt;
+
+deselect:
+ /*
+ * Module could take up to 10 usec after transfer before
+ * ready to respond to MOD_SEL negation, and there is no way
+ * to tell if it is ready, so we must wait.
+ */
+ udelay(10);
+ /* set QSFP MODSEL, RST, LP all high */
+ dd->f_gpio_mod(dd, mask, mask, mask);
+
+ /*
+ * Module could take up to 2 msec to respond to MOD_SEL
+ * going away, and there is no way to tell if it is ready,
+ * so we must wait.
+ */
+ if (stuck)
+ qib_dev_err(dd, "QSFP interface bus stuck non-idle\n");
+
+ if (pass >= QSFP_MAX_RETRY && ret)
+ qib_dev_porterr(dd, ppd->port, "QSFP failed even retrying\n");
+ else if (pass)
+ qib_dev_porterr(dd, ppd->port, "QSFP retries: %d\n", pass);
+
+ msleep(2);
+
+bail:
+ mutex_unlock(&dd->eep_lock);
+
+no_unlock:
+ return ret;
+}
+
+/*
+ * qsfp_write
+ * We do not ordinarily write the QSFP, but this is needed to select
+ * the page on non-flat QSFPs, and possibly later unusual cases
+ */
+static int qib_qsfp_write(struct qib_pportdata *ppd, int addr, void *bp,
+ int len)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 out, mask;
+ int ret, cnt;
+ u8 *buff = bp;
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (ret)
+ goto no_unlock;
+
+ if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) {
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ /*
+ * We presume, if we are called at all, that this board has
+ * QSFP. This is on the same i2c chain as the legacy parts,
+ * but only responds if the module is selected via GPIO pins.
+ * Further, there are very long setup and hold requirements
+ * on MODSEL.
+ */
+ mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ if (ppd->hw_pidx) {
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+ out <<= QSFP_GPIO_PORT2_SHIFT;
+ }
+ dd->f_gpio_mod(dd, out, mask, mask);
+
+ /*
+ * Module could take up to 2 msec to respond to MOD_SEL,
+ * and there is no way to tell if it is ready, so we must wait.
+ */
+ msleep(2);
+
+ /* Make sure TWSI bus is in sane state. */
+ ret = qib_twsi_reset(dd);
+ if (ret) {
+ qib_dev_porterr(dd, ppd->port,
+ "QSFP interface Reset for write failed\n");
+ ret = -EIO;
+ goto deselect;
+ }
+
+ /* All QSFP modules are at A0 */
+
+ cnt = 0;
+ while (cnt < len) {
+ unsigned in_page;
+ int wlen = len - cnt;
+ in_page = addr % QSFP_PAGESIZE;
+ if ((in_page + wlen) > QSFP_PAGESIZE)
+ wlen = QSFP_PAGESIZE - in_page;
+ ret = qib_twsi_blk_wr(dd, QSFP_DEV, addr, buff + cnt, wlen);
+ if (ret) {
+ /* qib_twsi_blk_wr() 1 for error, else 0 */
+ ret = -EIO;
+ goto deselect;
+ }
+ addr += wlen;
+ cnt += wlen;
+ }
+ ret = cnt;
+
+deselect:
+ /*
+ * Module could take up to 10 usec after transfer before
+ * ready to respond to MOD_SEL negation, and there is no way
+ * to tell if it is ready, so we must wait.
+ */
+ udelay(10);
+ /* set QSFP MODSEL, RST, LP high */
+ dd->f_gpio_mod(dd, mask, mask, mask);
+ /*
+ * Module could take up to 2 msec to respond to MOD_SEL
+ * going away, and there is no way to tell if it is ready,
+ * so we must wait.
+ */
+ msleep(2);
+
+bail:
+ mutex_unlock(&dd->eep_lock);
+
+no_unlock:
+ return ret;
+}
+
+/*
+ * For validation, we want to check the checksums, even of the
+ * fields we do not otherwise use. This function reads the bytes from
+ * <first> to <next-1> and returns the 8 LSBs of the sum, or < 0 on error.
+ */
+static int qsfp_cks(struct qib_pportdata *ppd, int first, int next)
+{
+ int ret;
+ u16 cks;
+ u8 bval;
+
+ cks = 0;
+ while (first < next) {
+ ret = qsfp_read(ppd, first, &bval, 1);
+ if (ret < 0)
+ goto bail;
+ cks += bval;
+ ++first;
+ }
+ ret = cks & 0xFF;
+bail:
+ return ret;
+
+}
+
+int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
+{
+ int ret;
+ int idx;
+ u16 cks;
+ u8 peek[4];
+
+ /* ensure sane contents on invalid reads, for cable swaps */
+ memset(cp, 0, sizeof(*cp));
+
+ if (!qib_qsfp_mod_present(ppd)) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ ret = qsfp_read(ppd, 0, peek, 3);
+ if (ret < 0)
+ goto bail;
+ if ((peek[0] & 0xFE) != 0x0C)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
+
+ if ((peek[2] & 2) == 0) {
+ /*
+ * If cable is paged, rather than "flat memory", we need to
+ * set the page to zero, even if it already appears to be zero.
+ */
+ u8 poke = 0;
+ ret = qib_qsfp_write(ppd, 127, &poke, 1);
+ udelay(50);
+ if (ret != 1) {
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "Failed QSFP Page set\n");
+ goto bail;
+ }
+ }
+
+ ret = qsfp_read(ppd, QSFP_MOD_ID_OFFS, &cp->id, 1);
+ if (ret < 0)
+ goto bail;
+ if ((cp->id & 0xFE) != 0x0C)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP ID byte is 0x%02X, S/B 0x0C/D\n", cp->id);
+ cks = cp->id;
+
+ ret = qsfp_read(ppd, QSFP_MOD_PWR_OFFS, &cp->pwr, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->pwr;
+
+ ret = qsfp_cks(ppd, QSFP_MOD_PWR_OFFS + 1, QSFP_MOD_LEN_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks += ret;
+
+ ret = qsfp_read(ppd, QSFP_MOD_LEN_OFFS, &cp->len, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->len;
+
+ ret = qsfp_read(ppd, QSFP_MOD_TECH_OFFS, &cp->tech, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->tech;
+
+ ret = qsfp_read(ppd, QSFP_VEND_OFFS, &cp->vendor, QSFP_VEND_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_VEND_LEN; ++idx)
+ cks += cp->vendor[idx];
+
+ ret = qsfp_read(ppd, QSFP_IBXCV_OFFS, &cp->xt_xcv, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->xt_xcv;
+
+ ret = qsfp_read(ppd, QSFP_VOUI_OFFS, &cp->oui, QSFP_VOUI_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_VOUI_LEN; ++idx)
+ cks += cp->oui[idx];
+
+ ret = qsfp_read(ppd, QSFP_PN_OFFS, &cp->partnum, QSFP_PN_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_PN_LEN; ++idx)
+ cks += cp->partnum[idx];
+
+ ret = qsfp_read(ppd, QSFP_REV_OFFS, &cp->rev, QSFP_REV_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_REV_LEN; ++idx)
+ cks += cp->rev[idx];
+
+ ret = qsfp_read(ppd, QSFP_ATTEN_OFFS, &cp->atten, QSFP_ATTEN_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_ATTEN_LEN; ++idx)
+ cks += cp->atten[idx];
+
+ ret = qsfp_cks(ppd, QSFP_ATTEN_OFFS + QSFP_ATTEN_LEN, QSFP_CC_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks += ret;
+
+ cks &= 0xFF;
+ ret = qsfp_read(ppd, QSFP_CC_OFFS, &cp->cks1, 1);
+ if (ret < 0)
+ goto bail;
+ if (cks != cp->cks1)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP cks1 is %02X, computed %02X\n", cp->cks1,
+ cks);
+
+ /* Second checksum covers bytes 192..222 (options, serial, date, lot) */
+ ret = qsfp_cks(ppd, QSFP_CC_OFFS + 1, QSFP_SN_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks = ret;
+
+ ret = qsfp_read(ppd, QSFP_SN_OFFS, &cp->serial, QSFP_SN_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_SN_LEN; ++idx)
+ cks += cp->serial[idx];
+
+ ret = qsfp_read(ppd, QSFP_DATE_OFFS, &cp->date, QSFP_DATE_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_DATE_LEN; ++idx)
+ cks += cp->date[idx];
+
+ ret = qsfp_read(ppd, QSFP_LOT_OFFS, &cp->lot, QSFP_LOT_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_LOT_LEN; ++idx)
+ cks += cp->lot[idx];
+
+ ret = qsfp_cks(ppd, QSFP_LOT_OFFS + QSFP_LOT_LEN, QSFP_CC_EXT_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks += ret;
+
+ ret = qsfp_read(ppd, QSFP_CC_EXT_OFFS, &cp->cks2, 1);
+ if (ret < 0)
+ goto bail;
+ cks &= 0xFF;
+ if (cks != cp->cks2)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP cks2 is %02X, computed %02X\n", cp->cks2,
+ cks);
+ return 0;
+
+bail:
+ cp->id = 0;
+ return ret;
+}
+
+const char * const qib_qsfp_devtech[16] = {
+ "850nm VCSEL", "1310nm VCSEL", "1550nm VCSEL", "1310nm FP",
+ "1310nm DFB", "1550nm DFB", "1310nm EML", "1550nm EML",
+ "Cu Misc", "1490nm DFB", "Cu NoEq", "Cu Eq",
+ "Undef", "Cu Active BothEq", "Cu FarEq", "Cu NearEq"
+};
+
+#define QSFP_DUMP_CHUNK 16 /* Holds longest string */
+#define QSFP_DEFAULT_HDR_CNT 224
+
+static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
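+/*
+ * Each power-class entry above is 4 characters wide; qib_qsfp_dump() below
+ * indexes the string by QSFP_PWR(pwr) * 4 and prints the first three
+ * characters (e.g. "2.0") followed by a literal 'W'.
+ */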
+
+int qib_qsfp_mod_present(struct qib_pportdata *ppd)
+{
+ u32 mask;
+ int ret;
+
+ mask = QSFP_GPIO_MOD_PRS_N <<
+ (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT);
+ ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
+
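+ /*
+ * MOD_PRS_N is active low and sits at bit 3 for port 1 (shifted up
+ * by QSFP_GPIO_PORT2_SHIFT for port 2), so isolate that bit, shift
+ * it down to bit 0 and invert it to report "module present".
+ */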
+ return !((ret & mask) >>
+ ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3));
+}
+
+/*
+ * Initialize structures that control access to QSFP. Called once per port
+ * on cards that support QSFP.
+ */
+void qib_qsfp_init(struct qib_qsfp_data *qd,
+ void (*fevent)(struct work_struct *))
+{
+ u32 mask, highs;
+
+ struct qib_devdata *dd = qd->ppd->dd;
+
+ /* Initialize work struct for later QSFP events */
+ INIT_WORK(&qd->work, fevent);
+
+ /*
+ * Later, we may want more validation. For now, just set up pins and
+ * blip reset. If module is present, call qib_refresh_qsfp_cache(),
+ * to do further init.
+ */
+ mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ highs = mask - QSFP_GPIO_MOD_RST_N;
+ if (qd->ppd->hw_pidx) {
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+ highs <<= QSFP_GPIO_PORT2_SHIFT;
+ }
+ dd->f_gpio_mod(dd, highs, mask, mask);
+ udelay(20); /* Generous RST dwell */
+
+ dd->f_gpio_mod(dd, mask, mask, mask);
+ return;
+}
+
+void qib_qsfp_deinit(struct qib_qsfp_data *qd)
+{
+ /*
+ * There is nothing to do here for now. Our work is scheduled
+ * with queue_work(), and flush_workqueue() from remove_one
+ * will block until all work setup with queue_work()
+ * completes.
+ */
+}
+
+int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
+{
+ struct qib_qsfp_cache cd;
+ u8 bin_buff[QSFP_DUMP_CHUNK];
+ char lenstr[6];
+ int sofar, ret;
+ int bidx = 0;
+
+ sofar = 0;
+ ret = qib_refresh_qsfp_cache(ppd, &cd);
+ if (ret < 0)
+ goto bail;
+
+ lenstr[0] = ' ';
+ lenstr[1] = '\0';
+ if (QSFP_IS_CU(cd.tech))
+ sprintf(lenstr, "%dM ", cd.len);
+
+ sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", pwr_codes +
+ (QSFP_PWR(cd.pwr) * 4));
+
+ sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", lenstr,
+ qib_qsfp_devtech[cd.tech >> 4]);
+
+ sofar += scnprintf(buf + sofar, len - sofar, "Vendor:%.*s\n",
+ QSFP_VEND_LEN, cd.vendor);
+
+ sofar += scnprintf(buf + sofar, len - sofar, "OUI:%06X\n",
+ QSFP_OUI(cd.oui));
+
+ sofar += scnprintf(buf + sofar, len - sofar, "Part#:%.*s\n",
+ QSFP_PN_LEN, cd.partnum);
+ sofar += scnprintf(buf + sofar, len - sofar, "Rev:%.*s\n",
+ QSFP_REV_LEN, cd.rev);
+ if (QSFP_IS_CU(cd.tech))
+ sofar += scnprintf(buf + sofar, len - sofar, "Atten:%d, %d\n",
+ QSFP_ATTEN_SDR(cd.atten),
+ QSFP_ATTEN_DDR(cd.atten));
+ sofar += scnprintf(buf + sofar, len - sofar, "Serial:%.*s\n",
+ QSFP_SN_LEN, cd.serial);
+ sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
+ QSFP_DATE_LEN, cd.date);
+ sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
+ QSFP_LOT_LEN, cd.lot);
+
+ while (bidx < QSFP_DEFAULT_HDR_CNT) {
+ int iidx;
+ ret = qsfp_read(ppd, bidx, bin_buff, QSFP_DUMP_CHUNK);
+ if (ret < 0)
+ goto bail;
+ for (iidx = 0; iidx < ret; ++iidx) {
+ sofar += scnprintf(buf + sofar, len-sofar, " %02X",
+ bin_buff[iidx]);
+ }
+ sofar += scnprintf(buf + sofar, len - sofar, "\n");
+ bidx += QSFP_DUMP_CHUNK;
+ }
+ ret = sofar;
+bail:
+ return ret;
+}
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
new file mode 100644
index 00000000000..91908f533a2
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* QSFP support common definitions, for ib_qib driver */
+
+#define QSFP_DEV 0xA0
+#define QSFP_PWR_LAG_MSEC 2000
+#define QSFP_MODPRS_LAG_MSEC 20
+
+/*
+ * Below are masks for various QSFP signals, for Port 1.
+ * Port2 equivalents are shifted by QSFP_GPIO_PORT2_SHIFT.
+ * _N means asserted low
+ */
+#define QSFP_GPIO_MOD_SEL_N (4)
+#define QSFP_GPIO_MOD_PRS_N (8)
+#define QSFP_GPIO_INT_N (0x10)
+#define QSFP_GPIO_MOD_RST_N (0x20)
+#define QSFP_GPIO_LP_MODE (0x40)
+#define QSFP_GPIO_PORT2_SHIFT 5
+
+#define QSFP_PAGESIZE 128
+/* Defined fields that QLogic requires of qualified cables */
+/* Byte 0 is Identifier, not checked */
+/* Byte 1 is reserved "status MSB" */
+/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
+/*
+ * Rest of first 128 not used, although 127 is reserved for page select
+ * if module is not "Flat memory".
+ */
+/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
+#define QSFP_MOD_ID_OFFS 128
+/*
+ * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class
+ * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
+ */
+#define QSFP_MOD_PWR_OFFS 129
+/* Byte 130 is Connector type. Not QLogic req'd */
+/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
+/* Byte 139 is encoding. code 0x01 is 8b10b. Not QLogic req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not QLogic req'd */
+/* Byte 141 is Extended Rate Select. Not QLogic req'd */
+/* Bytes 142..145 are lengths for various fiber types. Not QLogic req'd */
+/* Byte 146 is length for Copper. Units of 1 meter */
+#define QSFP_MOD_LEN_OFFS 146
+/*
+ * Byte 147 is Device technology. D0..3 not QLogic req'd
+ * D4..7 select from 15 choices, translated by table:
+ */
+#define QSFP_MOD_TECH_OFFS 147
+extern const char *const qib_qsfp_devtech[16];
+/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
+#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
+#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
+/* Attenuation should be valid for copper other than full/near Eq */
+#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
+/* Length is only valid if technology is "copper" */
+#define QSFP_IS_CU(tech) ((0xED00 >> ((tech) >> 4)) & 1)
+#define QSFP_TECH_1490 9
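+/*
+ * The bitmap macros above use the high nibble of the tech byte as an index
+ * into a 16-bit mask over the 16 qib_qsfp_devtech[] entries; e.g.
+ * QSFP_IS_CU() tests bit (tech >> 4) of 0xED00, which selects exactly the
+ * copper ("Cu ...") entries of that table.
+ */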
+
+#define QSFP_OUI(oui) (((unsigned)oui[0] << 16) | ((unsigned)oui[1] << 8) | \
+ oui[2])
+#define QSFP_OUI_AMPHENOL 0x415048
+#define QSFP_OUI_FINISAR 0x009065
+#define QSFP_OUI_GORE 0x002177
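+/* e.g. OUI bytes { 0x41, 0x50, 0x48 } read from the EEPROM pack to 0x415048 (Amphenol) */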
+
+/* Bytes 148..163 are Vendor Name, Left-justified Blank-filled */
+#define QSFP_VEND_OFFS 148
+#define QSFP_VEND_LEN 16
+/* Byte 164 is IB Extended transceiver codes. Bits D0..3 are SDR,DDR,QDR,EDR */
+#define QSFP_IBXCV_OFFS 164
+/* Bytes 165..167 are Vendor OUI number */
+#define QSFP_VOUI_OFFS 165
+#define QSFP_VOUI_LEN 3
+/* Bytes 168..183 are Vendor Part Number, string */
+#define QSFP_PN_OFFS 168
+#define QSFP_PN_LEN 16
+/* Bytes 184,185 are Vendor Rev. Left Justified, Blank-filled */
+#define QSFP_REV_OFFS 184
+#define QSFP_REV_LEN 2
+/*
+ * Bytes 186,187 are Wavelength, if Optical. Not Qlogic req'd
+ * If copper, they are attenuation in dB:
+ * Byte 186 is at 2.5Gb/sec (SDR), Byte 187 at 5.0Gb/sec (DDR)
+ */
+#define QSFP_ATTEN_OFFS 186
+#define QSFP_ATTEN_LEN 2
+/* Bytes 188,189 are Wavelength tolerance, not QLogic req'd */
+/* Byte 190 is Max Case Temp. Not QLogic req'd */
+/* Byte 191 is LSB of sum of bytes 128..190. Not QLogic req'd */
+#define QSFP_CC_OFFS 191
+/* Bytes 192..195 are Options implemented in qsfp. Not Qlogic req'd */
+/* Bytes 196..211 are Serial Number, String */
+#define QSFP_SN_OFFS 196
+#define QSFP_SN_LEN 16
+/* Bytes 212..219 are date-code YYMMDD (MM==1 for Jan) */
+#define QSFP_DATE_OFFS 212
+#define QSFP_DATE_LEN 6
+/* Bytes 218,219 are optional lot-code, string */
+#define QSFP_LOT_OFFS 218
+#define QSFP_LOT_LEN 2
+/* Bytes 220, 221 indicate monitoring options, Not QLogic req'd */
+/* Byte 223 is LSB of sum of bytes 192..222 */
+#define QSFP_CC_EXT_OFFS 223
+
+/*
+ * struct qib_qsfp_data encapsulates state of QSFP device for one port.
+ * it will be part of port-chip-specific data if a board supports QSFP.
+ *
+ * Since multiple board-types use QSFP, and their pport_data structs
+ * differ (in the chip-specific section), we need a pointer to its head.
+ *
+ * Avoiding premature optimization, we will have one work_struct per port,
+ * and let the (increasingly inaccurately named) eep_lock arbitrate
+ * access to common resources.
+ *
+ */
+
+/*
+ * Hold the parts of the onboard EEPROM that we care about, so we aren't
+ * constantly bit-banging
+ */
+struct qib_qsfp_cache {
+ u8 id; /* must be 0x0C or 0x0D; 0 indicates invalid EEPROM read */
+ u8 pwr; /* in D6,7 */
+ u8 len; /* in meters, Cu only */
+ u8 tech;
+ char vendor[QSFP_VEND_LEN];
+ u8 xt_xcv; /* Ext. transceiver codes, 4 lsbs are IB speed supported */
+ u8 oui[QSFP_VOUI_LEN];
+ u8 partnum[QSFP_PN_LEN];
+ u8 rev[QSFP_REV_LEN];
+ u8 atten[QSFP_ATTEN_LEN];
+ u8 cks1; /* Checksum of bytes 128..190 */
+ u8 serial[QSFP_SN_LEN];
+ u8 date[QSFP_DATE_LEN];
+ u8 lot[QSFP_LOT_LEN];
+ u8 cks2; /* Checksum of bytes 192..222 */
+};
+
+#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
+#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
+
+struct qib_qsfp_data {
+ /* Helps to find our way */
+ struct qib_pportdata *ppd;
+ struct work_struct work;
+ struct qib_qsfp_cache cache;
+ unsigned long t_insert;
+ u8 modpresent;
+};
+
+extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
+ struct qib_qsfp_cache *cp);
+extern int qib_qsfp_mod_present(struct qib_pportdata *ppd);
+extern void qib_qsfp_init(struct qib_qsfp_data *qd,
+ void (*fevent)(struct work_struct *));
+extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
new file mode 100644
index 00000000000..2f2501890c4
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -0,0 +1,2290 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/io.h>
+
+#include "qib.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_RC_##x
+
+static void rc_timeout(unsigned long arg);
+
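+/*
+ * Reposition the SGE state in *ss to the byte offset within the WQE that
+ * corresponds to PSN: each PSN past wqe->psn represents one pMTU of payload
+ * already sent.  Returns the number of payload bytes remaining.
+ */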
+static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ss->total_len = wqe->length;
+ qib_skip_sge(ss, len, 0);
+ return wqe->length - len;
+}
+
+static void start_timer(struct qib_qp *qp)
+{
+ qp->s_flags |= QIB_S_TIMER;
+ qp->s_timer.function = rc_timeout;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ qp->s_timer.expires = jiffies + qp->timeout_jiffies;
+ add_timer(&qp->s_timer);
+}
+
+/**
+ * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
+ * @dev: the device for this QP
+ * @qp: a pointer to the QP
+ * @ohdr: a pointer to the IB header being constructed
+ * @pmtu: the path MTU
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
+ * Note the QP s_lock must be held.
+ */
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+ struct qib_other_headers *ohdr, u32 pmtu)
+{
+ struct qib_ack_entry *e;
+ u32 hwords;
+ u32 len;
+ u32 bth0;
+ u32 bth2;
+
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto bail;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+
+ switch (qp->s_ack_state) {
+ case OP(RDMA_READ_RESPONSE_LAST):
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->rdma_sge.mr) {
+ qib_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+ /* FALLTHROUGH */
+ case OP(ATOMIC_ACKNOWLEDGE):
+ /*
+ * We can increment the tail pointer now that the last
+ * response has been sent instead of only being
+ * constructed.
+ */
+ if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_ONLY):
+ case OP(ACKNOWLEDGE):
+ /* Check for no next entry in the queue. */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+ if (qp->s_flags & QIB_S_ACK_PENDING)
+ goto normal;
+ goto bail;
+ }
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST)) {
+ /*
+ * If a RDMA read response is being resent and
+ * we haven't seen the duplicate request yet,
+ * then stop sending the remaining responses the
+ * responder has seen until the requester resends it.
+ */
+ len = e->rdma_sge.sge_length;
+ if (len && !e->rdma_sge.mr) {
+ qp->s_tail_ack_queue = qp->r_head_ack_queue;
+ goto bail;
+ }
+ /* Copy SGE state in case we need to resend */
+ qp->s_rdma_mr = e->rdma_sge.mr;
+ if (qp->s_rdma_mr)
+ qib_get_mr(qp->s_rdma_mr);
+ qp->s_ack_rdma_sge.sge = e->rdma_sge;
+ qp->s_ack_rdma_sge.num_sge = 1;
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ if (len > pmtu) {
+ len = pmtu;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+ } else {
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ e->sent = 1;
+ }
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_rdma_psn = e->psn;
+ bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
+ } else {
+ /* COMPARE_SWAP or FETCH_ADD */
+ qp->s_cur_sge = NULL;
+ len = 0;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ ohdr->u.at.aeth = qib_compute_aeth(qp);
+ ohdr->u.at.atomic_ack_eth[0] =
+ cpu_to_be32(e->atomic_data >> 32);
+ ohdr->u.at.atomic_ack_eth[1] =
+ cpu_to_be32(e->atomic_data);
+ hwords += sizeof(ohdr->u.at) / sizeof(u32);
+ bth2 = e->psn & QIB_PSN_MASK;
+ e->sent = 1;
+ }
+ bth0 = qp->s_ack_state << 24;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
+ if (qp->s_rdma_mr)
+ qib_get_mr(qp->s_rdma_mr);
+ len = qp->s_ack_rdma_sge.sge.sge_length;
+ if (len > pmtu)
+ len = pmtu;
+ else {
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ e->sent = 1;
+ }
+ bth0 = qp->s_ack_state << 24;
+ bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
+ break;
+
+ default:
+normal:
+ /*
+ * Send a regular ACK.
+ * Set the s_ack_state so we wait until after sending
+ * the ACK before setting s_ack_state to ACKNOWLEDGE
+ * (see above).
+ */
+ qp->s_ack_state = OP(SEND_ONLY);
+ qp->s_flags &= ~QIB_S_ACK_PENDING;
+ qp->s_cur_sge = NULL;
+ if (qp->s_nak_state)
+ ohdr->u.aeth =
+ cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ (qp->s_nak_state <<
+ QIB_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ hwords++;
+ len = 0;
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ bth2 = qp->s_ack_psn & QIB_PSN_MASK;
+ }
+ qp->s_rdma_ack_cnt++;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_size = len;
+ qib_make_ruc_header(qp, ohdr, bth0, bth2);
+ return 1;
+
+bail:
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+ return 0;
+}
+
+/**
+ * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_rc_req(struct qib_qp *qp)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct qib_other_headers *ohdr;
+ struct qib_sge_state *ss;
+ struct qib_swqe *wqe;
+ u32 hwords;
+ u32 len;
+ u32 bth0;
+ u32 bth2;
+ u32 pmtu = qp->pmtu;
+ char newreq;
+ unsigned long flags;
+ int ret = 0;
+ int delta;
+
+ ohdr = &qp->s_hdr->u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr->u.l.oth;
+
+ /*
+ * The lock is needed to synchronize between the sending tasklet,
+ * the receive interrupt handler, and timeout resends.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Sending responses takes priority over sending requests. */
+ if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+ qib_make_rc_ack(dev, qp, ohdr, pmtu))
+ goto done;
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
+ if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
+ /* will get called again */
+ goto done;
+ }
+
+ if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+ goto bail;
+
+ if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
+ if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
+ qp->s_flags |= QIB_S_WAIT_PSN;
+ goto bail;
+ }
+ qp->s_sending_psn = qp->s_psn;
+ qp->s_sending_hpsn = qp->s_psn - 1;
+ }
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 0;
+
+ /* Send a request. */
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ switch (qp->s_state) {
+ default:
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+ goto bail;
+ /*
+ * Resend an old request or start a new one.
+ *
+ * We keep track of the current SWQE so that
+ * we don't reset the "furthest progress" state
+ * if we need to back up.
+ */
+ newreq = 0;
+ if (qp->s_cur == qp->s_tail) {
+ /* Check if send work queue is empty. */
+ if (qp->s_tail == qp->s_head)
+ goto bail;
+ /*
+ * If a fence is requested, wait for previous
+ * RDMA read and atomic operations to finish.
+ */
+ if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ qp->s_num_rd_atomic) {
+ qp->s_flags |= QIB_S_WAIT_FENCE;
+ goto bail;
+ }
+ wqe->psn = qp->s_next_psn;
+ newreq = 1;
+ }
+ /*
+ * Note that we have to be careful not to modify the
+ * original work request since we may need to resend
+ * it.
+ */
+ len = wqe->length;
+ ss = &qp->s_sge;
+ bth2 = qp->s_psn & QIB_PSN_MASK;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ /* If no credit, return. */
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+ qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
+ wqe->lpsn = wqe->psn;
+ if (len > pmtu) {
+ wqe->lpsn += (len - 1) / pmtu;
+ qp->s_state = OP(SEND_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_ONLY);
+ else {
+ qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ /* FALLTHROUGH */
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ /* If no credit, return. */
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+ qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ hwords += sizeof(struct ib_reth) / sizeof(u32);
+ wqe->lpsn = wqe->psn;
+ if (len > pmtu) {
+ wqe->lpsn += (len - 1) / pmtu;
+ qp->s_state = OP(RDMA_WRITE_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_ONLY);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after RETH */
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_READ:
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
+ if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= QIB_S_WAIT_RDMAR;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ /*
+ * Adjust s_next_psn to count the
+ * expected number of responses.
+ */
+ if (len > pmtu)
+ qp->s_next_psn += (len - 1) / pmtu;
+ wqe->lpsn = qp->s_next_psn++;
+ }
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ ss = NULL;
+ len = 0;
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
+ if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= QIB_S_WAIT_RDMAR;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ wqe->lpsn = wqe->psn;
+ }
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_state = OP(COMPARE_SWAP);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.swap);
+ ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ } else {
+ qp->s_state = OP(FETCH_ADD);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ ohdr->u.atomic_eth.compare_data = 0;
+ }
+ ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr >> 32);
+ ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr);
+ ohdr->u.atomic_eth.rkey = cpu_to_be32(
+ wqe->wr.wr.atomic.rkey);
+ hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
+ ss = NULL;
+ len = 0;
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ default:
+ goto bail;
+ }
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+ qp->s_len = wqe->length;
+ if (newreq) {
+ qp->s_tail++;
+ if (qp->s_tail >= qp->s_size)
+ qp->s_tail = 0;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ qp->s_psn = wqe->lpsn + 1;
+ else {
+ qp->s_psn++;
+ if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ }
+ break;
+
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ /*
+ * qp->s_state is normally set to the opcode of the
+ * last packet constructed for new requests and therefore
+ * is never set to RDMA read response.
+ * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
+ * thread to indicate a SEND needs to be restarted from an
+ * earlier PSN without interfering with the sending thread.
+ * See qib_restart_rc().
+ */
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
+ /* FALLTHROUGH */
+ case OP(SEND_FIRST):
+ qp->s_state = OP(SEND_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ bth2 = qp->s_psn++ & QIB_PSN_MASK;
+ if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = &qp->s_sge;
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_LAST);
+ else {
+ qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ bth2 |= IB_BTH_REQ_ACK;
+ qp->s_cur++;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /*
+ * qp->s_state is normally set to the opcode of the
+ * last packet constructed for new requests and therefore
+ * is never set to RDMA read response.
+ * RDMA_READ_RESPONSE_LAST is used by the ACK processing
+ * thread to indicate a RDMA write needs to be restarted from
+ * an earlier PSN without interfering with the sending thread.
+ * See qib_restart_rc().
+ */
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_FIRST):
+ qp->s_state = OP(RDMA_WRITE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ bth2 = qp->s_psn++ & QIB_PSN_MASK;
+ if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = &qp->s_sge;
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_LAST);
+ else {
+ qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ bth2 |= IB_BTH_REQ_ACK;
+ qp->s_cur++;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ /*
+ * qp->s_state is normally set to the opcode of the
+ * last packet constructed for new requests and therefore
+ * is never set to RDMA read response.
+ * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
+ * thread to indicate a RDMA read needs to be restarted from
+ * an earlier PSN without interfering with the sending thread.
+ * See qib_restart_rc().
+ */
+ len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
+ qp->s_psn = wqe->lpsn + 1;
+ ss = NULL;
+ len = 0;
+ qp->s_cur++;
+ if (qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+ }
+ qp->s_sending_hpsn = bth2;
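+ /*
+ * Sign-extend the 24-bit PSN difference, then request an ACK every
+ * QIB_PSN_CREDIT packets within a multi-packet request.
+ */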
+ delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
+ if (delta && delta % QIB_PSN_CREDIT == 0)
+ bth2 |= IB_BTH_REQ_ACK;
+ if (qp->s_flags & QIB_S_SEND_ONE) {
+ qp->s_flags &= ~QIB_S_SEND_ONE;
+ qp->s_flags |= QIB_S_WAIT_ACK;
+ bth2 |= IB_BTH_REQ_ACK;
+ }
+ qp->s_len -= len;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_sge = ss;
+ qp->s_cur_size = len;
+ qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * qib_send_rc_ack - Construct an ACK packet and send it
+ * @qp: a pointer to the QP
+ *
+ * This is called from qib_rc_rcv() and qib_kreceive().
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
+ */
+void qib_send_rc_ack(struct qib_qp *qp)
+{
+ struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u64 pbc;
+ u16 lrh0;
+ u32 bth0;
+ u32 hwords;
+ u32 pbufn;
+ u32 __iomem *piobuf;
+ struct qib_ib_header hdr;
+ struct qib_other_headers *ohdr;
+ u32 control;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto unlock;
+
+ /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+ if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+ goto queue_ack;
+
+ /* Construct the header with s_lock held so APM doesn't change it. */
+ ohdr = &hdr.u.oth;
+ lrh0 = QIB_LRH_BTH;
+ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
+ hwords = 6;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ hwords += qib_make_grh(ibp, &hdr.u.l.grh,
+ &qp->remote_ah_attr.grh, hwords, 0);
+ ohdr = &hdr.u.l.oth;
+ lrh0 = QIB_LRH_GRH;
+ }
+ /* read pkey_index w/o lock (it's atomic) */
+ bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth0 |= IB_BTH_MIG_REQ;
+ if (qp->r_nak_state)
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ (qp->r_nak_state <<
+ QIB_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
+ qp->remote_ah_attr.sl << 4;
+ hdr.lrh[0] = cpu_to_be16(lrh0);
+ hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Don't try to send ACKs if the link isn't ACTIVE */
+ if (!(ppd->lflags & QIBL_LINKACTIVE))
+ goto done;
+
+ control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
+ qp->s_srate, lrh0 >> 12);
+ /* length is + 1 for the control dword */
+ pbc = ((u64) control << 32) | (hwords + 1);
+
+ piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
+ if (!piobuf) {
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ goto queue_ack;
+ }
+
+ /*
+ * Write the pbc.
+ * We have to flush after the PBC for correctness
+ * on some cpus or WC buffer can be written out of order.
+ */
+ writeq(pbc, piobuf);
+
+ if (dd->flags & QIB_PIO_FLUSH_WC) {
+ u32 *hdrp = (u32 *) &hdr;
+
+ qib_flush_wc();
+ qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
+ qib_flush_wc();
+ __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
+ } else
+ qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
+
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf + spcl_off);
+ }
+
+ qib_flush_wc();
+ qib_sendbuf_done(dd, pbufn);
+
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
+ goto done;
+
+queue_ack:
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ ibp->n_rc_qacks++;
+ qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+
+ /* Schedule the send tasklet. */
+ qib_schedule_send(qp);
+ }
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return;
+}
+
+/**
+ * reset_psn - reset the QP state to send starting from PSN
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ *
+ * This is called from qib_rc_rcv() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void reset_psn(struct qib_qp *qp, u32 psn)
+{
+ u32 n = qp->s_acked;
+ struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+ u32 opcode;
+
+ qp->s_cur = n;
+
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (qib_cmp24(psn, wqe->psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+
+ /* Find the work request opcode corresponding to the given PSN. */
+ opcode = wqe->wr.opcode;
+ for (;;) {
+ int diff;
+
+ if (++n == qp->s_size)
+ n = 0;
+ if (n == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, n);
+ diff = qib_cmp24(psn, wqe->psn);
+ if (diff < 0)
+ break;
+ qp->s_cur = n;
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (diff == 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+ opcode = wqe->wr.opcode;
+ }
+
+ /*
+ * Set the state to restart in the middle of a request.
+ * Don't change the s_sge, s_cur_sge, or s_cur_size.
+ * See qib_make_rc_req().
+ */
+ switch (opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
+ break;
+
+ case IB_WR_RDMA_READ:
+ qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ break;
+
+ default:
+ /*
+ * This case shouldn't happen since it's only
+ * one PSN per req.
+ */
+ qp->s_state = OP(SEND_LAST);
+ }
+done:
+ qp->s_psn = psn;
+ /*
+ * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+ * asynchronously before the send tasklet can get scheduled.
+ * Doing it in qib_make_rc_req() is too late.
+ */
+ if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
+ (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
+ qp->s_flags |= QIB_S_WAIT_PSN;
+}
+
+/*
+ * Back up requester to resend the last un-ACKed request.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
+ */
+static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+{
+ struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+ struct qib_ibport *ibp;
+
+ if (qp->s_retry == 0) {
+ if (qp->s_mig_state == IB_MIG_ARMED) {
+ qib_migrate_qp(qp);
+ qp->s_retry = qp->s_retry_cnt;
+ } else if (qp->s_last == qp->s_acked) {
+ qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ return;
+ } else /* XXX need to handle delayed completion */
+ return;
+ } else
+ qp->s_retry--;
+
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
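+ /*
+ * An RDMA read restart counts as a single resend; otherwise
+ * count every PSN being backed up over.
+ */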
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ ibp->n_rc_resends++;
+ else
+ ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+
+ qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
+ QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
+ QIB_S_WAIT_ACK);
+ if (wait)
+ qp->s_flags |= QIB_S_SEND_ONE;
+ reset_psn(qp, psn);
+}
+
+/*
+ * This is called from s_timer for missing responses.
+ */
+static void rc_timeout(unsigned long arg)
+{
+ struct qib_qp *qp = (struct qib_qp *)arg;
+ struct qib_ibport *ibp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+ spin_lock(&qp->s_lock);
+ if (qp->s_flags & QIB_S_TIMER) {
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ibp->n_rc_timeouts++;
+ qp->s_flags &= ~QIB_S_TIMER;
+ del_timer(&qp->s_timer);
+ qib_restart_rc(qp, qp->s_last_psn + 1, 1);
+ qib_schedule_send(qp);
+ }
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+}
+
+/*
+ * This is called from s_timer for RNR timeouts.
+ */
+void qib_rc_rnr_retry(unsigned long arg)
+{
+ struct qib_qp *qp = (struct qib_qp *)arg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_RNR) {
+ qp->s_flags &= ~QIB_S_WAIT_RNR;
+ del_timer(&qp->s_timer);
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+/*
+ * Set qp->s_sending_psn to the next PSN after the given one.
+ * This would be psn+1 except when RDMA reads are present.
+ */
+static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+{
+ struct qib_swqe *wqe;
+ u32 n = qp->s_last;
+
+ /* Find the work request corresponding to the given PSN. */
+ for (;;) {
+ wqe = get_swqe_ptr(qp, n);
+ if (qib_cmp24(psn, wqe->lpsn) <= 0) {
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ qp->s_sending_psn = wqe->lpsn + 1;
+ else
+ qp->s_sending_psn = psn + 1;
+ break;
+ }
+ if (++n == qp->s_size)
+ n = 0;
+ if (n == qp->s_tail)
+ break;
+ }
+}
+
+/*
+ * This should be called with the QP s_lock held and interrupts disabled.
+ */
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+{
+ struct qib_other_headers *ohdr;
+ struct qib_swqe *wqe;
+ struct ib_wc wc;
+ unsigned i;
+ u32 opcode;
+ u32 psn;
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ /* Find out where the BTH is */
+ if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+ WARN_ON(!qp->s_rdma_ack_cnt);
+ qp->s_rdma_ack_cnt--;
+ return;
+ }
+
+ psn = be32_to_cpu(ohdr->bth[2]);
+ reset_sending_psn(qp, psn);
+
+ /*
+ * Start timer after a packet requesting an ACK has been sent and
+ * there are still requests that haven't been acked.
+ */
+ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
+ !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
+ (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ start_timer(qp);
+
+ while (qp->s_last != qp->s_acked) {
+ wqe = get_swqe_ptr(qp, qp->s_last);
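+ /* Stop if the last packet of this WQE is still being sent. */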
+ if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
+ break;
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ qib_put_mr(sge->mr);
+ }
+ /* Post a send completion queue entry if requested. */
+ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ }
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ }
+ /*
+ * If we were waiting for sends to complete before resending,
+ * and they are now complete, restart sending.
+ */
+ if (qp->s_flags & QIB_S_WAIT_PSN &&
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+ qp->s_flags &= ~QIB_S_WAIT_PSN;
+ qp->s_sending_psn = qp->s_psn;
+ qp->s_sending_hpsn = qp->s_psn - 1;
+ qib_schedule_send(qp);
+ }
+}
+
+static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+{
+ qp->s_last_psn = psn;
+}
+
+/*
+ * Generate a SWQE completion.
+ * This is similar to qib_send_complete but has to check to be sure
+ * that the SGEs are not being referenced if the SWQE is being resent.
+ */
+static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
+ struct qib_swqe *wqe,
+ struct qib_ibport *ibp)
+{
+ struct ib_wc wc;
+ unsigned i;
+
+ /*
+ * Don't decrement refcount and don't generate a
+ * completion if the SWQE is being resent until the send
+ * is finished.
+ */
+ if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ qib_put_mr(sge->mr);
+ }
+ /* Post a send completion queue entry if requested. */
+ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ }
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ } else
+ ibp->n_rc_delayed_comp++;
+
+ qp->s_retry = qp->s_retry_cnt;
+ update_last_psn(qp, wqe->lpsn);
+
+ /*
+ * If we are completing a request which is in the process of
+ * being resent, we can stop resending it since we know the
+ * responder has already seen it.
+ */
+ if (qp->s_acked == qp->s_cur) {
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ qp->s_acked = qp->s_cur;
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ if (qp->s_acked != qp->s_tail) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = wqe->psn;
+ }
+ } else {
+ if (++qp->s_acked >= qp->s_size)
+ qp->s_acked = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
+ qp->s_draining = 0;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ }
+ return wqe;
+}
+
+/**
+ * do_rc_ack - process an incoming RC ACK
+ * @qp: the QP the ACK came in on
+ * @aeth: the AETH word from the ACK packet
+ * @psn: the packet sequence number of the ACK
+ * @opcode: the opcode of the response packet carrying the ACK
+ * @val: the value returned by the responder for atomic operations
+ * @rcd: the receive context the packet arrived on
+ *
+ * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ * Returns 1 if OK, 0 if current operation should be aborted (NAK).
+ */
+static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+ u64 val, struct qib_ctxtdata *rcd)
+{
+ struct qib_ibport *ibp;
+ enum ib_wc_status status;
+ struct qib_swqe *wqe;
+ int ret = 0;
+ u32 ack_psn;
+ int diff;
+
+ /* Remove QP from retry timer */
+ if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ }
+
+ /*
+ * Note that NAKs implicitly ACK outstanding SEND and RDMA write
+ * requests and implicitly NAK RDMA read and atomic requests issued
+ * before the NAK'ed request. The MSN won't include the NAK'ed
+ * request but will include any ACK'ed requests.
+ */
+ ack_psn = psn;
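+ /* AETH bits 31:29 are non-zero for NAKs; back up to the previous PSN. */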
+ if (aeth >> 29)
+ ack_psn--;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+
+ /*
+ * The MSN might be for a later WQE than the PSN indicates so
+ * only complete WQEs that the PSN finishes.
+ */
+ while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+ /*
+ * RDMA_READ_RESPONSE_ONLY is a special case since
+ * we want to generate completion events for everything
+ * before the RDMA read, copy the data, then generate
+ * the completion for the read.
+ */
+ if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+ opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+ diff == 0) {
+ ret = 1;
+ goto bail;
+ }
+ /*
+ * If this request is a RDMA read or atomic, and the ACK is
+ * for a later operation, this ACK NAKs the RDMA read or
+ * atomic. In other words, only a RDMA_READ_LAST or ONLY
+ * can ACK a RDMA read and likewise for atomic ops. Note
+ * that the NAK case can only happen if relaxed ordering is
+ * used and requests are sent after an RDMA read or atomic
+ * is sent but before the response is received.
+ */
+ if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
+ (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
+ ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
+ (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
+ /* Retry this request. */
+ if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
+ qp->r_flags |= QIB_R_RDMAR_SEQ;
+ qib_restart_rc(qp, qp->s_last_psn + 1, 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_SEND;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait,
+ &rcd->qp_wait_list);
+ }
+ }
+ /*
+ * No need to process the ACK/NAK since we are
+ * restarting an earlier request.
+ */
+ goto bail;
+ }
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ u64 *vaddr = wqe->sg_list[0].vaddr;
+ *vaddr = val;
+ }
+ if (qp->s_num_rd_atomic &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+ qp->s_num_rd_atomic--;
+ /* Restart sending task if fence is complete */
+ if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+ !qp->s_num_rd_atomic) {
+ qp->s_flags &= ~(QIB_S_WAIT_FENCE |
+ QIB_S_WAIT_ACK);
+ qib_schedule_send(qp);
+ } else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
+ qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
+ QIB_S_WAIT_ACK);
+ qib_schedule_send(qp);
+ }
+ }
+ wqe = do_rc_completion(qp, wqe, ibp);
+ if (qp->s_acked == qp->s_tail)
+ break;
+ }
+
+ switch (aeth >> 29) {
+ case 0: /* ACK */
+ ibp->n_rc_acks++;
+ if (qp->s_acked != qp->s_tail) {
+ /*
+ * We are expecting more ACKs so
+ * reset the retransmit timer.
+ */
+ start_timer(qp);
+ /*
+ * We can stop resending the earlier packets and
+ * continue with the next packet the receiver wants.
+ */
+ if (qib_cmp24(qp->s_psn, psn) <= 0)
+ reset_psn(qp, psn + 1);
+ } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
+ }
+ if (qp->s_flags & QIB_S_WAIT_ACK) {
+ qp->s_flags &= ~QIB_S_WAIT_ACK;
+ qib_schedule_send(qp);
+ }
+ qib_get_credit(qp, aeth);
+ qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+ qp->s_retry = qp->s_retry_cnt;
+ update_last_psn(qp, psn);
+ ret = 1;
+ goto bail;
+
+ case 1: /* RNR NAK */
+ ibp->n_rnr_naks++;
+ if (qp->s_acked == qp->s_tail)
+ goto bail;
+ if (qp->s_flags & QIB_S_WAIT_RNR)
+ goto bail;
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
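+ /* An RNR retry count of 7 means retry forever, so only decrement otherwise. */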
+ if (qp->s_rnr_retry_cnt < 7)
+ qp->s_rnr_retry--;
+
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
+
+ ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+
+ reset_psn(qp, psn);
+
+ qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
+ qp->s_flags |= QIB_S_WAIT_RNR;
+ qp->s_timer.function = qib_rc_rnr_retry;
+ qp->s_timer.expires = jiffies + usecs_to_jiffies(
+ ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
+ QIB_AETH_CREDIT_MASK]);
+ add_timer(&qp->s_timer);
+ goto bail;
+
+ case 3: /* NAK */
+ if (qp->s_acked == qp->s_tail)
+ goto bail;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
+ switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
+ QIB_AETH_CREDIT_MASK) {
+ case 0: /* PSN sequence error */
+ ibp->n_seq_naks++;
+ /*
+ * Back up to the responder's expected PSN.
+ * Note that we might get a NAK in the middle of an
+ * RDMA READ response which terminates the RDMA
+ * READ.
+ */
+ qib_restart_rc(qp, psn, 0);
+ qib_schedule_send(qp);
+ break;
+
+ case 1: /* Invalid Request */
+ status = IB_WC_REM_INV_REQ_ERR;
+ ibp->n_other_naks++;
+ goto class_b;
+
+ case 2: /* Remote Access Error */
+ status = IB_WC_REM_ACCESS_ERR;
+ ibp->n_other_naks++;
+ goto class_b;
+
+ case 3: /* Remote Operation Error */
+ status = IB_WC_REM_OP_ERR;
+ ibp->n_other_naks++;
+class_b:
+ if (qp->s_last == qp->s_acked) {
+ qib_send_complete(qp, wqe, status);
+ qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ }
+ break;
+
+ default:
+ /* Ignore other reserved NAK error codes */
+ goto reserved;
+ }
+ qp->s_retry = qp->s_retry_cnt;
+ qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+ goto bail;
+
+ default: /* 2: reserved */
+reserved:
+ /* Ignore reserved NAK codes. */
+ goto bail;
+ }
+
+bail:
+ return ret;
+}
+
+/*
+ * We have seen an out of sequence RDMA read middle or last packet.
+ * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
+ */
+static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_swqe *wqe;
+
+ /* Remove QP from retry timer */
+ if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ }
+
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+
+ while (qib_cmp24(psn, wqe->lpsn) > 0) {
+ if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ break;
+ wqe = do_rc_completion(qp, wqe, ibp);
+ }
+
+ ibp->n_rdma_seq++;
+ qp->r_flags |= QIB_R_RDMAR_SEQ;
+ qib_restart_rc(qp, qp->s_last_psn + 1, 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_SEND;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+}
+
+/**
+ * qib_rc_rcv_resp - process an incoming RC response packet
+ * @ibp: the port this packet came in on
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @hdrsize: the header length
+ * @pmtu: the path MTU
+ *
+ * This is called from qib_rc_rcv() to process an incoming RC response
+ * packet for the given QP.
+ * Called at interrupt level.
+ */
+static void qib_rc_rcv_resp(struct qib_ibport *ibp,
+ struct qib_other_headers *ohdr,
+ void *data, u32 tlen,
+ struct qib_qp *qp,
+ u32 opcode,
+ u32 psn, u32 hdrsize, u32 pmtu,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_swqe *wqe;
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ enum ib_wc_status status;
+ unsigned long flags;
+ int diff;
+ u32 pad;
+ u32 aeth;
+ u64 val;
+
+ if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
+ /*
+ * If the ACK'd PSN is for a packet on the SDMA busy list, try to
+ * make progress to reclaim SDMA credits.
+ */
+ if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
+ (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
+
+ /*
+ * If the send tasklet is not running, attempt to make
+ * progress on the SDMA queue.
+ */
+ if (!(qp->s_flags & QIB_S_BUSY)) {
+ /* Acquire SDMA Lock */
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /* Invoke sdma make progress */
+ qib_sdma_make_progress(ppd);
+ /* Release SDMA Lock */
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ }
+ }
+ }
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto ack_done;
+
+ /* Ignore invalid responses. */
+ if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+ goto ack_done;
+
+ /* Ignore duplicate responses. */
+ diff = qib_cmp24(psn, qp->s_last_psn);
+ if (unlikely(diff <= 0)) {
+ /* Update credits for "ghost" ACKs */
+ if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ if ((aeth >> 29) == 0)
+ qib_get_credit(qp, aeth);
+ }
+ goto ack_done;
+ }
+
+ /*
+ * Skip everything other than the PSN we expect, if we are waiting
+ * for a reply to a restarted RDMA read or atomic op.
+ */
+ if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+ if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
+ goto ack_done;
+ qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+ }
+
+ if (unlikely(qp->s_acked == qp->s_tail))
+ goto ack_done;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ status = IB_WC_SUCCESS;
+
+ switch (opcode) {
+ case OP(ACKNOWLEDGE):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+ __be32 *p = ohdr->u.at.atomic_ack_eth;
+
+ val = ((u64) be32_to_cpu(p[0]) << 32) |
+ be32_to_cpu(p[1]);
+ } else
+ val = 0;
+ if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
+ opcode != OP(RDMA_READ_RESPONSE_FIRST))
+ goto ack_done;
+ hdrsize += 4;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_middle;
+
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ /* no AETH, no ACK */
+ if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
+ goto ack_seq_err;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+read_middle:
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto ack_len_err;
+ if (unlikely(pmtu >= qp->s_rdma_read_len))
+ goto ack_len_err;
+
+ /*
+ * We got a response so update the timeout.
+ * 4.096 usec. * (1 << qp->timeout)
+ */
+ qp->s_flags |= QIB_S_TIMER;
+ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
+ if (qp->s_flags & QIB_S_WAIT_ACK) {
+ qp->s_flags &= ~QIB_S_WAIT_ACK;
+ qib_schedule_send(qp);
+ }
+
+ if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
+ qp->s_retry = qp->s_retry_cnt;
+
+ /*
+ * Update the RDMA receive state but do the copy w/o
+ * holding the locks and blocking interrupts.
+ */
+ qp->s_rdma_read_len -= pmtu;
+ update_last_psn(qp, psn);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+ goto bail;
+
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
+ goto ack_done;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 0 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen < (hdrsize + pad + 8)))
+ goto ack_len_err;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_last;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /* ACKs READ req. */
+ if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
+ goto ack_seq_err;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 1 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen <= (hdrsize + pad + 8)))
+ goto ack_len_err;
+read_last:
+ tlen -= hdrsize + pad + 8;
+ if (unlikely(tlen != qp->s_rdma_read_len))
+ goto ack_len_err;
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+ WARN_ON(qp->s_rdma_read_sge.num_sge);
+ (void) do_rc_ack(qp, aeth, psn,
+ OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
+ goto ack_done;
+ }
+
+ack_op_err:
+ status = IB_WC_LOC_QP_OP_ERR;
+ goto ack_err;
+
+ack_seq_err:
+ rdma_seq_err(qp, ibp, psn, rcd);
+ goto ack_done;
+
+ack_len_err:
+ status = IB_WC_LOC_LEN_ERR;
+ack_err:
+ if (qp->s_last == qp->s_acked) {
+ qib_send_complete(qp, wqe, status);
+ qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ }
+ack_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+bail:
+ return;
+}
+
+/**
+ * qib_rc_rcv_error - process an incoming duplicate or error RC packet
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @diff: the difference between the PSN and the expected PSN
+ *
+ * This is called from qib_rc_rcv() to process an unexpected
+ * incoming RC packet for the given QP.
+ * Called at interrupt level.
+ * Return 1 if no more processing is needed; otherwise return 0 to
+ * schedule a response to be sent.
+ */
+static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+ void *data,
+ struct qib_qp *qp,
+ u32 opcode,
+ u32 psn,
+ int diff,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_ack_entry *e;
+ unsigned long flags;
+ u8 i, prev;
+ int old_req;
+
+ if (diff > 0) {
+ /*
+ * Packet sequence error.
+ * A NAK will ACK earlier sends and RDMA writes.
+ * Don't queue the NAK if we already sent one.
+ */
+ if (!qp->r_nak_state) {
+ ibp->n_rc_seqnak++;
+ qp->r_nak_state = IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
+ /*
+ * Wait to send the sequence NAK until all packets
+ * in the receive queue have been processed.
+ * Otherwise, we end up propagating congestion.
+ */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ }
+ goto done;
+ }
+
+ /*
+ * Handle a duplicate request. Don't re-execute SEND, RDMA
+ * write or atomic op. Don't NAK errors, just silently drop
+ * the duplicate request. Note that r_sge, r_len, and
+ * r_rcv_len may be in use so don't modify them.
+ *
+ * We are supposed to ACK the earliest duplicate PSN but we
+ * can coalesce an outstanding duplicate ACK. We have to
+ * send the earliest so that RDMA reads can be restarted at
+ * the requester's expected PSN.
+ *
+ * First, find where this duplicate PSN falls within the
+ * ACKs previously sent.
+ * old_req is true if there is an older response that is scheduled
+ * to be sent before sending this one.
+ */
+ e = NULL;
+ old_req = 1;
+ ibp->n_rc_dupreq++;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
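+ /*
+ * Scan backward from the newest ack queue entry; the queue has
+ * QIB_MAX_RDMA_ATOMIC + 1 slots, so prev wraps to QIB_MAX_RDMA_ATOMIC.
+ */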
+ for (i = qp->r_head_ack_queue; ; i = prev) {
+ if (i == qp->s_tail_ack_queue)
+ old_req = 0;
+ if (i)
+ prev = i - 1;
+ else
+ prev = QIB_MAX_RDMA_ATOMIC;
+ if (prev == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[prev];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (qib_cmp24(psn, e->psn) >= 0) {
+ if (prev == qp->s_tail_ack_queue &&
+ qib_cmp24(psn, e->lpsn) <= 0)
+ old_req = 0;
+ break;
+ }
+ }
+ switch (opcode) {
+ case OP(RDMA_READ_REQUEST): {
+ struct ib_reth *reth;
+ u32 offset;
+ u32 len;
+
+ /*
+ * If we didn't find the RDMA read request in the ack queue,
+ * we can ignore this request.
+ */
+ if (!e || e->opcode != OP(RDMA_READ_REQUEST))
+ goto unlock_done;
+ /* RETH comes after BTH */
+ reth = &ohdr->u.rc.reth;
+ /*
+ * Address range must be a subset of the original
+ * request and start on pmtu boundaries.
+ * We reuse the old ack_queue slot since the requester
+ * should not back up and request an earlier PSN for the
+ * same request.
+ */
+ offset = ((psn - e->psn) & QIB_PSN_MASK) * qp->pmtu;
+ len = be32_to_cpu(reth->length);
+ if (unlikely(offset + len != e->rdma_sge.sge_length))
+ goto unlock_done;
+ if (e->rdma_sge.mr) {
+ qib_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+ if (len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+ IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto unlock_done;
+ } else {
+ e->rdma_sge.vaddr = NULL;
+ e->rdma_sge.length = 0;
+ e->rdma_sge.sge_length = 0;
+ }
+ e->psn = psn;
+ if (old_req)
+ goto unlock_done;
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ case OP(COMPARE_SWAP):
+ case OP(FETCH_ADD): {
+ /*
+ * If we didn't find the atomic request in the ack queue
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != (u8) opcode || old_req)
+ goto unlock_done;
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ default:
+ /*
+ * Ignore this operation if it doesn't request an ACK
+ * or if an earlier RDMA read or atomic is going to be resent.
+ */
+ if (!(psn & IB_BTH_REQ_ACK) || old_req)
+ goto unlock_done;
+ /*
+ * Resend the most recent ACK if this request is
+ * after all the previous RDMA reads and atomics.
+ */
+ if (i == qp->r_head_ack_queue) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->r_psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Try to send a simple ACK to work around a Mellanox bug
+ * which doesn't accept a RDMA read response or atomic
+ * response as an ACK for earlier SENDs or RDMA writes.
+ */
+ if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Resend the RDMA read or atomic op which
+ * ACKs this duplicate request.
+ */
+ qp->s_tail_ack_queue = i;
+ break;
+ }
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_flags |= QIB_S_RESP_PENDING;
+ qp->r_nak_state = 0;
+ qib_schedule_send(qp);
+
+unlock_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return 1;
+
+send_ack:
+ return 0;
+}
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+{
+ unsigned long flags;
+ int lastwqe;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ lastwqe = qib_error_qp(qp, err);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+
+static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+{
+ unsigned next;
+
+ next = n + 1;
+ if (next > QIB_MAX_RDMA_ATOMIC)
+ next = 0;
+ qp->s_tail_ack_queue = next;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+}
+
+/**
+ * qib_rc_rcv - process an incoming RC packet
+ * @rcd: the context pointer
+ * @hdr: the header of this packet
+ * @has_grh: true if the header has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ *
+ * This is called from qib_qp_rcv() to process an incoming RC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_ibport *ibp = &rcd->ppd->ibport_data;
+ struct qib_other_headers *ohdr;
+ u32 opcode;
+ u32 hdrsize;
+ u32 psn;
+ u32 pad;
+ struct ib_wc wc;
+ u32 pmtu = qp->pmtu;
+ int diff;
+ struct ib_reth *reth;
+ unsigned long flags;
+ int ret;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12; /* LRH + BTH */
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
+ }
+
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
+ return;
+
+ psn = be32_to_cpu(ohdr->bth[2]);
+ opcode >>= 24;
+
+ /*
+ * Process responses (ACKs) before anything else. Note that the
+ * packet sequence number will be for something in the send work
+ * queue rather than the expected receive packet sequence number.
+ * In other words, this QP is the requester.
+ */
+ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+ qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
+ hdrsize, pmtu, rcd);
+ return;
+ }
+
+ /* Compute 24 bits worth of difference. */
+ diff = qib_cmp24(psn, qp->r_psn);
+ if (unlikely(diff)) {
+ if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
+ return;
+ goto send_ack;
+ }
+
+ /* Check for opcode sequence errors. */
+ switch (qp->r_state) {
+ case OP(SEND_FIRST):
+ case OP(SEND_MIDDLE):
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ break;
+ goto nack_inv;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_MIDDLE):
+ if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ break;
+ goto nack_inv;
+
+ default:
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ goto nack_inv;
+ /*
+ * Note that it is up to the requester to not send a new
+ * RDMA read or atomic operation before receiving an ACK
+ * for the previous operation.
+ */
+ break;
+ }
+
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
+ qp->r_flags |= QIB_R_COMM_EST;
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_COMM_EST;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ }
+
+ /* OK, process the packet. */
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ qp->r_rcv_len = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ case OP(RDMA_WRITE_MIDDLE):
+send_middle:
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto nack_inv;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto nack_inv;
+ qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ break;
+
+ case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+ /* consume RWQE */
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ goto send_last_imm;
+
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ qp->r_rcv_len = 0;
+ if (opcode == OP(SEND_ONLY))
+ goto no_immediate_data;
+ /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
+ case OP(SEND_LAST_WITH_IMMEDIATE):
+send_last_imm:
+ wc.ex.imm_data = ohdr->u.imm_data;
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ goto send_last;
+ case OP(SEND_LAST):
+ case OP(RDMA_WRITE_LAST):
+no_immediate_data:
+ wc.wc_flags = 0;
+ wc.ex.imm_data = 0;
+send_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto nack_inv;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ wc.byte_len = tlen + qp->r_rcv_len;
+ if (unlikely(wc.byte_len > qp->r_len))
+ goto nack_inv;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ qib_put_ss(&qp->r_sge);
+ qp->r_msn++;
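+ /*
+ * If no receive WQE was consumed (e.g. an RDMA write without
+ * immediate data), there is no receive completion to post.
+ */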
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ break;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ /* zero fields that are N/A */
+ wc.vendor_err = 0;
+ wc.pkey_index = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto nack_inv;
+ /* consume RWQE */
+ reth = &ohdr->u.rc.reth;
+ hdrsize += sizeof(*reth);
+ qp->r_len = be32_to_cpu(reth->length);
+ qp->r_rcv_len = 0;
+ qp->r_sge.sg_list = NULL;
+ if (qp->r_len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey & NAK */
+ ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+ rkey, IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok))
+ goto nack_acc;
+ qp->r_sge.num_sge = 1;
+ } else {
+ qp->r_sge.num_sge = 0;
+ qp->r_sge.sge.mr = NULL;
+ qp->r_sge.sge.vaddr = NULL;
+ qp->r_sge.sge.length = 0;
+ qp->r_sge.sge.sge_length = 0;
+ }
+ if (opcode == OP(RDMA_WRITE_FIRST))
+ goto send_middle;
+ else if (opcode == OP(RDMA_WRITE_ONLY))
+ goto no_immediate_data;
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ wc.ex.imm_data = ohdr->u.rc.imm_data;
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ goto send_last;
+
+ case OP(RDMA_READ_REQUEST): {
+ struct qib_ack_entry *e;
+ u32 len;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
+ if (next > QIB_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ qib_update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+ qib_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+ reth = &ohdr->u.rc.reth;
+ len = be32_to_cpu(reth->length);
+ if (len) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey & NAK */
+ ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto nack_acc_unlck;
+ /*
+ * Update the next expected PSN. We add 1 later
+ * below, so only add the remainder here.
+ */
+ if (len > pmtu)
+ qp->r_psn += (len - 1) / pmtu;
+ } else {
+ e->rdma_sge.mr = NULL;
+ e->rdma_sge.vaddr = NULL;
+ e->rdma_sge.length = 0;
+ e->rdma_sge.sge_length = 0;
+ }
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn;
+ e->lpsn = qp->r_psn;
+ /*
+ * We need to increment the MSN here instead of when we
+ * finish sending the result since a duplicate request would
+ * increment it more than once.
+ */
+ qp->r_msn++;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ qp->s_flags |= QIB_S_RESP_PENDING;
+ qib_schedule_send(qp);
+
+ goto sunlock;
+ }
+
+ case OP(COMPARE_SWAP):
+ case OP(FETCH_ADD): {
+ struct ib_atomic_eth *ateth;
+ struct qib_ack_entry *e;
+ u64 vaddr;
+ atomic64_t *maddr;
+ u64 sdata;
+ u32 rkey;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ if (next > QIB_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ qib_update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+ qib_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+ ateth = &ohdr->u.atomic_eth;
+ vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ be32_to_cpu(ateth->vaddr[1]);
+ if (unlikely(vaddr & (sizeof(u64) - 1)))
+ goto nack_inv_unlck;
+ rkey = be32_to_cpu(ateth->rkey);
+ /* Check rkey & NAK */
+ if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ vaddr, rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_acc_unlck;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = be64_to_cpu(ateth->swap_data);
+ e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ be64_to_cpu(ateth->compare_data),
+ sdata);
+ qib_put_mr(qp->r_sge.sge.mr);
+ qp->r_sge.num_sge = 0;
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn;
+ e->lpsn = psn;
+ qp->r_msn++;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ qp->s_flags |= QIB_S_RESP_PENDING;
+ qib_schedule_send(qp);
+
+ goto sunlock;
+ }
+
+ default:
+ /* NAK unknown opcodes. */
+ goto nack_inv;
+ }
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_ack_psn = psn;
+ qp->r_nak_state = 0;
+ /* Send an ACK if requested or required. */
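+ /* Bit 31 of the BTH PSN word is the ACK request bit (IB_BTH_REQ_ACK). */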
+ if (psn & (1 << 31))
+ goto send_ack;
+ return;
+
+rnr_nak:
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue RNR NAK for later */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ return;
+
+nack_op_err:
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue NAK for later */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ return;
+
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue NAK for later */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ return;
+
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_acc:
+ qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+send_ack:
+ qib_send_rc_ack(qp);
+ return;
+
+sunlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
new file mode 100644
index 00000000000..4c07a8b34ff
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/*
+ * Convert the AETH RNR timeout code into the number of microseconds.
+ */
+const u32 ib_qib_rnr_table[32] = {
+ 655360, /* 00: 655.36 */
+ 10, /* 01: .01 */
+ 20, /* 02: .02 */
+ 30, /* 03: .03 */
+ 40, /* 04: .04 */
+ 60, /* 05: .06 */
+ 80, /* 06: .08 */
+ 120, /* 07: .12 */
+ 160, /* 08: .16 */
+ 240, /* 09: .24 */
+ 320, /* 0A: .32 */
+ 480, /* 0B: .48 */
+ 640, /* 0C: .64 */
+ 960, /* 0D: .96 */
+ 1280, /* 0E: 1.28 */
+ 1920, /* 0F: 1.92 */
+ 2560, /* 10: 2.56 */
+ 3840, /* 11: 3.84 */
+ 5120, /* 12: 5.12 */
+ 7680, /* 13: 7.68 */
+ 10240, /* 14: 10.24 */
+ 15360, /* 15: 15.36 */
+ 20480, /* 16: 20.48 */
+ 30720, /* 17: 30.72 */
+ 40960, /* 18: 40.96 */
+ 61440, /* 19: 61.44 */
+ 81920, /* 1A: 81.92 */
+ 122880, /* 1B: 122.88 */
+ 163840, /* 1C: 163.84 */
+ 245760, /* 1D: 245.76 */
+ 327680, /* 1E: 327.68 */
+ 491520 /* 1F: 491.52 */
+};
+
+/*
+ * Validate a RWQE and fill in the SGE state.
+ * Return 1 if OK.
+ */
+static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+{
+ int i, j, ret;
+ struct ib_wc wc;
+ struct qib_lkey_table *rkt;
+ struct qib_pd *pd;
+ struct qib_sge_state *ss;
+
+ rkt = &to_idev(qp->ibqp.device)->lk_table;
+ pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+ ss = &qp->r_sge;
+ ss->sg_list = qp->r_sg_list;
+ qp->r_len = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ qp->r_len += wqe->sg_list[i].length;
+ j++;
+ }
+ ss->num_sge = j;
+ ss->total_len = qp->r_len;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ while (j) {
+ struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+
+ qib_put_mr(sge->mr);
+ }
+ ss->num_sge = 0;
+ memset(&wc, 0, sizeof(wc));
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ /* Signal solicited completion event. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * qib_get_rwqe - copy the next RWQE into the QP's RWQE
+ * @qp: the QP
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
+ *
+ * Return -1 if there is a local error, 0 if no RWQE is available,
+ * otherwise return 1.
+ *
+ * Can be called from interrupt level.
+ */
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+{
+ unsigned long flags;
+ struct qib_rq *rq;
+ struct qib_rwq *wq;
+ struct qib_srq *srq;
+ struct qib_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ int ret;
+
+ if (qp->ibqp.srq) {
+ srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
+ rq = &qp->r_rq;
+ }
+
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ if (unlikely(tail == wq->head)) {
+ ret = 0;
+ goto unlock;
+ }
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
+ wqe = get_rwqe_ptr(rq, tail);
+ /*
+ * Even though we update the tail index in memory, the verbs
+ * consumer is not supposed to post more entries until a
+ * completion is generated.
+ */
+ if (++tail >= rq->size)
+ tail = 0;
+ wq->tail = tail;
+ if (!wr_id_only && !qib_init_sge(qp, wqe)) {
+ ret = -1;
+ goto unlock;
+ }
+ qp->r_wr_id = wqe->wr_id;
+
+ ret = 1;
+ set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+ if (handler) {
+ u32 n;
+
+ /*
+ * Validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
+ else
+ n -= tail;
+ if (n < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
+ }
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+bail:
+ return ret;
+}
+
+/*
+ * Switch to alternate path.
+ * The QP s_lock should be held and interrupts disabled.
+ */
+void qib_migrate_qp(struct qib_qp *qp)
+{
+ struct ib_event ev;
+
+ qp->s_mig_state = IB_MIG_MIGRATED;
+ qp->remote_ah_attr = qp->alt_ah_attr;
+ qp->port_num = qp->alt_ah_attr.port_num;
+ qp->s_pkey_index = qp->s_alt_pkey_index;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_PATH_MIG;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+}
+
+static __be64 get_sguid(struct qib_ibport *ibp, unsigned index)
+{
+ if (!index) {
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ return ppd->guid;
+ } else
+ return ibp->guids[index - 1];
+}
+
+static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
+{
+ return (gid->global.interface_id == id &&
+ (gid->global.subnet_prefix == gid_prefix ||
+ gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
+}
+
+/*
+ * Validate the LRH/GRH, PKEY and SLID of an incoming packet against the
+ * QP's primary or alternate path, migrating the QP if the packet requests
+ * the alternate path.  Returns 0 if the packet is acceptable, 1 otherwise.
+ *
+ * This should be called with the QP r_lock held.
+ *
+ * The s_lock will be acquired around the qib_migrate_qp() call.
+ */
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, struct qib_qp *qp, u32 bth0)
+{
+ __be64 guid;
+ unsigned long flags;
+
+ if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
+ if (!has_grh) {
+ if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ goto err;
+ } else {
+ if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ goto err;
+ guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ goto err;
+ if (!gid_ok(&hdr->u.l.grh.sgid,
+ qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+ qp->alt_ah_attr.grh.dgid.global.interface_id))
+ goto err;
+ }
+ if (!qib_pkey_ok((u16)bth0,
+ qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+ (u16)bth0,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ 0, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ goto err;
+ }
+ /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
+ if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
+ ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ goto err;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_migrate_qp(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ } else {
+ if (!has_grh) {
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ goto err;
+ } else {
+ if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ goto err;
+ guid = get_sguid(ibp,
+ qp->remote_ah_attr.grh.sgid_index);
+ if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ goto err;
+ if (!gid_ok(&hdr->u.l.grh.sgid,
+ qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+ qp->remote_ah_attr.grh.dgid.global.interface_id))
+ goto err;
+ }
+ if (!qib_pkey_ok((u16)bth0,
+ qib_get_pkey(ibp, qp->s_pkey_index))) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+ (u16)bth0,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ 0, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ goto err;
+ }
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ ppd_from_ibp(ibp)->port != qp->port_num)
+ goto err;
+ if (qp->s_mig_state == IB_MIG_REARM &&
+ !(bth0 & IB_BTH_MIG_REQ))
+ qp->s_mig_state = IB_MIG_ARMED;
+ }
+
+ return 0;
+
+err:
+ return 1;
+}
+
+/**
+ * qib_ruc_loopback - handle UC and RC loopback requests
+ * @sqp: the sending QP
+ *
+ * This is called from qib_do_send() to
+ * forward a WQE addressed to the same HCA.
+ * Note that although we are single threaded due to the tasklet, we still
+ * have to protect against post_send(). We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+static void qib_ruc_loopback(struct qib_qp *sqp)
+{
+ struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
+ struct qib_qp *qp;
+ struct qib_swqe *wqe;
+ struct qib_sge *sge;
+ unsigned long flags;
+ struct ib_wc wc;
+ u64 sdata;
+ atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ int release;
+ int ret;
+
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
+ qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+
+ spin_lock_irqsave(&sqp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
+ !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
+
+ sqp->s_flags |= QIB_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work request. */
+ if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
+ }
+
+ /*
+ * We can rely on the entry not changing without the s_lock
+ * being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
+ */
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+ if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+ qp->ibqp.qp_type != sqp->ibqp.qp_type) {
+ ibp->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof wc);
+ send_status = IB_WC_SUCCESS;
+
+ release = 1;
+ sqp->s_sge.sge = wqe->sg_list[0];
+ sqp->s_sge.sg_list = wqe->sg_list + 1;
+ sqp->s_sge.num_sge = wqe->wr.num_sge;
+ sqp->s_len = wqe->length;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ /* FALLTHROUGH */
+ case IB_WR_SEND:
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ break;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ /* FALLTHROUGH */
+ case IB_WR_RDMA_WRITE:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ if (wqe->length == 0)
+ break;
+ if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+ wqe->wr.wr.rdma.remote_addr,
+ wqe->wr.wr.rdma.rkey,
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
+ qp->r_sge.sg_list = NULL;
+ qp->r_sge.num_sge = 1;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
+ if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+ wqe->wr.wr.rdma.remote_addr,
+ wqe->wr.wr.rdma.rkey,
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
+ release = 0;
+ sqp->s_sge.sg_list = NULL;
+ sqp->s_sge.num_sge = 1;
+ qp->r_sge.sge = wqe->sg_list[0];
+ qp->r_sge.sg_list = wqe->sg_list + 1;
+ qp->r_sge.num_sge = wqe->wr.num_sge;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
+ if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ wqe->wr.wr.atomic.remote_addr,
+ wqe->wr.wr.atomic.rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = wqe->wr.wr.atomic.compare_add;
+ *(u64 *) sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ sdata, wqe->wr.wr.atomic.swap);
+ qib_put_mr(qp->r_sge.sge.mr);
+ qp->r_sge.num_sge = 0;
+ goto send_comp;
+
+ default:
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
+ }
+
+ sge = &sqp->s_sge.sge;
+ while (sqp->s_len) {
+ u32 len = sqp->s_len;
+
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (!release)
+ qib_put_mr(sge->mr);
+ if (--sqp->s_sge.num_sge)
+ *sge = *sqp->s_sge.sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ sqp->s_len -= len;
+ }
+ if (release)
+ qib_put_ss(&qp->r_sge);
+
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ goto send_comp;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ ibp->n_loop_pkts++;
+flush_send:
+ sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ qib_send_complete(sqp, wqe, send_status);
+ goto again;
+
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ ibp->n_rnr_naks++;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
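+ /* An RNR retry count of 7 means retry forever, so only decrement otherwise. */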
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+ goto clr_busy;
+ sqp->s_flags |= QIB_S_WAIT_RNR;
+ sqp->s_timer.function = qib_rc_rnr_retry;
+ sqp->s_timer.expires = jiffies +
+ usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
+ add_timer(&sqp->s_timer);
+ goto clr_busy;
+
+op_err:
+ send_status = IB_WC_REM_OP_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ qib_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ qib_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+ if (qp && atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+/**
+ * qib_make_grh - construct a GRH header
+ * @ibp: a pointer to the IB port
+ * @hdr: a pointer to the GRH header being constructed
+ * @grh: the global route address to send to
+ * @hwords: the number of 32 bit words of header being sent
+ * @nwords: the number of 32 bit words of data being sent
+ *
+ * Return the size of the header in 32 bit words.
+ */
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords)
+{
+ hdr->version_tclass_flow =
+ cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
+ (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
+ (grh->flow_label << IB_GRH_FLOW_SHIFT));
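+ /* Convert the word counts to a byte length: drop the 2 LRH words, add payload and CRC. */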
+ hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
+ /* next_hdr is defined by C8-7 in ch. 8.4.1 */
+ hdr->next_hdr = IB_GRH_NEXT_HDR;
+ hdr->hop_limit = grh->hop_limit;
+ /* The SGID is 32-bit aligned. */
+ hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+ hdr->sgid.global.interface_id = grh->sgid_index ?
+ ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
+ hdr->dgid = grh->dgid;
+
+ /* GRH header size in 32-bit words. */
+ return sizeof(struct ib_grh) / sizeof(u32);
+}
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+ u32 bth0, u32 bth2)
+{
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ u16 lrh0;
+ u32 nwords;
+ u32 extra_bytes;
+
+ /* Construct the header. */
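+ /* Pad the payload to a multiple of 4 bytes; nwords is the padded payload length in 32-bit words. */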
+ extra_bytes = -qp->s_cur_size & 3;
+ nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ lrh0 = QIB_LRH_BTH;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+ &qp->remote_ah_attr.grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = QIB_LRH_GRH;
+ }
+ lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
+ qp->remote_ah_attr.sl << 4;
+ qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ qp->remote_ah_attr.src_path_bits);
+ bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
+ bth0 |= extra_bytes << 20;
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth0 |= IB_BTH_MIG_REQ;
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(bth2);
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
+}
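
The extra_bytes/nwords computation at the top of qib_make_ruc_header() is the usual round-up-to-dword idiom; a minimal standalone sketch (editorial illustration, not part of the patch):

	/* Round a byte count up to a whole number of 32-bit words. */
	static inline u32 bytes_to_padded_dwords(u32 len)
	{
		u32 extra_bytes = -len & 3;	/* 0..3 pad bytes */

		return (len + extra_bytes) >> 2;
	}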
+
+/**
+ * qib_do_send - perform a send on a QP
+ * @work: contains a pointer to the QP
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * Otherwise, two threads could send packets out of order.
+ */
+void qib_do_send(struct work_struct *work)
+{
+ struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int (*make_req)(struct qib_qp *qp);
+ unsigned long flags;
+
+ if ((qp->ibqp.qp_type == IB_QPT_RC ||
+ qp->ibqp.qp_type == IB_QPT_UC) &&
+ (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) {
+ qib_ruc_loopback(qp);
+ return;
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ make_req = qib_make_rc_req;
+ else if (qp->ibqp.qp_type == IB_QPT_UC)
+ make_req = qib_make_uc_req;
+ else
+ make_req = qib_make_ud_req;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if (!qib_send_ok(qp)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
+ }
+
+ qp->s_flags |= QIB_S_BUSY;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ do {
+ /* Check for a constructed packet to be sent. */
+ if (qp->s_hdrwords != 0) {
+ /*
+ * If the packet cannot be sent now, return and
+ * the send tasklet will be woken up later.
+ */
+ if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
+ qp->s_cur_sge, qp->s_cur_size))
+ break;
+ /* Record that s_hdr is empty. */
+ qp->s_hdrwords = 0;
+ }
+ } while (make_req(qp));
+}
+
+/*
+ * This should be called with s_lock held.
+ */
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+ enum ib_wc_status status)
+{
+ u32 old_last, last;
+ unsigned i;
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ qib_put_mr(sge->mr);
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+
+ /* See ch. 11.2.4.1 and 10.7.3.1 */
+ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS) {
+ struct ib_wc wc;
+
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = status;
+ wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+ wc.qp = &qp->ibqp;
+ if (status == IB_WC_SUCCESS)
+ wc.byte_len = wqe->length;
+ qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
+ }
+
+ last = qp->s_last;
+ old_last = last;
+ if (++last >= qp->s_size)
+ last = 0;
+ qp->s_last = last;
+ if (qp->s_acked == old_last)
+ qp->s_acked = last;
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
+}
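
The s_last/s_acked/s_cur/s_tail updates above are all instances of one ring-index advance; sketched on its own (editorial illustration, not part of the patch):

	/* Advance a send-queue index, wrapping at the queue size. */
	static inline u32 advance_sq_index(u32 idx, u32 size)
	{
		if (++idx >= size)
			idx = 0;
		return idx;
	}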
diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index aa47eb54952..911205d3d5a 100644
--- a/drivers/infiniband/hw/ipath/ipath_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -32,22 +33,45 @@
*/
/*
* This file contains all of the code that is specific to the SerDes
- * on the InfiniPath 7220 chip.
+ * on the QLogic_IB 7220 chip.
*/
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-#include "ipath_7220.h"
+#include "qib.h"
+#include "qib_7220.h"
+
+#define SD7220_FW_NAME "qlogic/sd7220.fw"
+MODULE_FIRMWARE(SD7220_FW_NAME);
+
+/*
+ * Same as in qib_iba7220.c, but just the registers needed here.
+ * Could move the whole set to qib_7220.h, but it seems better to keep
+ * them local.

+ */
+#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64))
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_xgxs_cfg KREG_IDX(XGXSCfg)
+/* these are used only here, not in qib_iba7220.c */
+#define kr_ibsd_epb_access_ctrl KREG_IDX(ibsd_epb_access_ctrl)
+#define kr_ibsd_epb_transaction_reg KREG_IDX(ibsd_epb_transaction_reg)
+#define kr_pciesd_epb_transaction_reg KREG_IDX(pciesd_epb_transaction_reg)
+#define kr_pciesd_epb_access_ctrl KREG_IDX(pciesd_epb_access_ctrl)
+#define kr_serdes_ddsrxeq0 KREG_IDX(SerDes_DDSRXEQ0)
/*
* The IBSerDesMappTable is a memory that holds values to be stored in
- * various SerDes registers by IBC. It is not part of the normal kregs
- * map and is used in exactly one place, hence the #define below.
+ * various SerDes registers by IBC.
*/
-#define KR_IBSerDesMappTable (0x94000 / (sizeof(uint64_t)))
+#define kr_serdes_maptable KREG_IDX(IBSerDesMappTable)
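
KREG_IDX() converts a register's byte offset (from the generated chip header) into an index into the 64-bit-wide kreg mapping, so a read amounts to the sketch below (hypothetical helper name, editorial illustration only):

	#include <linux/io.h>

	/* Hypothetical sketch: read a kreg given its KREG_IDX()-style index. */
	static inline u64 example_read_kreg64(const u64 __iomem *kregbase, u32 regno)
	{
		return readq(kregbase + regno);	/* regno == byte offset / sizeof(u64) */
	}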
/*
* Below used for sdnum parameter, selecting one of the two sections
@@ -71,42 +95,44 @@
#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8))
/* Forward declarations. */
-static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
- u32 data, u32 mask);
-static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+ u32 data, u32 mask);
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
int mask);
-static int ipath_sd_trimdone_poll(struct ipath_devdata *dd);
-static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
- const char *where);
-static int ipath_sd_setvals(struct ipath_devdata *dd);
-static int ipath_sd_early(struct ipath_devdata *dd);
-static int ipath_sd_dactrim(struct ipath_devdata *dd);
-/* Set the registers that IBC may muck with to their default "preset" values */
-int ipath_sd7220_presets(struct ipath_devdata *dd);
-static int ipath_internal_presets(struct ipath_devdata *dd);
+static int qib_sd_trimdone_poll(struct qib_devdata *dd);
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd, const char *where);
+static int qib_sd_setvals(struct qib_devdata *dd);
+static int qib_sd_early(struct qib_devdata *dd);
+static int qib_sd_dactrim(struct qib_devdata *dd);
+static int qib_internal_presets(struct qib_devdata *dd);
/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */
-static int ipath_sd_trimself(struct ipath_devdata *dd, int val);
-static int epb_access(struct ipath_devdata *dd, int sdnum, int claim);
-
-void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup);
+static int qib_sd_trimself(struct qib_devdata *dd, int val);
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim);
+static int qib_sd7220_ib_load(struct qib_devdata *dd,
+ const struct firmware *fw);
+static int qib_sd7220_ib_vfy(struct qib_devdata *dd,
+ const struct firmware *fw);
/*
* Below keeps track of whether the "once per power-on" initialization has
* been done, because uC code Version 1.32.17 or higher allows the uC to
* be reset at will, and Automatic Equalization may require it. So the
- * state of the reset "pin", as reflected in was_reset parameter to
- * ipath_sd7220_init() is no longer valid. Instead, we check for the
+ * state of the reset "pin" is no longer valid. Instead, we check for the
* actual uC code having been loaded.
*/
-static int ipath_ibsd_ucode_loaded(struct ipath_devdata *dd)
+static int qib_ibsd_ucode_loaded(struct qib_pportdata *ppd,
+ const struct firmware *fw)
{
- if (!dd->serdes_first_init_done && (ipath_sd7220_ib_vfy(dd) > 0))
- dd->serdes_first_init_done = 1;
- return dd->serdes_first_init_done;
+ struct qib_devdata *dd = ppd->dd;
+
+ if (!dd->cspec->serdes_first_init_done &&
+ qib_sd7220_ib_vfy(dd, fw) > 0)
+ dd->cspec->serdes_first_init_done = 1;
+ return dd->cspec->serdes_first_init_done;
}
-/* repeat #define for local use. "Real" #define is in ipath_iba7220.c */
-#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
+/* repeat #define for local use. "Real" #define is in qib_iba7220.c */
+#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF))
#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF))
#define UC_PAR_CLR_D 8
@@ -114,25 +140,25 @@ static int ipath_ibsd_ucode_loaded(struct ipath_devdata *dd)
#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS)
#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27)
-void ipath_sd7220_clr_ibpar(struct ipath_devdata *dd)
+void qib_sd7220_clr_ibpar(struct qib_devdata *dd)
{
int ret;
/* clear, then re-enable parity errs */
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
UC_PAR_CLR_D, UC_PAR_CLR_M);
if (ret < 0) {
- ipath_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
+ qib_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
goto bail;
}
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0,
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0,
UC_PAR_CLR_M);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
udelay(4);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
- INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_write_kreg(dd, kr_hwerrclear,
+ QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+ qib_read_kreg32(dd, kr_scratch);
bail:
return;
}
@@ -146,7 +172,7 @@ bail:
#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS)
#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS)
-static int ipath_resync_ibepb(struct ipath_devdata *dd)
+static int qib_resync_ibepb(struct qib_devdata *dd)
{
int ret, pat, tries, chn;
u32 loc;
@@ -155,43 +181,42 @@ static int ipath_resync_ibepb(struct ipath_devdata *dd)
chn = 0;
for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) {
loc = IB_PGUDP(chn);
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
if (ret < 0) {
- ipath_dev_err(dd, "Failed read in resync\n");
+ qib_dev_err(dd, "Failed read in resync\n");
continue;
}
if (ret != 0xF0 && ret != 0x55 && tries == 0)
- ipath_dev_err(dd, "unexpected pattern in resync\n");
+ qib_dev_err(dd, "unexpected pattern in resync\n");
pat = ret ^ 0xA5; /* alternate F0 and 55 */
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
if (ret < 0) {
- ipath_dev_err(dd, "Failed write in resync\n");
+ qib_dev_err(dd, "Failed write in resync\n");
continue;
}
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
if (ret < 0) {
- ipath_dev_err(dd, "Failed re-read in resync\n");
+ qib_dev_err(dd, "Failed re-read in resync\n");
continue;
}
if (ret != pat) {
- ipath_dev_err(dd, "Failed compare1 in resync\n");
+ qib_dev_err(dd, "Failed compare1 in resync\n");
continue;
}
loc = IB_CMUDONE(chn);
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
if (ret < 0) {
- ipath_dev_err(dd, "Failed CMUDONE rd in resync\n");
+ qib_dev_err(dd, "Failed CMUDONE rd in resync\n");
continue;
}
if ((ret & 0x70) != ((chn << 4) | 0x40)) {
- ipath_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
- ret, chn);
+ qib_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
+ ret, chn);
continue;
}
if (++chn == 4)
break; /* Success */
}
- ipath_cdbg(VERBOSE, "Resync in %d tries\n", tries);
return (ret > 0) ? 0 : ret;
}
@@ -199,32 +224,32 @@ static int ipath_resync_ibepb(struct ipath_devdata *dd)
* Localize the stuff that should be done to change IB uC reset
* returns <0 for errors.
*/
-static int ipath_ibsd_reset(struct ipath_devdata *dd, int assert_rst)
+static int qib_ibsd_reset(struct qib_devdata *dd, int assert_rst)
{
u64 rst_val;
int ret = 0;
unsigned long flags;
- rst_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+ rst_val = qib_read_kreg64(dd, kr_ibserdesctrl);
if (assert_rst) {
/*
* Vendor recommends "interrupting" uC before reset, to
* minimize possible glitches.
*/
- spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
epb_access(dd, IB_7220_SERDES, 1);
rst_val |= 1ULL;
/* Squelch possible parity error from _asserting_ reset */
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask &
- ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val);
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask &
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+ qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
/* flush write, delay to ensure it took effect */
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
udelay(2);
/* once it's reset, can remove interrupt */
epb_access(dd, IB_7220_SERDES, -1);
- spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
} else {
/*
* Before we de-assert reset, we need to deal with
@@ -235,47 +260,47 @@ static int ipath_ibsd_reset(struct ipath_devdata *dd, int assert_rst)
*/
u64 val;
rst_val &= ~(1ULL);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask &
- ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR);
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask &
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
- ret = ipath_resync_ibepb(dd);
+ ret = qib_resync_ibepb(dd);
if (ret < 0)
- ipath_dev_err(dd, "unable to re-sync IB EPB\n");
+ qib_dev_err(dd, "unable to re-sync IB EPB\n");
/* set uC control regs to suppress parity errs */
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
if (ret < 0)
goto bail;
 /* IB uC code past Version 1.32.17 allows suppression of wdog */
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
0x80);
if (ret < 0) {
- ipath_dev_err(dd, "Failed to set WDOG disable\n");
+ qib_dev_err(dd, "Failed to set WDOG disable\n");
goto bail;
}
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val);
+ qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
/* flush write, delay for startup */
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
udelay(1);
/* clear, then re-enable parity errs */
- ipath_sd7220_clr_ibpar(dd);
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
- if (val & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR) {
- ipath_dev_err(dd, "IBUC Parity still set after RST\n");
- dd->ipath_hwerrmask &=
- ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR;
+ qib_sd7220_clr_ibpar(dd);
+ val = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (val & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) {
+ qib_dev_err(dd, "IBUC Parity still set after RST\n");
+ dd->cspec->hwerrmask &=
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR;
}
- ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
- dd->ipath_hwerrmask);
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask);
}
bail:
return ret;
}
-static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
- const char *where)
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
+ const char *where)
{
int ret, chn, baduns;
u64 val;
@@ -286,70 +311,74 @@ static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
/* give time for reset to settle out in EPB */
udelay(2);
- ret = ipath_resync_ibepb(dd);
+ ret = qib_resync_ibepb(dd);
if (ret < 0)
- ipath_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where);
+ qib_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where);
/* Do "sacrificial read" to get EPB in sane state after reset */
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
if (ret < 0)
- ipath_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where);
+ qib_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where);
/* Check/show "summary" Trim-done bit in IBCStatus */
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
- if (val & (1ULL << 11))
- ipath_cdbg(VERBOSE, "IBCS TRIMDONE set (%s)\n", where);
- else
- ipath_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where);
-
+ val = qib_read_kreg64(dd, kr_ibcstatus);
+ if (!(val & (1ULL << 11)))
+ qib_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where);
+ /*
+ * Do "dummy read/mod/wr" to get EPB in sane state after reset
+ * The default value for MPREG6 is 0.
+ */
udelay(2);
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
if (ret < 0)
- ipath_dev_err(dd, "Failed Dummy RMW, (%s)\n", where);
+ qib_dev_err(dd, "Failed Dummy RMW, (%s)\n", where);
udelay(10);
baduns = 0;
for (chn = 3; chn >= 0; --chn) {
/* Read CTRL reg for each channel to check TRIMDONE */
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0, 0);
if (ret < 0)
- ipath_dev_err(dd, "Failed checking TRIMDONE, chn %d"
- " (%s)\n", chn, where);
+ qib_dev_err(dd,
+ "Failed checking TRIMDONE, chn %d (%s)\n",
+ chn, where);
if (!(ret & 0x10)) {
int probe;
+
baduns |= (1 << chn);
- ipath_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
- " (%s)\n", chn, ret, where);
- probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ qib_dev_err(dd,
+ "TRIMDONE cleared on chn %d (%02X). (%s)\n",
+ chn, ret, where);
+ probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_PGUDP(0), 0, 0);
- ipath_dev_err(dd, "probe is %d (%02X)\n",
+ qib_dev_err(dd, "probe is %d (%02X)\n",
probe, probe);
- probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0, 0);
- ipath_dev_err(dd, "re-read: %d (%02X)\n",
+ qib_dev_err(dd, "re-read: %d (%02X)\n",
probe, probe);
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0x10, 0x10);
if (ret < 0)
- ipath_dev_err(dd,
+ qib_dev_err(dd,
"Err on TRIMDONE rewrite1\n");
}
}
for (chn = 3; chn >= 0; --chn) {
/* Read CTRL reg for each channel to check TRIMDONE */
if (baduns & (1 << chn)) {
- ipath_dev_err(dd,
- "Reseting TRIMDONE on chn %d (%s)\n",
+ qib_dev_err(dd,
+ "Resetting TRIMDONE on chn %d (%s)\n",
chn, where);
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
IB_CTRL2(chn), 0x10, 0x10);
if (ret < 0)
- ipath_dev_err(dd, "Failed re-setting "
- "TRIMDONE, chn %d (%s)\n",
+ qib_dev_err(dd,
+ "Failed re-setting TRIMDONE, chn %d (%s)\n",
chn, where);
}
}
@@ -361,96 +390,94 @@ static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd,
* Post IB uC code version 1.32.17, was_reset being 1 is not really
* informative, so we double-check.
*/
-int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset)
+int qib_sd7220_init(struct qib_devdata *dd)
{
+ const struct firmware *fw;
int ret = 1; /* default to failure */
- int first_reset;
- int val_stat;
+ int first_reset, was_reset;
+ /* SERDES MPU reset recorded in D0 */
+ was_reset = (qib_read_kreg64(dd, kr_ibserdesctrl) & 1);
if (!was_reset) {
/* entered with reset not asserted, we need to do it */
- ipath_ibsd_reset(dd, 1);
- ipath_sd_trimdone_monitor(dd, "Driver-reload");
+ qib_ibsd_reset(dd, 1);
+ qib_sd_trimdone_monitor(dd, "Driver-reload");
}
- /* Substitute our deduced value for was_reset */
- ret = ipath_ibsd_ucode_loaded(dd);
- if (ret < 0) {
- ret = 1;
+ ret = request_firmware(&fw, SD7220_FW_NAME, &dd->pcidev->dev);
+ if (ret) {
+ qib_dev_err(dd, "Failed to load IB SERDES image\n");
goto done;
}
- first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
+ /* Substitute our deduced value for was_reset */
+ ret = qib_ibsd_ucode_loaded(dd->pport, fw);
+ if (ret < 0)
+ goto bail;
+
+ first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
/*
* Alter some regs per vendor latest doc, reset-defaults
* are not right for IB.
*/
- ret = ipath_sd_early(dd);
+ ret = qib_sd_early(dd);
if (ret < 0) {
- ipath_dev_err(dd, "Failed to set IB SERDES early defaults\n");
- ret = 1;
- goto done;
+ qib_dev_err(dd, "Failed to set IB SERDES early defaults\n");
+ goto bail;
}
-
/*
* Set DAC manual trim IB.
* We only do this once after chip has been reset (usually
* same as once per system boot).
*/
if (first_reset) {
- ret = ipath_sd_dactrim(dd);
+ ret = qib_sd_dactrim(dd);
if (ret < 0) {
- ipath_dev_err(dd, "Failed IB SERDES DAC trim\n");
- ret = 1;
- goto done;
+ qib_dev_err(dd, "Failed IB SERDES DAC trim\n");
+ goto bail;
}
}
-
/*
* Set various registers (DDS and RXEQ) that will be
* controlled by IBC (in 1.2 mode) to reasonable preset values
* Calling the "internal" version avoids the "check for needed"
* and "trimdone monitor" that might be counter-productive.
*/
- ret = ipath_internal_presets(dd);
+ ret = qib_internal_presets(dd);
if (ret < 0) {
- ipath_dev_err(dd, "Failed to set IB SERDES presets\n");
- ret = 1;
- goto done;
+ qib_dev_err(dd, "Failed to set IB SERDES presets\n");
+ goto bail;
}
- ret = ipath_sd_trimself(dd, 0x80);
+ ret = qib_sd_trimself(dd, 0x80);
if (ret < 0) {
- ipath_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
- ret = 1;
- goto done;
+ qib_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
+ goto bail;
}
/* Load image, then try to verify */
- ret = 0; /* Assume success */
+ ret = 0; /* Assume success */
if (first_reset) {
int vfy;
int trim_done;
- ipath_dbg("SerDes uC was reset, reloading PRAM\n");
- ret = ipath_sd7220_ib_load(dd);
+
+ ret = qib_sd7220_ib_load(dd, fw);
if (ret < 0) {
- ipath_dev_err(dd, "Failed to load IB SERDES image\n");
- ret = 1;
- goto done;
- }
+ qib_dev_err(dd, "Failed to load IB SERDES image\n");
+ goto bail;
+ } else {
+ /* Loaded image, try to verify */
+ vfy = qib_sd7220_ib_vfy(dd, fw);
+ if (vfy != ret) {
+ qib_dev_err(dd, "SERDES PRAM VFY failed\n");
+ goto bail;
+ } /* end if verified */
+ } /* end if loaded */
- /* Loaded image, try to verify */
- vfy = ipath_sd7220_ib_vfy(dd);
- if (vfy != ret) {
- ipath_dev_err(dd, "SERDES PRAM VFY failed\n");
- ret = 1;
- goto done;
- }
/*
* Loaded and verified. Almost good...
* hold "success" in ret
*/
ret = 0;
-
/*
* Prev steps all worked, continue bringup
* De-assert RESET to uC, only in first reset, to allow
@@ -461,45 +488,49 @@ int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset)
*/
ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38);
if (ret < 0) {
- ipath_dev_err(dd, "Failed clearing START_EQ1\n");
- ret = 1;
- goto done;
+ qib_dev_err(dd, "Failed clearing START_EQ1\n");
+ goto bail;
}
- ipath_ibsd_reset(dd, 0);
+ qib_ibsd_reset(dd, 0);
/*
* If this is not the first reset, trimdone should be set
- * already.
+ * already. We may need to double-check this.
*/
- trim_done = ipath_sd_trimdone_poll(dd);
+ trim_done = qib_sd_trimdone_poll(dd);
/*
* Whether or not trimdone succeeded, we need to put the
* uC back into reset to avoid a possible fight with the
* IBC state-machine.
*/
- ipath_ibsd_reset(dd, 1);
+ qib_ibsd_reset(dd, 1);
if (!trim_done) {
- ipath_dev_err(dd, "No TRIMDONE seen\n");
- ret = 1;
- goto done;
+ qib_dev_err(dd, "No TRIMDONE seen\n");
+ goto bail;
}
-
- ipath_sd_trimdone_monitor(dd, "First-reset");
+ /*
+ * DEBUG: check each time we reset if trimdone bits have
+ * gotten cleared, and re-set them.
+ */
+ qib_sd_trimdone_monitor(dd, "First-reset");
/* Remember so we do not re-do the load, dactrim, etc. */
- dd->serdes_first_init_done = 1;
+ dd->cspec->serdes_first_init_done = 1;
}
/*
- * Setup for channel training and load values for
+ * setup for channel training and load values for
* RxEq and DDS in tables used by IBC in IB1.2 mode
*/
-
- val_stat = ipath_sd_setvals(dd);
- if (val_stat < 0)
- ret = 1;
+ ret = 0;
+ if (qib_sd_setvals(dd) >= 0)
+ goto done;
+bail:
+ ret = 1;
done:
/* start relock timer regardless, but start at 1 second */
- ipath_set_relock_poll(dd, -1);
+ set_7220_relock_poll(dd, -1);
+
+ release_firmware(fw);
return ret;
}
@@ -517,7 +548,7 @@ done:
* the "claim" parameter is >0 to claim, <0 to release, 0 to query.
* Returns <0 for errors, >0 if we had ownership, else 0.
*/
-static int epb_access(struct ipath_devdata *dd, int sdnum, int claim)
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
{
u16 acc;
u64 accval;
@@ -525,28 +556,30 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim)
u64 oct_sel = 0;
switch (sdnum) {
- case IB_7220_SERDES :
+ case IB_7220_SERDES:
/*
 * The IB SERDES "ownership" is fairly simple: a single
 * request/grant.
*/
- acc = dd->ipath_kregs->kr_ib_epbacc;
+ acc = kr_ibsd_epb_access_ctrl;
break;
- case PCIE_SERDES0 :
- case PCIE_SERDES1 :
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
/* PCIe SERDES has two "octants", need to select which */
- acc = dd->ipath_kregs->kr_pcie_epbacc;
+ acc = kr_pciesd_epb_access_ctrl;
oct_sel = (2 << (sdnum - PCIE_SERDES0));
break;
- default :
+
+ default:
return 0;
}
/* Make sure any outstanding transaction was seen */
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
udelay(15);
- accval = ipath_read_kreg32(dd, acc);
+ accval = qib_read_kreg32(dd, acc);
owned = !!(accval & EPB_ACC_GNT);
if (claim < 0) {
@@ -557,22 +590,22 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim)
* Both should be clear
*/
u64 newval = 0;
- ipath_write_kreg(dd, acc, newval);
+ qib_write_kreg(dd, acc, newval);
/* First read after write is not trustworthy */
- pollval = ipath_read_kreg32(dd, acc);
+ pollval = qib_read_kreg32(dd, acc);
udelay(5);
- pollval = ipath_read_kreg32(dd, acc);
+ pollval = qib_read_kreg32(dd, acc);
if (pollval & EPB_ACC_GNT)
owned = -1;
} else if (claim > 0) {
/* Need to claim */
u64 pollval;
u64 newval = EPB_ACC_REQ | oct_sel;
- ipath_write_kreg(dd, acc, newval);
+ qib_write_kreg(dd, acc, newval);
/* First read after write is not trustworthy */
- pollval = ipath_read_kreg32(dd, acc);
+ pollval = qib_read_kreg32(dd, acc);
udelay(5);
- pollval = ipath_read_kreg32(dd, acc);
+ pollval = qib_read_kreg32(dd, acc);
if (!(pollval & EPB_ACC_GNT))
owned = -1;
}
@@ -582,18 +615,17 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim)
/*
* Lemma to deal with race condition of write..read to epb regs
*/
-static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
+static int epb_trans(struct qib_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
{
int tries;
u64 transval;
-
- ipath_write_kreg(dd, reg, i_val);
+ qib_write_kreg(dd, reg, i_val);
/* Throw away first read, as RDY bit may be stale */
- transval = ipath_read_kreg64(dd, reg);
+ transval = qib_read_kreg64(dd, reg);
for (tries = EPB_TRANS_TRIES; tries; --tries) {
- transval = ipath_read_kreg32(dd, reg);
+ transval = qib_read_kreg32(dd, reg);
if (transval & EPB_TRANS_RDY)
break;
udelay(5);
@@ -606,9 +638,8 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
}
/**
- *
- * ipath_sd7220_reg_mod - modify SERDES register
- * @dd: the infinipath device
+ * qib_sd7220_reg_mod - modify SERDES register
+ * @dd: the qlogic_ib device
* @sdnum: which SERDES to access
* @loc: location - channel, element, register, as packed by EPB_LOC() macro.
* @wd: Write Data - value to set in register
@@ -619,8 +650,8 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
* returns current (presumed, if a write was done) contents of selected
* register, or <0 if errors.
*/
-static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
- u32 wd, u32 mask)
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+ u32 wd, u32 mask)
{
u16 trans;
u64 transval;
@@ -629,14 +660,16 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
unsigned long flags;
switch (sdnum) {
- case IB_7220_SERDES :
- trans = dd->ipath_kregs->kr_ib_epbtrans;
+ case IB_7220_SERDES:
+ trans = kr_ibsd_epb_transaction_reg;
break;
- case PCIE_SERDES0 :
- case PCIE_SERDES1 :
- trans = dd->ipath_kregs->kr_pcie_epbtrans;
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
+ trans = kr_pciesd_epb_transaction_reg;
break;
- default :
+
+ default:
return -1;
}
@@ -644,23 +677,23 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
* All access is locked in software (vs other host threads) and
* hardware (vs uC access).
*/
- spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
owned = epb_access(dd, sdnum, 1);
if (owned < 0) {
- spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
return -1;
}
ret = 0;
for (tries = EPB_TRANS_TRIES; tries; --tries) {
- transval = ipath_read_kreg32(dd, trans);
+ transval = qib_read_kreg32(dd, trans);
if (transval & EPB_TRANS_RDY)
break;
udelay(5);
}
if (tries > 0) {
- tries = 1; /* to make read-skip work */
+ tries = 1; /* to make read-skip work */
if (mask != 0xFF) {
/*
* Not a pure write, so need to read.
@@ -688,7 +721,7 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
else
ret = transval & EPB_DATA_MASK;
- spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
if (tries <= 0)
ret = -1;
return ret;
@@ -707,7 +740,7 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc,
#define EPB_RAMDATA EPB_LOC(6, 0, 5)
/* Transfer data to/from uC Program RAM of IB or PCIe SerDes */
-static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
+static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc,
u8 *buf, int cnt, int rd_notwr)
{
u16 trans;
@@ -723,29 +756,28 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
/* Pick appropriate transaction reg and "Chip select" for this serdes */
switch (sdnum) {
- case IB_7220_SERDES :
+ case IB_7220_SERDES:
csbit = 1ULL << EPB_IB_UC_CS_SHF;
- trans = dd->ipath_kregs->kr_ib_epbtrans;
+ trans = kr_ibsd_epb_transaction_reg;
break;
- case PCIE_SERDES0 :
- case PCIE_SERDES1 :
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
/* PCIe SERDES has uC "chip select" in different bit, too */
csbit = 1ULL << EPB_PCIE_UC_CS_SHF;
- trans = dd->ipath_kregs->kr_pcie_epbtrans;
+ trans = kr_pciesd_epb_transaction_reg;
break;
- default :
+
+ default:
return -1;
}
op = rd_notwr ? "Rd" : "Wr";
- spin_lock_irqsave(&dd->ipath_sdepb_lock, flags);
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
owned = epb_access(dd, sdnum, 1);
if (owned < 0) {
- spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
- ipath_dbg("Could not get %s access to %s EPB: %X, loc %X\n",
- op, (sdnum == IB_7220_SERDES) ? "IB" : "PCIe",
- owned, loc);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
return -1;
}
@@ -758,16 +790,14 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
*/
addr = loc & 0x1FFF;
for (tries = EPB_TRANS_TRIES; tries; --tries) {
- transval = ipath_read_kreg32(dd, trans);
+ transval = qib_read_kreg32(dd, trans);
if (transval & EPB_TRANS_RDY)
break;
udelay(5);
}
sofar = 0;
- if (tries <= 0)
- ipath_dbg("No initial RDY on EPB access request\n");
- else {
+ if (tries > 0) {
/*
* Every "memory" access is doubly-indirect.
* We set two bytes of address, then read/write
@@ -778,8 +808,6 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
transval = csbit | EPB_UC_CTL |
(rd_notwr ? EPB_ROM_R : EPB_ROM_W);
tries = epb_trans(dd, trans, transval, &transval);
- if (tries <= 0)
- ipath_dbg("No EPB response to uC %s cmd\n", op);
while (tries > 0 && sofar < cnt) {
if (!sofar) {
/* Only set address at start of chunk */
@@ -787,18 +815,14 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
transval = csbit | EPB_MADDRH | addrbyte;
tries = epb_trans(dd, trans, transval,
&transval);
- if (tries <= 0) {
- ipath_dbg("No EPB response ADDRH\n");
+ if (tries <= 0)
break;
- }
addrbyte = (addr + sofar) & 0xFF;
transval = csbit | EPB_MADDRL | addrbyte;
tries = epb_trans(dd, trans, transval,
&transval);
- if (tries <= 0) {
- ipath_dbg("No EPB response ADDRL\n");
+ if (tries <= 0)
break;
- }
}
if (rd_notwr)
@@ -806,10 +830,8 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
else
transval = csbit | EPB_ROMDATA | buf[sofar];
tries = epb_trans(dd, trans, transval, &transval);
- if (tries <= 0) {
- ipath_dbg("No EPB response DATA\n");
+ if (tries <= 0)
break;
- }
if (rd_notwr)
buf[sofar] = transval & EPB_DATA_MASK;
++sofar;
@@ -817,8 +839,6 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
/* Finally, clear control-bit for Read or Write */
transval = csbit | EPB_UC_CTL;
tries = epb_trans(dd, trans, transval, &transval);
- if (tries <= 0)
- ipath_dbg("No EPB response to drop of uC %s cmd\n", op);
}
ret = sofar;
@@ -826,18 +846,16 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc,
if (epb_access(dd, sdnum, -1) < 0)
ret = -1;
- spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags);
- if (tries <= 0) {
- ipath_dbg("SERDES PRAM %s failed after %d bytes\n", op, sofar);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ if (tries <= 0)
ret = -1;
- }
return ret;
}
#define PROG_CHUNK 64
-int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum,
- u8 *img, int len, int offset)
+static int qib_sd7220_prog_ld(struct qib_devdata *dd, int sdnum,
+ const u8 *img, int len, int offset)
{
int cnt, sofar, req;
@@ -846,8 +864,8 @@ int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum,
req = len - sofar;
if (req > PROG_CHUNK)
req = PROG_CHUNK;
- cnt = ipath_sd7220_ram_xfer(dd, sdnum, offset + sofar,
- img + sofar, req, 0);
+ cnt = qib_sd7220_ram_xfer(dd, sdnum, offset + sofar,
+ (u8 *)img + sofar, req, 0);
if (cnt < req) {
sofar = -1;
break;
@@ -860,8 +878,8 @@ int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum,
#define VFY_CHUNK 64
#define SD_PRAM_ERROR_LIMIT 42
-int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum,
- const u8 *img, int len, int offset)
+static int qib_sd7220_prog_vfy(struct qib_devdata *dd, int sdnum,
+ const u8 *img, int len, int offset)
{
int cnt, sofar, req, idx, errors;
unsigned char readback[VFY_CHUNK];
@@ -872,7 +890,7 @@ int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum,
req = len - sofar;
if (req > VFY_CHUNK)
req = VFY_CHUNK;
- cnt = ipath_sd7220_ram_xfer(dd, sdnum, sofar + offset,
+ cnt = qib_sd7220_ram_xfer(dd, sdnum, sofar + offset,
readback, req, 1);
if (cnt < req) {
/* failed in read itself */
@@ -888,11 +906,25 @@ int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum,
return errors ? -errors : sofar;
}
-/* IRQ not set up at this point in init, so we poll. */
+static int
+qib_sd7220_ib_load(struct qib_devdata *dd, const struct firmware *fw)
+{
+ return qib_sd7220_prog_ld(dd, IB_7220_SERDES, fw->data, fw->size, 0);
+}
+
+static int
+qib_sd7220_ib_vfy(struct qib_devdata *dd, const struct firmware *fw)
+{
+ return qib_sd7220_prog_vfy(dd, IB_7220_SERDES, fw->data, fw->size, 0);
+}
+
+/*
+ * IRQ not set up at this point in init, so we poll.
+ */
#define IB_SERDES_TRIM_DONE (1ULL << 11)
#define TRIM_TMO (30)
-static int ipath_sd_trimdone_poll(struct ipath_devdata *dd)
+static int qib_sd_trimdone_poll(struct qib_devdata *dd)
{
int trim_tmo, ret;
uint64_t val;
@@ -903,16 +935,15 @@ static int ipath_sd_trimdone_poll(struct ipath_devdata *dd)
*/
ret = 0;
for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) {
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ val = qib_read_kreg64(dd, kr_ibcstatus);
if (val & IB_SERDES_TRIM_DONE) {
- ipath_cdbg(VERBOSE, "TRIMDONE after %d\n", trim_tmo);
ret = 1;
break;
}
msleep(10);
}
if (trim_tmo >= TRIM_TMO) {
- ipath_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo);
+ qib_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo);
ret = 0;
}
return ret;
@@ -964,8 +995,7 @@ static struct dds_init {
};
/*
- * Next, values related to Receive Equalization.
- * In comments, FDR (Full) is IB DDR, HDR (Half) is IB SDR
+ * Now the RXEQ section of the table.
*/
/* Hardware packs an element number and register address thus: */
#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4))
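
Worked example (editorial note, not part of the patch): the "Preset" entry used first in rxeq_init_vals[] below packs element 7, register 0x27 as

	RXEQ_INIT_RDESC(7, 0x27) = (7 & 0xF) | (0x27 << 4) = 0x277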
@@ -981,23 +1011,23 @@ static struct dds_init {
#define RXEQ_SDR_ZCNT 23
static struct rxeq_init {
- u16 rdesc; /* in form used in SerDesDDSRXEQ */
+ u16 rdesc; /* in form used in SerDesDDSRXEQ */
u8 rdata[4];
} rxeq_init_vals[] = {
/* Set Rcv Eq. to Preset node */
RXEQ_VAL_ALL(7, 0x27, 0x10),
/* Set DFELTHFDR/HDR thresholds */
- RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR */
+ RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */
RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
 /* Set TLTHFDR/HDR threshold */
- RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR */
- RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */
+ RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */
+ RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was 0, 1, 2, 3 */
/* Set Preamp setting 2 (ZFR/ZCNT) */
- RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR */
- RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR */
+ RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */
+ RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */
/* Set Preamp DC gain and Setting 1 (GFR/GHR) */
- RXEQ_VAL(7, 0x1E, 0x10, 0x10, 0x10, 0x10), /* FDR */
- RXEQ_VAL(7, 0x1F, 0x10, 0x10, 0x10, 0x10), /* HDR */
+ RXEQ_VAL(7, 0x1E, 16, 16, 16, 16), /* FDR, was 16, 17, 18, 20 */
+ RXEQ_VAL(7, 0x1F, 16, 16, 16, 16), /* HDR, was 16, 17, 18, 20 */
/* Toggle RELOCK (in VCDL_CTRL0) to lock to data */
RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */
RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */
@@ -1007,27 +1037,27 @@ static struct rxeq_init {
#define DDS_ROWS (16)
#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals)
-static int ipath_sd_setvals(struct ipath_devdata *dd)
+static int qib_sd_setvals(struct qib_devdata *dd)
{
int idx, midx;
- int min_idx; /* Minimum index for this portion of table */
+ int min_idx; /* Minimum index for this portion of table */
uint32_t dds_reg_map;
u64 __iomem *taddr, *iaddr;
uint64_t data;
uint64_t sdctl;
- taddr = dd->ipath_kregbase + KR_IBSerDesMappTable;
- iaddr = dd->ipath_kregbase + dd->ipath_kregs->kr_ib_ddsrxeq;
+ taddr = dd->kregbase + kr_serdes_maptable;
+ iaddr = dd->kregbase + kr_serdes_ddsrxeq0;
/*
* Init the DDS section of the table.
* Each "row" of the table provokes NUM_DDS_REG writes, to the
* registers indicated in DDS_REG_MAP.
*/
- sdctl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl);
+ sdctl = qib_read_kreg64(dd, kr_ibserdesctrl);
sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8);
sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13);
- ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, sdctl);
+ qib_write_kreg(dd, kr_ibserdesctrl, sdctl);
/*
* Iterate down table within loop for each register to store.
@@ -1037,21 +1067,21 @@ static int ipath_sd_setvals(struct ipath_devdata *dd)
data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT;
writeq(data, iaddr + idx);
mmiowb();
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
dds_reg_map >>= 4;
for (midx = 0; midx < DDS_ROWS; ++midx) {
u64 __iomem *daddr = taddr + ((midx << 4) + idx);
data = dds_init_vals[midx].reg_vals[idx];
writeq(data, daddr);
mmiowb();
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
} /* End inner for (vals for this reg, each row) */
} /* end outer for (regs to be stored) */
/*
- * Init the RXEQ section of the table. As explained above the table
- * rxeq_init_vals[], this runs in a different order, as the pattern
- * of register references is more complex, but there are only
+ * Init the RXEQ section of the table.
+ * This runs in a different order, as the pattern of
+ * register references is more complex, but there are only
* four "data" values per register.
*/
min_idx = idx; /* RXEQ indices pick up where DDS left off */
@@ -1066,13 +1096,13 @@ static int ipath_sd_setvals(struct ipath_devdata *dd)
/* Store the next RXEQ register address */
writeq(rxeq_init_vals[idx].rdesc, iaddr + didx);
mmiowb();
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
/* Iterate through RXEQ values */
for (vidx = 0; vidx < 4; vidx++) {
data = rxeq_init_vals[idx].rdata[vidx];
writeq(data, taddr + (vidx << 6) + idx);
mmiowb();
- ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+ qib_read_kreg32(dd, kr_scratch);
}
} /* end outer for (Reg-writes for RXEQ) */
return 0;
@@ -1085,33 +1115,18 @@ static int ipath_sd_setvals(struct ipath_devdata *dd)
#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8)
#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28)
-static int ibsd_sto_noisy(struct ipath_devdata *dd, int loc, int val, int mask)
-{
- int ret = -1;
- int sloc; /* shifted loc, for messages */
-
- loc |= (1U << EPB_IB_QUAD0_CS_SHF);
- sloc = loc >> EPB_ADDR_SHF;
-
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, mask);
- if (ret < 0)
- ipath_dev_err(dd, "Write failed: elt %d,"
- " addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n",
- (sloc & 0xF), (sloc >> 9) & 0x3f, (sloc >> 4) & 7,
- val & 0xFF, mask & 0xFF);
- return ret;
-}
-
/*
* Repeat a "store" across all channels of the IB SerDes.
* Although nominally it inherits the "read value" of the last
* channel it modified, the only really useful return is <0 for
* failure, >= 0 for success. The parameter 'loc' is assumed to
- * be the location for the channel-0 copy of the register to
- * be modified.
+ * be the location in some channel of the register to be modified.
+ * The caller can specify use of the "gang write" option of EPB,
+ * in which case we use the specified channel data for any fields
+ * not explicitly written.
*/
-static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
- int mask)
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+ int mask)
{
int ret = -1;
int chnl;
@@ -1126,23 +1141,26 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
loc |= (1U << EPB_IB_QUAD0_CS_SHF);
chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7;
if (mask != 0xFF) {
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES,
- loc & ~EPB_GLOBAL_WR, 0, 0);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ loc & ~EPB_GLOBAL_WR, 0, 0);
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- ipath_dev_err(dd, "pre-read failed: elt %d,"
- " addr 0x%X, chnl %d\n", (sloc & 0xF),
+
+ qib_dev_err(dd,
+ "pre-read failed: elt %d, addr 0x%X, chnl %d\n",
+ (sloc & 0xF),
(sloc >> 9) & 0x3f, chnl);
return ret;
}
val = (ret & ~mask) | (val & mask);
}
loc &= ~(7 << (4+EPB_ADDR_SHF));
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- ipath_dev_err(dd, "Global WR failed: elt %d,"
- " addr 0x%X, val %02X\n",
+
+ qib_dev_err(dd,
+ "Global WR failed: elt %d, addr 0x%X, val %02X\n",
(sloc & 0xF), (sloc >> 9) & 0x3f, val);
}
return ret;
@@ -1151,14 +1169,14 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
loc &= ~(7 << (4+EPB_ADDR_SHF));
loc |= (1U << EPB_IB_QUAD0_CS_SHF);
for (chnl = 0; chnl < 4; ++chnl) {
- int cloc;
- cloc = loc | (chnl << (4+EPB_ADDR_SHF));
- ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
+ int cloc = loc | (chnl << (4+EPB_ADDR_SHF));
+
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
if (ret < 0) {
int sloc = loc >> EPB_ADDR_SHF;
- ipath_dev_err(dd, "Write failed: elt %d,"
- " addr 0x%X, chnl %d, val 0x%02X,"
- " mask 0x%02X\n",
+
+ qib_dev_err(dd,
+ "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n",
(sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
val & 0xFF, mask & 0xFF);
break;
@@ -1171,7 +1189,7 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val,
* Set the Tx values normally modified by IBC in IB1.2 mode to default
* values, as gotten from first row of init table.
*/
-static int set_dds_vals(struct ipath_devdata *dd, struct dds_init *ddi)
+static int set_dds_vals(struct qib_devdata *dd, struct dds_init *ddi)
{
int ret;
int idx, reg, data;
@@ -1194,7 +1212,7 @@ static int set_dds_vals(struct ipath_devdata *dd, struct dds_init *ddi)
* Set the Rx values normally modified by IBC in IB1.2 mode to default
* values, as gotten from selected column of init table.
*/
-static int set_rxeq_vals(struct ipath_devdata *dd, int vsel)
+static int set_rxeq_vals(struct qib_devdata *dd, int vsel)
{
int ret;
int ridx;
@@ -1202,6 +1220,7 @@ static int set_rxeq_vals(struct ipath_devdata *dd, int vsel)
for (ridx = 0; ridx < cnt; ++ridx) {
int elt, reg, val, loc;
+
elt = rxeq_init_vals[ridx].rdesc & 0xF;
reg = rxeq_init_vals[ridx].rdesc >> 4;
loc = EPB_LOC(0, elt, reg);
@@ -1217,83 +1236,66 @@ static int set_rxeq_vals(struct ipath_devdata *dd, int vsel)
/*
 * Set the default values (row 0) for DDR Driver De-emphasis.
 * We do this initially and whenever we turn off IB-1.2
+ *
* The "default" values for Rx equalization are also stored to
* SerDes registers. Formerly (and still default), we used set 2.
* For experimenting with cables and link-partners, we allow changing
* that via a module parameter.
*/
-static unsigned ipath_rxeq_set = 2;
-module_param_named(rxeq_default_set, ipath_rxeq_set, uint,
- S_IWUSR | S_IRUGO);
+static unsigned qib_rxeq_set = 2;
+module_param_named(rxeq_default_set, qib_rxeq_set, uint,
+ S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(rxeq_default_set,
- "Which set [0..3] of Rx Equalization values is default");
+ "Which set [0..3] of Rx Equalization values is default");
-static int ipath_internal_presets(struct ipath_devdata *dd)
+static int qib_internal_presets(struct qib_devdata *dd)
{
int ret = 0;
ret = set_dds_vals(dd, dds_init_vals + DDS_3M);
if (ret < 0)
- ipath_dev_err(dd, "Failed to set default DDS values\n");
- ret = set_rxeq_vals(dd, ipath_rxeq_set & 3);
+ qib_dev_err(dd, "Failed to set default DDS values\n");
+ ret = set_rxeq_vals(dd, qib_rxeq_set & 3);
if (ret < 0)
- ipath_dev_err(dd, "Failed to set default RXEQ values\n");
+ qib_dev_err(dd, "Failed to set default RXEQ values\n");
return ret;
}
-int ipath_sd7220_presets(struct ipath_devdata *dd)
+int qib_sd7220_presets(struct qib_devdata *dd)
{
int ret = 0;
- if (!dd->ipath_presets_needed)
+ if (!dd->cspec->presets_needed)
return ret;
- dd->ipath_presets_needed = 0;
+ dd->cspec->presets_needed = 0;
/* Assert uC reset, so we don't clash with it. */
- ipath_ibsd_reset(dd, 1);
+ qib_ibsd_reset(dd, 1);
udelay(2);
- ipath_sd_trimdone_monitor(dd, "link-down");
+ qib_sd_trimdone_monitor(dd, "link-down");
- ret = ipath_internal_presets(dd);
-return ret;
+ ret = qib_internal_presets(dd);
+ return ret;
}
-static int ipath_sd_trimself(struct ipath_devdata *dd, int val)
+static int qib_sd_trimself(struct qib_devdata *dd, int val)
{
- return ibsd_sto_noisy(dd, CMUCTRL5, val, 0xFF);
+ int loc = CMUCTRL5 | (1U << EPB_IB_QUAD0_CS_SHF);
+
+ return qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
}
-static int ipath_sd_early(struct ipath_devdata *dd)
+static int qib_sd_early(struct qib_devdata *dd)
{
- int ret = -1; /* Default failed */
- int chnl;
+ int ret;
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, RXHSCTRL0(chnl), 0xD4, 0xFF);
- if (ret < 0)
- goto bail;
- }
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, VCDL_DAC2(chnl), 0x2D, 0xFF);
- if (ret < 0)
- goto bail;
- }
- /* more fine-tuning of what will be default */
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, VCDL_CTRL2(chnl), 3, 0xF);
- if (ret < 0)
- goto bail;
- }
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, START_EQ1(chnl), 0x10, 0xFF);
- if (ret < 0)
- goto bail;
- }
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, START_EQ2(chnl), 0x30, 0xFF);
- if (ret < 0)
- goto bail;
- }
+ ret = ibsd_mod_allchnls(dd, RXHSCTRL0(0) | EPB_GLOBAL_WR, 0xD4, 0xFF);
+ if (ret < 0)
+ goto bail;
+ ret = ibsd_mod_allchnls(dd, START_EQ1(0) | EPB_GLOBAL_WR, 0x10, 0xFF);
+ if (ret < 0)
+ goto bail;
+ ret = ibsd_mod_allchnls(dd, START_EQ2(0) | EPB_GLOBAL_WR, 0x30, 0xFF);
bail:
return ret;
}
@@ -1302,50 +1304,53 @@ bail:
#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6)
#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF)
-static int ipath_sd_dactrim(struct ipath_devdata *dd)
+static int qib_sd_dactrim(struct qib_devdata *dd)
{
- int ret = -1; /* Default failed */
- int chnl;
+ int ret;
+
+ ret = ibsd_mod_allchnls(dd, VCDL_DAC2(0) | EPB_GLOBAL_WR, 0x2D, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ /* more fine-tuning of what will be default */
+ ret = ibsd_mod_allchnls(dd, VCDL_CTRL2(0), 3, 0xF);
+ if (ret < 0)
+ goto bail;
+
+ ret = ibsd_mod_allchnls(dd, BACTRL(0) | EPB_GLOBAL_WR, 0x40, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ ret = ibsd_mod_allchnls(dd, RXHSSTATUS(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+ if (ret < 0)
+ goto bail;
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, BACTRL(chnl), 0x40, 0xFF);
- if (ret < 0)
- goto bail;
- }
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x04, 0xFF);
- if (ret < 0)
- goto bail;
- }
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, RXHSSTATUS(chnl), 0x04, 0xFF);
- if (ret < 0)
- goto bail;
- }
/*
- * delay for max possible number of steps, with slop.
+ * Delay for max possible number of steps, with slop.
* Each step is about 4usec.
*/
udelay(415);
- for (chnl = 0; chnl < 4; ++chnl) {
- ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x00, 0xFF);
- if (ret < 0)
- goto bail;
- }
+
+ ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x00, 0xFF);
+
bail:
return ret;
}
#define RELOCK_FIRST_MS 3
#define RXLSPPM(chan) EPB_LOC(chan, 0, 2)
-void ipath_toggle_rclkrls(struct ipath_devdata *dd)
+void toggle_7220_rclkrls(struct qib_devdata *dd)
{
int loc = RXLSPPM(0) | EPB_GLOBAL_WR;
int ret;
ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
if (ret < 0)
- ipath_dev_err(dd, "RCLKRLS failed to clear D7\n");
+ qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
else {
udelay(1);
ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
@@ -1354,109 +1359,91 @@ void ipath_toggle_rclkrls(struct ipath_devdata *dd)
udelay(1);
ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
if (ret < 0)
- ipath_dev_err(dd, "RCLKRLS failed to clear D7\n");
+ qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
else {
udelay(1);
ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
}
/* Now reset xgxs and IBC to complete the recovery */
- dd->ipath_f_xgxs_reset(dd);
+ dd->f_xgxs_reset(dd->pport);
}
/*
 * Shut down the timer that polls for relock occasions, if needed;
- * this is "hooked" from ipath_7220_quiet_serdes(), which is called
- * just before ipath_shutdown_device() in ipath_driver.c shuts down all
+ * this is "hooked" from qib_7220_quiet_serdes(), which is called
+ * just before qib_shutdown_device() in qib_driver.c shuts down all
* the other timers
*/
-void ipath_shutdown_relock_poll(struct ipath_devdata *dd)
+void shutdown_7220_relock_poll(struct qib_devdata *dd)
{
- struct ipath_relock *irp = &dd->ipath_relock_singleton;
- if (atomic_read(&irp->ipath_relock_timer_active)) {
- del_timer_sync(&irp->ipath_relock_timer);
- atomic_set(&irp->ipath_relock_timer_active, 0);
- }
+ if (dd->cspec->relock_timer_active)
+ del_timer_sync(&dd->cspec->relock_timer);
}
-static unsigned ipath_relock_by_timer = 1;
-module_param_named(relock_by_timer, ipath_relock_by_timer, uint,
- S_IWUSR | S_IRUGO);
+static unsigned qib_relock_by_timer = 1;
+module_param_named(relock_by_timer, qib_relock_by_timer, uint,
+ S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
-static void ipath_run_relock(unsigned long opaque)
+static void qib_run_relock(unsigned long opaque)
{
- struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
- struct ipath_relock *irp = &dd->ipath_relock_singleton;
- u64 val, ltstate;
-
- if (!(dd->ipath_flags & IPATH_INITTED)) {
- /* Not yet up, just reenable the timer for later */
- irp->ipath_relock_interval = HZ;
- mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
- return;
- }
+ struct qib_devdata *dd = (struct qib_devdata *)opaque;
+ struct qib_pportdata *ppd = dd->pport;
+ struct qib_chip_specific *cs = dd->cspec;
+ int timeoff;
/*
- * Check link-training state for "stuck" state.
+ * Check link-training state for "stuck" state, when down.
* if found, try relock and schedule another try at
* exponentially growing delay, maxed at one second.
* if not stuck, our work is done.
*/
- val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
- ltstate = ipath_ib_linktrstate(dd, val);
-
- if (ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT
- && ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
- int timeoff;
- /* Not up yet. Try again, if allowed by module-param */
- if (ipath_relock_by_timer) {
- if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
- ipath_cdbg(VERBOSE, "Skip RELOCK in AUTONEG\n");
- else if (!(dd->ipath_flags & IPATH_IB_LINK_DISABLED)) {
- ipath_cdbg(VERBOSE, "RELOCK\n");
- ipath_toggle_rclkrls(dd);
- }
+ if ((dd->flags & QIB_INITTED) && !(ppd->lflags &
+ (QIBL_IB_AUTONEG_INPROG | QIBL_LINKINIT | QIBL_LINKARMED |
+ QIBL_LINKACTIVE))) {
+ if (qib_relock_by_timer) {
+ if (!(ppd->lflags & QIBL_IB_LINK_DISABLED))
+ toggle_7220_rclkrls(dd);
}
/* re-set timer for next check */
- timeoff = irp->ipath_relock_interval << 1;
+ timeoff = cs->relock_interval << 1;
if (timeoff > HZ)
timeoff = HZ;
- irp->ipath_relock_interval = timeoff;
-
- mod_timer(&irp->ipath_relock_timer, jiffies + timeoff);
- } else {
- /* Up, so no more need to check so often */
- mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
- }
+ cs->relock_interval = timeoff;
+ } else
+ timeoff = HZ;
+ mod_timer(&cs->relock_timer, jiffies + timeoff);
}
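
The rescheduling above is a plain doubling backoff clamped to one second; a minimal sketch of just that step (editorial illustration, not part of the patch):

	/* Double the relock poll interval, never exceeding one second. */
	static inline int next_relock_interval(int interval)
	{
		interval <<= 1;
		return interval > HZ ? HZ : interval;
	}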
-void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup)
+void set_7220_relock_poll(struct qib_devdata *dd, int ibup)
{
- struct ipath_relock *irp = &dd->ipath_relock_singleton;
+ struct qib_chip_specific *cs = dd->cspec;
- if (ibup > 0) {
- /* we are now up, so relax timer to 1 second interval */
- if (atomic_read(&irp->ipath_relock_timer_active))
- mod_timer(&irp->ipath_relock_timer, jiffies + HZ);
+ if (ibup) {
+ /* We are now up, relax timer to 1 second interval */
+ if (cs->relock_timer_active) {
+ cs->relock_interval = HZ;
+ mod_timer(&cs->relock_timer, jiffies + HZ);
+ }
} else {
/* Transition to down, (re-)set timer to short interval. */
- int timeout;
- timeout = (HZ * ((ibup == -1) ? 1000 : RELOCK_FIRST_MS))/1000;
+ unsigned int timeout;
+
+ timeout = msecs_to_jiffies(RELOCK_FIRST_MS);
if (timeout == 0)
timeout = 1;
/* If timer has not yet been started, do so. */
- if (atomic_inc_return(&irp->ipath_relock_timer_active) == 1) {
- init_timer(&irp->ipath_relock_timer);
- irp->ipath_relock_timer.function = ipath_run_relock;
- irp->ipath_relock_timer.data = (unsigned long) dd;
- irp->ipath_relock_interval = timeout;
- irp->ipath_relock_timer.expires = jiffies + timeout;
- add_timer(&irp->ipath_relock_timer);
+ if (!cs->relock_timer_active) {
+ cs->relock_timer_active = 1;
+ init_timer(&cs->relock_timer);
+ cs->relock_timer.function = qib_run_relock;
+ cs->relock_timer.data = (unsigned long) dd;
+ cs->relock_interval = timeout;
+ cs->relock_timer.expires = jiffies + timeout;
+ add_timer(&cs->relock_timer);
} else {
- irp->ipath_relock_interval = timeout;
- mod_timer(&irp->ipath_relock_timer, jiffies + timeout);
- atomic_dec(&irp->ipath_relock_timer_active);
+ cs->relock_interval = timeout;
+ mod_timer(&cs->relock_timer, jiffies + timeout);
}
}
}
-
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
new file mode 100644
index 00000000000..c6d6a54d2e1
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -0,0 +1,1039 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/moduleparam.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+/* default pio off, sdma on */
+static ushort sdma_descq_cnt = 256;
+module_param_named(sdma_descq_cnt, sdma_descq_cnt, ushort, S_IRUGO);
+MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");
+
+/*
+ * Bits defined in the send DMA descriptor.
+ */
+#define SDMA_DESC_LAST (1ULL << 11)
+#define SDMA_DESC_FIRST (1ULL << 12)
+#define SDMA_DESC_DMA_HEAD (1ULL << 13)
+#define SDMA_DESC_USE_LARGE_BUF (1ULL << 14)
+#define SDMA_DESC_INTR (1ULL << 15)
+#define SDMA_DESC_COUNT_LSB 16
+#define SDMA_DESC_GEN_LSB 30
+
+char *qib_sdma_state_names[] = {
+ [qib_sdma_state_s00_hw_down] = "s00_HwDown",
+ [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait",
+ [qib_sdma_state_s20_idle] = "s20_Idle",
+ [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
+ [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
+ [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
+ [qib_sdma_state_s99_running] = "s99_Running",
+};
+
+char *qib_sdma_event_names[] = {
+ [qib_sdma_event_e00_go_hw_down] = "e00_GoHwDown",
+ [qib_sdma_event_e10_go_hw_start] = "e10_GoHwStart",
+ [qib_sdma_event_e20_hw_started] = "e20_HwStarted",
+ [qib_sdma_event_e30_go_running] = "e30_GoRunning",
+ [qib_sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
+ [qib_sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
+ [qib_sdma_event_e60_hw_halted] = "e60_HwHalted",
+ [qib_sdma_event_e70_go_idle] = "e70_GoIdle",
+ [qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted",
+ [qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted",
+ [qib_sdma_event_e90_timer_tick] = "e90_TimerTick",
+};
+
+/* declare all statics here rather than keep sorting */
+static int alloc_sdma(struct qib_pportdata *);
+static void sdma_complete(struct kref *);
+static void sdma_finalput(struct qib_sdma_state *);
+static void sdma_get(struct qib_sdma_state *);
+static void sdma_put(struct qib_sdma_state *);
+static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states);
+static void sdma_start_sw_clean_up(struct qib_pportdata *);
+static void sdma_sw_clean_up_task(unsigned long);
+static void unmap_desc(struct qib_pportdata *, unsigned);
+
+static void sdma_get(struct qib_sdma_state *ss)
+{
+ kref_get(&ss->kref);
+}
+
+static void sdma_complete(struct kref *kref)
+{
+ struct qib_sdma_state *ss =
+ container_of(kref, struct qib_sdma_state, kref);
+
+ complete(&ss->comp);
+}
+
+static void sdma_put(struct qib_sdma_state *ss)
+{
+ kref_put(&ss->kref, sdma_complete);
+}
+
+static void sdma_finalput(struct qib_sdma_state *ss)
+{
+ sdma_put(ss);
+ wait_for_completion(&ss->comp);
+}
+
+/*
+ * Complete all the sdma requests on the active list, in the correct
+ * order, and with appropriate processing. Called when cleaning up
+ * after sdma shutdown, and when new sdma requests are submitted for
+ * a link that is down. This matches what is done for requests
+ * that complete normally, it's just the full list.
+ *
+ * Must be called with sdma_lock held
+ */
+static void clear_sdma_activelist(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_txreq *txp, *txp_next;
+
+ list_for_each_entry_safe(txp, txp_next, &ppd->sdma_activelist, list) {
+ list_del_init(&txp->list);
+ if (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) {
+ unsigned idx;
+
+ idx = txp->start_idx;
+ while (idx != txp->next_descq_idx) {
+ unmap_desc(ppd, idx);
+ if (++idx == ppd->sdma_descq_cnt)
+ idx = 0;
+ }
+ }
+ if (txp->callback)
+ (*txp->callback)(txp, QIB_SDMA_TXREQ_S_ABORTED);
+ }
+}
+
+static void sdma_sw_clean_up_task(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *) opaque;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ /*
+ * At this point, the following should always be true:
+ * - We are halted, so no more descriptors are getting retired.
+ * - We are not running, so no one is submitting new work.
+ * - Only we can send the e40_sw_cleaned, so we can't start
+ * running again until we say so. So, the active list and
+ * descq are ours to play with.
+ */
+
+ /* Process all retired requests. */
+ qib_sdma_make_progress(ppd);
+
+ clear_sdma_activelist(ppd);
+
+ /*
+ * Resync count of added and removed. It is VERY important that
+ * sdma_descq_removed NEVER decrement - user_sdma depends on it.
+ */
+ ppd->sdma_descq_removed = ppd->sdma_descq_added;
+
+ /*
+ * Reset our notion of head and tail.
+ * Note that the HW registers will be reset when switching states
+ * due to calling __qib_sdma_process_event() below.
+ */
+ ppd->sdma_descq_tail = 0;
+ ppd->sdma_descq_head = 0;
+ ppd->sdma_head_dma[0] = 0;
+ ppd->sdma_generation = 0;
+
+ __qib_sdma_process_event(ppd, qib_sdma_event_e40_sw_cleaned);
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+/*
+ * This is called when changing to state qib_sdma_state_s10_hw_start_up_wait
+ * as a result of send buffer errors or send DMA descriptor errors.
+ * We want to disarm the buffers in these cases.
+ */
+static void sdma_hw_start_up(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+ unsigned bufno;
+
+ for (bufno = ss->first_sendbuf; bufno < ss->last_sendbuf; ++bufno)
+ ppd->dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_BUF(bufno));
+
+ ppd->dd->f_sdma_hw_start_up(ppd);
+}
+
+static void sdma_sw_tear_down(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+
+ /* Releasing this reference means the state machine has stopped. */
+ sdma_put(ss);
+}
+
+static void sdma_start_sw_clean_up(struct qib_pportdata *ppd)
+{
+ tasklet_hi_schedule(&ppd->sdma_sw_clean_up_task);
+}
+
+static void sdma_set_state(struct qib_pportdata *ppd,
+ enum qib_sdma_states next_state)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+ struct sdma_set_state_action *action = ss->set_state_action;
+ unsigned op = 0;
+
+ /* debugging bookkeeping */
+ ss->previous_state = ss->current_state;
+ ss->previous_op = ss->current_op;
+
+ ss->current_state = next_state;
+
+ if (action[next_state].op_enable)
+ op |= QIB_SDMA_SENDCTRL_OP_ENABLE;
+
+ if (action[next_state].op_intenable)
+ op |= QIB_SDMA_SENDCTRL_OP_INTENABLE;
+
+ if (action[next_state].op_halt)
+ op |= QIB_SDMA_SENDCTRL_OP_HALT;
+
+ if (action[next_state].op_drain)
+ op |= QIB_SDMA_SENDCTRL_OP_DRAIN;
+
+ if (action[next_state].go_s99_running_tofalse)
+ ss->go_s99_running = 0;
+
+ if (action[next_state].go_s99_running_totrue)
+ ss->go_s99_running = 1;
+
+ ss->current_op = op;
+
+ ppd->dd->f_sdma_sendctrl(ppd, ss->current_op);
+}
+
+static void unmap_desc(struct qib_pportdata *ppd, unsigned head)
+{
+ __le64 *descqp = &ppd->sdma_descq[head].qw[0];
+ u64 desc[2];
+ dma_addr_t addr;
+ size_t len;
+
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
+
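+	/* recover the DMA address and byte length packed by make_sdma_desc() */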
+ addr = (desc[1] << 32) | (desc[0] >> 32);
+ len = (desc[0] >> 14) & (0x7ffULL << 2);
+ dma_unmap_single(&ppd->dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
+}
+
+static int alloc_sdma(struct qib_pportdata *ppd)
+{
+ ppd->sdma_descq_cnt = sdma_descq_cnt;
+ if (!ppd->sdma_descq_cnt)
+ ppd->sdma_descq_cnt = 256;
+
+ /* Allocate memory for SendDMA descriptor FIFO */
+ ppd->sdma_descq = dma_alloc_coherent(&ppd->dd->pcidev->dev,
+ ppd->sdma_descq_cnt * sizeof(u64[2]), &ppd->sdma_descq_phys,
+ GFP_KERNEL);
+
+ if (!ppd->sdma_descq) {
+ qib_dev_err(ppd->dd,
+ "failed to allocate SendDMA descriptor FIFO memory\n");
+ goto bail;
+ }
+
+ /* Allocate memory for DMA of head register to memory */
+ ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev,
+ PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL);
+ if (!ppd->sdma_head_dma) {
+ qib_dev_err(ppd->dd,
+ "failed to allocate SendDMA head memory\n");
+ goto cleanup_descq;
+ }
+ ppd->sdma_head_dma[0] = 0;
+ return 0;
+
+cleanup_descq:
+ dma_free_coherent(&ppd->dd->pcidev->dev,
+ ppd->sdma_descq_cnt * sizeof(u64[2]), (void *)ppd->sdma_descq,
+ ppd->sdma_descq_phys);
+ ppd->sdma_descq = NULL;
+ ppd->sdma_descq_phys = 0;
+bail:
+ ppd->sdma_descq_cnt = 0;
+ return -ENOMEM;
+}
+
+static void free_sdma(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+
+ if (ppd->sdma_head_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *)ppd->sdma_head_dma,
+ ppd->sdma_head_phys);
+ ppd->sdma_head_dma = NULL;
+ ppd->sdma_head_phys = 0;
+ }
+
+ if (ppd->sdma_descq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ ppd->sdma_descq_cnt * sizeof(u64[2]),
+ ppd->sdma_descq, ppd->sdma_descq_phys);
+ ppd->sdma_descq = NULL;
+ ppd->sdma_descq_phys = 0;
+ }
+}
+
+static inline void make_sdma_desc(struct qib_pportdata *ppd,
+ u64 *sdmadesc, u64 addr, u64 dwlen,
+ u64 dwoffset)
+{
+
+ WARN_ON(addr & 3);
+ /* SDmaPhyAddr[47:32] */
+ sdmadesc[1] = addr >> 32;
+ /* SDmaPhyAddr[31:0] */
+ sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
+ /* SDmaGeneration[1:0] */
+ sdmadesc[0] |= (ppd->sdma_generation & 3ULL) <<
+ SDMA_DESC_GEN_LSB;
+ /* SDmaDwordCount[10:0] */
+ sdmadesc[0] |= (dwlen & 0x7ffULL) << SDMA_DESC_COUNT_LSB;
+ /* SDmaBufOffset[12:2] */
+ sdmadesc[0] |= dwoffset & 0x7ffULL;
+}
+
+/* sdma_lock must be held */
+int qib_sdma_make_progress(struct qib_pportdata *ppd)
+{
+ struct list_head *lp = NULL;
+ struct qib_sdma_txreq *txp = NULL;
+ struct qib_devdata *dd = ppd->dd;
+ int progress = 0;
+ u16 hwhead;
+ u16 idx = 0;
+
+ hwhead = dd->f_sdma_gethead(ppd);
+
+ /* The reason for some of the complexity of this code is that
+ * not all descriptors have corresponding txps. So, we have to
+ * be able to skip over descs until we wander into the range of
+ * the next txp on the list.
+ */
+
+ if (!list_empty(&ppd->sdma_activelist)) {
+ lp = ppd->sdma_activelist.next;
+ txp = list_entry(lp, struct qib_sdma_txreq, list);
+ idx = txp->start_idx;
+ }
+
+ while (ppd->sdma_descq_head != hwhead) {
+ /* if desc is part of this txp, unmap if needed */
+ if (txp && (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) &&
+ (idx == ppd->sdma_descq_head)) {
+ unmap_desc(ppd, ppd->sdma_descq_head);
+ if (++idx == ppd->sdma_descq_cnt)
+ idx = 0;
+ }
+
+		/* increment dequeued desc count */
+ ppd->sdma_descq_removed++;
+
+ /* advance head, wrap if needed */
+ if (++ppd->sdma_descq_head == ppd->sdma_descq_cnt)
+ ppd->sdma_descq_head = 0;
+
+ /* if now past this txp's descs, do the callback */
+ if (txp && txp->next_descq_idx == ppd->sdma_descq_head) {
+ /* remove from active list */
+ list_del_init(&txp->list);
+ if (txp->callback)
+ (*txp->callback)(txp, QIB_SDMA_TXREQ_S_OK);
+ /* see if there is another txp */
+ if (list_empty(&ppd->sdma_activelist))
+ txp = NULL;
+ else {
+ lp = ppd->sdma_activelist.next;
+ txp = list_entry(lp, struct qib_sdma_txreq,
+ list);
+ idx = txp->start_idx;
+ }
+ }
+ progress = 1;
+ }
+ if (progress)
+ qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+ return progress;
+}
+
+/*
+ * This is called from interrupt context.
+ */
+void qib_sdma_intr(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ __qib_sdma_intr(ppd);
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_intr(struct qib_pportdata *ppd)
+{
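+	/*
+	 * If SDMA is running, retire completed descriptors and push any
+	 * pending user-SDMA work onto the ring.
+	 */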
+ if (__qib_sdma_running(ppd)) {
+ qib_sdma_make_progress(ppd);
+ if (!list_empty(&ppd->sdma_userpending))
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ }
+}
+
+int qib_setup_sdma(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+ int ret = 0;
+
+ ret = alloc_sdma(ppd);
+ if (ret)
+ goto bail;
+
+ /* set consistent sdma state */
+ ppd->dd->f_sdma_init_early(ppd);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ /* set up reference counting */
+ kref_init(&ppd->sdma_state.kref);
+ init_completion(&ppd->sdma_state.comp);
+
+ ppd->sdma_generation = 0;
+ ppd->sdma_descq_head = 0;
+ ppd->sdma_descq_removed = 0;
+ ppd->sdma_descq_added = 0;
+
+ ppd->sdma_intrequest = 0;
+ INIT_LIST_HEAD(&ppd->sdma_userpending);
+
+ INIT_LIST_HEAD(&ppd->sdma_activelist);
+
+ tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
+ (unsigned long)ppd);
+
+ ret = dd->f_init_sdma_regs(ppd);
+ if (ret)
+ goto bail_alloc;
+
+ qib_sdma_process_event(ppd, qib_sdma_event_e10_go_hw_start);
+
+ return 0;
+
+bail_alloc:
+ qib_teardown_sdma(ppd);
+bail:
+ return ret;
+}
+
+void qib_teardown_sdma(struct qib_pportdata *ppd)
+{
+ qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down);
+
+ /*
+ * This waits for the state machine to exit so it is not
+ * necessary to kill the sdma_sw_clean_up_task to make sure
+ * it is not running.
+ */
+ sdma_finalput(&ppd->sdma_state);
+
+ free_sdma(ppd);
+}
+
+int qib_sdma_running(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ ret = __qib_sdma_running(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Complete a request when sdma is not running; this is likely only the
+ * request in question, but to simplify the code, always queue it, then
+ * process the full activelist.  We process the entire list to ensure
+ * that this particular request does get its callback, in the correct
+ * order.
+ * Must be called with sdma_lock held.
+ */
+static void complete_sdma_err_req(struct qib_pportdata *ppd,
+ struct qib_verbs_txreq *tx)
+{
+ atomic_inc(&tx->qp->s_dma_busy);
+ /* no sdma descriptors, so no unmap_desc */
+ tx->txreq.start_idx = 0;
+ tx->txreq.next_descq_idx = 0;
+ list_add_tail(&tx->txreq.list, &ppd->sdma_activelist);
+ clear_sdma_activelist(ppd);
+}
+
+/*
+ * This function queues one IB packet onto the send DMA queue per call.
+ * The caller is responsible for checking:
+ * 1) The number of send DMA descriptor entries is less than the size of
+ * the descriptor queue.
+ * 2) The IB SGE addresses and lengths are 32-bit aligned
+ * (except possibly the last SGE's length)
+ * 3) The SGE addresses are suitable for passing to dma_map_single().
+ */
+int qib_sdma_verbs_send(struct qib_pportdata *ppd,
+ struct qib_sge_state *ss, u32 dwords,
+ struct qib_verbs_txreq *tx)
+{
+ unsigned long flags;
+ struct qib_sge *sge;
+ struct qib_qp *qp;
+ int ret = 0;
+ u16 tail;
+ __le64 *descqp;
+ u64 sdmadesc[2];
+ u32 dwoffset;
+ dma_addr_t addr;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+retry:
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ complete_sdma_err_req(ppd, tx);
+ goto unlock;
+ }
+
+ if (tx->txreq.sg_count > qib_sdma_descq_freecnt(ppd)) {
+ if (qib_sdma_make_progress(ppd))
+ goto retry;
+ if (ppd->dd->flags & QIB_HAS_SDMA_TIMEOUT)
+ ppd->dd->f_sdma_set_desc_cnt(ppd,
+ ppd->sdma_descq_cnt / 2);
+ goto busy;
+ }
+
+ dwoffset = tx->hdr_dwords;
+ make_sdma_desc(ppd, sdmadesc, (u64) tx->txreq.addr, dwoffset, 0);
+
+ sdmadesc[0] |= SDMA_DESC_FIRST;
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF;
+
+ /* write to the descq */
+ tail = ppd->sdma_descq_tail;
+ descqp = &ppd->sdma_descq[tail].qw[0];
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ /* increment the tail */
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ descqp = &ppd->sdma_descq[0].qw[0];
+ ++ppd->sdma_generation;
+ }
+
+ tx->txreq.start_idx = tail;
+
+ sge = &ss->sge;
+ while (dwords) {
+ u32 dw;
+ u32 len;
+
+ len = dwords << 2;
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ dw = (len + 3) >> 2;
+ addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
+ dw << 2, DMA_TO_DEVICE);
+ if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
+ goto unmap;
+ sdmadesc[0] = 0;
+ make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
+ /* SDmaUseLargeBuf has to be set in every descriptor */
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF;
+ /* write to the descq */
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ /* increment the tail */
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ descqp = &ppd->sdma_descq[0].qw[0];
+ ++ppd->sdma_generation;
+ }
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+
+ dwoffset += dw;
+ dwords -= dw;
+ }
+
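+	/*
+	 * Back up to the last descriptor written (handling a wrap to the
+	 * end of the ring) so the LAST/HEAD/INTR flags land on it.
+	 */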
+ if (!tail)
+ descqp = &ppd->sdma_descq[ppd->sdma_descq_cnt].qw[0];
+ descqp -= 2;
+ descqp[0] |= cpu_to_le64(SDMA_DESC_LAST);
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_HEADTOHOST)
+ descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
+ descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
+
+ atomic_inc(&tx->qp->s_dma_busy);
+ tx->txreq.next_descq_idx = tail;
+ ppd->dd->f_sdma_update_tail(ppd, tail);
+ ppd->sdma_descq_added += tx->txreq.sg_count;
+ list_add_tail(&tx->txreq.list, &ppd->sdma_activelist);
+ goto unlock;
+
+unmap:
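+	/*
+	 * Walk tail back toward its original position, unmapping the
+	 * payload descriptors written so far for this request.
+	 */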
+ for (;;) {
+ if (!tail)
+ tail = ppd->sdma_descq_cnt - 1;
+ else
+ tail--;
+ if (tail == ppd->sdma_descq_tail)
+ break;
+ unmap_desc(ppd, tail);
+ }
+ qp = tx->qp;
+ qib_put_txreq(tx);
+ spin_lock(&qp->r_lock);
+ spin_lock(&qp->s_lock);
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ /* XXX what about error sending RDMA read responses? */
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
+ qib_error_qp(qp, IB_WC_GENERAL_ERR);
+ } else if (qp->s_wqe)
+ qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->r_lock);
+ /* return zero to process the next send work request */
+ goto unlock;
+
+busy:
+ qp = tx->qp;
+ spin_lock(&qp->s_lock);
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ struct qib_ibdev *dev;
+
+ /*
+ * If we couldn't queue the DMA request, save the info
+ * and try again later rather than destroying the
+ * buffer and undoing the side effects of the copy.
+ */
+ tx->ss = ss;
+ tx->dwords = dwords;
+ qp->s_tx = tx;
+ dev = &ppd->dd->verbs_dev;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->iowait)) {
+ struct qib_ibport *ibp;
+
+ ibp = &ppd->ibport_data;
+ ibp->n_dmawait++;
+ qp->s_flags |= QIB_S_WAIT_DMA_DESC;
+ list_add_tail(&qp->iowait, &dev->dmawait);
+ }
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock(&qp->s_lock);
+ ret = -EBUSY;
+ } else {
+ spin_unlock(&qp->s_lock);
+ qib_put_txreq(tx);
+ }
+unlock:
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return ret;
+}
+
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+void dump_sdma_state(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_desc *descq;
+ struct qib_sdma_txreq *txp, *txpnext;
+ __le64 *descqp;
+ u64 desc[2];
+ u64 addr;
+ u16 gen, dwlen, dwoffset;
+ u16 head, tail, cnt;
+
+ head = ppd->sdma_descq_head;
+ tail = ppd->sdma_descq_tail;
+ cnt = qib_sdma_descq_freecnt(ppd);
+ descq = ppd->sdma_descq;
+
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA ppd->sdma_descq_head: %u\n", head);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA ppd->sdma_descq_tail: %u\n", tail);
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sdma_descq_freecnt: %u\n", cnt);
+
+ /* print info for each entry in the descriptor queue */
+ while (head != tail) {
+ char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
+
+ descqp = &descq[head].qw[0];
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
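+		/* flag chars: Intr, Large/Small, dma-Head, First, Last */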
+ flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
+ flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
+ flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
+ flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
+ flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
+ addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
+ gen = (desc[0] >> 30) & 3ULL;
+ dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
+ dwoffset = (desc[0] & 0x7ffULL) << 2;
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
+ head, flags, addr, gen, dwlen, dwoffset);
+ if (++head == ppd->sdma_descq_cnt)
+ head = 0;
+ }
+
+ /* print dma descriptor indices from the TX requests */
+ list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
+ list)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
+ txp->start_idx, txp->next_descq_idx);
+}
+
+void qib_sdma_process_event(struct qib_pportdata *ppd,
+ enum qib_sdma_events event)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ __qib_sdma_process_event(ppd, event);
+
+ if (ppd->sdma_state.current_state == qib_sdma_state_s99_running)
+ qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_process_event(struct qib_pportdata *ppd,
+ enum qib_sdma_events event)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+
+ switch (ss->current_state) {
+ case qib_sdma_state_s00_hw_down:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ break;
+ case qib_sdma_event_e30_go_running:
+ /*
+			 * If down, but running is requested (usually the
+			 * result of a link coming up), then we need to start
+			 * up.  This can happen when hw down is requested
+			 * while bringing the link up with traffic active on
+			 * the 7220, for example.
+			 */
+ ss->go_s99_running = 1;
+ /* fall through and start dma engine */
+ case qib_sdma_event_e10_go_hw_start:
+ /* This reference means the state machine is started */
+ sdma_get(&ppd->sdma_state);
+ sdma_set_state(ppd,
+ qib_sdma_state_s10_hw_start_up_wait);
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ sdma_sw_tear_down(ppd);
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s10_hw_start_up_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_sw_tear_down(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ sdma_set_state(ppd, ss->go_s99_running ?
+ qib_sdma_state_s99_running :
+ qib_sdma_state_s20_idle);
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s20_idle:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_sw_tear_down(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ sdma_set_state(ppd, qib_sdma_state_s99_running);
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s30_sw_clean_up_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ sdma_set_state(ppd,
+ qib_sdma_state_s10_hw_start_up_wait);
+ sdma_hw_start_up(ppd);
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s40_hw_clean_up_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ sdma_set_state(ppd,
+ qib_sdma_state_s30_sw_clean_up_wait);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s50_hw_halt_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ sdma_set_state(ppd,
+ qib_sdma_state_s40_hw_clean_up_wait);
+ ppd->dd->f_sdma_hw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s99_running:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ sdma_set_state(ppd,
+ qib_sdma_state_s30_sw_clean_up_wait);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e70_go_idle:
+ sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait);
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ sdma_set_state(ppd,
+ qib_sdma_state_s30_sw_clean_up_wait);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait);
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+ }
+
+ ss->last_event = event;
+}
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
new file mode 100644
index 00000000000..d6235931a1b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_srq.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "qib_verbs.h"
+
+/**
+ * qib_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: A pointer to the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ */
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+ struct qib_rwq *wq;
+ unsigned long flags;
+ int ret;
+
+ for (; wr; wr = wr->next) {
+ struct qib_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned) wr->num_sge > srq->rq.max_sge) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&srq->rq.lock, flags);
+ wq = srq->rq.wq;
+ next = wq->head + 1;
+ if (next >= srq->rq.size)
+ next = 0;
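+		/* the ring is full if advancing head would meet the tail */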
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_rwqe_ptr(&srq->rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_create_srq - create a shared receive queue
+ * @ibpd: the protection domain of the SRQ to create
+ * @srq_init_attr: the attributes of the SRQ
+ * @udata: data from libibverbs when creating a user SRQ
+ */
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct qib_ibdev *dev = to_idev(ibpd->device);
+ struct qib_srq *srq;
+ u32 sz;
+ struct ib_srq *ret;
+
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+ ret = ERR_PTR(-ENOSYS);
+ goto done;
+ }
+
+ if (srq_init_attr->attr.max_sge == 0 ||
+ srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
+ srq_init_attr->attr.max_wr == 0 ||
+ srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto done;
+ }
+
+ /*
+	 * Need to use vmalloc() if we want to support large numbers of entries.
+ */
+ srq->rq.size = srq_init_attr->attr.max_wr + 1;
+ srq->rq.max_sge = srq_init_attr->attr.max_sge;
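+	/* per-WQE size: the WQE header plus its scatter/gather list */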
+ sz = sizeof(struct ib_sge) * srq->rq.max_sge +
+ sizeof(struct qib_rwqe);
+ srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
+ if (!srq->rq.wq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_srq;
+ }
+
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ int err;
+ u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
+
+ srq->ip =
+ qib_create_mmap_info(dev, s, ibpd->uobject->context,
+ srq->rq.wq);
+ if (!srq->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+
+ err = ib_copy_to_udata(udata, &srq->ip->offset,
+ sizeof(srq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else
+ srq->ip = NULL;
+
+ /*
+ * ib_create_srq() will initialize srq->ibsrq.
+ */
+ spin_lock_init(&srq->rq.lock);
+ srq->rq.wq->head = 0;
+ srq->rq.wq->tail = 0;
+ srq->limit = srq_init_attr->attr.srq_limit;
+
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_qib_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
+
+ if (srq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = &srq->ibsrq;
+ goto done;
+
+bail_ip:
+ kfree(srq->ip);
+bail_wq:
+ vfree(srq->rq.wq);
+bail_srq:
+ kfree(srq);
+done:
+ return ret;
+}
+
+/**
+ * qib_modify_srq - modify a shared receive queue
+ * @ibsrq: the SRQ to modify
+ * @attr: the new attributes of the SRQ
+ * @attr_mask: indicates which attributes to modify
+ * @udata: user data for libibverbs.so
+ */
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+ struct qib_rwq *wq;
+ int ret = 0;
+
+ if (attr_mask & IB_SRQ_MAX_WR) {
+ struct qib_rwq *owq;
+ struct qib_rwqe *p;
+ u32 sz, size, n, head, tail;
+
+ /* Check that the requested sizes are below the limits. */
+ if ((attr->max_wr > ib_qib_max_srq_wrs) ||
+ ((attr_mask & IB_SRQ_LIMIT) ?
+ attr->srq_limit : srq->limit) > attr->max_wr) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ sz = sizeof(struct qib_rwqe) +
+ srq->rq.max_sge * sizeof(struct ib_sge);
+ size = attr->max_wr + 1;
+ wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
+ if (!wq) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ /* Check that we can write the offset to mmap. */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ __u64 offset_addr;
+ __u64 offset = 0;
+
+ ret = ib_copy_from_udata(&offset_addr, udata,
+ sizeof(offset_addr));
+ if (ret)
+ goto bail_free;
+ udata->outbuf =
+ (void __user *) (unsigned long) offset_addr;
+ ret = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (ret)
+ goto bail_free;
+ }
+
+ spin_lock_irq(&srq->rq.lock);
+ /*
+ * validate head and tail pointer values and compute
+ * the number of remaining WQEs.
+ */
+ owq = srq->rq.wq;
+ head = owq->head;
+ tail = owq->tail;
+ if (head >= srq->rq.size || tail >= srq->rq.size) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
+ n = head;
+ if (n < tail)
+ n += srq->rq.size - tail;
+ else
+ n -= tail;
+ if (size <= n) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
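+		/* copy outstanding WQEs from the old ring into the new one */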
+ n = 0;
+ p = wq->wq;
+ while (tail != head) {
+ struct qib_rwqe *wqe;
+ int i;
+
+ wqe = get_rwqe_ptr(&srq->rq, tail);
+ p->wr_id = wqe->wr_id;
+ p->num_sge = wqe->num_sge;
+ for (i = 0; i < wqe->num_sge; i++)
+ p->sg_list[i] = wqe->sg_list[i];
+ n++;
+ p = (struct qib_rwqe *)((char *) p + sz);
+ if (++tail >= srq->rq.size)
+ tail = 0;
+ }
+ srq->rq.wq = wq;
+ srq->rq.size = size;
+ wq->head = n;
+ wq->tail = 0;
+ if (attr_mask & IB_SRQ_LIMIT)
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+
+ vfree(owq);
+
+ if (srq->ip) {
+ struct qib_mmap_info *ip = srq->ip;
+ struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
+ u32 s = sizeof(struct qib_rwq) + size * sz;
+
+ qib_update_mmap_info(dev, ip, s, wq);
+
+ /*
+ * Return the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
+
+ /*
+ * Put user mapping info onto the pending list
+ * unless it already is on the list.
+ */
+ spin_lock_irq(&dev->pending_lock);
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps,
+ &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ } else if (attr_mask & IB_SRQ_LIMIT) {
+ spin_lock_irq(&srq->rq.lock);
+ if (attr->srq_limit >= srq->rq.size)
+ ret = -EINVAL;
+ else
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+ }
+ goto bail;
+
+bail_unlock:
+ spin_unlock_irq(&srq->rq.lock);
+bail_free:
+ vfree(wq);
+bail:
+ return ret;
+}
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+
+ attr->max_wr = srq->rq.size - 1;
+ attr->max_sge = srq->rq.max_sge;
+ attr->srq_limit = srq->limit;
+ return 0;
+}
+
+/**
+ * qib_destroy_srq - destroy a shared receive queue
+ * @ibsrq: the SRQ to destroy
+ */
+int qib_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+ struct qib_ibdev *dev = to_idev(ibsrq->device);
+
+ spin_lock(&dev->n_srqs_lock);
+ dev->n_srqs_allocated--;
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, qib_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
+ kfree(srq);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
new file mode 100644
index 00000000000..3c8e4e3caca
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -0,0 +1,842 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/ctype.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/* start of per-port functions */
+/*
+ * Get/Set heartbeat enable. OR of 1=enabled, 2=auto
+ */
+static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret;
+
+ ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT);
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret;
+ u16 val;
+
+ ret = kstrtou16(buf, 0, &val);
+ if (ret) {
+ qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ return ret;
+ }
+
+ /*
+ * Set the "intentional" heartbeat enable per either of
+ * "Enable" and "Auto", as these are normally set together.
+ * This bit is consulted when leaving loopback mode,
+ * because entering loopback mode overrides it and automatically
+ * disables heartbeat.
+ */
+ ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
+ return ret < 0 ? ret : count;
+}
+
+static ssize_t store_loopback(struct qib_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret = count, r;
+
+ r = dd->f_set_ib_loopback(ppd, buf);
+ if (r < 0)
+ ret = r;
+
+ return ret;
+}
+
+static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret;
+ u16 val;
+
+ ret = kstrtou16(buf, 0, &val);
+ if (ret) {
+ qib_dev_err(dd, "attempt to set invalid LED override\n");
+ return ret;
+ }
+
+ qib_set_led_override(ppd, val);
+ return count;
+}
+
+static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
+{
+ ssize_t ret;
+
+ if (!ppd->statusp)
+ ret = -EINVAL;
+ else
+ ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long) *(ppd->statusp));
+ return ret;
+}
+
+/*
+ * For userland compatibility, these offsets must remain fixed.
+ * They are strings for QIB_STATUS_*
+ */
+static const char * const qib_status_str[] = {
+ "Initted",
+ "",
+ "",
+ "",
+ "",
+ "Present",
+ "IB_link_up",
+ "IB_configured",
+ "",
+ "Fatal_Hardware_Error",
+ NULL,
+};
+
+static ssize_t show_status_str(struct qib_pportdata *ppd, char *buf)
+{
+ int i, any;
+ u64 s;
+ ssize_t ret;
+
+ if (!ppd->statusp) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ s = *(ppd->statusp);
+ *buf = '\0';
+ for (any = i = 0; s && qib_status_str[i]; i++) {
+ if (s & 1) {
+ /* if overflow */
+ if (any && strlcat(buf, " ", PAGE_SIZE) >= PAGE_SIZE)
+ break;
+ if (strlcat(buf, qib_status_str[i], PAGE_SIZE) >=
+ PAGE_SIZE)
+ break;
+ any = 1;
+ }
+ s >>= 1;
+ }
+ if (any)
+ strlcat(buf, "\n", PAGE_SIZE);
+
+ ret = strlen(buf);
+
+bail:
+ return ret;
+}
+
+/* end of per-port functions */
+
+/*
+ * Start of per-port file structures and support code
+ * Because we are fitting into other infrastructure, we have to supply the
+ * full set of kobject/sysfs_ops structures and routines.
+ */
+#define QIB_PORT_ATTR(name, mode, show, store) \
+ static struct qib_port_attr qib_port_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+struct qib_port_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct qib_pportdata *, char *);
+ ssize_t (*store)(struct qib_pportdata *, const char *, size_t);
+};
+
+QIB_PORT_ATTR(loopback, S_IWUSR, NULL, store_loopback);
+QIB_PORT_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+QIB_PORT_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
+ store_hrtbt_enb);
+QIB_PORT_ATTR(status, S_IRUGO, show_status, NULL);
+QIB_PORT_ATTR(status_str, S_IRUGO, show_status_str, NULL);
+
+static struct attribute *port_default_attributes[] = {
+ &qib_port_attr_loopback.attr,
+ &qib_port_attr_led_override.attr,
+ &qib_port_attr_hrtbt_enable.attr,
+ &qib_port_attr_status.attr,
+ &qib_port_attr_status_str.attr,
+ NULL
+};
+
+/*
+ * Start of per-port congestion control structures and support code
+ */
+
+/*
+ * Congestion control table size followed by table entries
+ */
+static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
+ return -EINVAL;
+
+ ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
+ + sizeof(__be16);
+
+ if (pos > ret)
+ return -EINVAL;
+
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->ccti_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+ /* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static struct kobj_type qib_port_cc_ktype = {
+ .release = qib_port_release,
+};
+
+static struct bin_attribute cc_table_bin_attr = {
+ .attr = {.name = "cc_table_bin", .mode = 0444},
+ .read = read_cc_table_bin,
+ .size = PAGE_SIZE,
+};
+
+/*
+ * Congestion settings: port control, control map and an array of 16
+ * entries for the congestion entries - increase, timer, event log
+ * trigger threshold and the minimum injection rate delay.
+ */
+static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ int ret;
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return -EINVAL;
+
+ ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
+
+ if (pos > ret)
+ return -EINVAL;
+ if (count > ret - pos)
+ count = ret - pos;
+
+ if (!count)
+ return count;
+
+ spin_lock(&ppd->cc_shadow_lock);
+ memcpy(buf, ppd->congestion_entries_shadow, count);
+ spin_unlock(&ppd->cc_shadow_lock);
+
+ return count;
+}
+
+static struct bin_attribute cc_setting_bin_attr = {
+ .attr = {.name = "cc_settings_bin", .mode = 0444},
+ .read = read_cc_setting_bin,
+ .size = PAGE_SIZE,
+};
+
+
+static ssize_t qib_portattr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct qib_port_attr *pattr =
+ container_of(attr, struct qib_port_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_kobj);
+
+ return pattr->show(ppd, buf);
+}
+
+static ssize_t qib_portattr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t len)
+{
+ struct qib_port_attr *pattr =
+ container_of(attr, struct qib_port_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_kobj);
+
+ return pattr->store(ppd, buf, len);
+}
+
+
+static const struct sysfs_ops qib_port_ops = {
+ .show = qib_portattr_show,
+ .store = qib_portattr_store,
+};
+
+static struct kobj_type qib_port_ktype = {
+ .release = qib_port_release,
+ .sysfs_ops = &qib_port_ops,
+ .default_attrs = port_default_attributes
+};
+
+/* Start sl2vl */
+
+#define QIB_SL2VL_ATTR(N) \
+ static struct qib_sl2vl_attr qib_sl2vl_attr_##N = { \
+ .attr = { .name = __stringify(N), .mode = 0444 }, \
+ .sl = N \
+ }
+
+struct qib_sl2vl_attr {
+ struct attribute attr;
+ int sl;
+};
+
+QIB_SL2VL_ATTR(0);
+QIB_SL2VL_ATTR(1);
+QIB_SL2VL_ATTR(2);
+QIB_SL2VL_ATTR(3);
+QIB_SL2VL_ATTR(4);
+QIB_SL2VL_ATTR(5);
+QIB_SL2VL_ATTR(6);
+QIB_SL2VL_ATTR(7);
+QIB_SL2VL_ATTR(8);
+QIB_SL2VL_ATTR(9);
+QIB_SL2VL_ATTR(10);
+QIB_SL2VL_ATTR(11);
+QIB_SL2VL_ATTR(12);
+QIB_SL2VL_ATTR(13);
+QIB_SL2VL_ATTR(14);
+QIB_SL2VL_ATTR(15);
+
+static struct attribute *sl2vl_default_attributes[] = {
+ &qib_sl2vl_attr_0.attr,
+ &qib_sl2vl_attr_1.attr,
+ &qib_sl2vl_attr_2.attr,
+ &qib_sl2vl_attr_3.attr,
+ &qib_sl2vl_attr_4.attr,
+ &qib_sl2vl_attr_5.attr,
+ &qib_sl2vl_attr_6.attr,
+ &qib_sl2vl_attr_7.attr,
+ &qib_sl2vl_attr_8.attr,
+ &qib_sl2vl_attr_9.attr,
+ &qib_sl2vl_attr_10.attr,
+ &qib_sl2vl_attr_11.attr,
+ &qib_sl2vl_attr_12.attr,
+ &qib_sl2vl_attr_13.attr,
+ &qib_sl2vl_attr_14.attr,
+ &qib_sl2vl_attr_15.attr,
+ NULL
+};
+
+static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct qib_sl2vl_attr *sattr =
+ container_of(attr, struct qib_sl2vl_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, sl2vl_kobj);
+ struct qib_ibport *qibp = &ppd->ibport_data;
+
+ return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]);
+}
+
+static const struct sysfs_ops qib_sl2vl_ops = {
+ .show = sl2vl_attr_show,
+};
+
+static struct kobj_type qib_sl2vl_ktype = {
+ .release = qib_port_release,
+ .sysfs_ops = &qib_sl2vl_ops,
+ .default_attrs = sl2vl_default_attributes
+};
+
+/* End sl2vl */
+
+/* Start diag_counters */
+
+#define QIB_DIAGC_ATTR(N) \
+ static struct qib_diagc_attr qib_diagc_attr_##N = { \
+ .attr = { .name = __stringify(N), .mode = 0664 }, \
+ .counter = offsetof(struct qib_ibport, n_##N) \
+ }
+
+struct qib_diagc_attr {
+ struct attribute attr;
+ size_t counter;
+};
+
+QIB_DIAGC_ATTR(rc_resends);
+QIB_DIAGC_ATTR(rc_acks);
+QIB_DIAGC_ATTR(rc_qacks);
+QIB_DIAGC_ATTR(rc_delayed_comp);
+QIB_DIAGC_ATTR(seq_naks);
+QIB_DIAGC_ATTR(rdma_seq);
+QIB_DIAGC_ATTR(rnr_naks);
+QIB_DIAGC_ATTR(other_naks);
+QIB_DIAGC_ATTR(rc_timeouts);
+QIB_DIAGC_ATTR(loop_pkts);
+QIB_DIAGC_ATTR(pkt_drops);
+QIB_DIAGC_ATTR(dmawait);
+QIB_DIAGC_ATTR(unaligned);
+QIB_DIAGC_ATTR(rc_dupreq);
+QIB_DIAGC_ATTR(rc_seqnak);
+
+static struct attribute *diagc_default_attributes[] = {
+ &qib_diagc_attr_rc_resends.attr,
+ &qib_diagc_attr_rc_acks.attr,
+ &qib_diagc_attr_rc_qacks.attr,
+ &qib_diagc_attr_rc_delayed_comp.attr,
+ &qib_diagc_attr_seq_naks.attr,
+ &qib_diagc_attr_rdma_seq.attr,
+ &qib_diagc_attr_rnr_naks.attr,
+ &qib_diagc_attr_other_naks.attr,
+ &qib_diagc_attr_rc_timeouts.attr,
+ &qib_diagc_attr_loop_pkts.attr,
+ &qib_diagc_attr_pkt_drops.attr,
+ &qib_diagc_attr_dmawait.attr,
+ &qib_diagc_attr_unaligned.attr,
+ &qib_diagc_attr_rc_dupreq.attr,
+ &qib_diagc_attr_rc_seqnak.attr,
+ NULL
+};
+
+static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct qib_diagc_attr *dattr =
+ container_of(attr, struct qib_diagc_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, diagc_kobj);
+ struct qib_ibport *qibp = &ppd->ibport_data;
+
+ return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+}
+
+static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t size)
+{
+ struct qib_diagc_attr *dattr =
+ container_of(attr, struct qib_diagc_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, diagc_kobj);
+ struct qib_ibport *qibp = &ppd->ibport_data;
+ u32 val;
+ int ret;
+
+ ret = kstrtou32(buf, 0, &val);
+ if (ret)
+ return ret;
+ *(u32 *)((char *) qibp + dattr->counter) = val;
+ return size;
+}
+
+static const struct sysfs_ops qib_diagc_ops = {
+ .show = diagc_attr_show,
+ .store = diagc_attr_store,
+};
+
+static struct kobj_type qib_diagc_ktype = {
+ .release = qib_port_release,
+ .sysfs_ops = &qib_diagc_ops,
+ .default_attrs = diagc_default_attributes
+};
+
+/* End diag_counters */
+
+/* end of per-port file structures and support code */
+
+/*
+ * Start of per-unit (or driver, in some cases, but replicated
+ * per unit) functions (these get a device *)
+ */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+
+ return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int ret;
+
+ if (!dd->boardname)
+ ret = -EINVAL;
+ else
+ ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
+ return ret;
+}
+
+static ssize_t show_version(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
+}
+
+static ssize_t show_boardversion(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+}
+
+
+static ssize_t show_localbus_info(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
+}
+
+
+static ssize_t show_nctxts(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* Return the number of user ports (contexts) available. */
+ /* The calculation below deals with a special case where
+ * cfgctxts is set to 1 on a single-port board. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
+ (dd->cfgctxts - dd->first_user_ctxt));
+}
+
+static ssize_t show_nfreectxts(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* Return the number of free user ports (contexts) available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
+}
+
+static ssize_t show_serial(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ buf[sizeof dd->serial] = '\0';
+ memcpy(buf, dd->serial, sizeof dd->serial);
+ strcat(buf, "\n");
+ return strlen(buf);
+}
+
+static ssize_t store_chip_reset(struct device *device,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int ret;
+
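+	/* require the string "reset" and an active diag client */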
+ if (count < 5 || memcmp(buf, "reset", 5) || !dd->diag_client) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ ret = qib_reset_device(dd->unit);
+bail:
+ return ret < 0 ? ret : count;
+}
+
+static ssize_t show_logged_errs(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int idx, count;
+
+ /* force consistency with actual EEPROM */
+ if (qib_update_eeprom_log(dd) != 0)
+ return -ENXIO;
+
+ count = 0;
+ for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+ count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+ dd->eep_st_errs[idx],
+ idx == (QIB_EEP_LOG_CNT - 1) ? '\n' : ' ');
+ }
+
+ return count;
+}
+
+/*
+ * Dump tempsense registers in decimal, to ease shell scripting.
+ */
+static ssize_t show_tempsense(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int ret;
+ int idx;
+ u8 regvals[8];
+
+ ret = -ENXIO;
+ for (idx = 0; idx < 8; ++idx) {
+ if (idx == 6)
+ continue;
+ ret = dd->f_tempsense_rd(dd, idx);
+ if (ret < 0)
+ break;
+ regvals[idx] = ret;
+ }
+ if (idx == 8)
+ ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
+ *(signed char *)(regvals),
+ *(signed char *)(regvals + 1),
+ regvals[2], regvals[3],
+ *(signed char *)(regvals + 5),
+ *(signed char *)(regvals + 7));
+ return ret;
+}
+
+/*
+ * end of per-unit (or driver, in some cases, but replicated
+ * per unit) functions
+ */
+
+/* start of per-unit file structures and support code */
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
+static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
+static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
+static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
+static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
+static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
+static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
+static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
+
+static struct device_attribute *qib_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+ &dev_attr_version,
+ &dev_attr_nctxts,
+ &dev_attr_nfreectxts,
+ &dev_attr_serial,
+ &dev_attr_boardversion,
+ &dev_attr_logged_errors,
+ &dev_attr_tempsense,
+ &dev_attr_localbus_info,
+ &dev_attr_chip_reset,
+};
+
+int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
+ struct kobject *kobj)
+{
+ struct qib_pportdata *ppd;
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ int ret;
+
+ if (!port_num || port_num > dd->num_pports) {
+ qib_dev_err(dd,
+ "Skipping infiniband class with invalid port %u\n",
+ port_num);
+ ret = -ENODEV;
+ goto bail;
+ }
+ ppd = &dd->pport[port_num - 1];
+
+ ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj,
+ "linkcontrol");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping linkcontrol sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail;
+ }
+ kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
+
+ ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj,
+ "sl2vl");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping sl2vl sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_link;
+ }
+ kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
+
+ ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj,
+ "diag_counters");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping diag_counters sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_sl;
+ }
+ kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
+
+ if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+ return 0;
+
+ ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
+ kobj, "CCMgtA");
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_diagc;
+ }
+
+ kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc;
+ }
+
+ ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ if (ret) {
+ qib_dev_err(dd,
+ "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+ ret, port_num);
+ goto bail_cc_entry_bin;
+ }
+
+ qib_devinfo(dd->pcidev,
+ "IB%u: Congestion Control Agent enabled for port %d\n",
+ dd->unit, port_num);
+
+ return 0;
+
+bail_cc_entry_bin:
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
+bail_cc:
+ kobject_put(&ppd->pport_cc_kobj);
+bail_diagc:
+ kobject_put(&ppd->diagc_kobj);
+bail_sl:
+ kobject_put(&ppd->sl2vl_kobj);
+bail_link:
+ kobject_put(&ppd->pport_kobj);
+bail:
+ return ret;
+}
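+
+/*
+ * Illustrative sketch: assuming the usual /sys/class/infiniband/<ibdev>/
+ * layout and a device named, say, "qib0", the attributes and kobjects
+ * above would roughly appear as
+ *
+ *	/sys/class/infiniband/qib0/{hw_rev,hca_type,serial,tempsense,...}
+ *	/sys/class/infiniband/qib0/ports/1/linkcontrol/
+ *	/sys/class/infiniband/qib0/ports/1/sl2vl/
+ *	/sys/class/infiniband/qib0/ports/1/diag_counters/
+ *	/sys/class/infiniband/qib0/ports/1/CCMgtA/   (only when congestion
+ *	                                              control is configured)
+ */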
+
+/*
+ * Register and create our files in /sys/class/infiniband.
+ */
+int qib_verbs_register_sysfs(struct qib_devdata *dd)
+{
+ struct ib_device *dev = &dd->verbs_dev.ibdev;
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
+ ret = device_create_file(&dev->dev, qib_attributes[i]);
+ if (ret)
+ goto bail;
+ }
+
+ return 0;
+bail:
+ for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
+ device_remove_file(&dev->dev, qib_attributes[i]);
+ return ret;
+}
+
+/*
+ * Unregister and remove our files in /sys/class/infiniband.
+ */
+void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
+{
+ struct qib_pportdata *ppd;
+ int i;
+
+ for (i = 0; i < dd->num_pports; i++) {
+ ppd = &dd->pport[i];
+ if (qib_cc_table_size &&
+ ppd->congestion_entries_shadow) {
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_setting_bin_attr);
+ sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+ &cc_table_bin_attr);
+ kobject_put(&ppd->pport_cc_kobj);
+ }
+ kobject_put(&ppd->sl2vl_kobj);
+ kobject_put(&ppd->pport_kobj);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
new file mode 100644
index 00000000000..647f7beb1b0
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+/*
+ * QLogic_IB "Two Wire Serial Interface" driver.
+ * Originally written for a not-quite-i2c serial eeprom, which is
+ * still used on some supported boards. Later boards have added a
+ * variety of other uses, most board-specific, so the bit-banging
+ * part has been split off to this file, while the other parts
+ * have been moved to chip-specific files.
+ *
+ * We have also dropped all pretense of a fully generic (e.g. pretending
+ * we don't know whether '1' is the higher voltage) interface, as
+ * the restrictions of the generic i2c interface (e.g. no access from
+ * the driver itself) make it unsuitable for this use.
+ */
+
+#define READ_CMD 1
+#define WRITE_CMD 0
+
+/**
+ * i2c_wait_for_writes - wait for a write
+ * @dd: the qlogic_ib device
+ *
+ * We use this instead of udelay directly, so we can make sure
+ * that previous register writes have been flushed all the way
+ * to the chip. Since we are delaying anyway, the cost doesn't
+ * hurt, and it makes the bit twiddling more regular.
+ */
+static void i2c_wait_for_writes(struct qib_devdata *dd)
+{
+ /*
+ * implicit read of EXTStatus is as good as explicit
+ * read of scratch, if all we want to do is flush
+ * writes.
+ */
+ dd->f_gpio_mod(dd, 0, 0, 0);
+ rmb(); /* inlined, so prevent compiler reordering */
+}
+
+/*
+ * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that
+ * for "almost compliant" modules
+ */
+#define SCL_WAIT_USEC 1000
+
+/* BUF_WAIT is the time the bus must be free between a STOP or ACK and the
+ * next START.  Should be 20, but some chips need more.
+ */
+#define TWSI_BUF_WAIT_USEC 60
+
+static void scl_out(struct qib_devdata *dd, u8 bit)
+{
+ u32 mask;
+
+ udelay(1);
+
+ mask = 1UL << dd->gpio_scl_num;
+
+	/* SCL is meant to be open-drain, so never set "OUT", just DIR */
+ dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask);
+
+ /*
+	 * Allow for slow slaves with a simple delay for the
+	 * falling edge, sampling on the rise.
+ */
+ if (!bit)
+ udelay(2);
+ else {
+ int rise_usec;
+ for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
+ if (mask & dd->f_gpio_mod(dd, 0, 0, 0))
+ break;
+ udelay(2);
+ }
+ if (rise_usec <= 0)
+ qib_dev_err(dd, "SCL interface stuck low > %d uSec\n",
+ SCL_WAIT_USEC);
+ }
+ i2c_wait_for_writes(dd);
+}
+
+static void sda_out(struct qib_devdata *dd, u8 bit)
+{
+ u32 mask;
+
+ mask = 1UL << dd->gpio_sda_num;
+
+	/* SDA is meant to be open-drain, so never set "OUT", just DIR */
+ dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask);
+
+ i2c_wait_for_writes(dd);
+ udelay(2);
+}
+
+static u8 sda_in(struct qib_devdata *dd, int wait)
+{
+ int bnum;
+ u32 read_val, mask;
+
+ bnum = dd->gpio_sda_num;
+ mask = (1UL << bnum);
+	/* SDA is meant to be open-drain, so never set "OUT", just DIR */
+ dd->f_gpio_mod(dd, 0, 0, mask);
+ read_val = dd->f_gpio_mod(dd, 0, 0, 0);
+ if (wait)
+ i2c_wait_for_writes(dd);
+ return (read_val & mask) >> bnum;
+}
+
+/**
+ * i2c_ackrcv - see if ack following write is true
+ * @dd: the qlogic_ib device
+ */
+static int i2c_ackrcv(struct qib_devdata *dd)
+{
+ u8 ack_received;
+
+ /* AT ENTRY SCL = LOW */
+ /* change direction, ignore data */
+ ack_received = sda_in(dd, 1);
+ scl_out(dd, 1);
+ ack_received = sda_in(dd, 1) == 0;
+ scl_out(dd, 0);
+ return ack_received;
+}
+
+static void stop_cmd(struct qib_devdata *dd);
+
+/**
+ * rd_byte - read a byte, sending STOP on last, else ACK
+ * @dd: the qlogic_ib device
+ * @last: non-zero to send STOP after this byte instead of ACK
+ *
+ * Returns the byte shifted out of the device
+ */
+static int rd_byte(struct qib_devdata *dd, int last)
+{
+ int bit_cntr, data;
+
+ data = 0;
+
+ for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
+ data <<= 1;
+ scl_out(dd, 1);
+ data |= sda_in(dd, 0);
+ scl_out(dd, 0);
+ }
+ if (last) {
+ scl_out(dd, 1);
+ stop_cmd(dd);
+ } else {
+ sda_out(dd, 0);
+ scl_out(dd, 1);
+ scl_out(dd, 0);
+ sda_out(dd, 1);
+ }
+ return data;
+}
+
+/**
+ * wr_byte - write a byte, one bit at a time
+ * @dd: the qlogic_ib device
+ * @data: the byte to write
+ *
+ * Returns 0 if we got the following ack, otherwise 1
+ */
+static int wr_byte(struct qib_devdata *dd, u8 data)
+{
+ int bit_cntr;
+ u8 bit;
+
+ for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
+ bit = (data >> bit_cntr) & 1;
+ sda_out(dd, bit);
+ scl_out(dd, 1);
+ scl_out(dd, 0);
+ }
+ return (!i2c_ackrcv(dd)) ? 1 : 0;
+}
+
+/*
+ * issue TWSI start sequence:
+ * (both clock/data high, clock high, data low while clock is high)
+ */
+static void start_seq(struct qib_devdata *dd)
+{
+ sda_out(dd, 1);
+ scl_out(dd, 1);
+ sda_out(dd, 0);
+ udelay(1);
+ scl_out(dd, 0);
+}
+
+/**
+ * stop_seq - transmit the stop sequence
+ * @dd: the qlogic_ib device
+ *
+ * (both clock/data low, clock high, data high while clock is high)
+ */
+static void stop_seq(struct qib_devdata *dd)
+{
+ scl_out(dd, 0);
+ sda_out(dd, 0);
+ scl_out(dd, 1);
+ sda_out(dd, 1);
+}
+
+/**
+ * stop_cmd - transmit the stop condition
+ * @dd: the qlogic_ib device
+ *
+ * (both clock/data low, clock high, data high while clock is high)
+ */
+static void stop_cmd(struct qib_devdata *dd)
+{
+ stop_seq(dd);
+ udelay(TWSI_BUF_WAIT_USEC);
+}
+
+/**
+ * qib_twsi_reset - reset I2C communication
+ * @dd: the qlogic_ib device
+ */
+
+int qib_twsi_reset(struct qib_devdata *dd)
+{
+ int clock_cycles_left = 9;
+ int was_high = 0;
+ u32 pins, mask;
+
+ /* Both SCL and SDA should be high. If not, there
+ * is something wrong.
+ */
+ mask = (1UL << dd->gpio_scl_num) | (1UL << dd->gpio_sda_num);
+
+ /*
+ * Force pins to desired innocuous state.
+ * This is the default power-on state with out=0 and dir=0,
+	 * so tri-stated and should be floating high (barring HW problems).
+ */
+ dd->f_gpio_mod(dd, 0, 0, mask);
+
+ /*
+ * Clock nine times to get all listeners into a sane state.
+ * If SDA does not go high at any point, we are wedged.
+ * One vendor recommends then issuing START followed by STOP.
+	 * We cannot use our "normal" functions to do that, because
+ * if SCL drops between them, another vendor's part will
+ * wedge, dropping SDA and keeping it low forever, at the end of
+ * the next transaction (even if it was not the device addressed).
+ * So our START and STOP take place with SCL held high.
+ */
+ while (clock_cycles_left--) {
+ scl_out(dd, 0);
+ scl_out(dd, 1);
+ /* Note if SDA is high, but keep clocking to sync slave */
+ was_high |= sda_in(dd, 0);
+ }
+
+ if (was_high) {
+ /*
+ * We saw a high, which we hope means the slave is sync'd.
+ * Issue START, STOP, pause for T_BUF.
+ */
+
+ pins = dd->f_gpio_mod(dd, 0, 0, 0);
+ if ((pins & mask) != mask)
+ qib_dev_err(dd, "GPIO pins not at rest: %d\n",
+ pins & mask);
+ /* Drop SDA to issue START */
+ udelay(1); /* Guarantee .6 uSec setup */
+ sda_out(dd, 0);
+ udelay(1); /* Guarantee .6 uSec hold */
+ /* At this point, SCL is high, SDA low. Raise SDA for STOP */
+ sda_out(dd, 1);
+ udelay(TWSI_BUF_WAIT_USEC);
+ }
+
+ return !was_high;
+}
+
+#define QIB_TWSI_START 0x100
+#define QIB_TWSI_STOP 0x200
+
+/* Write byte to TWSI, optionally prefixed with START or suffixed with
+ * STOP.
+ * returns 0 if OK (ACK received), else != 0
+ */
+static int qib_twsi_wr(struct qib_devdata *dd, int data, int flags)
+{
+ int ret = 1;
+ if (flags & QIB_TWSI_START)
+ start_seq(dd);
+
+ ret = wr_byte(dd, data); /* Leaves SCL low (from i2c_ackrcv()) */
+
+ if (flags & QIB_TWSI_STOP)
+ stop_cmd(dd);
+ return ret;
+}
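+
+/*
+ * Usage sketch (illustrative only): a complete one-byte register write to
+ * a real I2C device is a composition of the helper above, roughly
+ *
+ *	if (qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START) ||
+ *	    qib_twsi_wr(dd, addr, 0) ||
+ *	    qib_twsi_wr(dd, data, QIB_TWSI_STOP))
+ *		...handle the NACK, e.g. stop_cmd(dd)...
+ *
+ * which is essentially the pattern the block read/write routines below
+ * follow, except that they call stop_cmd() explicitly rather than using
+ * the STOP flag on the last byte.
+ */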
+
+/* Added functionality for IBA7220-based cards */
+#define QIB_TEMP_DEV 0x98
+
+/*
+ * qib_twsi_blk_rd
+ * Formerly called qib_eeprom_internal_read, and only used for eeprom,
+ * but now the general interface for data transfer from twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as an
+ * address within the device. On all other devices found on boards handled by
+ * this driver, the device is followed by a one-byte "address" which selects
+ * the "register" or "offset" within the device from which data should
+ * be read.
+ */
+int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr,
+ void *buffer, int len)
+{
+ int ret;
+ u8 *bp = buffer;
+
+ ret = 1;
+
+ if (dev == QIB_TWSI_NO_DEV) {
+ /* legacy not-really-I2C */
+ addr = (addr << 1) | READ_CMD;
+ ret = qib_twsi_wr(dd, addr, QIB_TWSI_START);
+ } else {
+ /* Actual I2C */
+ ret = qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START);
+ if (ret) {
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+ /*
+ * SFF spec claims we do _not_ stop after the addr
+ * but simply issue a start with the "read" dev-addr.
+		 * Since we are implicitly waiting for ACK here,
+		 * we need t_buf (nominally 20uSec) before that start,
+		 * and cannot rely on the delay built into the STOP.
+ */
+ ret = qib_twsi_wr(dd, addr, 0);
+ udelay(TWSI_BUF_WAIT_USEC);
+
+ if (ret) {
+ qib_dev_err(dd,
+ "Failed to write interface read addr %02X\n",
+ addr);
+ ret = 1;
+ goto bail;
+ }
+ ret = qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START);
+ }
+ if (ret) {
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+
+ /*
+	 * Block devices keep clocking data out as long as we ack,
+ * automatically incrementing the address. Some have "pages"
+ * whose boundaries will not be crossed, but the handling
+ * of these is left to the caller, who is in a better
+ * position to know.
+ */
+ while (len-- > 0) {
+ /*
+ * Get and store data, sending ACK if length remaining,
+ * else STOP
+ */
+ *bp++ = rd_byte(dd, !len);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/*
+ * qib_twsi_blk_wr
+ * Formerly called qib_eeprom_internal_write, and only used for eeprom,
+ * but now the general interface for data transfer to twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as an
+ * address within the device. On all other devices found on boards handled by
+ * this driver, the device is followed by a one-byte "address" which selects
+ * the "register" or "offset" within the device to which data should
+ * be written.
+ */
+int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
+ const void *buffer, int len)
+{
+ int sub_len;
+ const u8 *bp = buffer;
+ int max_wait_time, i;
+ int ret;
+ ret = 1;
+
+ while (len > 0) {
+ if (dev == QIB_TWSI_NO_DEV) {
+ if (qib_twsi_wr(dd, (addr << 1) | WRITE_CMD,
+ QIB_TWSI_START)) {
+ goto failed_write;
+ }
+ } else {
+ /* Real I2C */
+ if (qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START))
+ goto failed_write;
+ ret = qib_twsi_wr(dd, addr, 0);
+ if (ret) {
+ qib_dev_err(dd,
+ "Failed to write interface write addr %02X\n",
+ addr);
+ goto failed_write;
+ }
+ }
+
+ sub_len = min(len, 4);
+ addr += sub_len;
+ len -= sub_len;
+
+ for (i = 0; i < sub_len; i++)
+ if (qib_twsi_wr(dd, *bp++, 0))
+ goto failed_write;
+
+ stop_cmd(dd);
+
+ /*
+		 * Wait for the write to complete by waiting for a successful
+		 * read (the chip replies with a zero after the write
+		 * cmd completes, and before it writes to the eeprom).
+ * The startcmd for the read will fail the ack until
+ * the writes have completed. We do this inline to avoid
+ * the debug prints that are in the real read routine
+ * if the startcmd fails.
+ * We also use the proper device address, so it doesn't matter
+		 * whether we have a real eeprom_dev.  Legacy likes any address.
+ */
+ max_wait_time = 100;
+ while (qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START)) {
+ stop_cmd(dd);
+ if (!--max_wait_time)
+ goto failed_write;
+ }
+ /* now read (and ignore) the resulting byte */
+ rd_byte(dd, 1);
+ }
+
+ ret = 0;
+ goto bail;
+
+failed_write:
+ stop_cmd(dd);
+ ret = 1;
+
+bail:
+ return ret;
+}
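+
+/*
+ * Usage sketch (hypothetical device code, for illustration only): reading
+ * the first 16 bytes from a TWSI device at, say, device code 0xA0,
+ * offset 0, would look like
+ *
+ *	u8 data[16];
+ *
+ *	if (qib_twsi_blk_rd(dd, 0xA0, 0, data, sizeof(data)))
+ *		qib_dev_err(dd, "TWSI read failed\n");
+ *
+ * qib_twsi_blk_wr() is the mirror image for writes; both return 0 on
+ * success and non-zero on failure.
+ */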
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
new file mode 100644
index 00000000000..31d3561400a
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/moduleparam.h>
+
+#include "qib.h"
+
+static unsigned qib_hol_timeout_ms = 3000;
+module_param_named(hol_timeout_ms, qib_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+ "duration of user app suspension after link failure");
+
+unsigned qib_sdma_fetch_arb = 1;
+module_param_named(fetch_arb, qib_sdma_fetch_arb, uint, S_IRUGO);
+MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
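+
+/*
+ * Note (assuming the driver is loaded as the ib_qib module): both
+ * parameters are read-only via sysfs (S_IRUGO), so they are set at load
+ * time, e.g.
+ *
+ *	modprobe ib_qib hol_timeout_ms=5000 fetch_arb=0
+ */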
+
+/**
+ * qib_disarm_piobufs - cancel a range of PIO buffers
+ * @dd: the qlogic_ib device
+ * @first: the first PIO buffer to cancel
+ * @cnt: the number of PIO buffers to cancel
+ *
+ * Cancel a range of PIO buffers. Used at user process close,
+ * in case it died while writing to a PIO buffer.
+ */
+void qib_disarm_piobufs(struct qib_devdata *dd, unsigned first, unsigned cnt)
+{
+ unsigned long flags;
+ unsigned i;
+ unsigned last;
+
+ last = first + cnt;
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (i = first; i < last; i++) {
+ __clear_bit(i, dd->pio_need_disarm);
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/*
+ * This is called by a user process when it sees the DISARM_BUFS event
+ * bit is set.
+ */
+int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned i;
+ unsigned last;
+ unsigned n = 0;
+
+ last = rcd->pio_base + rcd->piocnt;
+ /*
+ * Don't need uctxt_lock here, since user has called in to us.
+ * Clear at start in case more interrupts set bits while we
+ * are disarming
+ */
+ if (rcd->user_event_mask) {
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt, if any
+ */
+ clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ clear_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ spin_lock_irq(&dd->pioavail_lock);
+ for (i = rcd->pio_base; i < last; i++) {
+ if (__test_and_clear_bit(i, dd->pio_need_disarm)) {
+ n++;
+ dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i));
+ }
+ }
+ spin_unlock_irq(&dd->pioavail_lock);
+ return 0;
+}
+
+static struct qib_pportdata *is_sdma_buf(struct qib_devdata *dd, unsigned i)
+{
+ struct qib_pportdata *ppd;
+ unsigned pidx;
+
+ for (pidx = 0; pidx < dd->num_pports; pidx++) {
+ ppd = dd->pport + pidx;
+ if (i >= ppd->sdma_state.first_sendbuf &&
+ i < ppd->sdma_state.last_sendbuf)
+ return ppd;
+ }
+ return NULL;
+}
+
+/*
+ * Return true if send buffer is being used by a user context.
+ * Sets _QIB_EVENT_DISARM_BUFS_BIT in user_event_mask as a side effect
+ */
+static int find_ctxt(struct qib_devdata *dd, unsigned bufn)
+{
+ struct qib_ctxtdata *rcd;
+ unsigned ctxt;
+ int ret = 0;
+
+ spin_lock(&dd->uctxt_lock);
+ for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+ rcd = dd->rcd[ctxt];
+ if (!rcd || bufn < rcd->pio_base ||
+ bufn >= rcd->pio_base + rcd->piocnt)
+ continue;
+ if (rcd->user_event_mask) {
+ int i;
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt, if any
+ */
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ ret = 1;
+ break;
+ }
+ spin_unlock(&dd->uctxt_lock);
+
+ return ret;
+}
+
+/*
+ * Disarm a set of send buffers. If the buffer might be actively being
+ * written to, mark the buffer to be disarmed later when it is not being
+ * written to.
+ *
+ * This should only be called from the IRQ error handler.
+ */
+void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask,
+ unsigned cnt)
+{
+ struct qib_pportdata *ppd, *pppd[QIB_MAX_IB_PORTS];
+ unsigned i;
+ unsigned long flags;
+
+ for (i = 0; i < dd->num_pports; i++)
+ pppd[i] = NULL;
+
+ for (i = 0; i < cnt; i++) {
+ int which;
+ if (!test_bit(i, mask))
+ continue;
+ /*
+ * If the buffer is owned by the DMA hardware,
+ * reset the DMA engine.
+ */
+ ppd = is_sdma_buf(dd, i);
+ if (ppd) {
+ pppd[ppd->port] = ppd;
+ continue;
+ }
+ /*
+ * If the kernel is writing the buffer or the buffer is
+ * owned by a user process, we can't clear it yet.
+ */
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ if (test_bit(i, dd->pio_writing) ||
+ (!test_bit(i << 1, dd->pioavailkernel) &&
+ find_ctxt(dd, i))) {
+ __set_bit(i, dd->pio_need_disarm);
+ which = 0;
+ } else {
+ which = 1;
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+ }
+
+ /* do cancel_sends once per port that had sdma piobufs in error */
+ for (i = 0; i < dd->num_pports; i++)
+ if (pppd[i])
+ qib_cancel_sends(pppd[i]);
+}
+
+/**
+ * update_send_bufs - update shadow copy of the PIO availability map
+ * @dd: the qlogic_ib device
+ *
+ * called whenever our local copy indicates we have run out of send buffers
+ */
+static void update_send_bufs(struct qib_devdata *dd)
+{
+ unsigned long flags;
+ unsigned i;
+ const unsigned piobregs = dd->pioavregs;
+
+ /*
+ * If the generation (check) bits have changed, then we update the
+ * busy bit for the corresponding PIO buffer. This algorithm will
+ * modify positions to the value they already have in some cases
+ * (i.e., no change), but it's faster than changing only the bits
+ * that have changed.
+ *
+	 * We would like to do this atomically, to avoid spinlocks in the
+	 * critical send path, but that's not really possible, given the
+	 * type of changes, and that this routine could be called on
+	 * multiple CPUs simultaneously, so we lock in this routine only,
+	 * to avoid conflicting updates; all we change is the shadow, and
+	 * it's a single 64 bit memory location, so by definition the update
+	 * is atomic in terms of what other CPUs can see in testing the
+	 * bits.  The spin_lock overhead isn't too bad, since it only
+	 * happens when all buffers are in use, so only CPU overhead, not
+	 * latency or bandwidth, is affected.
+ */
+ if (!dd->pioavailregs_dma)
+ return;
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (i = 0; i < piobregs; i++) {
+ u64 pchbusy, pchg, piov, pnew;
+
+ piov = le64_to_cpu(dd->pioavailregs_dma[i]);
+ pchg = dd->pioavailkernel[i] &
+ ~(dd->pioavailshadow[i] ^ piov);
+ pchbusy = pchg << QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT;
+ if (pchg && (pchbusy & dd->pioavailshadow[i])) {
+ pnew = dd->pioavailshadow[i] & ~pchbusy;
+ pnew |= piov & pchbusy;
+ dd->pioavailshadow[i] = pnew;
+ }
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/*
+ * Debugging code and stats updates if no pio buffers available.
+ */
+static noinline void no_send_bufs(struct qib_devdata *dd)
+{
+ dd->upd_pio_shadow = 1;
+
+ /* not atomic, but if we lose a stat count in a while, that's OK */
+ qib_stats.sps_nopiobufs++;
+}
+
+/*
+ * Common code for normal driver send buffer allocation, and reserved
+ * allocation.
+ *
+ * Do appropriate marking as busy, etc.
+ * Returns buffer pointer if one is found, otherwise NULL.
+ */
+u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
+ u32 first, u32 last)
+{
+ unsigned i, j, updated = 0;
+ unsigned nbufs;
+ unsigned long flags;
+ unsigned long *shadow = dd->pioavailshadow;
+ u32 __iomem *buf;
+
+ if (!(dd->flags & QIB_PRESENT))
+ return NULL;
+
+ nbufs = last - first + 1; /* number in range to check */
+ if (dd->upd_pio_shadow) {
+update_shadow:
+ /*
+		 * Minor optimization.  If we had no buffers on the last call,
+		 * start out by doing the update; continue and do the scan even
+		 * if no buffers were updated, to be paranoid.
+ */
+ update_send_bufs(dd);
+ updated++;
+ }
+ i = first;
+ /*
+ * While test_and_set_bit() is atomic, we do that and then the
+ * change_bit(), and the pair is not. See if this is the cause
+ * of the remaining armlaunch errors.
+ */
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ if (dd->last_pio >= first && dd->last_pio <= last)
+ i = dd->last_pio + 1;
+ if (!first)
+ /* adjust to min possible */
+ nbufs = last - dd->min_kernel_pio + 1;
+ for (j = 0; j < nbufs; j++, i++) {
+ if (i > last)
+ i = !first ? dd->min_kernel_pio : first;
+ if (__test_and_set_bit((2 * i) + 1, shadow))
+ continue;
+ /* flip generation bit */
+ __change_bit(2 * i, shadow);
+ /* remember that the buffer can be written to now */
+ __set_bit(i, dd->pio_writing);
+ if (!first && first != last) /* first == last on VL15, avoid */
+ dd->last_pio = i;
+ break;
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+
+ if (j == nbufs) {
+ if (!updated)
+ /*
+ * First time through; shadow exhausted, but may be
+ * buffers available, try an update and then rescan.
+ */
+ goto update_shadow;
+ no_send_bufs(dd);
+ buf = NULL;
+ } else {
+ if (i < dd->piobcnt2k)
+ buf = (u32 __iomem *)(dd->pio2kbase +
+ i * dd->palign);
+ else if (i < dd->piobcnt2k + dd->piobcnt4k || !dd->piovl15base)
+ buf = (u32 __iomem *)(dd->pio4kbase +
+ (i - dd->piobcnt2k) * dd->align4k);
+ else
+ buf = (u32 __iomem *)(dd->piovl15base +
+ (i - (dd->piobcnt2k + dd->piobcnt4k)) *
+ dd->align4k);
+ if (pbufnum)
+ *pbufnum = i;
+ dd->upd_pio_shadow = 0;
+ }
+
+ return buf;
+}
+
+/*
+ * Record that the caller has finished writing to the buffer, so we will
+ * not disarm it while it is being written, and disarm it now if a disarm
+ * was deferred.
+ */
+void qib_sendbuf_done(struct qib_devdata *dd, unsigned n)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ __clear_bit(n, dd->pio_writing);
+ if (__test_and_clear_bit(n, dd->pio_need_disarm))
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(n));
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
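+
+/*
+ * Usage sketch (illustrative only): kernel senders pair the two routines
+ * above so a buffer is never disarmed while it is still being written,
+ * roughly
+ *
+ *	u32 bufn;
+ *	u32 __iomem *piobuf = qib_getsendbuf_range(dd, &bufn, first, last);
+ *
+ *	if (piobuf) {
+ *		...copy the packet into piobuf...
+ *		qib_sendbuf_done(dd, bufn);
+ *	}
+ */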
+
+/**
+ * qib_chg_pioavailkernel - change which send buffers are available for kernel
+ * @dd: the qlogic_ib device
+ * @start: the starting send buffer number
+ * @len: the number of send buffers
+ * @avail: true if the buffers are available for kernel use, false otherwise
+ * @rcd: the context (if any) the buffers belong to; passed through to
+ *	f_txchk_change()
+ */
+void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
+ unsigned len, u32 avail, struct qib_ctxtdata *rcd)
+{
+ unsigned long flags;
+ unsigned end;
+ unsigned ostart = start;
+
+ /* There are two bits per send buffer (busy and generation) */
+ start *= 2;
+ end = start + len * 2;
+
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ /* Set or clear the busy bit in the shadow. */
+ while (start < end) {
+ if (avail) {
+ unsigned long dma;
+ int i;
+
+ /*
+ * The BUSY bit will never be set, because we disarm
+ * the user buffers before we hand them back to the
+ * kernel. We do have to make sure the generation
+ * bit is set correctly in shadow, since it could
+ * have changed many times while allocated to user.
+ * We can't use the bitmap functions on the full
+ * dma array because it is always little-endian, so
+ * we have to flip to host-order first.
+ * BITS_PER_LONG is slightly wrong, since it's
+ * always 64 bits per register in chip...
+ * We only work on 64 bit kernels, so that's OK.
+ */
+ i = start / BITS_PER_LONG;
+ __clear_bit(QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT + start,
+ dd->pioavailshadow);
+ dma = (unsigned long)
+ le64_to_cpu(dd->pioavailregs_dma[i]);
+ if (test_bit((QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT +
+ start) % BITS_PER_LONG, &dma))
+ __set_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT +
+ start, dd->pioavailshadow);
+ else
+ __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->pioavailshadow);
+ __set_bit(start, dd->pioavailkernel);
+ if ((start >> 1) < dd->min_kernel_pio)
+ dd->min_kernel_pio = start >> 1;
+ } else {
+ __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
+ dd->pioavailshadow);
+ __clear_bit(start, dd->pioavailkernel);
+ if ((start >> 1) > dd->min_kernel_pio)
+ dd->min_kernel_pio = start >> 1;
+ }
+ start += 2;
+ }
+
+ if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1)
+ dd->last_pio = dd->min_kernel_pio - 1;
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+
+ dd->f_txchk_change(dd, ostart, len, avail, rcd);
+}
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent. Used whenever we need to be
+ * sure the send side is idle. Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo. The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if a normal send had happened.
+ */
+void qib_cancel_sends(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_ctxtdata *rcd;
+ unsigned long flags;
+ unsigned ctxt;
+ unsigned i;
+ unsigned last;
+
+ /*
+ * Tell PSM to disarm buffers again before trying to reuse them.
+ * We need to be sure the rcd doesn't change out from under us
+ * while we do so. We hold the two locks sequentially. We might
+ * needlessly set some need_disarm bits as a result, if the
+ * context is closed after we release the uctxt_lock, but that's
+ * fairly benign, and safer than nesting the locks.
+ */
+ for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ rcd = dd->rcd[ctxt];
+ if (rcd && rcd->ppd == ppd) {
+ last = rcd->pio_base + rcd->piocnt;
+ if (rcd->user_event_mask) {
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt,
+ * if any
+ */
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ i = rcd->pio_base;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (; i < last; i++)
+ __set_bit(i, dd->pio_need_disarm);
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+ } else
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ }
+
+ if (!(dd->flags & QIB_HAS_SEND_DMA))
+ dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL |
+ QIB_SENDCTRL_FLUSH);
+}
+
+/*
+ * Force an update of the in-memory copy of the pioavail registers, when
+ * needed for any of a variety of reasons.
+ * If already off, this routine is a nop, on the assumption that the
+ * caller (or set of callers) will "do the right thing".
+ * This is a per-device operation, so just the first port.
+ */
+void qib_force_pio_avail_update(struct qib_devdata *dd)
+{
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+}
+
+void qib_hol_down(struct qib_pportdata *ppd)
+{
+ /*
+ * Cancel sends when the link goes DOWN so that we aren't doing it
+ * at INIT when we might be trying to send SMI packets.
+ */
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ qib_cancel_sends(ppd);
+}
+
+/*
+ * Link is at INIT.
+ * We start the HoL timer so we can detect stuck packets blocking SMP replies.
+ * Timer may already be running, so use mod_timer, not add_timer.
+ */
+void qib_hol_init(struct qib_pportdata *ppd)
+{
+ if (ppd->hol_state != QIB_HOL_INIT) {
+ ppd->hol_state = QIB_HOL_INIT;
+ mod_timer(&ppd->hol_timer,
+ jiffies + msecs_to_jiffies(qib_hol_timeout_ms));
+ }
+}
+
+/*
+ * Link is up, continue any user processes, and ensure timer
+ * is a nop, if running. Let timer keep running, if set; it
+ * will nop when it sees the link is up.
+ */
+void qib_hol_up(struct qib_pportdata *ppd)
+{
+ ppd->hol_state = QIB_HOL_UP;
+}
+
+/*
+ * This is only called via the timer.
+ */
+void qib_hol_event(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+
+ /* If hardware error, etc, skip. */
+ if (!(ppd->dd->flags & QIB_INITTED))
+ return;
+
+ if (ppd->hol_state != QIB_HOL_UP) {
+ /*
+ * Try to flush sends in case a stuck packet is blocking
+ * SMP replies.
+ */
+ qib_hol_down(ppd);
+ mod_timer(&ppd->hol_timer,
+ jiffies + msecs_to_jiffies(qib_hol_timeout_ms));
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
new file mode 100644
index 00000000000..aa3a8035bb6
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "qib.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_UC_##x
+
+/**
+ * qib_make_uc_req - construct a request packet (SEND, RDMA write)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_uc_req(struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ struct qib_swqe *wqe;
+ unsigned long flags;
+ u32 hwords;
+ u32 bth0;
+ u32 len;
+ u32 pmtu = qp->pmtu;
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
+ if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ ohdr = &qp->s_hdr->u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr->u.l.oth;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 0;
+
+ /* Get the next send request. */
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ qp->s_wqe = NULL;
+ switch (qp->s_state) {
+ default:
+ if (!(ib_qib_state_ops[qp->state] &
+ QIB_PROCESS_NEXT_SEND_OK))
+ goto bail;
+ /* Check if send work queue is empty. */
+ if (qp->s_cur == qp->s_head)
+ goto bail;
+ /*
+ * Start a new request.
+ */
+ wqe->psn = qp->s_next_psn;
+ qp->s_psn = qp->s_next_psn;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+ len = wqe->length;
+ qp->s_len = len;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (len > pmtu) {
+ qp->s_state = OP(SEND_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_ONLY);
+ else {
+ qp->s_state =
+ OP(SEND_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ hwords += sizeof(struct ib_reth) / 4;
+ if (len > pmtu) {
+ qp->s_state = OP(RDMA_WRITE_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_ONLY);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the RETH */
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ default:
+ goto bail;
+ }
+ break;
+
+ case OP(SEND_FIRST):
+ qp->s_state = OP(SEND_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_LAST);
+ else {
+ qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ qp->s_state = OP(RDMA_WRITE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_LAST);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+ }
+ qp->s_len -= len;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_cur_size = len;
+ qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
+ qp->s_next_psn++ & QIB_PSN_MASK);
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
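+
+/*
+ * Layout sketch (illustrative; the byte counts match the hdrsize
+ * arithmetic in qib_uc_rcv() below): the UC request built by
+ * qib_make_uc_req() is, in order,
+ *
+ *	LRH (8 bytes) [+ GRH (40 bytes)] + BTH (12 bytes)
+ *	[+ RETH (16 bytes) for RDMA WRITE first/only packets]
+ *	[+ 4 bytes of immediate data]
+ *
+ * followed by up to one PMTU of payload; "hwords" counts these headers
+ * in 32-bit words, starting from 5 (LRH+BTH) in the common case.
+ */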
+
+/**
+ * qib_uc_rcv - handle an incoming UC packet
+ * @ibp: the port the packet came in on
+ * @hdr: the header of the packet
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the length of the packet
+ * @qp: the QP for this packet.
+ *
+ * This is called from qib_qp_rcv() to process an incoming UC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ u32 opcode;
+ u32 hdrsize;
+ u32 psn;
+ u32 pad;
+ struct ib_wc wc;
+ u32 pmtu = qp->pmtu;
+ struct ib_reth *reth;
+ int ret;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12; /* LRH + BTH */
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
+ }
+
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
+ return;
+
+ psn = be32_to_cpu(ohdr->bth[2]);
+ opcode >>= 24;
+
+	/* Compare the PSN versus the expected PSN. */
+ if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
+ /*
+ * Handle a sequence error.
+ * Silently drop any current message.
+ */
+ qp->r_psn = psn;
+inv:
+ if (qp->r_state == OP(SEND_FIRST) ||
+ qp->r_state == OP(SEND_MIDDLE)) {
+ set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+ qp->r_sge.num_sge = 0;
+ } else
+ qib_put_ss(&qp->r_sge);
+ qp->r_state = OP(SEND_LAST);
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ goto send_first;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ goto rdma_first;
+
+ default:
+ goto drop;
+ }
+ }
+
+ /* Check for opcode sequence errors. */
+ switch (qp->r_state) {
+ case OP(SEND_FIRST):
+ case OP(SEND_MIDDLE):
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ break;
+ goto inv;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_MIDDLE):
+ if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ break;
+ goto inv;
+
+ default:
+ if (opcode == OP(SEND_FIRST) ||
+ opcode == OP(SEND_ONLY) ||
+ opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_FIRST) ||
+ opcode == OP(RDMA_WRITE_ONLY) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ break;
+ goto inv;
+ }
+
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
+ qp->r_flags |= QIB_R_COMM_EST;
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_COMM_EST;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ }
+
+ /* OK, process the packet. */
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+send_first:
+ if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+ qp->r_sge = qp->s_rdma_read_sge;
+ else {
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto drop;
+ /*
+ * qp->s_rdma_read_sge will be the owner
+ * of the mr references.
+ */
+ qp->s_rdma_read_sge = qp->r_sge;
+ }
+ qp->r_rcv_len = 0;
+ if (opcode == OP(SEND_ONLY))
+ goto no_immediate_data;
+ else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
+ goto send_last_imm;
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto rewind;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto rewind;
+ qib_copy_sge(&qp->r_sge, data, pmtu, 0);
+ break;
+
+ case OP(SEND_LAST_WITH_IMMEDIATE):
+send_last_imm:
+ wc.ex.imm_data = ohdr->u.imm_data;
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ goto send_last;
+ case OP(SEND_LAST):
+no_immediate_data:
+ wc.ex.imm_data = 0;
+ wc.wc_flags = 0;
+send_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto rewind;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ wc.byte_len = tlen + qp->r_rcv_len;
+ if (unlikely(wc.byte_len > qp->r_len))
+ goto rewind;
+ wc.opcode = IB_WC_RECV;
+ qib_copy_sge(&qp->r_sge, data, tlen, 0);
+ qib_put_ss(&qp->s_rdma_read_sge);
+last_imm:
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ /* zero fields that are N/A */
+ wc.vendor_err = 0;
+ wc.pkey_index = 0;
+ wc.dlid_path_bits = 0;
+ wc.port_num = 0;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
+rdma_first:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE))) {
+ goto drop;
+ }
+ reth = &ohdr->u.rc.reth;
+ hdrsize += sizeof(*reth);
+ qp->r_len = be32_to_cpu(reth->length);
+ qp->r_rcv_len = 0;
+ qp->r_sge.sg_list = NULL;
+ if (qp->r_len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey */
+ ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+ vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok))
+ goto drop;
+ qp->r_sge.num_sge = 1;
+ } else {
+ qp->r_sge.num_sge = 0;
+ qp->r_sge.sge.mr = NULL;
+ qp->r_sge.sge.vaddr = NULL;
+ qp->r_sge.sge.length = 0;
+ qp->r_sge.sge.sge_length = 0;
+ }
+ if (opcode == OP(RDMA_WRITE_ONLY))
+ goto rdma_last;
+ else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
+ wc.ex.imm_data = ohdr->u.rc.imm_data;
+ goto rdma_last_imm;
+ }
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto drop;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto drop;
+ qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ break;
+
+ case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+ wc.ex.imm_data = ohdr->u.imm_data;
+rdma_last_imm:
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
+ goto drop;
+ if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+ qib_put_ss(&qp->s_rdma_read_sge);
+ else {
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto drop;
+ }
+ wc.byte_len = qp->r_len;
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ qib_put_ss(&qp->r_sge);
+ goto last_imm;
+
+ case OP(RDMA_WRITE_LAST):
+rdma_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
+ goto drop;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ qib_put_ss(&qp->r_sge);
+ break;
+
+ default:
+ /* Drop packet for unknown opcodes. */
+ goto drop;
+ }
+ qp->r_psn++;
+ qp->r_state = opcode;
+ return;
+
+rewind:
+ set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+ qp->r_sge.num_sge = 0;
+drop:
+ ibp->n_pkt_drops++;
+ return;
+
+op_err:
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ return;
+
+}
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
new file mode 100644
index 00000000000..aaf7039f8ed
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/**
+ * qib_ud_loopback - handle send on loopback QPs
+ * @sqp: the sending QP
+ * @swqe: the send work request
+ *
+ * This is called from qib_make_ud_req() to forward a WQE addressed
+ * to the same HCA.
+ * Note that the receive interrupt handler may be calling qib_ud_rcv()
+ * while this is being called.
+ */
+static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+{
+ struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
+ struct qib_pportdata *ppd;
+ struct qib_qp *qp;
+ struct ib_ah_attr *ah_attr;
+ unsigned long flags;
+ struct qib_sge_state ssge;
+ struct qib_sge *sge;
+ struct ib_wc wc;
+ u32 length;
+ enum ib_qp_type sqptype, dqptype;
+
+ qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+ if (!qp) {
+ ibp->n_pkt_drops++;
+ return;
+ }
+
+ sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : sqp->ibqp.qp_type;
+ dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : qp->ibqp.qp_type;
+
+ if (dqptype != sqptype ||
+ !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ goto drop;
+ }
+
+ ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ppd = ppd_from_ibp(ibp);
+
+ if (qp->ibqp.qp_num > 1) {
+ u16 pkey1;
+ u16 pkey2;
+ u16 lid;
+
+ pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index);
+ pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
+ if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
+ lid = ppd->lid | (ah_attr->src_path_bits &
+ ((1 << ppd->lmc) - 1));
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
+ ah_attr->sl,
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ cpu_to_be16(lid),
+ cpu_to_be16(ah_attr->dlid));
+ goto drop;
+ }
+ }
+
+ /*
+ * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ if (qp->ibqp.qp_num) {
+ u32 qkey;
+
+ qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
+ sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+ if (unlikely(qkey != qp->qkey)) {
+ u16 lid;
+
+ lid = ppd->lid | (ah_attr->src_path_bits &
+ ((1 << ppd->lmc) - 1));
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
+ ah_attr->sl,
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ cpu_to_be16(lid),
+ cpu_to_be16(ah_attr->dlid));
+ goto drop;
+ }
+ }
+
+ /*
+ * A GRH is expected to precede the data even if not
+ * present on the wire.
+ */
+ length = swqe->length;
+ memset(&wc, 0, sizeof wc);
+ wc.byte_len = length + sizeof(struct ib_grh);
+
+ if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = swqe->wr.ex.imm_data;
+ }
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+
+ /*
+ * Get the next work request entry to find where to put the data.
+ */
+ if (qp->r_flags & QIB_R_REUSE_SGE)
+ qp->r_flags &= ~QIB_R_REUSE_SGE;
+ else {
+ int ret;
+
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0) {
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ goto bail_unlock;
+ }
+ if (!ret) {
+ if (qp->ibqp.qp_num == 0)
+ ibp->n_vl15_dropped++;
+ goto bail_unlock;
+ }
+ }
+ /* Silently drop packets which are too big. */
+ if (unlikely(wc.byte_len > qp->r_len)) {
+ qp->r_flags |= QIB_R_REUSE_SGE;
+ ibp->n_pkt_drops++;
+ goto bail_unlock;
+ }
+
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ qib_copy_sge(&qp->r_sge, &ah_attr->grh,
+ sizeof(struct ib_grh), 1);
+ wc.wc_flags |= IB_WC_GRH;
+ } else
+ qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ ssge.sg_list = swqe->sg_list + 1;
+ ssge.sge = *swqe->sg_list;
+ ssge.num_sge = swqe->wr.num_sge;
+ sge = &ssge.sge;
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ssge.num_sge)
+ *sge = *ssge.sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+ qib_put_ss(&qp->r_sge);
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ goto bail_unlock;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = sqp->ibqp.qp_num;
+ wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
+ swqe->wr.wr.ud.pkey_index : 0;
+ wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.sl = ah_attr->sl;
+ wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.port_num = qp->port_num;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ swqe->wr.send_flags & IB_SEND_SOLICITED);
+ ibp->n_loop_pkts++;
+bail_unlock:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+drop:
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+/**
+ * qib_make_ud_req - construct a UD request packet
+ * @qp: the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_ud_req(struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ struct ib_ah_attr *ah_attr;
+ struct qib_pportdata *ppd;
+ struct qib_ibport *ibp;
+ struct qib_swqe *wqe;
+ unsigned long flags;
+ u32 nwords;
+ u32 extra_bytes;
+ u32 bth0;
+ u16 lrh0;
+ u16 lid;
+ int ret = 0;
+ int next_cur;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ if (qp->s_cur == qp->s_head)
+ goto bail;
+
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ next_cur = qp->s_cur + 1;
+ if (next_cur >= qp->s_size)
+ next_cur = 0;
+
+ /* Construct the header. */
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ppd = ppd_from_ibp(ibp);
+ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
+ if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+ this_cpu_inc(ibp->pmastats->n_multicast_xmit);
+ else
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
+ } else {
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
+ lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid == ppd->lid)) {
+ /*
+ * If DMAs are in progress, we can't generate
+ * a completion for the loopback packet since
+ * it would be out of order.
+ * XXX Instead of waiting, we could queue a
+ * zero length descriptor so we get a callback.
+ */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ qp->s_cur = next_cur;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qib_ud_loopback(qp, wqe);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+ goto done;
+ }
+ }
+
+ qp->s_cur = next_cur;
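+ /* pad the payload to a 4-byte boundary; nwords is the padded dword count */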
+ extra_bytes = -wqe->length & 3;
+ nwords = (wqe->length + extra_bytes) >> 2;
+
+ /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
+ qp->s_hdrwords = 7;
+ qp->s_cur_size = wqe->length;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_srate = ah_attr->static_rate;
+ qp->s_wqe = wqe;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ /* Header size in 32-bit words. */
+ qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+ &ah_attr->grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = QIB_LRH_GRH;
+ ohdr = &qp->s_hdr->u.l.oth;
+ /*
+ * Don't worry about sending to locally attached multicast
+ * QPs; the spec does not define what happens in that case.
+ */
+ } else {
+ /* Header size in 32-bit words. */
+ lrh0 = QIB_LRH_BTH;
+ ohdr = &qp->s_hdr->u.oth;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ qp->s_hdrwords++;
+ ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+ bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
+ } else
+ bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+ lrh0 |= ah_attr->sl << 4;
+ if (qp->ibqp.qp_type == IB_QPT_SMI)
+ lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
+ else
+ lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
+ qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ lid = ppd->lid;
+ if (lid) {
+ lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ qp->s_hdr->lrh[3] = cpu_to_be16(lid);
+ } else
+ qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
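+ /* the pad byte count is carried in BTH bits 20-21 */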
+ bth0 |= extra_bytes << 20;
+ bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
+ qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
+ wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ /*
+ * Use the multicast QP if the destination LID is a multicast LID.
+ */
+ ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
+ ah_attr->dlid != QIB_PERMISSIVE_LID ?
+ cpu_to_be32(QIB_MULTICAST_QPN) :
+ cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+ /*
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
+ qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
+
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
+{
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = ppd->dd;
+ unsigned ctxt = ppd->hw_pidx;
+ unsigned i;
+
+ pkey &= 0x7fff; /* remove limited/full membership bit */
+
+ for (i = 0; i < ARRAY_SIZE(dd->rcd[ctxt]->pkeys); ++i)
+ if ((dd->rcd[ctxt]->pkeys[i] & 0x7fff) == pkey)
+ return i;
+
+ /*
+ * Should not get here, this means hardware failed to validate pkeys.
+ * Punt and return index 0.
+ */
+ return 0;
+}
+
+/**
+ * qib_ud_rcv - receive an incoming UD packet
+ * @ibp: the port the packet came in on
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from qib_qp_rcv() to process an incoming UD packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ int opcode;
+ u32 hdrsize;
+ u32 pad;
+ struct ib_wc wc;
+ u32 qkey;
+ u32 src_qp;
+ u16 dlid;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
+ }
+ qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
+ src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+
+ /*
+ * Get the number of bytes the message was padded by
+ * and drop incomplete packets.
+ */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+
+ tlen -= hdrsize + pad + 4;
+
+ /*
+ * Check that the permissive LID is only used on QP0
+ * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
+ */
+ if (qp->ibqp.qp_num) {
+ if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
+ hdr->lrh[3] == IB_LID_PERMISSIVE))
+ goto drop;
+ if (qp->ibqp.qp_num > 1) {
+ u16 pkey1, pkey2;
+
+ pkey1 = be32_to_cpu(ohdr->bth[0]);
+ pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
+ if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+ pkey1,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) &
+ 0xF,
+ src_qp, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ return;
+ }
+ }
+ if (unlikely(qkey != qp->qkey)) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ src_qp, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ return;
+ }
+ /* Drop invalid MAD packets (see 13.5.3.1). */
+ if (unlikely(qp->ibqp.qp_num == 1 &&
+ (tlen != 256 ||
+ (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
+ goto drop;
+ } else {
+ struct ib_smp *smp;
+
+ /* Drop invalid MAD packets (see 13.5.3.1). */
+ if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)
+ goto drop;
+ smp = (struct ib_smp *) data;
+ if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
+ hdr->lrh[3] == IB_LID_PERMISSIVE) &&
+ smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ goto drop;
+ }
+
+ /*
+ * The opcode is in the low byte when it's in network order
+ * (top byte when in host order).
+ */
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (qp->ibqp.qp_num > 1 &&
+ opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
+ wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ tlen -= sizeof(u32);
+ } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
+ wc.ex.imm_data = 0;
+ wc.wc_flags = 0;
+ } else
+ goto drop;
+
+ /*
+ * A GRH is expected to precede the data even if not
+ * present on the wire.
+ */
+ wc.byte_len = tlen + sizeof(struct ib_grh);
+
+ /*
+ * Get the next work request entry to find where to put the data.
+ */
+ if (qp->r_flags & QIB_R_REUSE_SGE)
+ qp->r_flags &= ~QIB_R_REUSE_SGE;
+ else {
+ int ret;
+
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0) {
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ return;
+ }
+ if (!ret) {
+ if (qp->ibqp.qp_num == 0)
+ ibp->n_vl15_dropped++;
+ return;
+ }
+ }
+ /* Silently drop packets which are too big. */
+ if (unlikely(wc.byte_len > qp->r_len)) {
+ qp->r_flags |= QIB_R_REUSE_SGE;
+ goto drop;
+ }
+ if (has_grh) {
+ qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
+ sizeof(struct ib_grh), 1);
+ wc.wc_flags |= IB_WC_GRH;
+ } else
+ qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+ qib_put_ss(&qp->r_sge);
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ return;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = src_qp;
+ wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
+ qib_lookup_pkey(ibp, be32_to_cpu(ohdr->bth[0])) : 0;
+ wc.slid = be16_to_cpu(hdr->lrh[3]);
+ wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
+ dlid = be16_to_cpu(hdr->lrh[1]);
+ /*
+ * Save the LMC lower bits if the destination LID is a unicast LID.
+ */
+ wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+ dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
+ wc.port_num = qp->port_num;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ return;
+
+drop:
+ ibp->n_pkt_drops++;
+}
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
new file mode 100644
index 00000000000..2bc1d2b9629
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/device.h>
+
+#include "qib.h"
+
+static void __qib_release_user_pages(struct page **p, size_t num_pages,
+ int dirty)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages; i++) {
+ if (dirty)
+ set_page_dirty_lock(p[i]);
+ put_page(p[i]);
+ }
+}
+
+/*
+ * Call with current->mm->mmap_sem held.
+ */
+static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
+{
+ unsigned long lock_limit;
+ size_t got;
+ int ret;
+
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
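+ /* refuse to pin beyond RLIMIT_MEMLOCK unless the caller has CAP_IPC_LOCK */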
+ if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ for (got = 0; got < num_pages; got += ret) {
+ ret = get_user_pages(current, current->mm,
+ start_page + got * PAGE_SIZE,
+ num_pages - got, 1, 1,
+ p + got, vma);
+ if (ret < 0)
+ goto bail_release;
+ }
+
+ current->mm->pinned_vm += num_pages;
+
+ ret = 0;
+ goto bail;
+
+bail_release:
+ __qib_release_user_pages(p, got, 0);
+bail:
+ return ret;
+}
+
+/**
+ * qib_map_page - a safety wrapper around pci_map_page()
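+ * @hwdev: the PCI device
+ * @page: the page to map
+ * @offset: offset into the page
+ * @size: number of bytes to map
+ * @direction: PCI DMA direction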
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to ensure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page,
+ unsigned long offset, size_t size, int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_page(hwdev, phys, size, direction);
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * qib_get_user_pages - lock user pages into memory
+ * @start_page: the start page
+ * @num_pages: the number of pages
+ * @p: the output page structures
+ *
+ * This function takes a given start page (page aligned user virtual
+ * address) and pins it and the following specified number of pages. For
+ * now, num_pages is always 1, but that will probably change at some point
+ * (because caller is doing expected sends on a single virtually contiguous
+ * buffer, so we can do all pages at once).
+ */
+int qib_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p)
+{
+ int ret;
+
+ down_write(&current->mm->mmap_sem);
+
+ ret = __qib_get_user_pages(start_page, num_pages, p, NULL);
+
+ up_write(&current->mm->mmap_sem);
+
+ return ret;
+}
+
+void qib_release_user_pages(struct page **p, size_t num_pages)
+{
+ if (current->mm) /* during close after signal, mm can be NULL */
+ down_write(&current->mm->mmap_sem);
+
+ __qib_release_user_pages(p, num_pages, 1);
+
+ if (current->mm) {
+ current->mm->pinned_vm -= num_pages;
+ up_write(&current->mm->mmap_sem);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
new file mode 100644
index 00000000000..d2806cae234
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -0,0 +1,1465 @@
+/*
+ * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_user_sdma.h"
+
+/* minimum size of header */
+#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
+/* expected size of headers (for dma_pool) */
+#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
+/* attempt to drain the queue for 5secs */
+#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+
+/*
+ * track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+ struct rb_node node;
+ int refcount;
+ pid_t pid;
+};
+
+struct qib_user_sdma_pkt {
+ struct list_head list; /* list element */
+
+ u8 tiddma; /* if this is NEW tid-sdma */
+ u8 largepkt; /* this is large pkt from kmalloc */
+ u16 frag_size; /* frag size used by PSM */
+ u16 index; /* last header index or push index */
+ u16 naddr; /* dimension of addr (1..3) ... */
+ u16 addrlimit; /* addr array size */
+ u16 tidsmidx; /* current tidsm index */
+ u16 tidsmcount; /* tidsm array item count */
+ u16 payload_size; /* payload size so far for header */
+ u32 bytes_togo; /* bytes for processing */
+ u32 counter; /* sdma pkts queued counter for this entry */
+ struct qib_tid_session_member *tidsm; /* tid session member array */
+ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
+ u64 added; /* global descq number of entries */
+
+ struct {
+ u16 offset; /* offset for kvaddr, addr */
+ u16 length; /* length in page */
+ u16 first_desc; /* first desc */
+ u16 last_desc; /* last desc */
+ u16 put_page; /* should we put_page? */
+ u16 dma_mapped; /* is page dma_mapped? */
+ u16 dma_length; /* for dma_unmap_page() */
+ u16 padding;
+ struct page *page; /* may be NULL (coherent mem) */
+ void *kvaddr; /* FIXME: only for pio hack */
+ dma_addr_t addr;
+ } addr[4]; /* max pages, any more and we coalesce */
+};
+
+struct qib_user_sdma_queue {
+ /*
+ * pkts sent to the dma engine are queued on this
+ * list head. The elements of this list are
+ * struct qib_user_sdma_pkt...
+ */
+ struct list_head sent;
+
+ /*
+ * Because above list will be accessed by both process and
+ * signal handler, we need a spinlock for it.
+ */
+ spinlock_t sent_lock ____cacheline_aligned_in_smp;
+
+ /* headers with expected length are allocated from here... */
+ char header_cache_name[64];
+ struct dma_pool *header_cache;
+
+ /* packets are allocated from the slab cache... */
+ char pkt_slab_name[64];
+ struct kmem_cache *pkt_slab;
+
+ /* as packets are queued, they are counted... */
+ u32 counter;
+ u32 sent_counter;
+ /* pending packets, not sending yet */
+ u32 num_pending;
+ /* sending packets, not complete yet */
+ u32 num_sending;
+ /* global descq number of entry of last sending packet */
+ u64 added;
+
+ /* dma page table */
+ struct rb_root dma_pages_root;
+
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+
+ /* protect everything above... */
+ struct mutex lock;
+};
+
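+/* find the rb-tree node for this pid, or return NULL if none exists */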
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ sdma_rb_node = container_of(node,
+ struct qib_user_sdma_rb_node, node);
+ if (pid < sdma_rb_node->pid)
+ node = node->rb_left;
+ else if (pid > sdma_rb_node->pid)
+ node = node->rb_right;
+ else
+ return sdma_rb_node;
+ }
+ return NULL;
+}
+
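+/* insert a new pid node; returns 0 if a node for this pid already exists */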
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+ struct rb_node **node = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct qib_user_sdma_rb_node *got;
+
+ while (*node) {
+ got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ parent = *node;
+ if (new->pid < got->pid)
+ node = &((*node)->rb_left);
+ else if (new->pid > got->pid)
+ node = &((*node)->rb_right);
+ else
+ return 0;
+ }
+
+ rb_link_node(&new->node, parent, node);
+ rb_insert_color(&new->node, root);
+ return 1;
+}
+
+struct qib_user_sdma_queue *
+qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
+{
+ struct qib_user_sdma_queue *pq =
+ kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+
+ if (!pq)
+ goto done;
+
+ pq->counter = 0;
+ pq->sent_counter = 0;
+ pq->num_pending = 0;
+ pq->num_sending = 0;
+ pq->added = 0;
+ pq->sdma_rb_node = NULL;
+
+ INIT_LIST_HEAD(&pq->sent);
+ spin_lock_init(&pq->sent_lock);
+ mutex_init(&pq->lock);
+
+ snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
+ "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
+ pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
+ sizeof(struct qib_user_sdma_pkt),
+ 0, 0, NULL);
+
+ if (!pq->pkt_slab)
+ goto err_kfree;
+
+ snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
+ "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
+ pq->header_cache = dma_pool_create(pq->header_cache_name,
+ dev,
+ QIB_USER_SDMA_EXP_HEADER_LENGTH,
+ 4, 0);
+ if (!pq->header_cache)
+ goto err_slab;
+
+ pq->dma_pages_root = RB_ROOT;
+
+ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+ current->pid);
+ if (sdma_rb_node) {
+ sdma_rb_node->refcount++;
+ } else {
+ int ret;
+ sdma_rb_node = kmalloc(sizeof(
+ struct qib_user_sdma_rb_node), GFP_KERNEL);
+ if (!sdma_rb_node)
+ goto err_rb;
+
+ sdma_rb_node->refcount = 1;
+ sdma_rb_node->pid = current->pid;
+
+ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+ sdma_rb_node);
+ BUG_ON(ret == 0);
+ }
+ pq->sdma_rb_node = sdma_rb_node;
+
+ goto done;
+
+err_rb:
+ dma_pool_destroy(pq->header_cache);
+err_slab:
+ kmem_cache_destroy(pq->pkt_slab);
+err_kfree:
+ kfree(pq);
+ pq = NULL;
+
+done:
+ return pq;
+}
+
+static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+ int i, u16 offset, u16 len,
+ u16 first_desc, u16 last_desc,
+ u16 put_page, u16 dma_mapped,
+ struct page *page, void *kvaddr,
+ dma_addr_t dma_addr, u16 dma_length)
+{
+ pkt->addr[i].offset = offset;
+ pkt->addr[i].length = len;
+ pkt->addr[i].first_desc = first_desc;
+ pkt->addr[i].last_desc = last_desc;
+ pkt->addr[i].put_page = put_page;
+ pkt->addr[i].dma_mapped = dma_mapped;
+ pkt->addr[i].page = page;
+ pkt->addr[i].kvaddr = kvaddr;
+ pkt->addr[i].addr = dma_addr;
+ pkt->addr[i].dma_length = dma_length;
+}
+
+static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ size_t len, dma_addr_t *dma_addr)
+{
+ void *hdr;
+
+ if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+ hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ dma_addr);
+ else
+ hdr = NULL;
+
+ if (!hdr) {
+ hdr = kmalloc(len, GFP_KERNEL);
+ if (!hdr)
+ return NULL;
+
+ *dma_addr = 0;
+ }
+
+ return hdr;
+}
+
+static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ struct page *page, u16 put,
+ u16 offset, u16 len, void *kvaddr)
+{
+ __le16 *pbc16;
+ void *pbcvaddr;
+ struct qib_message_header *hdr;
+ u16 newlen, pbclen, lastdesc, dma_mapped;
+ u32 vcto;
+ union qib_seqnum seqnum;
+ dma_addr_t pbcdaddr;
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ page, offset, len, DMA_TO_DEVICE);
+ int ret = 0;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ /*
+ * dma mapping error: the pkt does not own
+ * this page yet, so release it here and let
+ * the caller ignore this page.
+ */
+ if (put) {
+ put_page(page);
+ } else {
+ /* coalesce case */
+ kunmap(page);
+ __free_page(page);
+ }
+ ret = -ENOMEM;
+ goto done;
+ }
+ offset = 0;
+ dma_mapped = 1;
+
+next_fragment:
+
+ /*
+ * In tid-sdma, the transfer length is restricted by
+ * the receiver side's current tid page length.
+ */
+ if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
+ newlen = pkt->tidsm[pkt->tidsmidx].length;
+ else
+ newlen = len;
+
+ /*
+ * Then the transfer length is restricted by the MTU.
+ * The last descriptor flag is set when:
+ * 1. the current packet has reached the frag size length,
+ * 2. the current tid page is done (tid-sdma), or
+ * 3. there are no more bytes to go (plain sdma).
+ */
+ lastdesc = 0;
+ if ((pkt->payload_size + newlen) >= pkt->frag_size) {
+ newlen = pkt->frag_size - pkt->payload_size;
+ lastdesc = 1;
+ } else if (pkt->tiddma) {
+ if (newlen == pkt->tidsm[pkt->tidsmidx].length)
+ lastdesc = 1;
+ } else {
+ if (newlen == pkt->bytes_togo)
+ lastdesc = 1;
+ }
+
+ /* fill the next fragment in this page */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ offset, newlen, /* offset, len */
+ 0, lastdesc, /* first last desc */
+ put, dma_mapped, /* put page, dma mapped */
+ page, kvaddr, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->bytes_togo -= newlen;
+ pkt->payload_size += newlen;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* If there are no more bytes to go (lastdesc == 1). */
+ if (pkt->bytes_togo == 0) {
+ /* The packet is done but the header is not dma mapped yet;
+ * it must have come from kmalloc. */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ goto done;
+ }
+
+ /* If tid-sdma, advance tid info. */
+ if (pkt->tiddma) {
+ pkt->tidsm[pkt->tidsmidx].length -= newlen;
+ if (pkt->tidsm[pkt->tidsmidx].length) {
+ pkt->tidsm[pkt->tidsmidx].offset += newlen;
+ } else {
+ pkt->tidsmidx++;
+ if (pkt->tidsmidx == pkt->tidsmcount) {
+ ret = -EFAULT;
+ goto done;
+ }
+ }
+ }
+
+ /*
+ * If this is NOT the last descriptor (newlen == len),
+ * the current packet is not done yet, but the current
+ * send side page is.
+ */
+ if (lastdesc == 0)
+ goto done;
+
+ /*
+ * If running this driver under PSM with a message size
+ * fitting into one transfer unit, it is not possible
+ * to reach this line; if we do, it is a bug.
+ */
+
+ /*
+ * Since the current packet is done and there are more
+ * bytes to go, we need to create a new sdma header, copying
+ * it from the previous sdma header and then modifying both.
+ */
+ pbclen = pkt->addr[pkt->index].length;
+ pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
+ if (!pbcvaddr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Copy the previous sdma header to new sdma header */
+ pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
+ memcpy(pbcvaddr, pbc16, pbclen);
+
+ /* Modify the previous sdma header */
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* turn on the header suppression */
+ hdr->iph.pkt_flags =
+ cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
+ /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
+ hdr->flags &= ~(0x04|0x20);
+ } else {
+ /* turn off extra bytes: 20-21 bits */
+ hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
+ /* turn off ACK_REQ: 0x04 */
+ hdr->flags &= ~(0x04);
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* The packet is done but the header is not dma mapped yet;
+ * it must have come from kmalloc. */
+ if (!pkt->addr[pkt->index].addr) {
+ pkt->addr[pkt->index].addr =
+ dma_map_single(&dd->pcidev->dev,
+ pkt->addr[pkt->index].kvaddr,
+ pkt->addr[pkt->index].dma_length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ pkt->addr[pkt->index].addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ pkt->addr[pkt->index].dma_mapped = 1;
+ }
+
+ /* Modify the new sdma header */
+ pbc16 = (__le16 *)pbcvaddr;
+ hdr = (struct qib_message_header *)&pbc16[4];
+
+ /* New pbc length */
+ pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));
+
+ /* New packet length */
+ hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
+
+ if (pkt->tiddma) {
+ /* Set new tid and offset for new sdma header */
+ hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
+ (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
+ (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
+ (pkt->tidsm[pkt->tidsmidx].offset>>2));
+ } else {
+ /* Middle protocol new packet offset */
+ hdr->uwords[2] += pkt->payload_size;
+ }
+
+ /* New kdeth checksum */
+ vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
+ hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
+ be16_to_cpu(hdr->lrh[2]) -
+ ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
+ le16_to_cpu(hdr->iph.pkt_flags));
+
+ /* Next sequence number in new sdma header */
+ seqnum.val = be32_to_cpu(hdr->bth[2]);
+ if (pkt->tiddma)
+ seqnum.seq++;
+ else
+ seqnum.pkt++;
+ hdr->bth[2] = cpu_to_be32(seqnum.val);
+
+ /* Init new sdma header. */
+ qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
+ 0, pbclen, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbcvaddr, /* struct page, virt addr */
+ pbcdaddr, pbclen); /* dma addr, dma length */
+ pkt->index = pkt->naddr;
+ pkt->payload_size = 0;
+ pkt->naddr++;
+ if (pkt->naddr == pkt->addrlimit) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* Prepare for next fragment in this page */
+ if (newlen != len) {
+ if (dma_mapped) {
+ put = 0;
+ dma_mapped = 0;
+ page = NULL;
+ kvaddr = NULL;
+ }
+ len -= newlen;
+ offset += newlen;
+
+ goto next_fragment;
+ }
+
+done:
+ return ret;
+}
+
+/* we've too many pages in the iovec, coalesce to a single page */
+static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov)
+{
+ int ret = 0;
+ struct page *page = alloc_page(GFP_KERNEL);
+ void *mpage_save;
+ char *mpage;
+ int i;
+ int len = 0;
+
+ if (!page) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ mpage = kmap(page);
+ mpage_save = mpage;
+ for (i = 0; i < niov; i++) {
+ int cfur;
+
+ cfur = copy_from_user(mpage,
+ iov[i].iov_base, iov[i].iov_len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_unmap;
+ }
+
+ mpage += iov[i].iov_len;
+ len += iov[i].iov_len;
+ }
+
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ page, 0, 0, len, mpage_save);
+ goto done;
+
+free_unmap:
+ kunmap(page);
+ __free_page(page);
+done:
+ return ret;
+}
+
+/*
+ * How many pages in this iovec element?
+ */
+static int qib_user_sdma_num_pages(const struct iovec *iov)
+{
+ const unsigned long addr = (unsigned long) iov->iov_base;
+ const unsigned long len = iov->iov_len;
+ const unsigned long spage = addr & PAGE_MASK;
+ const unsigned long epage = (addr + len - 1) & PAGE_MASK;
+
+ return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
+
+static void qib_user_sdma_free_pkt_frag(struct device *dev,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ int frag)
+{
+ const int i = frag;
+
+ if (pkt->addr[i].page) {
+ /* only user data has page */
+ if (pkt->addr[i].dma_mapped)
+ dma_unmap_page(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+
+ if (pkt->addr[i].kvaddr)
+ kunmap(pkt->addr[i].page);
+
+ if (pkt->addr[i].put_page)
+ put_page(pkt->addr[i].page);
+ else
+ __free_page(pkt->addr[i].page);
+ } else if (pkt->addr[i].kvaddr) {
+ /* for headers */
+ if (pkt->addr[i].dma_mapped) {
+ /* from kmalloc & dma mapped */
+ dma_unmap_single(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+ kfree(pkt->addr[i].kvaddr);
+ } else if (pkt->addr[i].addr) {
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
+ pkt->addr[i].kvaddr, pkt->addr[i].addr);
+ } else {
+ /* from kmalloc but not dma mapped */
+ kfree(pkt->addr[i].kvaddr);
+ }
+ }
+}
+
+/* pin user pages and add them to the packet -- returns 0 or a negative errno */
+static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ unsigned long addr, int tlen, int npages)
+{
+ struct page *pages[8];
+ int i, j;
+ int ret = 0;
+
+ while (npages) {
+ if (npages > 8)
+ j = 8;
+ else
+ j = npages;
+
+ ret = get_user_pages_fast(addr, j, 0, pages);
+ if (ret != j) {
+ i = 0;
+ j = ret;
+ ret = -ENOMEM;
+ goto free_pages;
+ }
+
+ for (i = 0; i < j; i++) {
+ /* map the pages... */
+ unsigned long fofs = addr & ~PAGE_MASK;
+ int flen = ((fofs + tlen) > PAGE_SIZE) ?
+ (PAGE_SIZE - fofs) : tlen;
+
+ ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
+ pages[i], 1, fofs, flen, NULL);
+ if (ret < 0) {
+ /* the current page has been taken
+ * care of inside the call above.
+ */
+ i++;
+ goto free_pages;
+ }
+
+ addr += flen;
+ tlen -= flen;
+ }
+
+ npages -= j;
+ }
+
+ goto done;
+
+ /* if error, return all pages not managed by pkt */
+free_pages:
+ while (i < j)
+ put_page(pages[i++]);
+
+done:
+ return ret;
+}
+
+static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov)
+{
+ int ret = 0;
+ unsigned long idx;
+
+ for (idx = 0; idx < niov; idx++) {
+ const int npages = qib_user_sdma_num_pages(iov + idx);
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+
+ ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
+ iov[idx].iov_len, npages);
+ if (ret < 0)
+ goto free_pkt;
+ }
+
+ goto done;
+
+free_pkt:
+ /* we need to ignore the first entry here */
+ for (idx = 1; idx < pkt->naddr; idx++)
+ qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+
+ /* dma unmap the first entry to restore the original state so
+ * that the caller can free the memory on error; the caller
+ * does not know whether it was dma mapped or not. */
+ if (pkt->addr[0].dma_mapped) {
+ dma_unmap_single(&dd->pcidev->dev,
+ pkt->addr[0].addr,
+ pkt->addr[0].dma_length,
+ DMA_TO_DEVICE);
+ pkt->addr[0].addr = 0;
+ pkt->addr[0].dma_mapped = 0;
+ }
+
+done:
+ return ret;
+}
+
+static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov, int npages)
+{
+ int ret = 0;
+
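+ /*
+ * If the whole payload fits in one packet but spans more pages
+ * than the addr[] array can hold, copy it into a single freshly
+ * allocated page rather than pinning the user pages.
+ */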
+ if (pkt->frag_size == pkt->bytes_togo &&
+ npages >= ARRAY_SIZE(pkt->addr))
+ ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
+ else
+ ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
+
+ return ret;
+}
+
+/* free a packet list */
+static void qib_user_sdma_free_pkt_list(struct device *dev,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *list)
+{
+ struct qib_user_sdma_pkt *pkt, *pkt_next;
+
+ list_for_each_entry_safe(pkt, pkt_next, list, list) {
+ int i;
+
+ for (i = 0; i < pkt->naddr; i++)
+ qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
+ }
+ INIT_LIST_HEAD(list);
+}
+
+/*
+ * copy headers, coalesce etc -- pq->lock must be held
+ *
+ * we queue all the packets on the list, returning the
+ * number of iovec entries consumed. The list must be empty
+ * initially; if there is an error we clean it...
+ */
+static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long niov,
+ struct list_head *list,
+ int *maxpkts, int *ndesc)
+{
+ unsigned long idx = 0;
+ int ret = 0;
+ int npkts = 0;
+ __le32 *pbc;
+ dma_addr_t dma_addr;
+ struct qib_user_sdma_pkt *pkt = NULL;
+ size_t len;
+ size_t nw;
+ u32 counter = pq->counter;
+ u16 frag_size;
+
+ while (idx < niov && npkts < *maxpkts) {
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+ const unsigned long idx_save = idx;
+ unsigned pktnw;
+ unsigned pktnwc;
+ int nfrags = 0;
+ int npages = 0;
+ int bytes_togo = 0;
+ int tiddma = 0;
+ int cfur;
+
+ len = iov[idx].iov_len;
+ nw = len >> 2;
+
+ if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
+ len > PAGE_SIZE || len & 3 || addr & 3) {
+ ret = -EINVAL;
+ goto free_list;
+ }
+
+ pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
+ if (!pbc) {
+ ret = -ENOMEM;
+ goto free_list;
+ }
+
+ cfur = copy_from_user(pbc, iov[idx].iov_base, len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pbc;
+ }
+
+ /*
+ * This assignment is a bit strange; it's because the
+ * pbc counts the number of 32 bit words in the full
+ * packet _except_ the first word of the pbc itself...
+ */
+ pktnwc = nw - 1;
+
+ /*
+ * pktnw computation yields the number of 32 bit words
+ * that the caller has indicated in the PBC. Note that
+ * this is one less than the total number of words that
+ * go to the send DMA engine, as the first 32 bit word
+ * of the PBC itself is not counted. Armed with this count,
+ * we can verify that the packet is consistent with the
+ * iovec lengths.
+ */
+ pktnw = le32_to_cpu(*pbc) & 0xFFFF;
+ if (pktnw < pktnwc) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ idx++;
+ while (pktnwc < pktnw && idx < niov) {
+ const size_t slen = iov[idx].iov_len;
+ const unsigned long faddr =
+ (unsigned long) iov[idx].iov_base;
+
+ if (slen & 3 || faddr & 3 || !slen) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ npages += qib_user_sdma_num_pages(&iov[idx]);
+
+ bytes_togo += slen;
+ pktnwc += slen >> 2;
+ idx++;
+ nfrags++;
+ }
+
+ if (pktnwc != pktnw) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
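+ /* PSM passes the frag size in the upper 16 bits of the first PBC word */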
+ frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
+ if (((frag_size ? frag_size : bytes_togo) + len) >
+ ppd->ibmaxlen) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (frag_size) {
+ int pktsize, tidsmsize, n;
+
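+ /* generous upper bound on the addr[] entries this payload may need */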
+ n = npages*((2*PAGE_SIZE/frag_size)+1);
+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n;
+
+ /*
+ * Determine if this is tid-sdma or just sdma.
+ */
+ tiddma = (((le32_to_cpu(pbc[7])>>
+ QLOGIC_IB_I_TID_SHIFT)&
+ QLOGIC_IB_I_TID_MASK) !=
+ QLOGIC_IB_I_TID_MASK);
+
+ if (tiddma)
+ tidsmsize = iov[idx].iov_len;
+ else
+ tidsmsize = 0;
+
+ pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+ pkt->largepkt = 1;
+ pkt->frag_size = frag_size;
+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
+
+ if (tiddma) {
+ char *tidsm = (char *)pkt + pktsize;
+ cfur = copy_from_user(tidsm,
+ iov[idx].iov_base, tidsmsize);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pkt;
+ }
+ pkt->tidsm =
+ (struct qib_tid_session_member *)tidsm;
+ pkt->tidsmcount = tidsmsize/
+ sizeof(struct qib_tid_session_member);
+ pkt->tidsmidx = 0;
+ idx++;
+ }
+
+ /*
+ * pbc 'fill1' field is borrowed to pass frag size,
+ * we need to clear it after picking frag size, the
+ * hardware requires this field to be zero.
+ */
+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
+ } else {
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+ pkt->largepkt = 0;
+ pkt->frag_size = bytes_togo;
+ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
+ }
+ pkt->bytes_togo = bytes_togo;
+ pkt->payload_size = 0;
+ pkt->counter = counter;
+ pkt->tiddma = tiddma;
+
+ /* setup the first header */
+ qib_user_sdma_init_frag(pkt, 0, /* index */
+ 0, len, /* offset, len */
+ 1, 0, /* first last desc */
+ 0, 0, /* put page, dma mapped */
+ NULL, pbc, /* struct page, virt addr */
+ dma_addr, len); /* dma addr, dma length */
+ pkt->index = 0;
+ pkt->naddr = 1;
+
+ if (nfrags) {
+ ret = qib_user_sdma_init_payload(dd, pq, pkt,
+ iov + idx_save + 1,
+ nfrags, npages);
+ if (ret < 0)
+ goto free_pkt;
+ } else {
+ /* since there is no payload, mark the
+ * header as the last desc. */
+ pkt->addr[0].last_desc = 1;
+
+ if (dma_addr == 0) {
+ /*
+ * the header is not dma mapped yet.
+ * it should be from kmalloc.
+ */
+ dma_addr = dma_map_single(&dd->pcidev->dev,
+ pbc, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev,
+ dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pkt->addr[0].addr = dma_addr;
+ pkt->addr[0].dma_mapped = 1;
+ }
+ }
+
+ counter++;
+ npkts++;
+ pkt->pq = pq;
+ pkt->index = 0; /* reset index for push on hw */
+ *ndesc += pkt->naddr;
+
+ list_add_tail(&pkt->list, list);
+ }
+
+ *maxpkts = npkts;
+ ret = idx;
+ goto done;
+
+free_pkt:
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+ kmem_cache_free(pq->pkt_slab, pkt);
+free_pbc:
+ if (dma_addr)
+ dma_pool_free(pq->header_cache, pbc, dma_addr);
+ else
+ kfree(pbc);
+free_list:
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
+done:
+ return ret;
+}
+
+static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
+ u32 c)
+{
+ pq->sent_counter = c;
+}
+
+/* try to clean out queue -- needs pq->lock */
+static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct list_head free_list;
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!pq->num_sending)
+ return 0;
+
+ INIT_LIST_HEAD(&free_list);
+
+ /*
+ * We need this spin lock here because the interrupt handler
+ * might modify this list in qib_user_sdma_send_desc(); we also
+ * must not be interrupted while holding it, or we deadlock.
+ */
+ spin_lock_irqsave(&pq->sent_lock, flags);
+ list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
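+ /* stop at the first packet the hardware has not finished with yet */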
+ s64 descd = ppd->sdma_descq_removed - pkt->added;
+
+ if (descd < 0)
+ break;
+
+ list_move_tail(&pkt->list, &free_list);
+
+ /* one more packet cleaned */
+ ret++;
+ pq->num_sending--;
+ }
+ spin_unlock_irqrestore(&pq->sent_lock, flags);
+
+ if (!list_empty(&free_list)) {
+ u32 counter;
+
+ pkt = list_entry(free_list.prev,
+ struct qib_user_sdma_pkt, list);
+ counter = pkt->counter;
+
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ qib_user_sdma_set_complete_counter(pq, counter);
+ }
+
+ return ret;
+}
+
+void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
+{
+ if (!pq)
+ return;
+
+ pq->sdma_rb_node->refcount--;
+ if (pq->sdma_rb_node->refcount == 0) {
+ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+ kfree(pq->sdma_rb_node);
+ }
+ dma_pool_destroy(pq->header_cache);
+ kmem_cache_destroy(pq->pkt_slab);
+ kfree(pq);
+}
+
+/* clean descriptor queue, returns > 0 if some elements cleaned */
+static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ ret = qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ return ret;
+}
+
+/* we're in close, drain packets so that we can clean up successfully... */
+void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq)
+{
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+ int i;
+
+ if (!pq)
+ return;
+
+ for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
+ mutex_lock(&pq->lock);
+ if (!pq->num_pending && !pq->num_sending) {
+ mutex_unlock(&pq->lock);
+ break;
+ }
+ qib_user_sdma_hwqueue_clean(ppd);
+ qib_user_sdma_queue_clean(ppd, pq);
+ mutex_unlock(&pq->lock);
+ msleep(10);
+ }
+
+ if (pq->num_pending || pq->num_sending) {
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
+ struct list_head free_list;
+
+ mutex_lock(&pq->lock);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ /*
+ * Since we hold sdma_lock, it is safe without sent_lock.
+ */
+ if (pq->num_pending) {
+ list_for_each_entry_safe(pkt, pkt_prev,
+ &ppd->sdma_userpending, list) {
+ if (pkt->pq == pq) {
+ list_move_tail(&pkt->list, &pq->sent);
+ pq->num_pending--;
+ pq->num_sending++;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
+ INIT_LIST_HEAD(&free_list);
+ list_splice_init(&pq->sent, &free_list);
+ pq->num_sending = 0;
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ mutex_unlock(&pq->lock);
+ }
+}
+
+static inline __le64 qib_sdma_make_desc0(u8 gen,
+ u64 addr, u64 dwlen, u64 dwoffset)
+{
+ return cpu_to_le64(/* SDmaPhyAddr[31:0] */
+ ((addr & 0xfffffffcULL) << 32) |
+ /* SDmaGeneration[1:0] */
+ ((gen & 3ULL) << 30) |
+ /* SDmaDwordCount[10:0] */
+ ((dwlen & 0x7ffULL) << 16) |
+ /* SDmaBufOffset[12:2] */
+ (dwoffset & 0x7ffULL));
+}
+
+static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
+{
+ return descq | cpu_to_le64(1ULL << 12);
+}
+
+static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
+{
+ /* last */ /* dma head */
+ return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
+}
+
+static inline __le64 qib_sdma_make_desc1(u64 addr)
+{
+ /* SDmaPhyAddr[47:32] */
+ return cpu_to_le64(addr >> 32);
+}
+
+static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
+ struct qib_user_sdma_pkt *pkt, int idx,
+ unsigned ofs, u16 tail, u8 gen)
+{
+ const u64 addr = (u64) pkt->addr[idx].addr +
+ (u64) pkt->addr[idx].offset;
+ const u64 dwlen = (u64) pkt->addr[idx].length / 4;
+ __le64 *descqp;
+ __le64 descq0;
+
+ descqp = &ppd->sdma_descq[tail].qw[0];
+
+ descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
+ if (pkt->addr[idx].first_desc)
+ descq0 = qib_sdma_make_first_desc0(descq0);
+ if (pkt->addr[idx].last_desc) {
+ descq0 = qib_sdma_make_last_desc0(descq0);
+ if (ppd->sdma_intrequest) {
+ descq0 |= cpu_to_le64(1ULL << 15);
+ ppd->sdma_intrequest = 0;
+ }
+ }
+
+ descqp[0] = descq0;
+ descqp[1] = qib_sdma_make_desc1(addr);
+}
+
+void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
+ struct list_head *pktlist)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u16 nfree, nsent;
+ u16 tail, tail_c;
+ u8 gen, gen_c;
+
+ nfree = qib_sdma_descq_freecnt(ppd);
+ if (!nfree)
+ return;
+
+retry:
+ nsent = 0;
+ tail_c = tail = ppd->sdma_descq_tail;
+ gen_c = gen = ppd->sdma_generation;
+ while (!list_empty(pktlist)) {
+ struct qib_user_sdma_pkt *pkt =
+ list_entry(pktlist->next, struct qib_user_sdma_pkt,
+ list);
+ int i, j, c = 0;
+ unsigned ofs = 0;
+ u16 dtail = tail;
+
+ for (i = pkt->index; i < pkt->naddr && nfree; i++) {
+ qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
+ ofs += pkt->addr[i].length >> 2;
+
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ ++gen;
+ ppd->sdma_intrequest = 1;
+ } else if (tail == (ppd->sdma_descq_cnt>>1)) {
+ ppd->sdma_intrequest = 1;
+ }
+ nfree--;
+ if (pkt->addr[i].last_desc == 0)
+ continue;
+
+ /*
+ * If the packet is >= 2KB mtu equivalent, we
+ * have to use the large buffers, and have to
+ * mark each descriptor as part of a large
+ * buffer packet.
+ */
+ if (ofs > dd->piosize2kmax_dwords) {
+ for (j = pkt->index; j <= i; j++) {
+ ppd->sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == ppd->sdma_descq_cnt)
+ dtail = 0;
+ }
+ }
+ c += i + 1 - pkt->index;
+ pkt->index = i + 1; /* index for next first */
+ tail_c = dtail = tail;
+ gen_c = gen;
+ ofs = 0; /* reset for next packet */
+ }
+
+ ppd->sdma_descq_added += c;
+ nsent += c;
+ if (pkt->index == pkt->naddr) {
+ pkt->added = ppd->sdma_descq_added;
+ pkt->pq->added = pkt->added;
+ pkt->pq->num_pending--;
+ spin_lock(&pkt->pq->sent_lock);
+ pkt->pq->num_sending++;
+ list_move_tail(&pkt->list, &pkt->pq->sent);
+ spin_unlock(&pkt->pq->sent_lock);
+ }
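+ /* stop when the descq is full or we have queued over a quarter of it */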
+ if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
+ break;
+ }
+
+ /* advance the tail on the chip if necessary */
+ if (ppd->sdma_descq_tail != tail_c) {
+ ppd->sdma_generation = gen_c;
+ dd->f_sdma_update_tail(ppd, tail_c);
+ }
+
+ if (nfree && !list_empty(pktlist))
+ goto retry;
+
+ return;
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *pktlist, int count)
+{
+ unsigned long flags;
+
+ if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+ return -ECOMM;
+
+ /* non-blocking mode */
+ if (pq->sdma_rb_node->refcount > 1) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return 0;
+ }
+
+ /* In this case, descriptors from this process are not
+ * linked to the ppd pending queue and the interrupt handler
+ * won't update this process, so it is OK to modify
+ * directly without the sdma lock.
+ */
+
+ pq->num_pending += count;
+ /*
+ * Blocking mode for a single rail process: we must
+ * release/reacquire sdma_lock to give other processes
+ * a chance to make progress. This is important for
+ * performance.
+ */
+ do {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ qib_user_sdma_send_desc(ppd, pktlist);
+ if (!list_empty(pktlist))
+ qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ } while (!list_empty(pktlist));
+
+ return 0;
+}
+
+int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
+ struct qib_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim)
+{
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_pportdata *ppd = rcd->ppd;
+ int ret = 0;
+ struct list_head list;
+ int npkts = 0;
+
+ INIT_LIST_HEAD(&list);
+
+ mutex_lock(&pq->lock);
+
+ /* why not -ECOMM like qib_user_sdma_push_pkts() below? */
+ if (!qib_sdma_running(ppd))
+ goto done_unlock;
+
+ /* if I have packets not complete yet */
+ if (pq->added > ppd->sdma_descq_removed)
+ qib_user_sdma_hwqueue_clean(ppd);
+ /* if I have complete packets to be freed */
+ if (pq->num_sending)
+ qib_user_sdma_queue_clean(ppd, pq);
+
+ while (dim) {
+ int mxp = 1;
+ int ndesc = 0;
+
+ ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
+ iov, dim, &list, &mxp, &ndesc);
+ if (ret < 0)
+ goto done_unlock;
+ else {
+ dim -= ret;
+ iov += ret;
+ }
+
+ /* force packets onto the sdma hw queue... */
+ if (!list_empty(&list)) {
+ /*
+ * Lazily clean hw queue.
+ */
+ if (qib_sdma_descq_freecnt(ppd) < ndesc) {
+ qib_user_sdma_hwqueue_clean(ppd);
+ if (pq->num_sending)
+ qib_user_sdma_queue_clean(ppd, pq);
+ }
+
+ ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
+ if (ret < 0)
+ goto done_unlock;
+ else {
+ npkts += mxp;
+ pq->counter += mxp;
+ }
+ }
+ }
+
+done_unlock:
+ if (!list_empty(&list))
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
+ mutex_unlock(&pq->lock);
+
+ return (ret < 0) ? ret : npkts;
+}
+
+int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq)
+{
+ int ret = 0;
+
+ mutex_lock(&pq->lock);
+ qib_user_sdma_hwqueue_clean(ppd);
+ ret = qib_user_sdma_queue_clean(ppd, pq);
+ mutex_unlock(&pq->lock);
+
+ return ret;
+}
+
+u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
+{
+ return pq ? pq->sent_counter : 0;
+}
+
+u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
+{
+ return pq ? pq->counter : 0;
+}
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
new file mode 100644
index 00000000000..ce8cbaf6a5c
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+
+struct qib_user_sdma_queue;
+
+struct qib_user_sdma_queue *
+qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
+
+int qib_user_sdma_writev(struct qib_ctxtdata *pd,
+ struct qib_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim);
+
+int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq);
+
+void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq);
+
+u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
+u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
new file mode 100644
index 00000000000..9bcfbd84298
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -0,0 +1,2336 @@
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/utsname.h>
+#include <linux/rculist.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+static unsigned int ib_qib_qp_table_size = 256;
+module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(qp_table_size, "QP table size");
+
+unsigned int ib_qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+ S_IRUGO);
+MODULE_PARM_DESC(lkey_table_size,
+ "LKEY table size in bits (2^n, 1 <= n <= 23)");
+
+static unsigned int ib_qib_max_pds = 0xFFFF;
+module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
+MODULE_PARM_DESC(max_pds,
+ "Maximum number of protection domains to support");
+
+static unsigned int ib_qib_max_ahs = 0xFFFF;
+module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
+
+unsigned int ib_qib_max_cqes = 0x2FFFF;
+module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
+MODULE_PARM_DESC(max_cqes,
+ "Maximum number of completion queue entries to support");
+
+unsigned int ib_qib_max_cqs = 0x1FFFF;
+module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
+
+unsigned int ib_qib_max_qp_wrs = 0x3FFF;
+module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
+
+unsigned int ib_qib_max_qps = 16384;
+module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
+unsigned int ib_qib_max_sges = 0x60;
+module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
+MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
+
+unsigned int ib_qib_max_mcast_grps = 16384;
+module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
+MODULE_PARM_DESC(max_mcast_grps,
+ "Maximum number of multicast groups to support");
+
+unsigned int ib_qib_max_mcast_qp_attached = 16;
+module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
+ uint, S_IRUGO);
+MODULE_PARM_DESC(max_mcast_qp_attached,
+ "Maximum number of attached QPs to support");
+
+unsigned int ib_qib_max_srqs = 1024;
+module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
+
+unsigned int ib_qib_max_srq_sges = 128;
+module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
+
+unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
+module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
+
+static unsigned int ib_qib_disable_sma;
+module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; qib_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = 0,
+ [IB_QPS_INIT] = QIB_POST_RECV_OK,
+ [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
+ [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+ QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
+ QIB_PROCESS_NEXT_SEND_OK,
+ [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+ QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+ QIB_POST_SEND_OK | QIB_FLUSH_SEND,
+ [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
+ QIB_POST_SEND_OK | QIB_FLUSH_SEND,
+};
+
+struct qib_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct qib_ucontext, ibucontext);
+}
+
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_qib_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
+ * System image GUID.
+ */
+__be64 ib_qib_sys_image_guid;
+
+/**
+ * qib_copy_sge - copy data to SGE memory
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ * @release: if true, drop the MR reference as each SGE is fully consumed
+ */
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(sge->vaddr, data, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (release)
+ qib_put_mr(sge->mr);
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ data += len;
+ length -= len;
+ }
+}
+
+/**
+ * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
+ * @ss: the SGE state
+ * @length: the number of bytes to skip
+ * @release: if true, drop the MR reference as each SGE is fully consumed
+ */
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (release)
+ qib_put_mr(sge->mr);
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+}
+
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the qib_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+{
+ struct qib_sge *sg_list = ss->sg_list;
+ struct qib_sge sge = ss->sge;
+ u8 num_sge = ss->num_sge;
+ u32 ndesc = 1; /* count the header */
+
+ while (length) {
+ u32 len = sge.length;
+
+ if (len > length)
+ len = length;
+ if (len > sge.sge_length)
+ len = sge.sge_length;
+ BUG_ON(len == 0);
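+		/*
+		 * Segment addresses must be dword aligned and, except for
+		 * the final segment, lengths must be dword multiples;
+		 * otherwise return zero so the caller copies the payload
+		 * into a single aligned bounce buffer instead.
+		 */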
+ if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+ (len != length && (len & (sizeof(u32) - 1)))) {
+ ndesc = 0;
+ break;
+ }
+ ndesc++;
+ sge.vaddr += len;
+ sge.length -= len;
+ sge.sge_length -= len;
+ if (sge.sge_length == 0) {
+ if (--num_sge)
+ sge = *sg_list++;
+ } else if (sge.length == 0 && sge.mr->lkey) {
+ if (++sge.n >= QIB_SEGSZ) {
+ if (++sge.m >= sge.mr->mapsz)
+ break;
+ sge.n = 0;
+ }
+ sge.vaddr =
+ sge.mr->map[sge.m]->segs[sge.n].vaddr;
+ sge.length =
+ sge.mr->map[sge.m]->segs[sge.n].length;
+ }
+ length -= len;
+ }
+ return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(data, sge->vaddr, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ data += len;
+ length -= len;
+ }
+}
+
+/**
+ * qib_post_one_send - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ * @scheduled: set to 1 if qib_schedule_send() was called to process the WR
+ */
+static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
+ int *scheduled)
+{
+ struct qib_swqe *wqe;
+ u32 next;
+ int i;
+ int j;
+ int acc;
+ int ret;
+ unsigned long flags;
+ struct qib_lkey_table *rkt;
+ struct qib_pd *pd;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Check that state is OK to post send. */
+ if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
+ goto bail_inval;
+
+ /* IB spec says that num_sge == 0 is OK. */
+ if (wr->num_sge > qp->s_max_sge)
+ goto bail_inval;
+
+ /*
+	 * Don't allow RDMA reads or atomic operations on UC QPs, and
+	 * reject undefined opcodes.
+	 * Make sure the buffer is large enough to hold the result for atomics.
+ */
+ if (wr->opcode == IB_WR_FAST_REG_MR) {
+ if (qib_fast_reg_mr(qp, wr))
+ goto bail_inval;
+ } else if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
+ goto bail_inval;
+ } else if (qp->ibqp.qp_type != IB_QPT_RC) {
+ /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)
+ goto bail_inval;
+ /* Check UD destination address PD */
+ if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ goto bail_inval;
+ } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1)))
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
+ goto bail_inval;
+
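+	/*
+	 * The send work queue is a ring; it is full when advancing s_head
+	 * would land on s_last, so one slot is always left unused.
+	 */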
+ next = qp->s_head + 1;
+ if (next >= qp->s_size)
+ next = 0;
+ if (next == qp->s_last) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ rkt = &to_idev(qp->ibqp.device)->lk_table;
+ pd = to_ipd(qp->ibqp.pd);
+ wqe = get_swqe_ptr(qp, qp->s_head);
+ wqe->wr = *wr;
+ wqe->length = 0;
+ j = 0;
+ if (wr->num_sge) {
+ acc = wr->opcode >= IB_WR_RDMA_READ ?
+ IB_ACCESS_LOCAL_WRITE : 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ u32 length = wr->sg_list[i].length;
+ int ok;
+
+ if (length == 0)
+ continue;
+ ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
+ &wr->sg_list[i], acc);
+ if (!ok)
+ goto bail_inval_free;
+ wqe->length += length;
+ j++;
+ }
+ wqe->wr.num_sge = j;
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_RC) {
+ if (wqe->length > 0x80000000U)
+ goto bail_inval_free;
+ } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
+ qp->port_num - 1)->ibmtu)
+ goto bail_inval_free;
+ else
+ atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+ wqe->ssn = qp->s_ssn++;
+ qp->s_head = next;
+
+ ret = 0;
+ goto bail;
+
+bail_inval_free:
+ while (j) {
+ struct qib_sge *sge = &wqe->sg_list[--j];
+
+ qib_put_mr(sge->mr);
+ }
+bail_inval:
+ ret = -EINVAL;
+bail:
+ if (!ret && !wr->next &&
+ !qib_sdma_empty(
+ dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
+ qib_schedule_send(qp);
+ *scheduled = 1;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * qib_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ int err = 0;
+ int scheduled = 0;
+
+ for (; wr; wr = wr->next) {
+ err = qib_post_one_send(qp, wr, &scheduled);
+ if (err) {
+ *bad_wr = wr;
+ goto bail;
+ }
+ }
+
+ /* Try to do the send work in the caller's context. */
+ if (!scheduled)
+ qib_do_send(&qp->s_work);
+
+bail:
+ return err;
+}
+
+/**
+ * qib_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_rwq *wq = qp->r_rq.wq;
+ unsigned long flags;
+ int ret;
+
+ /* Check that state is OK to post receive. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ for (; wr; wr = wr->next) {
+ struct qib_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&qp->r_rq.lock, flags);
+ next = wq->head + 1;
+ if (next >= qp->r_rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_qp_rcv - process an incoming packet on a QP
+ * @rcd: the context pointer
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from qib_ib_rcv() to process an incoming packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_ibport *ibp = &rcd->ppd->ibport_data;
+
+ spin_lock(&qp->r_lock);
+
+ /* Check for valid receive state. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ goto unlock;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ if (ib_qib_disable_sma)
+ break;
+ /* FALLTHROUGH */
+ case IB_QPT_UD:
+ qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
+ break;
+
+ case IB_QPT_RC:
+ qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
+ break;
+
+ case IB_QPT_UC:
+ qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
+ break;
+
+ default:
+ break;
+ }
+
+unlock:
+ spin_unlock(&qp->r_lock);
+}
+
+/**
+ * qib_ib_rcv - process an incoming packet
+ * @rcd: the context pointer
+ * @rhdr: the header of the packet
+ * @data: the packet payload
+ * @tlen: the packet length
+ *
+ * This is called from qib_kreceive() to process an incoming packet at
+ * interrupt level. Tlen is the length of the header + data + CRC in bytes.
+ */
+void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
+{
+ struct qib_pportdata *ppd = rcd->ppd;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ struct qib_ib_header *hdr = rhdr;
+ struct qib_other_headers *ohdr;
+ struct qib_qp *qp;
+ u32 qp_num;
+ int lnh;
+ u8 opcode;
+ u16 lid;
+
+ /* 24 == LRH+BTH+CRC */
+ if (unlikely(tlen < 24))
+ goto drop;
+
+ /* Check for a valid destination LID (see ch. 7.11.1). */
+ lid = be16_to_cpu(hdr->lrh[1]);
+ if (lid < QIB_MULTICAST_LID_BASE) {
+ lid &= ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid != ppd->lid))
+ goto drop;
+ }
+
+ /* Check for GRH */
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else if (lnh == QIB_LRH_GRH) {
+ u32 vtf;
+
+ ohdr = &hdr->u.l.oth;
+ if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else
+ goto drop;
+
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
+#ifdef CONFIG_DEBUG_FS
+ rcd->opstats->stats[opcode].n_bytes += tlen;
+ rcd->opstats->stats[opcode].n_packets++;
+#endif
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ if (qp_num == QIB_MULTICAST_QPN) {
+ struct qib_mcast *mcast;
+ struct qib_mcast_qp *p;
+
+ if (lnh != QIB_LRH_GRH)
+ goto drop;
+ mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+ if (mcast == NULL)
+ goto drop;
+ this_cpu_inc(ibp->pmastats->n_multicast_rcv);
+ list_for_each_entry_rcu(p, &mcast->qp_list, list)
+ qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
+ /*
+ * Notify qib_multicast_detach() if it is waiting for us
+ * to finish.
+ */
+ if (atomic_dec_return(&mcast->refcount) <= 1)
+ wake_up(&mcast->wait);
+ } else {
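+		/*
+		 * rcd->lookaside_qp caches the last unicast QP looked up on
+		 * this receive context, so back-to-back packets for the same
+		 * QP avoid repeating the QPN lookup; the cached reference is
+		 * dropped when the destination QPN changes.
+		 */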
+ if (rcd->lookaside_qp) {
+ if (rcd->lookaside_qpn != qp_num) {
+ if (atomic_dec_and_test(
+ &rcd->lookaside_qp->refcount))
+ wake_up(
+ &rcd->lookaside_qp->wait);
+ rcd->lookaside_qp = NULL;
+ }
+ }
+ if (!rcd->lookaside_qp) {
+ qp = qib_lookup_qpn(ibp, qp_num);
+ if (!qp)
+ goto drop;
+ rcd->lookaside_qp = qp;
+ rcd->lookaside_qpn = qp_num;
+ } else
+ qp = rcd->lookaside_qp;
+ this_cpu_inc(ibp->pmastats->n_unicast_rcv);
+ qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+ }
+ return;
+
+drop:
+ ibp->n_pkt_drops++;
+}
+
+/*
+ * This is called from a timer to check for QPs
+ * which need kernel memory in order to send a packet.
+ */
+static void mem_timer(unsigned long data)
+{
+ struct qib_ibdev *dev = (struct qib_ibdev *) data;
+ struct list_head *list = &dev->memwait;
+ struct qib_qp *qp = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (!list_empty(list)) {
+ qp = list_entry(list->next, struct qib_qp, iowait);
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ if (!list_empty(list))
+ mod_timer(&dev->mem_timer, jiffies + 1);
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ if (qp) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_KMEM) {
+ qp->s_flags &= ~QIB_S_WAIT_KMEM;
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
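+/*
+ * Advance the SGE state by 'length' bytes, stepping to the next SGE or to
+ * the next memory region segment as each one is consumed.
+ */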
+static void update_sge(struct qib_sge_state *ss, u32 length)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ sge->vaddr += length;
+ sge->length -= length;
+ sge->sge_length -= length;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ return;
+ sge->n = 0;
+ }
+ sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+}
+
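+/*
+ * Helpers used by copy_io() to assemble partial dwords when the payload is
+ * not dword aligned.  The little- and big-endian variants differ because
+ * the byte at the lowest address occupies opposite ends of a 32-bit word.
+ */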
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#endif
+
+static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+ u32 length, unsigned flush_wc)
+{
+ u32 extra = 0;
+ u32 data = 0;
+ u32 last;
+
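+	/*
+	 * Copy the payload to the PIO buffer one dword at a time, carrying
+	 * partial dwords in 'data'/'extra' when segments are not dword
+	 * aligned.  The final dword is held back in 'last' so it can be
+	 * written as the trigger word after any required write-combining
+	 * flush.
+	 */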
+ while (1) {
+ u32 len = ss->sge.length;
+ u32 off;
+
+ if (len > length)
+ len = length;
+ if (len > ss->sge.sge_length)
+ len = ss->sge.sge_length;
+ BUG_ON(len == 0);
+ /* If the source address is not aligned, try to align it. */
+ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+ if (off) {
+ u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+ ~(sizeof(u32) - 1));
+ u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+ u32 y;
+
+ y = sizeof(u32) - off;
+ if (len > y)
+ len = y;
+ if (len + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, extra *
+ BITS_PER_BYTE);
+ len = sizeof(u32) - extra;
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, len, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += len;
+ }
+ } else if (extra) {
+ /* Source address is aligned. */
+ u32 *addr = (u32 *) ss->sge.vaddr;
+ int shift = extra * BITS_PER_BYTE;
+ int ushift = 32 - shift;
+ u32 l = len;
+
+ while (l >= sizeof(u32)) {
+ u32 v = *addr;
+
+ data |= set_upper_bits(v, shift);
+ __raw_writel(data, piobuf);
+ data = get_upper_bits(v, ushift);
+ piobuf++;
+ addr++;
+ l -= sizeof(u32);
+ }
+ /*
+			 * Fewer than a full dword remain in this segment;
+			 * merge the leftover bytes with any carried data.
+ */
+ if (l) {
+ u32 v = *addr;
+
+ if (l + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, shift);
+ len -= l + extra - sizeof(u32);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, l, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += l;
+ }
+ } else if (len == length) {
+ last = data;
+ break;
+ }
+ } else if (len == length) {
+ u32 w;
+
+ /*
+ * Need to round up for the last dword in the
+ * packet.
+ */
+ w = (len + 3) >> 2;
+ qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
+ piobuf += w - 1;
+ last = ((u32 *) ss->sge.vaddr)[w - 1];
+ break;
+ } else {
+ u32 w = len >> 2;
+
+ qib_pio_copy(piobuf, ss->sge.vaddr, w);
+ piobuf += w;
+
+ extra = len & (sizeof(u32) - 1);
+ if (extra) {
+ u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+ /* Clear unused upper bytes */
+ data = clear_upper_bytes(v, extra, 0);
+ }
+ }
+ update_sge(ss, len);
+ length -= len;
+ }
+ /* Update address before sending packet. */
+ update_sge(ss, length);
+ if (flush_wc) {
+ /* must flush early everything before trigger word */
+ qib_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ qib_flush_wc();
+ } else
+ __raw_writel(last, piobuf);
+}
+
+static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
+ struct qib_qp *qp)
+{
+ struct qib_verbs_txreq *tx;
+ unsigned long flags;
+
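+	/*
+	 * Take both qp->s_lock and dev->pending_lock so that, if no txreq
+	 * is free, the QP can be added to the txwait list atomically with
+	 * re-checking the free list.
+	 */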
+ spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock(&dev->pending_lock);
+
+ if (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+
+ list_del(l);
+ spin_unlock(&dev->pending_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ } else {
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
+ list_empty(&qp->iowait)) {
+ dev->n_txwait++;
+ qp->s_flags |= QIB_S_WAIT_TX;
+ list_add_tail(&qp->iowait, &dev->txwait);
+ }
+ qp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock(&dev->pending_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ tx = ERR_PTR(-EBUSY);
+ }
+ return tx;
+}
+
+static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
+ struct qib_qp *qp)
+{
+ struct qib_verbs_txreq *tx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+	/* Assume the free list is non-empty (the common case). */
+ if (likely(!list_empty(&dev->txreq_free))) {
+ struct list_head *l = dev->txreq_free.next;
+
+ list_del(l);
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ } else {
+ /* call slow path to get the extra lock */
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+ tx = __get_txreq(dev, qp);
+ }
+ return tx;
+}
+
+void qib_put_txreq(struct qib_verbs_txreq *tx)
+{
+ struct qib_ibdev *dev;
+ struct qib_qp *qp;
+ unsigned long flags;
+
+ qp = tx->qp;
+ dev = to_idev(qp->ibqp.device);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ if (tx->mr) {
+ qib_put_mr(tx->mr);
+ tx->mr = NULL;
+ }
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
+ tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
+ dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
+ tx->txreq.addr, tx->hdr_dwords << 2,
+ DMA_TO_DEVICE);
+ kfree(tx->align_buf);
+ }
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+
+ /* Put struct back on free list */
+ list_add(&tx->txreq.list, &dev->txreq_free);
+
+ if (!list_empty(&dev->txwait)) {
+ /* Wake up first QP wanting a free struct */
+ qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_TX) {
+ qp->s_flags &= ~QIB_S_WAIT_TX;
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ } else
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
+/*
+ * This is called when there are send DMA descriptors that might be
+ * available.
+ *
+ * This is called with ppd->sdma_lock held.
+ */
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
+{
+ struct qib_qp *qp, *nqp;
+ struct qib_qp *qps[20];
+ struct qib_ibdev *dev;
+ unsigned i, n;
+
+ n = 0;
+ dev = &ppd->dd->verbs_dev;
+ spin_lock(&dev->pending_lock);
+
+ /* Search wait list for first QP wanting DMA descriptors. */
+ list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+ if (qp->port_num != ppd->port)
+ continue;
+ if (n == ARRAY_SIZE(qps))
+ break;
+ if (qp->s_tx->txreq.sg_count > avail)
+ break;
+ avail -= qp->s_tx->txreq.sg_count;
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ qps[n++] = qp;
+ }
+
+ spin_unlock(&dev->pending_lock);
+
+ for (i = 0; i < n; i++) {
+ qp = qps[i];
+ spin_lock(&qp->s_lock);
+ if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
+ qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+ qib_schedule_send(qp);
+ }
+ spin_unlock(&qp->s_lock);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
+/*
+ * This is called with ppd->sdma_lock held.
+ */
+static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
+{
+ struct qib_verbs_txreq *tx =
+ container_of(cookie, struct qib_verbs_txreq, txreq);
+ struct qib_qp *qp = tx->qp;
+
+ spin_lock(&qp->s_lock);
+ if (tx->wqe)
+ qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ else if (qp->ibqp.qp_type == IB_QPT_RC) {
+ struct qib_ib_header *hdr;
+
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
+ hdr = &tx->align_buf->hdr;
+ else {
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+
+ hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
+ }
+ qib_rc_send_complete(qp, hdr);
+ }
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ if (qp->state == IB_QPS_RESET)
+ wake_up(&qp->wait_dma);
+ else if (qp->s_flags & QIB_S_WAIT_DMA) {
+ qp->s_flags &= ~QIB_S_WAIT_DMA;
+ qib_schedule_send(qp);
+ }
+ }
+ spin_unlock(&qp->s_lock);
+
+ qib_put_txreq(tx);
+}
+
+static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->iowait)) {
+ if (list_empty(&dev->memwait))
+ mod_timer(&dev->mem_timer, jiffies + 1);
+ qp->s_flags |= QIB_S_WAIT_KMEM;
+ list_add_tail(&qp->iowait, &dev->memwait);
+ }
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~QIB_S_BUSY;
+ ret = -EBUSY;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ return ret;
+}
+
+static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_verbs_txreq *tx;
+ struct qib_pio_header *phdr;
+ u32 control;
+ u32 ndesc;
+ int ret;
+
+ tx = qp->s_tx;
+ if (tx) {
+ qp->s_tx = NULL;
+ /* resend previously constructed packet */
+ ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
+ goto bail;
+ }
+
+ tx = get_txreq(dev, qp);
+ if (IS_ERR(tx))
+ goto bail_tx;
+
+ control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
+ be16_to_cpu(hdr->lrh[0]) >> 12);
+ tx->qp = qp;
+ atomic_inc(&qp->refcount);
+ tx->wqe = qp->s_wqe;
+ tx->mr = qp->s_rdma_mr;
+ if (qp->s_rdma_mr)
+ qp->s_rdma_mr = NULL;
+ tx->txreq.callback = sdma_complete;
+ if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
+ tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
+ else
+ tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
+ if (plen + 1 > dd->piosize2kmax_dwords)
+ tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
+
+ if (len) {
+ /*
+ * Don't try to DMA if it takes more descriptors than
+ * the queue holds.
+ */
+ ndesc = qib_count_sge(ss, len);
+ if (ndesc >= ppd->sdma_descq_cnt)
+ ndesc = 0;
+ } else
+ ndesc = 1;
+ if (ndesc) {
+ phdr = &dev->pio_hdrs[tx->hdr_inx];
+ phdr->pbc[0] = cpu_to_le32(plen);
+ phdr->pbc[1] = cpu_to_le32(control);
+ memcpy(&phdr->hdr, hdr, hdrwords << 2);
+ tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
+ tx->txreq.sg_count = ndesc;
+ tx->txreq.addr = dev->pio_hdrs_phys +
+ tx->hdr_inx * sizeof(struct qib_pio_header);
+ tx->hdr_dwords = hdrwords + 2; /* add PBC length */
+ ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
+ goto bail;
+ }
+
+ /* Allocate a buffer and copy the header and payload to it. */
+ tx->hdr_dwords = plen + 1;
+ phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
+ if (!phdr)
+ goto err_tx;
+ phdr->pbc[0] = cpu_to_le32(plen);
+ phdr->pbc[1] = cpu_to_le32(control);
+ memcpy(&phdr->hdr, hdr, hdrwords << 2);
+ qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
+
+ tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
+ tx->hdr_dwords << 2, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
+ goto map_err;
+ tx->align_buf = phdr;
+ tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
+ tx->txreq.sg_count = 1;
+ ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
+ goto unaligned;
+
+map_err:
+ kfree(phdr);
+err_tx:
+ qib_put_txreq(tx);
+ ret = wait_kmem(dev, qp);
+unaligned:
+ ibp->n_unaligned++;
+bail:
+ return ret;
+bail_tx:
+ ret = PTR_ERR(tx);
+ goto bail;
+}
+
+/*
+ * If we are now in the error state, return zero to flush the
+ * send work request.
+ */
+static int no_bufs_available(struct qib_qp *qp)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct qib_devdata *dd;
+ unsigned long flags;
+ int ret = 0;
+
+ /*
+	 * Note that as soon as dd->f_wantpiobuf_intr() is called and
+	 * possibly before it returns, qib_ib_piobufavail()
+	 * could be called.  Therefore, put the QP on the I/O wait list
+	 * before enabling the PIO-available interrupt.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->iowait)) {
+ dev->n_piowait++;
+ qp->s_flags |= QIB_S_WAIT_PIO;
+ list_add_tail(&qp->iowait, &dev->piowait);
+ dd = dd_from_dev(dev);
+ dd->f_wantpiobuf_intr(dd, 1);
+ }
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~QIB_S_BUSY;
+ ret = -EBUSY;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
+ u32 *hdr = (u32 *) ibhdr;
+ u32 __iomem *piobuf_orig;
+ u32 __iomem *piobuf;
+ u64 pbc;
+ unsigned long flags;
+ unsigned flush_wc;
+ u32 control;
+ u32 pbufn;
+
+ control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
+ be16_to_cpu(ibhdr->lrh[0]) >> 12);
+ pbc = ((u64) control << 32) | plen;
+ piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
+ if (unlikely(piobuf == NULL))
+ return no_bufs_available(qp);
+
+ /*
+	 * Write the PBC.
+	 * We have to flush after the PBC for correctness on some CPUs,
+	 * or the WC buffer can be written out of order.
+ */
+ writeq(pbc, piobuf);
+ piobuf_orig = piobuf;
+ piobuf += 2;
+
+ flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
+ if (len == 0) {
+ /*
+ * If there is just the header portion, must flush before
+ * writing last word of header for correctness, and after
+ * the last header word (trigger word).
+ */
+ if (flush_wc) {
+ qib_flush_wc();
+ qib_pio_copy(piobuf, hdr, hdrwords - 1);
+ qib_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ qib_flush_wc();
+ } else
+ qib_pio_copy(piobuf, hdr, hdrwords);
+ goto done;
+ }
+
+ if (flush_wc)
+ qib_flush_wc();
+ qib_pio_copy(piobuf, hdr, hdrwords);
+ piobuf += hdrwords;
+
+ /* The common case is aligned and contained in one segment. */
+ if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+ !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+ u32 *addr = (u32 *) ss->sge.vaddr;
+
+ /* Update address before sending packet. */
+ update_sge(ss, len);
+ if (flush_wc) {
+ qib_pio_copy(piobuf, addr, dwords - 1);
+ /* must flush early everything before trigger word */
+ qib_flush_wc();
+ __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+ /* be sure trigger word is written */
+ qib_flush_wc();
+ } else
+ qib_pio_copy(piobuf, addr, dwords);
+ goto done;
+ }
+ copy_io(piobuf, ss, len, flush_wc);
+done:
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf_orig + spcl_off);
+ }
+ qib_sendbuf_done(dd, pbufn);
+ if (qp->s_rdma_mr) {
+ qib_put_mr(qp->s_rdma_mr);
+ qp->s_rdma_mr = NULL;
+ }
+ if (qp->s_wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ } else if (qp->ibqp.qp_type == IB_QPT_RC) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_rc_send_complete(qp, ibhdr);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ return 0;
+}
+
+/**
+ * qib_verbs_send - send a packet
+ * @qp: the QP to send on
+ * @hdr: the packet header
+ * @hdrwords: the number of 32-bit words in the header
+ * @ss: the SGE to send
+ * @len: the length of the packet in bytes
+ *
+ * Return zero if the packet is sent or queued OK.
+ * Return non-zero and clear QIB_S_BUSY in qp->s_flags otherwise.
+ */
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len)
+{
+ struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ u32 plen;
+ int ret;
+ u32 dwords = (len + 3) >> 2;
+
+ /*
+ * Calculate the send buffer trigger address.
+ * The +1 counts for the pbc control dword following the pbc length.
+ */
+ plen = hdrwords + dwords + 1;
+
+ /*
+ * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+ * can defer SDMA restart until link goes ACTIVE without
+ * worrying about just how we got there.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ !(dd->flags & QIB_HAS_SEND_DMA))
+ ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+ else
+ ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+
+ return ret;
+}
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait)
+{
+ int ret;
+ struct qib_devdata *dd = ppd->dd;
+
+ if (!(dd->flags & QIB_PRESENT)) {
+ /* no hardware, freeze, etc. */
+ ret = -EINVAL;
+ goto bail;
+ }
+ *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
+ *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
+ *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
+ *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
+ *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_get_counters - get various chip counters
+ * @ppd: the physical port data
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int qib_get_counters(struct qib_pportdata *ppd,
+ struct qib_verbs_counters *cntrs)
+{
+ int ret;
+
+ if (!(ppd->dd->flags & QIB_PRESENT)) {
+ /* no hardware, freeze, etc. */
+ ret = -EINVAL;
+ goto bail;
+ }
+ cntrs->symbol_error_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
+ cntrs->link_error_recovery_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
+ /*
+ * The link downed counter counts when the other side downs the
+ * connection. We add in the number of times we downed the link
+ * due to local link integrity errors to compensate.
+ */
+ cntrs->link_downed_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
+ cntrs->port_rcv_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
+ cntrs->port_rcv_errors +=
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
+ cntrs->port_rcv_errors +=
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
+ cntrs->port_rcv_remphys_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
+ cntrs->port_xmit_discards =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
+ cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_WORDSEND);
+ cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_WORDRCV);
+ cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_PKTSEND);
+ cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_PKTRCV);
+ cntrs->local_link_integrity_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
+ cntrs->excessive_buffer_overrun_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
+ cntrs->vl15_dropped =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_ib_piobufavail - callback when a PIO buffer is available
+ * @dd: the device pointer
+ *
+ * This is called from qib_intr() at interrupt level when a PIO buffer is
+ * available after qib_verbs_send() returned an error that no buffers were
+ * available. Disable the interrupt if there are no more QPs waiting.
+ */
+void qib_ib_piobufavail(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ struct list_head *list;
+ struct qib_qp *qps[5];
+ struct qib_qp *qp;
+ unsigned long flags;
+ unsigned i, n;
+
+ list = &dev->piowait;
+ n = 0;
+
+ /*
+ * Note: checking that the piowait list is empty and clearing
+ * the buffer available interrupt needs to be atomic or we
+ * could end up with QPs on the wait list with the interrupt
+ * disabled.
+ */
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ while (!list_empty(list)) {
+ if (n == ARRAY_SIZE(qps))
+ goto full;
+ qp = list_entry(list->next, struct qib_qp, iowait);
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ qps[n++] = qp;
+ }
+ dd->f_wantpiobuf_intr(dd, 0);
+full:
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ for (i = 0; i < n; i++) {
+ qp = qps[i];
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_PIO) {
+ qp->s_flags &= ~QIB_S_WAIT_PIO;
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify qib_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
+static int qib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ struct qib_ibdev *dev = to_idev(ibdev);
+
+ memset(props, 0, sizeof(*props));
+
+ props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+ IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ props->page_size_cap = PAGE_SIZE;
+ props->vendor_id =
+ QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+ props->vendor_part_id = dd->deviceid;
+ props->hw_ver = dd->minrev;
+ props->sys_image_guid = ib_qib_sys_image_guid;
+ props->max_mr_size = ~0ULL;
+ props->max_qp = ib_qib_max_qps;
+ props->max_qp_wr = ib_qib_max_qp_wrs;
+ props->max_sge = ib_qib_max_sges;
+ props->max_cq = ib_qib_max_cqs;
+ props->max_ah = ib_qib_max_ahs;
+ props->max_cqe = ib_qib_max_cqes;
+ props->max_mr = dev->lk_table.max;
+ props->max_fmr = dev->lk_table.max;
+ props->max_map_per_fmr = 32767;
+ props->max_pd = ib_qib_max_pds;
+ props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+ props->max_qp_init_rd_atom = 255;
+ /* props->max_res_rd_atom */
+ props->max_srq = ib_qib_max_srqs;
+ props->max_srq_wr = ib_qib_max_srq_wrs;
+ props->max_srq_sge = ib_qib_max_srq_sges;
+ /* props->local_ca_ack_delay */
+ props->atomic_cap = IB_ATOMIC_GLOB;
+ props->max_pkeys = qib_get_npkeys(dd);
+ props->max_mcast_grp = ib_qib_max_mcast_grps;
+ props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+
+ return 0;
+}
+
+static int qib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ enum ib_mtu mtu;
+ u16 lid = ppd->lid;
+
+ memset(props, 0, sizeof(*props));
+ props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
+ props->lmc = ppd->lmc;
+ props->sm_lid = ibp->sm_lid;
+ props->sm_sl = ibp->sm_sl;
+ props->state = dd->f_iblink_state(ppd->lastibcstat);
+ props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
+ props->port_cap_flags = ibp->port_cap_flags;
+ props->gid_tbl_len = QIB_GUIDS_PER_PORT;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = qib_get_npkeys(dd);
+ props->bad_pkey_cntr = ibp->pkey_violations;
+ props->qkey_viol_cntr = ibp->qkey_violations;
+ props->active_width = ppd->link_width_active;
+ /* See rate_show() */
+ props->active_speed = ppd->link_speed_active;
+ props->max_vl_num = qib_num_vls(ppd->vls_supported);
+ props->init_type_reply = 0;
+
+ props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
+ switch (ppd->ibmtu) {
+ case 4096:
+ mtu = IB_MTU_4096;
+ break;
+ case 2048:
+ mtu = IB_MTU_2048;
+ break;
+ case 1024:
+ mtu = IB_MTU_1024;
+ break;
+ case 512:
+ mtu = IB_MTU_512;
+ break;
+ case 256:
+ mtu = IB_MTU_256;
+ break;
+ default:
+ mtu = IB_MTU_2048;
+ }
+ props->active_mtu = mtu;
+ props->subnet_timeout = ibp->subnet_timeout;
+
+ return 0;
+}
+
+static int qib_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ struct qib_devdata *dd = dd_from_ibdev(device);
+ unsigned i;
+ int ret;
+
+ if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+ IB_DEVICE_MODIFY_NODE_DESC)) {
+ ret = -EOPNOTSUPP;
+ goto bail;
+ }
+
+ if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ memcpy(device->node_desc, device_modify->node_desc, 64);
+ for (i = 0; i < dd->num_pports; i++) {
+ struct qib_ibport *ibp = &dd->pport[i].ibport_data;
+
+ qib_node_desc_chg(ibp);
+ }
+ }
+
+ if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
+ ib_qib_sys_image_guid =
+ cpu_to_be64(device_modify->sys_image_guid);
+ for (i = 0; i < dd->num_pports; i++) {
+ struct qib_ibport *ibp = &dd->pport[i].ibport_data;
+
+ qib_sys_guid_chg(ibp);
+ }
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static int qib_modify_port(struct ib_device *ibdev, u8 port,
+ int port_modify_mask, struct ib_port_modify *props)
+{
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ ibp->port_cap_flags |= props->set_port_cap_mask;
+ ibp->port_cap_flags &= ~props->clr_port_cap_mask;
+ if (props->set_port_cap_mask || props->clr_port_cap_mask)
+ qib_cap_mask_chg(ibp);
+ if (port_modify_mask & IB_PORT_SHUTDOWN)
+ qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
+ if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ ibp->qkey_violations = 0;
+ return 0;
+}
+
+static int qib_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ int ret = 0;
+
+ if (!port || port > dd->num_pports)
+ ret = -EINVAL;
+ else {
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ gid->global.subnet_prefix = ibp->gid_prefix;
+ if (index == 0)
+ gid->global.interface_id = ppd->guid;
+ else if (index < QIB_GUIDS_PER_PORT)
+ gid->global.interface_id = ibp->guids[index - 1];
+ else
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_pd *pd;
+ struct ib_pd *ret;
+
+ /*
+ * This is actually totally arbitrary. Some correctness tests
+ * assume there's a maximum number of PDs that can be allocated.
+ * We don't actually have this limit, but we fail the test if
+ * we allow allocations of more than we report for this value.
+ */
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock(&dev->n_pds_lock);
+ if (dev->n_pds_allocated == ib_qib_max_pds) {
+ spin_unlock(&dev->n_pds_lock);
+ kfree(pd);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_pds_allocated++;
+ spin_unlock(&dev->n_pds_lock);
+
+ /* ib_alloc_pd() will initialize pd->ibpd. */
+ pd->user = udata != NULL;
+
+ ret = &pd->ibpd;
+
+bail:
+ return ret;
+}
+
+static int qib_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct qib_pd *pd = to_ipd(ibpd);
+ struct qib_ibdev *dev = to_idev(ibpd->device);
+
+ spin_lock(&dev->n_pds_lock);
+ dev->n_pds_allocated--;
+ spin_unlock(&dev->n_pds_lock);
+
+ kfree(pd);
+
+ return 0;
+}
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+{
+ /* A multicast address requires a GRH (see ch. 8.4.1). */
+ if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
+ ah_attr->dlid != QIB_PERMISSIVE_LID &&
+ !(ah_attr->ah_flags & IB_AH_GRH))
+ goto bail;
+ if ((ah_attr->ah_flags & IB_AH_GRH) &&
+ ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
+ goto bail;
+ if (ah_attr->dlid == 0)
+ goto bail;
+ if (ah_attr->port_num < 1 ||
+ ah_attr->port_num > ibdev->phys_port_cnt)
+ goto bail;
+ if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+ ib_rate_to_mult(ah_attr->static_rate) < 0)
+ goto bail;
+ if (ah_attr->sl > 15)
+ goto bail;
+ return 0;
+bail:
+ return -EINVAL;
+}
+
+/**
+ * qib_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ */
+static struct ib_ah *qib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ struct qib_ah *ah;
+ struct ib_ah *ret;
+ struct qib_ibdev *dev = to_idev(pd->device);
+ unsigned long flags;
+
+ if (qib_check_ah(pd->device, ah_attr)) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ if (dev->n_ahs_allocated == ib_qib_max_ahs) {
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+ kfree(ah);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_ahs_allocated++;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ /* ib_create_ah() will initialize ah->ibah. */
+ ah->attr = *ah_attr;
+ atomic_set(&ah->refcount, 0);
+
+ ret = &ah->ibah;
+
+bail:
+ return ret;
+}
+
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
+{
+ struct ib_ah_attr attr;
+ struct ib_ah *ah = ERR_PTR(-EINVAL);
+ struct qib_qp *qp0;
+
+ memset(&attr, 0, sizeof attr);
+ attr.dlid = dlid;
+ attr.port_num = ppd_from_ibp(ibp)->port;
+ rcu_read_lock();
+ qp0 = rcu_dereference(ibp->qp0);
+ if (qp0)
+ ah = ib_create_ah(qp0->ibqp.pd, &attr);
+ rcu_read_unlock();
+ return ah;
+}
+
+/**
+ * qib_destroy_ah - destroy an address handle
+ * @ibah: the AH to destroy
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_destroy_ah(struct ib_ah *ibah)
+{
+ struct qib_ibdev *dev = to_idev(ibah->device);
+ struct qib_ah *ah = to_iah(ibah);
+ unsigned long flags;
+
+ if (atomic_read(&ah->refcount) != 0)
+ return -EBUSY;
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ dev->n_ahs_allocated--;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ kfree(ah);
+
+ return 0;
+}
+
+static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct qib_ah *ah = to_iah(ibah);
+
+ if (qib_check_ah(ibah->device, ah_attr))
+ return -EINVAL;
+
+ ah->attr = *ah_attr;
+
+ return 0;
+}
+
+static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct qib_ah *ah = to_iah(ibah);
+
+ *ah_attr = ah->attr;
+
+ return 0;
+}
+
+/**
+ * qib_get_npkeys - return the size of the PKEY table for context 0
+ * @dd: the qlogic_ib device
+ */
+unsigned qib_get_npkeys(struct qib_devdata *dd)
+{
+ return ARRAY_SIZE(dd->rcd[0]->pkeys);
+}
+
+/*
+ * Return the indexed PKEY from the port PKEY table.
+ * No need to validate rcd[ctxt]; the port is set up if we are here.
+ */
+unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
+{
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = ppd->dd;
+ unsigned ctxt = ppd->hw_pidx;
+ unsigned ret;
+
+ /* dd->rcd null if mini_init or some init failures */
+ if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
+ ret = 0;
+ else
+ ret = dd->rcd[ctxt]->pkeys[index];
+
+ return ret;
+}
+
+static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ int ret;
+
+ if (index >= qib_get_npkeys(dd)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ *pkey = qib_get_pkey(to_iport(ibdev, port), index);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_alloc_ucontext - allocate a ucontext
+ * @ibdev: the infiniband device
+ * @udata: not used by the QLogic_IB driver
+ */
+static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct qib_ucontext *context;
+ struct ib_ucontext *ret;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ ret = &context->ibucontext;
+
+bail:
+ return ret;
+}
+
+static int qib_dealloc_ucontext(struct ib_ucontext *context)
+{
+ kfree(to_iucontext(context));
+ return 0;
+}
+
+static void init_ibport(struct qib_pportdata *ppd)
+{
+ struct qib_verbs_counters cntrs;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+
+ spin_lock_init(&ibp->lock);
+ /* Set the prefix to the default value (see ch. 4.1.1) */
+ ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
+ ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+ ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+ IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
+ IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
+ IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
+ IB_PORT_OTHER_LOCAL_CHANGES_SUP;
+ if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
+ ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+ ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+ ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+ ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+ ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+ ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+ /* Snapshot current HW counters to "clear" them. */
+ qib_get_counters(ppd, &cntrs);
+ ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
+ ibp->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+ ibp->z_link_downed_counter = cntrs.link_downed_counter;
+ ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
+ ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
+ ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
+ ibp->z_port_xmit_data = cntrs.port_xmit_data;
+ ibp->z_port_rcv_data = cntrs.port_rcv_data;
+ ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
+ ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
+ ibp->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+ ibp->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+ ibp->z_vl15_dropped = cntrs.vl15_dropped;
+ RCU_INIT_POINTER(ibp->qp0, NULL);
+ RCU_INIT_POINTER(ibp->qp1, NULL);
+}
+
+/**
+ * qib_register_ib_device - register our device with the infiniband core
+ * @dd: the device data structure
+ * Return 0 on success, or a negative errno on failure.
+ */
+int qib_register_ib_device(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ struct ib_device *ibdev = &dev->ibdev;
+ struct qib_pportdata *ppd = dd->pport;
+ unsigned i, lk_tab_size;
+ int ret;
+
+ dev->qp_table_size = ib_qib_qp_table_size;
+ get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
+ dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table,
+ GFP_KERNEL);
+ if (!dev->qp_table) {
+ ret = -ENOMEM;
+ goto err_qpt;
+ }
+ for (i = 0; i < dev->qp_table_size; i++)
+ RCU_INIT_POINTER(dev->qp_table[i], NULL);
+
+ for (i = 0; i < dd->num_pports; i++)
+ init_ibport(ppd + i);
+
+ /* Only need to initialize non-zero fields. */
+ spin_lock_init(&dev->qpt_lock);
+ spin_lock_init(&dev->n_pds_lock);
+ spin_lock_init(&dev->n_ahs_lock);
+ spin_lock_init(&dev->n_cqs_lock);
+ spin_lock_init(&dev->n_qps_lock);
+ spin_lock_init(&dev->n_srqs_lock);
+ spin_lock_init(&dev->n_mcast_grps_lock);
+ init_timer(&dev->mem_timer);
+ dev->mem_timer.function = mem_timer;
+ dev->mem_timer.data = (unsigned long) dev;
+
+ qib_init_qpn_table(dd, &dev->qpn_table);
+
+ /*
+ * The top ib_qib_lkey_table_size bits are used to index the
+ * table. The lower 8 bits can be owned by the user (copied from
+ * the LKEY). The remaining bits act as a generation number or tag.
+ */
+ spin_lock_init(&dev->lk_table.lock);
+ dev->lk_table.max = 1 << ib_qib_lkey_table_size;
+ lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+ dev->lk_table.table = (struct qib_mregion __rcu **)
+ __get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
+ if (dev->lk_table.table == NULL) {
+ ret = -ENOMEM;
+ goto err_lk;
+ }
+ RCU_INIT_POINTER(dev->dma_mr, NULL);
+ for (i = 0; i < dev->lk_table.max; i++)
+ RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
+ INIT_LIST_HEAD(&dev->pending_mmaps);
+ spin_lock_init(&dev->pending_lock);
+ dev->mmap_offset = PAGE_SIZE;
+ spin_lock_init(&dev->mmap_offset_lock);
+ INIT_LIST_HEAD(&dev->piowait);
+ INIT_LIST_HEAD(&dev->dmawait);
+ INIT_LIST_HEAD(&dev->txwait);
+ INIT_LIST_HEAD(&dev->memwait);
+ INIT_LIST_HEAD(&dev->txreq_free);
+
+ if (ppd->sdma_descq_cnt) {
+ dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
+ ppd->sdma_descq_cnt *
+ sizeof(struct qib_pio_header),
+ &dev->pio_hdrs_phys,
+ GFP_KERNEL);
+ if (!dev->pio_hdrs) {
+ ret = -ENOMEM;
+ goto err_hdrs;
+ }
+ }
+
+ for (i = 0; i < ppd->sdma_descq_cnt; i++) {
+ struct qib_verbs_txreq *tx;
+
+ tx = kzalloc(sizeof *tx, GFP_KERNEL);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err_tx;
+ }
+ tx->hdr_inx = i;
+ list_add(&tx->txreq.list, &dev->txreq_free);
+ }
+
+ /*
+ * The system image GUID is supposed to be the same for all
+ * IB HCAs in a single system but since there can be other
+ * device types in the system, we can't be sure this is unique.
+ */
+ if (!ib_qib_sys_image_guid)
+ ib_qib_sys_image_guid = ppd->guid;
+
+ strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
+ ibdev->owner = THIS_MODULE;
+ ibdev->node_guid = ppd->guid;
+ ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
+ ibdev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ibdev->phys_port_cnt = dd->num_pports;
+ ibdev->num_comp_vectors = 1;
+ ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->query_device = qib_query_device;
+ ibdev->modify_device = qib_modify_device;
+ ibdev->query_port = qib_query_port;
+ ibdev->modify_port = qib_modify_port;
+ ibdev->query_pkey = qib_query_pkey;
+ ibdev->query_gid = qib_query_gid;
+ ibdev->alloc_ucontext = qib_alloc_ucontext;
+ ibdev->dealloc_ucontext = qib_dealloc_ucontext;
+ ibdev->alloc_pd = qib_alloc_pd;
+ ibdev->dealloc_pd = qib_dealloc_pd;
+ ibdev->create_ah = qib_create_ah;
+ ibdev->destroy_ah = qib_destroy_ah;
+ ibdev->modify_ah = qib_modify_ah;
+ ibdev->query_ah = qib_query_ah;
+ ibdev->create_srq = qib_create_srq;
+ ibdev->modify_srq = qib_modify_srq;
+ ibdev->query_srq = qib_query_srq;
+ ibdev->destroy_srq = qib_destroy_srq;
+ ibdev->create_qp = qib_create_qp;
+ ibdev->modify_qp = qib_modify_qp;
+ ibdev->query_qp = qib_query_qp;
+ ibdev->destroy_qp = qib_destroy_qp;
+ ibdev->post_send = qib_post_send;
+ ibdev->post_recv = qib_post_receive;
+ ibdev->post_srq_recv = qib_post_srq_receive;
+ ibdev->create_cq = qib_create_cq;
+ ibdev->destroy_cq = qib_destroy_cq;
+ ibdev->resize_cq = qib_resize_cq;
+ ibdev->poll_cq = qib_poll_cq;
+ ibdev->req_notify_cq = qib_req_notify_cq;
+ ibdev->get_dma_mr = qib_get_dma_mr;
+ ibdev->reg_phys_mr = qib_reg_phys_mr;
+ ibdev->reg_user_mr = qib_reg_user_mr;
+ ibdev->dereg_mr = qib_dereg_mr;
+ ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
+ ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
+ ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
+ ibdev->alloc_fmr = qib_alloc_fmr;
+ ibdev->map_phys_fmr = qib_map_phys_fmr;
+ ibdev->unmap_fmr = qib_unmap_fmr;
+ ibdev->dealloc_fmr = qib_dealloc_fmr;
+ ibdev->attach_mcast = qib_multicast_attach;
+ ibdev->detach_mcast = qib_multicast_detach;
+ ibdev->process_mad = qib_process_mad;
+ ibdev->mmap = qib_mmap;
+ ibdev->dma_ops = &qib_dma_mapping_ops;
+
+ snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
+ "Intel Infiniband HCA %s", init_utsname()->nodename);
+
+ ret = ib_register_device(ibdev, qib_create_port_files);
+ if (ret)
+ goto err_reg;
+
+ ret = qib_create_agents(dev);
+ if (ret)
+ goto err_agents;
+
+ ret = qib_verbs_register_sysfs(dd);
+ if (ret)
+ goto err_class;
+
+ goto bail;
+
+err_class:
+ qib_free_agents(dev);
+err_agents:
+ ib_unregister_device(ibdev);
+err_reg:
+err_tx:
+ while (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+ struct qib_verbs_txreq *tx;
+
+ list_del(l);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ kfree(tx);
+ }
+ if (ppd->sdma_descq_cnt)
+ dma_free_coherent(&dd->pcidev->dev,
+ ppd->sdma_descq_cnt *
+ sizeof(struct qib_pio_header),
+ dev->pio_hdrs, dev->pio_hdrs_phys);
+err_hdrs:
+ free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size));
+err_lk:
+ kfree(dev->qp_table);
+err_qpt:
+ qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
+bail:
+ return ret;
+}
+
+void qib_unregister_ib_device(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ struct ib_device *ibdev = &dev->ibdev;
+ u32 qps_inuse;
+ unsigned lk_tab_size;
+
+ qib_verbs_unregister_sysfs(dd);
+
+ qib_free_agents(dev);
+
+ ib_unregister_device(ibdev);
+
+ if (!list_empty(&dev->piowait))
+ qib_dev_err(dd, "piowait list not empty!\n");
+ if (!list_empty(&dev->dmawait))
+ qib_dev_err(dd, "dmawait list not empty!\n");
+ if (!list_empty(&dev->txwait))
+ qib_dev_err(dd, "txwait list not empty!\n");
+ if (!list_empty(&dev->memwait))
+ qib_dev_err(dd, "memwait list not empty!\n");
+ if (dev->dma_mr)
+ qib_dev_err(dd, "DMA MR not NULL!\n");
+
+ qps_inuse = qib_free_all_qps(dd);
+ if (qps_inuse)
+ qib_dev_err(dd, "QP memory leak! %u still in use\n",
+ qps_inuse);
+
+ del_timer_sync(&dev->mem_timer);
+ qib_free_qpn_table(&dev->qpn_table);
+ while (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+ struct qib_verbs_txreq *tx;
+
+ list_del(l);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ kfree(tx);
+ }
+ if (dd->pport->sdma_descq_cnt)
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->pport->sdma_descq_cnt *
+ sizeof(struct qib_pio_header),
+ dev->pio_hdrs, dev->pio_hdrs_phys);
+ lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+ free_pages((unsigned long) dev->lk_table.table,
+ get_order(lk_tab_size));
+ kfree(dev->qp_table);
+}
+
+/*
+ * This must be called with s_lock held.
+ */
+void qib_schedule_send(struct qib_qp *qp)
+{
+ if (qib_send_ok(qp)) {
+ struct qib_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ queue_work(ppd->qib_wq, &qp->s_work);
+ }
+}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
new file mode 100644
index 00000000000..bfc8948fdd3
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -0,0 +1,1173 @@
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef QIB_VERBS_H
+#define QIB_VERBS_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/completion.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
+
+struct qib_ctxtdata;
+struct qib_pportdata;
+struct qib_devdata;
+struct qib_verbs_txreq;
+
+#define QIB_MAX_RDMA_ATOMIC 16
+#define QIB_GUIDS_PER_PORT 5
+
+#define QPN_MAX (1 << 24)
+#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
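+/*
+ * With 4 KiB pages, for example, QPNMAP_ENTRIES works out to
+ * 2^24 QPNs / (4096 * 8 bits per page) = 512 map entries.
+ */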
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define QIB_UVERBS_ABI_VERSION 2
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
+
+#define IB_SEQ_NAK (3 << 29)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK 0x20
+#define IB_NAK_PSN_ERROR 0x60
+#define IB_NAK_INVALID_REQUEST 0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST 0x64
+
+/* Flags for checking QP state (see ib_qib_state_ops[]) */
+#define QIB_POST_SEND_OK 0x01
+#define QIB_POST_RECV_OK 0x02
+#define QIB_PROCESS_RECV_OK 0x04
+#define QIB_PROCESS_SEND_OK 0x08
+#define QIB_PROCESS_NEXT_SEND_OK 0x10
+#define QIB_FLUSH_SEND 0x20
+#define QIB_FLUSH_RECV 0x40
+#define QIB_PROCESS_OR_FLUSH_SEND \
+ (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
+
+/* IB Performance Manager status values */
+#define IB_PMA_SAMPLE_STATUS_DONE 0x00
+#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
+#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
+
+/* Mandatory IB performance counter select values. */
+#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
+#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
+
+#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
+
+#define IB_BTH_REQ_ACK (1 << 31)
+#define IB_BTH_SOLICITED (1 << 23)
+#define IB_BTH_MIG_REQ (1 << 22)
+
+/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
+#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
+
+#define IB_GRH_VERSION 6
+#define IB_GRH_VERSION_MASK 0xF
+#define IB_GRH_VERSION_SHIFT 28
+#define IB_GRH_TCLASS_MASK 0xFF
+#define IB_GRH_TCLASS_SHIFT 20
+#define IB_GRH_FLOW_MASK 0xFFFFF
+#define IB_GRH_FLOW_SHIFT 0
+#define IB_GRH_NEXT_HDR 0x1B
+
+#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+
+/* Values for set/get portinfo VLCap OperationalVLs */
+#define IB_VL_VL0 1
+#define IB_VL_VL0_1 2
+#define IB_VL_VL0_3 3
+#define IB_VL_VL0_7 4
+#define IB_VL_VL0_14 5
+
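+/*
+ * For example, a VLCap/OperationalVLs encoding of IB_VL_VL0_3 means VL0
+ * through VL3 are supported, i.e. qib_num_vls() returns 4 data VLs.
+ */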
+static inline int qib_num_vls(int vls)
+{
+ switch (vls) {
+ default:
+ case IB_VL_VL0:
+ return 1;
+ case IB_VL_VL0_1:
+ return 2;
+ case IB_VL_VL0_3:
+ return 4;
+ case IB_VL_VL0_7:
+ return 8;
+ case IB_VL_VL0_14:
+ return 15;
+ }
+}
+
+struct ib_reth {
+ __be64 vaddr;
+ __be32 rkey;
+ __be32 length;
+} __packed;
+
+struct ib_atomic_eth {
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
+ __be32 rkey;
+ __be64 swap_data;
+ __be64 compare_data;
+} __packed;
+
+struct qib_other_headers {
+ __be32 bth[3];
+ union {
+ struct {
+ __be32 deth[2];
+ __be32 imm_data;
+ } ud;
+ struct {
+ struct ib_reth reth;
+ __be32 imm_data;
+ } rc;
+ struct {
+ __be32 aeth;
+ __be32 atomic_ack_eth[2];
+ } at;
+ __be32 imm_data;
+ __be32 aeth;
+ struct ib_atomic_eth atomic_eth;
+ } u;
+} __packed;
+
+/*
+ * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
+ * long (72 w/ imm_data). Only the first 56 bytes of the IB header
+ * will be in the eager header buffer. The remaining 12 or 16 bytes
+ * are in the data buffer.
+ */
+struct qib_ib_header {
+ __be16 lrh[4];
+ union {
+ struct {
+ struct ib_grh grh;
+ struct qib_other_headers oth;
+ } l;
+ struct qib_other_headers oth;
+ } u;
+} __packed;
+
+struct qib_pio_header {
+ __le32 pbc[2];
+ struct qib_ib_header hdr;
+} __packed;
+
+/*
+ * There is one struct qib_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct qib_mcast_qp.
+ */
+struct qib_mcast_qp {
+ struct list_head list;
+ struct qib_qp *qp;
+};
+
+struct qib_mcast {
+ struct rb_node rb_node;
+ union ib_gid mgid;
+ struct list_head qp_list;
+ wait_queue_head_t wait;
+ atomic_t refcount;
+ int n_attached;
+};
+
+/* Protection domain */
+struct qib_pd {
+ struct ib_pd ibpd;
+ int user; /* non-zero if created from user space */
+};
+
+/* Address Handle */
+struct qib_ah {
+ struct ib_ah ibah;
+ struct ib_ah_attr attr;
+ atomic_t refcount;
+};
+
+/*
+ * This structure is used by qib_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct qib_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ unsigned size;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct qib_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ union {
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
+ struct ib_wc kqueue[0];
+ };
+};
+
+/*
+ * The completion queue structure.
+ */
+struct qib_cq {
+ struct ib_cq ibcq;
+ struct kthread_work comptask;
+ struct qib_devdata *dd;
+ spinlock_t lock; /* protect changes in this struct */
+ u8 notify;
+ u8 triggered;
+ struct qib_cq_wc *queue;
+ struct qib_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct qib_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of qib_segs that fit in a page. */
+#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg))
+
+struct qib_segarray {
+ struct qib_seg segs[QIB_SEGSZ];
+};
+
+struct qib_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of qib_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ u8 page_shift; /* 0 - non-uniform/non-power-of-2 sizes */
+ u8 lkey_published; /* in global table */
+ struct completion comp; /* complete when refcount goes to zero */
+ struct rcu_head list;
+ atomic_t refcount;
+ struct qib_segarray *map[0]; /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct qib_sge {
+ struct qib_mregion *mr;
+ void *vaddr; /* kernel virtual address of segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct qib_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct qib_mregion mr; /* must be last */
+};
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct qib_swqe {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ u32 psn; /* first packet sequence number */
+ u32 lpsn; /* last packet sequence number */
+ u32 ssn; /* send sequence number */
+ u32 length; /* total length of data in sg_list */
+ struct qib_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct qib_rwqe {
+ u64 wr_id;
+ u8 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct qib_rwq {
+ u32 head; /* new work requests posted to the head */
+ u32 tail; /* receives pull requests from here. */
+ struct qib_rwqe wq[0];
+};
+
+struct qib_rq {
+ struct qib_rwq *wq;
+ u32 size; /* size of RWQE array */
+ u8 max_sge;
+ spinlock_t lock /* protect changes in this struct */
+ ____cacheline_aligned_in_smp;
+};
+
+struct qib_srq {
+ struct ib_srq ibsrq;
+ struct qib_rq rq;
+ struct qib_mmap_info *ip;
+ /* send signal when number of RWQEs < limit */
+ u32 limit;
+};
+
+struct qib_sge_state {
+ struct qib_sge *sg_list; /* next SGE to be used if any */
+ struct qib_sge sge; /* progress state for the current SGE */
+ u32 total_len;
+ u8 num_sge;
+};
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct qib_ack_entry {
+ u8 opcode;
+ u8 sent;
+ u32 psn;
+ u32 lpsn;
+ union {
+ struct qib_sge rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock, taken in
+ * that order, which only happens in modify_qp() or when changing the QP 'state'.
+ */
+struct qib_qp {
+ struct ib_qp ibqp;
+ /* read mostly fields above and below */
+ struct ib_ah_attr remote_ah_attr;
+ struct ib_ah_attr alt_ah_attr;
+ struct qib_qp __rcu *next; /* link list for QPN hash table */
+ struct qib_swqe *s_wq; /* send work queue */
+ struct qib_mmap_info *ip;
+ struct qib_ib_header *s_hdr; /* next packet header to send */
+ unsigned long timeout_jiffies; /* computed from timeout */
+
+ enum ib_mtu path_mtu;
+ u32 remote_qpn;
+ u32 pmtu; /* decoded from path_mtu */
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
+
+ u8 state; /* QP state */
+ u8 qp_access_flags;
+ u8 alt_timeout; /* Alternate path timeout for this QP */
+ u8 timeout; /* Timeout for this QP */
+ u8 s_srate;
+ u8 s_mig_state;
+ u8 port_num;
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_draining;
+
+ /* start of read/write fields */
+
+ atomic_t refcount ____cacheline_aligned_in_smp;
+ wait_queue_head_t wait;
+
+
+ struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
+ ____cacheline_aligned_in_smp;
+ struct qib_sge_state s_rdma_read_sge;
+
+ spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
+ unsigned long r_aflags;
+ u64 r_wr_id; /* ID for current receive WQE */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
+
+ u8 r_state; /* opcode of last packet received */
+ u8 r_flags;
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+
+ struct list_head rspwait; /* link for waiting to respond */
+
+ struct qib_sge_state r_sge; /* current receive data */
+ struct qib_rq r_rq; /* receive work queue */
+
+ spinlock_t s_lock ____cacheline_aligned_in_smp;
+ struct qib_sge_state *s_cur_sge;
+ u32 s_flags;
+ struct qib_verbs_txreq *s_tx;
+ struct qib_swqe *s_wqe;
+ struct qib_sge_state s_sge; /* current send request data */
+ struct qib_mregion *s_rdma_mr;
+ atomic_t s_dma_busy;
+ u32 s_cur_size; /* size of send packet in bytes */
+ u32 s_len; /* total length of s_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
+ u32 s_next_psn; /* PSN for next request */
+ u32 s_last_psn; /* last response PSN processed */
+ u32 s_sending_psn; /* lowest PSN that is being sent */
+ u32 s_sending_hpsn; /* highest PSN that is being sent */
+ u32 s_psn; /* current packet sequence number */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
+ u32 s_head; /* new entries added here */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_acked; /* last un-ACK'ed entry */
+ u32 s_last; /* last completed entry */
+ u32 s_ssn; /* SSN of tail entry */
+ u32 s_lsn; /* limit sequence number (credit) */
+ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
+ u16 s_rdma_ack_cnt;
+ u8 s_state; /* opcode of last packet sent */
+ u8 s_ack_state; /* opcode of packet to ACK */
+ u8 s_nak_state; /* non-zero if NAK is pending */
+ u8 r_nak_state; /* non-zero if NAK is pending */
+ u8 s_retry; /* requester retry counter */
+ u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+
+ struct qib_sge_state s_ack_rdma_sge;
+ struct timer_list s_timer;
+ struct list_head iowait; /* link for wait PIO buf */
+
+ struct work_struct s_work;
+
+ wait_queue_head_t wait_dma;
+
+ struct qib_sge r_sg_list[0] /* verified SGEs */
+ ____cacheline_aligned_in_smp;
+};
+
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define QIB_R_WRID_VALID 0
+#define QIB_R_REWIND_SGE 1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define QIB_R_REUSE_SGE 0x01
+#define QIB_R_RDMAR_SEQ 0x02
+#define QIB_R_RSP_NAK 0x04
+#define QIB_R_RSP_SEND 0x08
+#define QIB_R_COMM_EST 0x10
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * QIB_S_BUSY - send tasklet is processing the QP
+ * QIB_S_TIMER - the RC retry timer is active
+ * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ * before processing the next SWQE
+ * QIB_S_WAIT_RNR - waiting for RNR timeout
+ * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA
+ * QIB_S_WAIT_PIO - waiting for a send buffer to be available
+ * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
+ * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
+ * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ */
+#define QIB_S_SIGNAL_REQ_WR 0x0001
+#define QIB_S_BUSY 0x0002
+#define QIB_S_TIMER 0x0004
+#define QIB_S_RESP_PENDING 0x0008
+#define QIB_S_ACK_PENDING 0x0010
+#define QIB_S_WAIT_FENCE 0x0020
+#define QIB_S_WAIT_RDMAR 0x0040
+#define QIB_S_WAIT_RNR 0x0080
+#define QIB_S_WAIT_SSN_CREDIT 0x0100
+#define QIB_S_WAIT_DMA 0x0200
+#define QIB_S_WAIT_PIO 0x0400
+#define QIB_S_WAIT_TX 0x0800
+#define QIB_S_WAIT_DMA_DESC 0x1000
+#define QIB_S_WAIT_KMEM 0x2000
+#define QIB_S_WAIT_PSN 0x4000
+#define QIB_S_WAIT_ACK 0x8000
+#define QIB_S_SEND_ONE 0x10000
+#define QIB_S_UNLIMITED_CREDIT 0x20000
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
+ QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
+ QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
+ QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
+
+#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
+
+#define QIB_PSN_CREDIT 16
+
+/*
+ * Since struct qib_swqe is not a fixed size, we can't simply index into
+ * struct qib_qp.s_wq. This function does the array index computation.
+ */
+static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
+ unsigned n)
+{
+ return (struct qib_swqe *)((char *)qp->s_wq +
+ (sizeof(struct qib_swqe) +
+ qp->s_max_sge *
+ sizeof(struct qib_sge)) * n);
+}
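+
+/*
+ * A minimal, hypothetical usage sketch: walking the send queue must go
+ * through get_swqe_ptr() rather than indexing s_wq directly, since each
+ * entry is sizeof(struct qib_swqe) plus s_max_sge SGEs, e.g.:
+ *
+ *	for (n = qp->s_tail; n != qp->s_head; n = (n + 1) % qp->s_size) {
+ *		struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+ *		... inspect wqe->wr, wqe->psn, ...
+ *	}
+ */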
+
+/*
+ * Since struct qib_rwqe is not a fixed size, we can't simply index into
+ * struct qib_rwq.wq. This function does the array index computation.
+ */
+static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
+{
+ return (struct qib_rwqe *)
+ ((char *) rq->wq->wq +
+ (sizeof(struct qib_rwqe) +
+ rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+/*
+ * QPN-map pages start out as NULL; they get allocated upon
+ * first use and are never deallocated. This way, large bitmaps
+ * are not allocated unless large numbers of QPs are used.
+ */
+struct qpn_map {
+ void *page;
+};
+
+struct qib_qpn_table {
+ spinlock_t lock; /* protect changes in this struct */
+ unsigned flags; /* flags for QP0/1 allocated for each port */
+ u32 last; /* last QP number allocated */
+ u32 nmaps; /* size of the map table */
+ u16 limit;
+ u16 mask;
+ /* bit map of free QP numbers other than 0/1 */
+ struct qpn_map map[QPNMAP_ENTRIES];
+};
+
+struct qib_lkey_table {
+ spinlock_t lock; /* protect changes in this struct */
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
+ u32 max; /* size of the table */
+ struct qib_mregion __rcu **table;
+};
+
+struct qib_opcode_stats {
+ u64 n_packets; /* number of packets */
+ u64 n_bytes; /* total number of bytes */
+};
+
+struct qib_opcode_stats_perctx {
+ struct qib_opcode_stats stats[128];
+};
+
+struct qib_pma_counters {
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+};
+
+struct qib_ibport {
+ struct qib_qp __rcu *qp0;
+ struct qib_qp __rcu *qp1;
+ struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
+ struct qib_ah *sm_ah;
+ struct qib_ah *smi_ah;
+ struct rb_root mcast_tree;
+ spinlock_t lock; /* protect changes in this struct */
+
+ /* non-zero when timer is set */
+ unsigned long mkey_lease_timeout;
+ unsigned long trap_timeout;
+ __be64 gid_prefix; /* in network order */
+ __be64 mkey;
+ __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
+ u64 tid; /* TID for traps */
+ struct qib_pma_counters __percpu *pmastats;
+ u64 z_unicast_xmit; /* starting count for PMA */
+ u64 z_unicast_rcv; /* starting count for PMA */
+ u64 z_multicast_xmit; /* starting count for PMA */
+ u64 z_multicast_rcv; /* starting count for PMA */
+ u64 z_symbol_error_counter; /* starting count for PMA */
+ u64 z_link_error_recovery_counter; /* starting count for PMA */
+ u64 z_link_downed_counter; /* starting count for PMA */
+ u64 z_port_rcv_errors; /* starting count for PMA */
+ u64 z_port_rcv_remphys_errors; /* starting count for PMA */
+ u64 z_port_xmit_discards; /* starting count for PMA */
+ u64 z_port_xmit_data; /* starting count for PMA */
+ u64 z_port_rcv_data; /* starting count for PMA */
+ u64 z_port_xmit_packets; /* starting count for PMA */
+ u64 z_port_rcv_packets; /* starting count for PMA */
+ u32 z_local_link_integrity_errors; /* starting count for PMA */
+ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
+ u32 z_vl15_dropped; /* starting count for PMA */
+ u32 n_rc_resends;
+ u32 n_rc_acks;
+ u32 n_rc_qacks;
+ u32 n_rc_delayed_comp;
+ u32 n_seq_naks;
+ u32 n_rdma_seq;
+ u32 n_rnr_naks;
+ u32 n_other_naks;
+ u32 n_loop_pkts;
+ u32 n_pkt_drops;
+ u32 n_vl15_dropped;
+ u32 n_rc_timeouts;
+ u32 n_dmawait;
+ u32 n_unaligned;
+ u32 n_rc_dupreq;
+ u32 n_rc_seqnak;
+ u32 port_cap_flags;
+ u32 pma_sample_start;
+ u32 pma_sample_interval;
+ __be16 pma_counter_select[5];
+ u16 pma_tag;
+ u16 pkey_violations;
+ u16 qkey_violations;
+ u16 mkey_violations;
+ u16 mkey_lease_period;
+ u16 sm_lid;
+ u16 repress_traps;
+ u8 sm_sl;
+ u8 mkeyprot;
+ u8 subnet_timeout;
+ u8 vl_high_limit;
+ u8 sl_to_vl[16];
+
+};
+
+
+struct qib_ibdev {
+ struct ib_device ibdev;
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock; /* protect mmap_offset */
+ u32 mmap_offset;
+ struct qib_mregion __rcu *dma_mr;
+
+ /* QP numbers are shared by all IB ports */
+ struct qib_qpn_table qpn_table;
+ struct qib_lkey_table lk_table;
+ struct list_head piowait; /* list for wait PIO buf */
+ struct list_head dmawait; /* list for wait DMA */
+ struct list_head txwait; /* list for wait qib_verbs_txreq */
+ struct list_head memwait; /* list for wait kernel memory */
+ struct list_head txreq_free;
+ struct timer_list mem_timer;
+ struct qib_qp __rcu **qp_table;
+ struct qib_pio_header *pio_hdrs;
+ dma_addr_t pio_hdrs_phys;
+ /* list of QPs waiting for RNR timer */
+ spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
+ u32 qp_table_size; /* size of the hash table */
+ u32 qp_rnd; /* random bytes for hash */
+ spinlock_t qpt_lock;
+
+ u32 n_piowait;
+ u32 n_txwait;
+
+ u32 n_pds_allocated; /* number of PDs allocated for device */
+ spinlock_t n_pds_lock;
+ u32 n_ahs_allocated; /* number of AHs allocated for device */
+ spinlock_t n_ahs_lock;
+ u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ spinlock_t n_qps_lock;
+ u32 n_srqs_allocated; /* number of SRQs allocated for device */
+ spinlock_t n_srqs_lock;
+ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+#ifdef CONFIG_DEBUG_FS
+ /* per HCA debugfs */
+ struct dentry *qib_ibdev_dbg;
+#endif
+};
+
+struct qib_verbs_counters {
+ u64 symbol_error_counter;
+ u64 link_error_recovery_counter;
+ u64 link_downed_counter;
+ u64 port_rcv_errors;
+ u64 port_rcv_remphys_errors;
+ u64 port_xmit_discards;
+ u64 port_xmit_data;
+ u64 port_rcv_data;
+ u64 port_xmit_packets;
+ u64 port_rcv_packets;
+ u32 local_link_integrity_errors;
+ u32 excessive_buffer_overrun_errors;
+ u32 vl15_dropped;
+};
+
+static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct qib_mr, ibmr);
+}
+
+static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct qib_pd, ibpd);
+}
+
+static inline struct qib_ah *to_iah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct qib_ah, ibah);
+}
+
+static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct qib_cq, ibcq);
+}
+
+static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct qib_srq, ibsrq);
+}
+
+static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct qib_qp, ibqp);
+}
+
+static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct qib_ibdev, ibdev);
+}
+
+/*
+ * Send if not busy or waiting for I/O and either
+ * an RC response is pending or we can process send work requests.
+ */
+static inline int qib_send_ok(struct qib_qp *qp)
+{
+ return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
+ (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
+ !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+}
+
+/*
+ * This must be called with s_lock held.
+ */
+void qib_schedule_send(struct qib_qp *qp);
+
+static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
+{
+ u16 p1 = pkey1 & 0x7FFF;
+ u16 p2 = pkey2 & 0x7FFF;
+
+ /*
+ * Low 15 bits must be non-zero and match, and
+ * one of the two must be a full member.
+ */
+ return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0);
+}
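+
+/*
+ * For example, qib_pkey_ok(0xFFFF, 0x7FFF) is true: the low 15 bits match
+ * and 0xFFFF is a full member (high bit set).  qib_pkey_ok(0x7FFF, 0x7FFF)
+ * is false because both keys are only limited members.
+ */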
+
+void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
+ u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_sys_guid_chg(struct qib_ibport *ibp);
+void qib_node_desc_chg(struct qib_ibport *ibp);
+int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+int qib_create_agents(struct qib_ibdev *dev);
+void qib_free_agents(struct qib_ibdev *dev);
+
+/*
+ * Compare the lower 24 bits of the two values.
+ * Returns an integer less than, equal to, or greater than zero.
+ */
+static inline int qib_cmp24(u32 a, u32 b)
+{
+ return (((int) a) - ((int) b)) << 8;
+}
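+
+/*
+ * The shift above puts bit 23 of the 24-bit difference into the sign bit,
+ * so PSN wraparound is handled; e.g. qib_cmp24(1, 0xFFFFFF) > 0, meaning
+ * PSN 1 is "after" PSN 0xFFFFFF.
+ */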
+
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait);
+
+int qib_get_counters(struct qib_pportdata *ppd,
+ struct qib_verbs_counters *cntrs);
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp);
+
+__be32 qib_compute_aeth(struct qib_qp *qp);
+
+struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
+
+struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int qib_destroy_qp(struct ib_qp *ibqp);
+
+int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+
+unsigned qib_free_all_qps(struct qib_devdata *dd);
+
+void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
+
+void qib_free_qpn_table(struct qib_qpn_table *qpt);
+
+#ifdef CONFIG_DEBUG_FS
+
+struct qib_qp_iter;
+
+struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
+
+int qib_qp_iter_next(struct qib_qp_iter *iter);
+
+void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
+
+#endif
+
+void qib_get_credit(struct qib_qp *qp, u32 aeth);
+
+unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
+
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
+
+void qib_put_txreq(struct qib_verbs_txreq *tx);
+
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len);
+
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+ int release);
+
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+
+struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
+
+void qib_rc_rnr_retry(unsigned long arg);
+
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
+
+void qib_free_lkey(struct qib_mregion *mr);
+
+int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
+ struct qib_sge *isge, struct ib_sge *sge, int acc);
+
+int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+ u32 len, u64 vaddr, u32 rkey, int acc);
+
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+
+int qib_destroy_srq(struct ib_srq *ibsrq);
+
+int qib_cq_init(struct qib_devdata *dd);
+
+void qib_cq_exit(struct qib_devdata *dd);
+
+void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
+
+int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+
+struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int qib_destroy_cq(struct ib_cq *ibcq);
+
+int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+
+int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+
+struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
+
+struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start);
+
+struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+
+int qib_dereg_mr(struct ib_mr *ibmr);
+
+struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
+ struct ib_device *ibdev, int page_list_len);
+
+void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+
+int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+
+struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+
+int qib_unmap_fmr(struct list_head *fmr_list);
+
+int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+
+static inline void qib_get_mr(struct qib_mregion *mr)
+{
+ atomic_inc(&mr->refcount);
+}
+
+void mr_rcu_callback(struct rcu_head *list);
+
+static inline void qib_put_mr(struct qib_mregion *mr)
+{
+ if (unlikely(atomic_dec_and_test(&mr->refcount)))
+ call_rcu(&mr->list, mr_rcu_callback);
+}
+
+static inline void qib_put_ss(struct qib_sge_state *ss)
+{
+ while (ss->num_sge) {
+ qib_put_mr(ss->sge.mr);
+ if (--ss->num_sge)
+ ss->sge = *ss->sg_list++;
+ }
+}
+
+
+void qib_release_mmap_info(struct kref *ref);
+
+struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
+ u32 size, void *obj);
+
+int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
+
+void qib_migrate_qp(struct qib_qp *qp);
+
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, struct qib_qp *qp, u32 bth0);
+
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords);
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+ u32 bth0, u32 bth2);
+
+void qib_do_send(struct work_struct *work);
+
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+ enum ib_wc_status status);
+
+void qib_send_rc_ack(struct qib_qp *qp);
+
+int qib_make_rc_req(struct qib_qp *qp);
+
+int qib_make_uc_req(struct qib_qp *qp);
+
+int qib_make_ud_req(struct qib_qp *qp);
+
+int qib_register_ib_device(struct qib_devdata *);
+
+void qib_unregister_ib_device(struct qib_devdata *);
+
+void qib_ib_rcv(struct qib_ctxtdata *, void *, void *, u32);
+
+void qib_ib_piobufavail(struct qib_devdata *);
+
+unsigned qib_get_npkeys(struct qib_devdata *);
+
+unsigned qib_get_pkey(struct qib_ibport *, unsigned);
+
+extern const enum ib_wc_opcode ib_qib_wc_opcode[];
+
+/*
+ * Below are the HCA-independent IB PhysPortState values returned
+ * by the f_ibphys_portstate() routine.
+ */
+#define IB_PHYSPORTSTATE_SLEEP 1
+#define IB_PHYSPORTSTATE_POLL 2
+#define IB_PHYSPORTSTATE_DISABLED 3
+#define IB_PHYSPORTSTATE_CFG_TRAIN 4
+#define IB_PHYSPORTSTATE_LINKUP 5
+#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
+#define IB_PHYSPORTSTATE_CFG_DEBOUNCE 8
+#define IB_PHYSPORTSTATE_CFG_IDLE 0xB
+#define IB_PHYSPORTSTATE_RECOVERY_RETRAIN 0xC
+#define IB_PHYSPORTSTATE_RECOVERY_WAITRMT 0xE
+#define IB_PHYSPORTSTATE_RECOVERY_IDLE 0xF
+#define IB_PHYSPORTSTATE_CFG_ENH 0x10
+#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
+
+extern const int ib_qib_state_ops[];
+
+extern __be64 ib_qib_sys_image_guid; /* in network order */
+
+extern unsigned int ib_qib_lkey_table_size;
+
+extern unsigned int ib_qib_max_cqes;
+
+extern unsigned int ib_qib_max_cqs;
+
+extern unsigned int ib_qib_max_qp_wrs;
+
+extern unsigned int ib_qib_max_qps;
+
+extern unsigned int ib_qib_max_sges;
+
+extern unsigned int ib_qib_max_mcast_grps;
+
+extern unsigned int ib_qib_max_mcast_qp_attached;
+
+extern unsigned int ib_qib_max_srqs;
+
+extern unsigned int ib_qib_max_srq_sges;
+
+extern unsigned int ib_qib_max_srq_wrs;
+
+extern const u32 ib_qib_rnr_table[];
+
+extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
+
+#endif /* QIB_VERBS_H */
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
new file mode 100644
index 00000000000..dabb697b1c2
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/rculist.h>
+
+#include "qib.h"
+
+/**
+ * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
+ * @qp: the QP to link
+ */
+static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
+{
+ struct qib_mcast_qp *mqp;
+
+ mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
+ if (!mqp)
+ goto bail;
+
+ mqp->qp = qp;
+ atomic_inc(&qp->refcount);
+
+bail:
+ return mqp;
+}
+
+static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
+{
+ struct qib_qp *qp = mqp->qp;
+
+ /* Notify qib_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+
+ kfree(mqp);
+}
+
+/**
+ * qib_mcast_alloc - allocate the multicast GID structure
+ * @mgid: the multicast GID
+ *
+ * A list of QPs will be attached to this structure.
+ */
+static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
+{
+ struct qib_mcast *mcast;
+
+ mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
+ if (!mcast)
+ goto bail;
+
+ mcast->mgid = *mgid;
+ INIT_LIST_HEAD(&mcast->qp_list);
+ init_waitqueue_head(&mcast->wait);
+ atomic_set(&mcast->refcount, 0);
+ mcast->n_attached = 0;
+
+bail:
+ return mcast;
+}
+
+static void qib_mcast_free(struct qib_mcast *mcast)
+{
+ struct qib_mcast_qp *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
+ qib_mcast_qp_free(p);
+
+ kfree(mcast);
+}
+
+/**
+ * qib_mcast_find - search the global table for the given multicast GID
+ * @ibp: the IB port structure
+ * @mgid: the multicast GID to search for
+ *
+ * Returns NULL if not found.
+ *
+ * The caller is responsible for decrementing the reference count if found.
+ */
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
+{
+ struct rb_node *n;
+ unsigned long flags;
+ struct qib_mcast *mcast;
+
+ spin_lock_irqsave(&ibp->lock, flags);
+ n = ibp->mcast_tree.rb_node;
+ while (n) {
+ int ret;
+
+ mcast = rb_entry(n, struct qib_mcast, rb_node);
+
+ ret = memcmp(mgid->raw, mcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else {
+ atomic_inc(&mcast->refcount);
+ spin_unlock_irqrestore(&ibp->lock, flags);
+ goto bail;
+ }
+ }
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+ mcast = NULL;
+
+bail:
+ return mcast;
+}
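+
+/*
+ * A hypothetical caller sketch: look up the group, use it, then drop the
+ * reference taken by qib_mcast_find() so a detach waiting on mcast->wait
+ * can make progress:
+ *
+ *	mcast = qib_mcast_find(ibp, &mgid);
+ *	if (mcast) {
+ *		... walk mcast->qp_list ...
+ *		if (atomic_dec_return(&mcast->refcount) <= 1)
+ *			wake_up(&mcast->wait);
+ *	}
+ */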
+
+/**
+ * qib_mcast_add - insert mcast GID into table and attach QP struct
+ * @mcast: the mcast GID table
+ * @mqp: the QP to attach
+ *
+ * Return zero if both were added. Return EEXIST if the GID was already in
+ * the table but the QP was added. Return ESRCH if the QP was already
+ * attached and neither structure was added. Return ENOMEM if the limit on
+ * attached QPs or allocated multicast groups was reached and neither
+ * structure was added.
+ */
+static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
+ struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
+{
+ struct rb_node **n = &ibp->mcast_tree.rb_node;
+ struct rb_node *pn = NULL;
+ int ret;
+
+ spin_lock_irq(&ibp->lock);
+
+ while (*n) {
+ struct qib_mcast *tmcast;
+ struct qib_mcast_qp *p;
+
+ pn = *n;
+ tmcast = rb_entry(pn, struct qib_mcast, rb_node);
+
+ ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0) {
+ n = &pn->rb_left;
+ continue;
+ }
+ if (ret > 0) {
+ n = &pn->rb_right;
+ continue;
+ }
+
+ /* Search the QP list to see if this is already there. */
+ list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
+ if (p->qp == mqp->qp) {
+ ret = ESRCH;
+ goto bail;
+ }
+ }
+ if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ tmcast->n_attached++;
+
+ list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
+ ret = EEXIST;
+ goto bail;
+ }
+
+ spin_lock(&dev->n_mcast_grps_lock);
+ if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
+ spin_unlock(&dev->n_mcast_grps_lock);
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ dev->n_mcast_grps_allocated++;
+ spin_unlock(&dev->n_mcast_grps_lock);
+
+ mcast->n_attached++;
+
+ list_add_tail_rcu(&mqp->list, &mcast->qp_list);
+
+ atomic_inc(&mcast->refcount);
+ rb_link_node(&mcast->rb_node, pn, n);
+ rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
+
+ ret = 0;
+
+bail:
+ spin_unlock_irq(&ibp->lock);
+
+ return ret;
+}
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_ibdev *dev = to_idev(ibqp->device);
+ struct qib_ibport *ibp;
+ struct qib_mcast *mcast;
+ struct qib_mcast_qp *mqp;
+ int ret;
+
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Allocate the data structures now, since it is better to do this
+ * outside of the spin locks and they will most likely be needed.
+ */
+ mcast = qib_mcast_alloc(gid);
+ if (mcast == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ mqp = qib_mcast_qp_alloc(qp);
+ if (mqp == NULL) {
+ qib_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
+ }
+ ibp = to_iport(ibqp->device, qp->port_num);
+ switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
+ case ESRCH:
+ /* Neither was used: OK to attach the same QP twice. */
+ qib_mcast_qp_free(mqp);
+ qib_mcast_free(mcast);
+ break;
+
+ case EEXIST: /* The mcast wasn't used */
+ qib_mcast_free(mcast);
+ break;
+
+ case ENOMEM:
+ /* Exceeded the maximum number of mcast groups. */
+ qib_mcast_qp_free(mqp);
+ qib_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
+
+ default:
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_ibdev *dev = to_idev(ibqp->device);
+ struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
+ struct qib_mcast *mcast = NULL;
+ struct qib_mcast_qp *p, *tmp;
+ struct rb_node *n;
+ int last = 0;
+ int ret;
+
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irq(&ibp->lock);
+
+ /* Find the GID in the mcast table. */
+ n = ibp->mcast_tree.rb_node;
+ while (1) {
+ if (n == NULL) {
+ spin_unlock_irq(&ibp->lock);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ mcast = rb_entry(n, struct qib_mcast, rb_node);
+ ret = memcmp(gid->raw, mcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else
+ break;
+ }
+
+ /* Search the QP list. */
+ list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
+ if (p->qp != qp)
+ continue;
+ /*
+ * We found it, so remove it, but don't poison the forward
+ * link until we are sure there are no list walkers.
+ */
+ list_del_rcu(&p->list);
+ mcast->n_attached--;
+
+ /* If this was the last attached QP, remove the GID too. */
+ if (list_empty(&mcast->qp_list)) {
+ rb_erase(&mcast->rb_node, &ibp->mcast_tree);
+ last = 1;
+ }
+ break;
+ }
+
+ spin_unlock_irq(&ibp->lock);
+
+ if (p) {
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ qib_mcast_qp_free(p);
+ }
+ if (last) {
+ atomic_dec(&mcast->refcount);
+ wait_event(mcast->wait, !atomic_read(&mcast->refcount));
+ qib_mcast_free(mcast);
+ spin_lock_irq(&dev->n_mcast_grps_lock);
+ dev->n_mcast_grps_allocated--;
+ spin_unlock_irq(&dev->n_mcast_grps_lock);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp)
+{
+ return ibp->mcast_tree.rb_node == NULL;
+}
diff --git a/drivers/infiniband/hw/ipath/ipath_7220.h b/drivers/infiniband/hw/qib/qib_wc_ppc64.c
index 74fa5cc5131..673cf4c22eb 100644
--- a/drivers/infiniband/hw/ipath/ipath_7220.h
+++ b/drivers/infiniband/hw/qib/qib_wc_ppc64.c
@@ -1,7 +1,5 @@
-#ifndef _IPATH_7220_H
-#define _IPATH_7220_H
/*
- * Copyright (c) 2007 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,25 +31,32 @@
*/
/*
- * This header file provides the declarations and common definitions
- * for (mostly) manipulation of the SerDes blocks within the IBA7220.
- * the functions declared should only be called from within other
- * 7220-related files such as ipath_iba7220.c or ipath_sd7220.c.
+ * This file is conditionally built on PowerPC only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
*/
-int ipath_sd7220_presets(struct ipath_devdata *dd);
-int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset);
-int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, u8 *img,
- int len, int offset);
-int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, const u8 *img,
- int len, int offset);
-/*
- * Below used for sdnum parameter, selecting one of the two sections
- * used for PCIe, or the single SerDes used for IB, which is the
- * only one currently used
- */
-#define IB_7220_SERDES 2
-int ipath_sd7220_ib_load(struct ipath_devdata *dd);
-int ipath_sd7220_ib_vfy(struct ipath_devdata *dd);
+#include "qib.h"
-#endif /* _IPATH_7220_H */
+/**
+ * qib_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int qib_enable_wc(struct qib_devdata *dd)
+{
+ return 0;
+}
+
+/**
+ * qib_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int qib_unordered_wc(void)
+{
+ return 1;
+}
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
new file mode 100644
index 00000000000..1d7281c5a02
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on x86_64 only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include <linux/pci.h>
+#include <asm/mtrr.h>
+#include <asm/processor.h>
+
+#include "qib.h"
+
+/**
+ * qib_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ *
+ * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
+ * write combining.
+ */
+int qib_enable_wc(struct qib_devdata *dd)
+{
+ int ret = 0;
+ u64 pioaddr, piolen;
+ unsigned bits;
+ const unsigned long addr = pci_resource_start(dd->pcidev, 0);
+ const size_t len = pci_resource_len(dd->pcidev, 0);
+
+ /*
+ * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
+ * chip. Linux (possibly the hardware) requires it to be on a power
+ * of 2 address matching the length (which has to be a power of 2).
+ * For rev1, that means the base address, for rev2, it will be just
+ * the PIO buffers themselves.
+ * For chips with two sets of buffers, the calculations are
+ * somewhat more complicated; we need to sum, and the piobufbase
+ * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+ * The buffers are still packed, so a single range covers both.
+ */
+ if (dd->piobcnt2k && dd->piobcnt4k) {
+ /* 2 sizes for chip */
+ unsigned long pio2kbase, pio4kbase;
+ pio2kbase = dd->piobufbase & 0xffffffffUL;
+ pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL;
+ if (pio2kbase < pio4kbase) {
+ /* all current chips */
+ pioaddr = addr + pio2kbase;
+ piolen = pio4kbase - pio2kbase +
+ dd->piobcnt4k * dd->align4k;
+ } else {
+ pioaddr = addr + pio4kbase;
+ piolen = pio2kbase - pio4kbase +
+ dd->piobcnt2k * dd->palign;
+ }
+ } else { /* single buffer size (2K, currently) */
+ pioaddr = addr + dd->piobufbase;
+ piolen = dd->piobcnt2k * dd->palign +
+ dd->piobcnt4k * dd->align4k;
+ }
+
+ for (bits = 0; !(piolen & (1ULL << bits)); bits++)
+ /* do nothing */ ;
+
+ if (piolen != (1ULL << bits)) {
+ piolen >>= bits;
+ while (piolen >>= 1)
+ bits++;
+ piolen = 1ULL << (bits + 1);
+ }
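+ /*
+ * For example, a 12 KiB region (piolen 0x3000) is not a power of
+ * two: the loops above leave bits at 13 and round piolen up to
+ * 1 << 14 = 16 KiB, so a single MTRR can cover it.
+ */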
+ if (pioaddr & (piolen - 1)) {
+ u64 atmp;
+ atmp = pioaddr & ~(piolen - 1);
+ if (atmp < addr || (atmp + piolen) > (addr + len)) {
+ qib_dev_err(dd,
+ "No way to align address/size (%llx/%llx), no WC mtrr\n",
+ (unsigned long long) atmp,
+ (unsigned long long) piolen << 1);
+ ret = -ENODEV;
+ } else {
+ pioaddr = atmp;
+ piolen <<= 1;
+ }
+ }
+
+ if (!ret) {
+ int cookie;
+
+ cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
+ if (cookie < 0) {
+ qib_devinfo(dd->pcidev,
+ "mtrr_add() WC for PIO bufs failed (%d)\n",
+ cookie);
+ ret = -EINVAL;
+ } else {
+ dd->wc_cookie = cookie;
+ dd->wc_base = (unsigned long) pioaddr;
+ dd->wc_len = (unsigned long) piolen;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * qib_disable_wc - disable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ */
+void qib_disable_wc(struct qib_devdata *dd)
+{
+ if (dd->wc_cookie) {
+ int r;
+
+ r = mtrr_del(dd->wc_cookie, dd->wc_base,
+ dd->wc_len);
+ if (r < 0)
+ qib_devinfo(dd->pcidev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->wc_cookie, dd->wc_base,
+ dd->wc_len, r);
+ dd->wc_cookie = 0; /* even on failure */
+ }
+}
+
+/**
+ * qib_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write combining mmio
+ * writes in the most efficient way, we need to know if we are on an Intel
+ * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
+ * the order completed, and so no special flushing is required to get
+ * correct ordering. Intel processors, however, will flush write buffers
+ * out in "random" orders, and so explicit ordering is needed at times.
+ */
+int qib_unordered_wc(void)
+{
+ return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+}
diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig
new file mode 100644
index 00000000000..29ab11c34f3
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Kconfig
@@ -0,0 +1,10 @@
+config INFINIBAND_USNIC
+ tristate "Verbs support for Cisco VIC"
+ depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
+ select ENIC
+ select NET_VENDOR_CISCO
+ select PCI_IOV
+ select INFINIBAND_USER_ACCESS
+ ---help---
+ This is a low-level driver for Cisco's Virtual Interface
+ Cards (VICs), including the VIC 1240 and 1280 cards.
diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile
new file mode 100644
index 00000000000..99fb2db47cd
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Makefile
@@ -0,0 +1,15 @@
+ccflags-y := -Idrivers/net/ethernet/cisco/enic
+
+obj-$(CONFIG_INFINIBAND_USNIC) += usnic_verbs.o
+
+usnic_verbs-y=\
+usnic_fwd.o \
+usnic_transport.o \
+usnic_uiom.o \
+usnic_uiom_interval_tree.o \
+usnic_vnic.o \
+usnic_ib_main.o \
+usnic_ib_qp_grp.o \
+usnic_ib_sysfs.o \
+usnic_ib_verbs.o \
+usnic_debugfs.o \
diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h
new file mode 100644
index 00000000000..5be13d8991b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_H_
+#define USNIC_H_
+
+#define DRV_NAME "usnic_verbs"
+
+#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */
+
+#define DRV_VERSION "1.0.3"
+#define DRV_RELDATE "December 19, 2013"
+
+#endif /* USNIC_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h
new file mode 100644
index 00000000000..04a66229584
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_abi.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#ifndef USNIC_ABI_H
+#define USNIC_ABI_H
+
+/* ABI between userspace and kernel */
+#define USNIC_UVERBS_ABI_VERSION 4
+
+#define USNIC_QP_GRP_MAX_WQS 8
+#define USNIC_QP_GRP_MAX_RQS 8
+#define USNIC_QP_GRP_MAX_CQS 16
+
+enum usnic_transport_type {
+ USNIC_TRANSPORT_UNKNOWN = 0,
+ USNIC_TRANSPORT_ROCE_CUSTOM = 1,
+ USNIC_TRANSPORT_IPV4_UDP = 2,
+ USNIC_TRANSPORT_MAX = 3,
+};
+
+struct usnic_transport_spec {
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ uint32_t sock_fd;
+ } udp;
+ };
+};
+
+struct usnic_ib_create_qp_cmd {
+ struct usnic_transport_spec spec;
+};
+
+/*TODO: Future - usnic_modify_qp needs to pass in generic filters */
+struct usnic_ib_create_qp_resp {
+ u32 vfid;
+ u32 qp_grp_id;
+ u64 bar_bus_addr;
+ u32 bar_len;
+/*
+ * WQ, RQ, CQ are explicitly specified because exposing a generic resource
+ * interface expands the scope of the ABI to many files.
+ */
+ u32 wq_cnt;
+ u32 rq_cnt;
+ u32 cq_cnt;
+ u32 wq_idx[USNIC_QP_GRP_MAX_WQS];
+ u32 rq_idx[USNIC_QP_GRP_MAX_RQS];
+ u32 cq_idx[USNIC_QP_GRP_MAX_CQS];
+ u32 transport;
+ u32 reserved[9];
+};
+
+#endif /* USNIC_ABI_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
new file mode 100644
index 00000000000..39356726614
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_PKT_HDR_H
+#define USNIC_CMN_PKT_HDR_H
+
+#define USNIC_ROCE_ETHERTYPE (0x8915)
+#define USNIC_ROCE_GRH_VER (8)
+#define USNIC_PROTO_VER (1)
+#define USNIC_ROCE_GRH_VER_SHIFT (4)
+
+#endif /* USNIC_CMN_PKT_HDR_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
new file mode 100644
index 00000000000..9d737ed5e55
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_UTIL_H
+#define USNIC_CMN_UTIL_H
+
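+/*
+ * Build an IPv6 link-local style GID from the device MAC: the fe80::/64
+ * prefix followed by a modified EUI-64 interface ID (mac[0] ^ 2 flips the
+ * universal/local bit, with 0xff 0xfe inserted between the OUI and the
+ * device-specific bytes).
+ */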
+static inline void
+usnic_mac_to_gid(const char *const mac, char *raw_gid)
+{
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 6);
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
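+/*
+ * Same as usnic_mac_to_gid(), but additionally embeds the IPv4 address into
+ * bytes 4-7 of the GID so the GID encodes both the MAC and the IP.
+ */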
+static inline void
+usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
+{
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 2);
+ memcpy(&raw_gid[4], &inaddr, 4);
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
+{
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+#endif /* USNIC_CMN_UTIL_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
new file mode 100644
index 00000000000..5d13860161a
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "usnic.h"
+#include "usnic_log.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_transport.h"
+
+static struct dentry *debugfs_root;
+static struct dentry *flows_dentry;
+
+static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data,
+ size_t count, loff_t *ppos)
+{
+ char buf[500];
+ int res;
+
+ if (*ppos > 0)
+ return 0;
+
+ res = scnprintf(buf, sizeof(buf),
+ "version: %s\n"
+ "build date: %s\n",
+ DRV_VERSION, DRV_RELDATE);
+
+ return simple_read_from_buffer(data, count, ppos, buf, res);
+}
+
+static const struct file_operations usnic_debugfs_buildinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = usnic_debugfs_buildinfo_read
+};
+
+static ssize_t flowinfo_read(struct file *f, char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ int n;
+ int left;
+ char *ptr;
+ char buf[512];
+
+ qp_flow = f->private_data;
+ ptr = buf;
+ left = count;
+
+ if (*ppos > 0)
+ return 0;
+
+ spin_lock(&qp_flow->qp_grp->lock);
+ n = scnprintf(ptr, left,
+ "QP Grp ID: %d Transport: %s ",
+ qp_flow->qp_grp->grp_id,
+ usnic_transport_to_str(qp_flow->trans_type));
+ UPDATE_PTR_LEFT(n, ptr, left);
+ if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ n = scnprintf(ptr, left, "Port_Num:%hu\n",
+ qp_flow->usnic_roce.port_num);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+ n = usnic_transport_sock_to_str(ptr, left,
+ qp_flow->udp.sock);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ spin_unlock(&qp_flow->qp_grp->lock);
+
+ return simple_read_from_buffer(data, count, ppos, buf, ptr - buf);
+}
+
+static const struct file_operations flowinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = flowinfo_read,
+};
+
+void usnic_debugfs_init(void)
+{
+ debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
+ if (IS_ERR_OR_NULL(debugfs_root)) {
+ usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
+ goto out_clear_root;
+ }
+
+ flows_dentry = debugfs_create_dir("flows", debugfs_root);
+ if (IS_ERR_OR_NULL(flows_dentry)) {
+ usnic_err("Failed to create debugfs flow dir with err %ld\n",
+ PTR_ERR(flows_dentry));
+ goto out_free_root;
+ }
+
+ debugfs_create_file("build-info", S_IRUGO, debugfs_root,
+ NULL, &usnic_debugfs_buildinfo_ops);
+ return;
+
+out_free_root:
+ debugfs_remove_recursive(debugfs_root);
+out_clear_root:
+ debugfs_root = NULL;
+}
+
+void usnic_debugfs_exit(void)
+{
+ if (!debugfs_root)
+ return;
+
+ debugfs_remove_recursive(debugfs_root);
+ debugfs_root = NULL;
+}
+
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ if (IS_ERR_OR_NULL(flows_dentry))
+ return;
+
+ scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name),
+ "%u", qp_flow->flow->flow_id);
+ qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name,
+ S_IRUGO,
+ flows_dentry,
+ qp_flow,
+ &flowinfo_ops);
+ if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
+ usnic_err("Failed to create dbg fs entry for flow %u\n",
+ qp_flow->flow->flow_id);
+ }
+}
+
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
+ debugfs_remove(qp_flow->dbgfs_dentry);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h
new file mode 100644
index 00000000000..4087d24a88f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef USNIC_DEBUGFS_H_
+#define USNIC_DEBUGFS_H_
+
+#include "usnic_ib_qp_grp.h"
+
+void usnic_debugfs_init(void);
+
+void usnic_debugfs_exit(void);
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow);
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow);
+
+#endif /*!USNIC_DEBUGFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
new file mode 100644
index 00000000000..e3c9bd9d3ba
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "enic_api.h"
+#include "usnic_common_pkt_hdr.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+
+static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
+ enum vnic_devcmd_cmd cmd, u64 *a0,
+ u64 *a1)
+{
+ int status;
+ struct net_device *netdev = ufdev->netdev;
+
+ lockdep_assert_held(&ufdev->lock);
+
+ status = enic_api_devcmd_proxy_by_index(netdev,
+ vnic_idx,
+ cmd,
+ a0, a1,
+ 1000);
+ if (status) {
+ if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
+ usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
+ ufdev->name, vnic_idx, cmd);
+ } else {
+ usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
+ ufdev->name, vnic_idx, cmd,
+ status);
+ }
+ } else {
+ usnic_dbg("Dev %s vnic idx %u cmd %u success",
+ ufdev->name, vnic_idx, cmd);
+ }
+
+ return status;
+}
+
+static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx,
+ enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1)
+{
+ int status;
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1);
+ spin_unlock(&ufdev->lock);
+
+ return status;
+}
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
+{
+ struct usnic_fwd_dev *ufdev;
+
+ ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL);
+ if (!ufdev)
+ return NULL;
+
+ ufdev->pdev = pdev;
+ ufdev->netdev = pci_get_drvdata(pdev);
+ spin_lock_init(&ufdev->lock);
+ strncpy(ufdev->name, netdev_name(ufdev->netdev),
+ sizeof(ufdev->name) - 1);
+
+ return ufdev;
+}
+
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev)
+{
+ kfree(ufdev);
+}
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN])
+{
+ spin_lock(&ufdev->lock);
+ memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
+ spin_unlock(&ufdev->lock);
+}
+
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr)
+{
+ int status;
+
+ spin_lock(&ufdev->lock);
+ if (ufdev->inaddr == 0) {
+ ufdev->inaddr = inaddr;
+ status = 0;
+ } else {
+ status = -EFAULT;
+ }
+ spin_unlock(&ufdev->lock);
+
+ return status;
+}
+
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->inaddr = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 1;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->mtu = mtu;
+ spin_unlock(&ufdev->lock);
+}
+
+static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev)
+{
+ lockdep_assert_held(&ufdev->lock);
+
+ if (!ufdev->link_up)
+ return -EPERM;
+
+ return 0;
+}
+
+static int validate_filter_locked(struct usnic_fwd_dev *ufdev,
+ struct filter *filter)
+{
+
+ lockdep_assert_held(&ufdev->lock);
+
+ if (filter->type == FILTER_IPV4_5TUPLE) {
+ if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD))
+ return -EACCES;
+ if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT))
+ return -EBUSY;
+ else if (ufdev->inaddr == 0)
+ return -EINVAL;
+ else if (filter->u.ipv4.dst_port == 0)
+ return -ERANGE;
+ else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr)
+ return -EFAULT;
+ else
+ return 0;
+ }
+
+ return 0;
+}
+
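+/* Write a filter TLV followed immediately by an action TLV into the devcmd buffer */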
+static void fill_tlv(struct filter_tlv *tlv, struct filter *filter,
+ struct filter_action *action)
+{
+ tlv->type = CLSF_TLV_FILTER;
+ tlv->length = sizeof(struct filter);
+ *((struct filter *)&tlv->val) = *filter;
+
+ tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) +
+ sizeof(struct filter));
+ tlv->type = CLSF_TLV_ACTION;
+ tlv->length = sizeof(struct filter_action);
+ *((struct filter_action *)&tlv->val) = *action;
+}
+
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *uaction)
+{
+ struct filter_tlv *tlv;
+ struct pci_dev *pdev;
+ struct usnic_fwd_flow *flow;
+ uint64_t a0, a1;
+ uint64_t tlv_size;
+ dma_addr_t tlv_pa;
+ int status;
+
+ pdev = ufdev->pdev;
+ tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) +
+ sizeof(struct filter_action));
+
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+ if (!tlv) {
+ usnic_err("Failed to allocate memory\n");
+ status = -ENOMEM;
+ goto out_free_flow;
+ }
+
+ fill_tlv(tlv, filter, &uaction->action);
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_dev_ready_locked(ufdev);
+ if (status) {
+ usnic_err("Forwarding dev %s not ready with status %d\n",
+ ufdev->name, status);
+ goto out_free_tlv;
+ }
+
+ status = validate_filter_locked(ufdev, filter);
+ if (status) {
+ usnic_err("Failed to validate filter with status %d\n",
+ status);
+ goto out_free_tlv;
+ }
+
+ /* Issue Devcmd */
+ a0 = tlv_pa;
+ a1 = tlv_size;
+ status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
+ CMD_ADD_FILTER, &a0, &a1);
+ if (status) {
+ usnic_err("VF %s Filter add failed with status:%d",
+ ufdev->name, status);
+ status = -EFAULT;
+ goto out_free_tlv;
+ } else {
+ usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
+ }
+
+ flow->flow_id = (uint32_t) a0;
+ flow->vnic_idx = uaction->vnic_idx;
+ flow->ufdev = ufdev;
+
+out_free_tlv:
+ spin_unlock(&ufdev->lock);
+ pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+ if (!status)
+ return flow;
+out_free_flow:
+ kfree(flow);
+ return ERR_PTR(status);
+}
+
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
+{
+ int status;
+ u64 a0, a1;
+
+ a0 = flow->flow_id;
+
+ status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
+ CMD_DEL_FILTER, &a0, &a1);
+ if (status) {
+ if (status == ERR_EINVAL) {
+ usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
+ flow->flow_id, flow->vnic_idx,
+ flow->ufdev->name, status);
+ } else {
+ usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
+ flow->ufdev->name, flow->vnic_idx,
+ flow->flow_id, status);
+ }
+ status = 0;
+ /*
+ * Log the error and fake success to the caller because if
+ * a flow fails to be deleted in the firmware, it is an
+ * unrecoverable error.
+ */
+ } else {
+ usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
+ flow->ufdev->name, flow->vnic_idx,
+ flow->flow_id);
+ }
+
+ kfree(flow);
+ return status;
+}
+
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+ int status;
+ struct net_device *pf_netdev;
+ u64 a0, a1;
+
+ pf_netdev = ufdev->netdev;
+ a0 = qp_idx;
+ a1 = CMD_QP_RQWQ;
+
+ status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE,
+ &a0, &a1);
+ if (status) {
+ usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx,
+ status);
+ } else {
+ usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED",
+ netdev_name(pf_netdev),
+ vnic_idx, qp_idx);
+ }
+
+ return status;
+}
+
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+ int status;
+ u64 a0, a1;
+ struct net_device *pf_netdev;
+
+ pf_netdev = ufdev->netdev;
+ a0 = qp_idx;
+ a1 = CMD_QP_RQWQ;
+
+ status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE,
+ &a0, &a1);
+ if (status) {
+ usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx,
+ status);
+ } else {
+ usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
+ netdev_name(pf_netdev),
+ vnic_idx,
+ qp_idx);
+ }
+
+ return status;
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
new file mode 100644
index 00000000000..93713a2230b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_FWD_H_
+#define USNIC_FWD_H_
+
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/in.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_pkt_hdr.h"
+#include "vnic_devcmd.h"
+
+struct usnic_fwd_dev {
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ spinlock_t lock;
+ /*
+ * The following fields can be read directly off the device.
+ * However, they should be set by an accessor function, except name,
+ * which cannot be changed.
+ */
+ bool link_up;
+ char mac[ETH_ALEN];
+ unsigned int mtu;
+ __be32 inaddr;
+ char name[IFNAMSIZ+1];
+};
+
+struct usnic_fwd_flow {
+ uint32_t flow_id;
+ struct usnic_fwd_dev *ufdev;
+ unsigned int vnic_idx;
+};
+
+struct usnic_filter_action {
+ int vnic_idx;
+ struct filter_action action;
+};
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev);
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev);
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]);
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr);
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu);
+
+/*
+ * Allocate a flow on this forwarding device. Whoever calls this function
+ * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or
+ * NETDEV_DOWN is seen, the flow will no longer function and must be
+ * immediately freed by calling usnic_fwd_dealloc_flow().
+ */
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *action);
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow);
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+
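+/*
+ * Build a usNIC-ID filter: match on the RoCE ethertype, the packed protocol
+ * version byte ((USNIC_ROCE_GRH_VER << USNIC_ROCE_GRH_VER_SHIFT) |
+ * USNIC_PROTO_VER) and the given usnic_id.
+ */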
+static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
+ uint32_t usnic_id)
+{
+ filter->type = FILTER_USNIC_ID;
+ filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
+ filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
+ FILTER_FIELD_USNIC_ID |
+ FILTER_FIELD_USNIC_PROTO;
+ filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER <<
+ USNIC_ROCE_GRH_VER_SHIFT) |
+ USNIC_PROTO_VER;
+ filter->u.usnic.usnic_id = usnic_id;
+}
+
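+/*
+ * Build an IPv4/UDP 5-tuple filter; the destination address and port are
+ * only matched when they are non-zero.
+ */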
+static inline void usnic_fwd_init_udp_filter(struct filter *filter,
+ uint32_t daddr, uint16_t dport)
+{
+ filter->type = FILTER_IPV4_5TUPLE;
+ filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO;
+ filter->u.ipv4.protocol = PROTO_UDP;
+
+ if (daddr) {
+ filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD;
+ filter->u.ipv4.dst_addr = daddr;
+ }
+
+ if (dport) {
+ filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT;
+ filter->u.ipv4.dst_port = dport;
+ }
+}
+
+#endif /* !USNIC_FWD_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h
new file mode 100644
index 00000000000..e5a9297dd1b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_H_
+#define USNIC_IB_H_
+
+#include <linux/iommu.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+
+
+#include "usnic.h"
+#include "usnic_abi.h"
+#include "usnic_vnic.h"
+
+#define USNIC_IB_PORT_CNT 1
+#define USNIC_IB_NUM_COMP_VECTORS 1
+
+extern unsigned int usnic_ib_share_vf;
+
+struct usnic_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ /* Protected by usnic_ib_dev->usdev_lock */
+ struct list_head qp_grp_list;
+ struct list_head link;
+};
+
+struct usnic_ib_pd {
+ struct ib_pd ibpd;
+ struct usnic_uiom_pd *umem_pd;
+};
+
+struct usnic_ib_mr {
+ struct ib_mr ibmr;
+ struct usnic_uiom_reg *umem;
+};
+
+struct usnic_ib_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ struct usnic_fwd_dev *ufdev;
+ struct list_head ib_dev_link;
+ struct list_head vf_dev_list;
+ struct list_head ctx_list;
+ struct mutex usdev_lock;
+
+ /* provisioning information */
+ struct kref vf_cnt;
+ unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX];
+
+ /* sysfs vars for QPN reporting */
+ struct kobject *qpn_kobj;
+};
+
+struct usnic_ib_vf {
+ struct usnic_ib_dev *pf;
+ spinlock_t lock;
+ struct usnic_vnic *vnic;
+ unsigned int qp_grp_ref_cnt;
+ struct usnic_ib_pd *pd;
+ struct list_head link;
+};
+
+static inline
+struct usnic_ib_dev *to_usdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct usnic_ib_dev, ib_dev);
+}
+
+static inline
+struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_pd *to_upd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct usnic_ib_pd, ibpd);
+}
+
+static inline
+struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_mr *to_umr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct usnic_ib_mr, ibmr);
+}
+void usnic_ib_log_vf(struct usnic_ib_vf *vf);
+
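+/* After scnprintf() wrote N bytes, advance buffer pointer P and shrink remaining length L */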
+#define UPDATE_PTR_LEFT(N, P, L) \
+do { \
+ L -= (N); \
+ P += (N); \
+} while (0)
+
+#endif /* USNIC_IB_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
new file mode 100644
index 00000000000..fb6d026f92c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Upinder Malhi <umalhi@cisco.com>
+ * Author: Anant Deepak <anadeepa@cisco.com>
+ * Author: Cesare Cantu' <cantuc@cisco.com>
+ * Author: Jeff Squyres <jsquyres@cisco.com>
+ * Author: Kiran Thirumalai <kithirum@cisco.com>
+ * Author: Xuyang Wang <xuywang@cisco.com>
+ * Author: Reese Faucette <rfaucett@cisco.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_log.h"
+#include "usnic_fwd.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_transport.h"
+#include "usnic_uiom.h"
+#include "usnic_ib_sysfs.h"
+
+unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR;
+unsigned int usnic_ib_share_vf = 1;
+
+static const char usnic_version[] =
+ DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static DEFINE_MUTEX(usnic_ib_ibdev_list_lock);
+static LIST_HEAD(usnic_ib_ibdev_list);
+
+/* Callback dump funcs */
+static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_vf *vf = obj;
+ return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+}
+/* End callback dump funcs */
+
+static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
+{
+ usnic_vnic_dump(vf->vnic, buf, buf_sz, vf,
+ usnic_ib_dump_vf_hdr,
+ usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows);
+}
+
+void usnic_ib_log_vf(struct usnic_ib_vf *vf)
+{
+ char buf[1000];
+ usnic_ib_dump_vf(vf, buf, sizeof(buf));
+ usnic_dbg("%s\n", buf);
+}
+
+/* Start of netdev section */
+static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
+{
+ const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN",
+ "NETDEV_REBOOT", "NETDEV_CHANGE",
+ "NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU",
+ "NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE",
+ "NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP",
+ "NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE",
+ "NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE",
+ "NETDEV_NOTIFY_PEERS", "NETDEV_JOIN"
+ };
+
+ if (event >= ARRAY_SIZE(event2str))
+ return "UNKNOWN_NETDEV_EVENT";
+ else
+ return event2str[event];
+}
+
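+/*
+ * Move every INIT/RTR/RTS QP group on this device to the ERR state; used
+ * when a netdev event invalidates the forwarding state under active QPs.
+ */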
+static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
+{
+ struct usnic_ib_ucontext *ctx;
+ struct usnic_ib_qp_grp *qp_grp;
+ enum ib_qp_state cur_state;
+ int status;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ list_for_each_entry(ctx, &us_ibdev->ctx_list, link) {
+ list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) {
+ cur_state = qp_grp->state;
+ if (cur_state == IB_QPS_INIT ||
+ cur_state == IB_QPS_RTR ||
+ cur_state == IB_QPS_RTS) {
+ status = usnic_ib_qp_grp_modify(qp_grp,
+ IB_QPS_ERR,
+ NULL);
+ if (status) {
+ usnic_err("Failed to transistion qp grp %u from %s to %s\n",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string
+ (cur_state),
+ usnic_ib_qp_grp_state_to_string
+ (IB_QPS_ERR));
+ }
+ }
+ }
+ }
+}
+
+static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event)
+{
+ struct net_device *netdev;
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ netdev = us_ibdev->netdev;
+ switch (event) {
+ case NETDEV_REBOOT:
+ usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ if (!us_ibdev->ufdev->link_up &&
+ netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+ usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+ ib_event.event = IB_EVENT_PORT_ACTIVE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else if (us_ibdev->ufdev->link_up &&
+ !netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_down(us_ibdev->ufdev);
+ usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else {
+ usnic_dbg("Ignoring %s on %s\n",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ case NETDEV_CHANGEADDR:
+ if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
+ sizeof(us_ibdev->ufdev->mac))) {
+ usnic_dbg("Ignoring addr change on %s\n",
+ us_ibdev->ib_dev.name);
+ } else {
+ usnic_info(" %s old mac: %pM new mac: %pM\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mac,
+ netdev->dev_addr);
+ usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ }
+
+ break;
+ case NETDEV_CHANGEMTU:
+ if (us_ibdev->ufdev->mtu != netdev->mtu) {
+ usnic_info("MTU Change on %s old: %u new: %u\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mtu, netdev->mtu);
+ usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ } else {
+ usnic_dbg("Ignoring MTU change on %s\n",
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ default:
+ usnic_dbg("Ignoring event %s on %s",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+}
+
+static int usnic_ib_netdevice_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_usdev_event(us_ibdev, event);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block usnic_ib_netdevice_notifier = {
+ .notifier_call = usnic_ib_netdevice_event
+};
+/* End of netdev section */
+
+/* Start of inet section */
+static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct ib_event ib_event;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+
+ switch (event) {
+ case NETDEV_DOWN:
+ usnic_info("%s via ip notifiers",
+ usnic_ib_netdev_event_to_string(event));
+ usnic_fwd_del_ipaddr(us_ibdev->ufdev);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
+ usnic_info("%s via ip notifiers: ip %pI4",
+ usnic_ib_netdev_event_to_string(event),
+ &us_ibdev->ufdev->inaddr);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ default:
+ usnic_info("Ignoring event %s on %s",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_inet_event(us_ibdev, event, ptr);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+static struct notifier_block usnic_ib_inetaddr_notifier = {
+ .notifier_call = usnic_ib_inetaddr_event
+};
+/* End of inet section*/
+
+/* Start of PF discovery section */
+static void *usnic_ib_device_add(struct pci_dev *dev)
+{
+ struct usnic_ib_dev *us_ibdev;
+ union ib_gid gid;
+ struct in_ifaddr *in;
+ struct net_device *netdev;
+
+ usnic_dbg("\n");
+ netdev = pci_get_drvdata(dev);
+
+ us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
+ if (IS_ERR_OR_NULL(us_ibdev)) {
+ usnic_err("Device %s context alloc failed\n",
+ netdev_name(pci_get_drvdata(dev)));
+ return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+ }
+
+ us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
+ if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
+ usnic_err("Failed to alloc ufdev for %s with err %ld\n",
+ pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+ goto err_dealloc;
+ }
+
+ mutex_init(&us_ibdev->usdev_lock);
+ INIT_LIST_HEAD(&us_ibdev->vf_dev_list);
+ INIT_LIST_HEAD(&us_ibdev->ctx_list);
+
+ us_ibdev->pdev = dev;
+ us_ibdev->netdev = pci_get_drvdata(dev);
+ us_ibdev->ib_dev.owner = THIS_MODULE;
+ us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
+ us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
+ us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
+ us_ibdev->ib_dev.dma_device = &dev->dev;
+ us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
+ strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
+
+ us_ibdev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+ us_ibdev->ib_dev.query_device = usnic_ib_query_device;
+ us_ibdev->ib_dev.query_port = usnic_ib_query_port;
+ us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
+ us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
+ us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
+ us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
+ us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
+ us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
+ us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
+ us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
+ us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
+ us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
+ us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
+ us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
+ us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
+ us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
+ us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
+ us_ibdev->ib_dev.mmap = usnic_ib_mmap;
+ us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
+ us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
+ us_ibdev->ib_dev.post_send = usnic_ib_post_send;
+ us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
+ us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
+ us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
+ us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
+
+
+ if (ib_register_device(&us_ibdev->ib_dev, NULL))
+ goto err_fwd_dealloc;
+
+ usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
+ usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr);
+ if (netif_carrier_ok(us_ibdev->netdev))
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+
+ in = ((struct in_device *)(netdev->ip_ptr))->ifa_list;
+ if (in != NULL)
+ usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address);
+
+ usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr,
+ us_ibdev->ufdev->inaddr, &gid.raw[0]);
+ memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ kref_init(&us_ibdev->vf_cnt);
+
+ usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
+ us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
+ us_ibdev->ufdev->mtu);
+ return us_ibdev;
+
+err_fwd_dealloc:
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+err_dealloc:
+ usnic_err("failed -- deallocing device\n");
+ ib_dealloc_device(&us_ibdev->ib_dev);
+ return NULL;
+}
+
+static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
+{
+ usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_sysfs_unregister_usdev(us_ibdev);
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+ ib_unregister_device(&us_ibdev->ib_dev);
+ ib_dealloc_device(&us_ibdev->ib_dev);
+}
+
+static void usnic_ib_undiscover_pf(struct kref *kref)
+{
+ struct usnic_ib_dev *us_ibdev, *tmp;
+ struct pci_dev *dev;
+ bool found = false;
+
+ dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev;
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry_safe(us_ibdev, tmp,
+ &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->pdev == dev) {
+ list_del(&us_ibdev->ib_dev_link);
+ usnic_ib_device_remove(us_ibdev);
+ found = true;
+ break;
+ }
+ }
+
+ WARN(!found, "Failed to remove PF %s\n", pci_name(dev));
+
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+}
+
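+/*
+ * Find (or create) the ibdev for the PF that owns this VF's vnic. Each
+ * probed VF holds a reference on the PF via vf_cnt; the PF ibdev is torn
+ * down by usnic_ib_undiscover_pf() when the last VF is removed.
+ */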
+static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct pci_dev *parent_pci, *vf_pci;
+ int err;
+
+ vf_pci = usnic_vnic_get_pdev(vnic);
+ parent_pci = pci_physfn(vf_pci);
+
+ BUG_ON(!parent_pci);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->pdev == parent_pci) {
+ kref_get(&us_ibdev->vf_cnt);
+ goto out;
+ }
+ }
+
+ us_ibdev = usnic_ib_device_add(parent_pci);
+ if (IS_ERR_OR_NULL(us_ibdev)) {
+ us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+ goto out;
+ }
+
+ err = usnic_ib_sysfs_register_usdev(us_ibdev);
+ if (err) {
+ usnic_ib_device_remove(us_ibdev);
+ us_ibdev = ERR_PTR(err);
+ goto out;
+ }
+
+ list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list);
+out:
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+ return us_ibdev;
+}
+/* End of PF discovery section */
+
+/* Start of PCI section */
+
+static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = {
+ {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
+ {0,}
+};
+
+static int usnic_ib_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int err;
+ struct usnic_ib_dev *pf;
+ struct usnic_ib_vf *vf;
+ enum usnic_vnic_res_type res_type;
+
+ vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+ if (!vf)
+ return -ENOMEM;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ usnic_err("Failed to enable %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_clean_vf;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ usnic_err("Failed to request region for %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_disable_device;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, vf);
+
+ vf->vnic = usnic_vnic_alloc(pdev);
+ if (IS_ERR_OR_NULL(vf->vnic)) {
+ err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM;
+ usnic_err("Failed to alloc vnic for %s with err %d\n",
+ pci_name(pdev), err);
+ goto out_release_regions;
+ }
+
+ pf = usnic_ib_discover_pf(vf->vnic);
+ if (IS_ERR_OR_NULL(pf)) {
+ usnic_err("Failed to discover pf of vnic %s with err%ld\n",
+ pci_name(pdev), PTR_ERR(pf));
+ err = pf ? PTR_ERR(pf) : -EFAULT;
+ goto out_clean_vnic;
+ }
+
+ vf->pf = pf;
+ spin_lock_init(&vf->lock);
+ mutex_lock(&pf->usdev_lock);
+ list_add_tail(&vf->link, &pf->vf_dev_list);
+ /*
+ * Save max settings (will be the same for each VF; easier to re-write
+ * than to say "if (!set) { set_values(); set = 1; }").
+ */
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL+1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic,
+ res_type);
+ }
+
+ mutex_unlock(&pf->usdev_lock);
+
+ usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
+ pf->ib_dev.name);
+ usnic_ib_log_vf(vf);
+ return 0;
+
+out_clean_vnic:
+ usnic_vnic_free(vf->vnic);
+out_release_regions:
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+out_disable_device:
+ pci_disable_device(pdev);
+out_clean_vf:
+ kfree(vf);
+ return err;
+}
+
+static void usnic_ib_pci_remove(struct pci_dev *pdev)
+{
+ struct usnic_ib_vf *vf = pci_get_drvdata(pdev);
+ struct usnic_ib_dev *pf = vf->pf;
+
+ mutex_lock(&pf->usdev_lock);
+ list_del(&vf->link);
+ mutex_unlock(&pf->usdev_lock);
+
+ kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf);
+ usnic_vnic_free(vf->vnic);
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ kfree(vf);
+
+ usnic_info("Removed VF %s\n", pci_name(pdev));
+}
+
+/* PCI driver entry points */
+static struct pci_driver usnic_ib_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = usnic_ib_pci_ids,
+ .probe = usnic_ib_pci_probe,
+ .remove = usnic_ib_pci_remove,
+};
+/* End of PCI section */
+
+/* Start of module section */
+static int __init usnic_ib_init(void)
+{
+ int err;
+
+ printk_once(KERN_INFO "%s", usnic_version);
+
+ err = usnic_uiom_init(DRV_NAME);
+ if (err) {
+ usnic_err("Unable to initalize umem with err %d\n", err);
+ return err;
+ }
+
+ err = pci_register_driver(&usnic_ib_pci_driver);
+ if (err) {
+ usnic_err("Unable to register with PCI\n");
+ goto out_umem_fini;
+ }
+
+ err = register_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ if (err) {
+ usnic_err("Failed to register netdev notifier\n");
+ goto out_pci_unreg;
+ }
+
+ err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ if (err) {
+ usnic_err("Failed to register inet addr notifier\n");
+ goto out_unreg_netdev_notifier;
+ }
+
+ err = usnic_transport_init();
+ if (err) {
+ usnic_err("Failed to initialize transport\n");
+ goto out_unreg_inetaddr_notifier;
+ }
+
+ usnic_debugfs_init();
+
+ return 0;
+
+out_unreg_inetaddr_notifier:
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+out_unreg_netdev_notifier:
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+out_pci_unreg:
+ pci_unregister_driver(&usnic_ib_pci_driver);
+out_umem_fini:
+ usnic_uiom_fini();
+
+ return err;
+}
+
+static void __exit usnic_ib_destroy(void)
+{
+ usnic_dbg("\n");
+ usnic_debugfs_exit();
+ usnic_transport_fini();
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ pci_unregister_driver(&usnic_ib_pci_driver);
+ usnic_uiom_fini();
+}
+
+MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver");
+MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR);
+module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3");
+MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs");
+MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids);
+
+module_init(usnic_ib_init);
+module_exit(usnic_ib_destroy);
+/* End of module section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
new file mode 100644
index 00000000000..f8dfd76be89
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+#include "usnic_fwd.h"
+#include "usnic_uiom.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_ib_sysfs.h"
+#include "usnic_transport.h"
+
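+/* Default RQ within a QP group used as the steering target for flow filters */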
+#define DFLT_RQ_IDX 0
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return "Rst";
+ case IB_QPS_INIT:
+ return "Init";
+ case IB_QPS_RTR:
+ return "RTR";
+ case IB_QPS_RTS:
+ return "RTS";
+ case IB_QPS_SQD:
+ return "SQD";
+ case IB_QPS_SQE:
+ return "SQE";
+ case IB_QPS_ERR:
+ return "ERR";
+ default:
+ return "UNKOWN STATE";
+
+ }
+}
+
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz)
+{
+ return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID");
+}
+
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_qp_grp *qp_grp = obj;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ if (obj) {
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(
+ qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic),
+ default_flow->flow->flow_id);
+ } else {
+ return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A");
+ }
+}
+
+static struct usnic_vnic_res_chunk *
+get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp)
+{
+ lockdep_assert_held(&qp_grp->lock);
+ /*
+ * The QP res chunk, used to derive qp indices,
+ * are just indices of the RQs
+ */
+ return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+}
+
+static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+
+ int status;
+ int i, vnic_idx;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *res;
+
+ lockdep_assert_held(&qp_grp->lock);
+
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+ res_chunk = get_qp_res_chunk(qp_grp);
+ if (IS_ERR_OR_NULL(res_chunk)) {
+ usnic_err("Unable to get qp res with err %ld\n",
+ PTR_ERR(res_chunk));
+ return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ }
+
+ for (i = 0; i < res_chunk->cnt; i++) {
+ res = res_chunk->res[i];
+ status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ if (status) {
+ usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n",
+ res->vnic_idx, qp_grp->ufdev->name,
+ vnic_idx, status);
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ for (i--; i >= 0; i--) {
+ res = res_chunk->res[i];
+ usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ }
+
+ return status;
+}
+
+static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+ int i, vnic_idx;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *res;
+ int status = 0;
+
+ lockdep_assert_held(&qp_grp->lock);
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+ res_chunk = get_qp_res_chunk(qp_grp);
+ if (IS_ERR_OR_NULL(res_chunk)) {
+ usnic_err("Unable to get qp res with err %ld\n",
+ PTR_ERR(res_chunk));
+ return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ }
+
+ for (i = 0; i < res_chunk->cnt; i++) {
+ res = res_chunk->res[i];
+ status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+ res->vnic_idx);
+ if (status) {
+ usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n",
+ res->vnic_idx,
+ qp_grp->ufdev->name,
+ vnic_idx, status);
+ }
+ }
+
+ return status;
+
+}
+
+static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_filter_action *uaction)
+{
+ struct usnic_vnic_res_chunk *res_chunk;
+
+ res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR_OR_NULL(res_chunk)) {
+ usnic_err("Unable to get %s with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ PTR_ERR(res_chunk));
+ return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ }
+
+ uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+ uaction->action.type = FILTER_ACTION_RQ_STEERING;
+ uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx;
+
+ return 0;
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ uint16_t port_num;
+ int err;
+ struct filter filter;
+ struct usnic_filter_action uaction;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+
+ trans_type = trans_spec->trans_type;
+ port_num = trans_spec->usnic_roce.port_num;
+
+ /* Reserve Port */
+ port_num = usnic_transport_rsrv_port(trans_type, port_num);
+ if (port_num == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* Create Flow */
+ usnic_fwd_init_usnic_filter(&filter, port_num);
+ err = init_filter_action(qp_grp, &uaction);
+ if (err)
+ goto out_unreserve_port;
+
+ flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+ if (IS_ERR_OR_NULL(flow)) {
+ usnic_err("Unable to alloc flow failed with err %ld\n",
+ PTR_ERR(flow));
+ err = flow ? PTR_ERR(flow) : -EFAULT;
+ goto out_unreserve_port;
+ }
+
+ /* Create Flow Handle */
+ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ goto out_dealloc_flow;
+ }
+ qp_flow->flow = flow;
+ qp_flow->trans_type = trans_type;
+ qp_flow->usnic_roce.port_num = port_num;
+ qp_flow->qp_grp = qp_grp;
+ return qp_flow;
+
+out_dealloc_flow:
+ usnic_fwd_dealloc_flow(flow);
+out_unreserve_port:
+ usnic_transport_unrsrv_port(trans_type, port_num);
+ return ERR_PTR(err);
+}
+
+static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_fwd_dealloc_flow(qp_flow->flow);
+ usnic_transport_unrsrv_port(qp_flow->trans_type,
+ qp_flow->usnic_roce.port_num);
+ kfree(qp_flow);
+}
+
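+/*
+ * Build a flow from a userspace-supplied UDP socket: the socket's bound
+ * address and port define the 5-tuple filter, and a reference to the socket
+ * is held until the flow is released.
+ */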
+static struct usnic_ib_qp_grp_flow*
+create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ struct socket *sock;
+ int sock_fd;
+ int err;
+ struct filter filter;
+ struct usnic_filter_action uaction;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+ uint32_t addr;
+ uint16_t port_num;
+ int proto;
+
+ trans_type = trans_spec->trans_type;
+ sock_fd = trans_spec->udp.sock_fd;
+
+ /* Get and check socket */
+ sock = usnic_transport_get_socket(sock_fd);
+ if (IS_ERR_OR_NULL(sock))
+ return ERR_CAST(sock);
+
+ err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num);
+ if (err)
+ goto out_put_sock;
+
+ if (proto != IPPROTO_UDP) {
+ usnic_err("Protocol for fd %d is not UDP", sock_fd);
+ err = -EPERM;
+ goto out_put_sock;
+ }
+
+ /* Create flow */
+ usnic_fwd_init_udp_filter(&filter, addr, port_num);
+ err = init_filter_action(qp_grp, &uaction);
+ if (err)
+ goto out_put_sock;
+
+ flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+ if (IS_ERR_OR_NULL(flow)) {
+ usnic_err("Unable to alloc flow failed with err %ld\n",
+ PTR_ERR(flow));
+ err = flow ? PTR_ERR(flow) : -EFAULT;
+ goto out_put_sock;
+ }
+
+ /* Create qp_flow */
+ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+ goto out_dealloc_flow;
+ }
+ qp_flow->flow = flow;
+ qp_flow->trans_type = trans_type;
+ qp_flow->udp.sock = sock;
+ qp_flow->qp_grp = qp_grp;
+ return qp_flow;
+
+out_dealloc_flow:
+ usnic_fwd_dealloc_flow(flow);
+out_put_sock:
+ usnic_transport_put_socket(sock);
+ return ERR_PTR(err);
+}
+
+static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_fwd_dealloc_flow(qp_flow->flow);
+ usnic_transport_put_socket(qp_flow->udp.sock);
+ kfree(qp_flow);
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_and_add_flow(struct usnic_ib_qp_grp *qp_grp,
+ struct usnic_transport_spec *trans_spec)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow;
+ enum usnic_transport_type trans_type;
+
+ trans_type = trans_spec->trans_type;
+ switch (trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ qp_flow = create_roce_custom_flow(qp_grp, trans_spec);
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ qp_flow = create_udp_flow(qp_grp, trans_spec);
+ break;
+ default:
+ usnic_err("Unsupported transport %u\n",
+ trans_spec->trans_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!IS_ERR_OR_NULL(qp_flow)) {
+ list_add_tail(&qp_flow->link, &qp_grp->flows_lst);
+ usnic_debugfs_flow_add(qp_flow);
+ }
+
+
+ return qp_flow;
+}
+
+static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+ usnic_debugfs_flow_remove(qp_flow);
+ list_del(&qp_flow->link);
+
+ switch (qp_flow->trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ release_roce_custom_flow(qp_flow);
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ release_udp_flow(qp_flow);
+ break;
+ default:
+ WARN(1, "Unsupported transport %u\n",
+ qp_flow->trans_type);
+ break;
+ }
+}
+
+static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_qp_grp_flow *qp_flow, *tmp;
+ list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link)
+ release_and_remove_flow(qp_flow);
+}
+
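+/*
+ * Drive the QP group state machine. The opaque data pointer optionally
+ * carries a usnic_transport_spec, which is only consumed on transitions
+ * into INIT, where it is used to create and add a flow.
+ */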
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data)
+{
+ int status = 0;
+ int vnic_idx;
+ struct ib_event ib_event;
+ enum ib_qp_state old_state;
+ struct usnic_transport_spec *trans_spec;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ old_state = qp_grp->state;
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+ trans_spec = (struct usnic_transport_spec *) data;
+
+ spin_lock(&qp_grp->lock);
+ switch (new_state) {
+ case IB_QPS_RESET:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ /* NO-OP */
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ status = 0;
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ case IB_QPS_ERR:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_INIT:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Optional to specify filters.
+ */
+ status = 0;
+ }
+ break;
+ case IB_QPS_INIT:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Doesn't make sense to go into INIT state
+ * from INIT state w/o adding filters.
+ */
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ status = disable_qp_grp(qp_grp);
+ break;
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ switch (old_state) {
+ case IB_QPS_INIT:
+ status = enable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTS:
+ switch (old_state) {
+ case IB_QPS_RTR:
+ /* NO-OP FOR NOW */
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_ERR:
+ ib_event.device = &qp_grp->vf->pf->ib_dev;
+ ib_event.element.qp = &qp_grp->ibqp;
+ ib_event.event = IB_EVENT_QP_FATAL;
+
+ switch (old_state) {
+ case IB_QPS_RESET:
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ default:
+ status = -EINVAL;
+ }
+ spin_unlock(&qp_grp->lock);
+
+ if (!status) {
+ qp_grp->state = new_state;
+ usnic_info("Transistioned %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ } else {
+ usnic_err("Failed to transistion %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ }
+
+ return status;
+}
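For orientation, the state machine above is driven from the verbs entry points further down in this patch. The sketch below is illustrative only (error handling trimmed, not driver code): usnic_ib_modify_qp passes NULL data because the initial flow is installed by usnic_ib_qp_grp_create, and teardown always returns through RESET before usnic_ib_qp_grp_destroy is called.

    /* Illustrative sketch: typical forward progression of a qp_grp. */
    err = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
    if (!err)
            err = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
    if (!err)
            err = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);

    /* Teardown: move back to RESET (disables the QP, releases flows). */
    usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL);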
+
+static struct usnic_vnic_res_chunk**
+alloc_res_chunk_list(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec, void *owner_obj)
+{
+ enum usnic_vnic_res_type res_type;
+ struct usnic_vnic_res_chunk **res_chunk_list;
+ int err, i, res_cnt, res_lst_sz;
+
+ for (res_lst_sz = 0;
+ res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL;
+ res_lst_sz++) {
+ /* Do Nothing */
+ }
+
+ res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1),
+ GFP_ATOMIC);
+ if (!res_chunk_list)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL;
+ i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+
+ res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type,
+ res_cnt, owner_obj);
+ if (IS_ERR_OR_NULL(res_chunk_list[i])) {
+ err = res_chunk_list[i] ?
+ PTR_ERR(res_chunk_list[i]) : -ENOMEM;
+ usnic_err("Failed to get %s from %s with err %d\n",
+ usnic_vnic_res_type_to_str(res_type),
+ usnic_vnic_pci_name(vnic),
+ err);
+ goto out_free_res;
+ }
+ }
+
+ return res_chunk_list;
+
+out_free_res:
+ for (i--; i > 0; i--)
+ usnic_vnic_put_resources(res_chunk_list[i]);
+ kfree(res_chunk_list);
+ return ERR_PTR(err);
+}
+
+static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list)
+{
+ int i;
+ for (i = 0; res_chunk_list[i]; i++)
+ usnic_vnic_put_resources(res_chunk_list[i]);
+ kfree(res_chunk_list);
+}
+
+static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_ib_qp_grp *qp_grp)
+{
+ int err;
+ struct pci_dev *pdev;
+
+ lockdep_assert_held(&vf->lock);
+
+ pdev = usnic_vnic_get_pdev(vf->vnic);
+ if (vf->qp_grp_ref_cnt == 0) {
+ err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev);
+ if (err) {
+ usnic_err("Failed to attach %s to domain\n",
+ pci_name(pdev));
+ return err;
+ }
+ vf->pd = pd;
+ }
+ vf->qp_grp_ref_cnt++;
+
+ WARN_ON(vf->pd != pd);
+ qp_grp->vf = vf;
+
+ return 0;
+}
+
+static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct pci_dev *pdev;
+ struct usnic_ib_pd *pd;
+
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ pd = qp_grp->vf->pd;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (--qp_grp->vf->qp_grp_ref_cnt == 0) {
+ qp_grp->vf->pd = NULL;
+ usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev);
+ }
+ qp_grp->vf = NULL;
+}
+
+static void log_spec(struct usnic_vnic_res_spec *res_spec)
+{
+ char buf[512];
+ usnic_vnic_spec_dump(buf, sizeof(buf), res_spec);
+ usnic_dbg("%s\n", buf);
+}
+
+static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
+ uint32_t *id)
+{
+ enum usnic_transport_type trans_type = qp_flow->trans_type;
+ int err;
+ uint16_t port_num = 0;
+
+ switch (trans_type) {
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ *id = qp_flow->usnic_roce.port_num;
+ break;
+ case USNIC_TRANSPORT_IPV4_UDP:
+ err = usnic_transport_sock_get_addr(qp_flow->udp.sock,
+ NULL, NULL,
+ &port_num);
+ if (err)
+ return err;
+ /*
+ * Copy port_num to stack first and then to *id,
+ * so that the short to int cast works for little
+ * and big endian systems.
+ */
+ *id = port_num;
+ break;
+ default:
+ usnic_err("Unsupported transport %u\n", trans_type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *transport_spec)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ int err;
+ enum usnic_transport_type transport = transport_spec->trans_type;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ lockdep_assert_held(&vf->lock);
+
+ err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
+ res_spec);
+ if (err) {
+ usnic_err("Spec does not meet miniumum req for transport %d\n",
+ transport);
+ log_spec(res_spec);
+ return ERR_PTR(err);
+ }
+
+ qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
+ if (!qp_grp) {
+ usnic_err("Unable to alloc qp_grp - Out of memory\n");
+ return NULL;
+ }
+
+ qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
+ qp_grp);
+ if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
+ err = qp_grp->res_chunk_list ?
+ PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
+ usnic_err("Unable to alloc res for %d with err %d\n",
+ qp_grp->grp_id, err);
+ goto out_free_qp_grp;
+ }
+
+ err = qp_grp_and_vf_bind(vf, pd, qp_grp);
+ if (err)
+ goto out_free_res;
+
+ INIT_LIST_HEAD(&qp_grp->flows_lst);
+ spin_lock_init(&qp_grp->lock);
+ qp_grp->ufdev = ufdev;
+ qp_grp->state = IB_QPS_RESET;
+ qp_grp->owner_pid = current->pid;
+
+ qp_flow = create_and_add_flow(qp_grp, transport_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ usnic_err("Unable to create and add flow with err %ld\n",
+ PTR_ERR(qp_flow));
+ err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ goto out_qp_grp_vf_unbind;
+ }
+
+ err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id);
+ if (err)
+ goto out_release_flow;
+ qp_grp->ibqp.qp_num = qp_grp->grp_id;
+
+ usnic_ib_sysfs_qpn_add(qp_grp);
+
+ return qp_grp;
+
+out_release_flow:
+ release_and_remove_flow(qp_flow);
+out_qp_grp_vf_unbind:
+ qp_grp_and_vf_unbind(qp_grp);
+out_free_res:
+ free_qp_grp_res(qp_grp->res_chunk_list);
+out_free_qp_grp:
+ kfree(qp_grp);
+
+ return ERR_PTR(err);
+}
+
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ release_and_remove_all_flows(qp_grp);
+ usnic_ib_sysfs_qpn_remove(qp_grp);
+ qp_grp_and_vf_unbind(qp_grp);
+ free_qp_grp_res(qp_grp->res_chunk_list);
+ kfree(qp_grp);
+}
+
+struct usnic_vnic_res_chunk*
+usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type res_type)
+{
+ int i;
+
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ if (qp_grp->res_chunk_list[i]->type == res_type)
+ return qp_grp->res_chunk_list[i];
+ }
+
+ return ERR_PTR(-EINVAL);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
new file mode 100644
index 00000000000..b0aafe8db0c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_QP_GRP_H_
+#define USNIC_IB_QP_GRP_H_
+
+#include <linux/debugfs.h>
+#include <rdma/ib_verbs.h>
+
+#include "usnic_ib.h"
+#include "usnic_abi.h"
+#include "usnic_fwd.h"
+#include "usnic_vnic.h"
+
+/*
+ * The qp group struct represents all the hw resources needed to present an ib_qp
+ */
+struct usnic_ib_qp_grp {
+ struct ib_qp ibqp;
+ enum ib_qp_state state;
+ int grp_id;
+
+ struct usnic_fwd_dev *ufdev;
+ struct usnic_ib_ucontext *ctx;
+ struct list_head flows_lst;
+
+ struct usnic_vnic_res_chunk **res_chunk_list;
+
+ pid_t owner_pid;
+ struct usnic_ib_vf *vf;
+ struct list_head link;
+
+ spinlock_t lock;
+
+ struct kobject kobj;
+};
+
+struct usnic_ib_qp_grp_flow {
+ struct usnic_fwd_flow *flow;
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ struct socket *sock;
+ } udp;
+ };
+ struct usnic_ib_qp_grp *qp_grp;
+ struct list_head link;
+
+ /* Debug FS */
+ struct dentry *dbgfs_dentry;
+ char dentry_name[32];
+};
+
+static const struct
+usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
+ { /*USNIC_TRANSPORT_UNKNOWN*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_ROCE_CUSTOM*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_IPV4_UDP*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+};
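This table is indexed by transport type and gives the minimum per-QP resource footprint. The verbs layer copies the matching entry and then adjusts the CQ count, roughly as in usnic_ib_create_qp later in this patch; the snippet below is a sketch of that use, not new driver code.

    struct usnic_vnic_res_spec res_spec;
    int cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;

    /* Start from the per-transport minimum, then bump the CQ count. */
    res_spec = min_transport_spec[trans_spec.trans_type];
    usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);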
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *trans_spec);
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data);
+struct usnic_vnic_res_chunk
+*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type type);
+static inline
+struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct usnic_ib_qp_grp, ibqp);
+}
+#endif /* USNIC_IB_QP_GRP_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
new file mode 100644
index 00000000000..27dc67c1689
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_vnic.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_log.h"
+
+static ssize_t usnic_ib_show_fw_ver(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ struct ethtool_drvinfo info;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
+}
+
+static ssize_t usnic_ib_show_board(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ unsigned short subsystem_device_id;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ subsystem_device_id = us_ibdev->pdev->subsystem_device;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
+}
+
+/*
+ * Report the configuration for this PF
+ */
+static ssize_t
+usnic_ib_show_config(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ char *ptr;
+ unsigned left;
+ unsigned n;
+ enum usnic_vnic_res_type res_type;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ /* Buffer space limit is 1 page */
+ ptr = buf;
+ left = PAGE_SIZE;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+ char *busname;
+
+ /*
+ * The bus name seems to come with an annoying prefix.
+ * Remove it if it is predictable.
+ */
+ busname = us_ibdev->pdev->bus->name;
+ if (strncmp(busname, "PCI Bus ", 8) == 0)
+ busname += 8;
+
+ n = scnprintf(ptr, left,
+ "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
+ us_ibdev->ib_dev.name,
+ busname,
+ PCI_SLOT(us_ibdev->pdev->devfn),
+ PCI_FUNC(us_ibdev->pdev->devfn),
+ netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac,
+ atomic_read(&us_ibdev->vf_cnt.refcount));
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ if (us_ibdev->vf_res_cnt[res_type] == 0)
+ continue;
+ n = scnprintf(ptr, left, " %d %s%s",
+ us_ibdev->vf_res_cnt[res_type],
+ usnic_vnic_res_type_to_str(res_type),
+ (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
+ "," : "");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else {
+ n = scnprintf(ptr, left, "%s: no VFs\n",
+ us_ibdev->ib_dev.name);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return ptr - buf;
+}
+
+static ssize_t
+usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n",
+ netdev_name(us_ibdev->netdev));
+}
+
+static ssize_t
+usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n",
+ atomic_read(&us_ibdev->vf_cnt.refcount));
+}
+
+static ssize_t
+usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int qp_per_vf;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+
+ return scnprintf(buf, PAGE_SIZE,
+ "%d\n", qp_per_vf);
+}
+
+static ssize_t
+usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
+}
+
+static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
+static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
+static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
+static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
+static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
+static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
+
+static struct device_attribute *usnic_class_attributes[] = {
+ &dev_attr_fw_ver,
+ &dev_attr_board_id,
+ &dev_attr_config,
+ &dev_attr_iface,
+ &dev_attr_max_vf,
+ &dev_attr_qp_per_vf,
+ &dev_attr_cq_per_vf,
+};
+
+struct qpn_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf);
+};
+
+/*
+ * Definitions for supporting QPN entries in sysfs
+ */
+static ssize_t
+usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct qpn_attribute *qpn_attr;
+
+ qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj);
+ qpn_attr = container_of(attr, struct qpn_attribute, attr);
+
+ return qpn_attr->show(qp_grp, buf);
+}
+
+static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = {
+ .show = usnic_ib_qpn_attr_show
+};
+
+#define QPN_ATTR_RO(NAME) \
+struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
+
+static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
+}
+
+static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ int i, j, n;
+ int left;
+ char *ptr;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *vnic_res;
+
+ left = PAGE_SIZE;
+ ptr = buf;
+
+ n = scnprintf(ptr, left,
+ "QPN: %d State: (%s) PID: %u VF Idx: %hu ",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic));
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ res_chunk = qp_grp->res_chunk_list[i];
+ for (j = 0; j < res_chunk->cnt; j++) {
+ vnic_res = res_chunk->res[j];
+ n = scnprintf(ptr, left, "%s[%d] ",
+ usnic_vnic_res_type_to_str(vnic_res->type),
+ vnic_res->vnic_idx);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ }
+
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ return ptr - buf;
+}
+
+static QPN_ATTR_RO(context);
+static QPN_ATTR_RO(summary);
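For readers unfamiliar with __ATTR_RO: the two lines above produce read-only sysfs attributes wired to the *_show helpers defined earlier. Roughly, as a sketch of the expansion rather than literal preprocessor output:

    static struct qpn_attribute qpn_attr_context = {
            .attr = { .name = "context", .mode = S_IRUGO },
            .show = context_show,
    };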
+
+static struct attribute *usnic_ib_qpn_default_attrs[] = {
+ &qpn_attr_context.attr,
+ &qpn_attr_summary.attr,
+ NULL
+};
+
+static struct kobj_type usnic_ib_qpn_type = {
+ .sysfs_ops = &usnic_ib_qpn_sysfs_ops,
+ .default_attrs = usnic_ib_qpn_default_attrs
+};
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ int err;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ err = device_create_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ if (err) {
+ usnic_err("Failed to create device file %d for %s eith err %d",
+ i, us_ibdev->ib_dev.name, err);
+ return -EINVAL;
+ }
+ }
+
+ /* create kernel object for looking at individual QPs */
+ kobject_get(&us_ibdev->ib_dev.dev.kobj);
+ us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
+ &us_ibdev->ib_dev.dev.kobj);
+ if (us_ibdev->qpn_kobj == NULL) {
+ kobject_put(&us_ibdev->ib_dev.dev.kobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ device_remove_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ }
+
+ kobject_put(us_ibdev->qpn_kobj);
+}
+
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int err;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type,
+ kobject_get(us_ibdev->qpn_kobj),
+ "%d", qp_grp->grp_id);
+ if (err) {
+ kobject_put(us_ibdev->qpn_kobj);
+ return;
+ }
+}
+
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ kobject_put(&qp_grp->kobj);
+ kobject_put(us_ibdev->qpn_kobj);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
new file mode 100644
index 00000000000..0d09b493cd0
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_SYSFS_H_
+#define USNIC_IB_SYSFS_H_
+
+#include "usnic_ib.h"
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+
+#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
new file mode 100644
index 00000000000..53bd6a2d9cd
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_ib.h"
+#include "usnic_common_util.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_transport.h"
+
+#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
+
+static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
+{
+ *fw_ver = (u64) *fw_ver_str;
+}
+
+static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_create_qp_resp resp;
+ struct pci_dev *pdev;
+ struct vnic_dev_bar *bar;
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ int i, err;
+
+ memset(&resp, 0, sizeof(resp));
+
+ us_ibdev = qp_grp->vf->pf;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (!pdev) {
+ usnic_err("Failed to get pdev of qp_grp %d\n",
+ qp_grp->grp_id);
+ return -EFAULT;
+ }
+
+ bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
+ if (!bar) {
+ usnic_err("Failed to get bar0 of qp_grp %d vf %s",
+ qp_grp->grp_id, pci_name(pdev));
+ return -EFAULT;
+ }
+
+ resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
+ resp.bar_bus_addr = bar->bus_addr;
+ resp.bar_len = bar->len;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
+ resp.rq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.rq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
+ resp.wq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.wq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
+ resp.cq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.cq_idx[i] = chunk->res[i]->vnic_idx;
+
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ resp.transport = default_flow->trans_type;
+
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (err) {
+ usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+ return err;
+ }
+
+ return 0;
+}
+
+static struct usnic_ib_qp_grp*
+find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
+ struct usnic_ib_pd *pd,
+ struct usnic_transport_spec *trans_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ struct usnic_ib_vf *vf;
+ struct usnic_vnic *vnic;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct device *dev, **dev_list;
+ int i, found = 0;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ if (list_empty(&us_ibdev->vf_dev_list)) {
+ usnic_info("No vfs to allocate\n");
+ return NULL;
+ }
+
+ if (usnic_ib_share_vf) {
+ /* Try to find resouces on a used vf which is in pd */
+ dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
+ for (i = 0; dev_list[i]; i++) {
+ dev = dev_list[i];
+ vf = pci_get_drvdata(to_pci_dev(dev));
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (!usnic_vnic_check_room(vnic, res_spec)) {
+ usnic_dbg("Found used vnic %s from %s\n",
+ us_ibdev->ib_dev.name,
+ pci_name(usnic_vnic_get_pdev(
+ vnic)));
+ found = 1;
+ break;
+ }
+ spin_unlock(&vf->lock);
+
+ }
+ usnic_uiom_free_dev_list(dev_list);
+ }
+
+ if (!found) {
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ found = 1;
+ break;
+ }
+ spin_unlock(&vf->lock);
+ }
+ }
+
+ if (!found) {
+ usnic_info("No free qp grp found on %s\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
+ trans_spec);
+ spin_unlock(&vf->lock);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ usnic_err("Failed to allocate qp_grp\n");
+ return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
+ }
+
+ return qp_grp;
+}
+
+static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_vf *vf = qp_grp->vf;
+
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+
+ spin_lock(&vf->lock);
+ usnic_ib_qp_grp_destroy(qp_grp);
+ spin_unlock(&vf->lock);
+}
+
+static void eth_speed_to_ib_speed(int speed, u8 *active_speed,
+ u8 *active_width)
+{
+ if (speed <= 10000) {
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_FDR10;
+ } else if (speed <= 20000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_DDR;
+ } else if (speed <= 30000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ } else if (speed <= 40000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR10;
+ } else {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ }
+}
+
+static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
+{
+ if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN ||
+ cmd.spec.trans_type >= USNIC_TRANSPORT_MAX)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Start of ib callback functions */
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ union ib_gid gid;
+ struct ethtool_drvinfo info;
+ struct ethtool_cmd cmd;
+ int qp_per_vf;
+
+ usnic_dbg("\n");
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+ memset(props, 0, sizeof(*props));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid.raw[0]);
+ memcpy(&props->sys_image_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
+ props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
+ props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
+ props->vendor_id = PCI_VENDOR_ID_CISCO;
+ props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
+ props->hw_ver = us_ibdev->pdev->subsystem_device;
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+ props->max_qp = qp_per_vf *
+ atomic_read(&us_ibdev->vf_cnt.refcount);
+ props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
+ atomic_read(&us_ibdev->vf_cnt.refcount);
+ props->max_pd = USNIC_UIOM_MAX_PD_CNT;
+ props->max_mr = USNIC_UIOM_MAX_MR_CNT;
+ props->local_ca_ack_delay = 0;
+ props->max_pkeys = 0;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ props->max_qp_rd_atom = 0;
+ props->max_qp_init_rd_atom = 0;
+ props->max_res_rd_atom = 0;
+ props->max_srq = 0;
+ props->max_srq_wr = 0;
+ props->max_srq_sge = 0;
+ props->max_fast_reg_page_list_len = 0;
+ props->max_mcast_grp = 0;
+ props->max_mcast_qp_attach = 0;
+ props->max_total_mcast_qp_attach = 0;
+ props->max_map_per_fmr = 0;
+ /* Owned by Userspace
+ * max_qp_wr, max_sge, max_sge_rd, max_cqe */
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ struct ethtool_cmd cmd;
+
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+ memset(props, 0, sizeof(*props));
+
+ props->lid = 0;
+ props->lmc = 1;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+
+ if (!us_ibdev->ufdev->link_up) {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+ } else if (!us_ibdev->ufdev->inaddr) {
+ props->state = IB_PORT_INIT;
+ props->phys_state = 4;
+ } else {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ props->port_cap_flags = 0;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->bad_pkey_cntr = 0;
+ props->qkey_viol_cntr = 0;
+ eth_speed_to_ib_speed(cmd.speed, &props->active_speed,
+ &props->active_width);
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
+ /* Userspace will adjust for hdrs */
+ props->max_msg_sz = us_ibdev->ufdev->mtu;
+ props->max_vl_num = 1;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ int err;
+
+ usnic_dbg("\n");
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ usnic_dbg("\n");
+ qp_attr->qp_state = qp_grp->state;
+ qp_attr->cur_qp_state = qp_grp->state;
+
+ switch (qp_grp->ibqp.qp_type) {
+ case IB_QPT_UD:
+ qp_attr->qkey = 0;
+ break;
+ default:
+ usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ mutex_unlock(&vf->pf->usdev_lock);
+ return 0;
+
+err_out:
+ mutex_unlock(&vf->pf->usdev_lock);
+ return err;
+}
+
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ if (index > 1)
+ return -EINVAL;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid->raw[0]);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 1)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_pd *pd;
+ void *umem_pd;
+
+ usnic_dbg("\n");
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
+ if (IS_ERR_OR_NULL(umem_pd)) {
+ kfree(pd);
+ return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM);
+ }
+
+ usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
+ pd, context, ibdev->name);
+ return &pd->ibpd;
+}
+
+int usnic_ib_dealloc_pd(struct ib_pd *pd)
+{
+ usnic_info("freeing domain 0x%p\n", pd);
+
+ usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+ kfree(pd);
+ return 0;
+}
+
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int err;
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_ucontext *ucontext;
+ int cq_cnt;
+ struct usnic_vnic_res_spec res_spec;
+ struct usnic_ib_create_qp_cmd cmd;
+ struct usnic_transport_spec trans_spec;
+
+ usnic_dbg("\n");
+
+ ucontext = to_uucontext(pd->uobject->context);
+ us_ibdev = to_usdev(pd->device);
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+ if (err) {
+ usnic_err("%s: cannot copy udata for create_qp\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = create_qp_validate_user_data(cmd);
+ if (err) {
+ usnic_err("%s: Failed to validate user data\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (init_attr->qp_type != IB_QPT_UD) {
+ usnic_err("%s asked to make a non-UD QP: %d\n",
+ us_ibdev->ib_dev.name, init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ trans_spec = cmd.spec;
+ mutex_lock(&us_ibdev->usdev_lock);
+ cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
+ res_spec = min_transport_spec[trans_spec.trans_type];
+ usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
+ qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
+ &trans_spec,
+ &res_spec);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
+ goto out_release_mutex;
+ }
+
+ err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
+ if (err) {
+ err = -EBUSY;
+ goto out_release_qp_grp;
+ }
+
+ qp_grp->ctx = ucontext;
+ list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
+ usnic_ib_log_vf(qp_grp->vf);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return &qp_grp->ibqp;
+
+out_release_qp_grp:
+ qp_grp_destroy(qp_grp);
+out_release_mutex:
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return ERR_PTR(err);
+}
+
+int usnic_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
+ usnic_err("Failed to move qp grp %u to reset\n",
+ qp_grp->grp_id);
+ }
+
+ list_del(&qp_grp->link);
+ qp_grp_destroy(qp_grp);
+ mutex_unlock(&vf->pf->usdev_lock);
+
+ return 0;
+}
+
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ int status;
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(ibqp);
+
+ /* TODO: Support all states in the future */
+ mutex_lock(&qp_grp->vf->pf->usdev_lock);
+ if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
+ status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
+ } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
+ status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
+ } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
+ status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+ } else {
+ usnic_err("Unexpected combination mask: %u state: %u\n",
+ attr_mask & IB_QP_STATE, attr->qp_state);
+ status = -EINVAL;
+ }
+
+ mutex_unlock(&qp_grp->vf->pf->usdev_lock);
+ return status;
+}
+
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ib_cq *cq;
+
+ usnic_dbg("\n");
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ return cq;
+}
+
+int usnic_ib_destroy_cq(struct ib_cq *cq)
+{
+ usnic_dbg("\n");
+ kfree(cq);
+ return 0;
+}
+
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_mr *mr;
+ int err;
+
+ usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
+ virt_addr, length);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
+ access_flags, 0);
+ if (IS_ERR_OR_NULL(mr->umem)) {
+ err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
+ goto err_free;
+ }
+
+ mr->ibmr.lkey = mr->ibmr.rkey = 0;
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int usnic_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct usnic_ib_mr *mr = to_umr(ibmr);
+
+ usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
+
+ usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
+ kfree(mr);
+ return 0;
+}
+
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_ucontext *context;
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&context->qp_grp_list);
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_add_tail(&context->link, &us_ibdev->ctx_list);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return &context->ibucontext;
+}
+
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ BUG_ON(!list_empty(&context->qp_grp_list));
+ list_del(&context->link);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ kfree(context);
+ return 0;
+}
+
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ struct usnic_ib_ucontext *uctx = to_ucontext(context);
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ struct vnic_dev_bar *bar;
+ dma_addr_t bus_addr;
+ unsigned int len;
+ unsigned int vfid;
+
+ usnic_dbg("\n");
+
+ us_ibdev = to_usdev(context->device);
+ vma->vm_flags |= VM_IO;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vfid = vma->vm_pgoff;
+ usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
+ vma->vm_pgoff, PAGE_SHIFT, vfid);
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
+ vf = qp_grp->vf;
+ if (usnic_vnic_get_index(vf->vnic) == vfid) {
+ bar = usnic_vnic_get_bar(vf->vnic, 0);
+ if ((vma->vm_end - vma->vm_start) != bar->len) {
+ usnic_err("Bar0 Len %lu - Request map %lu\n",
+ bar->len,
+ vma->vm_end - vma->vm_start);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return -EINVAL;
+ }
+ bus_addr = bar->bus_addr;
+ len = bar->len;
+ usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
+ &bus_addr, bar->vaddr, bar->len);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return remap_pfn_range(vma,
+ vma->vm_start,
+ bus_addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+ }
+
+ mutex_unlock(&us_ibdev->usdev_lock);
+ usnic_err("No VF %u found\n", vfid);
+ return -EINVAL;
+}
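Seen from userspace, the mapping is keyed entirely by the page offset: the offset encodes the VF index, and the requested length must equal the BAR0 length reported in the create_qp response (resp.bar_len). A hedged userspace sketch follows; cmd_fd, vfid and bar_len are placeholders for illustration, and <sys/mman.h>, <unistd.h> and <stdio.h> are assumed.

    /* Map BAR0 of VF "vfid"; the kernel side derives vfid from vm_pgoff. */
    void *bar0 = mmap(NULL, bar_len, PROT_READ | PROT_WRITE, MAP_SHARED,
                      cmd_fd, (off_t)vfid * sysconf(_SC_PAGESIZE));
    if (bar0 == MAP_FAILED)
            perror("mmap");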
+
+/* In ib callbacks section - Start of stub funcs */
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-EPERM);
+}
+
+int usnic_ib_destroy_ah(struct ib_ah *ah)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+/* In ib callbacks section - End of stub funcs */
+/* End of ib callbacks section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
new file mode 100644
index 00000000000..bb864f5aed7
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_VERBS_H_
+#define USNIC_IB_VERBS_H_
+
+#include "usnic_ib.h"
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num);
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props);
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid);
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey);
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd);
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int usnic_ib_destroy_qp(struct ib_qp *qp);
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq);
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int usnic_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr);
+int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags);
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc);
+#endif /* !USNIC_IB_VERBS_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_log.h b/drivers/infiniband/hw/usnic/usnic_log.h
new file mode 100644
index 00000000000..75777a66c68
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_log.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_LOG_H_
+#define USNIC_LOG_H_
+
+#include "usnic.h"
+
+extern unsigned int usnic_log_lvl;
+
+#define USNIC_LOG_LVL_NONE (0)
+#define USNIC_LOG_LVL_ERR (1)
+#define USNIC_LOG_LVL_INFO (2)
+#define USNIC_LOG_LVL_DBG (3)
+
+#define usnic_printk(lvl, args...) \
+ do { \
+ printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \
+ __LINE__); \
+ printk(args); \
+ } while (0)
+
+#define usnic_dbg(args...) \
+ do { \
+ if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+#define usnic_info(args...) \
+do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+#define usnic_err(args...) \
+ do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \
+ usnic_printk(KERN_ERR, args); \
+ } \
+ } while (0)
+#endif /* !USNIC_LOG_H_ */
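These are ordinary printk-style macros gated on usnic_log_lvl; each message is prefixed with DRV_NAME (defined in usnic.h, not shown in this hunk), the function name and the line number. A typical call site, as a sketch:

    /* Emits something like "<DRV_NAME>:my_func:42: failed to foo, err -12" */
    usnic_err("failed to foo, err %d\n", err);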
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
new file mode 100644
index 00000000000..ddef6f77a78
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bitmap.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/inet_sock.h>
+
+#include "usnic_transport.h"
+#include "usnic_log.h"
+
+/* ROCE */
+static unsigned long *roce_bitmap;
+static u16 roce_next_port = 1;
+#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/)
+static DEFINE_SPINLOCK(roce_bitmap_lock);
+
+const char *usnic_transport_to_str(enum usnic_transport_type type)
+{
+ switch (type) {
+ case USNIC_TRANSPORT_UNKNOWN:
+ return "Unknown";
+ case USNIC_TRANSPORT_ROCE_CUSTOM:
+ return "roce custom";
+ case USNIC_TRANSPORT_IPV4_UDP:
+ return "IPv4 UDP";
+ case USNIC_TRANSPORT_MAX:
+ return "Max?";
+ default:
+ return "Not known";
+ }
+}
+
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock)
+{
+ int err;
+ uint32_t addr;
+ uint16_t port;
+ int proto;
+
+ memset(buf, 0, buf_sz);
+ err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
+ if (err)
+ return 0;
+
+ return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu",
+ proto, &addr, port);
+}
+
+/*
+ * Reserve a port number. If "0" is specified, we will try to pick one
+ * starting at roce_next_port. roce_next_port will take on the values
+ * 1..4096.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+ if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ spin_lock(&roce_bitmap_lock);
+ if (!port_num) {
+ port_num = bitmap_find_next_zero_area(roce_bitmap,
+ ROCE_BITMAP_SZ,
+ roce_next_port /* start */,
+ 1 /* nr */,
+ 0 /* align */);
+ roce_next_port = (port_num & 4095) + 1;
+ } else if (test_bit(port_num, roce_bitmap)) {
+ usnic_err("Failed to allocate port for %s\n",
+ usnic_transport_to_str(type));
+ spin_unlock(&roce_bitmap_lock);
+ goto out_fail;
+ }
+ bitmap_set(roce_bitmap, port_num, 1);
+ spin_unlock(&roce_bitmap_lock);
+ } else {
+ usnic_err("Failed to allocate port - transport %s unsupported\n",
+ usnic_transport_to_str(type));
+ goto out_fail;
+ }
+
+ usnic_dbg("Allocating port %hu for %s\n", port_num,
+ usnic_transport_to_str(type));
+ return port_num;
+
+out_fail:
+ return 0;
+}
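The function returns the reserved port number, or 0 on failure; asking for a specific non-zero port fails if it is already taken. A minimal usage sketch (only the ROCE custom transport is handled here):

    u16 port = usnic_transport_rsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, 0);
    if (port)
            usnic_transport_unrsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, port);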
+
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+ if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+ spin_lock(&roce_bitmap_lock);
+ if (!port_num) {
+ usnic_err("Unreserved unvalid port num 0 for %s\n",
+ usnic_transport_to_str(type));
+ goto out_roce_custom;
+ }
+
+ if (!test_bit(port_num, roce_bitmap)) {
+ usnic_err("Unreserving invalid %hu for %s\n",
+ port_num,
+ usnic_transport_to_str(type));
+ goto out_roce_custom;
+ }
+ bitmap_clear(roce_bitmap, port_num, 1);
+ usnic_dbg("Freeing port %hu for %s\n", port_num,
+ usnic_transport_to_str(type));
+out_roce_custom:
+ spin_unlock(&roce_bitmap_lock);
+ } else {
+ usnic_err("Freeing invalid port %hu for %d\n", port_num, type);
+ }
+}
+
+struct socket *usnic_transport_get_socket(int sock_fd)
+{
+ struct socket *sock;
+ int err;
+ char buf[25];
+
+ /* sockfd_lookup will internally do a fget */
+ sock = sockfd_lookup(sock_fd, &err);
+ if (!sock) {
+ usnic_err("Unable to lookup socket for fd %d with err %d\n",
+ sock_fd, err);
+ return ERR_PTR(-ENOENT);
+ }
+
+ usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+ usnic_dbg("Get sock %s\n", buf);
+
+ return sock;
+}
+
+void usnic_transport_put_socket(struct socket *sock)
+{
+ char buf[100];
+
+ usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+ usnic_dbg("Put sock %s\n", buf);
+ sockfd_put(sock);
+}
+
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port)
+{
+ int len;
+ int err;
+ struct sockaddr_in sock_addr;
+
+ err = sock->ops->getname(sock,
+ (struct sockaddr *)&sock_addr,
+ &len, 0);
+ if (err)
+ return err;
+
+ if (sock_addr.sin_family != AF_INET)
+ return -EINVAL;
+
+ if (proto)
+ *proto = sock->sk->sk_protocol;
+ if (port)
+ *port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port);
+ if (addr)
+ *addr = ntohl(((struct sockaddr_in *)
+ &sock_addr)->sin_addr.s_addr);
+
+ return 0;
+}
+
+int usnic_transport_init(void)
+{
+ roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
+ if (!roce_bitmap) {
+ usnic_err("Failed to allocate bit map");
+ return -ENOMEM;
+ }
+
+ /* Do not ever allocate bit 0, hence set it here */
+ bitmap_set(roce_bitmap, 0, 1);
+ return 0;
+}
+
+void usnic_transport_fini(void)
+{
+ kfree(roce_bitmap);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h
new file mode 100644
index 00000000000..7e5dc6d9f46
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_TRANSPORT_H_
+#define USNIC_TRANSPORT_H_
+
+#include "usnic_abi.h"
+
+const char *usnic_transport_to_str(enum usnic_transport_type trans_type);
+/*
+ * Returns number of bytes written, excluding null terminator. If
+ * nothing was written, the function returns 0.
+ */
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock);
+/*
+ * Reserve a port. If "port_num" is set, then the function will try
+ * to reserve that particular port.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num);
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num);
+/*
+ * Does a fget on the socket referred to by sock_fd and returns the socket.
+ * Socket will not be destroyed before usnic_transport_put_socket has
+ * been called.
+ */
+struct socket *usnic_transport_get_socket(int sock_fd);
+void usnic_transport_put_socket(struct socket *sock);
+/*
+ * Call usnic_transport_get_socket before calling *_sock_get_addr
+ */
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port);
+int usnic_transport_init(void);
+void usnic_transport_fini(void);
+#endif /* !USNIC_TRANSPORT_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
new file mode 100644
index 00000000000..801a1d6937e
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_uiom_interval_tree.h"
+
+static struct workqueue_struct *usnic_uiom_wq;
+
+#define USNIC_UIOM_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\
+ ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
+ (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
+
+static void usnic_uiom_reg_account(struct work_struct *work)
+{
+ struct usnic_uiom_reg *umem = container_of(work,
+ struct usnic_uiom_reg, work);
+
+ down_write(&umem->mm->mmap_sem);
+ umem->mm->locked_vm -= umem->diff;
+ up_write(&umem->mm->mmap_sem);
+ mmput(umem->mm);
+ kfree(umem);
+}
+
+static int usnic_uiom_dma_fault(struct iommu_domain *domain,
+ struct device *dev,
+ unsigned long iova, int flags,
+ void *token)
+{
+ usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
+ dev_name(dev),
+ domain, iova, flags);
+ return -ENOSYS;
+}
+
+static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
+{
+ struct usnic_uiom_chunk *chunk, *tmp;
+ struct page *page;
+ struct scatterlist *sg;
+ int i;
+ dma_addr_t pa;
+
+ list_for_each_entry_safe(chunk, tmp, chunk_list, list) {
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ page = sg_page(sg);
+ pa = sg_phys(sg);
+ if (dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
+ usnic_dbg("pa: %pa\n", &pa);
+ }
+ kfree(chunk);
+ }
+}
+
+static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
+ int dmasync, struct list_head *chunk_list)
+{
+ struct page **page_list;
+ struct scatterlist *sg;
+ struct usnic_uiom_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret;
+ int off;
+ int i;
+ int flags;
+ dma_addr_t pa;
+ DEFINE_DMA_ATTRS(attrs);
+
+ if (dmasync)
+ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+
+ if (!can_do_mlock())
+ return -EPERM;
+
+ INIT_LIST_HEAD(chunk_list);
+
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ flags = IOMMU_READ | IOMMU_CACHE;
+ flags |= (writable) ? IOMMU_WRITE : 0;
+ cur_base = addr & PAGE_MASK;
+ ret = 0;
+
+ while (npages) {
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE / sizeof(struct page *)),
+ 1, !writable, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ npages -= ret;
+ off = 0;
+
+ while (ret) {
+ chunk = kmalloc(sizeof(*chunk) +
+ sizeof(struct scatterlist) *
+ min_t(int, ret, USNIC_UIOM_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ sg_set_page(sg, page_list[i + off],
+ PAGE_SIZE, 0);
+ pa = sg_phys(sg);
+ usnic_dbg("va: 0x%lx pa: %pa\n",
+ cur_base + i*PAGE_SIZE, &pa);
+ }
+ cur_base += chunk->nents * PAGE_SIZE;
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, chunk_list);
+ }
+
+ ret = 0;
+ }
+
+out:
+ if (ret < 0)
+ usnic_uiom_put_pages(chunk_list, 0);
+ else
+ current->mm->locked_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+ return ret;
+}
+
+static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_pd *pd)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+	unsigned long va, size;
+
+ list_for_each_entry_safe(interval, tmp, intervals, link) {
+ va = interval->start << PAGE_SHIFT;
+ size = ((interval->last - interval->start) + 1) << PAGE_SHIFT;
+ while (size > 0) {
+ /* Workaround for RH 970401 */
+ usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE);
+ iommu_unmap(pd->domain, va, PAGE_SIZE);
+ va += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ }
+}
+
+static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
+ struct usnic_uiom_reg *uiomr,
+ int dirty)
+{
+ int npages;
+ unsigned long vpn_start, vpn_last;
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int writable = 0;
+ LIST_HEAD(rm_intervals);
+
+ npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+ vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ spin_lock(&pd->lock);
+ usnic_uiom_remove_interval(&pd->rb_root, vpn_start,
+ vpn_last, &rm_intervals);
+ usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd);
+
+ list_for_each_entry_safe(interval, tmp, &rm_intervals, link) {
+ if (interval->flags & IOMMU_WRITE)
+ writable = 1;
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable);
+ spin_unlock(&pd->lock);
+}
+
+static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_reg *uiomr)
+{
+ int i, err;
+ size_t size;
+ struct usnic_uiom_chunk *chunk;
+ struct usnic_uiom_interval_node *interval_node;
+ dma_addr_t pa;
+ dma_addr_t pa_start = 0;
+ dma_addr_t pa_end = 0;
+ long int va_start = -EINVAL;
+ struct usnic_uiom_pd *pd = uiomr->pd;
+ long int va = uiomr->va & PAGE_MASK;
+ int flags = IOMMU_READ | IOMMU_CACHE;
+
+ flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
+ chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
+ list);
+ list_for_each_entry(interval_node, intervals, link) {
+iter_chunk:
+ for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
+ pa = sg_phys(&chunk->page_list[i]);
+ if ((va >> PAGE_SHIFT) < interval_node->start)
+ continue;
+
+ if ((va >> PAGE_SHIFT) == interval_node->start) {
+ /* First page of the interval */
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ WARN_ON(va_start == -EINVAL);
+
+ if ((pa_end + PAGE_SIZE != pa) &&
+ (pa != pa_start)) {
+ /* PAs are not contiguous */
+ size = pa_end - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ if ((va >> PAGE_SHIFT) == interval_node->last) {
+ /* Last page of the interval */
+ size = pa - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ break;
+ }
+
+ if (pa != pa_start)
+ pa_end += PAGE_SIZE;
+ }
+
+ if (i == chunk->nents) {
+ /*
+ * Hit last entry of the chunk,
+ * hence advance to next chunk
+ */
+ chunk = list_first_entry(&chunk->list,
+ struct usnic_uiom_chunk,
+ list);
+ goto iter_chunk;
+ }
+ }
+
+ return 0;
+
+err_out:
+ usnic_uiom_unmap_sorted_intervals(intervals, pd);
+ return err;
+}
+
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int writable, int dmasync)
+{
+ struct usnic_uiom_reg *uiomr;
+ unsigned long va_base, vpn_start, vpn_last;
+ unsigned long npages;
+ int offset, err;
+ LIST_HEAD(sorted_diff_intervals);
+
+ /*
+	 * Intel IOMMU map throws an error if a translation entry is
+	 * changed from read to write. This module must not unmap and
+	 * then remap the entry after fixing the permissions, because
+	 * that would open a small window where hw DMA may page fault.
+	 * Hence, make all entries writable.
+ */
+ writable = 1;
+
+ va_base = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
+ vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
+ if (!uiomr)
+ return ERR_PTR(-ENOMEM);
+
+ uiomr->va = va_base;
+ uiomr->offset = offset;
+ uiomr->length = size;
+ uiomr->writable = writable;
+ uiomr->pd = pd;
+
+ err = usnic_uiom_get_pages(addr, size, writable, dmasync,
+ &uiomr->chunk_list);
+ if (err) {
+ usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_free_uiomr;
+ }
+
+ spin_lock(&pd->lock);
+ err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0,
+ IOMMU_WRITE,
+ &pd->rb_root,
+ &sorted_diff_intervals);
+ if (err) {
+ usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_pages;
+ }
+
+ err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
+ if (err) {
+ usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_intervals;
+
+ }
+
+ err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0);
+ if (err) {
+ usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_unmap_intervals;
+ }
+
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+ spin_unlock(&pd->lock);
+
+ return uiomr;
+
+out_unmap_intervals:
+ usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
+out_put_intervals:
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+out_put_pages:
+ usnic_uiom_put_pages(&uiomr->chunk_list, 0);
+ spin_unlock(&pd->lock);
+out_free_uiomr:
+ kfree(uiomr);
+ return ERR_PTR(err);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
+{
+ struct mm_struct *mm;
+ unsigned long diff;
+
+ __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ kfree(uiomr);
+ return;
+ }
+
+ diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+
+ /*
+ * We may be called with the mm's mmap_sem already held. This
+ * can happen when a userspace munmap() is the call that drops
+ * the last reference to our file and calls our release
+ * method. If there are memory regions to destroy, we'll end
+ * up here and not be able to take the mmap_sem. In that case
+ * we defer the vm_locked accounting to the system workqueue.
+ */
+ if (closing) {
+ if (!down_write_trylock(&mm->mmap_sem)) {
+ INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
+ uiomr->mm = mm;
+ uiomr->diff = diff;
+
+ queue_work(usnic_uiom_wq, &uiomr->work);
+ return;
+ }
+ } else
+ down_write(&mm->mmap_sem);
+
+ current->mm->locked_vm -= diff;
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ kfree(uiomr);
+}
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+{
+ struct usnic_uiom_pd *pd;
+ void *domain;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
+ if (IS_ERR_OR_NULL(domain)) {
+ usnic_err("Failed to allocate IOMMU domain with err %ld\n",
+ PTR_ERR(pd->domain));
+ kfree(pd);
+ return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM);
+ }
+
+ iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
+
+ spin_lock_init(&pd->lock);
+ INIT_LIST_HEAD(&pd->devs);
+
+ return pd;
+}
+
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
+{
+ iommu_domain_free(pd->domain);
+ kfree(pd);
+}
+
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ int err;
+
+ uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC);
+ if (!uiom_dev)
+ return -ENOMEM;
+ uiom_dev->dev = dev;
+
+ err = iommu_attach_device(pd->domain, dev);
+ if (err)
+ goto out_free_dev;
+
+ if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
+ usnic_err("IOMMU of %s does not support cache coherency\n",
+ dev_name(dev));
+ err = -EINVAL;
+ goto out_detach_device;
+ }
+
+ spin_lock(&pd->lock);
+ list_add_tail(&uiom_dev->link, &pd->devs);
+ pd->dev_cnt++;
+ spin_unlock(&pd->lock);
+
+ return 0;
+
+out_detach_device:
+ iommu_detach_device(pd->domain, dev);
+out_free_dev:
+ kfree(uiom_dev);
+ return err;
+}
+
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ int found = 0;
+
+ spin_lock(&pd->lock);
+ list_for_each_entry(uiom_dev, &pd->devs, link) {
+ if (uiom_dev->dev == dev) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ usnic_err("Unable to free dev %s - not found\n",
+ dev_name(dev));
+ spin_unlock(&pd->lock);
+ return;
+ }
+
+ list_del(&uiom_dev->link);
+ pd->dev_cnt--;
+ spin_unlock(&pd->lock);
+
+ return iommu_detach_device(pd->domain, dev);
+}
+
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
+{
+ struct usnic_uiom_dev *uiom_dev;
+ struct device **devs;
+ int i = 0;
+
+ spin_lock(&pd->lock);
+ devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC);
+ if (!devs) {
+ devs = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ list_for_each_entry(uiom_dev, &pd->devs, link) {
+ devs[i++] = uiom_dev->dev;
+ }
+out:
+ spin_unlock(&pd->lock);
+ return devs;
+}
+
+void usnic_uiom_free_dev_list(struct device **devs)
+{
+ kfree(devs);
+}
+
+int usnic_uiom_init(char *drv_name)
+{
+ if (!iommu_present(&pci_bus_type)) {
+ usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
+ return -EPERM;
+ }
+
+ usnic_uiom_wq = create_workqueue(drv_name);
+ if (!usnic_uiom_wq) {
+ usnic_err("Unable to alloc wq for drv %s\n", drv_name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void usnic_uiom_fini(void)
+{
+ flush_workqueue(usnic_uiom_wq);
+ destroy_workqueue(usnic_uiom_wq);
+}
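
Taken together, the file above implements the usual pin-then-map flow: usnic_uiom_reg_get() pins the user pages, computes which virtual page ranges are not yet mapped into the PD's IOMMU domain (the interval diff), maps only those, and records the whole range in the interval tree; usnic_uiom_reg_release() reverses it. A minimal consumer sketch, assuming a device has already been probed; the example_* name and the simplified error handling are illustrative only.

static int example_pin_and_map(struct device *dev, unsigned long uaddr,
			       size_t len)
{
	struct usnic_uiom_pd *pd;
	struct usnic_uiom_reg *reg;
	int err;

	pd = usnic_uiom_alloc_pd();
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	err = usnic_uiom_attach_dev_to_pd(pd, dev);
	if (err)
		goto out_dealloc_pd;

	/* writable = 1, dmasync = 0; reg_get forces writable anyway */
	reg = usnic_uiom_reg_get(pd, uaddr, len, 1, 0);
	if (IS_ERR(reg)) {
		err = PTR_ERR(reg);
		goto out_detach_dev;
	}

	/* ... hardware may now DMA within [uaddr, uaddr + len) ... */

	usnic_uiom_reg_release(reg, 0);
out_detach_dev:
	usnic_uiom_detach_dev_from_pd(pd, dev);
out_dealloc_pd:
	usnic_uiom_dealloc_pd(pd);
	return err;
}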
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
new file mode 100644
index 00000000000..70440996e8f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_H_
+#define USNIC_UIOM_H_
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+#include "usnic_uiom_interval_tree.h"
+
+#define USNIC_UIOM_READ (1)
+#define USNIC_UIOM_WRITE (2)
+
+#define USNIC_UIOM_MAX_PD_CNT (1000)
+#define USNIC_UIOM_MAX_MR_CNT (1000000)
+#define USNIC_UIOM_MAX_MR_SIZE (~0UL)
+#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE)
+
+struct usnic_uiom_dev {
+ struct device *dev;
+ struct list_head link;
+};
+
+struct usnic_uiom_pd {
+ struct iommu_domain *domain;
+ spinlock_t lock;
+ struct rb_root rb_root;
+ struct list_head devs;
+ int dev_cnt;
+};
+
+struct usnic_uiom_reg {
+ struct usnic_uiom_pd *pd;
+ unsigned long va;
+ size_t length;
+ int offset;
+ int page_size;
+ int writable;
+ struct list_head chunk_list;
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+struct usnic_uiom_chunk {
+ struct list_head list;
+ int nents;
+ struct scatterlist page_list[0];
+};
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
+ struct device *dev);
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd);
+void usnic_uiom_free_dev_list(struct device **devs);
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int access, int dmasync);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
+int usnic_uiom_init(char *drv_name);
+void usnic_uiom_fini(void);
+#endif /* USNIC_UIOM_H_ */
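
struct usnic_uiom_chunk above ends in a zero-length page_list[] tail, so a chunk header and its scatterlist entries live in one allocation; USNIC_UIOM_PAGE_CHUNK in usnic_uiom.c caps nents so a chunk never exceeds a page. A sketch of the sizing, mirroring the kmalloc in usnic_uiom_get_pages(); the example_* helper name is made up.

static struct usnic_uiom_chunk *example_chunk_alloc(int nents)
{
	struct usnic_uiom_chunk *chunk;

	/* header fields plus nents inline scatterlist entries */
	chunk = kmalloc(sizeof(*chunk) +
			nents * sizeof(struct scatterlist), GFP_KERNEL);
	if (!chunk)
		return NULL;

	chunk->nents = nents;
	sg_init_table(chunk->page_list, chunk->nents);
	return chunk;
}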
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
new file mode 100644
index 00000000000..3a4288e0fba
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/list_sort.h>
+
+#include <linux/interval_tree_generic.h>
+#include "usnic_uiom_interval_tree.h"
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \
+ do { \
+ node = usnic_uiom_interval_node_alloc(start, \
+ end, ref_cnt, flags); \
+ if (!node) { \
+ err = -ENOMEM; \
+ goto err_out; \
+ } \
+ } while (0)
+
+#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list))
+
+#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \
+ err_out, list) \
+ do { \
+ MAKE_NODE(node, start, end, \
+ ref_cnt, flags, err, \
+ err_out); \
+ MARK_FOR_ADD(node, list); \
+ } while (0)
+
+#define FLAGS_EQUAL(flags1, flags2, mask) \
+ (((flags1) & (mask)) == ((flags2) & (mask)))
+
+static struct usnic_uiom_interval_node*
+usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt,
+ int flags)
+{
+ struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval),
+ GFP_ATOMIC);
+ if (!interval)
+ return NULL;
+
+ interval->start = start;
+ interval->last = last;
+ interval->flags = flags;
+ interval->ref_cnt = ref_cnt;
+
+ return interval;
+}
+
+static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct usnic_uiom_interval_node *node_a, *node_b;
+
+ node_a = list_entry(a, struct usnic_uiom_interval_node, link);
+ node_b = list_entry(b, struct usnic_uiom_interval_node, link);
+
+	/* compare explicitly to avoid truncating a long difference to int */
+ if (node_a->start < node_b->start)
+ return -1;
+ else if (node_a->start > node_b->start)
+ return 1;
+
+ return 0;
+}
+
+static void
+find_intervals_intersection_sorted(struct rb_root *root, unsigned long start,
+ unsigned long last,
+ struct list_head *list)
+{
+ struct usnic_uiom_interval_node *node;
+
+ INIT_LIST_HEAD(list);
+
+ for (node = usnic_uiom_interval_tree_iter_first(root, start, last);
+ node;
+ node = usnic_uiom_interval_tree_iter_next(node, start, last))
+ list_add_tail(&node->link, list);
+
+ list_sort(NULL, list, interval_cmp);
+}
+
+int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last,
+ int flags, int flag_mask,
+ struct rb_root *root,
+ struct list_head *diff_set)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int err = 0;
+ long int pivot = start;
+ LIST_HEAD(intersection_set);
+
+ INIT_LIST_HEAD(diff_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ if (pivot < interval->start) {
+ MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1,
+ 1, flags, err, err_out,
+ diff_set);
+ pivot = interval->start;
+ }
+
+ /*
+ * Invariant: Set [start, pivot] is either in diff_set or root,
+ * but not in both.
+ */
+
+ if (pivot > interval->last) {
+ continue;
+ } else if (pivot <= interval->last &&
+ FLAGS_EQUAL(interval->flags, flags,
+ flag_mask)) {
+ pivot = interval->last + 1;
+ }
+ }
+
+ if (pivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out,
+ diff_set);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, diff_set, link) {
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ return err;
+}
+
+void usnic_uiom_put_interval_set(struct list_head *intervals)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ list_for_each_entry_safe(interval, tmp, intervals, link)
+ kfree(interval);
+}
+
+int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start,
+ unsigned long last, int flags)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ unsigned long istart, ilast;
+ int iref_cnt, iflags;
+ unsigned long lpivot = start;
+ int err = 0;
+ LIST_HEAD(to_add);
+ LIST_HEAD(intersection_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ /*
+ * Invariant - lpivot is the left edge of next interval to be
+ * inserted
+ */
+ istart = interval->start;
+ ilast = interval->last;
+ iref_cnt = interval->ref_cnt;
+ iflags = interval->flags;
+
+ if (istart < lpivot) {
+ MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else if (istart > lpivot) {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags,
+ err, err_out, &to_add);
+ lpivot = istart;
+ } else {
+ lpivot = istart;
+ }
+
+ if (ilast > last) {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ }
+
+ lpivot = ilast + 1;
+ }
+
+ if (lpivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out,
+ &to_add);
+
+ list_for_each_entry_safe(interval, tmp, &intersection_set, link) {
+ usnic_uiom_interval_tree_remove(interval, root);
+ kfree(interval);
+ }
+
+ list_for_each_entry(interval, &to_add, link)
+ usnic_uiom_interval_tree_insert(interval, root);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, &to_add, link)
+ kfree(interval);
+
+ return err;
+}
+
+void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start,
+ unsigned long last, struct list_head *removed)
+{
+ struct usnic_uiom_interval_node *interval;
+
+ for (interval = usnic_uiom_interval_tree_iter_first(root, start, last);
+ interval;
+ interval = usnic_uiom_interval_tree_iter_next(interval,
+ start,
+ last)) {
+ if (--interval->ref_cnt == 0)
+ list_add_tail(&interval->link, removed);
+ }
+
+ list_for_each_entry(interval, removed, link)
+ usnic_uiom_interval_tree_remove(interval, root);
+}
+
+INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb,
+ unsigned long, __subtree_last,
+ START, LAST, , usnic_uiom_interval_tree)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
new file mode 100644
index 00000000000..d4f752e258f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_INTERVAL_TREE_H_
+#define USNIC_UIOM_INTERVAL_TREE_H_
+
+#include <linux/rbtree.h>
+
+struct usnic_uiom_interval_node {
+ struct rb_node rb;
+ struct list_head link;
+ unsigned long start;
+ unsigned long last;
+ unsigned long __subtree_last;
+ unsigned int ref_cnt;
+ int flags;
+};
+
+extern void
+usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node,
+ struct rb_root *root);
+extern void
+usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node,
+ struct rb_root *root);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_first(struct rb_root *root,
+ unsigned long start,
+ unsigned long last);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node,
+ unsigned long start, unsigned long last);
+/*
+ * Inserts {start...last} into {root}. If there are overlaps,
+ * nodes will be broken up and merged
+ */
+int usnic_uiom_insert_interval(struct rb_root *root,
+ unsigned long start, unsigned long last,
+ int flags);
+/*
+ * Removes {start...last} from {root}. The nodes removed are returned in
+ * 'removed'. The caller is responsible for freeing the memory of the
+ * nodes in 'removed'.
+ */
+void usnic_uiom_remove_interval(struct rb_root *root,
+ unsigned long start, unsigned long last,
+ struct list_head *removed);
+/*
+ * Returns {start...last} - {root} (relative complement of {start...last} in
+ * {root}) in diff_set, sorted in ascending order
+ */
+int usnic_uiom_get_intervals_diff(unsigned long start,
+ unsigned long last, int flags,
+ int flag_mask,
+ struct rb_root *root,
+ struct list_head *diff_set);
+/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */
+void usnic_uiom_put_interval_set(struct list_head *intervals);
+#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
new file mode 100644
index 00000000000..656b88c39ed
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "usnic_ib.h"
+#include "vnic_resource.h"
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+
+struct usnic_vnic {
+ struct vnic_dev *vdev;
+ struct vnic_dev_bar bar[PCI_NUM_RESOURCES];
+ struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX];
+ spinlock_t res_lock;
+};
+
+static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ vnic_res_type,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ vnic_res_type,
+ static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = {
+ USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+ if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+ return RES_TYPE_MAX;
+
+ return usnic_vnic_type_2_vnic_type[res_type];
+}
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ desc,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ desc,
+ static const char * const usnic_vnic_res_type_desc[] = {
+ USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+ if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+ return "unknown";
+
+ return usnic_vnic_res_type_desc[res_type];
+
+}
+
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic)
+{
+ return pci_name(usnic_vnic_get_pdev(vnic));
+}
+
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf,
+ int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int))
+{
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_vnic_res *res;
+ struct vnic_dev_bar *bar0;
+ int i, j, offset;
+
+ offset = 0;
+ bar0 = usnic_vnic_get_bar(vnic, 0);
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ",
+ usnic_vnic_get_index(vnic),
+ &bar0->bus_addr,
+ bar0->vaddr, bar0->len);
+ if (printtitle)
+ offset += printtitle(hdr_obj, buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|RES\t|CTRL_PIN\t\t|IN_USE\t");
+ if (printcols)
+ offset += printcols(buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+
+ spin_lock(&vnic->res_lock);
+ for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) {
+ chunk = &vnic->chunks[i];
+ for (j = 0; j < chunk->cnt; j++) {
+ res = chunk->res[j];
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|%s[%u]\t|0x%p\t|%u\t",
+ usnic_vnic_res_type_to_str(res->type),
+ res->vnic_idx, res->ctrl, !!res->owner);
+ if (printrow) {
+ offset += printrow(res->owner, buf + offset,
+ buf_sz - offset);
+ }
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "\n");
+ }
+ }
+ spin_unlock(&vnic->res_lock);
+ return offset;
+}
+
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt)
+{
+ int i;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ if (spec->resources[i].type == trgt_type) {
+ spec->resources[i].cnt = cnt;
+ return;
+ }
+ }
+
+ WARN_ON(1);
+}
+
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ int found, i, j;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ found = 0;
+
+ for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) {
+ if (res_spec->resources[i].type !=
+ min_spec->resources[i].type)
+ continue;
+ found = 1;
+ if (min_spec->resources[i].cnt >
+ res_spec->resources[i].cnt)
+ return -EINVAL;
+ break;
+ }
+
+ if (!found)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ enum usnic_vnic_res_type res_type;
+ int res_cnt;
+ int i;
+ int offset = 0;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "Res: %s Cnt: %d ",
+ usnic_vnic_res_type_to_str(res_type),
+ res_cnt);
+ }
+
+ return offset;
+}
+
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ int i;
+ enum usnic_vnic_res_type res_type;
+ int res_cnt;
+
+ for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+ res_type = res_spec->resources[i].type;
+ res_cnt = res_spec->resources[i].cnt;
+
+ if (res_type == USNIC_VNIC_RES_TYPE_EOL)
+ break;
+
+ if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type))
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type)
+{
+ return vnic->chunks[type].cnt;
+}
+
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type)
+{
+ return vnic->chunks[type].free_cnt;
+}
+
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
+ int cnt, void *owner)
+{
+ struct usnic_vnic_res_chunk *src, *ret;
+ struct usnic_vnic_res *res;
+ int i;
+
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+ return ERR_PTR(-EINVAL);
+
+ ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
+ if (!ret) {
+ usnic_err("Failed to allocate chunk for %s - Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
+ if (!ret->res) {
+ usnic_err("Failed to allocate resources for %s. Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
+ }
+
+ spin_unlock(&vnic->res_lock);
+ ret->type = type;
+ ret->vnic = vnic;
+ WARN_ON(ret->cnt != cnt);
+
+ return ret;
+}
+
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
+{
+
+ struct usnic_vnic_res *res;
+ int i;
+ struct usnic_vnic *vnic = chunk->vnic;
+
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
+
+ kfree(chunk->res);
+ kfree(chunk);
+}
+
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
+{
+ return usnic_vnic_get_pdev(vnic)->devfn - 1;
+}
+
+static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ struct usnic_vnic_res_chunk *chunk)
+{
+ int cnt, err, i;
+ struct usnic_vnic_res *res;
+
+ cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
+ if (cnt < 1)
+ return -EINVAL;
+
+ chunk->cnt = chunk->free_cnt = cnt;
+ chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
+ if (!chunk->res)
+ return -ENOMEM;
+
+ for (i = 0; i < cnt; i++) {
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ res->type = type;
+ res->vnic_idx = i;
+ res->vnic = vnic;
+ res->ctrl = vnic_dev_get_res(vnic->vdev,
+ _to_vnic_res_type(type), i);
+ chunk->res[i] = res;
+ }
+
+ chunk->vnic = vnic;
+ return 0;
+fail:
+ for (i--; i >= 0; i--)
+ kfree(chunk->res[i]);
+ kfree(chunk->res);
+ return err;
+}
+
+static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
+{
+ int i;
+ for (i = 0; i < chunk->cnt; i++)
+ kfree(chunk->res[i]);
+ kfree(chunk->res);
+}
+
+static int usnic_vnic_discover_resources(struct pci_dev *pdev,
+ struct usnic_vnic *vnic)
+{
+ enum usnic_vnic_res_type res_type;
+ int i;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ vnic->bar[i].len = pci_resource_len(pdev, i);
+ vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
+ if (!vnic->bar[i].vaddr) {
+ usnic_err("Cannot memory-map BAR %d, aborting\n",
+ i);
+ err = -ENODEV;
+ goto out_clean_bar;
+ }
+ vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
+ }
+
+ vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
+ ARRAY_SIZE(vnic->bar));
+ if (!vnic->vdev) {
+ usnic_err("Failed to register device %s\n",
+ pci_name(pdev));
+ err = -EINVAL;
+ goto out_clean_bar;
+ }
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
+ err = usnic_vnic_alloc_res_chunk(vnic, res_type,
+ &vnic->chunks[res_type]);
+ if (err) {
+ usnic_err("Failed to alloc res %s with err %d\n",
+ usnic_vnic_res_type_to_str(res_type),
+ err);
+ goto out_clean_chunks;
+ }
+ }
+
+ return 0;
+
+out_clean_chunks:
+ for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
+ usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+ vnic_dev_unregister(vnic->vdev);
+out_clean_bar:
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ if (!vnic->bar[i].vaddr)
+ break;
+
+ iounmap(vnic->bar[i].vaddr);
+ }
+
+ return err;
+}
+
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
+{
+ return vnic_dev_get_pdev(vnic->vdev);
+}
+
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num)
+{
+ return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL;
+}
+
+static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
+{
+ int i;
+ struct pci_dev *pdev;
+ enum usnic_vnic_res_type res_type;
+
+ pdev = usnic_vnic_get_pdev(vnic);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+ res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
+ usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+
+ vnic_dev_unregister(vnic->vdev);
+
+ for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+ if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+ continue;
+ iounmap(vnic->bar[i].vaddr);
+ }
+}
+
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
+{
+ struct usnic_vnic *vnic;
+ int err = 0;
+
+ if (!pci_is_enabled(pdev)) {
+ usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
+ return ERR_PTR(-EINVAL);
+ }
+
+ vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
+ if (!vnic) {
+ usnic_err("Failed to alloc vnic for %s - out of memory\n",
+ pci_name(pdev));
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spin_lock_init(&vnic->res_lock);
+
+ err = usnic_vnic_discover_resources(pdev, vnic);
+ if (err) {
+ usnic_err("Failed to discover %s resources with err %d\n",
+ pci_name(pdev), err);
+ goto out_free_vnic;
+ }
+
+ usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic));
+
+ return vnic;
+
+out_free_vnic:
+ kfree(vnic);
+
+ return ERR_PTR(err);
+}
+
+void usnic_vnic_free(struct usnic_vnic *vnic)
+{
+ usnic_vnic_release_resources(vnic);
+ kfree(vnic);
+}
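
Resource handout in the file above is chunk-based: usnic_vnic_get_resources() claims cnt free entries of one type under res_lock and tags them with an owner cookie, and usnic_vnic_put_resources() returns them. A hypothetical caller sketch; the example_* name and the owner argument are illustrative only.

static int example_claim_rqs(struct usnic_vnic *vnic, void *owner, int cnt)
{
	struct usnic_vnic_res_chunk *chunk;
	int i;

	chunk = usnic_vnic_get_resources(vnic, USNIC_VNIC_RES_TYPE_RQ,
					 cnt, owner);
	if (IS_ERR(chunk))
		return PTR_ERR(chunk);

	for (i = 0; i < chunk->cnt; i++)
		usnic_dbg("claimed %s[%u]\n",
			  usnic_vnic_res_type_to_str(chunk->res[i]->type),
			  chunk->res[i]->vnic_idx);

	usnic_vnic_put_resources(chunk);
	return 0;
}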
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.h b/drivers/infiniband/hw/usnic/usnic_vnic.h
new file mode 100644
index 00000000000..14d931a8829
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_VNIC_H_
+#define USNIC_VNIC_H_
+
+#include <linux/pci.h>
+
+#include "vnic_dev.h"
+
+/* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */
+#define USNIC_VNIC_RES_TYPES \
+ DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
+ DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
+ DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
+ DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
+ DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
+ DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\
+
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
+enum usnic_vnic_res_type {
+ USNIC_VNIC_RES_TYPES
+};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+struct usnic_vnic_res {
+ enum usnic_vnic_res_type type;
+ unsigned int vnic_idx;
+ struct usnic_vnic *vnic;
+ void __iomem *ctrl;
+ void *owner;
+};
+
+struct usnic_vnic_res_chunk {
+ enum usnic_vnic_res_type type;
+ int cnt;
+ int free_cnt;
+ struct usnic_vnic_res **res;
+ struct usnic_vnic *vnic;
+};
+
+struct usnic_vnic_res_desc {
+ enum usnic_vnic_res_type type;
+ uint16_t cnt;
+};
+
+struct usnic_vnic_res_spec {
+ struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
+};
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int));
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt);
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ int cnt,
+ void *owner);
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num);
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
+void usnic_vnic_free(struct usnic_vnic *vnic);
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
+
+#endif /*!USNIC_VNIC_H_*/
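
usnic_vnic_res_spec above is a fixed-size table terminated by an EOL entry; usnic_vnic_check_room() walks it until EOL and compares each requested count against the vnic's free count for that type. A sketch of building a minimum spec and checking it; the example_* name and the requested counts are made up for illustration.

static int example_check_min_spec(struct usnic_vnic *vnic)
{
	struct usnic_vnic_res_spec min_spec = {
		.resources = {
			{ .type = USNIC_VNIC_RES_TYPE_WQ,  .cnt = 1 },
			{ .type = USNIC_VNIC_RES_TYPE_RQ,  .cnt = 1 },
			{ .type = USNIC_VNIC_RES_TYPE_CQ,  .cnt = 1 },
			{ .type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0 },
		},
	};

	/* returns 0 if the vnic has room, -EBUSY otherwise */
	return usnic_vnic_check_room(vnic, &min_spec);
}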