From 47d06e532e95b71c0db3839ebdef3fe8812fca2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 10 Sep 2013 10:52:35 -0400 Subject: random: run random_int_secret_init() run after all late_initcalls The some platforms (e.g., ARM) initializes their clocks as late_initcalls for some unknown reason. So make sure random_int_secret_init() is run after all of the late_initcalls are run. Cc: stable@vger.kernel.org Signed-off-by: "Theodore Ts'o" --- include/linux/random.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index 3b9377d6b7a..6312dd9ba44 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -17,6 +17,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); +extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; -- cgit v1.2.3-70-g09d2 From 82aeef0bf03684b377678c00c05e613f30dca39c Mon Sep 17 00:00:00 2001 From: "Li, Zhen-Hua" Date: Fri, 13 Sep 2013 14:27:32 +0800 Subject: x86/iommu: correct ICS register offset According to Intel Vt-D specs, the offset of Invalidation complete status register should be 0x9C, not 0x98. See Intel's VT-d spec, Revision 1.3, Chapter 10.4, Page 98; Signed-off-by: Li, Zhen-Hua Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 78e2ada50cd..d380c5e6800 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -55,7 +55,7 @@ #define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */ #define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ -#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */ +#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) -- cgit v1.2.3-70-g09d2 From 19872d20c890073c5207d9e02bb8f14d451a11eb Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 9 Sep 2013 18:33:54 +0200 Subject: HID: uhid: allocate static minor udev has this nice feature of creating "dead" /dev/ device-nodes if it finds a devnode: modalias. Once the node is accessed, the kernel automatically loads the module that provides the node. However, this requires udev to know the major:minor code to use for the node. This feature was introduced by: commit 578454ff7eab61d13a26b568f99a89a2c9edc881 Author: Kay Sievers Date: Thu May 20 18:07:20 2010 +0200 driver core: add devname module aliases to allow module on-demand auto-loading However, uhid uses dynamic minor numbers so this doesn't actually work. We need to load uhid to know which minor it's going to use. Hence, allocate a static minor (just like uinput does) and we're good to go. Reported-by: Tom Gundersen Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/uhid.c | 3 ++- include/linux/miscdevice.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 5bf2fb78584..93b00d76374 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -615,7 +615,7 @@ static const struct file_operations uhid_fops = { static struct miscdevice uhid_misc = { .fops = &uhid_fops, - .minor = MISC_DYNAMIC_MINOR, + .minor = UHID_MINOR, .name = UHID_NAME, }; @@ -634,4 +634,5 @@ module_exit(uhid_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Herrmann "); MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem"); +MODULE_ALIAS_MISCDEV(UHID_MINOR); MODULE_ALIAS("devname:" UHID_NAME); diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 09c2300ddb3..cb358355ef4 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -45,6 +45,7 @@ #define MAPPER_CTRL_MINOR 236 #define LOOP_CTRL_MINOR 237 #define VHOST_NET_MINOR 238 +#define UHID_MINOR 239 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3-70-g09d2 From 26794942461f438a6bc725ec7294b08a6bd782c4 Mon Sep 17 00:00:00 2001 From: David Miller Date: Wed, 2 Oct 2013 14:25:09 -0400 Subject: mm: Fix generic hugetlb pte check return type. The include/asm-generic/hugetlb.h stubs that just vector huge_pte_*() calls to the pte_*() implementations won't work in certain situations. x86 and sparc, for example, return "unsigned long" from the bit checks, and just go "return pte_val(pte) & PTE_BIT_FOO;" But since huge_pte_*() returns 'int', if any high bits on 64-bit are relevant, they get chopped off. The net effect is that we can loop forever trying to COW a huge page, because the huge_pte_write() check signals false all the time. Reported-by: Gurudas Pai Tested-by: Gurudas Pai Signed-off-by: David S. Miller Acked-by: David Rientjes --- include/asm-generic/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index d06079c774a..99b490b4d05 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -6,12 +6,12 @@ static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) return mk_pte(page, pgprot); } -static inline int huge_pte_write(pte_t pte) +static inline unsigned long huge_pte_write(pte_t pte) { return pte_write(pte); } -static inline int huge_pte_dirty(pte_t pte) +static inline unsigned long huge_pte_dirty(pte_t pte) { return pte_dirty(pte); } -- cgit v1.2.3-70-g09d2 From 9886167d20c0720dcfb01e62cdff4d906b226f43 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Oct 2013 16:02:23 +0200 Subject: perf: Fix perf_pmu_migrate_context While auditing the list_entry usage due to a trinity bug I found that perf_pmu_migrate_context violates the rules for perf_event::event_entry. The problem is that perf_event::event_entry is a RCU list element, and hence we must wait for a full RCU grace period before re-using the element after deletion. Therefore the usage in perf_pmu_migrate_context() which re-uses the entry immediately is broken. For now introduce another list_head into perf_event for this specific usage. This doesn't actually fix the trinity report because that never goes through this code. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-mkj72lxagw1z8fvjm648iznw@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 24 +++++++++++++++++++++++- kernel/events/core.c | 6 +++--- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 866e85c5eb9..c8ba627c1d6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -294,9 +294,31 @@ struct ring_buffer; */ struct perf_event { #ifdef CONFIG_PERF_EVENTS - struct list_head group_entry; + /* + * entry onto perf_event_context::event_list; + * modifications require ctx->lock + * RCU safe iterations. + */ struct list_head event_entry; + + /* + * XXX: group_entry and sibling_list should be mutually exclusive; + * either you're a sibling on a group, or you're the group leader. + * Rework the code to always use the same list element. + * + * Locked for modification by both ctx->mutex and ctx->lock; holding + * either sufficies for read. + */ + struct list_head group_entry; struct list_head sibling_list; + + /* + * We need storage to track the entries in perf_pmu_migrate_context; we + * cannot use the event_entry because of RCU and we want to keep the + * group in tact which avoids us using the other two entries. + */ + struct list_head migrate_entry; + struct hlist_node hlist_entry; int nr_siblings; int group_flags; diff --git a/kernel/events/core.c b/kernel/events/core.c index cb4238e85b3..d49a9d29334 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7234,15 +7234,15 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) perf_remove_from_context(event); unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); - list_add(&event->event_entry, &events); + list_add(&event->migrate_entry, &events); } mutex_unlock(&src_ctx->mutex); synchronize_rcu(); mutex_lock(&dst_ctx->mutex); - list_for_each_entry_safe(event, tmp, &events, event_entry) { - list_del(&event->event_entry); + list_for_each_entry_safe(event, tmp, &events, migrate_entry) { + list_del(&event->migrate_entry); if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; account_event_cpu(event, dst_cpu); -- cgit v1.2.3-70-g09d2 From d623a0e19dcbc4e44a8db047158815d7f8c2b839 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Oct 2013 10:22:01 -0700 Subject: ARM: dts: Fix pinctrl mask for omap3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wake-up interrupt bit is available on omap3/4/5 processors unlike what we claim. Without fixing it we cannot use it on omap3 and the system configured for wake-up events will just hang on wake-up. Cc: Grygorii Strashko Cc: Benoît Cousson Cc: devicetree@vger.kernel.org Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3.dtsi | 4 ++-- arch/arm/mach-omap2/mux.h | 4 +--- include/dt-bindings/pinctrl/omap.h | 4 +--- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 7d95cda1fae..b41bd57f432 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -108,7 +108,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; omap3_pmx_wkup: pinmux@0x48002a00 { @@ -117,7 +117,7 @@ #address-cells = <1>; #size-cells = <0>; pinctrl-single,register-width = <16>; - pinctrl-single,function-mask = <0x7f1f>; + pinctrl-single,function-mask = <0xff1f>; }; gpio1: gpio@48310000 { diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h index 5d2080ef792..16f78a990d0 100644 --- a/arch/arm/mach-omap2/mux.h +++ b/arch/arm/mach-omap2/mux.h @@ -28,7 +28,7 @@ #define OMAP_PULL_UP (1 << 4) #define OMAP_ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define OMAP_INPUT_EN (1 << 8) #define OMAP_OFF_EN (1 << 9) #define OMAP_OFFOUT_EN (1 << 10) @@ -36,8 +36,6 @@ #define OMAP_OFF_PULL_EN (1 << 12) #define OMAP_OFF_PULL_UP (1 << 13) #define OMAP_WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define OMAP_WAKEUP_EVENT (1 << 15) /* Active pin states */ diff --git a/include/dt-bindings/pinctrl/omap.h b/include/dt-bindings/pinctrl/omap.h index edbd250809c..bed35e36fd2 100644 --- a/include/dt-bindings/pinctrl/omap.h +++ b/include/dt-bindings/pinctrl/omap.h @@ -23,7 +23,7 @@ #define PULL_UP (1 << 4) #define ALTELECTRICALSEL (1 << 5) -/* 34xx specific mux bit defines */ +/* omap3/4/5 specific mux bit defines */ #define INPUT_EN (1 << 8) #define OFF_EN (1 << 9) #define OFFOUT_EN (1 << 10) @@ -31,8 +31,6 @@ #define OFF_PULL_EN (1 << 12) #define OFF_PULL_UP (1 << 13) #define WAKEUP_EN (1 << 14) - -/* 44xx specific mux bit defines */ #define WAKEUP_EVENT (1 << 15) /* Active pin states */ -- cgit v1.2.3-70-g09d2 From c1868b822515313c72445e70b7d9e47d8815bc52 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 11 Sep 2013 16:35:25 +0300 Subject: mlx5: Remove checksum on command interface commands Checksum calculations consume CPU resources and can be significant to the rate of resource creation/destruction. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 26 ++++++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/main.c | 21 +++++---------------- include/linux/mlx5/device.h | 2 +- include/linux/mlx5/driver.h | 4 +--- 4 files changed, 21 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 5472cbd3402..db3a6584939 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -180,28 +180,32 @@ static int verify_block_sig(struct mlx5_cmd_prot_block *block) return 0; } -static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token) +static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, + int csum) { block->token = token; - block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2); - block->sig = ~xor8_buf(block, sizeof(*block) - 1); + if (csum) { + block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - + sizeof(block->data) - 2); + block->sig = ~xor8_buf(block, sizeof(*block) - 1); + } } -static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token) +static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) { struct mlx5_cmd_mailbox *next = msg->next; while (next) { - calc_block_sig(next->buf, token); + calc_block_sig(next->buf, token, csum); next = next->next; } } -static void set_signature(struct mlx5_cmd_work_ent *ent) +static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); - calc_chain_sig(ent->in, ent->token); - calc_chain_sig(ent->out, ent->token); + calc_chain_sig(ent->in, ent->token, csum); + calc_chain_sig(ent->out, ent->token, csum); } static void poll_timeout(struct mlx5_cmd_work_ent *ent) @@ -539,8 +543,7 @@ static void cmd_work_handler(struct work_struct *work) lay->type = MLX5_PCI_CMD_XPORT; lay->token = ent->token; lay->status_own = CMD_OWNER_HW; - if (!cmd->checksum_disabled) - set_signature(ent); + set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); ktime_get_ts(&ent->ts1); @@ -773,8 +776,6 @@ static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE); block = next->buf; - if (xor8_buf(block, sizeof(*block)) != 0xff) - return -EINVAL; memcpy(to, block->data, copy); to += copy; @@ -1361,6 +1362,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_map; } + cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b47739b0b5f..bc0f5fb66e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -165,9 +165,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL; struct mlx5_cmd_query_hca_cap_mbox_in query_ctx; struct mlx5_cmd_set_hca_cap_mbox_out set_out; - struct mlx5_profile *prof = dev->profile; u64 flags; - int csum = 1; int err; memset(&query_ctx, 0, sizeof(query_ctx)); @@ -197,20 +195,14 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) memcpy(&set_ctx->hca_cap, &query_out->hca_cap, sizeof(set_ctx->hca_cap)); - if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) { - csum = !!prof->cmdif_csum; - flags = be64_to_cpu(set_ctx->hca_cap.flags); - if (csum) - flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - else - flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; - - set_ctx->hca_cap.flags = cpu_to_be64(flags); - } - if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE) set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp; + flags = be64_to_cpu(query_out->hca_cap.flags); + /* disable checksum */ + flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM; + + set_ctx->hca_cap.flags = cpu_to_be64(flags); memset(&set_out, 0, sizeof(set_out)); set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12); set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP); @@ -225,9 +217,6 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) if (err) goto query_ex; - if (!csum) - dev->cmd.checksum_disabled = 1; - query_ex: kfree(query_out); kfree(set_ctx); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 68029b30c3d..770e3b448b3 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -181,7 +181,7 @@ enum { MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_DCT = 1LL << 41, - MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46, + MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; enum { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8888381fc15..2cfc4309d45 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -747,8 +747,7 @@ static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, - MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1, - MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2, + MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, }; enum { @@ -758,7 +757,6 @@ enum { struct mlx5_profile { u64 mask; u32 log_max_qp; - int cmdif_csum; struct { int size; int limit; -- cgit v1.2.3-70-g09d2 From 2f6daec14d02deb84e7896a93196d78fbe9956a2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 11 Sep 2013 16:35:29 +0300 Subject: mlx5: Fix layout of struct mlx5_init_seg The layout of struct health_buffer was not according to firmware specification. Fix it to comply. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 770e3b448b3..5eb4e31af22 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -417,7 +417,7 @@ struct mlx5_init_seg { struct health_buffer health; __be32 rsvd2[884]; __be32 health_counter; - __be32 rsvd3[1023]; + __be32 rsvd3[1019]; __be64 ieee1588_clk; __be32 ieee1588_clk_type; __be32 clr_intx; -- cgit v1.2.3-70-g09d2 From ada9f5d007971a71d619e2abf66ebd3a9a399413 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Sep 2013 16:35:34 +0300 Subject: IB/mlx5: Fix eq names to display nicely in /proc/interrupts It's helpful for a driver to put the pci slot name in its interrupt names, so /proc/interrupts will show the pci slot of the device. Signed-off-by: Sagi Grimberg Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 +++- include/linux/mlx5/driver.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b267c65261c..b1a6cb3a280 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) static int alloc_comp_eqs(struct mlx5_ib_dev *dev) { struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + char name[MLX5_MAX_EQ_NAME]; struct mlx5_eq *eq, *n; int ncomp_vec; int nent; @@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev) goto clean; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(&dev->mdev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - eq->name, - &dev->mdev.priv.uuari.uars[0]); + name, &dev->mdev.priv.uuari.uars[0]); if (err) { kfree(eq); goto clean; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 443cc4d7b02..2231d93cc7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -366,9 +366,11 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, goto err_in; } + snprintf(eq->name, MLX5_MAX_EQ_NAME, "%s@pci:%s", + name, pci_name(dev->pdev)); eq->eqn = out.eq_number; err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0, - name, eq); + eq->name, eq); if (err) goto err_eq; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2cfc4309d45..6b8c496572c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -82,7 +82,7 @@ enum { }; enum { - MLX5_MAX_EQ_NAME = 20 + MLX5_MAX_EQ_NAME = 32 }; enum { -- cgit v1.2.3-70-g09d2 From 61875f30daf60305712e25b209ef41ced2635bad Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 21 Sep 2013 13:58:22 -0400 Subject: random: allow architectures to optionally define random_get_entropy() Allow architectures which have a disabled get_cycles() function to provide a random_get_entropy() function which provides a fine-grained, rapidly changing counter that can be used by the /dev/random driver. For example, an architecture might have a rapidly changing register used to control random TLB cache eviction, or DRAM refresh that doesn't meet the requirements of get_cycles(), but which is good enough for the needs of the random driver. Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- drivers/char/random.c | 8 ++++---- include/linux/timex.h | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/char/random.c b/drivers/char/random.c index 92e6c67e1ae..2d5daf9b58e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -643,7 +643,7 @@ struct timer_rand_state { */ void add_device_randomness(const void *buf, unsigned int size) { - unsigned long time = get_cycles() ^ jiffies; + unsigned long time = random_get_entropy() ^ jiffies; mix_pool_bytes(&input_pool, buf, size, NULL); mix_pool_bytes(&input_pool, &time, sizeof(time), NULL); @@ -680,7 +680,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) goto out; sample.jiffies = jiffies; - sample.cycles = get_cycles(); + sample.cycles = random_get_entropy(); sample.num = num; mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL); @@ -747,7 +747,7 @@ void add_interrupt_randomness(int irq, int irq_flags) struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; - __u32 input[4], cycles = get_cycles(); + __u32 input[4], cycles = random_get_entropy(); input[0] = cycles ^ jiffies; input[1] = irq; @@ -1485,7 +1485,7 @@ unsigned int get_random_int(void) hash = get_cpu_var(get_random_int_hash); - hash[0] += current->pid + jiffies + get_cycles(); + hash[0] += current->pid + jiffies + random_get_entropy(); md5_transform(hash, random_int_secret); ret = hash[0]; put_cpu_var(get_random_int_hash); diff --git a/include/linux/timex.h b/include/linux/timex.h index b3726e61368..da4c32dbb2a 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -64,6 +64,20 @@ #include +#ifndef random_get_entropy +/* + * The random_get_entropy() function is used by the /dev/random driver + * in order to extract entropy via the relative unpredictability of + * when an interrupt takes places versus a high speed, fine-grained + * timing source or cycle counter. Since it will be occurred on every + * single interrupt, it must have a very low cost/overhead. + * + * By default we use get_cycles() for this purpose, but individual + * architectures may override this in their asm/timex.h header file. + */ +#define random_get_entropy() get_cycles() +#endif + /* * SHIFT_PLL is used as a dampening factor to define how much we * adjust the frequency correction for a given offset in PLL mode. -- cgit v1.2.3-70-g09d2 From ebff5fa9d545574324095d9c6a3cb80c9157abc5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Oct 2013 15:12:04 +1000 Subject: Revert "i915: Update VGA arbiter support for newer devices" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 81b5c7bc8de3e6f63419139c2fc91bf81dea8a7d. Adding drm/i915 into the vga arbiter chain means that X (in a piece of well-meant paranoia) will do a get/put on the vga decoding around _every_ accel call down into the ddx. Which results in some nice performance disasters [1]. This really breaks userspace, by disabling DRI for everyone, and stops OpenGL from working, this isn't limited to just the i915 but both the integrated and discrete GPUs on multi-gpu systems, in other words this causes untold worlds of pain, Ville tried to come up with a Great Hack to fiddle the required VGA I/O ops behind everyone's back using stop_machine, but that didn't really work out [2]. Given that we're fairly late in the -rc stage for such games let's just revert this all. One thing we might want to keep is to delay the disabling of the vga decoding until the fbdev emulation and the fbcon screen is set up. If we kill vga mem decoding beforehand fbcon can end up with a white square in the top-left corner it tried to save from the vga memory for a seamless transition. And we have bug reports on older platforms which seem to match these symptoms. But again that's something to play around with in -next. References: [1] http://lists.x.org/archives/xorg-devel/2013-September/037763.html References: [2] http://www.spinics.net/lists/intel-gfx/msg34062.html Cc: Alex Williamson Cc: Ville Syrjälä Cc: Chris Wilson Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 9 +++------ drivers/gpu/drm/i915/intel_display.c | 25 ------------------------- include/linux/vgaarb.h | 7 ------- 3 files changed, 3 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 52f5ad8037c..d5c784d4867 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1290,12 +1290,9 @@ static int i915_load_modeset_init(struct drm_device *dev) * then we do not take part in VGA arbitration and the * vga_client_register() fails with -ENODEV. */ - if (!HAS_PCH_SPLIT(dev)) { - ret = vga_client_register(dev->pdev, dev, NULL, - i915_vga_set_decode); - if (ret && ret != -ENODEV) - goto out; - } + ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode); + if (ret && ret != -ENODEV) + goto out; intel_register_dsm_handler(); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index aaea3ec811e..581fb4b2f76 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10038,15 +10038,6 @@ static void i915_disable_vga(struct drm_device *dev) outb(SR01, VGA_SR_INDEX); sr1 = inb(VGA_SR_DATA); outb(sr1 | 1<<5, VGA_SR_DATA); - - /* Disable VGA memory on Intel HD */ - if (HAS_PCH_SPLIT(dev)) { - outb(inb(VGA_MSR_READ) & ~VGA_MSR_MEM_EN, VGA_MSR_WRITE); - vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO | - VGA_RSRC_NORMAL_IO | - VGA_RSRC_NORMAL_MEM); - } - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); udelay(300); @@ -10054,20 +10045,6 @@ static void i915_disable_vga(struct drm_device *dev) POSTING_READ(vga_reg); } -static void i915_enable_vga(struct drm_device *dev) -{ - /* Enable VGA memory on Intel HD */ - if (HAS_PCH_SPLIT(dev)) { - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ) | VGA_MSR_MEM_EN, VGA_MSR_WRITE); - vga_set_legacy_decoding(dev->pdev, VGA_RSRC_LEGACY_IO | - VGA_RSRC_LEGACY_MEM | - VGA_RSRC_NORMAL_IO | - VGA_RSRC_NORMAL_MEM); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); - } -} - void intel_modeset_init_hw(struct drm_device *dev) { intel_init_power_well(dev); @@ -10559,8 +10536,6 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_disable_fbc(dev); - i915_enable_vga(dev); - intel_disable_gt_powersave(dev); ironlake_teardown_rc6(dev); diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h index 80cf8173a65..2c02f3a8d2b 100644 --- a/include/linux/vgaarb.h +++ b/include/linux/vgaarb.h @@ -65,15 +65,8 @@ struct pci_dev; * out of the arbitration process (and can be safe to take * interrupts at any time. */ -#if defined(CONFIG_VGA_ARB) extern void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes); -#else -static inline void vga_set_legacy_decoding(struct pci_dev *pdev, - unsigned int decodes) -{ -} -#endif /** * vga_get - acquire & locks VGA resources -- cgit v1.2.3-70-g09d2 From 3f0116c3238a96bc18ad4b4acefe4e7be32fa861 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 10 Oct 2013 10:16:30 +0200 Subject: compiler/gcc4: Add quirk for 'asm goto' miscompilation bug Fengguang Wu, Oleg Nesterov and Peter Zijlstra tracked down a kernel crash to a GCC bug: GCC miscompiles certain 'asm goto' constructs, as outlined here: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 Implement a workaround suggested by Jakub Jelinek. Reported-and-tested-by: Fengguang Wu Reported-by: Oleg Nesterov Reported-by: Peter Zijlstra Suggested-by: Jakub Jelinek Reviewed-by: Richard Henderson Cc: Linus Torvalds Cc: Andrew Morton Cc: Signed-off-by: Ingo Molnar --- arch/arm/include/asm/jump_label.h | 2 +- arch/mips/include/asm/jump_label.h | 2 +- arch/powerpc/include/asm/jump_label.h | 2 +- arch/s390/include/asm/jump_label.h | 2 +- arch/sparc/include/asm/jump_label.h | 2 +- arch/x86/include/asm/cpufeature.h | 6 +++--- arch/x86/include/asm/jump_label.h | 2 +- arch/x86/include/asm/mutex_64.h | 4 ++-- include/linux/compiler-gcc4.h | 15 +++++++++++++++ 9 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index bfc198c7591..863c892b4aa 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -16,7 +16,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" JUMP_LABEL_NOP "\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 4d6d77ed9b9..e194f957ca8 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -22,7 +22,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\tnop\n\t" + asm_volatile_goto("1:\tnop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index ae098c438f0..f016bb699b5 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -19,7 +19,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 6c32190dc73..346b1c85ffb 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -15,7 +15,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("0: brcl 0,0\n" + asm_volatile_goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" ASM_ALIGN "\n" ASM_PTR " 0b, %l[label], %0\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index 5080d16a832..ec2e2e2aba7 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -9,7 +9,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:\n\t" + asm_volatile_goto("1:\n\t" "nop\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index d3f5c63078d..89270b4318d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -374,7 +374,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) * Catch too early usage of this before alternatives * have run. */ - asm goto("1: jmp %l[t_warn]\n" + asm_volatile_goto("1: jmp %l[t_warn]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -388,7 +388,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) #endif - asm goto("1: jmp %l[t_no]\n" + asm_volatile_goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" @@ -453,7 +453,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) * have. Thus, we force the jump to the widest, 4-byte, signed relative * offset even though the last would often fit in less bytes. */ - asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" "2:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 64507f35800..6a2cefb4395 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -18,7 +18,7 @@ static __always_inline bool arch_static_branch(struct static_key *key) { - asm goto("1:" + asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index e7e6751648e..07537a44216 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -20,7 +20,7 @@ static inline void __mutex_fastpath_lock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " decl %0\n" + asm_volatile_goto(LOCK_PREFIX " decl %0\n" " jns %l[exit]\n" : : "m" (v->counter) : "memory", "cc" @@ -75,7 +75,7 @@ static inline int __mutex_fastpath_lock_retval(atomic_t *count) static inline void __mutex_fastpath_unlock(atomic_t *v, void (*fail_fn)(atomic_t *)) { - asm volatile goto(LOCK_PREFIX " incl %0\n" + asm_volatile_goto(LOCK_PREFIX " incl %0\n" " jg %l[exit]\n" : : "m" (v->counter) : "memory", "cc" diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 842de225055..ded429966c1 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -65,6 +65,21 @@ #define __visible __attribute__((externally_visible)) #endif +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#if GCC_VERSION <= 40801 +# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) +#else +# define asm_volatile_goto(x...) do { asm goto(x); } while (0) +#endif #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP #if GCC_VERSION >= 40400 -- cgit v1.2.3-70-g09d2 From c5d5a58d7ff977289c4bba8eae447c9afa66516b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 11 Oct 2013 00:07:01 -0700 Subject: ASoC: rcar: fixup generation checker Current rcar is using rsnd_is_gen1/gen2() to checking its IP generation, but it needs data mask. This patch fixes it up. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- include/sound/rcar_snd.h | 1 + sound/soc/sh/rcar/rsnd.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/rcar_snd.h b/include/sound/rcar_snd.h index fe66533e9b7..fb0a312bcb8 100644 --- a/include/sound/rcar_snd.h +++ b/include/sound/rcar_snd.h @@ -68,6 +68,7 @@ struct rsnd_scu_platform_info { * * A : generation */ +#define RSND_GEN_MASK (0xF << 0) #define RSND_GEN1 (1 << 0) /* fixme */ #define RSND_GEN2 (2 << 0) /* fixme */ diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 9cc6986a8cf..5dd87f4c919 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -220,8 +220,8 @@ int rsnd_gen_path_exit(struct rsnd_priv *priv, void __iomem *rsnd_gen_reg_get(struct rsnd_priv *priv, struct rsnd_mod *mod, enum rsnd_reg reg); -#define rsnd_is_gen1(s) ((s)->info->flags & RSND_GEN1) -#define rsnd_is_gen2(s) ((s)->info->flags & RSND_GEN2) +#define rsnd_is_gen1(s) (((s)->info->flags & RSND_GEN_MASK) == RSND_GEN1) +#define rsnd_is_gen2(s) (((s)->info->flags & RSND_GEN_MASK) == RSND_GEN2) /* * R-Car ADG -- cgit v1.2.3-70-g09d2 From 1931ee143b0ab72924944bc06e363d837ba05063 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 11 Oct 2013 09:27:28 +0200 Subject: Revert "drivers: of: add initialization code for dma reserved memory" This reverts commit 9d8eab7af79cb4ce2de5de39f82c455b1f796963. There is still no consensus on the bindings for the reserved memory and various drawbacks of the proposed solution has been shown, so the best now is to revert it completely and start again from scratch later. Signed-off-by: Marek Szyprowski Signed-off-by: Grant Likely --- Documentation/devicetree/bindings/memory.txt | 168 -------------------------- drivers/of/Kconfig | 6 - drivers/of/Makefile | 1 - drivers/of/of_reserved_mem.c | 173 --------------------------- drivers/of/platform.c | 4 - include/linux/of_reserved_mem.h | 14 --- 6 files changed, 366 deletions(-) delete mode 100644 Documentation/devicetree/bindings/memory.txt delete mode 100644 drivers/of/of_reserved_mem.c delete mode 100644 include/linux/of_reserved_mem.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/memory.txt b/Documentation/devicetree/bindings/memory.txt deleted file mode 100644 index eb246936559..00000000000 --- a/Documentation/devicetree/bindings/memory.txt +++ /dev/null @@ -1,168 +0,0 @@ -*** Memory binding *** - -The /memory node provides basic information about the address and size -of the physical memory. This node is usually filled or updated by the -bootloader, depending on the actual memory configuration of the given -hardware. - -The memory layout is described by the following node: - -/ { - #address-cells = <(n)>; - #size-cells = <(m)>; - memory { - device_type = "memory"; - reg = <(baseaddr1) (size1) - (baseaddr2) (size2) - ... - (baseaddrN) (sizeN)>; - }; - ... -}; - -A memory node follows the typical device tree rules for "reg" property: -n: number of cells used to store base address value -m: number of cells used to store size value -baseaddrX: defines a base address of the defined memory bank -sizeX: the size of the defined memory bank - - -More than one memory bank can be defined. - - -*** Reserved memory regions *** - -In /memory/reserved-memory node one can create child nodes describing -particular reserved (excluded from normal use) memory regions. Such -memory regions are usually designed for the special usage by various -device drivers. A good example are contiguous memory allocations or -memory sharing with other operating system on the same hardware board. -Those special memory regions might depend on the board configuration and -devices used on the target system. - -Parameters for each memory region can be encoded into the device tree -with the following convention: - -[(label):] (name) { - compatible = "linux,contiguous-memory-region", "reserved-memory-region"; - reg = <(address) (size)>; - (linux,default-contiguous-region); -}; - -compatible: one or more of: - - "linux,contiguous-memory-region" - enables binding of this - region to Contiguous Memory Allocator (special region for - contiguous memory allocations, shared with movable system - memory, Linux kernel-specific). - - "reserved-memory-region" - compatibility is defined, given - region is assigned for exclusive usage for by the respective - devices. - -reg: standard property defining the base address and size of - the memory region - -linux,default-contiguous-region: property indicating that the region - is the default region for all contiguous memory - allocations, Linux specific (optional) - -It is optional to specify the base address, so if one wants to use -autoconfiguration of the base address, '0' can be specified as a base -address in the 'reg' property. - -The /memory/reserved-memory node must contain the same #address-cells -and #size-cells value as the root node. - - -*** Device node's properties *** - -Once regions in the /memory/reserved-memory node have been defined, they -may be referenced by other device nodes. Bindings that wish to reference -memory regions should explicitly document their use of the following -property: - -memory-region = <&phandle_to_defined_region>; - -This property indicates that the device driver should use the memory -region pointed by the given phandle. - - -*** Example *** - -This example defines a memory consisting of 4 memory banks. 3 contiguous -regions are defined for Linux kernel, one default of all device drivers -(named contig_mem, placed at 0x72000000, 64MiB), one dedicated to the -framebuffer device (labelled display_mem, placed at 0x78000000, 8MiB) -and one for multimedia processing (labelled multimedia_mem, placed at -0x77000000, 64MiB). 'display_mem' region is then assigned to fb@12300000 -device for DMA memory allocations (Linux kernel drivers will use CMA is -available or dma-exclusive usage otherwise). 'multimedia_mem' is -assigned to scaler@12500000 and codec@12600000 devices for contiguous -memory allocations when CMA driver is enabled. - -The reason for creating a separate region for framebuffer device is to -match the framebuffer base address to the one configured by bootloader, -so once Linux kernel drivers starts no glitches on the displayed boot -logo appears. Scaller and codec drivers should share the memory -allocations. - -/ { - #address-cells = <1>; - #size-cells = <1>; - - /* ... */ - - memory { - reg = <0x40000000 0x10000000 - 0x50000000 0x10000000 - 0x60000000 0x10000000 - 0x70000000 0x10000000>; - - reserved-memory { - #address-cells = <1>; - #size-cells = <1>; - - /* - * global autoconfigured region for contiguous allocations - * (used only with Contiguous Memory Allocator) - */ - contig_region@0 { - compatible = "linux,contiguous-memory-region"; - reg = <0x0 0x4000000>; - linux,default-contiguous-region; - }; - - /* - * special region for framebuffer - */ - display_region: region@78000000 { - compatible = "linux,contiguous-memory-region", "reserved-memory-region"; - reg = <0x78000000 0x800000>; - }; - - /* - * special region for multimedia processing devices - */ - multimedia_region: region@77000000 { - compatible = "linux,contiguous-memory-region"; - reg = <0x77000000 0x4000000>; - }; - }; - }; - - /* ... */ - - fb0: fb@12300000 { - status = "okay"; - memory-region = <&display_region>; - }; - - scaler: scaler@12500000 { - status = "okay"; - memory-region = <&multimedia_region>; - }; - - codec: codec@12600000 { - status = "okay"; - memory-region = <&multimedia_region>; - }; -}; diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 9d2009a9004..78cc7605332 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -74,10 +74,4 @@ config OF_MTD depends on MTD def_bool y -config OF_RESERVED_MEM - depends on OF_FLATTREE && (DMA_CMA || (HAVE_GENERIC_DMA_COHERENT && HAVE_MEMBLOCK)) - def_bool y - help - Initialization code for DMA reserved memory - endmenu # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index ed9660adad7..efd05102c40 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -9,4 +9,3 @@ obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o -obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c deleted file mode 100644 index 0fe40c7d690..00000000000 --- a/drivers/of/of_reserved_mem.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Device tree based initialization code for reserved memory. - * - * Copyright (c) 2013 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * Author: Marek Szyprowski - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License or (at your optional) any later version of the license. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_RESERVED_REGIONS 16 -struct reserved_mem { - phys_addr_t base; - unsigned long size; - struct cma *cma; - char name[32]; -}; -static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; -static int reserved_mem_count; - -static int __init fdt_scan_reserved_mem(unsigned long node, const char *uname, - int depth, void *data) -{ - struct reserved_mem *rmem = &reserved_mem[reserved_mem_count]; - phys_addr_t base, size; - int is_cma, is_reserved; - unsigned long len; - const char *status; - __be32 *prop; - - is_cma = IS_ENABLED(CONFIG_DMA_CMA) && - of_flat_dt_is_compatible(node, "linux,contiguous-memory-region"); - is_reserved = of_flat_dt_is_compatible(node, "reserved-memory-region"); - - if (!is_reserved && !is_cma) { - /* ignore node and scan next one */ - return 0; - } - - status = of_get_flat_dt_prop(node, "status", &len); - if (status && strcmp(status, "okay") != 0) { - /* ignore disabled node nad scan next one */ - return 0; - } - - prop = of_get_flat_dt_prop(node, "reg", &len); - if (!prop || (len < (dt_root_size_cells + dt_root_addr_cells) * - sizeof(__be32))) { - pr_err("Reserved mem: node %s, incorrect \"reg\" property\n", - uname); - /* ignore node and scan next one */ - return 0; - } - base = dt_mem_next_cell(dt_root_addr_cells, &prop); - size = dt_mem_next_cell(dt_root_size_cells, &prop); - - if (!size) { - /* ignore node and scan next one */ - return 0; - } - - pr_info("Reserved mem: found %s, memory base %lx, size %ld MiB\n", - uname, (unsigned long)base, (unsigned long)size / SZ_1M); - - if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) - return -ENOSPC; - - rmem->base = base; - rmem->size = size; - strlcpy(rmem->name, uname, sizeof(rmem->name)); - - if (is_cma) { - struct cma *cma; - if (dma_contiguous_reserve_area(size, base, 0, &cma) == 0) { - rmem->cma = cma; - reserved_mem_count++; - if (of_get_flat_dt_prop(node, - "linux,default-contiguous-region", - NULL)) - dma_contiguous_set_default(cma); - } - } else if (is_reserved) { - if (memblock_remove(base, size) == 0) - reserved_mem_count++; - else - pr_err("Failed to reserve memory for %s\n", uname); - } - - return 0; -} - -static struct reserved_mem *get_dma_memory_region(struct device *dev) -{ - struct device_node *node; - const char *name; - int i; - - node = of_parse_phandle(dev->of_node, "memory-region", 0); - if (!node) - return NULL; - - name = kbasename(node->full_name); - for (i = 0; i < reserved_mem_count; i++) - if (strcmp(name, reserved_mem[i].name) == 0) - return &reserved_mem[i]; - return NULL; -} - -/** - * of_reserved_mem_device_init() - assign reserved memory region to given device - * - * This function assign memory region pointed by "memory-region" device tree - * property to the given device. - */ -void of_reserved_mem_device_init(struct device *dev) -{ - struct reserved_mem *region = get_dma_memory_region(dev); - if (!region) - return; - - if (region->cma) { - dev_set_cma_area(dev, region->cma); - pr_info("Assigned CMA %s to %s device\n", region->name, - dev_name(dev)); - } else { - if (dma_declare_coherent_memory(dev, region->base, region->base, - region->size, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE) != 0) - pr_info("Declared reserved memory %s to %s device\n", - region->name, dev_name(dev)); - } -} - -/** - * of_reserved_mem_device_release() - release reserved memory device structures - * - * This function releases structures allocated for memory region handling for - * the given device. - */ -void of_reserved_mem_device_release(struct device *dev) -{ - struct reserved_mem *region = get_dma_memory_region(dev); - if (!region && !region->cma) - dma_release_declared_memory(dev); -} - -/** - * early_init_dt_scan_reserved_mem() - create reserved memory regions - * - * This function grabs memory from early allocator for device exclusive use - * defined in device tree structures. It should be called by arch specific code - * once the early allocator (memblock) has been activated and all other - * subsystems have already allocated/reserved memory. - */ -void __init early_init_dt_scan_reserved_mem(void) -{ - of_scan_flat_dt_by_path("/memory/reserved-memory", - fdt_scan_reserved_mem, NULL); -} diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 9b439ac63d8..f6dcde22082 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -21,7 +21,6 @@ #include #include #include -#include #include const struct of_device_id of_default_bus_match_table[] = { @@ -219,8 +218,6 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; - of_reserved_mem_device_init(&dev->dev); - /* We do not fill the DMA ops for platform devices by default. * This is currently the responsibility of the platform code * to do such, possibly using a device notifier @@ -228,7 +225,6 @@ static struct platform_device *of_platform_device_create_pdata( if (of_device_add(dev) != 0) { platform_device_put(dev); - of_reserved_mem_device_release(&dev->dev); return NULL; } diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h deleted file mode 100644 index c8412825581..00000000000 --- a/include/linux/of_reserved_mem.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __OF_RESERVED_MEM_H -#define __OF_RESERVED_MEM_H - -#ifdef CONFIG_OF_RESERVED_MEM -void of_reserved_mem_device_init(struct device *dev); -void of_reserved_mem_device_release(struct device *dev); -void early_init_dt_scan_reserved_mem(void); -#else -static inline void of_reserved_mem_device_init(struct device *dev) { } -static inline void of_reserved_mem_device_release(struct device *dev) { } -static inline void early_init_dt_scan_reserved_mem(void) { } -#endif - -#endif /* __OF_RESERVED_MEM_H */ -- cgit v1.2.3-70-g09d2 From 32c37fc30c52508711ea6a108cfd5855b8a07176 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 14 Oct 2013 15:24:55 +0200 Subject: usb-storage: add quirk for mandatory READ_CAPACITY_16 Some USB drive enclosures do not correctly report an overflow condition if they hold a drive with a capacity over 2TB and are confronted with a READ_CAPACITY_10. They answer with their capacity modulo 2TB. The generic layer cannot cope with that. It must be told to use READ_CAPACITY_16 from the beginning. Signed-off-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/scsiglue.c | 5 ++++- drivers/usb/storage/unusual_devs.h | 7 +++++++ include/linux/usb_usual.h | 4 +++- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 94d75edef77..18509e6c21a 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -211,8 +211,11 @@ static int slave_configure(struct scsi_device *sdev) /* * Many devices do not respond properly to READ_CAPACITY_16. * Tell the SCSI layer to try READ_CAPACITY_10 first. + * However some USB 3.0 drive enclosures return capacity + * modulo 2TB. Those must use READ_CAPACITY_16 */ - sdev->try_rc_10_first = 1; + if (!(us->fflags & US_FL_NEEDS_CAP16)) + sdev->try_rc_10_first = 1; /* assume SPC3 or latter devices support sense size > 18 */ if (sdev->scsi_level > SCSI_SPC_2) diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index c015f2c1672..de32cfa5bfa 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1925,6 +1925,13 @@ UNUSUAL_DEV( 0x1652, 0x6600, 0x0201, 0x0201, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_RESIDUE ), +/* Reported by Oliver Neukum */ +UNUSUAL_DEV( 0x174c, 0x55aa, 0x0100, 0x0100, + "ASMedia", + "AS2105", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NEEDS_CAP16), + /* Reported by Jesse Feddema */ UNUSUAL_DEV( 0x177f, 0x0400, 0x0000, 0x0000, "Yarvik", diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index bf99cd01be2..63035686603 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -66,7 +66,9 @@ US_FLAG(INITIAL_READ10, 0x00100000) \ /* Initial READ(10) (and others) must be retried */ \ US_FLAG(WRITE_CACHE, 0x00200000) \ - /* Write Cache status is not available */ + /* Write Cache status is not available */ \ + US_FLAG(NEEDS_CAP16, 0x00400000) + /* cannot handle READ_CAPACITY_10 */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3-70-g09d2 From 4942642080ea82d99ab5b653abb9a12b7ba31f4a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Oct 2013 13:46:59 -0700 Subject: mm: memcg: handle non-error OOM situations more gracefully Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full callstack on OOM") assumed that only a few places that can trigger a memcg OOM situation do not return VM_FAULT_OOM, like optional page cache readahead. But there are many more and it's impractical to annotate them all. First of all, we don't want to invoke the OOM killer when the failed allocation is gracefully handled, so defer the actual kill to the end of the fault handling as well. This simplifies the code quite a bit for added bonus. Second, since a failed allocation might not be the abrupt end of the fault, the memcg OOM handler needs to be re-entrant until the fault finishes for subsequent allocation attempts. If an allocation is attempted after the task already OOMed, allow it to bypass the limit so that it can quickly finish the fault and invoke the OOM killer. Reported-by: azurIt Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 50 ++++------------ include/linux/sched.h | 7 +-- mm/filemap.c | 11 +--- mm/memcontrol.c | 139 +++++++++++++++++---------------------------- mm/memory.c | 18 ++++-- mm/oom_kill.c | 2 +- 6 files changed, 79 insertions(+), 148 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ecc82b37c4c..b3e7a667e03 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -137,47 +137,24 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); -/** - * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task - * @new: true to enable, false to disable - * - * Toggle whether a failed memcg charge should invoke the OOM killer - * or just return -ENOMEM. Returns the previous toggle state. - * - * NOTE: Any path that enables the OOM killer before charging must - * call mem_cgroup_oom_synchronize() afterward to finalize the - * OOM handling and clean up. - */ -static inline bool mem_cgroup_toggle_oom(bool new) +static inline void mem_cgroup_oom_enable(void) { - bool old; - - old = current->memcg_oom.may_oom; - current->memcg_oom.may_oom = new; - - return old; + WARN_ON(current->memcg_oom.may_oom); + current->memcg_oom.may_oom = 1; } -static inline void mem_cgroup_enable_oom(void) +static inline void mem_cgroup_oom_disable(void) { - bool old = mem_cgroup_toggle_oom(true); - - WARN_ON(old == true); -} - -static inline void mem_cgroup_disable_oom(void) -{ - bool old = mem_cgroup_toggle_oom(false); - - WARN_ON(old == false); + WARN_ON(!current->memcg_oom.may_oom); + current->memcg_oom.may_oom = 0; } static inline bool task_in_memcg_oom(struct task_struct *p) { - return p->memcg_oom.in_memcg_oom; + return p->memcg_oom.memcg; } -bool mem_cgroup_oom_synchronize(void); +bool mem_cgroup_oom_synchronize(bool wait); #ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; @@ -402,16 +379,11 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page, { } -static inline bool mem_cgroup_toggle_oom(bool new) -{ - return false; -} - -static inline void mem_cgroup_enable_oom(void) +static inline void mem_cgroup_oom_enable(void) { } -static inline void mem_cgroup_disable_oom(void) +static inline void mem_cgroup_oom_disable(void) { } @@ -420,7 +392,7 @@ static inline bool task_in_memcg_oom(struct task_struct *p) return false; } -static inline bool mem_cgroup_oom_synchronize(void) +static inline bool mem_cgroup_oom_synchronize(bool wait) { return false; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b29..e27baeeda3f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1394,11 +1394,10 @@ struct task_struct { } memcg_batch; unsigned int memcg_kmem_skip_account; struct memcg_oom_info { + struct mem_cgroup *memcg; + gfp_t gfp_mask; + int order; unsigned int may_oom:1; - unsigned int in_memcg_oom:1; - unsigned int oom_locked:1; - int wakeups; - struct mem_cgroup *wait_on_memcg; } memcg_oom; #endif #ifdef CONFIG_UPROBES diff --git a/mm/filemap.c b/mm/filemap.c index 1e6aec4a2d2..ae4846ff484 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = mapping->host; pgoff_t offset = vmf->pgoff; struct page *page; - bool memcg_oom; pgoff_t size; int ret = 0; @@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; /* - * Do we have something in the page cache already? Either - * way, try readahead, but disable the memcg OOM killer for it - * as readahead is optional and no errors are propagated up - * the fault stack. The OOM killer is enabled while trying to - * instantiate the faulting page individually below. + * Do we have something in the page cache already? */ page = find_get_page(mapping, offset); if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { @@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * We found the page, so try async readahead before * waiting for the lock. */ - memcg_oom = mem_cgroup_toggle_oom(false); do_async_mmap_readahead(vma, ra, file, page, offset); - mem_cgroup_toggle_oom(memcg_oom); } else if (!page) { /* No page in the page cache at all */ - memcg_oom = mem_cgroup_toggle_oom(false); do_sync_mmap_readahead(vma, ra, file, offset); - mem_cgroup_toggle_oom(memcg_oom); count_vm_event(PGMAJFAULT); mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); ret = VM_FAULT_MAJOR; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5335b2b6be7..65fc6a44984 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2161,110 +2161,59 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) memcg_wakeup_oom(memcg); } -/* - * try to call OOM killer - */ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - bool locked; - int wakeups; - if (!current->memcg_oom.may_oom) return; - - current->memcg_oom.in_memcg_oom = 1; - /* - * As with any blocking lock, a contender needs to start - * listening for wakeups before attempting the trylock, - * otherwise it can miss the wakeup from the unlock and sleep - * indefinitely. This is just open-coded because our locking - * is so particular to memcg hierarchies. + * We are in the middle of the charge context here, so we + * don't want to block when potentially sitting on a callstack + * that holds all kinds of filesystem and mm locks. + * + * Also, the caller may handle a failed allocation gracefully + * (like optional page cache readahead) and so an OOM killer + * invocation might not even be necessary. + * + * That's why we don't do anything here except remember the + * OOM context and then deal with it at the end of the page + * fault when the stack is unwound, the locks are released, + * and when we know whether the fault was overall successful. */ - wakeups = atomic_read(&memcg->oom_wakeups); - mem_cgroup_mark_under_oom(memcg); - - locked = mem_cgroup_oom_trylock(memcg); - - if (locked) - mem_cgroup_oom_notify(memcg); - - if (locked && !memcg->oom_kill_disable) { - mem_cgroup_unmark_under_oom(memcg); - mem_cgroup_out_of_memory(memcg, mask, order); - mem_cgroup_oom_unlock(memcg); - /* - * There is no guarantee that an OOM-lock contender - * sees the wakeups triggered by the OOM kill - * uncharges. Wake any sleepers explicitely. - */ - memcg_oom_recover(memcg); - } else { - /* - * A system call can just return -ENOMEM, but if this - * is a page fault and somebody else is handling the - * OOM already, we need to sleep on the OOM waitqueue - * for this memcg until the situation is resolved. - * Which can take some time because it might be - * handled by a userspace task. - * - * However, this is the charge context, which means - * that we may sit on a large call stack and hold - * various filesystem locks, the mmap_sem etc. and we - * don't want the OOM handler to deadlock on them - * while we sit here and wait. Store the current OOM - * context in the task_struct, then return -ENOMEM. - * At the end of the page fault handler, with the - * stack unwound, pagefault_out_of_memory() will check - * back with us by calling - * mem_cgroup_oom_synchronize(), possibly putting the - * task to sleep. - */ - current->memcg_oom.oom_locked = locked; - current->memcg_oom.wakeups = wakeups; - css_get(&memcg->css); - current->memcg_oom.wait_on_memcg = memcg; - } + css_get(&memcg->css); + current->memcg_oom.memcg = memcg; + current->memcg_oom.gfp_mask = mask; + current->memcg_oom.order = order; } /** * mem_cgroup_oom_synchronize - complete memcg OOM handling + * @handle: actually kill/wait or just clean up the OOM state * - * This has to be called at the end of a page fault if the the memcg - * OOM handler was enabled and the fault is returning %VM_FAULT_OOM. + * This has to be called at the end of a page fault if the memcg OOM + * handler was enabled. * - * Memcg supports userspace OOM handling, so failed allocations must + * Memcg supports userspace OOM handling where failed allocations must * sleep on a waitqueue until the userspace task resolves the * situation. Sleeping directly in the charge context with all kinds * of locks held is not a good idea, instead we remember an OOM state * in the task and mem_cgroup_oom_synchronize() has to be called at - * the end of the page fault to put the task to sleep and clean up the - * OOM state. + * the end of the page fault to complete the OOM handling. * * Returns %true if an ongoing memcg OOM situation was detected and - * finalized, %false otherwise. + * completed, %false otherwise. */ -bool mem_cgroup_oom_synchronize(void) +bool mem_cgroup_oom_synchronize(bool handle) { + struct mem_cgroup *memcg = current->memcg_oom.memcg; struct oom_wait_info owait; - struct mem_cgroup *memcg; + bool locked; /* OOM is global, do not handle */ - if (!current->memcg_oom.in_memcg_oom) - return false; - - /* - * We invoked the OOM killer but there is a chance that a kill - * did not free up any charges. Everybody else might already - * be sleeping, so restart the fault and keep the rampage - * going until some charges are released. - */ - memcg = current->memcg_oom.wait_on_memcg; if (!memcg) - goto out; + return false; - if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) - goto out_memcg; + if (!handle) + goto cleanup; owait.memcg = memcg; owait.wait.flags = 0; @@ -2273,13 +2222,25 @@ bool mem_cgroup_oom_synchronize(void) INIT_LIST_HEAD(&owait.wait.task_list); prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); - /* Only sleep if we didn't miss any wakeups since OOM */ - if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups) + mem_cgroup_mark_under_oom(memcg); + + locked = mem_cgroup_oom_trylock(memcg); + + if (locked) + mem_cgroup_oom_notify(memcg); + + if (locked && !memcg->oom_kill_disable) { + mem_cgroup_unmark_under_oom(memcg); + finish_wait(&memcg_oom_waitq, &owait.wait); + mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask, + current->memcg_oom.order); + } else { schedule(); - finish_wait(&memcg_oom_waitq, &owait.wait); -out_memcg: - mem_cgroup_unmark_under_oom(memcg); - if (current->memcg_oom.oom_locked) { + mem_cgroup_unmark_under_oom(memcg); + finish_wait(&memcg_oom_waitq, &owait.wait); + } + + if (locked) { mem_cgroup_oom_unlock(memcg); /* * There is no guarantee that an OOM-lock contender @@ -2288,10 +2249,9 @@ out_memcg: */ memcg_oom_recover(memcg); } +cleanup: + current->memcg_oom.memcg = NULL; css_put(&memcg->css); - current->memcg_oom.wait_on_memcg = NULL; -out: - current->memcg_oom.in_memcg_oom = 0; return true; } @@ -2705,6 +2665,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, || fatal_signal_pending(current))) goto bypass; + if (unlikely(task_in_memcg_oom(current))) + goto bypass; + /* * We always charge the cgroup the mm_struct belongs to. * The mm_struct's mem_cgroup changes on task migration if the diff --git a/mm/memory.c b/mm/memory.c index f7b7692c05e..1311f26497e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3865,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) - mem_cgroup_enable_oom(); + mem_cgroup_oom_enable(); ret = __handle_mm_fault(mm, vma, address, flags); - if (flags & FAULT_FLAG_USER) - mem_cgroup_disable_oom(); - - if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))) - mem_cgroup_oom_synchronize(); + if (flags & FAULT_FLAG_USER) { + mem_cgroup_oom_disable(); + /* + * The task may have entered a memcg OOM situation but + * if the allocation error was handled gracefully (no + * VM_FAULT_OOM), there is no need to kill anything. + * Just clean up the OOM state peacefully. + */ + if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) + mem_cgroup_oom_synchronize(false); + } return ret; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 314e9d27438..6738c47f1f7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -680,7 +680,7 @@ void pagefault_out_of_memory(void) { struct zonelist *zonelist; - if (mem_cgroup_oom_synchronize()) + if (mem_cgroup_oom_synchronize(true)) return; zonelist = node_zonelist(first_online_node, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 2421ad48f4aed63bc890e8f3c53ed581a542fb66 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 17 Oct 2013 15:44:48 +0200 Subject: ACPI / PM: Drop two functions that are not used any more Two functions defined in device_pm.c, acpi_dev_pm_add_dependent() and acpi_dev_pm_remove_dependent(), have no callers and may be dropped, so drop them. Moreover, they are the only functions adding entries to and removing entries from the power_dependent list in struct acpi_device, so drop that list too. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 56 ------------------------------------------------ drivers/acpi/scan.c | 1 - include/acpi/acpi_bus.h | 7 ------ 3 files changed, 64 deletions(-) (limited to 'include') diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 59d3202f6b3..a94383d1f35 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1025,60 +1025,4 @@ void acpi_dev_pm_detach(struct device *dev, bool power_off) } } EXPORT_SYMBOL_GPL(acpi_dev_pm_detach); - -/** - * acpi_dev_pm_add_dependent - Add physical device depending for PM. - * @handle: Handle of ACPI device node. - * @depdev: Device depending on that node for PM. - */ -void acpi_dev_pm_add_dependent(acpi_handle handle, struct device *depdev) -{ - struct acpi_device_physical_node *dep; - struct acpi_device *adev; - - if (!depdev || acpi_bus_get_device(handle, &adev)) - return; - - mutex_lock(&adev->physical_node_lock); - - list_for_each_entry(dep, &adev->power_dependent, node) - if (dep->dev == depdev) - goto out; - - dep = kzalloc(sizeof(*dep), GFP_KERNEL); - if (dep) { - dep->dev = depdev; - list_add_tail(&dep->node, &adev->power_dependent); - } - - out: - mutex_unlock(&adev->physical_node_lock); -} -EXPORT_SYMBOL_GPL(acpi_dev_pm_add_dependent); - -/** - * acpi_dev_pm_remove_dependent - Remove physical device depending for PM. - * @handle: Handle of ACPI device node. - * @depdev: Device depending on that node for PM. - */ -void acpi_dev_pm_remove_dependent(acpi_handle handle, struct device *depdev) -{ - struct acpi_device_physical_node *dep; - struct acpi_device *adev; - - if (!depdev || acpi_bus_get_device(handle, &adev)) - return; - - mutex_lock(&adev->physical_node_lock); - - list_for_each_entry(dep, &adev->power_dependent, node) - if (dep->dev == depdev) { - list_del(&dep->node); - kfree(dep); - break; - } - - mutex_unlock(&adev->physical_node_lock); -} -EXPORT_SYMBOL_GPL(acpi_dev_pm_remove_dependent); #endif /* CONFIG_PM */ diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 407ad13cac2..fee8a297c7d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -999,7 +999,6 @@ int acpi_device_add(struct acpi_device *device, INIT_LIST_HEAD(&device->wakeup_list); INIT_LIST_HEAD(&device->physical_node_list); mutex_init(&device->physical_node_lock); - INIT_LIST_HEAD(&device->power_dependent); new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL); if (!new_bus_id) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 02e113bb8b7..d9019821aa6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -311,7 +311,6 @@ struct acpi_device { unsigned int physical_node_count; struct list_head physical_node_list; struct mutex physical_node_lock; - struct list_head power_dependent; void (*remove)(struct acpi_device *); }; @@ -456,8 +455,6 @@ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, acpi_status acpi_remove_pm_notifier(struct acpi_device *adev, acpi_notify_handler handler); int acpi_pm_device_sleep_state(struct device *, int *, int); -void acpi_dev_pm_add_dependent(acpi_handle handle, struct device *depdev); -void acpi_dev_pm_remove_dependent(acpi_handle handle, struct device *depdev); #else static inline acpi_status acpi_add_pm_notifier(struct acpi_device *adev, acpi_notify_handler handler, @@ -478,10 +475,6 @@ static inline int acpi_pm_device_sleep_state(struct device *d, int *p, int m) return (m >= ACPI_STATE_D0 && m <= ACPI_STATE_D3_COLD) ? m : ACPI_STATE_D0; } -static inline void acpi_dev_pm_add_dependent(acpi_handle handle, - struct device *depdev) {} -static inline void acpi_dev_pm_remove_dependent(acpi_handle handle, - struct device *depdev) {} #endif #ifdef CONFIG_PM_RUNTIME -- cgit v1.2.3-70-g09d2 From 94468783cd960aa14b22503dd59afd14efb785aa Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 16 Oct 2013 19:18:41 -0700 Subject: usb: usb_phy_gen: refine conditional declaration of usb_nop_xceiv_register Commit 3fa4d734 (usb: phy: rename nop_usb_xceiv => usb_phy_gen_xceiv) changed the conditional around the declaration of usb_nop_xceiv_register from #if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE)) to #if IS_ENABLED(CONFIG_NOP_USB_XCEIV) While that looks the same, it is semantically different. The first expression is true if CONFIG_NOP_USB_XCEIV is built as module and if the including code is built as module. The second expression is true if code depending on CONFIG_NOP_USB_XCEIV if built as module or into the kernel. As a result, the arm:allmodconfig build fails with arch/arm/mach-omap2/built-in.o: In function `omap3_evm_init': arch/arm/mach-omap2/board-omap3evm.c:703: undefined reference to `usb_nop_xceiv_register' Fix the problem by reverting to the old conditional. Cc: Josh Boyer Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/usb_phy_gen_xceiv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/usb_phy_gen_xceiv.h b/include/linux/usb/usb_phy_gen_xceiv.h index f9a7e7bc925..11d85b9c1b0 100644 --- a/include/linux/usb/usb_phy_gen_xceiv.h +++ b/include/linux/usb/usb_phy_gen_xceiv.h @@ -12,7 +12,7 @@ struct usb_phy_gen_xceiv_platform_data { unsigned int needs_reset:1; }; -#if IS_ENABLED(CONFIG_NOP_USB_XCEIV) +#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE)) /* sometimes transceivers are accessed only through e.g. ULPI */ extern void usb_nop_xceiv_register(void); extern void usb_nop_xceiv_unregister(void); -- cgit v1.2.3-70-g09d2 From e87b3998d795123b4139bc3f25490dd236f68212 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 15 Oct 2013 22:01:29 -0400 Subject: net: dst: provide accessor function to dst->xfrm dst->xfrm is conditionally defined. Provide accessor funtion that is always available. Signed-off-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/dst.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 3bc4865f826..3c4c944096c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -479,10 +479,22 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, { return dst_orig; } + +static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) +{ + return NULL; +} + #else extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, struct sock *sk, int flags); + +/* skb attached with this dst needs transformation if dst->xfrm is valid */ +static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) +{ + return dst->xfrm; +} #endif #endif /* _NET_DST_H */ -- cgit v1.2.3-70-g09d2 From 9e5f1721907fcfbd4b575bcafa0314188f7330a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Oct 2013 15:28:38 +0300 Subject: yam: integer underflow in yam_ioctl() We cap bitrate at YAM_MAXBITRATE in yam_ioctl(), but it could also be negative. I don't know the impact of using a negative bitrate but let's prevent it. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/linux/yam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/yam.h b/include/linux/yam.h index 7fe28228b27..512cdc2fb80 100644 --- a/include/linux/yam.h +++ b/include/linux/yam.h @@ -77,6 +77,6 @@ struct yamdrv_ioctl_cfg { struct yamdrv_ioctl_mcs { int cmd; - int bitrate; + unsigned int bitrate; unsigned char bits[YAM_FPGA_SIZE]; }; -- cgit v1.2.3-70-g09d2 From bc5bd37ce48c66e9192ad2e7231e9678880f6f8e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Oct 2013 09:49:02 +0100 Subject: drm: Pad drm_mode_get_connector to 64-bit boundary Pavel Roskin reported that DRM_IOCTL_MODE_GETCONNECTOR was overwritting the 4 bytes beyond the end of its structure with a 32-bit userspace running on a 64-bit kernel. This is due to the padding gcc inserts as the drm_mode_get_connector struct includes a u64 and its size is not a natural multiple of u64s. 64-bit kernel: sizeof(drm_mode_get_connector)=80, alignof=8 sizeof(drm_mode_get_encoder)=20, alignof=4 sizeof(drm_mode_modeinfo)=68, alignof=4 32-bit userspace: sizeof(drm_mode_get_connector)=76, alignof=4 sizeof(drm_mode_get_encoder)=20, alignof=4 sizeof(drm_mode_modeinfo)=68, alignof=4 Fortuituously we can insert explicit padding to the tail of our structures without breaking ABI. Reported-by: Pavel Roskin Signed-off-by: Chris Wilson Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- include/uapi/drm/drm_mode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 550811712f7..28acbaf4a81 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -223,6 +223,8 @@ struct drm_mode_get_connector { __u32 connection; __u32 mm_width, mm_height; /**< HxW in millimeters */ __u32 subpixel; + + __u32 pad; }; #define DRM_MODE_PROP_PENDING (1<<0) -- cgit v1.2.3-70-g09d2 From f2e5ddcc0d12f9c4c7b254358ad245c9dddce13b Mon Sep 17 00:00:00 2001 From: Seif Mazareeb Date: Thu, 17 Oct 2013 20:33:21 -0700 Subject: net: fix cipso packet validation when !NETLABEL When CONFIG_NETLABEL is disabled, the cipso_v4_validate() function could loop forever in the main loop if opt[opt_iter +1] == 0, this will causing a kernel crash in an SMP system, since the CPU executing this function will stall /not respond to IPIs. This problem can be reproduced by running the IP Stack Integrity Checker (http://isic.sourceforge.net) using the following command on a Linux machine connected to DUT: "icmpsic -s rand -d -r 123456" wait (1-2 min) Signed-off-by: Seif Mazareeb Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a7a683e30b6..a8c2ef6d3b9 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -290,6 +290,7 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, unsigned char err_offset = 0; u8 opt_len = opt[1]; u8 opt_iter; + u8 tag_len; if (opt_len < 8) { err_offset = 1; @@ -302,11 +303,12 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, } for (opt_iter = 6; opt_iter < opt_len;) { - if (opt[opt_iter + 1] > (opt_len - opt_iter)) { + tag_len = opt[opt_iter + 1]; + if ((tag_len == 0) || (opt[opt_iter + 1] > (opt_len - opt_iter))) { err_offset = opt_iter + 1; goto out; } - opt_iter += opt[opt_iter + 1]; + opt_iter += tag_len; } out: -- cgit v1.2.3-70-g09d2 From 7afbddfae9931bf113c01bc5c6780dda3602ef6c Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Thu, 10 Oct 2013 11:10:55 +0200 Subject: IB/core: Temporarily disable create_flow/destroy_flow uverbs The create_flow/destroy_flow uverbs and the associated extensions to the user-kernel verbs ABI are under review and are too experimental to freeze at this point. So userspace is not exposed to experimental features and an uinstable ABI, temporarily disable this for v3.12 (with a Kconfig option behind staging to reenable it if desired). The feature will be enabled after proper cleanup for v3.13. Signed-off-by: Yann Droneaud Link: http://marc.info/?i=cover.1381351016.git.ydroneaud@opteya.com Link: http://marc.info/?i=cover.1381177342.git.ydroneaud@opteya.com [ Add a Kconfig option to reenable these verbs. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/Kconfig | 11 +++++++++++ drivers/infiniband/core/uverbs.h | 2 ++ drivers/infiniband/core/uverbs_cmd.c | 4 ++++ drivers/infiniband/core/uverbs_main.c | 6 ++++++ drivers/infiniband/hw/mlx4/main.c | 2 ++ include/uapi/rdma/ib_user_verbs.h | 6 ++++++ 6 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 5ceda710f51..b84791f03a2 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -31,6 +31,17 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from . +config INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING + bool "Experimental and unstable ABI for userspace access to flow steering verbs" + depends on INFINIBAND_USER_ACCESS + depends on STAGING + ---help--- + The final ABI for userspace access to flow steering verbs + has not been defined. To use the current ABI, *WHICH WILL + CHANGE IN THE FUTURE*, say Y here. + + If unsure, say N. + config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index d040b877475..d8f9c6c272d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -217,7 +217,9 @@ IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING IB_UVERBS_DECLARE_CMD(create_flow); IB_UVERBS_DECLARE_CMD(destroy_flow); +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f2b81b9ee0d..2f0f01b70e3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -54,7 +54,9 @@ static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ @@ -2599,6 +2601,7 @@ out_put: return ret ? ret : in_len; } +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec, union ib_flow_spec *ib_spec) { @@ -2824,6 +2827,7 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file, return ret ? ret : in_len; } +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uverbs_create_xsrq *cmd, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 75ad86c4abf..2df31f68ea0 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -115,8 +115,10 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow, [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ }; static void ib_uverbs_add_one(struct ib_device *device); @@ -605,6 +607,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) return -ENOSYS; +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) { struct ib_uverbs_cmd_hdr_ex hdr_ex; @@ -621,6 +624,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, (hdr_ex.out_words + hdr_ex.provider_out_words) * 4); } else { +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ if (hdr.in_words * 4 != count) return -EINVAL; @@ -628,7 +632,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, buf + sizeof(hdr), hdr.in_words * 4, hdr.out_words * 4); +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING } +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d6c5a73becf..f0612645de9 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1691,9 +1691,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.create_flow = mlx4_ib_create_flow; ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW); +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ } mlx4_ib_alloc_eqs(dev, ibdev); diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 0b233c56b0e..e3ddd86c90a 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -87,8 +87,10 @@ enum { IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, IB_USER_VERBS_CMD_OPEN_QP, +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING IB_USER_VERBS_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_CMD_DESTROY_FLOW +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ }; /* @@ -126,6 +128,7 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING struct ib_uverbs_cmd_hdr_ex { __u32 command; __u16 in_words; @@ -134,6 +137,7 @@ struct ib_uverbs_cmd_hdr_ex { __u16 provider_out_words; __u32 cmd_hdr_reserved; }; +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ struct ib_uverbs_get_context { __u64 response; @@ -696,6 +700,7 @@ struct ib_uverbs_detach_mcast { __u64 driver_data[0]; }; +#ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING struct ib_kern_eth_filter { __u8 dst_mac[6]; __u8 src_mac[6]; @@ -780,6 +785,7 @@ struct ib_uverbs_destroy_flow { __u32 comp_mask; __u32 flow_handle; }; +#endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */ struct ib_uverbs_create_srq { __u64 response; -- cgit v1.2.3-70-g09d2 From 96dc809514fb2328605198a0602b67554d8cce7b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 20 Oct 2013 15:43:03 +0300 Subject: ipv6: always prefer rt6i_gateway if present In v3.9 6fd6ce2056de2709 ("ipv6: Do not depend on rt->n in ip6_finish_output2()." changed the behaviour of ip6_finish_output2() such that the recently introduced rt6_nexthop() is used instead of an assigned neighbor. As rt6_nexthop() prefers rt6i_gateway only for gatewayed routes this causes a problem for users like IPVS, xt_TEE and RAW(hdrincl) if they want to use different address for routing compared to the destination address. Another case is when redirect can create RTF_DYNAMIC route without RTF_GATEWAY flag, we ignore the rt6i_gateway in rt6_nexthop(). Fix the above problems by considering the rt6i_gateway if present, so that traffic routed to address on local subnet is not wrongly diverted to the destination address. Thanks to Simon Horman and Phil Oester for spotting the problematic commit. Thanks to Hannes Frederic Sowa for his review and help in testing. Reported-by: Phil Oester Reported-by: Mark Brooks Signed-off-by: Julian Anastasov Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f525e7038cc..481404abdf6 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -196,7 +196,7 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) { - if (rt->rt6i_flags & RTF_GATEWAY) + if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) return &rt->rt6i_gateway; return dest; } -- cgit v1.2.3-70-g09d2 From 550bab42f83308c9d6ab04a980cc4333cef1c8fa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 20 Oct 2013 15:43:04 +0300 Subject: ipv6: fill rt6i_gateway with nexthop address Make sure rt6i_gateway contains nexthop information in all routes returned from lookup or when routes are directly attached to skb for generated ICMP packets. The effect of this patch should be a faster version of rt6_nexthop() and the consideration of local addresses as nexthop. Signed-off-by: Julian Anastasov Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_route.h | 6 ++---- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/route.c | 8 ++++++-- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 481404abdf6..2b786b7e358 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -194,11 +194,9 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); } -static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) +static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) { - if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) - return &rt->rt6i_gateway; - return dest; + return &rt->rt6i_gateway; } #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 975624b8d2e..91fb4e8212f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop((struct rt6_info *)dst); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -874,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd96d82..c1ee3813e1a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -851,7 +851,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - rt->rt6i_gateway = *daddr; } rt->rt6i_flags |= RTF_CACHE; @@ -1338,6 +1337,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; atomic_set(&rt->dst.__refcnt, 1); + rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; @@ -1873,7 +1873,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; + if (ort->rt6i_flags & RTF_GATEWAY) + rt->rt6i_gateway = ort->rt6i_gateway; + else + rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == (RTF_DEFAULT | RTF_ADDRCONF)) @@ -2160,6 +2163,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, else rt->rt6i_flags |= RTF_LOCAL; + rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); -- cgit v1.2.3-70-g09d2 From 7e4d8a193f2f1b551dc8d10acca6aea4356f3b86 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 21 Oct 2013 19:09:58 +0200 Subject: mac802154: correct a typo in ieee802154_alloc_device() prototype This has no other impact than a cosmetic one. Signed-off-by: Alexandre Belloni Signed-off-by: David S. Miller --- include/net/mac802154.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac802154.h b/include/net/mac802154.h index d0d11df9cba..807d6b7a943 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -133,7 +133,7 @@ struct ieee802154_ops { /* Basic interface to register ieee802154 device */ struct ieee802154_dev * -ieee802154_alloc_device(size_t priv_data_lex, struct ieee802154_ops *ops); +ieee802154_alloc_device(size_t priv_data_len, struct ieee802154_ops *ops); void ieee802154_free_device(struct ieee802154_dev *dev); int ieee802154_register_device(struct ieee802154_dev *dev); void ieee802154_unregister_device(struct ieee802154_dev *dev); -- cgit v1.2.3-70-g09d2