From 2053a1db41193c2b5e1f47a91aaba0fd63ba7102 Mon Sep 17 00:00:00 2001
From: Roland Dreier
Date: Tue, 8 Oct 2013 09:47:22 -0700
Subject: target: Fix assignment of LUN in tracepoints

The unpacked_lun field in the SCSI target tracepoints should be
initialized with cmd->orig_fe_lun rather than cmd->se_lun->unpacked_lun,
for two reasons:

 - Most importantly, if we are in the cmd_complete tracepoint returning
   a check condition due to no LUN found, cmd->se_lun will be NULL and
   we'll crash trying to dereference it.

 - In any case, cmd->se_lun->unpacked_lun is an index into the target's
   internal set of LUNs; cmd->orig_fe_lun is much more useful and
   interesting, since it is the value the initiator actually sent.

Signed-off-by: Roland Dreier
Cc: # 3.11+
Signed-off-by: Nicholas Bellinger
---
 include/trace/events/target.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/target.h b/include/trace/events/target.h
index aef8fc35402..da9cc0f05c9 100644
--- a/include/trace/events/target.h
+++ b/include/trace/events/target.h
@@ -144,7 +144,7 @@ TRACE_EVENT(target_sequencer_start,
 	),

 	TP_fast_assign(
-		__entry->unpacked_lun	= cmd->se_lun->unpacked_lun;
+		__entry->unpacked_lun	= cmd->orig_fe_lun;
 		__entry->opcode		= cmd->t_task_cdb[0];
 		__entry->data_length	= cmd->data_length;
 		__entry->task_attribute	= cmd->sam_task_attr;
@@ -182,7 +182,7 @@ TRACE_EVENT(target_cmd_complete,
 	),

 	TP_fast_assign(
-		__entry->unpacked_lun	= cmd->se_lun->unpacked_lun;
+		__entry->unpacked_lun	= cmd->orig_fe_lun;
 		__entry->opcode		= cmd->t_task_cdb[0];
 		__entry->data_length	= cmd->data_length;
 		__entry->task_attribute	= cmd->sam_task_attr;
-- 
cgit v1.2.3-70-g09d2
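
A note on the first rationale above: when no LUN matches, the command is
completed with a check condition while cmd->se_lun was never assigned, so
the old tracepoint dereferenced a NULL pointer. Here is a stand-alone
sketch of that hazard, using deliberately simplified stand-ins for the
kernel structures (only the two field names come from the patch;
everything else is illustrative):

    #include <stdio.h>

    struct se_lun {
            unsigned int unpacked_lun;      /* target-internal index */
    };

    struct se_cmd {
            struct se_lun *se_lun;          /* NULL if LUN lookup failed */
            unsigned int orig_fe_lun;       /* LUN as sent by the initiator */
    };

    static unsigned int trace_lun(const struct se_cmd *cmd)
    {
            /* cmd->se_lun->unpacked_lun would crash here when se_lun is NULL */
            return cmd->orig_fe_lun;
    }

    int main(void)
    {
            struct se_cmd failed_cmd = { .se_lun = NULL, .orig_fe_lun = 3 };

            printf("LUN %u\n", trace_lun(&failed_cmd));     /* safe: prints 3 */
            return 0;
    }
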
From 8fb479a47c869820966e7298f38038aa334d889c Mon Sep 17 00:00:00 2001
From: Antonio Quartulli
Date: Wed, 23 Oct 2013 23:36:30 +0200
Subject: netpoll: fix rx_hook() interface by passing the skb

Right now skb->data is passed to rx_hook() even if the skb has not been
linearised, and without giving rx_hook() a way to linearise it.

Change the rx_hook() interface and make it accept the skb and the offset
to the UDP payload as arguments. rx_hook() is also renamed to
rx_skb_hook() to ensure that out-of-tree users notice the API change.

In this way any rx_skb_hook() implementation can perform all the needed
operations to properly (and safely) access the skb data.

Signed-off-by: Antonio Quartulli
Signed-off-by: David S. Miller
---
 include/linux/netpoll.h |  5 +++--
 net/core/netpoll.c      | 31 ++++++++++++++++++-------------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index f3c7c24bec1..fbfdb9d8d3a 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -24,7 +24,8 @@ struct netpoll {
 	struct net_device *dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
-	void (*rx_hook)(struct netpoll *, int, char *, int);
+	void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb,
+			    int offset, int len);

 	union inet_addr local_ip, remote_ip;
 	bool ipv6;
@@ -41,7 +42,7 @@ struct netpoll_info {
 	unsigned long rx_flags;
 	spinlock_t rx_lock;
 	struct semaphore dev_lock;
-	struct list_head rx_np; /* netpolls that registered an rx_hook */
+	struct list_head rx_np; /* netpolls that registered an rx_skb_hook */

 	struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */
 	struct sk_buff_head txq;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fc75c9e461b..8f971990677 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo

 			netpoll_send_skb(np, send_skb);

-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo

 			netpoll_send_skb(np, send_skb);

-			/* If there are several rx_hooks for the same address,
-			   we're fine by sending a single reply */
+			/* If there are several rx_skb_hooks for the same
+			 * address, we're fine by sending a single reply
+			 */
 			break;
 		}
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
@@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb)

 int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	int proto, len, ulen;
-	int hits = 0;
+	int proto, len, ulen, data_len;
+	int hits = 0, offset;
 	const struct iphdr *iph;
 	struct udphdr *uh;
 	struct netpoll *np, *tmp;
+	uint16_t source;

 	if (list_empty(&npinfo->rx_np))
 		goto out;
@@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)

 	len -= iph->ihl*4;
 	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
+	offset = (unsigned char *)(uh + 1) - skb->data;
 	ulen = ntohs(uh->len);
+	data_len = skb->len - offset;
+	source = ntohs(uh->source);

 	if (ulen != len)
 		goto out;
@@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		if (np->local_port && np->local_port != ntohs(uh->dest))
 			continue;

-		np->rx_hook(np, ntohs(uh->source),
-			    (char *)(uh+1),
-			    ulen - sizeof(struct udphdr));
+		np->rx_skb_hook(np, source, skb, offset, data_len);
 		hits++;
 	}
 } else {
@@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 			goto out;
 		uh = udp_hdr(skb);
+		offset = (unsigned char *)(uh + 1) - skb->data;
 		ulen = ntohs(uh->len);
+		data_len = skb->len - offset;
+		source = ntohs(uh->source);
 		if (ulen != skb->len)
 			goto out;
 		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
@@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 			if (np->local_port && np->local_port != ntohs(uh->dest))
 				continue;

-			np->rx_hook(np, ntohs(uh->source),
-				    (char *)(uh+1),
-				    ulen - sizeof(struct udphdr));
+			np->rx_skb_hook(np, source, skb, offset, data_len);
 			hits++;
 		}
 #endif
@@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)

 		npinfo->netpoll = np;

-	if (np->rx_hook) {
+	if (np->rx_skb_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
 		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 		list_add_tail(&np->rx, &npinfo->rx_np);
-- 
cgit v1.2.3-70-g09d2
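
For out-of-tree code being converted, a hook under the new signature
might look like the sketch below. The function name and message are
hypothetical; the point is that the hook now receives the skb plus the
UDP payload offset, so it can use skb_header_pointer() to read payload
that may live in paged (non-linear) data -- exactly the access the old
char* interface could not do safely:

    #include <linux/kernel.h>
    #include <linux/netpoll.h>
    #include <linux/skbuff.h>

    static void sample_rx_skb_hook(struct netpoll *np, int source,
                                   struct sk_buff *skb, int offset, int len)
    {
            char buf[16];
            int n = min_t(int, len, sizeof(buf));
            /* Safe for non-linear skbs: copies into buf when needed */
            const char *payload = skb_header_pointer(skb, offset, n, buf);

            if (!payload)   /* offset/len fall outside the skb */
                    return;

            pr_info("netpoll: %d byte(s) of UDP payload from port %d\n",
                    len, source);
    }
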
From 01ba16d6ec85a1ec4669c75513a76b61ec53ee50 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa
Date: Thu, 24 Oct 2013 10:14:27 +0200
Subject: ipv6: reset dst.expires value when clearing expire flag

On receiving a Packet Too Big ICMP error we update the expire value by
calling rt6_update_expires. This function uses dst_set_expires, which is
implemented such that it can only reduce the expiration value of the dst
entry.

If we insert new non-expiring routing information into the ipv6 fib
where we already have a matching rt6_info, we only clear the RTF_EXPIRES
flag in rt6i_flags and leave the dst.expires value as is.

When new mtu information arrives for that cached dst_entry we again call
dst_set_expires. This time it won't update dst.expires, because we left
the old value intact from the last update and dst_set_expires will not
raise a non-zero expiration.

Fix this by resetting dst.expires when clearing the RTF_EXPIRES flag.
dst_set_expires checks for a zero expiration and updates dst.expires in
that case.

In the past this (not updating dst.expires) was necessary because
dst.expires was placed in a union with the dst_entry *from reference and
rt6_clean_expires did assign NULL to it. This split happened in
ecd9883724b78cc72ed92c98bcb1a46c764fff21 ("ipv6: fix race condition
regarding dst->expires and dst->from").

Reported-by: Steinar H. Gunderson
Reported-by: Valentijn Sessink
Cc: YOSHIFUJI Hideaki
Acked-by: Eric Dumazet
Tested-by: Valentijn Sessink
Signed-off-by: Hannes Frederic Sowa
Signed-off-by: David S. Miller
---
 include/net/ip6_fib.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 48ec25a7fcb..5e661a97969 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
 static inline void rt6_clean_expires(struct rt6_info *rt)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
+	rt->dst.expires = 0;
 }

 static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
-- 
cgit v1.2.3-70-g09d2
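
The asymmetry this fix relies on lives in dst_set_expires(): a zero
dst.expires means "no expiry recorded", so any new deadline is accepted,
while a non-zero value can only ever be moved earlier. Paraphrased from
include/net/dst.h of that era, with comments added:

    static inline void dst_set_expires(struct dst_entry *dst, int timeout)
    {
            unsigned long expires = jiffies + timeout;

            if (expires == 0)       /* 0 is reserved to mean "not set" */
                    expires = 1;

            /* Adopt the new deadline only if none is set or it is earlier */
            if (dst->expires == 0 || time_before(expires, dst->expires))
                    dst->expires = expires;
    }

So without the rt->dst.expires = 0 reset above, a stale deadline keeps
winning the time_before() comparison and fresh PMTU expiry information is
never recorded.
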
From bf378d341e4873ed928dc3c636252e6895a21f50 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 28 Oct 2013 13:55:29 +0100
Subject: perf: Fix perf ring buffer memory ordering

The PPC64 people noticed a missing memory barrier and crufty old
comments in the perf ring buffer code. So update all the comments and
add the missing barrier.

When the architecture implements local_t using atomic_long_t there will
be double barriers issued; but short of introducing more conditional
barrier primitives this is the best we can do.

Reported-by: Victor Kaplansky
Tested-by: Victor Kaplansky
Signed-off-by: Peter Zijlstra
Cc: Mathieu Desnoyers
Cc: michael@ellerman.id.au
Cc: Paul McKenney
Cc: Michael Neuling
Cc: Frederic Weisbecker
Cc: anton@samba.org
Cc: benh@kernel.crashing.org
Link: http://lkml.kernel.org/r/20131025173749.GG19466@laptop.lan
Signed-off-by: Ingo Molnar
---
 include/uapi/linux/perf_event.h | 12 +++++++-----
 kernel/events/ring_buffer.c     | 31 +++++++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 009a655a5d3..2fc1602e23b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -456,13 +456,15 @@ struct perf_event_mmap_page {
 	/*
 	 * Control data for the mmap() data buffer.
 	 *
-	 * User-space reading the @data_head value should issue an rmb(), on
-	 * SMP capable platforms, after reading this value -- see
-	 * perf_event_wakeup().
+	 * User-space reading the @data_head value should issue an smp_rmb(),
+	 * after reading this value.
 	 *
 	 * When the mapping is PROT_WRITE the @data_tail value should be
-	 * written by userspace to reflect the last read data. In this case
-	 * the kernel will not over-write unread data.
+	 * written by userspace to reflect the last read data, after issuing
+	 * an smp_mb() to separate the data read from the ->data_tail store.
+	 * In this case the kernel will not over-write unread data.
+	 *
+	 * See perf_output_put_handle() for the data ordering.
 	 */
 	__u64	data_head;		/* head in the data section */
 	__u64	data_tail;		/* user-space written tail */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index cd55144270b..9c2ddfbf452 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -87,10 +87,31 @@ again:
 		goto out;

 	/*
-	 * Publish the known good head. Rely on the full barrier implied
-	 * by atomic_dec_and_test() order the rb->head read and this
-	 * write.
+	 * Since the mmap() consumer (userspace) can run on a different CPU:
+	 *
+	 *   kernel                             user
+	 *
+	 *   READ ->data_tail                   READ ->data_head
+	 *   smp_mb()   (A)                     smp_rmb()  (C)
+	 *   WRITE $data                        READ $data
+	 *   smp_wmb()  (B)                     smp_mb()   (D)
+	 *   STORE ->data_head                  WRITE ->data_tail
+	 *
+	 * Where A pairs with D, and B pairs with C.
+	 *
+	 * I don't think A needs to be a full barrier because we won't in fact
+	 * write data until we see the store from userspace. So we simply don't
+	 * issue the data WRITE until we observe it. Be conservative for now.
+	 *
+	 * OTOH, D needs to be a full barrier since it separates the data READ
+	 * from the tail WRITE.
+	 *
+	 * For B a WMB is sufficient since it separates two WRITEs, and for C
+	 * an RMB is sufficient since it separates two READs.
+	 *
+	 * See perf_output_begin().
 	 */
+	smp_wmb();
 	rb->user_page->data_head = head;

 	/*
@@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle,
 		 * Userspace could choose to issue a mb() before updating the
 		 * tail pointer. So that all reads will be completed before the
 		 * write is issued.
+		 *
+		 * See perf_output_put_handle().
 		 */
 		tail = ACCESS_ONCE(rb->user_page->data_tail);
-		smp_rmb();
+		smp_mb();
 		offset = head = local_read(&rb->head);
 		head += size;
 		if (unlikely(!perf_output_space(rb, tail, offset, head)))
-- 
cgit v1.2.3-70-g09d2
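
On the user-space side of this contract, barriers (C) and (D) fall to
the consumer. A rough sketch of a conforming reader, using GCC's atomic
fences as stand-ins for the kernel's smp_rmb()/smp_mb(); record parsing
is simplified and wrap-around of individual records is ignored:

    #include <stdint.h>
    #include <linux/perf_event.h>

    /* pg: the mmap()ed control page; data: the data pages; mask: size - 1 */
    static void drain_ring(volatile struct perf_event_mmap_page *pg,
                           const char *data, uint64_t mask)
    {
            uint64_t head = pg->data_head;
            __atomic_thread_fence(__ATOMIC_ACQUIRE);        /* (C), pairs with (B) */

            uint64_t tail = pg->data_tail;
            while (tail < head) {
                    const struct perf_event_header *hdr =
                            (const void *)(data + (tail & mask));
                    /* ... consume the record at hdr ... */
                    tail += hdr->size;
            }

            __atomic_thread_fence(__ATOMIC_SEQ_CST);        /* (D), pairs with (A) */
            pg->data_tail = tail;
    }
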
From bd09d9a35111b6ffc0c7585d3853d0ec7f9f1eb4 Mon Sep 17 00:00:00 2001
From: Greg Thelen
Date: Wed, 30 Oct 2013 13:56:20 -0700
Subject: percpu: fix this_cpu_sub() subtrahend casting for unsigneds

this_cpu_sub() is implemented as negation and addition.

This patch casts the adjustment to the counter type before negation to
sign extend the adjustment. This helps in cases where the counter type
is wider than an unsigned adjustment. An alternative to this patch is
to declare such operations unsupported, but it seemed useful to avoid
surprises.

This patch specifically helps the following example:

    unsigned int delta = 1
    preempt_disable()
    this_cpu_write(long_counter, 0)
    this_cpu_sub(long_counter, delta)
    preempt_enable()

Before this change long_counter on a 64 bit machine ends with value
0xffffffff, rather than 0xffffffffffffffff. This is because
this_cpu_sub(pcp, delta) boils down to this_cpu_add(pcp, -delta),
which is basically:

    long_counter = 0 + 0xffffffff

Also apply the same cast to:

    __this_cpu_sub()
    __this_cpu_sub_return()
    this_cpu_sub_return()

All of percpu_test.ko passes, especially the following cases which
previously failed:

    l -= ui_one;
    __this_cpu_sub(long_counter, ui_one);
    CHECK(l, long_counter, -1);

    l -= ui_one;
    this_cpu_sub(long_counter, ui_one);
    CHECK(l, long_counter, -1);
    CHECK(l, long_counter, 0xffffffffffffffff);

    ul -= ui_one;
    __this_cpu_sub(ulong_counter, ui_one);
    CHECK(ul, ulong_counter, -1);
    CHECK(ul, ulong_counter, 0xffffffffffffffff);

    ul = this_cpu_sub_return(ulong_counter, ui_one);
    CHECK(ul, ulong_counter, 2);

    ul = __this_cpu_sub_return(ulong_counter, ui_one);
    CHECK(ul, ulong_counter, 1);

Signed-off-by: Greg Thelen
Acked-by: Tejun Heo
Acked-by: Johannes Weiner
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86/include/asm/percpu.h | 3 ++-
 include/linux/percpu.h        | 8 ++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0da5200ee79..b3e18f80030 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -128,7 +128,8 @@ do {									\
 do {									\
 	typedef typeof(var) pao_T__;					\
 	const int pao_ID__ = (__builtin_constant_p(val) &&		\
-			      ((val) == 1 || (val) == -1)) ? (val) : 0;	\
+			      ((val) == 1 || (val) == -1)) ?		\
+				(int)(val) : 0;				\
 	if (0) {							\
 		pao_T__ pao_tmp__;					\
 		pao_tmp__ = (val);					\
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cc88172c7d9..c74088ab103 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -332,7 +332,7 @@ do {									\
 #endif

 #ifndef this_cpu_sub
-# define this_cpu_sub(pcp, val)		this_cpu_add((pcp), -(val))
+# define this_cpu_sub(pcp, val)		this_cpu_add((pcp), -(typeof(pcp))(val))
 #endif

 #ifndef this_cpu_inc
@@ -418,7 +418,7 @@ do {									\
 # define this_cpu_add_return(pcp, val)	__pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
 #endif

-#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(val))
+#define this_cpu_sub_return(pcp, val)	this_cpu_add_return(pcp, -(typeof(pcp))(val))
 #define this_cpu_inc_return(pcp)	this_cpu_add_return(pcp, 1)
 #define this_cpu_dec_return(pcp)	this_cpu_add_return(pcp, -1)

@@ -586,7 +586,7 @@ do {									\
 #endif

 #ifndef __this_cpu_sub
-# define __this_cpu_sub(pcp, val)	__this_cpu_add((pcp), -(val))
+# define __this_cpu_sub(pcp, val)	__this_cpu_add((pcp), -(typeof(pcp))(val))
 #endif

 #ifndef __this_cpu_inc
@@ -668,7 +668,7 @@ do {									\
 	__pcpu_size_call_return2(__this_cpu_add_return_, pcp, val)
 #endif

-#define __this_cpu_sub_return(pcp, val)	__this_cpu_add_return(pcp, -(val))
+#define __this_cpu_sub_return(pcp, val)	__this_cpu_add_return(pcp, -(typeof(pcp))(val))
 #define __this_cpu_inc_return(pcp)	__this_cpu_add_return(pcp, 1)
 #define __this_cpu_dec_return(pcp)	__this_cpu_add_return(pcp, -1)
-- 
cgit v1.2.3-70-g09d2
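
The promotion trap has nothing to do with the percpu machinery itself;
this stand-alone program reproduces the arithmetic from the commit
message (LP64 assumed, so long is 64 bits wide):

    #include <stdio.h>

    int main(void)
    {
            unsigned int delta = 1;
            long counter;

            counter = 0;
            counter += -delta;          /* -delta is 0xffffffffU; zero-extended */
            printf("0x%lx\n", (unsigned long)counter);      /* 0xffffffff */

            counter = 0;
            counter += -(long)delta;    /* cast before negating, as the patch does */
            printf("0x%lx\n", (unsigned long)counter);      /* 0xffffffffffffffff */

            return 0;
    }
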
From 9bf76ca325d5e9208eb343f7bd4cc666f703ed30 Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Sun, 3 Nov 2013 12:36:28 +0100
Subject: ipc, msg: forbid negative values for "msg{max,mnb,mni}"

Negative message lengths make no sense -- so don't allow negative queue
lengths or identifier counts either; prevent them from going negative.

Also change the underlying data types to unsigned to avoid hairy
surprises with sign extension in cases where those variables get
evaluated in unsigned expressions with bigger data types, e.g. size_t.

In case a user still wants "unlimited" sizes, she can just use INT_MAX
instead.

Signed-off-by: Mathias Krause
Cc: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/ipc_namespace.h |  6 +++---
 ipc/ipc_sysctl.c              | 20 ++++++++++++--------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 19c19a5eee2..f6c82de1254 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -34,9 +34,9 @@ struct ipc_namespace {
 	int		sem_ctls[4];
 	int		used_sems;

-	int		msg_ctlmax;
-	int		msg_ctlmnb;
-	int		msg_ctlmni;
+	unsigned int	msg_ctlmax;
+	unsigned int	msg_ctlmnb;
+	unsigned int	msg_ctlmni;
 	atomic_t	msg_bytes;
 	atomic_t	msg_hdrs;
 	int		auto_msgmni;
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 130dfece27a..b0e99deb6d0 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write,
 	return err;
 }

-static int proc_ipc_callback_dointvec(ctl_table *table, int write,
+static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
@@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 	memcpy(&ipc_table, table, sizeof(ipc_table));
 	ipc_table.data = get_ipc(table);

-	rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
+	rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);

 	if (write && !rc && lenp_bef == *lenp)
 		/*
@@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
 #define proc_ipc_dointvec		NULL
 #define proc_ipc_dointvec_minmax	NULL
 #define proc_ipc_dointvec_minmax_orphans	NULL
-#define proc_ipc_callback_dointvec	NULL
+#define proc_ipc_callback_dointvec_minmax	NULL
 #define proc_ipcauto_dointvec_minmax	NULL
 #endif

 static int zero;
 static int one = 1;
-#ifdef CONFIG_CHECKPOINT_RESTORE
 static int int_max = INT_MAX;
-#endif

 static struct ctl_table ipc_kern_table[] = {
 	{
@@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = {
 		.data		= &init_ipc_ns.msg_ctlmax,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmax),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_dointvec,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
 	},
 	{
 		.procname	= "msgmni",
 		.data		= &init_ipc_ns.msg_ctlmni,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmni),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_callback_dointvec,
+		.proc_handler	= proc_ipc_callback_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
 	},
 	{
 		.procname	= "msgmnb",
 		.data		= &init_ipc_ns.msg_ctlmnb,
 		.maxlen		= sizeof (init_ipc_ns.msg_ctlmnb),
 		.mode		= 0644,
-		.proc_handler	= proc_ipc_dointvec,
+		.proc_handler	= proc_ipc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &int_max,
 	},
 	{
 		.procname	= "sem",
-- 
cgit v1.2.3-70-g09d2
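
With the minmax handlers and bounds in place, out-of-range writes should
now fail with EINVAL instead of being stored. A small user-space probe
illustrating the expected behaviour (illustrative only; run as root on a
kernel carrying this patch):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/proc/sys/kernel/msgmax", O_WRONLY);

            if (fd < 0) {
                    perror("open (are you root?)");
                    return 1;
            }
            /* The bounded handler rejects values outside [0, INT_MAX] */
            if (write(fd, "-1", 2) < 0)
                    printf("write failed as expected: %s\n", strerror(errno));
            close(fd);
            return 0;
    }
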