From 220815a9665f7deca98a09ecca655044f94cfa44 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Nov 2013 18:17:04 +0100 Subject: genetlink: fix genlmsg_multicast() bug Unfortunately, I introduced a tremendously stupid bug into genlmsg_multicast() when doing all those multicast group changes: it adjusts the group number, but then passes it to genlmsg_multicast_netns() which does that again. Somehow, my tests failed to catch this, so add a warning into genlmsg_multicast_netns() and remove the offending group ID adjustment. Also add a warning to the similar code in other functions so people who misuse them are more loudly warned. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ace4abf118d..771af09e90e 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -265,7 +265,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family, struct net *net, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) + if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); @@ -283,9 +283,6 @@ static inline int genlmsg_multicast(struct genl_family *family, struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - if (group >= family->n_mcgrps) - return -EINVAL; - group = family->mcgrp_offset + group; return genlmsg_multicast_netns(family, &init_net, skb, portid, group, flags); } -- cgit v1.2.3-70-g09d2 From 91398a0992c8aa18eb7749060b75761ece5ddc57 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Nov 2013 18:20:28 +0100 Subject: genetlink: fix genl_set_err() group ID Fix another really stupid bug - I introduced genl_set_err() precisely to be able to adjust the group and reject invalid ones, but then forgot to do so. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/genetlink.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 771af09e90e..1b177ed803b 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -384,6 +384,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) static inline int genl_set_err(struct genl_family *family, struct net *net, u32 portid, u32 group, int code) { + if (WARN_ON_ONCE(group >= family->n_mcgrps)) + return -EINVAL; + group = family->mcgrp_offset + group; return netlink_set_err(net->genl_sock, portid, group, code); } -- cgit v1.2.3-70-g09d2 From 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++++-- include/net/ping.h | 3 ++- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 12 files changed, 26 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 217bc5bfc6c..5a25f36fe3a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -473,7 +473,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2a5f668cd68..eb198acaac1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -776,8 +776,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/include/net/ping.h b/include/net/ping.h index 3f67704f374..90f48417b03 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -31,7 +31,8 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); int (*icmpv6_err_convert)(u8 type, u8 code, int *err); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f858266fa7..ddf32a6bc41 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca2d8f..840cf1b9e6e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -841,10 +841,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb505e..23c3e5b5bb5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d668d..44dfaa09b58 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1236,7 +1236,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0ff57c..d690204a027 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31a87f..a83243c3d65 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1df040..7fb4e14c467 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf8389..bcd5699313c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd65304be6..d9b437e5500 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) -- cgit v1.2.3-70-g09d2 From 6eabca54d6781f61c7318517c1463a098acb7a87 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Mon, 25 Nov 2013 11:26:57 +0800 Subject: sctp: Restore 'resent' bit to avoid retransmitted chunks for RTT measurements Currently retransmitted DATA chunks could also be used for RTT measurements since there are no flag to identify whether the transmitted DATA chunk is a new one or a retransmitted one. This problem is introduced by commit ae19c5486 ("sctp: remove 'resent' bit from the chunk") which inappropriately removed the 'resent' bit completely, instead of doing this, we should set the resent bit only for the retransmitted DATA chunks. Signed-off-by: Xufeng Zhang Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2174d8da077..ea0ca5f6e62 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -629,6 +629,7 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ + resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ diff --git a/net/sctp/output.c b/net/sctp/output.c index e650978daf2..0e2644d0a77 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -474,10 +474,11 @@ int sctp_packet_transmit(struct sctp_packet *packet) * for a given destination transport address. */ - if (!tp->rto_pending) { + if (!chunk->resent && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } + has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abb6db008df..f51ba985a36 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -446,6 +446,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } + chunk->resent = 1; + /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1375,6 +1377,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && + !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; -- cgit v1.2.3-70-g09d2 From 1431fb31ecbaba1b5718006128f0f2ed0b94e1c3 Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Tue, 3 Dec 2013 17:39:29 +0000 Subject: xen-netback: fix fragment detection in checksum setup The code to detect fragments in checksum_setup() was missing for IPv4 and too eager for IPv6. (It transpires that Windows seems to send IPv6 packets with a fragment header even if they are not a fragment - i.e. offset is zero, and M bit is not set). This patch also incorporates a fix to callers of maybe_pull_tail() where skb->network_header was being erroneously added to the length argument. Signed-off-by: Paul Durrant Signed-off-by: Zoltan Kiss Cc: Wei Liu Cc: Ian Campbell Cc: David Vrabel cc: David Miller Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 236 ++++++++++++++++++++++---------------- include/linux/ipv6.h | 1 + include/net/ipv6.h | 3 +- 3 files changed, 140 insertions(+), 100 deletions(-) (limited to 'include/net') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 64f0e0d18b8..acf13920e6d 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1149,49 +1149,72 @@ static int xenvif_set_skb_gso(struct xenvif *vif, return 0; } -static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len) +static inline int maybe_pull_tail(struct sk_buff *skb, unsigned int len, + unsigned int max) { - if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) { - /* If we need to pullup then pullup to the max, so we - * won't need to do it again. - */ - int target = min_t(int, skb->len, MAX_TCP_HEADER); - __pskb_pull_tail(skb, target - skb_headlen(skb)); - } + if (skb_headlen(skb) >= len) + return 0; + + /* If we need to pullup then pullup to the max, so we + * won't need to do it again. + */ + if (max > skb->len) + max = skb->len; + + if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL) + return -ENOMEM; + + if (skb_headlen(skb) < len) + return -EPROTO; + + return 0; } +/* This value should be large enough to cover a tagged ethernet header plus + * maximally sized IP and TCP or UDP headers. + */ +#define MAX_IP_HDR_LEN 128 + static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, int recalculate_partial_csum) { - struct iphdr *iph = (void *)skb->data; - unsigned int header_size; unsigned int off; - int err = -EPROTO; + bool fragment; + int err; + + fragment = false; + + err = maybe_pull_tail(skb, + sizeof(struct iphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; - off = sizeof(struct iphdr); + if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF)) + fragment = true; - header_size = skb->network_header + off + MAX_IPOPTLEN; - maybe_pull_tail(skb, header_size); + off = ip_hdrlen(skb); - off = iph->ihl * 4; + err = -EPROTO; - switch (iph->protocol) { + switch (ip_hdr(skb)->protocol) { case IPPROTO_TCP: if (!skb_partial_csum_set(skb, off, offsetof(struct tcphdr, check))) goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = tcp_hdr(skb); - - header_size = skb->network_header + - off + - sizeof(struct tcphdr); - maybe_pull_tail(skb, header_size); - - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - off, - IPPROTO_TCP, 0); + err = maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); } break; case IPPROTO_UDP: @@ -1200,24 +1223,20 @@ static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb, goto out; if (recalculate_partial_csum) { - struct udphdr *udph = udp_hdr(skb); - - header_size = skb->network_header + - off + - sizeof(struct udphdr); - maybe_pull_tail(skb, header_size); - - udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - skb->len - off, - IPPROTO_UDP, 0); + err = maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IP_HDR_LEN); + if (err < 0) + goto out; + + udp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); } break; default: - if (net_ratelimit()) - netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, " - "dropping a protocol %d packet\n", - iph->protocol); goto out; } @@ -1227,75 +1246,99 @@ out: return err; } +/* This value should be large enough to cover a tagged ethernet header plus + * an IPv6 header, all options, and a maximal TCP or UDP header. + */ +#define MAX_IPV6_HDR_LEN 256 + +#define OPT_HDR(type, skb, off) \ + (type *)(skb_network_header(skb) + (off)) + static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, int recalculate_partial_csum) { - int err = -EPROTO; - struct ipv6hdr *ipv6h = (void *)skb->data; + int err; u8 nexthdr; - unsigned int header_size; unsigned int off; + unsigned int len; bool fragment; bool done; + fragment = false; done = false; off = sizeof(struct ipv6hdr); - header_size = skb->network_header + off; - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; - nexthdr = ipv6h->nexthdr; + nexthdr = ipv6_hdr(skb)->nexthdr; - while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) && - !done) { + len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); + while (off <= len && !done) { switch (nexthdr) { case IPPROTO_DSTOPTS: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: { - struct ipv6_opt_hdr *hp = (void *)(skb->data + off); + struct ipv6_opt_hdr *hp; - header_size = skb->network_header + - off + - sizeof(struct ipv6_opt_hdr); - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, + off + + sizeof(struct ipv6_opt_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + hp = OPT_HDR(struct ipv6_opt_hdr, skb, off); nexthdr = hp->nexthdr; off += ipv6_optlen(hp); break; } case IPPROTO_AH: { - struct ip_auth_hdr *hp = (void *)(skb->data + off); + struct ip_auth_hdr *hp; + + err = maybe_pull_tail(skb, + off + + sizeof(struct ip_auth_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct ip_auth_hdr, skb, off); + nexthdr = hp->nexthdr; + off += ipv6_authlen(hp); + break; + } + case IPPROTO_FRAGMENT: { + struct frag_hdr *hp; - header_size = skb->network_header + - off + - sizeof(struct ip_auth_hdr); - maybe_pull_tail(skb, header_size); + err = maybe_pull_tail(skb, + off + + sizeof(struct frag_hdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + hp = OPT_HDR(struct frag_hdr, skb, off); + + if (hp->frag_off & htons(IP6_OFFSET | IP6_MF)) + fragment = true; nexthdr = hp->nexthdr; - off += (hp->hdrlen+2)<<2; + off += sizeof(struct frag_hdr); break; } - case IPPROTO_FRAGMENT: - fragment = true; - /* fall through */ default: done = true; break; } } - if (!done) { - if (net_ratelimit()) - netdev_err(vif->dev, "Failed to parse packet header\n"); - goto out; - } + err = -EPROTO; - if (fragment) { - if (net_ratelimit()) - netdev_err(vif->dev, "Packet is a fragment!\n"); + if (!done || fragment) goto out; - } switch (nexthdr) { case IPPROTO_TCP: @@ -1304,17 +1347,17 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = tcp_hdr(skb); - - header_size = skb->network_header + - off + - sizeof(struct tcphdr); - maybe_pull_tail(skb, header_size); - - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, - &ipv6h->daddr, - skb->len - off, - IPPROTO_TCP, 0); + err = maybe_pull_tail(skb, + off + sizeof(struct tcphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_TCP, 0); } break; case IPPROTO_UDP: @@ -1323,25 +1366,20 @@ static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb, goto out; if (recalculate_partial_csum) { - struct udphdr *udph = udp_hdr(skb); - - header_size = skb->network_header + - off + - sizeof(struct udphdr); - maybe_pull_tail(skb, header_size); - - udph->check = ~csum_ipv6_magic(&ipv6h->saddr, - &ipv6h->daddr, - skb->len - off, - IPPROTO_UDP, 0); + err = maybe_pull_tail(skb, + off + sizeof(struct udphdr), + MAX_IPV6_HDR_LEN); + if (err < 0) + goto out; + + udp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, + IPPROTO_UDP, 0); } break; default: - if (net_ratelimit()) - netdev_err(vif->dev, - "Attempting to checksum a non-TCP/UDP packet, " - "dropping a protocol %d packet\n", - nexthdr); goto out; } diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5d89d1b808a..c56c350324e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -4,6 +4,7 @@ #include #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) +#define ipv6_authlen(p) (((p)->hdrlen+2) << 2) /* * This structure contains configuration options per IPv6 link. */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index eb198acaac1..488316e339a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -110,7 +110,8 @@ struct frag_hdr { __be32 identification; }; -#define IP6_MF 0x0001 +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 #include -- cgit v1.2.3-70-g09d2 From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Dec 2013 20:12:04 -0800 Subject: tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling. kill memcg_tcp_enter_memory_pressure. The only function of memcg_tcp_enter_memory_pressure was to reduce deal with the unnecessary abstraction that was tcp_memcontrol. Now that struct tcp_memcontrol is gone remove this unnecessary function, the unnecessary function pointer, and modify sk_enter_memory_pressure to set this field directly, just as sk_leave_memory_pressure cleas this field directly. This fixes a small bug I intruduced when killing struct tcp_memcontrol that caused memcg_tcp_enter_memory_pressure to never be called and thus failed to ever set cg_proto->memory_pressure. Remove the cg_proto enter_memory_pressure function as it now serves no useful purpose. Don't test cg_proto->memory_presser in sk_leave_memory_pressure before clearing it. The test was originally there to ensure that the pointer was non-NULL. Now that cg_proto is not a pointer the pointer does not matter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++---- net/ipv4/tcp_memcontrol.c | 7 ------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index e3a18ff0c38..2ef3c3eca47 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1035,7 +1035,6 @@ enum cg_proto_flags { }; struct cg_proto { - void (*enter_memory_pressure)(struct sock *sk); struct res_counter memory_allocated; /* Current allocated memory. */ struct percpu_counter sockets_allocated; /* Current number of sockets. */ int memory_pressure; @@ -1155,8 +1154,7 @@ static inline void sk_leave_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - if (cg_proto->memory_pressure) - cg_proto->memory_pressure = 0; + cg_proto->memory_pressure = 0; } } @@ -1171,7 +1169,7 @@ static inline void sk_enter_memory_pressure(struct sock *sk) struct proto *prot = sk->sk_prot; for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) - cg_proto->enter_memory_pressure(sk); + cg_proto->memory_pressure = 1; } sk->sk_prot->enter_memory_pressure(sk); diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 269a89ecd2f..f7e522c558b 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include #include -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* -- cgit v1.2.3-70-g09d2 From 9f70f46bd4c7267d48ef461a1d613ec9ec0d520c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 10 Dec 2013 06:48:15 -0500 Subject: sctp: properly latch and use autoclose value from sock to association Currently, sctp associations latch a sockets autoclose value to an association at association init time, subject to capping constraints from the max_autoclose sysctl value. This leads to an odd situation where an application may set a socket level autoclose timeout, but sliently sctp will limit the autoclose timeout to something less than that. Fix this by modifying the autoclose setsockopt function to check the limit, cap it and warn the user via syslog that the timeout is capped. This will allow getsockopt to return valid autoclose timeout values that reflect what subsequent associations actually use. While were at it, also elimintate the assoc->autoclose variable, it duplicates whats in the timeout array, which leads to multiple sources for the same information, that may differ (as the former isn't subject to any capping). This gives us the timeout information in a canonical place and saves some space in the association structure as well. Signed-off-by: Neil Horman Acked-by: Vlad Yasevich CC: Wang Weidong CC: David Miller CC: Vlad Yasevich CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 ------ net/sctp/associola.c | 5 +---- net/sctp/output.c | 3 ++- net/sctp/sm_statefuns.c | 12 ++++++------ net/sctp/socket.c | 4 ++++ 5 files changed, 13 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ea0ca5f6e62..67b5d006827 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1726,12 +1726,6 @@ struct sctp_association { /* How many duplicated TSNs have we seen? */ int numduptsns; - /* Number of seconds of idle time before an association is closed. - * In the association context, this is really used as a boolean - * since the real timeout is stored in the timeouts array - */ - __u32 autoclose; - /* These are to support * "SCTP Extensions for Dynamic Reconfiguration of IP Addresses * and Enforcement of Flow and Message Limits" diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 68a27f9796d..31ed008c8e1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -154,8 +154,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -291,8 +290,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.ipv6_address = 1; INIT_LIST_HEAD(&asoc->asocs); - asoc->autoclose = sp->autoclose; - asoc->default_stream = sp->default_stream; asoc->default_ppid = sp->default_ppid; asoc->default_flags = sp->default_flags; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0e2644d0a77..0fb140f8f08 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -581,7 +581,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned long timeout; /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { + if (sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index dfe3f36ff2a..a26065be728 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -820,7 +820,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (new_asoc->autoclose) + if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -908,7 +908,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2970,7 +2970,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) force = SCTP_FORCE(); - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -3878,7 +3878,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, SCTP_CHUNK(chunk)); /* Count this as receiving DATA. */ - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -5267,7 +5267,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5346,7 +5346,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72046b9729a..5455043f449 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2196,6 +2196,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) @@ -2205,6 +2206,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + if (sp->autoclose > net->sctp.max_autoclose) + sp->autoclose = net->sctp.max_autoclose; + return 0; } -- cgit v1.2.3-70-g09d2