diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 16 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 34 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 7 | ||||
-rw-r--r-- | net/ipv4/raw.c | 24 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 59 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp.c | 73 |
7 files changed, 152 insertions, 65 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e92f1fd28aa..5df2f6a0b0f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1077,12 +1077,16 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_up(in_dev); /* fall through */ case NETDEV_CHANGEADDR: - if (IN_DEV_ARP_NOTIFY(in_dev)) - arp_send(ARPOP_REQUEST, ETH_P_ARP, - in_dev->ifa_list->ifa_address, - dev, - in_dev->ifa_list->ifa_address, - NULL, dev->dev_addr, NULL); + /* Send gratuitous ARP to notify of link change */ + if (IN_DEV_ARP_NOTIFY(in_dev)) { + struct in_ifaddr *ifa = in_dev->ifa_list; + + if (ifa) + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_address, dev, + ifa->ifa_address, NULL, + dev->dev_addr, NULL); + } break; case NETDEV_DOWN: ip_mc_down(in_dev); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4351ca2cf0b..537731b3bcb 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries; EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); +/* Decide when to expire the request and when to resend SYN-ACK */ +static inline void syn_ack_recalc(struct request_sock *req, const int thresh, + const int max_retries, + const u8 rskq_defer_accept, + int *expire, int *resend) +{ + if (!rskq_defer_accept) { + *expire = req->retrans >= thresh; + *resend = 1; + return; + } + *expire = req->retrans >= thresh && + (!inet_rsk(req)->acked || req->retrans >= max_retries); + /* + * Do not resend while waiting for data after ACK, + * start to resend on end of deferring period to give + * last chance for data or ACK to create established socket. + */ + *resend = !inet_rsk(req)->acked || + req->retrans >= rskq_defer_accept - 1; +} + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if ((req->retrans < thresh || - (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(parent, req)) { + int expire = 0, resend = 0; + + syn_ack_recalc(req, thresh, max_retries, + queue->rskq_defer_accept, + &expire, &resend); + if (!expire && + (!resend || + !req->rsk_ops->rtx_syn_ack(parent, req) || + inet_rsk(req)->acked)) { unsigned long timeo; if (req->retrans++ == 0) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0c0b6e363a2..e982b5c1ee1 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); - if (dev) { + if (dev) mreq.imr_ifindex = dev->ifindex; - dev_put(dev); - } } else - dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); + dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); err = -EADDRNOTAVAIL; if (!dev) break; + dev_put(dev); err = -EINVAL; if (sk->sk_bound_dev_if && diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 757c9171e7c..ab996f9c0fe 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -352,13 +352,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, from, 0, length); - if (err) - goto error_fault; + err = -EFAULT; + if (memcpy_fromiovecend((void *)iph, from, 0, length)) + goto error_free; - /* We don't modify invalid header */ iphlen = iph->ihl * 4; - if (iphlen >= sizeof(*iph) && iphlen <= length) { + + /* + * We don't want to modify the ip header, but we do need to + * be sure that it won't cause problems later along the network + * stack. Specifically we want to make sure that iph->ihl is a + * sane value. If ihl points beyond the length of the buffer passed + * in, reject the frame as invalid + */ + err = -EINVAL; + if (iphlen > length) + goto error_free; + + if (iphlen >= sizeof(*iph)) { if (!iph->saddr) iph->saddr = rt->rt_src; iph->check = 0; @@ -381,8 +392,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, out: return 0; -error_fault: - err = -EFAULT; +error_free: kfree_skb(skb); error: IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 64d0af67582..98440ad8255 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk) EXPORT_SYMBOL(tcp_enter_memory_pressure); +/* Convert seconds to retransmits based on initial and max timeout */ +static u8 secs_to_retrans(int seconds, int timeout, int rto_max) +{ + u8 res = 0; + + if (seconds > 0) { + int period = timeout; + + res = 1; + while (seconds > period && res < 255) { + res++; + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return res; +} + +/* Convert retransmits to seconds based on initial and max timeout */ +static int retrans_to_secs(u8 retrans, int timeout, int rto_max) +{ + int period = 0; + + if (retrans > 0) { + period = timeout; + while (--retrans) { + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return period; +} + /* * Wait for a TCP event. * @@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - WARN_ON(!(flags & MSG_PEEK)); + WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " + "copied %X seq %X\n", *seq, + TCP_SKB_CB(skb)->seq); } /* Well, if we have backlog, try to process it now yet. */ @@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - icsk->icsk_accept_queue.rskq_defer_accept = 0; - if (val > 0) { - /* Translate value in seconds to number of - * retransmits */ - while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && - val > ((TCP_TIMEOUT_INIT / HZ) << - icsk->icsk_accept_queue.rskq_defer_accept)) - icsk->icsk_accept_queue.rskq_defer_accept++; - icsk->icsk_accept_queue.rskq_defer_accept++; - } + /* Translate value in seconds to number of retransmits */ + icsk->icsk_accept_queue.rskq_defer_accept = + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: @@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : - ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); + val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, + TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624c3c9b3c2..4c03598ed92 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ + if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; return NULL; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6ec6a8a4a22..d0d436d6216 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -841,6 +841,42 @@ out: return ret; } + +/** + * first_packet_length - return length of first packet in receive queue + * @sk: socket + * + * Drops all bad checksum frames, until a valid one is found. + * Returns the length of found skb, or 0 if none is found. + */ +static unsigned int first_packet_length(struct sock *sk) +{ + struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff *skb; + unsigned int res; + + __skb_queue_head_init(&list_kill); + + spin_lock_bh(&rcvq->lock); + while ((skb = skb_peek(rcvq)) != NULL && + udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + __skb_unlink(skb, rcvq); + __skb_queue_tail(&list_kill, skb); + } + res = skb ? skb->len : 0; + spin_unlock_bh(&rcvq->lock); + + if (!skb_queue_empty(&list_kill)) { + lock_sock(sk); + __skb_queue_purge(&list_kill); + sk_mem_reclaim_partial(sk); + release_sock(sk); + } + return res; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -857,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) case SIOCINQ: { - struct sk_buff *skb; - unsigned long amount; + unsigned int amount = first_packet_length(sk); - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { + if (amount) /* * We will only return the amount * of this packet since that is all * that will be read. */ - amount = skb->len - sizeof(struct udphdr); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); + amount -= sizeof(struct udphdr); + return put_user(amount, (int __user *)arg); } @@ -1540,29 +1571,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - int is_lite = IS_UDPLITE(sk); /* Check for false positives due to checksum errors */ - if ((mask & POLLRDNORM) && - !(file->f_flags & O_NONBLOCK) && - !(sk->sk_shutdown & RCV_SHUTDOWN)) { - struct sk_buff_head *rcvq = &sk->sk_receive_queue; - struct sk_buff *skb; - - spin_lock_bh(&rcvq->lock); - while ((skb = skb_peek(rcvq)) != NULL && - udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, is_lite); - __skb_unlink(skb, rcvq); - kfree_skb(skb); - } - spin_unlock_bh(&rcvq->lock); - - /* nothing to see, move along */ - if (skb == NULL) - mask &= ~(POLLIN | POLLRDNORM); - } + if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && + !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) + mask &= ~(POLLIN | POLLRDNORM); return mask; |