diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
| -rw-r--r-- | net/ipv4/tcp_output.c | 311 | 
1 files changed, 259 insertions, 52 deletions
| diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fcd278a7080..383ce237640 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,10 @@ int sysctl_tcp_base_mss __read_mostly = 512;  /* By default, RFC2861 behavior.  */  int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ +EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); + +  /* Account for new data that has been sent to the network. */  static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)  { @@ -362,15 +366,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp)  #define OPTION_TS		(1 << 1)  #define OPTION_MD5		(1 << 2)  #define OPTION_WSCALE		(1 << 3) +#define OPTION_COOKIE_EXTENSION	(1 << 4)  struct tcp_out_options {  	u8 options;		/* bit field of OPTION_* */  	u8 ws;			/* window scale, 0 to disable */  	u8 num_sack_blocks;	/* number of SACK blocks to include */ +	u8 hash_size;		/* bytes in hash_location */  	u16 mss;		/* 0 to disable */  	__u32 tsval, tsecr;	/* need to include OPTION_TS */ +	__u8 *hash_location;	/* temporary pointer, overloaded */  }; +/* The sysctl int routines are generic, so check consistency here. + */ +static u8 tcp_cookie_size_check(u8 desired) +{ +	if (desired > 0) { +		/* previously specified */ +		return desired; +	} +	if (sysctl_tcp_cookie_size <= 0) { +		/* no default specified */ +		return 0; +	} +	if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { +		/* value too small, specify minimum */ +		return TCP_COOKIE_MIN; +	} +	if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { +		/* value too large, specify maximum */ +		return TCP_COOKIE_MAX; +	} +	if (0x1 & sysctl_tcp_cookie_size) { +		/* 8-bit multiple, illegal, fix it */ +		return (u8)(sysctl_tcp_cookie_size + 0x1); +	} +	return (u8)sysctl_tcp_cookie_size; +} +  /* Write previously computed TCP options to the packet.   *   * Beware: Something in the Internet is very sensitive to the ordering of @@ -385,17 +419,34 @@ struct tcp_out_options {   * (but it may well be that other scenarios fail similarly).   */  static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, -			      const struct tcp_out_options *opts, -			      __u8 **md5_hash) { -	if (unlikely(OPTION_MD5 & opts->options)) { -		*ptr++ = htonl((TCPOPT_NOP << 24) | -			       (TCPOPT_NOP << 16) | -			       (TCPOPT_MD5SIG << 8) | -			       TCPOLEN_MD5SIG); -		*md5_hash = (__u8 *)ptr; +			      struct tcp_out_options *opts) +{ +	u8 options = opts->options;	/* mungable copy */ + +	/* Having both authentication and cookies for security is redundant, +	 * and there's certainly not enough room.  Instead, the cookie-less +	 * extension variant is proposed. +	 * +	 * Consider the pessimal case with authentication.  The options +	 * could look like: +	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 +	 */ +	if (unlikely(OPTION_MD5 & options)) { +		if (unlikely(OPTION_COOKIE_EXTENSION & options)) { +			*ptr++ = htonl((TCPOPT_COOKIE << 24) | +				       (TCPOLEN_COOKIE_BASE << 16) | +				       (TCPOPT_MD5SIG << 8) | +				       TCPOLEN_MD5SIG); +		} else { +			*ptr++ = htonl((TCPOPT_NOP << 24) | +				       (TCPOPT_NOP << 16) | +				       (TCPOPT_MD5SIG << 8) | +				       TCPOLEN_MD5SIG); +		} +		options &= ~OPTION_COOKIE_EXTENSION; +		/* overload cookie hash location */ +		opts->hash_location = (__u8 *)ptr;  		ptr += 4; -	} else { -		*md5_hash = NULL;  	}  	if (unlikely(opts->mss)) { @@ -404,12 +455,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,  			       opts->mss);  	} -	if (likely(OPTION_TS & opts->options)) { -		if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { +	if (likely(OPTION_TS & options)) { +		if (unlikely(OPTION_SACK_ADVERTISE & options)) {  			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |  				       (TCPOLEN_SACK_PERM << 16) |  				       (TCPOPT_TIMESTAMP << 8) |  				       TCPOLEN_TIMESTAMP); +			options &= ~OPTION_SACK_ADVERTISE;  		} else {  			*ptr++ = htonl((TCPOPT_NOP << 24) |  				       (TCPOPT_NOP << 16) | @@ -420,15 +472,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,  		*ptr++ = htonl(opts->tsecr);  	} -	if (unlikely(OPTION_SACK_ADVERTISE & opts->options && -		     !(OPTION_TS & opts->options))) { +	/* Specification requires after timestamp, so do it now. +	 * +	 * Consider the pessimal case without authentication.  The options +	 * could look like: +	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 +	 */ +	if (unlikely(OPTION_COOKIE_EXTENSION & options)) { +		__u8 *cookie_copy = opts->hash_location; +		u8 cookie_size = opts->hash_size; + +		/* 8-bit multiple handled in tcp_cookie_size_check() above, +		 * and elsewhere. +		 */ +		if (0x2 & cookie_size) { +			__u8 *p = (__u8 *)ptr; + +			/* 16-bit multiple */ +			*p++ = TCPOPT_COOKIE; +			*p++ = TCPOLEN_COOKIE_BASE + cookie_size; +			*p++ = *cookie_copy++; +			*p++ = *cookie_copy++; +			ptr++; +			cookie_size -= 2; +		} else { +			/* 32-bit multiple */ +			*ptr++ = htonl(((TCPOPT_NOP << 24) | +					(TCPOPT_NOP << 16) | +					(TCPOPT_COOKIE << 8) | +					TCPOLEN_COOKIE_BASE) + +				       cookie_size); +		} + +		if (cookie_size > 0) { +			memcpy(ptr, cookie_copy, cookie_size); +			ptr += (cookie_size / 4); +		} +	} + +	if (unlikely(OPTION_SACK_ADVERTISE & options)) {  		*ptr++ = htonl((TCPOPT_NOP << 24) |  			       (TCPOPT_NOP << 16) |  			       (TCPOPT_SACK_PERM << 8) |  			       TCPOLEN_SACK_PERM);  	} -	if (unlikely(OPTION_WSCALE & opts->options)) { +	if (unlikely(OPTION_WSCALE & options)) {  		*ptr++ = htonl((TCPOPT_NOP << 24) |  			       (TCPOPT_WINDOW << 16) |  			       (TCPOLEN_WINDOW << 8) | @@ -463,13 +552,17 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,  				struct tcp_out_options *opts,  				struct tcp_md5sig_key **md5) {  	struct tcp_sock *tp = tcp_sk(sk); -	unsigned size = 0; +	struct tcp_cookie_values *cvp = tp->cookie_values; +	unsigned remaining = MAX_TCP_OPTION_SPACE; +	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? +			 tcp_cookie_size_check(cvp->cookie_desired) : +			 0;  #ifdef CONFIG_TCP_MD5SIG  	*md5 = tp->af_specific->md5_lookup(sk, sk);  	if (*md5) {  		opts->options |= OPTION_MD5; -		size += TCPOLEN_MD5SIG_ALIGNED; +		remaining -= TCPOLEN_MD5SIG_ALIGNED;  	}  #else  	*md5 = NULL; @@ -485,26 +578,72 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,  	 * SACKs don't matter, we never delay an ACK when we have any of those  	 * going out.  */  	opts->mss = tcp_advertise_mss(sk); -	size += TCPOLEN_MSS_ALIGNED; +	remaining -= TCPOLEN_MSS_ALIGNED;  	if (likely(sysctl_tcp_timestamps && *md5 == NULL)) {  		opts->options |= OPTION_TS;  		opts->tsval = TCP_SKB_CB(skb)->when;  		opts->tsecr = tp->rx_opt.ts_recent; -		size += TCPOLEN_TSTAMP_ALIGNED; +		remaining -= TCPOLEN_TSTAMP_ALIGNED;  	}  	if (likely(sysctl_tcp_window_scaling)) {  		opts->ws = tp->rx_opt.rcv_wscale;  		opts->options |= OPTION_WSCALE; -		size += TCPOLEN_WSCALE_ALIGNED; +		remaining -= TCPOLEN_WSCALE_ALIGNED;  	}  	if (likely(sysctl_tcp_sack)) {  		opts->options |= OPTION_SACK_ADVERTISE;  		if (unlikely(!(OPTION_TS & opts->options))) -			size += TCPOLEN_SACKPERM_ALIGNED; +			remaining -= TCPOLEN_SACKPERM_ALIGNED;  	} -	return size; +	/* Note that timestamps are required by the specification. +	 * +	 * Odd numbers of bytes are prohibited by the specification, ensuring +	 * that the cookie is 16-bit aligned, and the resulting cookie pair is +	 * 32-bit aligned. +	 */ +	if (*md5 == NULL && +	    (OPTION_TS & opts->options) && +	    cookie_size > 0) { +		int need = TCPOLEN_COOKIE_BASE + cookie_size; + +		if (0x2 & need) { +			/* 32-bit multiple */ +			need += 2; /* NOPs */ + +			if (need > remaining) { +				/* try shrinking cookie to fit */ +				cookie_size -= 2; +				need -= 4; +			} +		} +		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { +			cookie_size -= 4; +			need -= 4; +		} +		if (TCP_COOKIE_MIN <= cookie_size) { +			opts->options |= OPTION_COOKIE_EXTENSION; +			opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; +			opts->hash_size = cookie_size; + +			/* Remember for future incarnations. */ +			cvp->cookie_desired = cookie_size; + +			if (cvp->cookie_desired != cvp->cookie_pair_size) { +				/* Currently use random bytes as a nonce, +				 * assuming these are completely unpredictable +				 * by hostile users of the same system. +				 */ +				get_random_bytes(&cvp->cookie_pair[0], +						 cookie_size); +				cvp->cookie_pair_size = cookie_size; +			} + +			remaining -= need; +		} +	} +	return MAX_TCP_OPTION_SPACE - remaining;  }  /* Set up TCP options for SYN-ACKs. */ @@ -512,48 +651,77 @@ static unsigned tcp_synack_options(struct sock *sk,  				   struct request_sock *req,  				   unsigned mss, struct sk_buff *skb,  				   struct tcp_out_options *opts, -				   struct tcp_md5sig_key **md5) { -	unsigned size = 0; +				   struct tcp_md5sig_key **md5, +				   struct tcp_extend_values *xvp) +{  	struct inet_request_sock *ireq = inet_rsk(req); -	char doing_ts; +	unsigned remaining = MAX_TCP_OPTION_SPACE; +	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? +			 xvp->cookie_plus : +			 0; +	bool doing_ts = ireq->tstamp_ok;  #ifdef CONFIG_TCP_MD5SIG  	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);  	if (*md5) {  		opts->options |= OPTION_MD5; -		size += TCPOLEN_MD5SIG_ALIGNED; +		remaining -= TCPOLEN_MD5SIG_ALIGNED; + +		/* We can't fit any SACK blocks in a packet with MD5 + TS +		 * options. There was discussion about disabling SACK +		 * rather than TS in order to fit in better with old, +		 * buggy kernels, but that was deemed to be unnecessary. +		 */ +		doing_ts &= !ireq->sack_ok;  	}  #else  	*md5 = NULL;  #endif -	/* we can't fit any SACK blocks in a packet with MD5 + TS -	   options. There was discussion about disabling SACK rather than TS in -	   order to fit in better with old, buggy kernels, but that was deemed -	   to be unnecessary. */ -	doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); - +	/* We always send an MSS option. */  	opts->mss = mss; -	size += TCPOLEN_MSS_ALIGNED; +	remaining -= TCPOLEN_MSS_ALIGNED;  	if (likely(ireq->wscale_ok)) {  		opts->ws = ireq->rcv_wscale;  		opts->options |= OPTION_WSCALE; -		size += TCPOLEN_WSCALE_ALIGNED; +		remaining -= TCPOLEN_WSCALE_ALIGNED;  	}  	if (likely(doing_ts)) {  		opts->options |= OPTION_TS;  		opts->tsval = TCP_SKB_CB(skb)->when;  		opts->tsecr = req->ts_recent; -		size += TCPOLEN_TSTAMP_ALIGNED; +		remaining -= TCPOLEN_TSTAMP_ALIGNED;  	}  	if (likely(ireq->sack_ok)) {  		opts->options |= OPTION_SACK_ADVERTISE;  		if (unlikely(!doing_ts)) -			size += TCPOLEN_SACKPERM_ALIGNED; +			remaining -= TCPOLEN_SACKPERM_ALIGNED;  	} -	return size; +	/* Similar rationale to tcp_syn_options() applies here, too. +	 * If the <SYN> options fit, the same options should fit now! +	 */ +	if (*md5 == NULL && +	    doing_ts && +	    cookie_plus > TCPOLEN_COOKIE_BASE) { +		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ + +		if (0x2 & need) { +			/* 32-bit multiple */ +			need += 2; /* NOPs */ +		} +		if (need <= remaining) { +			opts->options |= OPTION_COOKIE_EXTENSION; +			opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; +			remaining -= need; +		} else { +			/* There's no error return, so flag it. */ +			xvp->cookie_out_never = 1; /* true */ +			opts->hash_size = 0; +		} +	} +	return MAX_TCP_OPTION_SPACE - remaining;  }  /* Compute TCP options for ESTABLISHED sockets. This is not the @@ -619,7 +787,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,  	struct tcp_out_options opts;  	unsigned tcp_options_size, tcp_header_size;  	struct tcp_md5sig_key *md5; -	__u8 *md5_hash_location;  	struct tcphdr *th;  	int err; @@ -661,8 +828,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,  	/* Build TCP header and checksum it. */  	th = tcp_hdr(skb); -	th->source		= inet->sport; -	th->dest		= inet->dport; +	th->source		= inet->inet_sport; +	th->dest		= inet->inet_dport;  	th->seq			= htonl(tcb->seq);  	th->ack_seq		= htonl(tp->rcv_nxt);  	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) | @@ -690,7 +857,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,  		}  	} -	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); +	tcp_options_write((__be32 *)(th + 1), tp, &opts);  	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))  		TCP_ECN_send(sk, skb, tcp_header_size); @@ -698,7 +865,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,  	/* Calculate the MD5 hash, as we have all we need now */  	if (md5) {  		sk->sk_route_caps &= ~NETIF_F_GSO_MASK; -		tp->af_specific->calc_md5_hash(md5_hash_location, +		tp->af_specific->calc_md5_hash(opts.hash_location,  					       md5, sk, NULL, skb);  	}  #endif @@ -1918,8 +2085,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)  	 * case, when window is shrunk to zero. In this case  	 * our retransmit serves as a zero window probe.  	 */ -	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) -	    && TCP_SKB_CB(skb)->seq != tp->snd_una) +	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && +	    TCP_SKB_CB(skb)->seq != tp->snd_una)  		return -EAGAIN;  	if (skb->len > cur_mss) { @@ -2219,16 +2386,17 @@ int tcp_send_synack(struct sock *sk)  /* Prepare a SYN-ACK. */  struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, -				struct request_sock *req) +				struct request_sock *req, +				struct request_values *rvp)  { +	struct tcp_out_options opts; +	struct tcp_extend_values *xvp = tcp_xv(rvp);  	struct inet_request_sock *ireq = inet_rsk(req);  	struct tcp_sock *tp = tcp_sk(sk);  	struct tcphdr *th; -	int tcp_header_size; -	struct tcp_out_options opts;  	struct sk_buff *skb;  	struct tcp_md5sig_key *md5; -	__u8 *md5_hash_location; +	int tcp_header_size;  	int mss;  	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); @@ -2266,8 +2434,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,  #endif  	TCP_SKB_CB(skb)->when = tcp_time_stamp;  	tcp_header_size = tcp_synack_options(sk, req, mss, -					     skb, &opts, &md5) + -			  sizeof(struct tcphdr); +					     skb, &opts, &md5, xvp) +			+ sizeof(*th);  	skb_push(skb, tcp_header_size);  	skb_reset_transport_header(skb); @@ -2284,19 +2452,58 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,  	 */  	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,  			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + +	if (OPTION_COOKIE_EXTENSION & opts.options) { +		const struct tcp_cookie_values *cvp = tp->cookie_values; + +		if (cvp != NULL && +		    cvp->s_data_constant && +		    cvp->s_data_desired > 0) { +			u8 *buf = skb_put(skb, cvp->s_data_desired); + +			/* copy data directly from the listening socket. */ +			memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); +			TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; +		} + +		if (opts.hash_size > 0) { +			__u32 workspace[SHA_WORKSPACE_WORDS]; +			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; +			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; + +			/* Secret recipe depends on the Timestamp, (future) +			 * Sequence and Acknowledgment Numbers, Initiator +			 * Cookie, and others handled by IP variant caller. +			 */ +			*tail-- ^= opts.tsval; +			*tail-- ^= tcp_rsk(req)->rcv_isn + 1; +			*tail-- ^= TCP_SKB_CB(skb)->seq + 1; + +			/* recommended */ +			*tail-- ^= ((th->dest << 16) | th->source); +			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ + +			sha_transform((__u32 *)&xvp->cookie_bakery[0], +				      (char *)mess, +				      &workspace[0]); +			opts.hash_location = +				(__u8 *)&xvp->cookie_bakery[0]; +		} +	} +  	th->seq = htonl(TCP_SKB_CB(skb)->seq);  	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);  	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */  	th->window = htons(min(req->rcv_wnd, 65535U)); -	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); +	tcp_options_write((__be32 *)(th + 1), tp, &opts);  	th->doff = (tcp_header_size >> 2);  	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);  #ifdef CONFIG_TCP_MD5SIG  	/* Okay, we have all we need - do the md5 hash if needed */  	if (md5) { -		tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, +		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,  					       md5, NULL, req, skb);  	}  #endif | 
