diff options
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
| -rw-r--r-- | net/ipv4/inet_connection_sock.c | 125 |
1 files changed, 69 insertions, 56 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d0670f00d52..14d02ea905b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -29,27 +29,16 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif -/* - * This struct holds the first and last local port number. - */ -struct local_ports sysctl_local_ports __read_mostly = { - .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), - .range = { 32768, 61000 }, -}; - -unsigned long *sysctl_local_reserved_ports; -EXPORT_SYMBOL(sysctl_local_reserved_ports); - -void inet_get_local_port_range(int *low, int *high) +void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; do { - seq = read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - *low = sysctl_local_ports.range[0]; - *high = sysctl_local_ports.range[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + *low = net->ipv4.ip_local_ports.range[0]; + *high = net->ipv4.ip_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -57,8 +46,9 @@ int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax) { struct sock *sk2; - struct hlist_node *node; int reuse = sk->sk_reuse; + int reuseport = sk->sk_reuseport; + kuid_t uid = sock_i_uid((struct sock *)sk); /* * Unlike other sk lookup places we do not check @@ -67,30 +57,32 @@ int inet_csk_bind_conflict(const struct sock *sk, * one this bucket belongs to. */ - sk_for_each_bound(sk2, node, &tb->owners) { + sk_for_each_bound(sk2, &tb->owners) { if (sk != sk2 && !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + if ((!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + (!reuseport || !sk2->sk_reuseport || + (sk2->sk_state != TCP_TIME_WAIT && + !uid_eq(uid, sock_i_uid(sk2))))) { + + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } } } - return node != NULL; + return sk2 != NULL; } EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); @@ -101,33 +93,36 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_bind_hashbucket *head; - struct hlist_node *node; struct inet_bind_bucket *tb; int ret, attempts = 5; struct net *net = sock_net(sk); int smallest_size = -1, smallest_rover; + kuid_t uid = sock_i_uid(sk); local_bh_disable(); if (!snum) { int remaining, rover, low, high; again: - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; - smallest_rover = rover = net_random() % remaining + low; + smallest_rover = rover = prandom_u32() % remaining + low; smallest_size = -1; do { - if (inet_is_reserved_local_port(rover)) + if (inet_is_local_reserved_port(net, rover)) goto next_nolock; head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + uid_eq(tb->fastuid, uid))) && (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; @@ -174,7 +169,7 @@ have_snum: head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, &head->chain) if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } @@ -185,14 +180,18 @@ tb_found: if (sk->sk_reuse == SK_FORCE_REUSE) goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size == -1) { goto success; } else { ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && + if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; @@ -212,9 +211,19 @@ tb_not_found: tb->fastreuse = 1; else tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; + if (sk->sk_reuseport) { + tb->fastreuseport = 1; + tb->fastuid = uid; + } else + tb->fastreuseport = 0; + } else { + if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; + if (tb->fastreuseport && + (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) + tb->fastreuseport = 0; + } success: if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); @@ -396,12 +405,12 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, flags, - (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -433,11 +442,11 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rcu_read_lock(); opt = rcu_dereference(newinet->inet_opt); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), - (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, + ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -483,9 +492,9 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, prev = &req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); - if (ireq->rmt_port == rport && - ireq->rmt_addr == raddr && - ireq->loc_addr == laddr && + if (ireq->ir_rmt_port == rport && + ireq->ir_rmt_addr == raddr && + ireq->ir_loc_addr == laddr && AF_INET_FAMILY(req->rsk_ops->family)) { WARN_ON(req->sk); *prevp = prev; @@ -502,7 +511,8 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr, + inet_rsk(req)->ir_rmt_port, lopt->hash_rnd, lopt->nr_table_entries); reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); @@ -538,7 +548,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; @@ -662,11 +672,13 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newsk->sk_state = TCP_SYN_RECV; newicsk->icsk_bind_hash = NULL; - inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; - inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); - inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; + inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; + inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num; + inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; @@ -714,6 +726,7 @@ EXPORT_SYMBOL(inet_csk_destroy_sock); * tcp/dccp_create_openreq_child(). */ void inet_csk_prepare_forced_close(struct sock *sk) + __releases(&sk->sk_lock.slock) { /* sk_clone_lock locked the socket and set refcnt to 2 */ bh_unlock_sock(sk); |
