diff options
Diffstat (limited to 'net/ipv4')
36 files changed, 1048 insertions, 359 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 766c5965856..24b384b7903 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -346,7 +346,8 @@ lookup_protocol: } err = -EPERM; - if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + if (sock->type == SOCK_RAW && !kern && + !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; err = -EAFNOSUPPORT; @@ -473,6 +474,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); unsigned short snum; int chk_addr_ret; int err; @@ -496,7 +498,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -516,7 +518,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) snum = ntohs(addr->sin_port); err = -EACCES; - if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) + if (snum && snum < PROT_SOCK && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) goto out; /* We keep a pair of addresses. 
rcv_saddr is the one @@ -1251,7 +1254,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - const struct net_protocol *ops; + const struct net_offload *ops; const struct iphdr *iph; int proto; int ihl; @@ -1275,9 +1278,9 @@ static int inet_gso_send_check(struct sk_buff *skb) err = -EPROTONOSUPPORT; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (likely(ops && ops->gso_send_check)) - err = ops->gso_send_check(skb); + ops = rcu_dereference(inet_offloads[proto]); + if (likely(ops && ops->callbacks.gso_send_check)) + err = ops->callbacks.gso_send_check(skb); rcu_read_unlock(); out: @@ -1288,7 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - const struct net_protocol *ops; + const struct net_offload *ops; struct iphdr *iph; int proto; int ihl; @@ -1325,9 +1328,9 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (likely(ops && ops->gso_segment)) - segs = ops->gso_segment(skb, features); + ops = rcu_dereference(inet_offloads[proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); rcu_read_unlock(); if (!segs || IS_ERR(segs)) @@ -1356,7 +1359,7 @@ out: static struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) { - const struct net_protocol *ops; + const struct net_offload *ops; struct sk_buff **pp = NULL; struct sk_buff *p; const struct iphdr *iph; @@ -1378,8 +1381,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, proto = iph->protocol; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (!ops || !ops->gro_receive) + ops = rcu_dereference(inet_offloads[proto]); + if (!ops || !ops->callbacks.gro_receive) goto out_unlock; if (*(u8 *)iph != 0x45) @@ -1420,7 +1423,7 @@ static struct sk_buff 
**inet_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->gro_receive(head, skb); + pp = ops->callbacks.gro_receive(head, skb); out_unlock: rcu_read_unlock(); @@ -1435,7 +1438,7 @@ static int inet_gro_complete(struct sk_buff *skb) { __be16 newlen = htons(skb->len - skb_network_offset(skb)); struct iphdr *iph = ip_hdr(skb); - const struct net_protocol *ops; + const struct net_offload *ops; int proto = iph->protocol; int err = -ENOSYS; @@ -1443,11 +1446,11 @@ static int inet_gro_complete(struct sk_buff *skb) iph->tot_len = newlen; rcu_read_lock(); - ops = rcu_dereference(inet_protos[proto]); - if (WARN_ON(!ops || !ops->gro_complete)) + ops = rcu_dereference(inet_offloads[proto]); + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->gro_complete(skb); + err = ops->callbacks.gro_complete(skb); out_unlock: rcu_read_unlock(); @@ -1558,23 +1561,33 @@ static const struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; +static const struct net_offload tcp_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, .no_policy = 1, .netns_ok = 1, }; +static const struct net_offload udp_offload = { + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, +}; + static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = ping_err, @@ -1659,13 +1672,35 @@ static 
int ipv4_proc_init(void); * IP protocol layer initialiser */ +static struct packet_offload ip_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_IP), + .callbacks = { + .gso_send_check = inet_gso_send_check, + .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, + }, +}; + +static int __init ipv4_offload_init(void) +{ + /* + * Add offloads + */ + if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + pr_crit("%s: Cannot add UDP protocol offload\n", __func__); + if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); + + dev_add_offload(&ip_packet_offload); + return 0; +} + +fs_initcall(ipv4_offload_init); + static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, - .gso_send_check = inet_gso_send_check, - .gso_segment = inet_gso_segment, - .gro_receive = inet_gro_receive, - .gro_complete = inet_gro_complete, }; static int __init inet_init(void) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 47800459e4c..ce6fbdfd40b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1161,7 +1161,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCDARP: case SIOCSARP: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2a6abc163ed..cc06a47f121 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -55,6 +55,7 @@ #include <linux/sysctl.h> #endif #include <linux/kmod.h> +#include <linux/netconf.h> #include <net/arp.h> #include <net/ip.h> @@ -723,7 +724,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFFLAGS: ret = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto out; break; case SIOCSIFADDR: /* Set interface address 
(and family) */ @@ -731,7 +732,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ ret = -EPERM; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) goto out; ret = -EINVAL; if (sin->sin_family != AF_INET) @@ -1442,6 +1443,155 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return 0; } +static int inet_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + /* type -1 is used for ALL */ + if (type == -1 || type == NETCONFA_FORWARDING) + size += nla_total_size(4); + if (type == -1 || type == NETCONFA_RP_FILTER) + size += nla_total_size(4); + if (type == -1 || type == NETCONFA_MC_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv4_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + /* type -1 is used for ALL */ + if ((type == -1 || type == NETCONFA_FORWARDING) && + nla_put_s32(skb, NETCONFA_FORWARDING, + IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + goto nla_put_failure; + if ((type == -1 || type == NETCONFA_RP_FILTER) && + nla_put_s32(skb, NETCONFA_RP_FILTER, + IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + goto nla_put_failure; + if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + nla_put_s32(skb, NETCONFA_MC_FORWARDING, + IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + 
nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); +} + +static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, + [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, +}; + +static int inet_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX+1]; + struct netconfmsg *ncm; + struct sk_buff *skb; + struct ipv4_devconf *devconf; + struct in_device *in_dev; + struct net_device *dev; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_ipv4_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + switch (ifindex) { + case NETCONFA_IFINDEX_ALL: + devconf = net->ipv4.devconf_all; + break; + case NETCONFA_IFINDEX_DEFAULT: + devconf = net->ipv4.devconf_dflt; + break; + default: + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + goto errout; + in_dev = __in_dev_get_rtnl(dev); + if (in_dev == NULL) + goto errout; + devconf = &in_dev->cnf; + break; + } + + err = -ENOBUFS; + skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = 
inet_netconf_fill_devconf(skb, ifindex, devconf, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + -1); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1467,6 +1617,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; @@ -1474,8 +1630,11 @@ static void inet_forward_change(struct net *net) dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) + if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + dev->ifindex, &in_dev->cnf); + } rcu_read_unlock(); } } @@ -1501,6 +1660,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && + new_value != old_value) { + int ifindex; + + if (cnf == net->ipv4.devconf_dflt) + ifindex = NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + ifindex = NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev = + container_of(cnf, struct in_device, + cnf); + ifindex = idev->dev->ifindex; + } + inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + ifindex, cnf); + } } return ret; @@ -1527,15 +1703,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); - } else if 
(*valp) { + } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); - dev_disable_lro(idev->dev); + if (*valp) + dev_disable_lro(idev->dev); + inet_netconf_notify_devconf(net, + NETCONFA_FORWARDING, + idev->dev->ifindex, + cnf); } rtnl_unlock(); rt_cache_flush(net); - } + } else + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); } return ret; @@ -1809,5 +1993,7 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); + rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, + NULL, NULL); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 825c608826d..5cd75e2dab2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -488,7 +488,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(&rt, arg, sizeof(rt))) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 71b125cd5db..4797a800faf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) unsigned int bytes; if (!new_size) - new_size = 1; + new_size = 16; bytes = new_size * sizeof(struct hlist_head *); new_info_hash = fib_info_hash_alloc(bytes); new_laddrhash = fib_info_hash_alloc(bytes); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f2eccd53174..17ff9fd7cdd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -257,7 +257,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, struct inet_peer *peer = 
inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); - inet_putpeer(peer); + if (peer) + inet_putpeer(peer); } out: return rc; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d34ce2972c8..2026542d683 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int *expire, int *resend) { if (!rskq_defer_accept) { - *expire = req->retrans >= thresh; + *expire = req->num_timeout >= thresh; *resend = 1; return; } - *expire = req->retrans >= thresh && - (!inet_rsk(req)->acked || req->retrans >= max_retries); + *expire = req->num_timeout >= thresh && + (!inet_rsk(req)->acked || req->num_timeout >= max_retries); /* * Do not resend while waiting for data after ACK, * start to resend on end of deferring period to give * last chance for data or ACK to create established socket. 
*/ *resend = !inet_rsk(req)->acked || - req->retrans >= rskq_defer_accept - 1; + req->num_timeout >= rskq_defer_accept - 1; } +int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) +{ + int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + + if (!err) + req->num_retrans++; + return err; +} +EXPORT_SYMBOL(inet_rtx_syn_ack); + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || - !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || + !inet_rtx_syn_ack(parent, req) || inet_rsk(req)->acked)) { unsigned long timeo; - if (req->retrans++ == 0) + if (req->num_timeout++ == 0) lopt->qlen_young--; - timeo = min((timeout << req->retrans), max_rto); + timeo = min(timeout << req->num_timeout, + max_rto); req->expires = now + timeo; reqp = &req->dl_next; continue; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 535584c00f9..7afa2c3c788 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -44,6 +44,10 @@ struct inet_diag_entry { u16 dport; u16 family; u16 userlocks; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr saddr_storage; /* for IPv4-mapped-IPv6 addresses */ + struct in6_addr daddr_storage; /* for IPv4-mapped-IPv6 addresses */ +#endif }; static DEFINE_MUTEX(inet_diag_table_mutex); @@ -105,6 +109,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. 
*/ @@ -428,25 +435,31 @@ static int inet_diag_bc_run(const struct nlattr *_bc, break; } - if (cond->prefix_len == 0) - break; - if (op->code == INET_DIAG_BC_S_COND) addr = entry->saddr; else addr = entry->daddr; + if (cond->family != AF_UNSPEC && + cond->family != entry->family) { + if (entry->family == AF_INET6 && + cond->family == AF_INET) { + if (addr[0] == 0 && addr[1] == 0 && + addr[2] == htonl(0xffff) && + bitstring_match(addr + 3, + cond->addr, + cond->prefix_len)) + break; + } + yes = 0; + break; + } + + if (cond->prefix_len == 0) + break; if (bitstring_match(addr, cond->addr, cond->prefix_len)) break; - if (entry->family == AF_INET6 && - cond->family == AF_INET) { - if (addr[0] == 0 && addr[1] == 0 && - addr[2] == htonl(0xffff) && - bitstring_match(addr + 3, cond->addr, - cond->prefix_len)) - break; - } yes = 0; break; } @@ -509,6 +522,55 @@ static int valid_cc(const void *bc, int len, int cc) return 0; } +/* Validate an inet_diag_hostcond. */ +static bool valid_hostcond(const struct inet_diag_bc_op *op, int len, + int *min_len) +{ + int addr_len; + struct inet_diag_hostcond *cond; + + /* Check hostcond space. */ + *min_len += sizeof(struct inet_diag_hostcond); + if (len < *min_len) + return false; + cond = (struct inet_diag_hostcond *)(op + 1); + + /* Check address family and address length. */ + switch (cond->family) { + case AF_UNSPEC: + addr_len = 0; + break; + case AF_INET: + addr_len = sizeof(struct in_addr); + break; + case AF_INET6: + addr_len = sizeof(struct in6_addr); + break; + default: + return false; + } + *min_len += addr_len; + if (len < *min_len) + return false; + + /* Check prefix length (in bits) vs address length (in bytes). */ + if (cond->prefix_len > 8 * addr_len) + return false; + + return true; +} + +/* Validate a port comparison operator. */ +static inline bool valid_port_comparison(const struct inet_diag_bc_op *op, + int len, int *min_len) +{ + /* Port comparisons put the port in a follow-on inet_diag_bc_op. 
*/ + *min_len += sizeof(struct inet_diag_bc_op); + if (len < *min_len) + return false; + return true; +} + static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { const void *bc = bytecode; @@ -516,29 +578,39 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) while (len > 0) { const struct inet_diag_bc_op *op = bc; + int min_len = sizeof(struct inet_diag_bc_op); //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { - case INET_DIAG_BC_AUTO: case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: + if (!valid_hostcond(bc, len, &min_len)) + return -EINVAL; + break; case INET_DIAG_BC_S_GE: case INET_DIAG_BC_S_LE: case INET_DIAG_BC_D_GE: case INET_DIAG_BC_D_LE: - case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len + 4 || op->no & 3) - return -EINVAL; - if (op->no < len && - !valid_cc(bytecode, bytecode_len, len - op->no)) + if (!valid_port_comparison(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_AUTO: + case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: break; default: return -EINVAL; } - if (op->yes < 4 || op->yes > len + 4 || op->yes & 3) + + if (op->code != INET_DIAG_BC_NOP) { + if (op->no < min_len || op->no > len + 4 || op->no & 3) + return -EINVAL; + if (op->no < len && + !valid_cc(bytecode, bytecode_len, len - op->no)) + return -EINVAL; + } + + if (op->yes < min_len || op->yes > len + 4 || op->yes & 3) return -EINVAL; bc += op->yes; len -= op->yes; @@ -596,6 +668,36 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } +/* Get the IPv4, IPv6, or IPv4-mapped-IPv6 local and remote addresses + * from a request_sock. For IPv4-mapped-IPv6 we must map IPv4 to IPv6. 
+ */ +static inline void inet_diag_req_addrs(const struct sock *sk, + const struct request_sock *req, + struct inet_diag_entry *entry) +{ + struct inet_request_sock *ireq = inet_rsk(req); + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + if (req->rsk_ops->family == AF_INET6) { + entry->saddr = inet6_rsk(req)->loc_addr.s6_addr32; + entry->daddr = inet6_rsk(req)->rmt_addr.s6_addr32; + } else if (req->rsk_ops->family == AF_INET) { + ipv6_addr_set_v4mapped(ireq->loc_addr, + &entry->saddr_storage); + ipv6_addr_set_v4mapped(ireq->rmt_addr, + &entry->daddr_storage); + entry->saddr = entry->saddr_storage.s6_addr32; + entry->daddr = entry->daddr_storage.s6_addr32; |