diff options
Diffstat (limited to 'net/ipv4')
83 files changed, 2819 insertions, 1784 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0d109504ed8..f2b5270efda 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -243,6 +243,23 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); +static inline int inet_netns_ok(struct net *net, int protocol) +{ + int hash; + struct net_protocol *ipprot; + + if (net == &init_net) + return 1; + + hash = protocol & (MAX_INET_PROTOS - 1); + ipprot = rcu_dereference(inet_protos[hash]); + + if (ipprot == NULL) + /* raw IP is OK */ + return 1; + return ipprot->netns_ok; +} + /* * Create an inet socket. */ @@ -259,9 +276,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol) int try_loading_module = 0; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM && !inet_ehash_secret) @@ -320,6 +334,10 @@ lookup_protocol: if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; + err = -EAFNOSUPPORT; + if (!inet_netns_ok(net, protocol)) + goto out_rcu_unlock; + sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; @@ -446,7 +464,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; - chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr); + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -784,6 +802,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; int err = 0; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGSTAMP: @@ -795,12 +814,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: case SIOCRTMSG: - err = ip_rt_ioctl(sk->sk_net, cmd, (void __user *)arg); + err = ip_rt_ioctl(net, cmd, (void __user *)arg); break; case SIOCDARP: case SIOCGARP: case SIOCSARP: - err = arp_ioctl(sk->sk_net, cmd, (void __user *)arg); + err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -813,7 +832,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFPFLAGS: case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(cmd, (void __user *)arg); + err = devinet_ioctl(net, cmd, (void __user *)arg); break; default: if (sk->sk_prot->ioctl) @@ -1058,8 +1077,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (sysctl_ip_dynaddr > 1) { printk(KERN_INFO "%s(): shifting inet->" - "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - __FUNCTION__, + "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n", + __func__, NIPQUAD(old_saddr), NIPQUAD(new_saddr)); } @@ -1113,7 +1132,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 0); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); } if (!err) sk_setup_caps(sk, &rt->u.dst); @@ -1231,6 +1250,29 @@ out: return segs; } +int inet_ctl_sock_create(struct sock **sk, unsigned short family, + unsigned short type, unsigned char protocol, + struct net *net) +{ + struct socket *sock; + int rc = sock_create_kern(family, type, protocol, &sock); + + if (rc == 0) { + *sk = sock->sk; + (*sk)->sk_allocation = GFP_ATOMIC; + /* + * Unhash it so that IP input processing does not even see it, + * we do not wish this socket to see incoming packets. + */ + (*sk)->sk_prot->unhash(*sk); + + sk_change_net(*sk, net); + } + return rc; +} + +EXPORT_SYMBOL_GPL(inet_ctl_sock_create); + unsigned long snmp_fold_field(void *mib[], int offt) { unsigned long res = 0; @@ -1283,17 +1325,20 @@ static struct net_protocol tcp_protocol = { .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .no_policy = 1, + .netns_ok = 1, }; static struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, + .netns_ok = 1, }; static struct net_protocol icmp_protocol = { .handler = icmp_rcv, .no_policy = 1, + .netns_ok = 1, }; static int __init init_ipv4_mibs(void) @@ -1414,7 +1459,7 @@ static int __init inet_init(void) ip_init(); - tcp_v4_init(&inet_family_ops); + tcp_v4_init(); /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -1429,7 +1474,8 @@ static int __init inet_init(void) * Set the ICMP layer up */ - icmp_init(&inet_family_ops); + if (icmp_init() < 0) + panic("Failed to create the ICMP control socket.\n"); /* * Initialise the multicast router diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8e17f65f400..68b72a7a180 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -242,7 +242,7 @@ static int arp_constructor(struct neighbour *neigh) return -EINVAL; } - neigh->type = inet_addr_type(&init_net, addr); + neigh->type = inet_addr_type(dev_net(dev), addr); parms = in_dev->arp_parms; __neigh_parms_put(neigh->parms); @@ -341,14 +341,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { default: case 0: /* By default announce any local IP */ - if (skb && inet_addr_type(&init_net, ip_hdr(skb)->saddr) == RTN_LOCAL) + if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) saddr = ip_hdr(skb)->saddr; break; case 1: /* Restrict announcements of saddr in same subnet */ if (!skb) break; saddr = ip_hdr(skb)->saddr; - if (inet_addr_type(&init_net, saddr) == RTN_LOCAL) { + if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) { /* saddr should be known to target */ if (inet_addr_onlink(in_dev, target, saddr)) break; @@ -424,7 +424,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) int flag = 0; /*unsigned long now; */ - if (ip_route_output_key(&init_net, &rt, &fl) < 0) + if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); @@ -475,9 +475,9 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) return 1; } - paddr = ((struct rtable*)skb->dst)->rt_gateway; + paddr = skb->rtable->rt_gateway; - if (arp_set_predefined(inet_addr_type(&init_net, paddr), haddr, paddr, dev)) + if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) return 0; n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); @@ -570,14 +570,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, * Allocate a buffer */ - skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) - + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb_reset_network_header(skb); - arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4)); + arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; skb->protocol = htons(ETH_P_ARP); if (src_hw == NULL) @@ -710,6 +709,7 @@ static int arp_process(struct sk_buff *skb) u16 dev_type = dev->type; int addr_type; struct neighbour *n; + struct net *net = dev_net(dev); /* arp_rcv below verifies the ARP header and verifies the device * is ARP'able. @@ -805,7 +805,7 @@ static int arp_process(struct sk_buff *skb) /* Special case: IPv4 duplicate address detection packet (RFC2131) */ if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && - inet_addr_type(&init_net, tip) == RTN_LOCAL && + inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha); @@ -815,7 +815,7 @@ static int arp_process(struct sk_buff *skb) if (arp->ar_op == htons(ARPOP_REQUEST) && ip_route_input(skb, tip, sip, 0, dev) == 0) { - rt = (struct rtable*)skb->dst; + rt = skb->rtable; addr_type = rt->rt_type; if (addr_type == RTN_LOCAL) { @@ -835,7 +835,7 @@ static int arp_process(struct sk_buff *skb) goto out; } else if (IN_DEV_FORWARD(in_dev)) { if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) { + (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); @@ -858,14 +858,14 @@ static int arp_process(struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IPV4_DEVCONF_ALL(dev->nd_net, ARP_ACCEPT)) { + if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) */ if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && - inet_addr_type(&init_net, sip) == RTN_UNICAST) + inet_addr_type(net, sip) == RTN_UNICAST) n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } @@ -912,13 +912,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, { struct arphdr *arp; - if (dev->nd_net != &init_net) - goto freeskb; - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; arp = arp_hdr(skb); @@ -1201,9 +1196,6 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); @@ -1318,7 +1310,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) } #endif - sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key)); + sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->primary_key)); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); @@ -1331,7 +1323,7 @@ static void arp_format_pneigh_entry(struct seq_file *seq, int hatype = dev ? dev->type : 0; char tbuf[16]; - sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->key)); + sprintf(tbuf, NIPQUAD_FMT, NIPQUAD(*(u32*)n->key)); seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", tbuf, hatype, ATF_PUBL | ATF_PERM, "00:00:00:00:00:00", dev ? dev->name : "*"); @@ -1385,13 +1377,29 @@ static const struct file_operations arp_seq_fops = { .release = seq_release_net, }; -static int __init arp_proc_init(void) + +static int __net_init arp_net_init(struct net *net) { - if (!proc_net_fops_create(&init_net, "arp", S_IRUGO, &arp_seq_fops)) + if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) return -ENOMEM; return 0; } +static void __net_exit arp_net_exit(struct net *net) +{ + proc_net_remove(net, "arp"); +} + +static struct pernet_operations arp_net_ops = { + .init = arp_net_init, + .exit = arp_net_exit, +}; + +static int __init arp_proc_init(void) +{ + return register_pernet_subsys(&arp_net_ops); +} + #else /* CONFIG_PROC_FS */ static int __init arp_proc_init(void) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 8cd357f4128..4637ded3dba 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1800,7 +1800,6 @@ int cipso_v4_sock_setattr(struct sock *sk, } memcpy(opt->__data, buf, buf_len); opt->optlen = opt_len; - opt->is_data = 1; opt->cipso = sizeof(struct iphdr); kfree(buf); buf = NULL; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 87490f7bb0f..6848e4760f3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -165,7 +165,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (!in_dev) goto out; INIT_RCU_HEAD(&in_dev->rcu_head); - memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt, + memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; @@ -437,7 +437,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; struct ifaddrmsg *ifm; @@ -446,9 +446,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg ASSERT_RTNL(); - if (net != &init_net) - return -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); if (err < 0) goto errout; @@ -555,14 +552,11 @@ errout: static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; ASSERT_RTNL(); - if (net != &init_net) - return -EINVAL; - ifa = rtm_to_ifaddr(net, nlh); if (IS_ERR(ifa)) return PTR_ERR(ifa); @@ -595,7 +589,7 @@ static __inline__ int inet_abc_len(__be32 addr) } -int devinet_ioctl(unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct ifreq ifr; struct sockaddr_in sin_orig; @@ -624,7 +618,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) *colon = 0; #ifdef CONFIG_KMOD - dev_load(&init_net, ifr.ifr_name); + dev_load(net, ifr.ifr_name); #endif switch (cmd) { @@ -665,7 +659,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) + if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) goto done; if (colon) @@ -878,6 +872,7 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int |