diff options
Diffstat (limited to 'net/ipv4')
135 files changed, 4459 insertions, 3335 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8514106761b..30af4a4dfcc 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -88,6 +88,7 @@ config IP_FIB_HASH config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER + select FIB_RULES ---help--- Normally, a router decides what to do with a received packet based solely on the packet's final destination address. If you say Y here, @@ -386,6 +387,7 @@ config INET_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- @@ -446,24 +448,22 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG -config TCP_CONG_ADVANCED +menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- Support for selection of various TCP congestion control modules. Nearly all users can safely say no here, and a safe default - selection will be made (BIC-TCP with new Reno as a fallback). + selection will be made (CUBIC with new Reno as a fallback). If unsure, say N. -# TCP Reno is builtin (required as fallback) -menu "TCP congestion control" - depends on TCP_CONG_ADVANCED +if TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" - default y + default m ---help--- BIC-TCP is a sender-side only change that ensures a linear RTT fairness under large windows while offering both scalability and @@ -477,7 +477,7 @@ config TCP_CONG_BIC config TCP_CONG_CUBIC tristate "CUBIC TCP" - default m + default y ---help--- This is version 2.0 of BIC-TCP which uses a cubic growth function among other techniques. @@ -572,12 +572,49 @@ config TCP_CONG_VENO loss packets. See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf -endmenu +choice + prompt "Default TCP congestion control" + default DEFAULT_CUBIC + help + Select the TCP congestion control that will be used by default + for all connections. -config TCP_CONG_BIC + config DEFAULT_BIC + bool "Bic" if TCP_CONG_BIC=y + + config DEFAULT_CUBIC + bool "Cubic" if TCP_CONG_CUBIC=y + + config DEFAULT_HTCP + bool "Htcp" if TCP_CONG_HTCP=y + + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + + config DEFAULT_WESTWOOD + bool "Westwood" if TCP_CONG_WESTWOOD=y + + config DEFAULT_RENO + bool "Reno" + +endchoice + +endif + +config TCP_CONG_CUBIC tristate depends on !TCP_CONG_ADVANCED default y +config DEFAULT_TCP_CONG + string + default "bic" if DEFAULT_BIC + default "cubic" if DEFAULT_CUBIC + default "htcp" if DEFAULT_HTCP + default "vegas" if DEFAULT_VEGAS + default "westwood" if DEFAULT_WESTWOOD + default "reno" if DEFAULT_RENO + default "cubic" + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4878fc5be85..f66049e28ae 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o +obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c84a32070f8..edcf0932ac6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -67,7 +67,6 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/types.h> @@ -392,7 +391,7 @@ int inet_release(struct socket *sock) } /* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind; +int sysctl_ip_nonlocal_bind __read_mostly; int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -988,7 +987,7 @@ void inet_unregister_protosw(struct inet_protosw *p) * Shall we try to damage output packets if routing dev changes? */ -int sysctl_ip_dynaddr; +int sysctl_ip_dynaddr __read_mostly; static int inet_sk_reselect_saddr(struct sock *sk) { @@ -997,7 +996,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct rtable *rt; __u32 old_saddr = inet->saddr; __u32 new_saddr; - __u32 daddr = inet->daddr; + __be32 daddr = inet->daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; @@ -1044,7 +1043,7 @@ int inet_sk_rebuild_header(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; + __be32 daddr; int err; /* Route is OK, nothing to do. */ @@ -1074,6 +1073,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) @@ -1254,10 +1254,7 @@ static int __init inet_init(void) struct list_head *r; int rc = -EINVAL; - if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { - printk(KERN_CRIT "%s: panic\n", __FUNCTION__); - goto out; - } + BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); rc = proto_register(&tcp_prot, 1); if (rc) @@ -1345,10 +1342,10 @@ static int __init inet_init(void) rc = 0; out: return rc; -out_unregister_tcp_proto: - proto_unregister(&tcp_prot); out_unregister_udp_proto: proto_unregister(&udp_prot); +out_unregister_tcp_proto: + proto_unregister(&tcp_prot); goto out; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 1366bc6ce6a..99542977e47 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -34,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ - case 0x86: /* Another "Commercial Security" crap. */ + case IPOPT_CIPSO: case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; @@ -97,7 +98,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -119,6 +123,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; + int err = -EINVAL; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; @@ -166,8 +171,11 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); skb_push(skb, ihl); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { x->stats.integrity_failed++; goto out; } @@ -179,7 +187,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) return 0; out: - return -EINVAL; + return err; } static void ah4_err(struct sk_buff *skb, u32 info) @@ -204,6 +212,7 @@ static int ah_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -221,24 +230,27 @@ static int ah_init_state(struct xfrm_state *x) ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -253,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; @@ -262,7 +274,7 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -277,7 +289,7 @@ static void ah_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c8a3723bc00..cfe5c847428 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -234,7 +234,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev) static int arp_constructor(struct neighbour *neigh) { - u32 addr = *(u32*)neigh->primary_key; + __be32 addr = *(__be32*)neigh->primary_key; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; @@ -330,10 +330,10 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb) static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { - u32 saddr = 0; + __be32 saddr = 0; u8 *dst_ha = NULL; struct net_device *dev = neigh->dev; - u32 target = *(u32*)neigh->primary_key; + __be32 target = *(__be32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); struct in_device *in_dev = in_dev_get(dev); @@ -385,7 +385,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } static int arp_ignore(struct in_device *in_dev, struct net_device *dev, - u32 sip, u32 tip) + __be32 sip, __be32 tip) { int scope; @@ -420,7 +420,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev, return !inet_confirm_addr(dev, sip, tip, scope); } -static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) +static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } }; @@ -449,7 +449,7 @@ static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) * is allowed to use this function, it is scheduled to be removed. --ANK */ -static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct net_device * dev) +static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) { switch (addr_hint) { case RTN_LOCAL: @@ -470,7 +470,7 @@ static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, s int arp_find(unsigned char *haddr, struct sk_buff *skb) { struct net_device *dev = skb->dev; - u32 paddr; + __be32 paddr; struct neighbour *n; if (!skb->dst) { @@ -511,7 +511,7 @@ int arp_bind_neighbour(struct dst_entry *dst) if (dev == NULL) return -EINVAL; if (n == NULL) { - u32 nexthop = ((struct rtable*)dst)->rt_gateway; + __be32 nexthop = ((struct rtable*)dst)->rt_gateway; if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) nexthop = 0; n = __neigh_lookup_errno( @@ -560,8 +560,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) * Create an arp packet. If (dest_hw == NULL), we create a broadcast * message. */ -struct sk_buff *arp_create(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw) { @@ -675,8 +675,8 @@ void arp_xmit(struct sk_buff *skb) /* * Create and send an arp packet. */ -void arp_send(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +void arp_send(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw) { @@ -710,7 +710,7 @@ static int arp_process(struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha, *tha; - u32 sip, tip; + __be32 sip, tip; u16 dev_type = dev->type; int addr_type; struct neighbour *n; @@ -969,13 +969,13 @@ out_of_mem: static int arp_req_set(struct arpreq *r, struct net_device * dev) { - u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err; if (r->arp_flags&ATF_PUBL) { - u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; - if (mask && mask != 0xFFFFFFFF) + __be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; + if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); @@ -1063,7 +1063,7 @@ static unsigned arp_state_to_flags(struct neighbour *neigh) static int arp_req_get(struct arpreq *r, struct net_device *dev) { - u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err = -ENXIO; @@ -1084,13 +1084,13 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) static int arp_req_delete(struct arpreq *r, struct net_device * dev) { int err; - u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) { - u32 mask = + __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) return pneigh_delete(&arp_tbl, &ip, dev); if (mask == 0) { if (dev == NULL) { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c new file mode 100644 index 00000000000..a8e2e879a64 --- /dev/null +++ b/net/ipv4/cipso_ipv4.c @@ -0,0 +1,1474 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/jhash.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/tcp.h> +#include <net/netlabel.h> +#include <net/cipso_ipv4.h> +#include <asm/bug.h> + +struct cipso_v4_domhsh_entry { + char *domain; + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* List of available DOI definitions */ +/* XXX - Updates should be minimal so having a single lock for the + * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be + * okay. */ +/* XXX - This currently assumes a minimal number of different DOIs in use, + * if in practice there are a lot of different DOIs this list should + * probably be turned into a hash table or something similar so we + * can do quick lookups. */ +static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); + +/* Label mapping cache */ +int cipso_v4_cache_enabled = 1; +int cipso_v4_cache_bucketsize = 10; +#define CIPSO_V4_CACHE_BUCKETBITS 7 +#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) +#define CIPSO_V4_CACHE_REORDERLIMIT 10 +struct cipso_v4_map_cache_bkt { + spinlock_t lock; + u32 size; + struct list_head list; +}; +struct cipso_v4_map_cache_entry { + u32 hash; + unsigned char *key; + size_t key_len; + + struct netlbl_lsm_cache lsm_data; + + u32 activity; + struct list_head list; +}; +static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL; + +/* Restricted bitmap (tag #1) flags */ +int cipso_v4_rbm_optfmt = 0; +int cipso_v4_rbm_strictvalid = 1; + +/* + * Helper Functions + */ + +/** + * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit + * @bitmap: the bitmap + * @bitmap_len: length in bits + * @offset: starting offset + * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit + * + * Description: + * Starting at @offset, walk the bitmap from left to right until either the + * desired bit is found or we reach the end. Return the bit offset, -1 if + * not found, or -2 if error. + */ +static int cipso_v4_bitmap_walk(const unsigned char *bitmap, + u32 bitmap_len, + u32 offset, + u8 state) +{ + u32 bit_spot; + u32 byte_offset; + unsigned char bitmask; + unsigned char byte; + + /* gcc always rounds to zero when doing integer division */ + byte_offset = offset / 8; + byte = bitmap[byte_offset]; + bit_spot = offset; + bitmask = 0x80 >> (offset % 8); + + while (bit_spot < bitmap_len) { + if ((state && (byte & bitmask) == bitmask) || + (state == 0 && (byte & bitmask) == 0)) + return bit_spot; + + bit_spot++; + bitmask >>= 1; + if (bitmask == 0) { + byte = bitmap[++byte_offset]; + bitmask = 0x80; + } + } + + return -1; +} + +/** + * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap + * @bitmap: the bitmap + * @bit: the bit + * @state: if non-zero, set the bit (1) else clear the bit (0) + * + * Description: + * Set a single bit in the bitmask. Returns zero on success, negative values + * on error. + */ +static void cipso_v4_bitmap_setbit(unsigned char *bitmap, + u32 bit, + u8 state) +{ + u32 byte_spot; + u8 bitmask; + + /* gcc always rounds to zero when doing integer division */ + byte_spot = bit / 8; + bitmask = 0x80 >> (bit % 8); + if (state) + bitmap[byte_spot] |= bitmask; + else + bitmap[byte_spot] &= ~bitmask; +} + +/** + * cipso_v4_doi_domhsh_free - Frees a domain list entry + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to a domain list entry can be released + * safely. + * + */ +static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) +{ + struct cipso_v4_domhsh_entry *ptr; + + ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); + kfree(ptr->domain); + kfree(ptr); +} + +/** + * cipso_v4_cache_entry_free - Frees a cache entry + * @entry: the entry to free + * + * Description: + * This function frees the memory associated with a cache entry. + * + */ +static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) +{ + if (entry->lsm_data.free) + entry->lsm_data.free(entry->lsm_data.data); + kfree(entry->key); + kfree(entry); +} + +/** + * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache + * @key: the hash key + * @key_len: the length of the key in bytes + * + * Description: + * The CIPSO tag hashing function. Returns a 32-bit hash value. + * + */ +static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) +{ + return jhash(key, key_len, 0); +} + +/* + * Label Mapping Cache Functions + */ + +/** + * cipso_v4_cache_init - Initialize the CIPSO cache + * + * Description: + * Initializes the CIPSO label mapping cache, this function should be called + * before any of the other functions defined in this file. Returns zero on + * success, negative values on error. + * + */ +static int cipso_v4_cache_init(void) +{ + u32 iter; + + cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, + sizeof(struct cipso_v4_map_cache_bkt), + GFP_KERNEL); + if (cipso_v4_cache == NULL) + return -ENOMEM; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock_init(&cipso_v4_cache[iter].lock); + cipso_v4_cache[iter].size = 0; + INIT_LIST_HEAD(&cipso_v4_cache[iter].list); + } + + return 0; +} + +/** + * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache + * + * Description: + * Invalidates and frees any entries in the CIPSO cache. Returns zero on + * success and negative values on failure. + * + */ +void cipso_v4_cache_invalidate(void) +{ + struct cipso_v4_map_cache_entry *entry, *tmp_entry; + u32 iter; + + for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { + spin_lock_bh(&cipso_v4_cache[iter].lock); + list_for_each_entry_safe(entry, + tmp_entry, + &cipso_v4_cache[iter].list, list) { + list_del(&entry->list); + cipso_v4_cache_entry_free(entry); + } + cipso_v4_cache[iter].size = 0; + spin_unlock_bh(&cipso_v4_cache[iter].lock); + } + + return; +} + +/** + * cipso_v4_cache_check - Check the CIPSO cache for a label mapping + * @key: the buffer to check + * @key_len: buffer length in bytes + * @secattr: the security attribute struct to use + * + * Description: + * This function checks the cache to see if a label mapping already exists for + * the given key. If there is a match then the cache is adjusted and the + * @secattr struct is populated with the correct LSM security attributes. The + * cache is adjusted in the following manner if the entry is not already the + * first in the cache bucket: + * + * 1. The cache entry's activity counter is incremented + * 2. The previous (higher ranking) entry's activity counter is decremented + * 3. If the difference between the two activity counters is geater than + * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped + * + * Returns zero on success, -ENOENT for a cache miss, and other negative values + * on error. + * + */ +static int cipso_v4_cache_check(const unsigned char *key, + u32 key_len, + struct netlbl_lsm_secattr *secattr) +{ + u32 bkt; + struct cipso_v4_map_cache_entry *entry; + struct cipso_v4_map_cache_entry *prev_entry = NULL; + u32 hash; + + if (!cipso_v4_cache_enabled) + return -ENOENT; + + hash = cipso_v4_map_cache_hash(key, key_len); + bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock_bh(&cipso_v4_cache[bkt].lock); + list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { + if (entry->hash == hash && + entry->key_len == key_len && + memcmp(entry->key, key, key_len) == 0) { + entry->activity += 1; + secattr->cache.free = entry->lsm_data.free; + secattr->cache.data = entry->lsm_data.data; + if (prev_entry == NULL) { + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + return 0; + } + + if (prev_entry->activity > 0) + prev_entry->activity -= 1; + if (entry->activity > prev_entry->activity && + entry->activity - prev_entry->activity > + CIPSO_V4_CACHE_REORDERLIMIT) { + __list_del(entry->list.prev, entry->list.next); + __list_add(&entry->list, + prev_entry->list.prev, + &prev_entry->list); + } + + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + return 0; + } + prev_entry = entry; + } + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + + return -ENOENT; +} + +/** + * cipso_v4_cache_add - Add an entry to the CIPSO cache + * @skb: the packet + * @secattr: the packet's security attributes + * + * Description: + * Add a new entry into the CIPSO label mapping cache. Add the new entry to + * head of the cache bucket's list, if the cache bucket is out of room remove + * the last entry in the list first. It is important to note that there is + * currently no checking for duplicate keys. Returns zero on success, + * negative values on failure. + * + */ +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 bkt; + struct cipso_v4_map_cache_entry *entry = NULL; + struct cipso_v4_map_cache_entry *old_entry = NULL; + unsigned char *cipso_ptr; + u32 cipso_ptr_len; + + if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) + return 0; + + cipso_ptr = CIPSO_V4_OPTPTR(skb); + cipso_ptr_len = cipso_ptr[1]; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + return -ENOMEM; + entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); + if (entry->key == NULL) { + ret_val = -ENOMEM; + goto cache_add_failure; + } + memcpy(entry->key, cipso_ptr, cipso_ptr_len); + entry->key_len = cipso_ptr_len; + entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); + entry->lsm_data.free = secattr->cache.free; + entry->lsm_data.data = secattr->cache.data; + + bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + spin_lock_bh(&cipso_v4_cache[bkt].lock); + if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache[bkt].size += 1; + } else { + old_entry = list_entry(cipso_v4_cache[bkt].list.prev, + struct cipso_v4_map_cache_entry, list); + list_del(&old_entry->list); + list_add(&entry->list, &cipso_v4_cache[bkt].list); + cipso_v4_cache_entry_free(old_entry); + } + spin_unlock_bh(&cipso_v4_cache[bkt].lock); + + return 0; + +cache_add_failure: + if (entry) + cipso_v4_cache_entry_free(entry); + return ret_val; +} + +/* + * DOI List Functions + */ + +/** + * cipso_v4_doi_search - Searches for a DOI definition + * @doi: the DOI to search for + * + * Description: + * Search the DOI definition list for a DOI definition with a DOI value that + * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). + * Returns a pointer to the DOI definition on success and NULL on failure. + */ +static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) +{ + struct cipso_v4_doi *iter; + + list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) + if (iter->doi == doi && iter->valid) + return iter; + return NULL; +} + +/** + * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine + * @doi_def: the DOI structure + * + * Description: + * The caller defines a new DOI for use by the CIPSO engine and calls this + * function to add it to the list of acceptable domains. The caller must + * ensure that the mapping table specified in @doi_def->map meets all of the + * requirements of the mapping type (see cipso_ipv4.h for details). Returns + * zero on success and non-zero on failure. + * + */ +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) + return -EINVAL; + + doi_def->valid = 1; + INIT_RCU_HEAD(&doi_def->rcu); + INIT_LIST_HEAD(&doi_def->dom_list); + + rcu_read_lock(); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_rlock; + spin_lock(&cipso_v4_doi_list_lock); + if (cipso_v4_doi_search(doi_def->doi) != NULL) + goto doi_add_failure_slock; + list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; + +doi_add_failure_slock: + spin_unlock(&cipso_v4_doi_list_lock); +doi_add_failure_rlock: + rcu_read_unlock(); + return -EEXIST; +} + +/** + * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine + * @doi: the DOI value + * @audit_secid: the LSM secid to use in the audit message + * @callback: the DOI cleanup/free callback + * + * Description: + * Removes a DOI definition from the CIPSO engine, @callback is called to + * free any memory. The NetLabel routines will be called to release their own + * LSM domain mappings as well as our own domain list. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_doi_remove(u32 doi, + struct netlbl_audit *audit_info, + void (*callback) (struct rcu_head * head)) +{ + struct cipso_v4_doi *doi_def; + struct cipso_v4_domhsh_entry *dom_iter; + + rcu_read_lock(); + if (cipso_v4_doi_search(doi) != NULL) { + spin_lock(&cipso_v4_doi_list_lock); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + return -ENOENT; + } + doi_def->valid = 0; + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) + if (dom_iter->valid) + netlbl_domhsh_remove(dom_iter->domain, + audit_info); + cipso_v4_cache_invalidate(); + rcu_read_unlock(); + + call_rcu(&doi_def->rcu, callback); + return 0; + } + rcu_read_unlock(); + + return -ENOENT; +} + +/** + * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * @doi: the DOI value + * + * Description: + * Searches for a valid DOI definition and if one is found it is returned to + * the caller. Otherwise NULL is returned. The caller must ensure that + * rcu_read_lock() is held while accessing the returned definition. + * + */ +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return cipso_v4_doi_search(doi); +} + +/** + * cipso_v4_doi_walk - Iterate through the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function + * + * Description: + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. + * + */ +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) +{ + int ret_val = -ENOENT; + u32 doi_cnt = 0; + struct cipso_v4_doi *iter_doi; + + rcu_read_lock(); + list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) + if (iter_doi->valid) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; + } + } + +doi_walk_return: + rcu_read_unlock(); + *skip_cnt = doi_cnt; + return ret_val; +} + +/** + * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to add + * + * Description: + * Adds the @domain to the the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). This function + * does allocate memory. Returns zero on success, negative values on failure. + * + */ +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + struct cipso_v4_domhsh_entry *new_dom; + + new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); + if (new_dom == NULL) + return -ENOMEM; + if (domain) { + new_dom->domain = kstrdup(domain, GFP_KERNEL); + if (new_dom->domain == NULL) { + kfree(new_dom); + return -ENOMEM; + } + } + new_dom->valid = 1; + INIT_RCU_HEAD(&new_dom->rcu); + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + kfree(new_dom->domain); + kfree(new_dom); + return -EEXIST; + } + list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return 0; +} + +/** + * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition + * @doi_def: the DOI definition + * @domain: the domain to remove + * + * Description: + * Removes the @domain from the DOI specified by @doi_def, this function + * should only be called by external functions (i.e. NetLabel). Returns zero + * on success and negative values on error. + * + */ +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + struct cipso_v4_domhsh_entry *iter; + + rcu_read_lock(); + spin_lock(&cipso_v4_doi_list_lock); + list_for_each_entry_rcu(iter, &doi_def->dom_list, list) + if (iter->valid && + ((domain != NULL && iter->domain != NULL && + strcmp(iter->domain, domain) == 0) || + (domain == NULL && iter->domain == NULL))) { + iter->valid = 0; + list_del_rcu(&iter->list); + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); + + return 0; + } + spin_unlock(&cipso_v4_doi_list_lock); + rcu_read_unlock(); + + return -ENOENT; +} + +/* + * Label Mapping Functions + */ + +/** + * cipso_v4_map_lvl_valid - Checks to see if the given level is understood + * @doi_def: the DOI definition + * @level: the level to check + * + * Description: + * Checks the given level against the given DOI definition and returns a + * negative value if the level does not have a valid mapping and a zero value + * if the level is defined by the DOI. + * + */ +static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network + * @doi_def: the DOI definition + * @host_lvl: the host MLS level + * @net_lvl: the network/CIPSO MLS level + * + * Description: + * Perform a label mapping to translate a local MLS level to the correct + * CIPSO level using the given DOI definition. Returns zero on success, + * negative values otherwise. + * + */ +static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, + u32 host_lvl, + u32 *net_lvl) +{ + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *net_lvl = host_lvl; + return 0; + case CIPSO_V4_MAP_STD: + if (host_lvl < doi_def->map.std->lvl.local_size) { + *net_lvl = doi_def->map.std->lvl.local[host_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host + * @doi_def: the DOI definition + * @net_lvl: the network/CIPSO MLS level + * @host_lvl: the host MLS level + * + * Description: + * Perform a label mapping to translate a CIPSO level to the correct local MLS + * level using the given DOI definition. Returns zero on success, negative + * values otherwise. + * + */ +static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, + u32 net_lvl, + u32 *host_lvl) +{ + struct cipso_v4_std_map_tbl *map_tbl; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + *host_lvl = net_lvl; + return 0; + case CIPSO_V4_MAP_STD: + map_tbl = doi_def->map.std; + if (net_lvl < map_tbl->lvl.cipso_size && + map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { + *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; + return 0; + } + break; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid + * @doi_def: the DOI definition + * @bitmap: category bitmap + * @bitmap_len: bitmap length in bytes + * + * Description: + * Checks the given category bitmap against the given DOI definition and + * returns a negative value if any of the categories in the bitmap do not have + * a valid mapping and a zero value if all of the categories are valid. + * + */ +static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, + const unsigned char *bitmap, + u32 bitmap_len) +{ + int cat = -1; + u32 bitmap_len_bits = bitmap_len * 8; + u32 cipso_cat_size = doi_def->map.std->cat.cipso_size; + u32 *cipso_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + return 0; + case CIPSO_V4_MAP_STD: + for (;;) { + cat = cipso_v4_bitmap_walk(bitmap, + bitmap_len_bits, + cat + 1, + 1); + if (cat < 0) + break; + if (cat >= cipso_cat_size || + cipso_array[cat] >= CIPSO_V4_INV_CAT) + return -EFAULT; + } + + if (cat == -1) + return 0; + break; + } + + return -EFAULT; +} + +/** + * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network + * @doi_def: the DOI definition + * @host_cat: the category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * @net_cat: the zero'd out category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * + * Description: + * Perform a label mapping to translate a local MLS category bitmap to the + * correct CIPSO bitmap using the given DOI definition. Returns the minimum + * size in bytes of the network bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, + const unsigned char *host_cat, + u32 host_cat_len, + unsigned char *net_cat, + u32 net_cat_len) +{ + int host_spot = -1; + u32 net_spot; + u32 net_spot_max = 0; + u32 host_clen_bits = host_cat_len * 8; + u32 net_clen_bits = net_cat_len * 8; + u32 host_cat_size = doi_def->map.std->cat.local_size; + u32 *host_cat_array = doi_def->map.std->cat.local; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + net_spot_max = host_cat_len - 1; + while (net_spot_max > 0 && host_cat[net_spot_max] == 0) + net_spot_max--; + if (net_spot_max > net_cat_len) + return -EINVAL; + memcpy(net_cat, host_cat, net_spot_max); + return net_spot_max; + case CIPSO_V4_MAP_STD: + for (;;) { + host_spot = cipso_v4_bitmap_walk(host_cat, + host_clen_bits, + host_spot + 1, + 1); + if (host_spot < 0) + break; + if (host_spot >= host_cat_size) + return -EPERM; + + net_spot = host_cat_array[host_spot]; + if (net_spot >= net_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(net_cat, net_spot, 1); + + if (net_spot > net_spot_max) + net_spot_max = net_spot; + } + + if (host_spot == -2) + return -EFAULT; + + if (++net_spot_max % 8) + return net_spot_max / 8 + 1; + return net_spot_max / 8; + } + + return -EINVAL; +} + +/** + * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host + * @doi_def: the DOI definition + * @net_cat: the category bitmap in network/CIPSO format + * @net_cat_len: the length of the CIPSO bitmap in bytes + * @host_cat: the zero'd out category bitmap in host format + * @host_cat_len: the length of the host's category bitmap in bytes + * + * Description: + * Perform a label mapping to translate a CIPSO bitmap to the correct local + * MLS category bitmap using the given DOI definition. Returns the minimum + * size in bytes of the host bitmap on success, negative values otherwise. + * + */ +static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, + const unsigned char *net_cat, + u32 net_cat_len, + unsigned char *host_cat, + u32 host_cat_len) +{ + u32 host_spot; + u32 host_spot_max = 0; + int net_spot = -1; + u32 net_clen_bits = net_cat_len * 8; + u32 host_clen_bits = host_cat_len * 8; + u32 net_cat_size = doi_def->map.std->cat.cipso_size; + u32 *net_cat_array = doi_def->map.std->cat.cipso; + + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + if (net_cat_len > host_cat_len) + return -EINVAL; + memcpy(host_cat, net_cat, net_cat_len); + return net_cat_len; + case CIPSO_V4_MAP_STD: + for (;;) { + net_spot = cipso_v4_bitmap_walk(net_cat, + net_clen_bits, + net_spot + 1, + 1); + if (net_spot < 0) + break; + if (net_spot >= net_cat_size || + net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) + return -EPERM; + + host_spot = net_cat_array[net_spot]; + if (host_spot >= host_clen_bits) + return -ENOSPC; + cipso_v4_bitmap_setbit(host_cat, host_spot, 1); + + if (host_spot > host_spot_max) + host_spot_max = host_spot; + } + + if (net_spot == -2) + return -EFAULT; + + if (++host_spot_max % 8) + return host_spot_max / 8 + 1; + return host_spot_max / 8; + } + + return -EINVAL; +} + +/* + * Protocol Handling Functions + */ + +#define CIPSO_V4_HDR_LEN 6 + +/** + * cipso_v4_gentag_hdr - Generate a CIPSO option header + * @doi_def: the DOI definition + * @len: the total tag length in bytes + * @buf: the CIPSO option buffer + * + * Description: + * Write a CIPSO header into the beginning of @buffer. Return zero on success, + * negative values on failure. + * + */ +static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, + u32 len, + unsigned char *buf) +{ + if (CIPSO_V4_HDR_LEN + len > 40) + return -ENOSPC; + + buf[0] = IPOPT_CIPSO; + buf[1] = CIPSO_V4_HDR_LEN + len; + *(u32 *)&buf[2] = htonl(doi_def->doi); + + return 0; +} + +#define CIPSO_V4_TAG1_CAT_LEN 30 + +/** + * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The + * actual buffer length may be larger than the indicated size due to + * translation between host and network category bitmaps. Returns zero on + * success, negative values on failure. + * + */ +static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char **buffer, + u32 *buffer_len) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 level; + + if (secattr->mls_cat) { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN, + GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_hton(doi_def, + secattr->mls_cat, + secattr->mls_cat_len, + &buf[CIPSO_V4_HDR_LEN + 4], + CIPSO_V4_TAG1_CAT_LEN); + if (ret_val < 0) + goto gentag_failure; + + /* This will send packets using the "optimized" format when + * possibile as specified in section 3.4.2.6 of the + * CIPSO draft. */ + if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10)) + ret_val = 10; + + buf_len = 4 + ret_val; + } else { + buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC); + if (buf == NULL) + return -ENOMEM; + buf_len = 4; + } + + ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + if (ret_val != 0) + goto gentag_failure; + + ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf); + if (ret_val != 0) + goto gentag_failure; + + buf[CIPSO_V4_HDR_LEN] = 0x01; + buf[CIPSO_V4_HDR_LEN + 1] = buf_len; + buf[CIPSO_V4_HDR_LEN + 3] = level; + + *buffer = buf; + *buffer_len = CIPSO_V4_HDR_LEN + buf_len; + + return 0; + +gentag_failure: + kfree(buf); + return ret_val; +} + +/** + * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security + * attributes in @secattr. Return zero on success, negatives values on + * failure. + * + */ +static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + u8 tag_len = tag[1]; + u32 level; + + ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); + if (ret_val != 0) + return ret_val; + secattr->mls_lvl = level; + secattr->mls_lvl_vld = 1; + + if (tag_len > 4) { + switch (doi_def->type) { + case CIPSO_V4_MAP_PASS: + secattr->mls_cat_len = tag_len - 4; + break; + case CIPSO_V4_MAP_STD: + secattr->mls_cat_len = + doi_def->map.std->cat.local_size; + break; + } + secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); + if (secattr->mls_cat == NULL) + return -ENOMEM; + + ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, + &tag[4], + tag_len - 4, + secattr->mls_cat, + secattr->mls_cat_len); + if (ret_val < 0) { + kfree(secattr->mls_cat); + return ret_val; + } + secattr->mls_cat_len = ret_val; + } + + return 0; +} + +/** + * cipso_v4_validate - Validate a CIPSO option + * @option: the start of the option, on error it is set to point to the error + * + * Description: + * This routine is called to validate a CIPSO option, it checks all of the + * fields to ensure that they are at least valid, see the draft snippet below + * for details. If the option is valid then a zero value is returned and + * the value of @option is unchanged. If the option is invalid then a + * non-zero value is returned and @option is adjusted to point to the + * offending portion of the option. From the IETF draft ... + * + * "If any field within the CIPSO options, such as the DOI identifier, is not + * recognized the IP datagram is discarded and an ICMP 'parameter problem' + * (type 12) is generated and returned. The ICMP code field is set to 'bad + * parameter' (code 0) and the pointer is set to the start of the CIPSO field + * that is unrecognized." + * + */ +int cipso_v4_validate(unsigned char **option) +{ + unsigned char *opt = *option; + unsigned char *tag; + unsigned char opt_iter; + unsigned char err_offset = 0; + u8 opt_len; + u8 tag_len; + struct cipso_v4_doi *doi_def = NULL; + u32 tag_iter; + + /* caller already checks for length values that are too large */ + opt_len = opt[1]; + if (opt_len < 8) { + err_offset = 1; + goto validate_return; + } + + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2]))); + if (doi_def == NULL) { + err_offset = 2; + goto validate_return_locked; + } + + opt_iter = 6; + tag = opt + opt_iter; + while (opt_iter < opt_len) { + for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) + if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID || + ++tag_iter == CIPSO_V4_TAG_MAXCNT) { + err_offset = opt_iter; + goto validate_return_locked; + } + + tag_len = tag[1]; + if (tag_len > (opt_len - opt_iter)) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + switch (tag[0]) { + case CIPSO_V4_TAG_RBITMAP: + if (tag_len < 4) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + + /* We are already going to do all the verification + * necessary at the socket layer so from our point of + * view it is safe to turn these checks off (and less + * work), however, the CIPSO draft says we should do + * all the CIPSO validations here but it doesn't + * really specify _exactly_ what we need to validate + * ... so, just make it a sysctl tunable. */ + if (cipso_v4_rbm_strictvalid) { + if (cipso_v4_map_lvl_valid(doi_def, + tag[3]) < 0) { + err_offset = opt_iter + 3; + goto validate_return_locked; + } + if (tag_len > 4 && + cipso_v4_map_cat_rbm_valid(doi_def, + &tag[4], + tag_len - 4) < 0) { + err_offset = opt_iter + 4; + goto validate_return_locked; + } + } + break; + default: + err_offset = opt_iter; + goto validate_return_locked; + } + + tag += tag_len; + opt_iter += tag_len; + } + +validate_return_locked: + rcu_read_unlock(); +validate_return: + *option = opt + err_offset; + return err_offset; +} + +/** + * cipso_v4_error - Send the correct reponse for a bad packet + * @skb: the packet + * @error: the error code + * @gateway: CIPSO gateway flag + * + * Description: + * Based on the error code given in @error, send an ICMP error message back to + * the originating host. From the IETF draft ... + * + * "If the contents of the CIPSO [option] are valid but the security label is + * outside of the configured host or port label range, the datagram is + * discarded and an ICMP 'destination unreachable' (type 3) is generated and + * returned. The code field of the ICMP is set to 'communication with + * destination network administratively prohibited' (code 9) or to + * 'communication with destination host administratively prohibited' + * (code 10). The value of the code is dependent on whether the originator + * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The + * recipient of the ICMP message MUST be able to handle either value. The + * same procedure is performed if a CIPSO [option] can not be added to an + * IP packet because it is too large to fit in the IP options area." + * + * "If the error is triggered by receipt of an ICMP message, the message is + * discarded and no response is permitted (consistent with general ICMP + * processing rules)." + * + */ +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) +{ + if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) + return; + + if (gateway) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); +} + +/** + * cipso_v4_socket_setattr - Add a CIPSO option to a socket + * @sock: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sock->sk, which means it either needs to be in the + * process of being created or locked via lock_sock(sock->sk). Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + u32 iter; + unsigned char *buf = NULL; + u32 buf_len = 0; + u32 opt_len; + struct ip_options *opt = NULL; + struct sock *sk; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + sk = sock->sk; + if (sk == NULL) + return 0; + + /* XXX - This code assumes only one tag per CIPSO option which isn't + * really a good assumption to make but since we only support the MAC + * tags right now it is a safe assumption. */ + iter = 0; + do { + switch (doi_def->tags[iter]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_gentag_rbm(doi_def, + secattr, + &buf, + &buf_len); + break; + default: + ret_val = -EPERM; + goto socket_setattr_failure; + } + + iter++; + } while (ret_val != 0 && + iter < CIPSO_V4_TAG_MAXCNT && + doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); + if (ret_val != 0) + goto socket_setattr_failure; + + /* We can't use ip_options_get() directly because it makes a call to + * ip_options_get_alloc() which allocates memory with GFP_KERNEL and + * we can't block here. */ + opt_len = (buf_len + 3) & ~3; + opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); + if (opt == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + memcpy(opt->__data, buf, buf_len); + opt->optlen = opt_len; + opt->is_data = 1; + kfree(buf); + buf = NULL; + ret_val = ip_options_compile(opt, NULL); + if (ret_val != 0) + goto socket_setattr_failure; + + sk_inet = inet_sk(sk); + if (sk_inet->is_icsk) { + sk_conn = inet_csk(sk); + if (sk_inet->opt) + sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; + sk_conn->icsk_ext_hdr_len += opt->optlen; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } + opt = xchg(&sk_inet->opt, opt); + kfree(opt); + + return 0; + +socket_setattr_failure: + kfree(buf); + kfree(opt); + return ret_val; +} + +/** + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. + * + */ +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + struct inet_sock *sk_inet; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + sk_inet = inet_sk(sk); + if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) + return -ENOMSG; + cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - + sizeof(struct iphdr); + ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); + if (ret_val == 0) + return ret_val; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) { + rcu_read_unlock(); + return -ENOMSG; + } + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + rcu_read_unlock(); + + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + lock_sock(sock->sk); + ret_val = cipso_v4_sock_getattr(sock->sk, secattr); + release_sock(sock->sk); + + return ret_val; +} + +/** + * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Parse the given packet's CIPSO option and return the security attributes. + * Returns zero on success and negative values on failure. + * + */ +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -ENOMSG; + unsigned char *cipso_ptr; + u32 doi; + struct cipso_v4_doi *doi_def; + + if (!CIPSO_V4_OPTEXIST(skb)) + return -ENOMSG; + cipso_ptr = CIPSO_V4_OPTPTR(skb); + if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) + return 0; + + doi = ntohl(*(u32 *)&cipso_ptr[2]); + rcu_read_lock(); + doi_def = cipso_v4_doi_getdef(doi); + if (doi_def == NULL) + goto skbuff_getattr_return; + switch (cipso_ptr[6]) { + case CIPSO_V4_TAG_RBITMAP: + ret_val = cipso_v4_parsetag_rbm(doi_def, + &cipso_ptr[6], + secattr); + break; + } + +skbuff_getattr_return: + rcu_read_unlock(); + return ret_val; +} + +/* + * Setup Functions + */ + +/** + * cipso_v4_init - Initialize the CIPSO module + * + * Description: + * Initialize the CIPSO module and prepare it for use. Returns zero on success + * and negative values on failure. + * + */ +static int __init cipso_v4_init(void) +{ + int ret_val; + + ret_val = cipso_v4_cache_init(); + if (ret_val != 0) + panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", + ret_val); + + return 0; +} + +subsys_initcall(cipso_v4_init); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index ec5da4fbd9f..7b068a89195 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -25,7 +25,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; struct rtable *rt; - u32 saddr; + __be32 saddr; int oif; int err; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a6cc31d911e..7602c79a389 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -43,6 +43,7 @@ #include <linux/in.h> #include <linux/errno.h> #include <linux/interrupt.h> +#include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -62,6 +63,7 @@ #include <net/ip.h> #include <net/route.h> #include <net/ip_fib.h> +#include <net/netlink.h> struct ipv4_devconf ipv4_devconf = { .accept_redirects = 1, @@ -78,7 +80,15 @@ static struct ipv4_devconf ipv4_devconf_dflt = { .accept_source_route = 1, }; -static void rtmsg_ifa(int event, struct in_ifaddr *); +static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { + [IFA_LOCAL] = { .type = NLA_U32 }, + [IFA_ADDRESS] = { .type = NLA_U32 }, + [IFA_BROADCAST] = { .type = NLA_U32 }, + [IFA_ANYCAST] = { .type = NLA_U32 }, + [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, +}; + +static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, @@ -214,7 +224,7 @@ static void inetdev_destroy(struct in_device *in_dev) call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } -int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) +int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { rcu_read_lock(); for_primary_ifa(in_dev) { @@ -229,8 +239,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) return 0; } -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy) +static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy, struct nlmsghdr *nlh, u32 pid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -263,7 +273,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, if (!do_promote) { *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -288,7 +298,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -300,7 +310,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { @@ -319,7 +329,14 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } -static int inet_insert_ifa(struct in_ifaddr *ifa) +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) +{ + __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); +} + +static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, + u32 pid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -364,12 +381,17 @@ static int inet_insert_ifa(struct in_ifaddr *ifa) /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; } +static int inet_insert_ifa(struct in_ifaddr *ifa) +{ + return __inet_insert_ifa(ifa, NULL, 0); +} + static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get_rtnl(dev); @@ -407,8 +429,8 @@ struct in_device *inetdev_by_index(int ifindex) /* Called only from RTNL semaphored context. No locks. */ -struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, - u32 mask) +struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, + __be32 mask) { ASSERT_RTNL(); @@ -421,87 +443,134 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; struct in_device *in_dev; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct ifaddrmsg *ifm; struct in_ifaddr *ifa, **ifap; + int err = -EINVAL; ASSERT_RTNL(); - if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) - goto out; + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + if (err < 0) + goto errout; + + ifm = nlmsg_data(nlh); + in_dev = inetdev_by_index(ifm->ifa_index); + if (in_dev == NULL) { + err = -ENODEV; + goto errout; + } + __in_dev_put(in_dev); for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if ((rta[IFA_LOCAL - 1] && - memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), - &ifa->ifa_local, 4)) || - (rta[IFA_LABEL - 1] && - rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || - (rta[IFA_ADDRESS - 1] && - (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), - ifa)))) + if (tb[IFA_LOCAL] && + ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL])) + continue; + + if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) + continue; + + if (tb[IFA_ADDRESS] && + (ifm->ifa_prefixlen != ifa->ifa_prefixlen || + !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - inet_del_ifa(in_dev, ifap, 1); + + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); return 0; } -out: - return -EADDRNOTAVAIL; + + err = -EADDRNOTAVAIL; +errout: + return err; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) { - struct rtattr **rta = arg; + struct nlattr *tb[IFA_MAX+1]; + struct in_ifaddr *ifa; + struct ifaddrmsg *ifm; struct net_device *dev; struct in_device *in_dev; - struct ifaddrmsg *ifm = NLMSG_DATA(nlh); - struct in_ifaddr *ifa; - int rc = -EINVAL; + int err = -EINVAL; - ASSERT_RTNL(); + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); + if (err < 0) + goto errout; - if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) - goto out; + ifm = nlmsg_data(nlh); + if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) + goto errout; - rc = -ENODEV; - if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) - goto out; + dev = __dev_get_by_index(ifm->ifa_index); + if (dev == NULL) { + err = -ENODEV; + goto errout; + } - rc = -ENOBUFS; - if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { + in_dev = __in_dev_get_rtnl(dev); + if (in_dev == NULL) { in_dev = inetdev_init(dev); - if (!in_dev) - goto out; + if (in_dev == NULL) { + err = -ENOBUFS; + goto errout; + } } - if ((ifa = inet_alloc_ifa()) == NULL) - goto out; + ifa = inet_alloc_ifa(); + if (ifa == NULL) { + /* + * A potential indev allocation can be left alive, it stays + * assigned to its device and is destroy with it. + */ + err = -ENOBUFS; + goto errout; + } + + in_dev_hold(in_dev); + + if (tb[IFA_ADDRESS] == NULL) + tb[IFA_ADDRESS] = tb[IFA_LOCAL]; - if (!rta[IFA_ADDRESS - 1]) - rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; - memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); - memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); - if (rta[IFA_BROADCAST - 1]) - memcpy(&ifa->ifa_broadcast, - RTA_DATA(rta[IFA_BROADCAST - 1]), 4); - if (rta[IFA_ANYCAST - 1]) - memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4); ifa->ifa_flags = ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; - in_dev_hold(in_dev); - ifa->ifa_dev = in_dev; - if (rta[IFA_LABEL - 1]) - rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); + ifa->ifa_dev = in_dev; + + ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]); + ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]); + + if (tb[IFA_BROADCAST]) + ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); + + if (tb[IFA_ANYCAST]) + ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); + + if (tb[IFA_LABEL]) + nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - rc = inet_insert_ifa(ifa); -out: - return rc; + return ifa; + +errout: + return ERR_PTR(err); +} + +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct in_ifaddr *ifa; + + ASSERT_RTNL(); + + ifa = rtm_to_ifaddr(nlh); + if (IS_ERR(ifa)) + return PTR_ERR(ifa); + + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); } /* @@ -736,7 +805,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg) break; ret = 0; if (ifa->ifa_mask != sin->sin_addr.s_addr) { - u32 old_mask = ifa->ifa_mask; + __be32 old_mask = ifa->ifa_mask; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); @@ -807,9 +876,9 @@ out: return done; } -u32 inet_select_addr(const struct net_device *dev, u32 dst, int scope) +__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) { - u32 addr = 0; + __be32 addr = 0; struct in_device *in_dev; rcu_read_lock(); @@ -858,11 +927,11 @@ out: return addr; } -static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, - u32 local, int scope) +static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, + __be32 local, int scope) { int same = 0; - u32 addr = 0; + __be32 addr = 0; for_ifa(in_dev) { if (!addr && @@ -902,9 +971,9 @@ static u32 confirm_addr_indev(struct in_device *in_dev, u32 dst, * - local: address, 0=autoselect the local address * - scope: maximum allowed scope value for the local address */ -u32 inet_confirm_addr(const struct net_device *dev, u32 dst, u32 local, int scope) +__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) { - u32 addr = 0; + __be32 addr = 0; struct in_device *in_dev; if (dev) { @@ -1056,32 +1125,37 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); - ifm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + if (nlh == NULL) + return -ENOBUFS; + + ifm = nlmsg_data(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; + if (ifa->ifa_address) - RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); + NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address); + if (ifa->ifa_local) - RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); + NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local); + if (ifa->ifa_broadcast) - RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); + NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); + if (ifa->ifa_anycast) - RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); + NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); + if (ifa->ifa_label[0]) - RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1127,19 +1201,27 @@ done: return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr* ifa) +static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, + u32 pid) { - int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + struct sk_buff *skb; + u32 seq = nlh ? nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto errout; - if (!skb) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); - else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { + err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + if (err < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); - } else { - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); + goto errout; } + + err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); } static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { @@ -1151,9 +1233,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, .dumpit = inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, - [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, #endif }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index fc2f8ce441d..13b29360d10 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,3 +1,4 @@ +#include <linux/err.h> #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> @@ -16,7 +17,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int err; struct iphdr *top_iph; struct ip_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -36,7 +38,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -91,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -103,26 +112,27 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } ip_send_check(top_iph); - err = 0; - error: return err; } @@ -137,8 +147,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph; struct ip_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -146,6 +158,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr[2]; struct scatterlist *sg; int padlen; + int err; if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) goto out; @@ -155,15 +168,16 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; - - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + u8 sum[alen]; + + err = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (err) + goto out; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; goto out; } @@ -178,7 +192,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); sg = &esp->sgbuf[0]; @@ -188,9 +202,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(err)) + return err; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -237,7 +253,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (!x->props.mode) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -254,9 +270,9 @@ out: static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -293,11 +309,11 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -307,6 +323,7 @@ static void esp_destroy(struct xfrm_state *x) static int esp_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -322,22 +339,27 @@ static int esp_init_state(struct xfrm_state *x) if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { + crypto_hash_digestsize(hash)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), + crypto_hash_digestsize(hash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -351,24 +373,22 @@ static int esp_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ba2a70745a6..9c399a70dd5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,10 +32,12 @@ #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> +#include <linux/if_addr.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/init.h> +#include <linux/list.h> #include <net/ip.h> #include <net/protocol.h> @@ -50,48 +52,67 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -#define RT_TABLE_MIN RT_TABLE_MAIN - struct fib_table *ip_fib_local_table; struct fib_table *ip_fib_main_table; -#else +#define FIB_TABLE_HASHSZ 1 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -#define RT_TABLE_MIN 1 +#else -struct fib_table *fib_tables[RT_TABLE_MAX+1]; +#define FIB_TABLE_HASHSZ 256 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -struct fib_table *__fib_new_table(int id) +struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + tb = fib_get_table(id); + if (tb) + return tb; tb = fib_hash_init(id); if (!tb) return NULL; - fib_tables[id] = tb; + h = id & (FIB_TABLE_HASHSZ - 1); + hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); return tb; } +struct fib_table *fib_get_table(u32 id) +{ + struct fib_table *tb; + struct hlist_node *node; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + if (tb->tb_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + return NULL; +} #endif /* CONFIG_IP_MULTIPLE_TABLES */ - static void fib_flush(void) { int flushed = 0; -#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - int id; + struct hlist_node *node; + unsigned int h; - for (id = RT_TABLE_MAX; id>0; id--) { - if ((tb = fib_get_table(id))==NULL) - continue; - flushed += tb->tb_flush(tb); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + flushed += tb->tb_flush(tb); } -#else /* CONFIG_IP_MULTIPLE_TABLES */ - flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); - flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); -#endif /* CONFIG_IP_MULTIPLE_TABLES */ if (flushed) rt_cache_flush(-1); @@ -101,7 +122,7 @@ static void fib_flush(void) * Find the first device with a given source address. */ -struct net_device * ip_dev_find(u32 addr) +struct net_device * ip_dev_find(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; @@ -125,7 +146,7 @@ out: return dev; } -unsigned inet_addr_type(u32 addr) +unsigned inet_addr_type(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; @@ -159,8 +180,8 @@ unsigned inet_addr_type(u32 addr) - check, that packet arrived from expected physical interface. */ -int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, - struct net_device *dev, u32 *spec_dst, u32 *itag) +int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, + struct net_device *dev, __be32 *spec_dst, u32 *itag) { struct in_device *in_dev; struct flowi fl = { .nl_u = { .ip4_u = @@ -232,42 +253,190 @@ e_inval: #ifndef CONFIG_IP_NOSIOCRT +static inline __be32 sk_extract_addr(struct sockaddr *addr) +{ + return ((struct sockaddr_in *) addr)->sin_addr.s_addr; +} + +static int put_rtax(struct nlattr *mx, int len, int type, u32 value) +{ + struct nlattr *nla; + + nla = (struct nlattr *) ((char *) mx + len); + nla->nla_type = type; + nla->nla_len = nla_attr_size(4); + *(u32 *) nla_data(nla) = value; + + return len + nla_total_size(4); +} + +static int rtentry_to_fib_config(int cmd, struct rtentry *rt, + struct fib_config *cfg) +{ + __be32 addr; + int plen; + + memset(cfg, 0, sizeof(*cfg)); + + if (rt->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * Check mask for validity: + * a) it must be contiguous. + * b) destination must have all host bits clear. + * c) if application forgot to set correct family (AF_INET), + * reject request unless it is absolutely clear i.e. + * both family and mask are zero. + */ + plen = 32; + addr = sk_extract_addr(&rt->rt_dst); + if (!(rt->rt_flags & RTF_HOST)) { + __be32 mask = sk_extract_addr(&rt->rt_genmask); + + if (rt->rt_genmask.sa_family != AF_INET) { + if (mask || rt->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + + if (bad_mask(mask, addr)) + return -EINVAL; + + plen = inet_mask_len(mask); + } + + cfg->fc_dst_len = plen; + cfg->fc_dst = addr; + + if (cmd != SIOCDELRT) { + cfg->fc_nlflags = NLM_F_CREATE; + cfg->fc_protocol = RTPROT_BOOT; + } + + if (rt->rt_metric) + cfg->fc_priority = rt->rt_metric - 1; + + if (rt->rt_flags & RTF_REJECT) { + cfg->fc_scope = RT_SCOPE_HOST; + cfg->fc_type = RTN_UNREACHABLE; + return 0; + } + + cfg->fc_scope = RT_SCOPE_NOWHERE; + cfg->fc_type = RTN_UNICAST; + + if (rt->rt_dev) { + char *colon; + struct net_device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + + devname[IFNAMSIZ-1] = 0; + colon = strchr(devname, ':'); + if (colon) + *colon = 0; + dev = __dev_get_by_name(devname); + if (!dev) + return -ENODEV; + cfg->fc_oif = dev->ifindex; + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + cfg->fc_prefsrc = ifa->ifa_local; + } + } + + addr = sk_extract_addr(&rt->rt_gateway); + if (rt->rt_gateway.sa_family == AF_INET && addr) { + cfg->fc_gw = addr; + if (rt->rt_flags & RTF_GATEWAY && + inet_addr_type(addr) == RTN_UNICAST) + cfg->fc_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) + return -EINVAL; + + if (cfg->fc_scope == RT_SCOPE_NOWHERE) + cfg->fc_scope = RT_SCOPE_LINK; + + if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { + struct nlattr *mx; + int len = 0; + + mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + + if (rt->rt_flags & RTF_MTU) + len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); + + if (rt->rt_flags & RTF_WINDOW) + len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); + + if (rt->rt_flags & RTF_IRTT) + len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); + + cfg->fc_mx = mx; + cfg->fc_mx_len = len; + } + + return 0; +} + /* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ int ip_rt_ioctl(unsigned int cmd, void __user *arg) { + struct fib_config cfg; + struct rtentry rt; int err; - struct kern_rta rta; - struct rtentry r; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&r, arg, sizeof(struct rtentry))) + + if (copy_from_user(&rt, arg, sizeof(rt))) return -EFAULT; + rtnl_lock(); - err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); + err = rtentry_to_fib_config(cmd, &rt, &cfg); if (err == 0) { + struct fib_table *tb; + if (cmd == SIOCDELRT) { - struct fib_table *tb = fib_get_table(req.rtm.rtm_table); - err = -ESRCH; + tb = fib_get_table(cfg.fc_table); if (tb) - err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_delete(tb, &cfg); + else + err = -ESRCH; } else { - struct fib_table *tb = fib_new_table(req.rtm.rtm_table); - err = -ENOBUFS; + tb = fib_new_table(cfg.fc_table); if (tb) - err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_insert(tb, &cfg); + else + err = -ENOBUFS; } - kfree(rta.rta_mx); + + /* allocated by rtentry_to_fib_config() */ + kfree(cfg.fc_mx); } rtnl_unlock(); return err; @@ -284,77 +453,169 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) +struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PROTOINFO] = { .type = NLA_U32 }, + [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, +}; + +static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib_config *cfg) { - int i; - - for (i=1; i<=RTA_MAX; i++, rta++) { - struct rtattr *attr = *rta; - if (attr) { - if (RTA_PAYLOAD(attr) < 4) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) - *rta = (struct rtattr*)RTA_DATA(attr); + struct nlattr *attr; + int err, remaining; + struct rtmsg *rtm; + + err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + memset(cfg, 0, sizeof(*cfg)); + + rtm = nlmsg_data(nlh); + cfg->fc_family = rtm->rtm_family; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_tos = rtm->rtm_tos; + cfg->fc_table = rtm->rtm_table; + cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_scope = rtm->rtm_scope; + cfg->fc_type = rtm->rtm_type; + cfg->fc_flags = rtm->rtm_flags; + cfg->fc_nlflags = nlh->nlmsg_flags; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { + switch (attr->nla_type) { + case RTA_DST: + cfg->fc_dst = nla_get_be32(attr); + break; + case RTA_SRC: + cfg->fc_src = nla_get_be32(attr); + break; + case RTA_OIF: + cfg->fc_oif = nla_get_u32(attr); + break; + case RTA_GATEWAY: + cfg->fc_gw = nla_get_be32(attr); + break; + case RTA_PRIORITY: + cfg->fc_priority = nla_get_u32(attr); + break; + case RTA_PREFSRC: + cfg->fc_prefsrc = nla_get_be32(attr); + break; + case RTA_METRICS: + cfg->fc_mx = nla_data(attr); + cfg->fc_mx_len = nla_len(attr); + break; + case RTA_MULTIPATH: + cfg->fc_mp = nla_data(attr); + cfg->fc_mp_len = nla_len(attr); + break; + case RTA_FLOW: + cfg->fc_flow = nla_get_u32(attr); + break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; + case RTA_TABLE: + cfg->fc_table = nla_get_u32(attr); + break; } } + return 0; +errout: + return err; } int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_get_table(r->rtm_table); - if (tb) - return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ESRCH; + tb = fib_get_table(cfg.fc_table); + if (tb == NULL) { + err = -ESRCH; + goto errout; + } + + err = tb->tb_delete(tb, &cfg); +errout: + return err; } int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_new_table(r->rtm_table); - if (tb) - return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ENOBUFS; + tb = fib_new_table(cfg.fc_table); + if (tb == NULL) { + err = -ENOBUFS; + goto errout; + } + + err = tb->tb_insert(tb, &cfg); +errout: + return err; } int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - int t; - int s_t; + unsigned int h, s_h; + unsigned int e = 0, s_e; struct fib_table *tb; + struct hlist_node *node; + int dumped = 0; - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && + ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) return ip_rt_dump(skb, cb); - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_TABLE_MIN; - - for (t=s_t; t<=RT_TABLE_MAX; t++) { - if (t < s_t) continue; - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - if ((tb = fib_get_table(t))==NULL) - continue; - if (tb->tb_dump(tb, skb, cb) < 0) - break; + s_h = cb->args[0]; + s_e = cb->args[1]; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->tb_dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } } - - cb->args[0] = t; +out: + cb->args[1] = e; + cb->args[0] = h; return skb->len; } @@ -366,17 +627,18 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) only when netlink is already locked. */ -static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { - struct fib_table * tb; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; - struct kern_rta rta; - - memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); + struct fib_table *tb; + struct fib_config cfg = { + .fc_protocol = RTPROT_KERNEL, + .fc_type = type, + .fc_dst = dst, + .fc_dst_len = dst_len, + .fc_prefsrc = ifa->ifa_local, + .fc_oif = ifa->ifa_dev->dev->ifindex, + .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, + }; if (type == RTN_UNICAST) tb = fib_new_table(RT_TABLE_MAIN); @@ -386,26 +648,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr if (tb == NULL) return; - req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = cmd; - req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; - req.nlh.nlmsg_pid = 0; - req.nlh.nlmsg_seq = 0; + cfg.fc_table = tb->tb_id; - req.rtm.rtm_dst_len = dst_len; - req.rtm.rtm_table = tb->tb_id; - req.rtm.rtm_protocol = RTPROT_KERNEL; - req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); - req.rtm.rtm_type = type; - - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + if (type != RTN_LOCAL) + cfg.fc_scope = RT_SCOPE_LINK; + else + cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_insert(tb, &cfg); else - tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_delete(tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -413,9 +666,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *prim = ifa; - u32 mask = ifa->ifa_mask; - u32 addr = ifa->ifa_local; - u32 prefix = ifa->ifa_address&mask; + __be32 mask = ifa->ifa_mask; + __be32 addr = ifa->ifa_local; + __be32 prefix = ifa->ifa_address&mask; if (ifa->ifa_flags&IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); @@ -431,7 +684,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && @@ -453,8 +706,8 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa) struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa; - u32 brd = ifa->ifa_address|~ifa->ifa_mask; - u32 any = ifa->ifa_address&ifa->ifa_mask; + __be32 brd = ifa->ifa_address|~ifa->ifa_mask; + __be32 any = ifa->ifa_address&ifa->ifa_mask; #define LOCAL_OK 1 #define BRD_OK 2 #define BRD0_OK 4 @@ -652,11 +905,17 @@ static struct notifier_block fib_netdev_notifier = { void __init ip_fib_init(void) { + unsigned int i; + + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&fib_table_hash[i]); #ifndef CONFIG_IP_MULTIPLE_TABLES ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); #else - fib_rules_init(); + fib4_rules_init(); #endif register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 72c633b357c..107bb6cbb0b 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -51,7 +51,7 @@ static kmem_cache_t *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; struct list_head fn_alias; - u32 fn_key; + __be32 fn_key; }; struct fn_zone { @@ -64,7 +64,7 @@ struct fn_zone { #define FZ_HASHMASK(fz) ((fz)->fz_hashmask) int fz_order; /* Zone order */ - u32 fz_mask; + __be32 fz_mask; #define FZ_MASK(fz) ((fz)->fz_mask) }; @@ -77,7 +77,7 @@ struct fn_hash { struct fn_zone *fn_zone_list; }; -static inline u32 fn_hash(u32 key, struct fn_zone *fz) +static inline u32 fn_hash(__be32 key, struct fn_zone *fz) { u32 h = ntohl(key)>>(32 - fz->fz_order); h ^= (h>>20); @@ -87,7 +87,7 @@ static inline u32 fn_hash(u32 key, struct fn_zone *fz) return h; } -static inline u32 fz_key(u32 dst, struct fn_zone *fz) +static inline __be32 fz_key(__be32 dst, struct fn_zone *fz) { return dst & FZ_MASK(fz); } @@ -254,7 +254,7 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result struct hlist_head *head; struct hlist_node *node; struct fib_node *f; - u32 k = fz_key(flp->fl4_dst, fz); + __be32 k = fz_key(flp->fl4_dst, fz); head = &fz->fz_hash[fn_hash(k, fz)]; hlist_for_each_entry(f, node, head, fn_hash) { @@ -365,7 +365,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) } /* Return the node in FZ matching KEY. */ -static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) +static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) { struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)]; struct hlist_node *node; @@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) return NULL; } -static int -fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f, *f; struct fib_alias *fa, *new_fa; struct fn_zone *fz; struct fib_info *fi; - int z = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; - u32 key; + u8 tos = cfg->fc_tos; + __be32 key; int err; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - fz = table->fn_zones[z]; - if (!fz && !(fz = fn_new_zone(table, z))) + + fz = table->fn_zones[cfg->fc_dst_len]; + if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) return -ENOBUFS; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } - if ((fi = fib_create_info(r, rta, n, &err)) == NULL) - return err; + fi = fib_create_info(cfg); + if (IS_ERR(fi)) + return PTR_ERR(fi); if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && - (z==32 || (1<<z) > fz->fz_divisor)) + (cfg->fc_dst_len == 32 || + (1 << cfg->fc_dst_len) > fz->fz_divisor)) fn_rehash_zone(fz); f = fib_find_node(fz, key); @@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (n->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; write_lock_bh(&fib_hash_lock); fi_drop = fa->fa_info; fa->fa_info = fi; - fa->fa_type = type; - fa->fa_scope = r->rtm_scope; + fa->fa_type = cfg->fc_type; + fa->fa_scope = cfg->fc_scope; state = fa->fa_state; fa->fa_state &= ~FA_S_ACCESSED; fib_hash_genid++; @@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) goto out; } - if (!(n->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(n->nlmsg_flags&NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* @@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fz->fz_nent++; rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, + &cfg->fc_nlinfo); return 0; out_free_new_fa: @@ -537,30 +535,25 @@ out: } -static int -fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash*)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; - int z = r->rtm_dst_len; struct fn_zone *fz; - u32 key; - u8 tos = r->rtm_tos; + __be32 key; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - if ((fz = table->fn_zones[z]) == NULL) + + if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) return -ESRCH; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } f = fib_find_node(fz, key); @@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (!f) fa = NULL; else - fa = fib_find_alias(&f->fn_alias, tos, 0); + fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); if (!fa) return -ESRCH; @@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != tos) + if (fa->fa_tos != cfg->fc_tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, n, rta, fi) == 0) { + if ((!cfg->fc_type || + fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, int kill_fn; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, + tb->tb_id, &cfg->fc_nlinfo); kill_fn = 0; write_lock_bh(&fib_hash_lock); @@ -684,7 +678,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, struct fib_node *f; int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; hlist_for_each_entry(f, node, head, fn_hash) { struct fib_alias *fa; @@ -699,19 +693,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, tb->tb_id, fa->fa_type, fa->fa_scope, - &f->fn_key, + f->fn_key, fz->fz_order, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } next: i++; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -722,21 +716,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h=0; h < fz->fz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); if (fz->fz_hash == NULL || hlist_empty(&fz->fz_hash[h])) continue; if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -746,28 +740,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin struct fn_zone *fz; struct fn_hash *table = (struct fn_hash*)tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&fib_hash_lock); for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&fib_hash_lock); return -1; } } read_unlock(&fib_hash_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; @@ -972,7 +966,7 @@ static void fib_seq_stop(struct seq_file *seq, void *v) read_unlock(&fib_hash_lock); } -static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi) +static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) { static const unsigned type2flags[RTN_MAX + 1] = { [7] = RTF_REJECT, [8] = RTF_REJECT, @@ -981,7 +975,7 @@ static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi) if (fi && fi->fib_nh->nh_gw) flags |= RTF_GATEWAY; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; return flags; @@ -997,7 +991,7 @@ static int fib_seq_show(struct seq_file *seq, void *v) { struct fib_iter_state *iter; char bf[128]; - u32 prefix, mask; + __be32 prefix, mask; unsigned flags; struct fib_node *f; struct fib_alias *fa; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ef6609ea0eb..0e8b70bad4e 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -20,22 +20,17 @@ struct fib_alias { /* Exported by fib_semantics.c */ extern int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, __u32 zone, __u32 mask, + struct fib_result *res, __be32 zone, __be32 mask, int prefixlen); extern void fib_release_info(struct fib_info *); -extern struct fib_info *fib_create_info(const struct rtmsg *r, - struct kern_rta *rta, - const struct nlmsghdr *, - int *err); -extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, - struct kern_rta *rta, struct fib_info *fi); +extern struct fib_info *fib_create_info(struct fib_config *cfg); +extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, + u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); -extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req); +extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, + int dst_len, u32 tb_id, struct nl_info *info); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 79b04718bdf..0852b9cd065 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -5,9 +5,8 @@ * * IPv4 Forwarding Information Base: policy rules. * - * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * Thomas Graf <tgraf@suug.ch> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,463 +18,350 @@ * Marc Boucher : routing by fwmark */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/errno.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/inetdevice.h> #include <linux/netdevice.h> -#include <linux/if_arp.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/inetdevice.h> #include <linux/init.h> #include <linux/list.h> #include <linux/rcupdate.h> - #include <net/ip.h> -#include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> -#include <net/sock.h> #include <net/ip_fib.h> +#include <net/fib_rules.h> -#define FRprintk(a...) +static struct fib_rules_ops fib4_rules_ops; -struct fib_rule +struct fib4_rule { - struct hlist_node hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - u32 r_src; - u32 r_srcmask; - u32 r_dst; - u32 r_dstmask; - u32 r_srcmap; - u8 r_flags; - u8 r_tos; + struct fib_rule common; + u8 dst_len; + u8 src_len; + u8 tos; + __be32 src; + __be32 srcmask; + __be32 dst; + __be32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; + u32 fwmask; #endif - int r_ifindex; #ifdef CONFIG_NET_CLS_ROUTE - __u32 r_tclassid; + u32 tclassid; #endif - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; -static struct fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFF, - .r_table = RT_TABLE_DEFAULT, - .r_action = RTN_UNICAST, +static struct fib4_rule default_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFF, + .table = RT_TABLE_DEFAULT, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule main_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFE, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST, +static struct fib4_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule local_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_table = RT_TABLE_LOCAL, - .r_action = RTN_UNICAST, +static struct fib4_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .table = RT_TABLE_LOCAL, + .action = FR_ACT_TO_TBL, + .flags = FIB_RULE_PERMANENT, + }, }; -static struct hlist_head fib_rules; +static LIST_HEAD(fib4_rules); -/* writer func called from netlink -- rtnl_sem hold*/ - -static void rtmsg_rule(int, struct fib_rule *); - -int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && - rtm->rtm_tos == r->r_tos && -#ifdef CONFIG_IP_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - err = -EPERM; - if (r == &local_rule) - break; - - hlist_del_rcu(&r->hlist); - r->r_dead = 1; - rtmsg_rule(RTM_DELRULE, r); - fib_rule_put(r); - err = 0; - break; - } - } - return err; + return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; } +#endif -/* Allocate new unique table id */ - -static struct fib_table *fib_empty_table(void) +int fib_lookup(struct flowi *flp, struct fib_result *res) { - int id; + struct fib_lookup_arg arg = { + .result = res, + }; + int err; - for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_tables[id] == NULL) - return __fib_new_table(id); - return NULL; -} + err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + res->r = arg.rule; -static inline void fib_rule_put_rcu(struct rcu_head *head) -{ - struct fib_rule *r = container_of(head, struct fib_rule, rcu); - kfree(r); + return err; } -void fib_rule_put(struct fib_rule *r) +static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, fib_rule_put_rcu); - else - printk("Freeing alive rule %p\n", r); + int err = -EAGAIN; + struct fib_table *tbl; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } + + if ((tbl = fib_get_table(rule->table)) == NULL) + goto errout; + + err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; } -/* writer func called from netlink -- rtnl_sem hold*/ -int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +void fib_select_default(const struct flowi *flp, struct fib_result *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || - (rtm->rtm_tos & ~IPTOS_TOS_MASK)) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct fib_table *table; - if (rtm->rtm_type == RTN_UNICAST) { - if ((table = fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = table->tb_id; - } + if (res->r && res->r->action == FR_ACT_TO_TBL && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; + if ((tb = fib_get_table(res->r->table)) != NULL) + tb->tb_select_default(tb, flp, res); } +} - new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); - new_r->r_tos = rtm->rtm_tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); -#endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = __dev_get_by_name(new_r->r_ifname); - if (dev) - new_r->r_ifindex = dev->ifindex; - } -#ifdef CONFIG_NET_CLS_ROUTE - if (rta[RTA_FLOW-1]) - memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); -#endif - r = container_of(fib_rules.first, struct fib_rule, hlist); +static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib4_rule *r = (struct fib4_rule *) rule; + __be32 daddr = fl->fl4_dst; + __be32 saddr = fl->fl4_src; - if (!new_r->r_preference) { - if (r && r->hlist.next != NULL) { - r = container_of(r->hlist.next, struct fib_rule, hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; - } - } + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); + if (r->tos && (r->tos != fl->fl4_tos)) + return 0; - if (last) - hlist_add_after_rcu(&last->hlist, &new_r->hlist); - else - hlist_add_before_rcu(&new_r->hlist, &r->hlist); +#ifdef CONFIG_IP_ROUTE_FWMARK + if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) + return 0; +#endif - rtmsg_rule(RTM_NEWRULE, new_r); - return 0; + return 1; } -#ifdef CONFIG_NET_CLS_ROUTE -u32 fib_rules_tclass(struct fib_result *res) +static struct fib_table *fib_empty_table(void) { - if (res->r) - return res->r->r_tclassid; - return 0; + u32 id; + + for (id = 1; id <= RT_TABLE_MAX; id++) + if (fib_get_table(id) == NULL) + return fib_new_table(id); + return NULL; } -#endif -/* callers should hold rtnl semaphore */ +static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U32 }, + [FRA_DST] = { .type = NLA_U32 }, + [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FWMASK] = { .type = NLA_U32 }, + [FRA_FLOW] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, +}; -static void fib_rules_detach(struct net_device *dev) +static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct hlist_node *node; - struct fib_rule *r; + int err = -EINVAL; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + + if (frh->src_len > 32 || frh->dst_len > 32 || + (frh->tos & ~IPTOS_TOS_MASK)) + goto errout; + + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct fib_table *table; - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; + table = fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } + rule->table = table->tb_id; + } } -} -/* callers should hold rtnl semaphore */ + if (tb[FRA_SRC]) + rule4->src = nla_get_be32(tb[FRA_SRC]); -static void fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct fib_rule *r; + if (tb[FRA_DST]) + rule4->dst = nla_get_be32(tb[FRA_DST]); - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; +#ifdef CONFIG_IP_ROUTE_FWMARK + if (tb[FRA_FWMARK]) { + rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); + if (rule4->fwmark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule4->fwmask = 0xFFFFFFFF; } + + if (tb[FRA_FWMASK]) + rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); +#endif + +#ifdef CONFIG_NET_CLS_ROUTE + if (tb[FRA_FLOW]) + rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); +#endif + + rule4->src_len = frh->src_len; + rule4->srcmask = inet_make_mask(rule4->src_len); + rule4->dst_len = frh->dst_len; + rule4->dstmask = inet_make_mask(rule4->dst_len); + rule4->tos = frh->tos; + + err = 0; +errout: + return err; } -int fib_lookup(const struct flowi *flp, struct fib_result *res) +static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - int err; - struct fib_rule *r, *policy; - struct fib_table *tb; - struct hlist_node *node; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; - u32 daddr = flp->fl4_dst; - u32 saddr = flp->fl4_src; + if (frh->src_len && (rule4->src_len != frh->src_len)) + return 0; -FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", - NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); + if (frh->dst_len && (rule4->dst_len != frh->dst_len)) + return 0; - rcu_read_lock(); + if (frh->tos && (rule4->tos != frh->tos)) + return 0; - hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || - (r->r_tos && r->r_tos != flp->fl4_tos) || #ifdef CONFIG_IP_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || + if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; + + if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) + return 0; #endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - -FRprintk("tb %d r %d ", r->r_table, r->r_action); - switch (r->r_action) { - case RTN_UNICAST: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - if ((tb = fib_get_table(r->r_table)) == NULL) - continue; - err = tb->tb_lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } -FRprintk("FAILURE\n"); - rcu_read_unlock(); - return -ENETUNREACH; -} +#ifdef CONFIG_NET_CLS_ROUTE + if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) + return 0; +#endif -void fib_select_default(const struct flowi *flp, struct fib_result *res) -{ - if (res->r && res->r->r_action == RTN_UNICAST && - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { - struct fib_table *tb; - if ((tb = fib_get_table(res->r->r_table)) != NULL) - tb->tb_select_default(tb, flp, res); - } -} + if (tb[FRA_SRC] && (rule4->src != nla_get_be32(tb[FRA_SRC]))) + return 0; -static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; + if (tb[FRA_DST] && (rule4->dst != nla_get_be32(tb[FRA_DST]))) + return 0; - if (event == NETDEV_UNREGISTER) - fib_rules_detach(dev); - else if (event == NETDEV_REGISTER) - fib_rules_attach(dev); - return NOTIFY_DONE; + return 1; } +static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib4_rule *rule4 = (struct fib4_rule *) rule; -static struct notifier_block fib_rules_notifier = { - .notifier_call =fib_rules_event, -}; + frh->family = AF_INET; + frh->dst_len = rule4->dst_len; + frh->src_len = rule4->src_len; + frh->tos = rule4->tos; -static __inline__ int inet_fill_rule(struct sk_buff *skb, - struct fib_rule *r, - u32 pid, u32 seq, int event, - unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_INET; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = r->r_tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); + if (rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); + + if (rule4->fwmask || rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); #endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 4, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 4, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); + + if (rule4->dst_len) + NLA_PUT_BE32(skb, FRA_DST, rule4->dst); + + if (rule4->src_len) + NLA_PUT_BE32(skb, FRA_SRC, rule4->src); + #ifdef CONFIG_NET_CLS_ROUTE - if (r->r_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); + if (rule4->tclassid) + NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); #endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; + return 0; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return -ENOBUFS; } -/* callers should hold rtnl semaphore */ - -static void rtmsg_rule(int event, struct fib_rule *r) +int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); - - if (!skb) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS); - else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) { - kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL); - } else { - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL); - } + return fib_rules_dump(skb, cb, AF_INET); } -int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +static u32 fib4_rule_default_pref(void) { - int idx = 0; - int s_idx = cb->args[0]; - struct fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (idx < s_idx) - goto next; - if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWRULE, NLM_F_MULTI) < 0) - break; -next: - idx++; + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&fib4_rules)) { + pos = fib4_rules.next; + if (pos->next != &fib4_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } - rcu_read_unlock(); - cb->args[0] = idx; - return skb->len; + return 0; } -void __init fib_rules_init(void) +static struct fib_rules_ops fib4_rules_ops = { + .family = AF_INET, + .rule_size = sizeof(struct fib4_rule), + .action = fib4_rule_action, + .match = fib4_rule_match, + .configure = fib4_rule_configure, + .compare = fib4_rule_compare, + .fill = fib4_rule_fill, + .default_pref = fib4_rule_default_pref, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = fib4_rule_policy, + .rules_list = &fib4_rules, + .owner = THIS_MODULE, +}; + +void __init fib4_rules_init(void) { - INIT_HLIST_HEAD(&fib_rules); - hlist_add_head(&local_rule.hlist, &fib_rules); - hlist_add_after(&local_rule.hlist, &main_rule.hlist); - hlist_add_after(&main_rule.hlist, &default_rule.hlist); - register_netdevice_notifier(&fib_rules_notifier); + list_add_tail(&local_rule.common.list, &fib4_rules); + list_add_tail(&main_rule.common.list, &fib4_rules); + list_add_tail(&default_rule.common.list, &fib4_rules); + + fib_rules_register(&fib4_rules_ops); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 51738000f3d..884d176e008 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -33,7 +33,6 @@ #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> -#include <linux/netlink.h> #include <linux/init.h> #include <net/arp.h> @@ -44,12 +43,14 @@ #include <net/sock.h> #include <net/ip_fib.h> #include <net/ip_mp_alg.h> +#include <net/netlink.h> +#include <net/nexthop.h> #include "fib_lookup.h" #define FSprintk(a...) -static DEFINE_RWLOCK(fib_info_lock); +static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_hash_size; @@ -159,7 +160,7 @@ void free_fib_info(struct fib_info *fi) void fib_release_info(struct fib_info *fi) { - write_lock_bh(&fib_info_lock); + spin_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) @@ -172,7 +173,7 @@ void fib_release_info(struct fib_info *fi) fi->fib_dead = 1; fib_info_put(fi); } - write_unlock_bh(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); } static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) @@ -202,7 +203,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; - val ^= fi->fib_prefsrc; + val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; return (val ^ (val >> 7) ^ (val >> 12)) & mask; @@ -247,14 +248,14 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) Used only by redirect accept routine. */ -int ip_fib_check_default(u32 gw, struct net_device *dev) +int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct hlist_node *node; struct fib_nh *nh; unsigned int hash; - read_lock(&fib_info_lock); + spin_lock(&fib_info_lock); hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; @@ -262,41 +263,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags&RTNH_F_DEAD)) { - read_unlock(&fib_info_lock); + spin_unlock(&fib_info_lock); return 0; } } - read_unlock(&fib_info_lock); + spin_unlock(&fib_info_lock); return -1; } -void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req) +void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, + int dst_len, u32 tb_id, struct nl_info *info) { struct sk_buff *skb; - u32 pid = req ? req->pid : n->nlmsg_pid; - int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); - - skb = alloc_skb(size, GFP_KERNEL); - if (!skb) - return; - - if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, - fa->fa_type, fa->fa_scope, &key, z, - fa->fa_tos, - fa->fa_info, 0) < 0) { + int payload = sizeof(struct rtmsg) + 256; + u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + + skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + if (skb == NULL) + goto errout; + + err = fib_dump_info(skb, info->pid, seq, event, tb_id, + fa->fa_type, fa->fa_scope, key, dst_len, + fa->fa_tos, fa->fa_info, 0); + if (err < 0) { kfree_skb(skb); - return; + goto errout; } - NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; - if (n->nlmsg_flags&NLM_F_ECHO) - atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); - if (n->nlmsg_flags&NLM_F_ECHO) - netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + + err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); } /* Return the first fib alias matching TOS with @@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) -{ - while (RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(u32*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - return 0; -} - -static int -fib_count_nexthops(struct rtattr *rta) +static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) { int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - while (nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(rtnh, remaining)) { nhs++; - nhp = RTNH_NEXT(nhp); - }; - return nhs; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 0 : nhs; } -static int -fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, + int remaining, struct fib_config *cfg) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; - nh->nh_oif = nhp->rtnh_ifindex; - nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { - nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + + nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nh->nh_oif = rtnh->rtnh_ifindex; + nh->nh_weight = rtnh->rtnh_hops + 1; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + nh->nh_gw = nla ? nla_get_be32(nla) : 0; #ifdef CONFIG_NET_CLS_ROUTE - nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); + nla = nla_find(attrs, attrlen, RTA_FLOW); + nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); + return 0; } #endif -int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, - struct fib_info *fi) +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - struct rtnexthop *nhp; - int nhlen; + struct rtnexthop *rtnh; + int remaining; #endif - if (rta->rta_priority && - *rta->rta_priority != fi->fib_priority) + if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + if (cfg->fc_oif || cfg->fc_gw) { + if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && + (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) return 0; return 1; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp == NULL) + if (cfg->fc_mp == NULL) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + + rtnh = cfg->fc_mp; + remaining = cfg->fc_mp_len; for_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - u32 gw; + int attrlen; - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + + if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) return 1; - if (attrlen) { - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); - if (gw && gw != nh->nh_gw) + + attrlen = rtnh_attrlen(rtnh); + if (attrlen < 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla && nla_get_be32(nla) != nh->nh_gw) return 1; #ifdef CONFIG_NET_CLS_ROUTE - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); - if (gw && gw != nh->nh_tclassid) + nla = nla_find(attrs, attrlen, RTA_FLOW); + if (nla && nla_get_u32(nla) != nh->nh_tclassid) return 1; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); #endif return 0; @@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, |-> {local prefix} (terminal node) */ -static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, + struct fib_nh *nh) { int err; @@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; - if (r->rtm_scope >= RT_SCOPE_LINK) + if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; @@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n return 0; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = nh->nh_gw, - .scope = r->rtm_scope + 1 } }, - .oif = nh->nh_oif }; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = nh->nh_gw, + .scope = cfg->fc_scope + 1, + }, + }, + .oif = nh->nh_oif, + }; /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) @@ -563,11 +568,11 @@ out: return 0; } -static inline unsigned int fib_laddr_hashfn(u32 val) +static inline unsigned int fib_laddr_hashfn(__be32 val) { unsigned int mask = (fib_hash_size - 1); - return (val ^ (val >> 7) ^ (val >> 14)) & mask; + return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } static struct hlist_head *fib_hash_alloc(int bytes) @@ -598,7 +603,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_hash_size; unsigned int i, bytes; - write_lock_bh(&fib_info_lock); + spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; @@ -639,46 +644,35 @@ static void fib_hash_move(struct hlist_head *new_info_hash, } fib_info_laddrhash = new_laddrhash; - write_unlock_bh(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); fib_hash_free(old_info_hash, bytes); fib_hash_free(old_laddrhash, bytes); } -struct fib_info * -fib_create_info(const struct rtmsg *r, struct kern_rta *rta, - const struct nlmsghdr *nlh, int *errp) +struct fib_info *fib_create_info(struct fib_config *cfg) { int err; struct fib_info *fi = NULL; struct fib_info *ofi; -#ifdef CONFIG_IP_ROUTE_MULTIPATH int nhs = 1; -#else - const int nhs = 1; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - u32 mp_alg = IP_MP_ALG_NONE; -#endif /* Fast check to catch the most weird cases */ - if (fib_props[r->rtm_type].scope > r->rtm_scope) + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp) { - nhs = fib_count_nexthops(rta->rta_mp); + if (cfg->fc_mp) { + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); if (nhs == 0) goto err_inval; } #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rta->rta_mp_alg) { - mp_alg = *rta->rta_mp_alg; - - if (mp_alg < IP_MP_ALG_NONE || - mp_alg > IP_MP_ALG_MAX) + if (cfg->fc_mp_alg) { + if (cfg->fc_mp_alg < IP_MP_ALG_NONE || + cfg->fc_mp_alg > IP_MP_ALG_MAX) goto err_inval; } #endif @@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; fib_info_cnt++; - fi->fib_protocol = r->rtm_protocol; + fi->fib_protocol = cfg->fc_protocol; + fi->fib_flags = cfg->fc_flags; + fi->fib_priority = cfg->fc_priority; + fi->fib_prefsrc = cfg->fc_prefsrc; fi->fib_nhs = nhs; change_nexthops(fi) { nh->nh_parent = fi; } endfor_nexthops(fi) - fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) goto err_inval; - fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); + fi->fib_metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); - if (rta->rta_mp) { + if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); + if (err != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) goto err_inval; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) goto err_inval; #endif #else @@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, #endif } else { struct fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 4); + + nh->nh_oif = cfg->fc_oif; + nh->nh_gw = cfg->fc_gw; + nh->nh_flags = cfg->fc_flags; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow) - memcpy(&nh->nh_tclassid, rta->rta_flow, 4); + nh->nh_tclassid = cfg->fc_flow; #endif - nh->nh_flags = r->rtm_flags; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = mp_alg; + fi->fib_mp_alg = cfg->fc_mp_alg; #endif - if (fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (fib_props[cfg->fc_type].error) { + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; goto link_it; } - if (r->rtm_scope > RT_SCOPE_HOST) + if (cfg->fc_scope > RT_SCOPE_HOST) goto err_inval; - if (r->rtm_scope == RT_SCOPE_HOST) { + if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ @@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(r, fi, nh)) != 0) + if ((err = fib_check_nh(cfg, fi, nh)) != 0) goto failure; } endfor_nexthops(fi) } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || + fi->fib_prefsrc != cfg->fc_dst) if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -820,7 +811,7 @@ link_it: fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - write_lock_bh(&fib_info_lock); + spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -839,24 +830,24 @@ link_it: head = &fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) - write_unlock_bh(&fib_info_lock); + spin_unlock_bh(&fib_info_lock); return fi; err_inval: err = -EINVAL; failure: - *errp = err; if (fi) { fi->fib_dead = 1; free_fib_info(fi); } - return NULL; + + return ERR_PTR(err); } /* Note! fib_semantic_match intentionally uses RCU list functions. */ int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, __u32 zone, __u32 mask, + struct fib_result *res, __be32 zone, __be32 mask, int prefixlen) { struct fib_alias *fa; @@ -923,8 +914,7 @@ out_fill_res: res->fi = fa->fa_info; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED res->netmask = mask; - res->network = zone & - (0xFFFFFFFF >> (32 - prefixlen)); + res->network = zone & inet_make_mask(prefixlen); #endif atomic_inc(&res->fi->fib_clntref); return 0; @@ -932,229 +922,94 @@ out_fill_res: /* Find appropriate source address to this destination */ -u32 __fib_res_prefsrc(struct fib_result *res) +__be32 __fib_res_prefsrc(struct fib_result *res) { return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); } -int -fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, - struct fib_info *fi, unsigned int flags) +int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, + u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, + struct fib_info *fi, unsigned int flags) { + struct nlmsghdr *nlh; struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + if (nlh == NULL) + return -ENOBUFS; + + rtm = nlmsg_data(nlh); rtm->rtm_family = AF_INET; rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; + NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; - if (rtm->rtm_dst_len) - RTA_PUT(skb, RTA_DST, 4, dst); rtm->rtm_protocol = fi->fib_protocol; + + if (rtm->rtm_dst_len) + NLA_PUT_BE32(skb, RTA_DST, dst); + if (fi->fib_priority) - RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); + NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); + if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + if (fi->fib_prefsrc) - RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); + NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); + if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw) - RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); + NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); + if (fi->fib_nh->nh_oif) - RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); + NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); #ifdef CONFIG_NET_CLS_ROUTE if (fi->fib_nh[0].nh_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); + NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { - struct rtnexthop *nhp; - struct rtattr *mp_head; - if (skb_tailroom(skb) <= RTA_SPACE(0)) - goto rtattr_failure; - mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); + struct rtnexthop *rtnh; + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (mp == NULL) + goto nla_put_failure; for_nexthops(fi) { - if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) - goto rtattr_failure; - nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); - nhp->rtnh_flags = nh->nh_flags & 0xFF; - nhp->rtnh_hops = nh->nh_weight-1; - nhp->rtnh_ifindex = nh->nh_oif; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (rtnh == NULL) + goto nla_put_failure; + + rtnh->rtnh_flags = nh->nh_flags & 0xFF; + rtnh->rtnh_hops = nh->nh_weight - 1; + rtnh->rtnh_ifindex = nh->nh_oif; + if (nh->nh_gw) - RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); + NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); #ifdef CONFIG_NET_CLS_ROUTE if (nh->nh_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid); + NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); #endif - nhp->rtnh_len = skb->tail - (unsigned char*)nhp; + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; } endfor_nexthops(fi); - mp_head->rta_type = RTA_MULTIPATH; - mp_head->rta_len = skb->tail - (u8*)mp_head; - } -#endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -#ifndef CONFIG_IP_NOSIOCRT - -int -fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - struct kern_rta *rta, struct rtentry *r) -{ - int plen; - u32 *ptr; - - memset(rtm, 0, sizeof(*rtm)); - memset(rta, 0, sizeof(*rta)); - - if (r->rt_dst.sa_family != AF_INET) - return -EAFNOSUPPORT; - - /* Check mask for validity: - a) it must be contiguous. - b) destination must have all host bits clear. - c) if application forgot to set correct family (AF_INET), - reject request unless it is absolutely clear i.e. - both family and mask are zero. - */ - plen = 32; - ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; - if (!(r->rt_flags&RTF_HOST)) { - u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; - if (r->rt_genmask.sa_family != AF_INET) { - if (mask || r->rt_genmask.sa_family) - return -EAFNOSUPPORT; - } - if (bad_mask(mask, *ptr)) - return -EINVAL; - plen = inet_mask_len(mask); - } - - nl->nlmsg_flags = NLM_F_REQUEST; - nl->nlmsg_pid = 0; - nl->nlmsg_seq = 0; - nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); - if (cmd == SIOCDELRT) { - nl->nlmsg_type = RTM_DELROUTE; - nl->nlmsg_flags = 0; - } else { - nl->nlmsg_type = RTM_NEWROUTE; - nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; - rtm->rtm_protocol = RTPROT_BOOT; - } - - rtm->rtm_dst_len = plen; - rta->rta_dst = ptr; - - if (r->rt_metric) { - *(u32*)&r->rt_pad3 = r->rt_metric - 1; - rta->rta_priority = (u32*)&r->rt_pad3; - } - if (r->rt_flags&RTF_REJECT) { - rtm->rtm_scope = RT_SCOPE_HOST; - rtm->rtm_type = RTN_UNREACHABLE; - return 0; - } - rtm->rtm_scope = RT_SCOPE_NOWHERE; - rtm->rtm_type = RTN_UNICAST; - - if (r->rt_dev) { - char *colon; - struct net_device *dev; - char devname[IFNAMSIZ]; - - if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) - return -EFAULT; - devname[IFNAMSIZ-1] = 0; - colon = strchr(devname, ':'); - if (colon) - *colon = 0; - dev = __dev_get_by_name(devname); - if (!dev) - return -ENODEV; - rta->rta_oif = &dev->ifindex; - if (colon) { - struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - return -ENODEV; - *colon = ':'; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) - if (strcmp(ifa->ifa_label, devname) == 0) - break; - if (ifa == NULL) - return -ENODEV; - rta->rta_prefsrc = &ifa->ifa_local; - } - } - ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; - if (r->rt_gateway.sa_family == AF_INET && *ptr) { - rta->rta_gw = ptr; - if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) - rtm->rtm_scope = RT_SCOPE_UNIVERSE; + nla_nest_end(skb, mp); } +#endif + return nlmsg_end(skb, nlh); - if (cmd == SIOCDELRT) - return 0; - - if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) - return -EINVAL; - - if (rtm->rtm_scope == RT_SCOPE_NOWHERE) - rtm->rtm_scope = RT_SCOPE_LINK; - - if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { - struct rtattr *rec; - struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); - if (mx == NULL) - return -ENOMEM; - rta->rta_mx = mx; - mx->rta_type = RTA_METRICS; - mx->rta_len = RTA_LENGTH(0); - if (r->rt_flags&RTF_MTU) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_ADVMSS; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; - } - if (r->rt_flags&RTF_WINDOW) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_WINDOW; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_window; - } - if (r->rt_flags&RTF_IRTT) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_RTT; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; - } - } - return 0; +nla_put_failure: + return nlmsg_cancel(skb, nlh); } -#endif - /* Update FIB if: - local address disappeared -> we must delete all the entries @@ -1162,7 +1017,7 @@ fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - device went down -> we must shutdown all nexthops going via it. */ -int fib_sync_down(u32 local, struct net_device *dev, int force) +int fib_sync_down(__be32 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 01801c0f885..d17990ec724 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1124,17 +1124,14 @@ err: return fa_head; } -static int -fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *new_fa; struct list_head *fa_head = NULL; struct fib_info *fi; - int plen = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; u32 key, mask; int err; struct leaf *l; @@ -1142,13 +1139,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); - pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); mask = ntohl(inet_make_mask(plen)); @@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = key & mask; - fi = fib_create_info(r, rta, nlhdr, &err); - - if (!fi) + fi = fib_create_info(cfg); + if (IS_ERR(fi)) { + err = PTR_ERR(fi); goto err; + } l = fib_find_node(t, key); fa = NULL; @@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (nlhdr->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; @@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fi_drop = fa->fa_info; new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state &= ~FA_S_ACCESSED; @@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { goto out; } } - if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, (fa ? &fa->fa_list : fa_head)); rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + &cfg->fc_nlinfo); succeeded: return 0; @@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key) return 1; } -static int -fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; - int plen = r->rtm_dst_len; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; struct leaf_info *li; - if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); mask = ntohl(inet_make_mask(plen)); if (key & ~mask) @@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (fa->fa_tos != tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, nlhdr, rta, fi) == 0) { + if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, return -ESRCH; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, + &cfg->fc_nlinfo); l = fib_find_node(t, key); li = find_leaf_info(l, plen); @@ -1846,9 +1834,9 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi int i, s_i; struct fib_alias *fa; - u32 xkey = htonl(key); + __be32 xkey = htonl(key); - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1866,16 +1854,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi tb->tb_id, fa->fa_type, fa->fa_scope, - &xkey, + xkey, plen, fa->fa_tos, fa->fa_info, 0) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } i++; } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -1886,14 +1874,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str struct list_head *fa_head; struct leaf *l = NULL; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); fa_head = get_fa_head(l, plen); @@ -1904,11 +1892,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str continue; if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -1917,23 +1905,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin int m, s_m; struct trie *t = (struct trie *) tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; rcu_read_lock(); for (m = 0; m <= 32; m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { - cb->args[1] = m; + cb->args[2] = m; goto out; } } rcu_read_unlock(); - cb->args[1] = m; + cb->args[2] = m; return skb->len; out: rcu_read_unlock(); @@ -1943,9 +1931,9 @@ out: /* Fix more generic FIB names for init later */ #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; @@ -2293,7 +2281,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; - t_key prf = ntohl(MASK_PFX(tn->key, tn->pos)); + __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); if (!NODE_PARENT(n)) { if (iter->trie == trie_local) @@ -2309,7 +2297,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) } else { struct leaf *l = (struct leaf *) n; int i; - u32 val = ntohl(l->key); + __be32 val = htonl(l->key); seq_indent(seq, iter->depth); seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); @@ -2372,7 +2360,7 @@ static struct file_operations fib_trie_fops = { .release = seq_release_private, }; -static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) +static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) { static unsigned type2flags[RTN_MAX + 1] = { [7] = RTF_REJECT, [8] = RTF_REJECT, @@ -2381,7 +2369,7 @@ static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi) if (fi && fi->fib_nh->nh_gw) flags |= RTF_GATEWAY; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; return flags; @@ -2415,7 +2403,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) for (i=32; i>=0; i--) { struct leaf_info *li = find_leaf_info(l, i); struct fib_alias *fa; - u32 mask, prefix; + __be32 mask, prefix; if (!li) continue; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4c86ac3d882..b39a37a4754 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -104,7 +104,7 @@ struct icmp_bxm { struct { struct icmphdr icmph; - __u32 times[3]; + __be32 times[3]; } data; int head_len; struct ip_options replyopts; @@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = { }; /* Control parameters for ECHO replies. */ -int sysctl_icmp_echo_ignore_all; -int sysctl_icmp_echo_ignore_broadcasts = 1; +int sysctl_icmp_echo_ignore_all __read_mostly; +int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; /* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses = 1; +int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; /* * Configurable global rate limit. @@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses = 1; * time exceeded (11), parameter problem (12) */ -int sysctl_icmp_ratelimit = 1 * HZ; -int sysctl_icmp_ratemask = 0x1818; -int sysctl_icmp_errors_use_inbound_ifaddr; +int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; +int sysctl_icmp_ratemask __read_mostly = 0x1818; +int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; /* * ICMP control array. This specifies what to do with each ICMP. @@ -381,7 +381,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; - u32 daddr; + __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; @@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -429,14 +430,14 @@ out_unlock: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) +void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; struct icmp_bxm icmp_param; struct rtable *rt = (struct rtable *)skb_in->dst; struct ipcm_cookie ipc; - u32 saddr; + __be32 saddr; u8 tos; if (!rt) @@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -893,7 +895,7 @@ static void icmp_address_reply(struct sk_buff *skb) if (in_dev->ifa_list && IN_DEV_LOG_MARTIANS(in_dev) && IN_DEV_FORWARD(in_dev)) { - u32 _mask, *mp; + __be32 _mask, *mp; mp = skb_header_pointer(skb, 0, sizeof(_mask), &_mask); BUG_ON(mp == NULL); @@ -928,7 +930,7 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8e8117c19e4..6eee71647b7 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -138,14 +138,14 @@ time_before(jiffies, (in_dev)->mr_v2_seen))) static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); #endif static void ip_mc_clear_src(struct ip_mc_list *pmc); -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta); +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta); static void ip_ma_put(struct ip_mc_list *im) { @@ -426,7 +426,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, first = 1; psf_prev = NULL; for (psf=*psf_list; psf; psf=psf_next) { - u32 *psrc; + __be32 *psrc; psf_next = psf->sf_next; @@ -439,7 +439,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, if (isquery) psf->sf_gsresp = 0; - if (AVAILABLE(skb) < sizeof(u32) + + if (AVAILABLE(skb) < sizeof(__be32) + first*sizeof(struct igmpv3_grec)) { if (truncate && !first) break; /* truncate these */ @@ -455,7 +455,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, skb = add_grhead(skb, pmc, type, &pgr); first = 0; } - psrc = (u32 *)skb_put(skb, sizeof(u32)); + psrc = (__be32 *)skb_put(skb, sizeof(__be32)); *psrc = psf->sf_inaddr; scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || @@ -630,8 +630,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct igmphdr *ih; struct rtable *rt; struct net_device *dev = in_dev->dev; - u32 group = pmc ? pmc->multiaddr : 0; - u32 dst; + __be32 group = pmc ? pmc->multiaddr : 0; + __be32 dst; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -748,7 +748,7 @@ static void igmp_timer_expire(unsigned long data) } /* mark EXCLUDE-mode sources */ -static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; @@ -775,7 +775,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) +static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) { struct ip_sf_list *psf; int i, scount; @@ -803,7 +803,7 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __u32 *srcs) return 1; } -static void igmp_heard_report(struct in_device *in_dev, u32 group) +static void igmp_heard_report(struct in_device *in_dev, __be32 group) { struct ip_mc_list *im; @@ -828,7 +828,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, struct igmphdr *ih = skb->h.igmph; struct igmpv3_query *ih3 = (struct igmpv3_query *)ih; struct ip_mc_list *im; - u32 group = ih->group; + __be32 group = ih->group; int max_delay; int mark = 0; @@ -862,7 +862,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, ih3 = (struct igmpv3_query *) skb->h.raw; if (ih3->nsrcs) { if (!pskb_may_pull(skb, sizeof(struct igmpv3_query) - + ntohs(ih3->nsrcs)*sizeof(__u32))) + + ntohs(ih3->nsrcs)*sizeof(__be32))) return; ih3 = (struct igmpv3_query *) skb->h.raw; } @@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb) goto drop; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ @@ -985,7 +985,7 @@ drop: * Add a filter to a device */ -static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; @@ -1005,7 +1005,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, u32 addr) * Remove a filter from a device */ -static void ip_mc_filter_del(struct in_device *in_dev, u32 addr) +static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) { char buf[MAX_ADDR_LEN]; struct net_device *dev = in_dev->dev; @@ -1055,7 +1055,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __u32 multiaddr) +static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) { struct ip_mc_list *pmc, *pmc_prev; struct ip_sf_list *psf, *psf_next; @@ -1193,7 +1193,7 @@ static void igmp_group_added(struct ip_mc_list *im) * A socket has joined a multicast group on device dev. */ -void ip_mc_inc_group(struct in_device *in_dev, u32 addr) +void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; @@ -1252,7 +1252,7 @@ out: * A socket has left a multicast group on device dev */ -void ip_mc_dec_group(struct in_device *in_dev, u32 addr) +void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *i, **ip; @@ -1397,12 +1397,12 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; -int sysctl_igmp_max_msf = IP_MAX_MSF; +int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; +int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; int rv = 0; @@ -1450,8 +1450,8 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #define igmp_ifc_event(x) do { } while (0) #endif -static int ip_mc_del_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int changerec = 0; @@ -1517,7 +1517,7 @@ out_unlock: * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, - __u32 *psfsrc, int delta) + __be32 *psfsrc, int delta) { struct ip_sf_list *psf, *psf_prev; @@ -1623,8 +1623,8 @@ static int sf_setstate(struct ip_mc_list *pmc) /* * Add multicast source filter list to the interface list */ -static int ip_mc_add_src(struct in_device *in_dev, __u32 *pmca, int sfmode, - int sfcount, __u32 *psfsrc, int delta) +static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + int sfcount, __be32 *psfsrc, int delta) { struct ip_mc_list *pmc; int isexclude; @@ -1717,7 +1717,7 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) { int err; - u32 addr = imr->imr_multiaddr.s_addr; + __be32 addr = imr->imr_multiaddr.s_addr; struct ip_mc_socklist *iml=NULL, *i; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -1791,7 +1791,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *iml, **imlp; struct in_device *in_dev; - u32 group = imr->imr_multiaddr.s_addr; + __be32 group = imr->imr_multiaddr.s_addr; u32 ifindex; int ret = -EADDRNOTAVAIL; @@ -1829,7 +1829,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct { int err; struct ip_mreqn imr; - u32 addr = mreqs->imr_multiaddr; + __be32 addr = mreqs->imr_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev = NULL; struct inet_sock *inet = inet_sk(sk); @@ -1883,7 +1883,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct rv = !0; for (i=0; i<psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1935,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; i<psl->sl_count; i++) { rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, - sizeof(__u32)); + sizeof(__be32)); if (rv == 0) break; } @@ -1960,7 +1960,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) { int err = 0; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -2044,7 +2044,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, { int err, len, count, copycount; struct ip_mreqn imr; - u32 addr = msf->imsf_multiaddr; + __be32 addr = msf->imsf_multiaddr; struct ip_mc_socklist *pmc; struct in_device *in_dev; struct inet_sock *inet = inet_sk(sk); @@ -2103,7 +2103,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, { int err, i, count, copycount; struct sockaddr_in *psin; - u32 addr; + __be32 addr; struct ip_mc_socklist *pmc; struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; @@ -2156,7 +2156,7 @@ done: /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, u32 loc_addr, u32 rmt_addr, int dif) +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; @@ -2216,7 +2216,7 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, u32 mc_addr, u32 src_addr, u16 proto) +int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; struct ip_sf_list *psf; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e50a1bfd7cc..96bbe2a0aa1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -39,7 +39,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 }; int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); + const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -52,7 +52,7 @@ int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; @@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -341,10 +342,10 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_route_req); -static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, +static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); + return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -355,8 +356,8 @@ static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, - const __u16 rport, const __u32 raddr, - const __u32 laddr) + const __be16 rport, const __be32 raddr, + const __be32 laddr) { const struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; @@ -509,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + + security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 492858e6faf..77761ac4f7b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -36,8 +36,8 @@ static const struct inet_diag_handler **inet_diag_table; struct inet_diag_entry { - u32 *saddr; - u32 *daddr; + __be32 *saddr; + __be32 *daddr; u16 sport; u16 dport; u16 family; @@ -294,7 +294,7 @@ out: return err; } -static int bitstring_match(const u32 *a1, const u32 *a2, int bits) +static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits) { int words = bits >> 5; @@ -305,8 +305,8 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) return 0; } if (bits) { - __u32 w1, w2; - __u32 mask; + __be32 w1, w2; + __be32 mask; w1 = a1[words]; w2 = a2[words]; @@ -352,7 +352,7 @@ static int inet_diag_bc_run(const void *bc, int len, case INET_DIAG_BC_S_COND: case INET_DIAG_BC_D_COND: { struct inet_diag_hostcond *cond; - u32 *addr; + __be32 *addr; cond = (struct inet_diag_hostcond *)(op + 1); if (cond->port != -1 && diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 95fac553299..244c4f445c7 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, - const unsigned short hnum, const int dif) +static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, + const __be32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -135,7 +137,7 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad const struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { - const __u32 rcv_saddr = inet->rcv_saddr; + const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; if (rcv_saddr) { @@ -159,6 +161,33 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad return result; } +/* Optimize the common listener case. */ +struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const __be32 daddr, const unsigned short hnum, + const int dif) +{ + struct sock *sk = NULL; + const struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = inet_lookup_listener_slow(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* called with local bh disabled */ @@ -168,11 +197,11 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - u32 daddr = inet->rcv_saddr; - u32 saddr = inet->daddr; + __be32 daddr = inet->rcv_saddr; + __be32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct sock *sk2; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 03ff62ebcfe..2b1a54b59c4 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -126,12 +126,9 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!peer_cachep) - panic("cannot create inet_peer_cache"); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ @@ -166,7 +163,7 @@ static void unlink_from_unused(struct inet_peer *p) for (u = peer_root; u != peer_avl_empty; ) { \ if (daddr == u->v4daddr) \ break; \ - if (daddr < u->v4daddr) \ + if ((__force __u32)daddr < (__force __u32)u->v4daddr) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ @@ -371,7 +368,7 @@ static int cleanup_once(unsigned long ttl) } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__u32 daddr, int create) +struct inet_peer *inet_getpeer(__be32 daddr, int create) { struct inet_peer *p, *n; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b84b53a4752..74046efdf87 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -54,15 +54,15 @@ * even the most extreme cases without allowing an attacker to measurably * harm machine performance. */ -int sysctl_ipfrag_high_thresh = 256*1024; -int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; +int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; -int sysctl_ipfrag_max_dist = 64; +int sysctl_ipfrag_max_dist __read_mostly = 64; /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ -int sysctl_ipfrag_time = IP_FRAG_TIME; +int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; struct ipfrag_skb_cb { @@ -77,9 +77,9 @@ struct ipq { struct hlist_node list; struct list_head lru_list; /* lru list member */ u32 user; - u32 saddr; - u32 daddr; - u16 id; + __be32 saddr; + __be32 daddr; + __be16 id; u8 protocol; u8 last_in; #define COMPLETE 4 @@ -123,14 +123,15 @@ static __inline__ void ipq_unlink(struct ipq *ipq) write_unlock(&ipfrag_lock); } -static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) +static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { - return jhash_3words((u32)id << 16 | prot, saddr, daddr, + return jhash_3words((__force u32)id << 16 | prot, + (__force u32)saddr, (__force u32)daddr, ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); } static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; +int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; static void ipfrag_secret_rebuild(unsigned long dummy) { @@ -387,8 +388,8 @@ out_nomem: static inline struct ipq *ip_find(struct iphdr *iph, u32 user) { __be16 id = iph->id; - __u32 saddr = iph->saddr; - __u32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + __be32 daddr = iph->daddr; __u8 protocol = iph->protocol; unsigned int hash; struct ipq *qp; @@ -665,7 +666,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip_frag_mem); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0f9b3a31997..f5fba051df3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -393,7 +393,8 @@ out: int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - int rel_info = 0; + __be32 rel_info = 0; + __u32 n = 0; u16 flags; int grehlen = (iph->ihl<<2) + 4; struct sk_buff *skb2; @@ -422,14 +423,16 @@ out: default: return; case ICMP_PARAMETERPROB: - if (skb->h.icmph->un.gateway < (iph->ihl<<2)) + n = ntohl(skb->h.icmph->un.gateway) >> 24; + if (n < (iph->ihl<<2)) return; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = skb->h.icmph->un.gateway - grehlen; + n -= grehlen; + rel_info = htonl(n << 24); break; case ICMP_DEST_UNREACH: @@ -440,13 +443,14 @@ out: return; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - rel_info = ntohs(skb->h.icmph->un.frag.mtu); - if (rel_info < grehlen+68) + n = ntohs(skb->h.icmph->un.frag.mtu); + if (n < grehlen+68) return; - rel_info -= grehlen; + n -= grehlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (rel_info > ntohs(eiph->tot_len)) + if (n > ntohs(eiph->tot_len)) return; + rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -508,12 +512,11 @@ out: /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) { + if (n > dst_mtu(skb2->dst)) { kfree_skb(skb2); return; } - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); + skb2->dst->ops->update_pmtu(skb2->dst, n); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { @@ -576,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb) if (flags&GRE_CSUM) { switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: csum = (u16)csum_fold(skb->csum); if (!csum) break; @@ -584,7 +587,7 @@ static int ipgre_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_COMPLETE; } offset += 4; } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 406056edc02..8dabbfc3126 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -24,6 +24,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/route.h> +#include <net/cipso_ipv4.h> /* * Write options to IP header, record destination address to @@ -37,7 +38,7 @@ */ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, - u32 daddr, struct rtable *rt, int is_frag) + __be32 daddr, struct rtable *rt, int is_frag) { unsigned char * iph = skb->nh.raw; @@ -56,7 +57,7 @@ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, rt); if (opt->ts_needtime) { struct timeval tv; - __u32 midtime; + __be32 midtime; do_gettimeofday(&tv); midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4); @@ -90,7 +91,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) unsigned char *sptr, *dptr; int soffset, doffset; int optlen; - u32 daddr; + __be32 daddr; memset(dopt, 0, sizeof(struct ip_options)); @@ -147,7 +148,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->ts_needtime = 0; if (soffset + 8 <= optlen) { - __u32 addr; + __be32 addr; memcpy(&addr, sptr+soffset-1, 4); if (inet_addr_type(addr) != RTN_LOCAL) { @@ -164,7 +165,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) } if (sopt->srr) { unsigned char * start = sptr+sopt->srr; - u32 faddr; + __be32 faddr; optlen = start[1]; soffset = start[2]; @@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->is_strictroute = sopt->is_strictroute; } } + if (sopt->cipso) { + optlen = sptr[sopt->cipso+1]; + dopt->cipso = dopt->optlen+sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->cipso, optlen); + dptr += optlen; + dopt->optlen += optlen; + } while (dopt->optlen & 3) { *dptr++ = IPOPT_END; dopt->optlen++; @@ -354,7 +362,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) goto error; } if (optptr[2] <= optlen) { - __u32 * timeptr = NULL; + __be32 *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -363,7 +371,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__u32*)&optptr[optptr[2]-1]; + timeptr = (__be32*)&optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -375,7 +383,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) opt->ts = optptr - iph; if (skb) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__u32*)&optptr[optptr[2]+3]; + timeptr = (__be32*)&optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -388,12 +396,12 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) } opt->ts = optptr - iph; { - u32 addr; + __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); if (inet_addr_type(addr) == RTN_UNICAST) break; if (skb) - timeptr = (__u32*)&optptr[optptr[2]+3]; + timeptr = (__be32*)&optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -407,10 +415,10 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) } if (timeptr) { struct timeval tv; - __u32 midtime; + __be32 midtime; do_gettimeofday(&tv); midtime = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); - memcpy(timeptr, &midtime, sizeof(__u32)); + memcpy(timeptr, &midtime, sizeof(__be32)); opt->is_changed = 1; } } else { @@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; + case IPOPT_CIPSO: + if (opt->cipso) { + pp_ptr = optptr; + goto error; + } + opt->cipso = optptr - iph; + if (cipso_v4_validate(&optptr)) { + pp_ptr = optptr; + goto error; + } + break; case IPOPT_SEC: case IPOPT_SID: default: @@ -506,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp, opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; opt->is_data = 1; - opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { kfree(opt); return -EINVAL; @@ -589,7 +607,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; - u32 nexthop; + __be32 nexthop; struct iphdr *iph = skb->nh.iph; unsigned char * optptr = skb->nh.raw + opt->srr; struct rtable *rt = (struct rtable*)skb->dst; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2ede167e04..fc195a44fc2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,7 +83,7 @@ #include <linux/netlink.h> #include <linux/tcp.h> -int sysctl_ip_default_ttl = IPDEFTTL; +int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) @@ -118,7 +118,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * */ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, - u32 saddr, u32 daddr, struct ip_options *opt) + __be32 saddr, __be32 daddr, struct ip_options *opt) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)skb->dst; @@ -306,7 +306,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); if (rt == NULL) { - u32 daddr; + __be32 daddr; /* Use correct destination address if we have options. */ daddr = inet->daddr; @@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. */ + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -425,7 +426,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs; + unsigned int mtu, hlen, left, len, ll_rs, pad; int offset; __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; @@ -555,14 +556,13 @@ slow_path: left = skb->len - hlen; /* Space per frame */ ptr = raw + hlen; /* Where to start from */ -#ifdef CONFIG_BRIDGE_NETFILTER /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header */ - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); - mtu -= nf_bridge_pad(skb); -#else - ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); -#endif + * we need to make room for the encapsulating header + */ + pad = nf_bridge_pad(skb); + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); + mtu -= pad; + /* * Fragment the datagram. */ @@ -679,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk { struct iovec *iov = from; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { @@ -735,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -843,7 +843,7 @@ int ip_append_data(struct sock *sk, length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_ALL_CSUM && !exthdrlen) - csummode = CHECKSUM_HW; + csummode = CHECKSUM_PARTIAL; inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && @@ -1340,7 +1340,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar char data[40]; } replyopts; struct ipcm_cookie ipc; - u32 daddr; + __be32 daddr; struct rtable *rt = (struct rtable*)skb->dst; if (ip_options_echo(&replyopts.opt, skb)) @@ -1366,6 +1366,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2d05c4133d3..4b132953bcc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -254,7 +254,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s } void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - u16 port, u32 info, u8 *payload) + __be16 port, u32 info, u8 *payload) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; @@ -283,7 +283,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info) +void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a0c28b2b756..2017d36024d 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -32,7 +32,7 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -46,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) int err, plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; plen = skb->len; @@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; ihlen = iph->ihl * 4; @@ -176,14 +176,14 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) ip_send_check(iph); return 0; } static void ipcomp4_err(struct sk_buff *skb, u32 info) { - u32 spi; + __be32 spi; struct iphdr *iph = (struct iphdr *)skb->data; struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_tfm **tfms) +static void ipcomp_free_tfms(struct crypto_comp **tfms) { struct ipcomp_tfms *pos; int cpu; @@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; @@ -415,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -427,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index cb8a92f18ef..1fbb38415b1 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -31,7 +31,6 @@ * -- Josef Siemes <jsiemes@web.de>, Aug 2002 */ -#include <linux/config.h> #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 76ab50b0d6e..0c455652922 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -341,7 +341,8 @@ out: int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - int rel_info = 0; + __be32 rel_info = 0; + __u32 n = 0; struct sk_buff *skb2; struct flowi fl; struct rtable *rt; @@ -354,14 +355,15 @@ out: default: return 0; case ICMP_PARAMETERPROB: - if (skb->h.icmph->un.gateway < hlen) + n = ntohl(skb->h.icmph->un.gateway) >> 24; + if (n < hlen) return 0; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = skb->h.icmph->un.gateway - hlen; + rel_info = htonl((n - hlen) << 24); break; case ICMP_DEST_UNREACH: @@ -372,13 +374,14 @@ out: return 0; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - rel_info = ntohs(skb->h.icmph->un.frag.mtu); - if (rel_info < hlen+68) + n = ntohs(skb->h.icmph->un.frag.mtu); + if (n < hlen+68) return 0; - rel_info -= hlen; + n -= hlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (rel_info > ntohs(eiph->tot_len)) + if (n > ntohs(eiph->tot_len)) return 0; + rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -440,12 +443,11 @@ out: /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (rel_info > dst_mtu(skb2->dst)) { + if (n > dst_mtu(skb2->dst)) { kfree_skb(skb2); return 0; } - skb2->dst->ops->update_pmtu(skb2->dst, rel_info); - rel_info = htonl(rel_info); + skb2->dst->ops->update_pmtu(skb2->dst, n); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 85893eef6b1..97cfa97c8ab 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -312,7 +312,8 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e = NLMSG_DATA(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -461,7 +462,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return 0; } -static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp) +static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) { int line=MFC_HASH(mcastgrp,origin); struct mfc_cache *c; @@ -512,7 +513,6 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (skb->nh.iph->version == 0) { - int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { @@ -525,7 +525,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } - err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); + + rtnl_unicast(skb, NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } @@ -1096,7 +1097,7 @@ static struct notifier_block ip_mr_notifier={ * important for multicast video. */ -static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr) +static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr)); @@ -1899,11 +1900,8 @@ void __init ip_mr_init(void) { mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), - 0, SLAB_HWCACHE_ALIGN, + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); - if (!mrt_cachep) - panic("cannot allocate ip_mrt_cache"); - init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 87b83813cf2..8832eb517d5 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -115,9 +115,9 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port) +static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) { - return jhash_3words(addr, port, proto, ip_vs_conn_rnd) + return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -188,7 +188,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; @@ -215,7 +215,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { struct ip_vs_conn *cp; @@ -234,7 +234,7 @@ struct ip_vs_conn *ip_vs_conn_in_get /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; @@ -274,7 +274,7 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -324,7 +324,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) /* * Fill a no_client_port connection with a client port number */ -void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport) +void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { spin_lock(&cp->lock); @@ -508,10 +508,10 @@ int ip_vs_check_template(struct ip_vs_conn *ct) /* * Invalidate the connection template */ - if (ct->vport != 65535) { + if (ct->vport != htons(0xffff)) { if (ip_vs_conn_unhash(ct)) { - ct->dport = 65535; - ct->vport = 65535; + ct->dport = htons(0xffff); + ct->vport = htons(0xffff); ct->cport = 0; ip_vs_conn_hash(ct); } @@ -596,8 +596,8 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport, - __u32 daddr, __u16 dport, unsigned flags, +ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, + __be32 daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3f47ad8e1ca..6dee03935f7 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -209,14 +209,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, const struct sk_buff *skb, - __u16 ports[2]) + __be16 ports[2]) { struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - __u16 dport; /* destination port to forward */ - __u32 snet; /* source network of the client, after masking */ + __be16 dport; /* destination port to forward */ + __be32 snet; /* source network of the client, after masking */ /* Mask saddr with the netmask to adjust template granularity */ snet = iph->saddr & svc->netmask; @@ -383,7 +383,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph->ihl*4, sizeof(_ports), _ports); @@ -446,7 +446,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; struct iphdr *iph = skb->nh.iph; pptr = skb_header_pointer(skb, iph->ihl*4, @@ -576,7 +576,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, /* the TCP/UDP port */ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { - __u16 *ports = (void *)ciph + ciph->ihl*4; + __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) ports[1] = cp->vport; @@ -775,7 +775,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, ihl, sizeof(_ports), _ports); diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 6a28fafe910..f261616e460 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -283,7 +283,7 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); * Returns hash value for virtual service */ static __inline__ unsigned -ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port) +ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) { register unsigned porth = ntohs(port); @@ -365,7 +365,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {proto,addr,port} in the service table. */ static __inline__ struct ip_vs_service * -__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport) +__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; @@ -410,7 +410,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport) +ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -480,7 +480,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port) +static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) { register unsigned porth = ntohs(port); @@ -531,7 +531,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by <proto,addr,port> in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) +ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -562,7 +562,7 @@ ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) { struct ip_vs_dest *dest; @@ -591,7 +591,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -773,8 +773,8 @@ static int ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; int ret; EnterFunction(2); @@ -879,8 +879,8 @@ static int ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; EnterFunction(2); @@ -991,8 +991,8 @@ static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; EnterFunction(2); diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 9fee19c4c61..502111fba87 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -66,7 +66,7 @@ struct ip_vs_dh_bucket { /* * Returns hash value for IPVS DH entry */ -static inline unsigned ip_vs_dh_hashkey(__u32 addr) +static inline unsigned ip_vs_dh_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; } @@ -76,7 +76,7 @@ static inline unsigned ip_vs_dh_hashkey(__u32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __u32 addr) +ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) { return (tbl[ip_vs_dh_hashkey(addr)]).dest; } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 37fafb1fbcf..e433cb0ff89 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -32,6 +32,7 @@ #include <linux/ip.h> #include <net/protocol.h> #include <net/tcp.h> +#include <asm/unaligned.h> #include <net/ip_vs.h> @@ -44,8 +45,8 @@ * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper * First port is set to the default port. */ -static int ports[IP_VS_APP_MAX_PORTS] = {21, 0}; -module_param_array(ports, int, NULL, 0); +static unsigned short ports[IP_VS_APP_MAX_PORTS] = {21, 0}; +module_param_array(ports, ushort, NULL, 0); MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); @@ -74,7 +75,7 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) */ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, const char *pattern, size_t plen, char term, - __u32 *addr, __u16 *port, + __be32 *addr, __be16 *port, char **start, char **end) { unsigned char p[6]; @@ -114,8 +115,8 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (i != 5) return -1; - *addr = (p[3]<<24) | (p[2]<<16) | (p[1]<<8) | p[0]; - *port = (p[5]<<8) | p[4]; + *addr = get_unaligned((__be32 *)p); + *port = get_unaligned((__be16 *)(p + 4)); return 1; } @@ -140,8 +141,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_limit; char *start, *end; - __u32 from; - __u16 port; + __be32 from; + __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; @@ -199,7 +200,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, from = n_cp->vaddr; port = n_cp->vport; sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), - port&255, (port>>8)&255); + ntohs(port)&255, (ntohs(port)>>8)&255); buf_len = strlen(buf); /* @@ -243,8 +244,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_start, *data_limit; char *start, *end; - __u32 to; - __u16 port; + __be32 to; + __be16 port; struct ip_vs_conn *n_cp; /* no diff required for incoming packets */ @@ -365,12 +366,6 @@ static int __init ip_vs_ftp_init(void) for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { if (!ports[i]) continue; - if (ports[i] < 0 || ports[i] > 0xffff) { - IP_VS_WARNING("ip_vs_ftp: Ignoring invalid " - "configuration port[%d] = %d\n", - i, ports[i]); - continue; - } ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); if (ret) break; diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 6e5cb92a5c8..524751e031d 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -87,7 +87,7 @@ static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; */ struct ip_vs_lblc_entry { struct list_head list; - __u32 addr; /* destination IP address */ + __be32 addr; /* destination IP address */ struct ip_vs_dest *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ }; @@ -160,7 +160,7 @@ static struct ctl_table_header * sysctl_header; * IP address to a server. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest) +ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) { struct ip_vs_lblc_entry *en; @@ -195,7 +195,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) /* * Returns hash value for IPVS LBLC entry */ -static inline unsigned ip_vs_lblc_hashkey(__u32 addr) +static inline unsigned ip_vs_lblc_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK; } @@ -234,7 +234,7 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) * Get ip_vs_lblc_entry associated with supplied parameters. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr) +ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) { unsigned hash; struct ip_vs_lblc_entry *en; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 32ba37ba72d..08990192b6e 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -276,7 +276,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) */ struct ip_vs_lblcr_entry { struct list_head list; - __u32 addr; /* destination IP address */ + __be32 addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ }; @@ -348,7 +348,7 @@ static struct ctl_table_header * sysctl_header; * new/free a ip_vs_lblcr_entry, which is a mapping of a destination * IP address to a server. */ -static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr) +static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) { struct ip_vs_lblcr_entry *en; @@ -381,7 +381,7 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) /* * Returns hash value for IPVS LBLCR entry */ -static inline unsigned ip_vs_lblcr_hashkey(__u32 addr) +static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; } @@ -420,7 +420,7 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) * Get ip_vs_lblcr_entry associated with supplied parameters. */ static inline struct ip_vs_lblcr_entry * -ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr) +ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) { unsigned hash; struct ip_vs_lblcr_entry *en; diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 867d4e9c659..c4528b5c800 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -176,7 +176,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, pp->name, NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); else { - __u16 _ports[2], *pptr + __be16 _ports[2], *pptr ; pptr = skb_header_pointer(skb, offset + ih->ihl*4, sizeof(_ports), _ports); diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index bc28b1160a3..bfe779e7459 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -29,7 +29,7 @@ static struct ip_vs_conn * tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -50,7 +50,7 @@ static struct ip_vs_conn * tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -112,12 +112,12 @@ tcp_conn_schedule(struct sk_buff *skb, static inline void -tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip, - u16 oldport, u16 newport) +tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, + __be16 oldport, __be16 newport) { tcph->check = ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ 0xFFFF, + ip_vs_check_diff(oldport ^ htonl(0xFFFF), newport, tcph->check)); } @@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - tcphoff, skb->nh.iph->protocol, skb->csum)) { @@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 89d9175d8f2..54aa7603591 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -29,7 +29,7 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -54,7 +54,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ports), _ports); @@ -117,15 +117,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip, - u16 oldport, u16 newport) +udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, + __be16 oldport, __be16 newport) { uhdr->check = ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ 0xFFFF, + ip_vs_check_diff(oldport ^ htonl(0xFFFF), newport, uhdr->check)); if (!uhdr->check) - uhdr->check = 0xFFFF; + uhdr->check = htonl(0xFFFF); } static int @@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = 0xFFFF; + udph->check = htonl(0xFFFF); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); @@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_HW) + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - udphoff, @@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* CHECKSUM_UNNECESSARY */ + /* No need to checksum. */ break; } } diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 7775e6cc68b..338668f88fe 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -63,7 +63,7 @@ struct ip_vs_sh_bucket { /* * Returns hash value for IPVS SH entry */ -static inline unsigned ip_vs_sh_hashkey(__u32 addr) +static inline unsigned ip_vs_sh_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; } @@ -73,7 +73,7 @@ static inline unsigned ip_vs_sh_hashkey(__u32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr) +ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr) { return (tbl[ip_vs_sh_hashkey(addr)]).dest; } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 1bca714bda3..91a075edd68 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -48,16 +48,16 @@ struct ip_vs_sync_conn { /* Protocol, addresses and port numbers */ __u8 protocol; /* Which protocol (TCP/UDP) */ - __u16 cport; - __u16 vport; - __u16 dport; - __u32 caddr; /* client address */ - __u32 vaddr; /* virtual address */ - __u32 daddr; /* destination address */ + __be16 cport; + __be16 vport; + __be16 dport; + __be32 caddr; /* client address */ + __be32 vaddr; /* virtual address */ + __be32 daddr; /* destination address */ /* Flags and state transition */ - __u16 flags; /* status flags */ - __u16 state; /* state info */ + __be16 flags; /* status flags */ + __be16 state; /* state info */ /* The sequence options start here */ }; @@ -464,7 +464,7 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname) { struct net_device *dev; - u32 addr; + __be32 addr; struct sockaddr_in sin; if ((dev = __dev_get_by_name(ifname)) == NULL) @@ -836,7 +836,7 @@ static int fork_sync_thread(void *startup) int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) { - DECLARE_COMPLETION(startup); + DECLARE_COMPLETION_ONSTACK(startup); pid_t pid; if ((state == IP_VS_STATE_MASTER && sync_master_pid) || diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 52c12e9edbb..e1f77bd7c9a 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -232,7 +232,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { - __u16 _pt, *p; + __be16 _pt, *p; p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c index d25ec4ae09e..92b04823e03 100644 --- a/net/ipv4/multipath_wrandom.c +++ b/net/ipv4/multipath_wrandom.c @@ -60,8 +60,8 @@ struct multipath_dest { struct list_head list; const struct fib_nh *nh_info; - __u32 netmask; - __u32 network; + __be32 netmask; + __be32 network; unsigned char prefixlen; struct rcu_head rcu; @@ -76,7 +76,7 @@ struct multipath_route { struct list_head list; int oif; - __u32 gw; + __be32 gw; struct list_head dests; struct rcu_head rcu; @@ -128,8 +128,8 @@ static unsigned char __multipath_lookup_weight(const struct flowi *fl, /* find state entry for destination */ list_for_each_entry_rcu(d, &target_route->dests, list) { - __u32 targetnetwork = fl->fl4_dst & - (0xFFFFFFFF >> (32 - d->prefixlen)); + __be32 targetnetwork = fl->fl4_dst & + inet_make_mask(d->prefixlen); if ((targetnetwork & d->netmask) == d->network) { weight = d->nh_info->nh_weight; @@ -217,8 +217,8 @@ static void wrandom_select_route(const struct flowi *flp, *rp = decision; } -static void wrandom_set_nhinfo(__u32 network, - __u32 netmask, +static void wrandom_set_nhinfo(__be32 network, + __be32 netmask, unsigned char prefixlen, const struct fib_nh *nh) { diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6a9e34b794b..5ac15379a0c 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -128,8 +128,8 @@ EXPORT_SYMBOL(ip_nat_decode_session); */ struct ip_rt_info { - u_int32_t daddr; - u_int32_t saddr; + __be32 daddr; + __be32 saddr; u_int8_t tos; }; @@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_HW: + case CHECKSUM_COMPLETE: if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) break; if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ef0b5aac583..a55b8ff70de 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -278,17 +278,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_DSCP - tristate "DSCP match support" - depends on IP_NF_IPTABLES - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_AH tristate "AH match support" depends on IP_NF_IPTABLES @@ -568,17 +557,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_DSCP - tristate "DSCP target support" - depends on IP_NF_MANGLE - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_TTL tristate 'TTL target support' depends on IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3ded4a3af59..09aaed1a806 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -59,7 +59,6 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o @@ -68,7 +67,6 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8d1d7a6e72a..17e1a687ab4 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -56,8 +56,6 @@ do { \ #define ARP_NF_ASSERT(x) #endif -#include <linux/netfilter_ipv4/listhelp.h> - static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, char *hdr_addr, int len) { @@ -82,7 +80,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, { char *arpptr = (char *)(arphdr + 1); char *src_devaddr, *tgt_devaddr; - u32 src_ipaddr, tgt_ipaddr; + __be32 src_ipaddr, tgt_ipaddr; int i, ret; #define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg)) @@ -208,8 +206,7 @@ static unsigned int arpt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("arp_tables: error: '%s'\n", (char *)targinfo); @@ -226,8 +223,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata) + struct arpt_table *table) { static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; @@ -302,8 +298,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); /* Target might have changed stuff. */ arp = (*pskb)->nh.arph; @@ -490,12 +485,10 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i if (t->u.kernel.target == &arpt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto out; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -562,8 +555,7 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) t = arpt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a58325c1ceb..d12b1df252a 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -11,7 +11,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { const struct arpt_mangle *mangle = targinfo; struct arphdr *arp; @@ -67,7 +67,7 @@ target(struct sk_buff **pskb, static int checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int targinfosize, unsigned int hook_mask) + void *targinfo, unsigned int hook_mask) { const struct arpt_mangle *mangle = targinfo; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index d7c472faa53..7edea2a1696 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return arpt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index 0a7bd7f0406..6c7383a8e42 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -155,11 +155,11 @@ static int help(struct sk_buff **pskb, exp->tuple.dst.protonum = IPPROTO_TCP; exp->tuple.dst.u.tcp.port = htons(port); - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); exp->mask.dst.protonum = 0xFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.u.tcp.port = htons(0xFFFF); if (ip_nat_amanda_hook) ret = ip_nat_amanda_hook(pskb, ctinfo, off - dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index aa459177c3f..143c4668538 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -47,7 +47,6 @@ #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> #define IP_CONNTRACK_VERSION "2.4" @@ -64,17 +63,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; +struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; static LIST_HEAD(helpers); -unsigned int ip_conntrack_htable_size = 0; -int ip_conntrack_max; -struct list_head *ip_conntrack_hash; +unsigned int ip_conntrack_htable_size __read_mostly = 0; +int ip_conntrack_max __read_mostly; +struct list_head *ip_conntrack_hash __read_mostly; static kmem_cache_t *ip_conntrack_cachep __read_mostly; static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; -unsigned int ip_ct_log_invalid; +unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); -static int ip_conntrack_vmalloc; +static int ip_conntrack_vmalloc __read_mostly; static unsigned int ip_conntrack_next_id; static unsigned int ip_conntrack_expect_next_id; @@ -150,8 +149,8 @@ static unsigned int ip_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, unsigned int size, unsigned int rnd) { - return (jhash_3words(tuple->src.ip, - (tuple->dst.ip ^ tuple->dst.protonum), + return (jhash_3words((__force u32)tuple->src.ip, + ((__force u32)tuple->dst.ip ^ tuple->dst.protonum), (tuple->src.u.all | (tuple->dst.u.all << 16)), rnd) % size); } @@ -294,15 +293,10 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) static void clean_from_lists(struct ip_conntrack *ct) { - unsigned int ho, hr; - DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&ip_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); /* Destroy all pending expectations */ ip_ct_remove_expectations(ct); @@ -313,6 +307,7 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; + struct ip_conntrack_helper *helper; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -321,6 +316,10 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); + helper = ct->helper; + if (helper && helper->destroy) + helper->destroy(ct); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ @@ -367,16 +366,6 @@ static void death_by_timeout(unsigned long ul_conntrack) ip_conntrack_put(ct); } -static inline int -conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - ASSERT_READ_LOCK(&ip_conntrack_lock); - return tuplehash_to_ctrack(i) != ignored_conntrack - && ip_ct_tuple_equal(tuple, &i->tuple); -} - struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) @@ -386,7 +375,8 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + if (tuplehash_to_ctrack(h) != ignored_conntrack && + ip_ct_tuple_equal(tuple, &h->tuple)) { CONNTRACK_STAT_INC(found); return h; } @@ -417,10 +407,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); + list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, + &ip_conntrack_hash[hash]); + list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, + &ip_conntrack_hash[repl_hash]); } void ip_conntrack_hash_insert(struct ip_conntrack *ct) @@ -440,6 +430,7 @@ int __ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -470,43 +461,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_for_each_entry(h, &ip_conntrack_hash[hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &h->tuple)) + goto out; + list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) + if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, + &h->tuple)) + goto out; - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + + __ip_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + CONNTRACK_STAT_INC(insert); + write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; - } + return NF_ACCEPT; +out: CONNTRACK_STAT_INC(insert_failed); write_unlock_bh(&ip_conntrack_lock); - return NF_DROP; } @@ -527,23 +518,21 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; + struct ip_conntrack *ct = NULL, *tmp; int dropped = 0; read_lock_bh(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { - ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); + list_for_each_entry_reverse(h, chain, list) { + tmp = tuplehash_to_ctrack(h); + if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { + ct = tmp; + atomic_inc(&ct->ct_general.use); + break; + } } read_unlock_bh(&ip_conntrack_lock); @@ -559,18 +548,16 @@ static int early_drop(struct list_head *chain) return dropped; } -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) -{ - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - static struct ip_conntrack_helper * __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) + return h; + } + return NULL; } struct ip_conntrack_helper * @@ -640,11 +627,15 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, ip_conntrack_hash_rnd_initted = 1; } + /* We don't want any race condition at early drop stage */ + atomic_inc(&ip_conntrack_count); + if (ip_conntrack_max - && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + && atomic_read(&ip_conntrack_count) > ip_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. */ if (!early_drop(&ip_conntrack_hash[hash])) { + atomic_dec(&ip_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "ip_conntrack: table full, dropping" @@ -656,6 +647,7 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); + atomic_dec(&ip_conntrack_count); return ERR_PTR(-ENOMEM); } @@ -669,8 +661,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - atomic_inc(&ip_conntrack_count); - return conntrack; } @@ -1062,7 +1052,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); - list_prepend(&helpers, me); + list_add(&me->list, &helpers); write_unlock_bh(&ip_conntrack_lock); return 0; @@ -1081,24 +1071,24 @@ __ip_conntrack_helper_find_byname(const char *name) return NULL; } -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static inline void unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } - return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; + struct ip_conntrack_tuple_hash *h; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); + list_del(&me->list); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -1108,10 +1098,12 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } } /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); - for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); + list_for_each_entry(h, &unconfirmed, list) + unhelp(h, me); + for (i = 0; i < ip_conntrack_htable_size; i++) { + list_for_each_entry(h, &ip_conntrack_hash[i], list) + unhelp(h, me); + } write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1177,9 +1169,9 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct, int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16), &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16), &tuple->dst.u.tcp.port); return 0; @@ -1194,9 +1186,9 @@ int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], return -EINVAL; t->src.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); t->dst.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); return 0; } @@ -1237,46 +1229,43 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nf_conntrack_get(nskb->nfct); } -static inline int -do_iter(const struct ip_conntrack_tuple_hash *i, - int (*iter)(struct ip_conntrack *i, void *data), - void *data) -{ - return iter(tuplehash_to_ctrack(i), data); -} - /* Bring out ya dead! */ -static struct ip_conntrack_tuple_hash * +static struct ip_conntrack * get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; + list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; + } + } + list_for_each_entry(h, &unconfirmed, list) { + ct = tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); + return NULL; - return h; +found: + atomic_inc(&ct->ct_general.use); + write_unlock_bh(&ip_conntrack_lock); + return ct; } void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) { - struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; unsigned int bucket = 0; - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct ip_conntrack *ct = tuplehash_to_ctrack(h); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 1d18c863f06..93dcf960662 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -425,8 +425,8 @@ static int help(struct sk_buff **pskb, exp->tuple.src.u.tcp.port = 0; /* Don't care. */ exp->tuple.dst.protonum = IPPROTO_TCP; exp->mask = ((struct ip_conntrack_tuple) - { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); + { { htonl(0xFFFFFFFF), { 0 } }, + { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; @@ -488,7 +488,7 @@ static int __init ip_conntrack_ftp_init(void) for (i = 0; i < ports_c; i++) { ftp[i].tuple.src.u.tcp.port = htons(ports[i]); ftp[i].tuple.dst.protonum = IPPROTO_TCP; - ftp[i].mask.src.u.tcp.port = 0xFFFF; + ftp[i].mask.src.u.tcp.port = htons(0xFFFF); ftp[i].mask.dst.protonum = 0xFF; ftp[i].max_expected = 1; ftp[i].timeout = 5 * 60; /* 5 minutes */ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c index 9a39e296971..7b7441202bf 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c @@ -49,11 +49,11 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " int (*set_h245_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); int (*set_h225_addr_hook) (struct sk_buff ** pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port); + __be32 ip, u_int16_t port); int (*set_sig_addr_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct, enum ip_conntrack_info ctinfo, @@ -209,7 +209,7 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct, /****************************************************************************/ static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr, - u_int32_t * ip, u_int16_t * port) + __be32 * ip, u_int16_t * port) { unsigned char *p; @@ -232,7 +232,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; u_int16_t rtp_port; struct ip_conntrack_expect *rtp_exp; @@ -254,10 +254,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; rtp_exp->tuple.dst.u.udp.port = htons(rtp_port); rtp_exp->tuple.dst.protonum = IPPROTO_UDP; - rtp_exp->mask.src.ip = 0xFFFFFFFF; + rtp_exp->mask.src.ip = htonl(0xFFFFFFFF); rtp_exp->mask.src.u.udp.port = 0; - rtp_exp->mask.dst.ip = 0xFFFFFFFF; - rtp_exp->mask.dst.u.udp.port = 0xFFFF; + rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF); + rtp_exp->mask.dst.u.udp.port = htons(0xFFFF); rtp_exp->mask.dst.protonum = 0xFF; rtp_exp->flags = 0; @@ -271,10 +271,10 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct, rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1); rtcp_exp->tuple.dst.protonum = IPPROTO_UDP; - rtcp_exp->mask.src.ip = 0xFFFFFFFF; + rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF); rtcp_exp->mask.src.u.udp.port = 0; - rtcp_exp->mask.dst.ip = 0xFFFFFFFF; - rtcp_exp->mask.dst.u.udp.port = 0xFFFF; + rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF); + rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF); rtcp_exp->mask.dst.protonum = 0xFF; rtcp_exp->flags = 0; @@ -325,7 +325,7 @@ static int expect_t120(struct sk_buff **pskb, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -342,10 +342,10 @@ static int expect_t120(struct sk_buff **pskb, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple channels */ @@ -626,7 +626,7 @@ void ip_conntrack_h245_expect(struct ip_conntrack *new, /****************************************************************************/ int get_h225_addr(unsigned char *data, TransportAddress * addr, - u_int32_t * ip, u_int16_t * port) + __be32 * ip, u_int16_t * port) { unsigned char *p; @@ -648,7 +648,7 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -665,10 +665,10 @@ static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; @@ -709,7 +709,7 @@ static int expect_callforwarding(struct sk_buff **pskb, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -751,10 +751,10 @@ static int expect_callforwarding(struct sk_buff **pskb, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; @@ -791,7 +791,7 @@ static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret; int i; - u_int32_t ip; + __be32 ip; u_int16_t port; DEBUGP("ip_ct_q931: Setup\n"); @@ -1188,7 +1188,7 @@ static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen) /****************************************************************************/ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { struct ip_conntrack_expect *exp; struct ip_conntrack_tuple tuple; @@ -1228,7 +1228,7 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1251,10 +1251,10 @@ static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = gkrouted_only ? 0xFFFFFFFF : 0; + exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0; exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; /* Accept multiple calls */ @@ -1307,7 +1307,7 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1333,10 +1333,10 @@ static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_UDP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = 0; exp->expectfn = ip_conntrack_ras_expect; @@ -1477,7 +1477,7 @@ static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct, { struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); - u_int32_t ip; + __be32 ip; u_int16_t port; DEBUGP("ip_ct_ras: ARQ\n"); @@ -1513,7 +1513,7 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp; @@ -1538,10 +1538,10 @@ static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; exp->expectfn = ip_conntrack_q931_expect; @@ -1581,7 +1581,7 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int ret = 0; - u_int32_t ip; + __be32 ip; u_int16_t port; struct ip_conntrack_expect *exp = NULL; @@ -1598,10 +1598,10 @@ static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct, exp->tuple.dst.ip = ip; exp->tuple.dst.u.tcp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.tcp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->flags = IP_CT_EXPECT_PERMANENT; exp->expectfn = ip_conntrack_q931_expect; diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index b020a33e65e..a2af5e0c7f9 100644 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -20,11 +20,11 @@ * - We can only support one single call within each session * * TODO: - * - testing of incoming PPTP calls + * - testing of incoming PPTP calls * - * Changes: + * Changes: * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from + * - Call ip_conntrack_unexpect_related() from * pptp_destroy_siblings() to destroy expectations in case * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen * (Philip Craig <philipc@snapgear.com>) @@ -80,7 +80,7 @@ int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -int +void (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply); @@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, invert_tuplepr(&inv_t, &exp->tuple); DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&inv_t); - + exp_other = ip_conntrack_expect_find(&inv_t); if (exp_other) { /* delete other expectation. */ @@ -194,15 +194,16 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) { struct ip_conntrack_tuple t; - /* Since ct->sibling_list has literally rusted away in 2.6.11, + ip_ct_gre_keymap_destroy(ct); + /* Since ct->sibling_list has literally rusted away in 2.6.11, * we now need another way to find out about our sibling * contrack and expects... -HW */ /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout original pns->pac ct/exp\n"); @@ -210,8 +211,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id; + t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id; if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout reply pac->pns ct/exp\n"); @@ -219,94 +220,63 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int -exp_gre(struct ip_conntrack *master, - u_int32_t seq, +exp_gre(struct ip_conntrack *ct, __be16 callid, __be16 peer_callid) { - struct ip_conntrack_tuple inv_tuple; - struct ip_conntrack_tuple exp_tuples[] = { - /* tuple in original direction, PNS->PAC */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, - .u = { .gre = { .key = peer_callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, - .u = { .gre = { .key = callid } }, - .protonum = IPPROTO_GRE - }, - }, - /* tuple in reply direction, PAC->PNS */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - .u = { .gre = { .key = callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - .u = { .gre = { .key = peer_callid } }, - .protonum = IPPROTO_GRE - }, - } - }; struct ip_conntrack_expect *exp_orig, *exp_reply; int ret = 1; - exp_orig = ip_conntrack_expect_alloc(master); + exp_orig = ip_conntrack_expect_alloc(ct); if (exp_orig == NULL) goto out; - exp_reply = ip_conntrack_expect_alloc(master); + exp_reply = ip_conntrack_expect_alloc(ct); if (exp_reply == NULL) goto out_put_orig; - memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + /* original direction, PNS->PAC */ + exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp_orig->tuple.src.u.gre.key = peer_callid; + exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + exp_orig->tuple.dst.u.gre.key = callid; + exp_orig->tuple.dst.protonum = IPPROTO_GRE; - exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.ip = htonl(0xffffffff); exp_orig->mask.src.u.all = 0; - exp_orig->mask.dst.u.all = 0; exp_orig->mask.dst.u.gre.key = htons(0xffff); - exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.ip = htonl(0xffffffff); exp_orig->mask.dst.protonum = 0xff; - - exp_orig->master = master; + + exp_orig->master = ct; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; /* both expectations are identical apart from tuple */ memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); - memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); - if (ip_nat_pptp_hook_exp_gre) - ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); - else { - - DEBUGP("calling expect_related PNS->PAC"); - DUMP_TUPLE(&exp_orig->tuple); - - if (ip_conntrack_expect_related(exp_orig) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_put_both; - } + /* reply direction, PAC->PNS */ + exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + exp_reply->tuple.src.u.gre.key = callid; + exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + exp_reply->tuple.dst.u.gre.key = peer_callid; + exp_reply->tuple.dst.protonum = IPPROTO_GRE; - DEBUGP("calling expect_related PAC->PNS"); - DUMP_TUPLE(&exp_reply->tuple); - - if (ip_conntrack_expect_related(exp_reply) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_unexpect_orig; - } - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { - DEBUGP("cannot keymap_add() exp\n"); - goto out_unexpect_both; - } - - invert_tuplepr(&inv_tuple, &exp_reply->tuple); - if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(master); - DEBUGP("cannot keymap_add() exp_inv\n"); - goto out_unexpect_both; - } - ret = 0; + if (ip_nat_pptp_hook_exp_gre) + ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + if (ip_conntrack_expect_related(exp_orig) != 0) + goto out_put_both; + if (ip_conntrack_expect_related(exp_reply) != 0) + goto out_unexpect_orig; + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0) + goto out_unexpect_both; + if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(ct); + goto out_unexpect_both; } + ret = 0; out_put_both: ip_conntrack_expect_put(exp_reply); @@ -322,73 +292,36 @@ out_unexpect_orig: goto out_put_both; } -static inline int +static inline int pptp_inbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 *cid, *pcid; - u_int32_t seq; - - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } + __be16 cid = 0, pcid = 0; msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.srep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms new control session */ - if (info->sstate < PPTP_SESSION_REQUESTED) { - DEBUGP("%s without START_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate < PPTP_SESSION_REQUESTED) + goto invalid; if (pptpReq->srep.resultCode == PPTP_START_OK) info->sstate = PPTP_SESSION_CONFIRMED; - else + else info->sstate = PPTP_SESSION_ERROR; break; case PPTP_STOP_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.strep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms end of control session */ - if (info->sstate > PPTP_SESSION_STOPREQ) { - DEBUGP("%s without STOP_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate > PPTP_SESSION_STOPREQ) + goto invalid; if (pptpReq->strep.resultCode == PPTP_STOP_OK) info->sstate = PPTP_SESSION_NONE; else @@ -396,116 +329,64 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.ocack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server accepted call, we now expect GRE frames */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; if (info->cstate != PPTP_CALL_OUT_REQ && - info->cstate != PPTP_CALL_OUT_CONF) { - DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); - break; - } - if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate != PPTP_CALL_OUT_CONF) + goto invalid; + + cid = pptpReq->ocack.callID; + pcid = pptpReq->ocack.peersCallID; + if (info->pns_call_id != pcid) + goto invalid; + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); + + if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_OUT_CONF; + info->pac_call_id = cid; + exp_gre(ct, cid, pcid); + } else info->cstate = PPTP_CALL_NONE; - break; - } - - cid = &pptpReq->ocack.callID; - pcid = &pptpReq->ocack.peersCallID; - - info->pac_call_id = ntohs(*cid); - - if (htons(info->pns_call_id) != *pcid) { - DEBUGP("%s for unknown callid %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(*cid), ntohs(*pcid)); - - info->cstate = PPTP_CALL_OUT_CONF; - - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); break; case PPTP_IN_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server tells us about incoming call request */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - pcid = &pptpReq->icack.peersCallID; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + + cid = pptpReq->icreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = ntohs(*pcid); + info->pac_call_id = cid; break; case PPTP_IN_CALL_CONNECT: - if (reqlen < sizeof(_pptpReq.iccon)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server tells us about incoming call established */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - if (info->cstate != PPTP_CALL_IN_REP - && info->cstate != PPTP_CALL_IN_CONF) { - DEBUGP("%s but never sent IN_CALL_REPLY\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; + if (info->cstate != PPTP_CALL_IN_REP && + info->cstate != PPTP_CALL_IN_CONF) + goto invalid; - pcid = &pptpReq->iccon.peersCallID; - cid = &info->pac_call_id; + pcid = pptpReq->iccon.peersCallID; + cid = info->pac_call_id; - if (info->pns_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } + if (info->pns_call_id != pcid) + goto invalid; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); - + exp_gre(ct, cid, pcid); break; case PPTP_CALL_DISCONNECT_NOTIFY: - if (reqlen < sizeof(_pptpReq.disc)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* server confirms disconnect */ - cid = &pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + cid = pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -513,54 +394,39 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_WAN_ERROR_NOTIFY: - break; - case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) - ? pptp_msg_name[msg]:pptp_msg_name[0], msg); - break; + goto invalid; } - if (ip_nat_pptp_hook_inbound) return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, pptpReq); - return NF_ACCEPT; +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); + return NF_ACCEPT; } static inline int pptp_outbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq, + unsigned int reqlen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 *cid, *pcid; - - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) - return NF_ACCEPT; - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) - return NF_ACCEPT; + __be16 cid = 0, pcid = 0; msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); @@ -568,10 +434,8 @@ pptp_outbound_pkt(struct sk_buff **pskb, switch (msg) { case PPTP_START_SESSION_REQUEST: /* client requests for new control session */ - if (info->sstate != PPTP_SESSION_NONE) { - DEBUGP("%s but we already have one", - pptp_msg_name[msg]); - } + if (info->sstate != PPTP_SESSION_NONE) + goto invalid; info->sstate = PPTP_SESSION_REQUESTED; break; case PPTP_STOP_SESSION_REQUEST: @@ -580,123 +444,115 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.ocreq)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - /* FIXME: break; */ - } - /* client initiating connection to server */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", - pptp_msg_name[msg]); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; info->cstate = PPTP_CALL_OUT_REQ; /* track PNS call id */ - cid = &pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->pns_call_id = ntohs(*cid); + cid = pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + info->pns_call_id = cid; break; case PPTP_IN_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - /* client answers incoming call */ - if (info->cstate != PPTP_CALL_IN_REQ - && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", - pptp_msg_name[msg]); - break; - } - if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + if (info->cstate != PPTP_CALL_IN_REQ && + info->cstate != PPTP_CALL_IN_REP) + goto invalid; + + cid = pptpReq->icack.callID; + pcid = pptpReq->icack.peersCallID; + if (info->pac_call_id != pcid) + goto invalid; + DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], + ntohs(cid), ntohs(pcid)); + + if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = cid; + } else info->cstate = PPTP_CALL_NONE; - break; - } - pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown call %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); - /* part two of the three-way handshake */ - info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = ntohs(pptpReq->icack.callID); break; case PPTP_CALL_CLEAR_REQUEST: /* client requests hangup of call */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("CLEAR_CALL but no session\n"); - break; - } + if (info->sstate != PPTP_SESSION_CONFIRMED) + goto invalid; /* FUTURE: iterate over all calls and check if * call ID is valid. We don't do this without newnat, * because we only know about last call */ info->cstate = PPTP_CALL_CLEAR_REQ; break; case PPTP_SET_LINK_INFO: - break; case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ break; default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0], msg); - /* unknown: no need to create GRE masq table entry */ - break; + goto invalid; } - + if (ip_nat_pptp_hook_outbound) return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, pptpReq); + return NF_ACCEPT; +invalid: + DEBUGP("invalid %s: type=%d cid=%u pcid=%u " + "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], + msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, + ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } +static const unsigned int pptp_msg_size[] = { + [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), + [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), + [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), + [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), + [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), + [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), + [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), + [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), + [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), + [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), + [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), + [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), + [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), +}; /* track caller id inside control connection, call expect_related */ -static int +static int conntrack_pptp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct pptp_pkt_hdr _pptph, *pptph; - struct tcphdr _tcph, *tcph; - u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; - u_int32_t datalen; int dir = CTINFO2DIR(ctinfo); struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - unsigned int nexthdr_off; - + struct tcphdr _tcph, *tcph; + struct pptp_pkt_hdr _pptph, *pptph; + struct PptpControlHeader _ctlh, *ctlh; + union pptp_ctrl_union _pptpReq, *pptpReq; + unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; + u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { DEBUGP("ctinfo = %u, skipping\n", ctinfo); return NF_ACCEPT; } - + nexthdr_off = (*pskb)->nh.iph->ihl*4; tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - if (tcph->fin || tcph->rst) { - DEBUGP("RST/FIN received, timeouting GRE\n"); - /* can't do this after real newnat */ - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); - } - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { DEBUGP("no full PPTP header, can't track\n"); @@ -712,6 +568,23 @@ conntrack_pptp_help(struct sk_buff **pskb, return NF_ACCEPT; } + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + msg = ntohs(ctlh->messageType); + if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) + return NF_ACCEPT; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + oldsstate = info->sstate; oldcstate = info->cstate; @@ -721,11 +594,11 @@ conntrack_pptp_help(struct sk_buff **pskb, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); DEBUGP("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); @@ -735,30 +608,31 @@ conntrack_pptp_help(struct sk_buff **pskb, } /* control protocol helper */ -static struct ip_conntrack_helper pptp = { +static struct ip_conntrack_helper pptp = { .list = { NULL, NULL }, - .name = "pptp", + .name = "pptp", .me = THIS_MODULE, .max_expected = 2, .timeout = 5 * 60, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, .protonum = IPPROTO_TCP - } + } }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = __constant_htons(0xffff) } } - }, - .dst = { .ip = 0, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, - .protonum = 0xff - } + .protonum = 0xff + } }, - .help = conntrack_pptp_help + .help = conntrack_pptp_help, + .destroy = pptp_destroy_siblings, }; extern void ip_ct_proto_gre_fini(void); @@ -768,7 +642,7 @@ extern int __init ip_ct_proto_gre_init(void); static int __init ip_conntrack_helper_pptp_init(void) { int retcode; - + retcode = ip_ct_proto_gre_init(); if (retcode < 0) return retcode; diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 44889075f3b..75f7c3db161 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -218,7 +218,8 @@ static int help(struct sk_buff **pskb, IPPROTO_TCP }}); exp->mask = ((struct ip_conntrack_tuple) { { 0, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); + { htonl(0xFFFFFFFF), + { .tcp = { htons(0xFFFF) } }, 0xFF }}); exp->expectfn = NULL; exp->flags = 0; if (ip_nat_irc_hook) @@ -266,7 +267,7 @@ static int __init ip_conntrack_irc_init(void) hlpr = &irc_helpers[i]; hlpr->tuple.src.u.tcp.port = htons(ports[i]); hlpr->tuple.dst.protonum = IPPROTO_TCP; - hlpr->mask.src.u.tcp.port = 0xFFFF; + hlpr->mask.src.u.tcp.port = htons(0xFFFF); hlpr->mask.dst.protonum = 0xFF; hlpr->max_expected = max_dcc_channels; hlpr->timeout = dcc_timeout; diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index a566a81325b..a1d6a89f64a 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -21,6 +21,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/inetdevice.h> +#include <linux/if_addr.h> #include <linux/in.h> #include <linux/ip.h> #include <net/route.h> @@ -47,7 +48,7 @@ static int help(struct sk_buff **pskb, struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt = (struct rtable *)(*pskb)->dst; struct in_device *in_dev; - u_int32_t mask = 0; + __be32 mask = 0; /* we're only interested in locally generated packets */ if ((*pskb)->sk == NULL) @@ -77,12 +78,12 @@ static int help(struct sk_buff **pskb, goto out; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->tuple.src.u.udp.port = ntohs(NMBD_PORT); + exp->tuple.src.u.udp.port = htons(NMBD_PORT); exp->mask.src.ip = mask; - exp->mask.src.u.udp.port = 0xFFFF; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.src.u.udp.port = htons(0xFFFF); + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; @@ -114,7 +115,7 @@ static struct ip_conntrack_helper helper = { .src = { .u = { .udp = { - .port = 0xFFFF, + .port = __constant_htons(0xFFFF), } } }, diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 0d4cc92391f..53b6dffea6c 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -78,8 +78,8 @@ ctnetlink_dump_tuples_ip(struct sk_buff *skb, { struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip); NFA_NEST_END(skb, nest_parms); @@ -110,7 +110,7 @@ ctnetlink_dump_tuples(struct sk_buff *skb, static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t status = htonl((u_int32_t) ct->status); + __be32 status = htonl((u_int32_t) ct->status); NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); return 0; @@ -122,7 +122,7 @@ static inline int ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) { long timeout_l = ct->timeout.expires - jiffies; - u_int32_t timeout; + __be32 timeout; if (timeout_l < 0) timeout = 0; @@ -192,13 +192,13 @@ ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nfattr *nest_count = NFA_NEST(skb, type); - u_int32_t tmp; + __be32 tmp; tmp = htonl(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); + NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp); tmp = htonl(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); + NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp); NFA_NEST_END(skb, nest_count); @@ -215,9 +215,9 @@ nfattr_failure: static inline int ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t mark = htonl(ct->mark); + __be32 mark = htonl(ct->mark); - NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark); return 0; nfattr_failure: @@ -230,8 +230,8 @@ nfattr_failure: static inline int ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t id = htonl(ct->id); - NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + __be32 id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(__be32), &id); return 0; nfattr_failure: @@ -241,9 +241,9 @@ nfattr_failure: static inline int ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) { - u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); + __be32 use = htonl(atomic_read(&ct->ct_general.use)); - NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + NFA_PUT(skb, CTA_USE, sizeof(__be32), &use); return 0; nfattr_failure: @@ -329,11 +329,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | - IPCT_PROTOINFO | - IPCT_HELPER | - IPCT_HELPINFO | - IPCT_NATINFO)) { + } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -385,6 +381,10 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; + if (events & IPCT_MARK + && ctnetlink_dump_mark(skb, ct) < 0) + goto nfattr_failure; + nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; @@ -436,6 +436,11 @@ restart: cb->args[1] = (unsigned long)ct; goto out; } +#ifdef CONFIG_NF_CT_ACCT + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) + memset(&ct->counters, 0, sizeof(ct->counters)); +#endif } if (cb->args[1]) { cb->args[1] = 0; @@ -451,49 +456,9 @@ out: return skb->len; } -#ifdef CONFIG_IP_NF_CT_ACCT -static int -ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ip_conntrack *ct = NULL; - struct ip_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - - DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, - cb->args[0], *id); - - write_lock_bh(&ip_conntrack_lock); - for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { - h = (struct ip_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = tuplehash_to_ctrack(h); - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - - memset(&ct->counters, 0, sizeof(ct->counters)); - } - } -out: - write_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} -#endif - static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), + [CTA_IP_V4_SRC-1] = sizeof(__be32), + [CTA_IP_V4_DST-1] = sizeof(__be32), }; static inline int @@ -510,11 +475,11 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) if (!tb[CTA_IP_V4_SRC-1]) return -EINVAL; - tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); if (!tb[CTA_IP_V4_DST-1]) return -EINVAL; - tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]); DEBUGP("leaving\n"); @@ -637,8 +602,8 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, } static const size_t cta_min_nat[CTA_NAT_MAX] = { - [CTA_NAT_MINIP-1] = sizeof(u_int32_t), - [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), + [CTA_NAT_MINIP-1] = sizeof(__be32), + [CTA_NAT_MAXIP-1] = sizeof(__be32), }; static inline int @@ -658,12 +623,12 @@ ctnetlink_parse_nat(struct nfattr *nat, return -EINVAL; if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]); if (!tb[CTA_NAT_MAXIP-1]) range->max_ip = range->min_ip; else - range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); if (range->min_ip) range->flags |= IP_NAT_RANGE_MAP_IPS; @@ -698,11 +663,11 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) } static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) + [CTA_STATUS-1] = sizeof(__be32), + [CTA_TIMEOUT-1] = sizeof(__be32), + [CTA_MARK-1] = sizeof(__be32), + [CTA_USE-1] = sizeof(__be32), + [CTA_ID-1] = sizeof(__be32) }; static int @@ -741,7 +706,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = tuplehash_to_ctrack(h); if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1])); if (ct->id != id) { ip_conntrack_put(ct); return -ENOENT; @@ -775,22 +740,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (msg->nfgen_family != AF_INET) return -EAFNOSUPPORT; - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { -#ifdef CONFIG_IP_NF_CT_ACCT - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table_w, - ctnetlink_done)) != 0) - return -EINVAL; -#else +#ifndef CONFIG_IP_NF_CT_ACCT + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) return -ENOTSUPP; #endif - } else { - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; - } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) @@ -851,7 +808,7 @@ static inline int ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) { unsigned long d; - unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); + unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1])); d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) @@ -946,7 +903,7 @@ ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) static inline int ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) { - u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); if (!del_timer(&ct->timeout)) return -ETIME; @@ -1009,7 +966,7 @@ ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) #if defined(CONFIG_IP_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif DEBUGP("all done\n"); @@ -1032,7 +989,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], if (!cda[CTA_TIMEOUT-1]) goto err; - ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; ct->status |= IPS_CONFIRMED; @@ -1049,7 +1006,7 @@ ctnetlink_create_conntrack(struct nfattr *cda[], #if defined(CONFIG_IP_NF_CONNTRACK_MARK) if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); + ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1])); #endif ct->helper = ip_conntrack_helper_find_get(rtuple); @@ -1181,8 +1138,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct ip_conntrack_expect *exp) { struct ip_conntrack *master = exp->master; - u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); - u_int32_t id = htonl(exp->id); + __be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ); + __be32 id = htonl(exp->id); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) goto nfattr_failure; @@ -1193,8 +1150,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, CTA_EXPECT_MASTER) < 0) goto nfattr_failure; - NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); - NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id); return 0; @@ -1253,6 +1210,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; + if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + return NOTIFY_DONE; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; @@ -1312,8 +1272,8 @@ out: } static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) + [CTA_EXPECT_TIMEOUT-1] = sizeof(__be32), + [CTA_EXPECT_ID-1] = sizeof(__be32) }; static int @@ -1361,7 +1321,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { ip_conntrack_expect_put(exp); return -ENOENT; @@ -1415,8 +1375,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = - *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + __be32 id = + *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]); if (exp->id != ntohl(id)) { ip_conntrack_expect_put(exp); return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c index f891308b5e4..36f2b5e5d80 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -12,7 +12,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -unsigned int ip_ct_generic_timeout = 600*HZ; +unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 4ee016c427b..5fe026f467d 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -1,15 +1,15 @@ /* - * ip_conntrack_proto_gre.c - Version 3.0 + * ip_conntrack_proto_gre.c - Version 3.0 * * Connection tracking protocol helper module for GRE. * * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -37,7 +37,6 @@ static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) -#include <linux/netfilter_ipv4/listhelp.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> @@ -62,7 +61,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); #define DEBUGP(x, args...) #define DUMP_TUPLE_GRE(x) #endif - + /* GRE KEYMAP HANDLING FUNCTIONS */ static LIST_HEAD(gre_keymap_list); @@ -82,12 +81,14 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) __be16 key = 0; read_lock_bh(&ip_ct_gre_lock); - km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (km) - key = km->tuple.src.u.gre.key; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t)) { + key = km->tuple.src.u.gre.key; + break; + } + } read_unlock_bh(&ip_ct_gre_lock); - + DEBUGP("lookup src key 0x%x up key for ", key); DUMP_TUPLE_GRE(t); @@ -99,28 +100,25 @@ int ip_ct_gre_keymap_add(struct ip_conntrack *ct, struct ip_conntrack_tuple *t, int reply) { - struct ip_ct_gre_keymap **exist_km, *km, *old; + struct ip_ct_gre_keymap **exist_km, *km; if (!ct->helper || strcmp(ct->helper->name, "pptp")) { DEBUGP("refusing to add GRE keymap to non-pptp session\n"); return -1; } - if (!reply) + if (!reply) exist_km = &ct->help.ct_pptp_info.keymap_orig; else exist_km = &ct->help.ct_pptp_info.keymap_reply; if (*exist_km) { /* check whether it's a retransmission */ - old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (old == *exist_km) { - DEBUGP("retransmission\n"); - return 0; + list_for_each_entry(km, &gre_keymap_list, list) { + if (gre_key_cmpfn(km, t) && km == *exist_km) + return 0; } - - DEBUGP("trying to override keymap_%s for ct %p\n", + DEBUGP("trying to override keymap_%s for ct %p\n", reply? "reply":"orig", ct); return -EEXIST; } @@ -136,7 +134,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, DUMP_TUPLE_GRE(&km->tuple); write_lock_bh(&ip_ct_gre_lock); - list_append(&gre_keymap_list, km); + list_add_tail(&km->list, &gre_keymap_list); write_unlock_bh(&ip_ct_gre_lock); return 0; @@ -154,7 +152,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) write_lock_bh(&ip_ct_gre_lock); if (ct->help.ct_pptp_info.keymap_orig) { - DEBUGP("removing %p from list\n", + DEBUGP("removing %p from list\n", ct->help.ct_pptp_info.keymap_orig); list_del(&ct->help.ct_pptp_info.keymap_orig->list); kfree(ct->help.ct_pptp_info.keymap_orig); @@ -222,7 +220,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, static int gre_print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple) { - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", ntohs(tuple->src.u.gre.key), ntohs(tuple->dst.u.gre.key)); } @@ -252,14 +250,14 @@ static int gre_packet(struct ip_conntrack *ct, } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); - + return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static int gre_new(struct ip_conntrack *ct, const struct sk_buff *skb) -{ +{ DEBUGP(": "); DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); @@ -285,9 +283,9 @@ static void gre_destroy(struct ip_conntrack *ct) } /* protocol helper struct */ -static struct ip_conntrack_protocol gre = { +static struct ip_conntrack_protocol gre = { .proto = IPPROTO_GRE, - .name = "gre", + .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, @@ -325,7 +323,7 @@ void ip_ct_proto_gre_fini(void) } write_unlock_bh(&ip_ct_gre_lock); - ip_conntrack_protocol_unregister(&gre); + ip_conntrack_protocol_unregister(&gre); } EXPORT_SYMBOL(ip_ct_gre_keymap_add); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 23f1c504586..295b6fa340d 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -21,7 +21,7 @@ #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -unsigned int ip_ct_icmp_timeout = 30*HZ; +unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk @@ -261,7 +261,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, static int icmp_tuple_to_nfattr(struct sk_buff *skb, const struct ip_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16), &t->src.u.icmp.id); NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), &t->dst.u.icmp.type); @@ -287,7 +287,7 @@ static int icmp_nfattr_to_tuple(struct nfattr *tb[], tuple->dst.u.icmp.code = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); if (tuple->dst.u.icmp.type >= sizeof(invmap) || !invmap[tuple->dst.u.icmp.type]) diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 2d3612cd5f1..2443322e412 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; -static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; +static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS; +static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; static const unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ @@ -210,7 +210,7 @@ static int sctp_print_conntrack(struct seq_file *s, for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \ offset < skb->len && \ (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ - offset += (htons(sch->length) + 3) & ~3, count++) + offset += (ntohs(sch->length) + 3) & ~3, count++) /* Some validity checks to make sure the chunks are fine */ static int do_basic_checks(struct ip_conntrack *conntrack, diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index fb920e76ec1..06e4e8a6dd9 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int ip_ct_tcp_be_liberal = 0; +int ip_ct_tcp_be_liberal __read_mostly = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose = 3; +int ip_ct_tcp_loose __read_mostly = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int ip_ct_tcp_max_retrans = 3; +int ip_ct_tcp_max_retrans __read_mostly = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR */ @@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned int ip_ct_tcp_timeout_established = 5 DAYS; -unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; -unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; -unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close = 10 SECS; +unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; +unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS; +unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; +unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; +unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; +unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; +unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; static const unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ @@ -519,8 +519,8 @@ static void tcp_sack(const struct sk_buff *skb, /* Fast path for timestamp-only option */ if (length == TCPOLEN_TSTAMP_ALIGNED*4 - && *(__u32 *)ptr == - __constant_ntohl((TCPOPT_NOP << 24) + && *(__be32 *)ptr == + __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) @@ -551,7 +551,7 @@ static void tcp_sack(const struct sk_buff *skb, for (i = 0; i < (opsize - TCPOLEN_SACK_BASE); i += TCPOLEN_SACK_PERBLOCK) { - tmp = ntohl(*((u_int32_t *)(ptr+i)+1)); + tmp = ntohl(*((__be32 *)(ptr+i)+1)); if (after(tmp, *sack)) *sack = tmp; @@ -731,13 +731,15 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end) + && state->last_end == end + && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; + state->last_win = win; state->retrans = 0; } } @@ -865,8 +867,7 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because it is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 9b2c16b4d2f..d0e8a16970e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -18,8 +18,8 @@ #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -unsigned int ip_ct_udp_timeout = 30*HZ; -unsigned int ip_ct_udp_timeout_stream = 180*HZ; +unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ; +unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -117,8 +117,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. + * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index 4f222d6be00..f4f75995a9e 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/skbuff.h> @@ -194,7 +193,7 @@ static int skp_digits_len(const char *dptr, const char *limit, int *shift) /* Simple ipaddr parser.. */ static int parse_ipaddr(const char *cp, const char **endp, - u_int32_t *ipaddr, const char *limit) + __be32 *ipaddr, const char *limit) { unsigned long int val; int i, digit = 0; @@ -228,7 +227,7 @@ static int parse_ipaddr(const char *cp, const char **endp, static int epaddr_len(const char *dptr, const char *limit, int *shift) { const char *aux = dptr; - u_int32_t ip; + __be32 ip; if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) { DEBUGP("ip: %s parse failed.!\n", dptr); @@ -303,7 +302,7 @@ int ct_sip_get_info(const char *dptr, size_t dlen, static int set_expected_rtp(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, - u_int32_t ipaddr, u_int16_t port, + __be32 ipaddr, u_int16_t port, const char *dptr) { struct ip_conntrack_expect *exp; @@ -320,10 +319,10 @@ static int set_expected_rtp(struct sk_buff **pskb, exp->tuple.dst.u.udp.port = htons(port); exp->tuple.dst.protonum = IPPROTO_UDP; - exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.ip = htonl(0xFFFFFFFF); exp->mask.src.u.udp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; + exp->mask.dst.ip = htonl(0xFFFFFFFF); + exp->mask.dst.u.udp.port = htons(0xFFFF); exp->mask.dst.protonum = 0xFF; exp->expectfn = NULL; @@ -350,7 +349,7 @@ static int sip_help(struct sk_buff **pskb, const char *dptr; int ret = NF_ACCEPT; int matchoff, matchlen; - u_int32_t ipaddr; + __be32 ipaddr; u_int16_t port; /* No Data ? */ @@ -440,7 +439,7 @@ static int __init init(void) sip[i].tuple.dst.protonum = IPPROTO_UDP; sip[i].tuple.src.u.udp.port = htons(ports[i]); - sip[i].mask.src.u.udp.port = 0xFFFF; + sip[i].mask.src.u.udp.port = htons(0xFFFF); sip[i].mask.dst.protonum = 0xFF; sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 7a9fa04a467..02135756562 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -35,7 +35,6 @@ #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -534,7 +533,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = { /* Sysctl support */ -int ip_conntrack_checksum = 1; +int ip_conntrack_checksum __read_mostly = 1; #ifdef CONFIG_SYSCTL @@ -563,7 +562,7 @@ extern unsigned int ip_ct_udp_timeout_stream; /* From ip_conntrack_proto_icmp.c */ extern unsigned int ip_ct_icmp_timeout; -/* From ip_conntrack_proto_icmp.c */ +/* From ip_conntrack_proto_generic.c */ extern unsigned int ip_ct_generic_timeout; /* Log invalid packets of a given protocol */ diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index 7e33d3bed5e..fe0b634dd37 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -70,10 +70,10 @@ static int tftp_help(struct sk_buff **pskb, return NF_DROP; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->mask.src.ip = 0xffffffff; + exp->mask.src.ip = htonl(0xffffffff); exp->mask.src.u.udp.port = 0; - exp->mask.dst.ip = 0xffffffff; - exp->mask.dst.u.udp.port = 0xffff; + exp->mask.dst.ip = htonl(0xffffffff); + exp->mask.dst.u.udp.port = htons(0xffff); exp->mask.dst.protonum = 0xff; exp->expectfn = NULL; exp->flags = 0; @@ -129,7 +129,7 @@ static int __init ip_conntrack_tftp_init(void) tftp[i].tuple.dst.protonum = IPPROTO_UDP; tftp[i].tuple.src.u.udp.port = htons(ports[i]); tftp[i].mask.dst.protonum = 0xFF; - tftp[i].mask.src.u.udp.port = 0xFFFF; + tftp[i].mask.src.u.udp.port = htons(0xFFFF); tftp[i].max_expected = 1; tftp[i].timeout = 5 * 60; /* 5 minutes */ tftp[i].me = THIS_MODULE; diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1741d555ad0..4b6260a9740 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -22,9 +22,6 @@ #include <linux/udp.h> #include <linux/jhash.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> #include <linux/netfilter_ipv4/ip_conntrack_protocol.h> @@ -33,7 +30,6 @@ #include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_helper.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -86,7 +82,7 @@ static inline unsigned int hash_by_src(const struct ip_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words(tuple->src.ip, tuple->src.u.all, + return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all, tuple->dst.protonum, 0) % ip_nat_htable_size; } @@ -101,18 +97,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) write_unlock_bh(&ip_nat_lock); } -/* We do checksum mangling, so if they were wrong before they're still - * wrong. Also works for incomplete packets (eg. ICMP dest - * unreachables.) */ -u_int16_t -ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} -EXPORT_SYMBOL(ip_nat_cheat_check); - /* Is this tuple already taken? (not by us) */ int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, @@ -206,7 +190,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - u_int32_t *var_ipp; + __be32 *var_ipp; /* Host order */ u_int32_t minip, maxip, j; @@ -233,7 +217,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * like this), even across reboots. */ minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); - j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0); + j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0); *var_ipp = htonl(minip + j % (maxip - minip + 1)); } @@ -378,12 +362,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, - iph->check); + iph->check = nf_csum_update(~iph->saddr, target->src.ip, + iph->check); iph->saddr = target->src.ip; } else { - iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, - iph->check); + iph->check = nf_csum_update(~iph->daddr, target->dst.ip, + iph->check); iph->daddr = target->dst.ip; } return 1; @@ -423,10 +407,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct, EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) +int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { struct { struct icmphdr icmp; @@ -434,7 +418,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; + enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; @@ -443,12 +429,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { - hdrlen = (*pskb)->nh.iph->ihl * 4; - if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0))) - return 0; - } + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) + return 0; /* Must be RELATED */ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || @@ -487,12 +469,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb, !manip)) return 0; - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); + } /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). */ @@ -550,9 +534,9 @@ int ip_nat_port_range_to_nfattr(struct sk_buff *skb, const struct ip_nat_range *range) { - NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16), &range->min.tcp.port); - NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16), &range->max.tcp.port); return 0; @@ -571,7 +555,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) if (tb[CTA_PROTONAT_PORT_MIN-1]) { ret = 1; range->min.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); } if (!tb[CTA_PROTONAT_PORT_MAX-1]) { @@ -580,7 +564,7 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) } else { ret = 1; range->max.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + *(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); } return ret; diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index 3328fc5c5f5..a71c233d811 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -34,7 +34,7 @@ MODULE_DESCRIPTION("ftp NAT helper"); static int mangle_rfc959_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -57,7 +57,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, /* |1|132.235.1.2|6275| */ static int mangle_eprt_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ mangle_eprt_packet(struct sk_buff **pskb, /* |1|132.235.1.2|6275| */ static int mangle_epsv_packet(struct sk_buff **pskb, - u_int32_t newip, + __be32 newip, u_int16_t port, unsigned int matchoff, unsigned int matchlen, @@ -98,7 +98,7 @@ mangle_epsv_packet(struct sk_buff **pskb, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t, +static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, unsigned int, unsigned int, struct ip_conntrack *, @@ -120,7 +120,7 @@ static unsigned int ip_nat_ftp(struct sk_buff **pskb, struct ip_conntrack_expect *exp, u32 *seq) { - u_int32_t newip; + __be32 newip; u_int16_t port; int dir = CTINFO2DIR(ctinfo); struct ip_conntrack *ct = exp->master; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index cbcaa45370a..3bf85848055 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -27,16 +27,12 @@ #include <net/tcp.h> #include <net/udp.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_conntrack.h> #include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat_protocol.h> #include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -165,7 +161,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct tcphdr *tcph; - int datalen; + int oldlen, datalen; if (!skb_make_writable(pskb, (*pskb)->len)) return 0; @@ -180,13 +176,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcph = (void *)iph + iph->ihl*4; + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); datalen = (*pskb)->len - iph->ihl*4; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, datalen, 0)); + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, + iph->saddr, iph->daddr, + csum_partial((char *)tcph, + datalen, 0)); + } else + tcph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ htons(0xFFFF), + htons(datalen), + tcph->check, 1); if (rep_len != match_len) { set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); @@ -221,6 +226,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct udphdr *udph; + int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ iph = (*pskb)->nh.iph; @@ -238,22 +244,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; udph = (void *)iph + iph->ihl*4; + + oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - udph->len = htons((*pskb)->len - iph->ihl*4); + datalen = (*pskb)->len - iph->ihl*4; + udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (udph->check) { - int datalen = (*pskb)->len - iph->ihl * 4; + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) + return 1; + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, csum_partial((char *)udph, datalen, 0)); - } - + if (!udph->check) + udph->check = -1; + } else + udph->check = nf_proto_csum_update(*pskb, + htons(oldlen) ^ htons(0xFFFF), + htons(datalen), + udph->check, 1); return 1; } EXPORT_SYMBOL(ip_nat_mangle_udp_packet); @@ -267,37 +283,38 @@ sack_adjust(struct sk_buff *skb, struct ip_nat_seq *natseq) { while (sackoff < sackend) { - struct tcp_sack_block *sack; - u_int32_t new_start_seq, new_end_seq; + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; sack = (void *)skb->data + sackoff; if (after(ntohl(sack->start_seq) - natseq->offset_before, natseq->correction_pos)) - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_after; + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); else - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_before; - new_start_seq = htonl(new_start_seq); + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); if (after(ntohl(sack->end_seq) - natseq->offset_before, natseq->correction_pos)) - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_after; + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); else - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_before; - new_end_seq = htonl(new_end_seq); + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = - ip_nat_cheat_check(~sack->start_seq, new_start_seq, - ip_nat_cheat_check(~sack->end_seq, - new_end_seq, - tcph->check)); + tcph->check = nf_proto_csum_update(skb, + ~sack->start_seq, + new_start_seq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(skb, + ~sack->end_seq, + new_end_seq, + tcph->check, 0); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -356,7 +373,8 @@ ip_nat_seq_adjust(struct sk_buff **pskb, enum ip_conntrack_info ctinfo) { struct tcphdr *tcph; - int dir, newseq, newack; + int dir; + __be32 newseq, newack; struct ip_nat_seq *this_way, *other_way; dir = CTINFO2DIR(ctinfo); @@ -369,22 +387,20 @@ ip_nat_seq_adjust(struct sk_buff **pskb, tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = ntohl(tcph->seq) + this_way->offset_after; + newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else - newseq = ntohl(tcph->seq) + this_way->offset_before; - newseq = htonl(newseq); + newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = ntohl(tcph->ack_seq) - other_way->offset_after; + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); else - newack = ntohl(tcph->ack_seq) - other_way->offset_before; - newack = htonl(newack); + newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, - ip_nat_cheat_check(~tcph->ack_seq, - newack, - tcph->check)); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, + tcph->check, 0); + tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, + tcph->check, 0); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c index 419b878fb46..4a7d34466ee 100644 --- a/net/ipv4/netfilter/ip_nat_helper_h323.c +++ b/net/ipv4/netfilter/ip_nat_helper_h323.c @@ -32,13 +32,13 @@ /****************************************************************************/ static int set_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, - unsigned int addroff, u_int32_t ip, u_int16_t port) + unsigned int addroff, __be32 ip, u_int16_t port) { enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo); struct { - u_int32_t ip; - u_int16_t port; + __be32 ip; + __be16 port; } __attribute__ ((__packed__)) buf; struct tcphdr _tcph, *th; @@ -86,7 +86,7 @@ static int set_addr(struct sk_buff **pskb, static int set_h225_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, TransportAddress * addr, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port); } @@ -95,7 +95,7 @@ static int set_h225_addr(struct sk_buff **pskb, static int set_h245_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, H245_TransportAddress * addr, - u_int32_t ip, u_int16_t port) + __be32 ip, u_int16_t port) { return set_addr(pskb, data, dataoff, addr->unicastAddress.iPAddress.network, ip, port); @@ -110,7 +110,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); int i; - u_int32_t ip; + __be32 ip; u_int16_t port; for (i = 0; i < count; i++) { @@ -164,7 +164,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct, { int dir = CTINFO2DIR(ctinfo); int i; - u_int32_t ip; + __be32 ip; u_int16_t port; for (i = 0; i < count; i++) { @@ -433,7 +433,7 @@ static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct, struct ip_ct_h323_master *info = &ct->help.ct_h323_info; int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = port; - u_int32_t ip; + __be32 ip; /* Set expectations for NAT */ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c index 1d149964dc3..329fdcd7d70 100644 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -32,7 +32,7 @@ * 2005-06-10 - Version 3.0 * - kernel >= 2.6.11 version, * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * + * */ #include <linux/module.h> @@ -51,7 +51,7 @@ #define IP_NAT_PPTP_VERSION "3.0" -#define REQ_CID(req, off) (*(u_int16_t *)((char *)(req) + (off))) +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); @@ -85,19 +85,17 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; t.dst.protonum = IPPROTO_GRE; } else { DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = - htons(master->nat.help.nat_pptp_info.pns_call_id); + t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = - htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; t.dst.protonum = IPPROTO_GRE; } @@ -149,51 +147,52 @@ pptp_outbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_callid; + u_int16_t msg; + __be16 new_callid; unsigned int cid_off; - new_callid = htons(ct_pptp_info->pns_call_id); - + new_callid = ct_pptp_info->pns_call_id; + switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. This breaks - * multiple calls within one control session */ - - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); - break; - case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icreq.callID); - break; - case PPTP_CALL_CLEAR_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ - - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = new_callid; + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icack.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; } /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass @@ -212,80 +211,28 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; } -static int +static void pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { - struct ip_ct_pptp_master *ct_pptp_info = - &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = - &expect_orig->master->nat.help.nat_pptp_info; - struct ip_conntrack *ct = expect_orig->master; - - struct ip_conntrack_tuple inv_t; - struct ip_conntrack_tuple *orig_t, *reply_t; + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - /* alter expectation */ - orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - /* alter expectation for PNS->PAC direction */ - invert_tuplepr(&inv_t, &expect_orig->tuple); - expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); - expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; expect_orig->dir = IP_CT_DIR_ORIGINAL; - inv_t.src.ip = reply_t->src.ip; - inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); - - if (!ip_conntrack_expect_related(expect_orig)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_orig)\n"); - return 1; - } /* alter expectation for PAC->PNS direction */ - invert_tuplepr(&inv_t, &expect_reply->tuple); - expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); - expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; expect_reply->dir = IP_CT_DIR_REPLY; - inv_t.src.ip = orig_t->src.ip; - inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); - - if (!ip_conntrack_expect_related(expect_reply)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_reply)\n"); - ip_conntrack_unexpect_related(expect_orig); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { - DEBUGP("can't register original keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { - DEBUGP("can't register reply keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - ip_ct_gre_keymap_destroy(ct); - return 1; - } - - return 0; } /* inbound packets == from PAC to PNS */ @@ -297,15 +244,15 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union *pptpReq) { struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_cid = 0, new_pcid; - unsigned int pcid_off, cid_off = 0; + u_int16_t msg; + __be16 new_pcid; + unsigned int pcid_off; - new_pcid = htons(nat_pptp_info->pns_call_id); + new_pcid = nat_pptp_info->pns_call_id; switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REPLY: pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); - cid_off = offsetof(union pptp_ctrl_union, ocack.callID); break; case PPTP_IN_CALL_CONNECT: pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); @@ -324,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? pptp_msg_name[msg]:pptp_msg_name[0]); /* fall through */ @@ -351,17 +298,6 @@ pptp_inbound_pkt(struct sk_buff **pskb, sizeof(new_pcid), (char *)&new_pcid, sizeof(new_pcid)) == 0) return NF_DROP; - - if (new_cid) { - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid)); - if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_cid), (char *)&new_cid, - sizeof(new_cid)) == 0) - return NF_DROP; - } return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c index 38acfdf540e..bf91f9312b3 100644 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -6,10 +6,10 @@ * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -60,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, } /* generate unique tuple ... */ -static int +static int gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range, enum ip_nat_manip_type maniptype, const struct ip_conntrack *conntrack) { static u_int16_t key; - u_int16_t *keyptr; + __be16 *keyptr; unsigned int min, i, range_size; if (maniptype == IP_NAT_MANIP_SRC) @@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + DEBUGP("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb, greh = (void *)(*pskb)->data + hdroff; pgreh = (struct gre_hdr_pptp *) greh; - /* we only have destination manip of a packet, since 'source key' + /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype == IP_NAT_MANIP_DST) { /* key manipulation is always dest */ @@ -129,15 +129,16 @@ gre_manip_pkt(struct sk_buff **pskb, } if (greh->csum) { /* FIXME: Never tested this code... */ - *(gre_csum(greh)) = - ip_nat_cheat_check(~*(gre_key(greh)), + *(gre_csum(greh)) = + nf_proto_csum_update(*pskb, + ~*(gre_key(greh)), tuple->dst.u.gre.key, - *(gre_csum(greh))); + *(gre_csum(greh)), 0); } *(gre_key(greh)) = tuple->dst.u.gre.key; break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", + DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; @@ -151,8 +152,8 @@ gre_manip_pkt(struct sk_buff **pskb, } /* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", +static struct ip_nat_protocol gre = { + .name = "GRE", .protonum = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, .in_range = gre_in_range, @@ -163,7 +164,7 @@ static struct ip_nat_protocol gre = { .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; - + int __init ip_nat_proto_gre_init(void) { return ip_nat_protocol_register(&gre); diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 31a3f4ccb99..3f6efc13ac7 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb, return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); - - hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, - tuple->src.u.icmp.id, - hdr->checksum); + hdr->checksum = nf_proto_csum_update(*pskb, + hdr->un.echo.id ^ htons(0xFFFF), + tuple->src.u.icmp.id, + hdr->checksum, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a3d14079eba..12deb13b93b 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -24,7 +24,7 @@ tcp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - u_int16_t port; + __be16 port; if (maniptype == IP_NAT_MANIP_SRC) port = tuple->src.u.tcp.port; @@ -42,7 +42,7 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t port; - u_int16_t *portptr; + __be16 *portptr; unsigned int range_size, min, i; if (maniptype == IP_NAT_MANIP_SRC) @@ -93,8 +93,8 @@ tcp_manip_pkt(struct sk_buff **pskb, struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport, oldport; + __be32 oldip, newip; + __be16 *portptr, newport, oldport; int hdrsize = 8; /* TCP connection tracking guarantees this much */ /* this could be a inner header returned in icmp packet; in such @@ -129,10 +129,9 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, oldport ^ htons(0xFFFF), newport, + hdr->check, 0); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index ec6053fdc86..4bbec7730d1 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -24,7 +24,7 @@ udp_in_range(const struct ip_conntrack_tuple *tuple, const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max) { - u_int16_t port; + __be16 port; if (maniptype == IP_NAT_MANIP_SRC) port = tuple->src.u.udp.port; @@ -42,7 +42,7 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack *conntrack) { static u_int16_t port; - u_int16_t *portptr; + __be16 *portptr; unsigned int range_size, min, i; if (maniptype == IP_NAT_MANIP_SRC) @@ -91,8 +91,8 @@ udp_manip_pkt(struct sk_buff **pskb, struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport; + __be32 oldip, newip; + __be16 *portptr, newport; if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; @@ -113,11 +113,16 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check) /* 0 is a special case meaning no checksum */ - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); + + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, + hdr->check, 1); + hdr->check = nf_proto_csum_update(*pskb, + *portptr ^ htons(0xFFFF), newport, + hdr->check, 0); + if (!hdr->check) + hdr->check = -1; + } *portptr = newport; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 1aba926c1cb..a176aa3031e 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -19,14 +19,10 @@ #include <net/route.h> #include <linux/bitops.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat_core.h> #include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -104,8 +100,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -124,7 +119,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(u32 dstip, u32 srcip) +static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; @@ -147,8 +142,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -174,7 +168,6 @@ static int ipt_snat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -191,7 +184,6 @@ static int ipt_dnat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -213,7 +205,7 @@ alloc_null_binding(struct ip_conntrack *conntrack, per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). Use reply in case it's already been mangled (eg local packet). */ - u_int32_t ip + __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); @@ -230,7 +222,7 @@ alloc_null_binding_confirmed(struct ip_conntrack *conntrack, struct ip_nat_info *info, unsigned int hooknum) { - u_int32_t ip + __be32 ip = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); @@ -255,7 +247,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c index 6ffba63adca..71fc2730a00 100644 --- a/net/ipv4/netfilter/ip_nat_sip.c +++ b/net/ipv4/netfilter/ip_nat_sip.c @@ -60,8 +60,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; unsigned int bufflen, dataoff; - u_int32_t ip; - u_int16_t port; + __be32 ip; + __be16 port; dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); @@ -159,7 +159,7 @@ static int mangle_content_len(struct sk_buff **pskb, static unsigned int mangle_sdp(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct ip_conntrack *ct, - u_int32_t newip, u_int16_t port, + __be32 newip, u_int16_t port, const char *dptr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; @@ -195,7 +195,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, { struct ip_conntrack *ct = exp->master; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - u_int32_t newip; + __be32 newip; u_int16_t port; DEBUGP("ip_nat_sdp():\n"); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 18b7fbdccb6..168f45fa189 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1211,7 +1211,7 @@ static int snmp_translate(struct ip_conntrack *ct, struct sk_buff **pskb) { struct iphdr *iph = (*pskb)->nh.iph; - struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); int dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 17de077a790..021395b6746 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -30,9 +30,6 @@ #include <net/checksum.h> #include <linux/spinlock.h> -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - #include <linux/netfilter_ipv4/ip_nat.h> #include <linux/netfilter_ipv4/ip_nat_rule.h> #include <linux/netfilter_ipv4/ip_nat_protocol.h> @@ -40,7 +37,6 @@ #include <linux/netfilter_ipv4/ip_nat_helper.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> #if 0 #define DEBUGP printk @@ -110,11 +106,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_HW) - if (skb_checksum_help(*pskb, (out == NULL))) - return NF_DROP; - ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to @@ -145,8 +136,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) + if (!ip_nat_icmp_reply_translation(ct, ctinfo, + hooknum, pskb)) return NF_DROP; else return NF_ACCEPT; @@ -200,7 +191,7 @@ ip_nat_in(unsigned int hooknum, int (*okfn)(struct sk_buff *)) { unsigned int ret; - u_int32_t daddr = (*pskb)->nh.iph->daddr; + __be32 daddr = (*pskb)->nh.iph->daddr; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 198ac36db86..7edad790478 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -52,15 +52,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; +static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; +static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; +static int peer_pid __read_mostly; +static unsigned int copy_range __read_mostly; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl; +static struct sock *ipqnl __read_mostly; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); @@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { + if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || + entry->skb->ip_summed == CHECKSUM_COMPLETE) && + (*errp = skb_checksum_help(entry->skb))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 048514f15f2..78a44b01c03 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -180,8 +180,7 @@ ipt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { if (net_ratelimit()) printk("ip_tables: error: `%s'\n", (char *)targinfo); @@ -217,8 +216,7 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata) + struct ipt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -308,8 +306,7 @@ ipt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data, - userdata); + t->data); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom @@ -467,8 +464,7 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data, - m->u.match_size - sizeof(*m)); + m->u.kernel.match->destroy(m->u.kernel.match, m->data); module_put(m->u.kernel.match->me); return 0; } @@ -521,7 +517,6 @@ check_match(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, m->data, - m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -578,12 +573,10 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto cleanup_matches; + goto err; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, - t->u.target_size - - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -655,8 +648,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data, - t->u.target_size - sizeof(*t)); + t->u.kernel.target->destroy(t->u.kernel.target, t->data); module_put(t->u.kernel.target->me); return 0; } @@ -950,73 +942,28 @@ static short compat_calc_jump(u_int16_t offset) return delta; } -struct compat_ipt_standard_target +static void compat_standard_from_user(void *dst, void *src) { - struct compat_xt_entry_target target; - compat_int_t verdict; -}; - -struct compat_ipt_standard -{ - struct compat_ipt_entry entry; - struct compat_ipt_standard_target target; -}; + int v = *(compat_int_t *)src; -#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) -#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) -#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) + if (v > 0) + v += compat_calc_jump(v); + memcpy(dst, &v, sizeof(v)); +} -static int compat_ipt_standard_fn(void *target, - void **dstptr, int *size, int convert) +static int compat_standard_to_user(void __user *dst, void *src) { - struct compat_ipt_standard_target compat_st, *pcompat_st; - struct ipt_standard_target st, *pst; - int ret; + compat_int_t cv = *(int *)src; - ret = 0; - switch (convert) { - case COMPAT_TO_USER: - pst = target; - memcpy(&compat_st.target, &pst->target, - sizeof(compat_st.target)); - compat_st.verdict = pst->verdict; - if (compat_st.verdict > 0) - compat_st.verdict -= - compat_calc_jump(compat_st.verdict); - compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; - if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) - ret = -EFAULT; - *size -= IPT_ST_OFFSET; - *dstptr += IPT_ST_COMPAT_LEN; - break; - case COMPAT_FROM_USER: - pcompat_st = target; - memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); - st.verdict = pcompat_st->verdict; - if (st.verdict > 0) - st.verdict += compat_calc_jump(st.verdict); - st.target.u.user.target_size = IPT_ST_LEN; - memcpy(*dstptr, &st, IPT_ST_LEN); - *size += IPT_ST_OFFSET; - *dstptr += IPT_ST_LEN; - break; - case COMPAT_CALC_SIZE: - *size += IPT_ST_OFFSET; - break; - default: - ret = -ENOPROTOOPT; - break; - } - return ret; + if (cv > 0) + cv -= compat_calc_jump(cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } static inline int compat_calc_match(struct ipt_entry_match *m, int * size) { - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(m->u.kernel.match); return 0; } @@ -1031,10 +978,7 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, entry_offset = (void *)e - base; IPT_MATCH_ITERATE(e, compat_calc_match, &off); t = ipt_get_target(e); - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1422,17 +1366,13 @@ struct compat_ipt_replace { static inline int compat_copy_match_to_user(struct ipt_entry_match *m, void __user **dstptr, compat_uint_t *size) { - if (m->u.kernel.match->compat) - return m->u.kernel.match->compat(m, dstptr, size, - COMPAT_TO_USER); - else - return xt_compat_match(m, dstptr, size, COMPAT_TO_USER); + return xt_compat_match_to_user(m, dstptr, size); } static int compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, compat_uint_t *size) { - struct ipt_entry_target __user *t; + struct ipt_entry_target *t; struct compat_ipt_entry __user *ce; u_int16_t target_offset, next_offset; compat_uint_t origsize; @@ -1450,11 +1390,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e, if (ret) goto out; t = ipt_get_target(e); - if (t->u.kernel.target->compat) - ret = t->u.kernel.target->compat(t, dstptr, size, - COMPAT_TO_USER); - else - ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER); + ret = xt_compat_target_to_user(t, dstptr, size); if (ret) goto out; ret = -EFAULT; @@ -1486,11 +1422,7 @@ compat_check_calc_match(struct ipt_entry_match *m, return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - - if (m->u.kernel.match->compat) - m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); - else - xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); + *size += xt_compat_match_offset(match); (*i)++; return 0; @@ -1537,7 +1469,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, e->comefrom, &off, &j); if (ret != 0) - goto out; + goto cleanup_matches; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1547,14 +1479,11 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); ret = target ? PTR_ERR(target) : -ENOENT; - goto out; + goto cleanup_matches; } t->u.kernel.target = target; - if (t->u.kernel.target->compat) - t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); - else - xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); + off += xt_compat_target_offset(target); *size += off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1574,14 +1503,17 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, (*i)++; return 0; + out: + module_put(t->u.kernel.target->me); +cleanup_matches: IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } static inline int compat_copy_match_from_user(struct ipt_entry_match *m, void **dstptr, compat_uint_t *size, const char *name, - const struct ipt_ip *ip, unsigned int hookmask) + const struct ipt_ip *ip, unsigned int hookmask, int *i) { struct ipt_entry_match *dm; struct ipt_match *match; @@ -1589,26 +1521,28 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, dm = (struct ipt_entry_match *)*dstptr; match = m->u.kernel.match; - if (match->compat) - match->compat(m, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_match(m, dstptr, size, COMPAT_FROM_USER); + xt_compat_match_from_user(m, dstptr, size); ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), name, hookmask, ip->proto, ip->invflags & IPT_INV_PROTO); if (ret) - return ret; + goto err; if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, dm->data, - dm->u.match_size - sizeof(*dm), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); - return -EINVAL; + ret = -EINVAL; + goto err; } + (*i)++; return 0; + +err: + module_put(m->u.kernel.match->me); + return ret; } static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, @@ -1619,25 +1553,23 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct ipt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h; + int ret, h, j; ret = 0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); + j = 0; *dstptr += sizeof(struct compat_ipt_entry); ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, - name, &de->ip, de->comefrom); + name, &de->ip, de->comefrom, &j); if (ret) - goto out; + goto cleanup_matches; de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; - if (target->compat) - target->compat(t, dstptr, size, COMPAT_FROM_USER); - else - xt_compat_target(t, dstptr, size, COMPAT_FROM_USER); + xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_IP_NUMHOOKS; h++) { @@ -1653,22 +1585,26 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, name, e->comefrom, e->ip.proto, e->ip.invflags & IPT_INV_PROTO); if (ret) - goto out; + goto err; ret = -EINVAL; if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, *size)) - goto out; + goto err; } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, de, target, - t->data, t->u.target_size - sizeof(*t), - de->comefrom)) { + t->data, de->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); - goto out; + goto err; } ret = 0; -out: + return ret; + +err: + module_put(t->u.kernel.target->me); +cleanup_matches: + IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } @@ -1989,6 +1925,8 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) return ret; } +static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); + static int compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { @@ -2002,8 +1940,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = compat_get_entries(user, len); break; default: - duprintf("compat_do_ipt_get_ctl: unknown request %i\n", cmd); - ret = -EINVAL; + ret = do_ipt_get_ctl(sk, cmd, user, len); } return ret; } @@ -2185,7 +2122,6 @@ icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_icmp *icmpinfo = matchinfo; @@ -2200,7 +2136,9 @@ static struct ipt_target ipt_standard_target = { .targetsize = sizeof(int), .family = AF_INET, #ifdef CONFIG_COMPAT - .compat = &compat_ipt_standard_fn, + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, #endif }; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d994c5f5744..7a29d6e7baa 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -52,7 +52,7 @@ struct clusterip_config { atomic_t entries; /* number of entries/rules * referencing us */ - u_int32_t clusterip; /* the IP address */ + __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ @@ -119,7 +119,7 @@ clusterip_config_entry_put(struct clusterip_config *c) } static struct clusterip_config * -__clusterip_config_find(u_int32_t clusterip) +__clusterip_config_find(__be32 clusterip) { struct list_head *pos; @@ -136,7 +136,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip, int entry) +clusterip_config_find_get(__be32 clusterip, int entry) { struct clusterip_config *c; @@ -166,7 +166,7 @@ clusterip_config_init_nodelist(struct clusterip_config *c, } static struct clusterip_config * -clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, +clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; @@ -302,8 +302,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; @@ -373,7 +372,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -389,7 +387,7 @@ checkentry(const char *tablename, return 0; } - if (e->ip.dmsk.s_addr != 0xffffffff + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); return 0; @@ -450,8 +448,7 @@ checkentry(const char *tablename, } /* drop reference count of cluster config when rule is deleted */ -static void destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +static void destroy(const struct xt_target *target, void *targinfo) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -479,9 +476,9 @@ static struct ipt_target clusterip_tgt = { /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ struct arp_payload { u_int8_t src_hw[ETH_ALEN]; - u_int32_t src_ip; + __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; - u_int32_t dst_ip; + __be32 dst_ip; } __attribute__ ((packed)); #ifdef CLUSTERIP_DEBUG diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c deleted file mode 100644 index c8e971288df..00000000000 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ /dev/null @@ -1,96 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field, Version 1.8 - * - * (C) 2002 by Harald Welte <laforge@netfilter.org> - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp -*/ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_DSCP.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables DSCP modification module"); -MODULE_LICENSE("GPL"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo, - void *userinfo) -{ - const struct ipt_DSCP_info *dinfo = targinfo; - u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - - if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) - | sh_dscp; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^ 0xFFFF)); - } - return IPT_CONTINUE; -} - -static int -checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; - - if ((dscp > IPT_DSCP_MAX)) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; - } - return 1; -} - -static struct ipt_target ipt_dscp_reg = { - .name = "DSCP", - .target = target, - .targetsize = sizeof(struct ipt_DSCP_info), - .table = "mangle", - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_target(&ipt_dscp_reg); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_target(&ipt_dscp_reg); -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 4adf5c9d34f..12a818a2462 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -27,32 +27,28 @@ MODULE_DESCRIPTION("iptables ECN modification module"); static inline int set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { - if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) - != (einfo->ip_ect & IPT_ECN_IP_MASK)) { - u_int16_t diffs[2]; + struct iphdr *iph = (*pskb)->nh.iph; + __be16 oldtos; + if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; - (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); + iph = (*pskb)->nh.iph; + oldtos = iph->tos; + iph->tos &= ~IPT_ECN_IP_MASK; + iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos, + iph->check); } return 1; } /* Return 0 if there was an error. */ static inline int -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) +set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; - u_int16_t diffs[2]; + __be16 oldval; /* Not enought header? */ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, @@ -70,22 +66,16 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, inward)) - return 0; - - diffs[0] = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - diffs[1] = ((u_int16_t *)tcph)[6]; - diffs[0] = diffs[0] ^ 0xFFFF; - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) - tcph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - tcph->check^0xFFFF)); + tcph->check = nf_proto_csum_update((*pskb), + oldval ^ htons(0xFFFF), + ((__be16 *)tcph)[6], + tcph->check, 0); return 1; } @@ -95,8 +85,7 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_ECN_info *einfo = targinfo; @@ -106,7 +95,7 @@ target(struct sk_buff **pskb, if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && (*pskb)->nh.iph->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo, (out == NULL))) + if (!set_ect_tcp(pskb, einfo)) return NF_DROP; return IPT_CONTINUE; @@ -117,7 +106,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index b98f7b08b08..7dc820df8bc 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -416,8 +416,7 @@ ipt_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -440,7 +439,6 @@ static int ipt_log_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index ebd94f2abf0..3dbfcfac8a8 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -42,7 +42,6 @@ masquerade_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -64,15 +63,14 @@ masquerade_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; const struct ip_nat_multi_range_compat *mr; struct ip_nat_range newrange; struct rtable *rt; - u_int32_t newsrc; + __be32 newsrc; IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 736c4b5a86a..58a88f22710 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -33,7 +33,6 @@ check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -55,12 +54,11 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t new_ip, netmask; + __be32 new_ip, netmask; const struct ip_nat_multi_range_compat *mr = targinfo; struct ip_nat_range newrange; diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index f290463232d..c0dcfe9d610 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -36,7 +36,6 @@ redirect_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -58,12 +57,11 @@ redirect_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t newdst; + __be32 newdst; const struct ip_nat_multi_range_compat *mr = targinfo; struct ip_nat_range newrange; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 269bc2067cb..fd0c05efed8 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; + security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); @@ -103,8 +104,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; struct rtable *rt; - u_int16_t tmp_port; - u_int32_t tmp_addr; + __be16 tmp_port; + __be32 tmp_addr; int needs_ack; int hh_len; @@ -184,6 +185,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->urg_ptr = 0; /* Adjust TCP checksum */ + nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, @@ -226,8 +228,7 @@ static unsigned int reject(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_reject_info *reject = targinfo; @@ -275,7 +276,6 @@ static int check(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c index 7169b09b5a6..b38b13328d7 100644 --- a/net/ipv4/netfilter/ipt_SAME.c +++ b/net/ipv4/netfilter/ipt_SAME.c @@ -52,7 +52,6 @@ same_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { unsigned int count, countess, rangeip, index = 0; @@ -116,8 +115,7 @@ same_check(const char *tablename, } static void -same_destroy(const struct xt_target *target, void *targinfo, - unsigned int targinfosize) +same_destroy(const struct xt_target *target, void *targinfo) { struct ipt_same_info *mr = targinfo; @@ -133,12 +131,12 @@ same_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - u_int32_t tmpip, aindex, new_ip; + u_int32_t tmpip, aindex; + __be32 new_ip; const struct ipt_same_info *same = targinfo; struct ip_nat_range newrange; const struct ip_conntrack_tuple *t; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index ef2fe5b3f0d..108b6b76311 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -21,26 +21,14 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); MODULE_DESCRIPTION("iptables TCP MSS modification module"); -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static u_int16_t -cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} - static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; - else return opt[offset+1]; + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; } static unsigned int @@ -49,26 +37,21 @@ ipt_tcpmss_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; struct tcphdr *tcph; struct iphdr *iph; - u_int16_t tcplen, newtotlen, oldval, newmss; + u_int16_t tcplen, newmss; + __be16 newtotlen, oldval; unsigned int i; u_int8_t *opt; if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, out == NULL)) - return NF_DROP; - iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; - tcph = (void *)iph + iph->ihl*4; /* Since it passed flags test in tcp match, we know it is is @@ -84,54 +67,41 @@ ipt_tcpmss_target(struct sk_buff **pskb, return NF_DROP; } - if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if(!(*pskb)->dst) { + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { + if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + + sizeof(struct tcphdr)) { if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); + printk(KERN_ERR "ipt_tcpmss_target: " + "unknown or invalid path-MTU (%d)\n", + dst_mtu((*pskb)->dst)); return NF_DROP; /* or IPT_CONTINUE ?? */ } - if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); + newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - + sizeof(struct tcphdr); } else newmss = tcpmssinfo->mss; opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ - if ((opt[i] == TCPOPT_MSS) && - ((tcph->doff*4 - i) >= TCPOLEN_MSS) && - (opt[i+1] == TCPOLEN_MSS)) { + for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && + opt[i+1] == TCPOLEN_MSS) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - (oldmss <= newmss)) - return IPT_CONTINUE; + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && + oldmss <= newmss) + return IPT_CONTINUE; opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); - tcph->check = cheat_check(htons(oldmss)^0xFFFF, - htons(newmss), - tcph->check); - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu changed TCP MSS option" - " (from %u to %u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - oldmss, newmss); - goto retmodified; + tcph->check = nf_proto_csum_update(*pskb, + htons(oldmss)^htons(0xFFFF), + htons(newmss), + tcph->check, 0); + return IPT_CONTINUE; } } @@ -143,13 +113,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target:" - " unable to allocate larger skb\n"); + if (!newskb) return NF_DROP; - } - kfree_skb(*pskb); *pskb = newskb; iph = (*pskb)->nh.iph; @@ -161,36 +126,29 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, - htons(tcplen + TCPOLEN_MSS), tcph->check); - tcplen += TCPOLEN_MSS; - + tcph->check = nf_proto_csum_update(*pskb, + htons(tcplen) ^ htons(0xFFFF), + htons(tcplen + TCPOLEN_MSS), + tcph->check, 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); + tcph->check = nf_proto_csum_update(*pskb, htonl(~0), *((__be32 *)opt), + tcph->check, 0); - oldval = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - tcph->check = cheat_check(oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], tcph->check); + tcph->check = nf_proto_csum_update(*pskb, + oldval ^ htons(0xFFFF), + ((__be16 *)tcph)[6], + tcph->check, 0); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = cheat_check(iph->tot_len ^ 0xFFFF, - newtotlen, iph->check); + iph->check = nf_csum_update(iph->tot_len ^ htons(0xFFFF), + newtotlen, iph->check); iph->tot_len = newtotlen; - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - newmss); - - retmodified: return IPT_CONTINUE; } @@ -200,9 +158,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m) { const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - if (strcmp(m->u.kernel.match->name, "tcp") == 0 - && (tcpinfo->flg_cmp & TH_SYN) - && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TH_SYN && + !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) return 1; return 0; @@ -214,17 +172,17 @@ ipt_tcpmss_checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; const struct ipt_entry *e = e_void; - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - ((hook_mask & ~((1 << NF_IP_FORWARD) - | (1 << NF_IP_LOCAL_OUT) - | (1 << NF_IP_POST_ROUTING))) != 0)) { - printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); + if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && + (hook_mask & ~((1 << NF_IP_FORWARD) | + (1 << NF_IP_LOCAL_OUT) | + (1 << NF_IP_POST_ROUTING))) != 0) { + printk("TCPMSS: path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); return 0; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 1c7a5ca399b..6b8b14ccc3d 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -26,27 +26,20 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, - void *userinfo) + const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; + struct iphdr *iph = (*pskb)->nh.iph; + __be16 oldtos; - if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { - u_int16_t diffs[2]; - + if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos - = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) - | tosinfo->tos; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); + iph = (*pskb)->nh.iph; + oldtos = iph->tos; + iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; + iph->check = nf_csum_update(oldtos ^ htons(0xFFFF), iph->tos, + iph->check); } return IPT_CONTINUE; } @@ -56,7 +49,6 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index f48892ae0be..ac9517d62af 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -23,11 +23,10 @@ static unsigned int ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct iphdr *iph; const struct ipt_TTL_info *info = targinfo; - u_int16_t diffs[2]; int new_ttl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -55,12 +54,10 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->check = nf_csum_update(htons((iph->ttl << 8)) ^ htons(0xFFFF), + htons(new_ttl << 8), + iph->check); iph->ttl = new_ttl; - diffs[1] = htons(((unsigned)iph->ttl) << 8); - iph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - iph->check^0xFFFF)); } return IPT_CONTINUE; @@ -70,7 +67,6 @@ static int ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) { struct ipt_TTL_info *info = targinfo; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d46fd677fa1..2b104ea54f4 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo, void *userinfo) + const void *targinfo) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; @@ -346,7 +346,6 @@ static int ipt_ulog_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, - unsigned int targinfosize, unsigned int hookmask) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 893dae210b0..7b60eb74788 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -22,7 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("iptables addrtype match"); -static inline int match_type(u_int32_t addr, u_int16_t mask) +static inline int match_type(__be32 addr, u_int16_t mask) { return !!(mask & (1 << inet_addr_type(addr))); } diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 2927135873d..1798f86bc53 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -74,7 +74,6 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, - unsigned int matchinfosize, unsigned int hook_mask) { const struct ipt_ah *ahinfo = matchinfo; diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c deleted file mode 100644 index 47177591aeb..00000000000 --- a/net/ipv4/netfilter/ipt_dscp.c +++ /dev/null @@ -1,54 +0,0 @@ -/* IP tables module for matching the value of the IPv4 DSCP field - * - * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp - * - * (C) 2002 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_dscp.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables DSCP matching module"); -MODULE_LICENSE("GPL"); - -static int match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_dscp_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - - u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; -} - -static struct ipt_match dscp_match = { - .name = "dscp", - .match = match, - .matchsize = sizeof(struct ipt_dscp_info), - .me = THIS_MODULE, -}; - -static int __init ipt_dscp_init(void) -{ - return ipt_register_match(&dscp_match); -} - -static void __exit ipt_dscp_fini(void) -{ - ipt_unregister_match(&dscp_match); - -} - -module_init(ipt_dscp_init); -module_exit(ipt_dscp_fini); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index b2825041493..dafbdec0efc 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -88,8 +88,7 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) + void *matchinfo, unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 3bd2368e1fc..33ccdbf8e79 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -50,11 +50,11 @@ static struct file_operations dl_file_ops; /* hash table crap */ struct dsthash_dst { - u_int32_t src_ip; - u_int32_t dst_ip; + __be32 src_ip; + __be32 dst_ip; /* ports have to be consecutive !!! */ - u_int16_t src_port; - u_int16_t dst_port; + __be16 src_port; + __be16 dst_port; }; struct dsthash_ent { @@ -106,8 +106,10 @@ static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) static inline u_int32_t hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst) { - return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port), - dst->src_ip, ht->rnd) % ht->cfg.size); + return (jhash_3words((__force u32)dst->dst_ip, + ((__force u32)dst->dst_port<<16 | + (__force u32)dst->src_port), + (__force u32)dst->src_ip, ht->rnd) % ht->cfg.size); } static inline struct dsthash_ent * @@ -406,7 +408,7 @@ hashlimit_match(const struct sk_buff *skb, dst.src_ip = skb->nh.iph->saddr; if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) { - u_int16_t _ports[2], *ports; + __be16 _ports[2], *ports; switch (skb->nh.iph->protocol) { case IPPROTO_TCP: @@ -478,7 +480,6 @@ hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { struct ipt_hashlimit_info *r = matchinfo; @@ -529,18 +530,46 @@ hashlimit_checkentry(const char *tablename, } static void -hashlimit_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +hashlimit_destroy(const struct xt_match *match, void *matchinfo) { struct ipt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } +#ifdef CONFIG_COMPAT +struct compat_ipt_hashlimit_info { + char name[IFNAMSIZ]; + struct hashlimit_cfg cfg; + compat_uptr_t hinfo; + compat_uptr_t master; +}; + +static void compat_from_user(void *dst, void *src) +{ + int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + + memcpy(dst, src, off); + memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off); +} + +static int compat_to_user(void __user *dst, void *src) +{ + int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); + + return copy_to_user(dst, src, off) ? -EFAULT : 0; +} +#endif + static struct ipt_match ipt_hashlimit = { .name = "hashlimit", .match = hashlimit_match, .matchsize = sizeof(struct ipt_hashlimit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_hashlimit_info), + .compat_from_user = compat_from_user, + .compat_to_user = compat_to_user, +#endif .checkentry = hashlimit_checkentry, .destroy = hashlimit_destroy, .me = THIS_MODULE diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 5ac6ac023b5..78c336f12a9 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -56,7 +56,6 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) { const struct ipt_owner_info *info = matchinfo; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 61a2139f9cf..126db44e71a 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -35,20 +35,25 @@ static unsigned int ip_list_tot = 100; static unsigned int ip_pkt_list_tot = 20; static unsigned int ip_list_hash_size = 0; static unsigned int ip_list_perms = 0644; +static unsigned int ip_list_uid = 0; +static unsigned int ip_list_gid = 0; module_param(ip_list_tot, uint, 0400); module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); +module_param(ip_list_uid, uint, 0400); +module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); - +MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); +MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); struct recent_entry { struct list_head list; struct list_head lru_list; - u_int32_t addr; + __be32 addr; u_int8_t ttl; u_int8_t index; u_int16_t nstamps; @@ -79,17 +84,17 @@ static struct file_operations recent_fops; static u_int32_t hash_rnd; static int hash_rnd_initted; -static unsigned int recent_entry_hash(u_int32_t addr) +static unsigned int recent_entry_hash(__be32 addr) { if (!hash_rnd_initted) { get_random_bytes(&hash_rnd, 4); hash_rnd_initted = 1; } - return jhash_1word(addr, hash_rnd) & (ip_list_hash_size - 1); + return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); } static struct recent_entry * -recent_entry_lookup(const struct recent_table *table, u_int32_t addr, u_int8_t ttl) +recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) { struct recent_entry *e; unsigned int h; @@ -110,7 +115,7 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) } static struct recent_entry * -recent_entry_init(struct recent_table *t, u_int32_t addr, u_int8_t ttl) +recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) { struct recent_entry *e; @@ -172,7 +177,7 @@ ipt_recent_match(const struct sk_buff *skb, const struct ipt_recent_info *info = matchinfo; struct recent_table *t; struct recent_entry *e; - u_int32_t addr; + __be32 addr; u_int8_t ttl; int ret = info->invert; @@ -232,7 +237,7 @@ out: static int ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int matchsize, unsigned int hook_mask) + unsigned int hook_mask) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -274,6 +279,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip, goto out; } t->proc->proc_fops = &recent_fops; + t->proc->uid = ip_list_uid; + t->proc->gid = ip_list_gid; t->proc->data = t; #endif spin_lock_bh(&recent_lock); @@ -286,8 +293,7 @@ out: } static void -ipt_recent_destroy(const struct xt_match *match, void *matchinfo, - unsigned int matchsize) +ipt_recent_destroy(const struct xt_match *match, void *matchinfo) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -399,7 +405,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - u_int32_t addr; + __be32 addr; int add; if (size > sizeof(buf)) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 7f417484bfb..e2e7dd8d790 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -90,7 +90,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int @@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4e7998beda6..e62ea2bb9c0 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int @@ -131,7 +131,7 @@ ipt_local_hook(unsigned int hook, { unsigned int ret; u_int8_t tos; - u_int32_t saddr, daddr; + __be32 saddr, daddr; unsigned long nfmark; /* root is playing with raw sockets. */ @@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook, daddr = (*pskb)->nh.iph->daddr; tos = (*pskb)->nh.iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE && ((*pskb)->nh.iph->saddr != saddr diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7912cce1e1b..bcbeb4aeacd 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -95,7 +95,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); + return ipt_do_table(pskb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 663a73ee3f2..790f00d500c 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -25,7 +25,7 @@ #include <net/netfilter/nf_conntrack_protocol.h> #include <net/netfilter/nf_conntrack_core.h> -unsigned long nf_ct_icmp_timeout = 30*HZ; +unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; #if 0 #define DEBUGP printk diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d61e2a9d394..9c6cbe3d9fb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -173,6 +173,8 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), + SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), + SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 62b2762a242..b430cf2a4f6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -38,8 +38,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include <linux/config.h> + #include <linux/types.h> #include <asm/atomic.h> #include <asm/byteorder.h> @@ -382,8 +381,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; - u32 daddr; - u32 saddr; + __be32 daddr; + __be32 saddr; u8 tos; int err; @@ -484,6 +483,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b873cbcdd0b..c41ddba02e9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -261,6 +261,10 @@ static unsigned int rt_hash_code(u32 daddr, u32 saddr) & rt_hash_mask); } +#define rt_hash(daddr, saddr, idx) \ + rt_hash_code((__force u32)(__be32)(daddr),\ + (__force u32)(__be32)(saddr) ^ ((idx) << 5)) + #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { int bucket; @@ -1074,7 +1078,7 @@ static void ip_select_fb_ident(struct iphdr *iph) u32 salt; spin_lock_bh(&ip_fb_id_lock); - salt = secure_ip_id(ip_fallback_id ^ iph->daddr); + salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); iph->id = htons(salt & 0xFFFF); ip_fallback_id = salt; spin_unlock_bh(&ip_fb_id_lock); @@ -1118,13 +1122,13 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, - u32 saddr, struct net_device *dev) +void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, + __be32 saddr, struct net_device *dev) { int i, k; struct in_device *in_dev = in_dev_get(dev); struct rtable *rth, **rthp; - u32 skeys[2] = { saddr, 0 }; + __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; struct netevent_redirect netevent; @@ -1147,8 +1151,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { - unsigned hash = rt_hash_code(daddr, - skeys[i] ^ (ikeys[k] << 5)); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); rthp=&rt_hash_table[hash].chain; @@ -1260,9 +1263,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { - unsigned hash = rt_hash_code(rt->fl.fl4_dst, - rt->fl.fl4_src ^ - (rt->fl.oif << 5)); + unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, + rt->fl.oif); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ip_rt_advice: redirect to " "%u.%u.%u.%u/%02x dropped\n", @@ -1397,15 +1399,15 @@ unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu) int i; unsigned short old_mtu = ntohs(iph->tot_len); struct rtable *rth; - u32 skeys[2] = { iph->saddr, 0, }; - u32 daddr = iph->daddr; + __be32 skeys[2] = { iph->saddr, 0, }; + __be32 daddr = iph->daddr; unsigned short est_mtu = 0; if (ipv4_config.no_pmtu_disc) return 0; for (i = 0; i < 2; i++) { - unsigned hash = rt_hash_code(daddr, skeys[i]); + unsigned hash = rt_hash(daddr, skeys[i], 0); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1530,7 +1532,7 @@ static int ip_rt_bug(struct sk_buff *skb) void ip_rt_get_source(u8 *addr, struct rtable *rt) { - u32 src; + __be32 src; struct fib_result res; if (rt->fl.iif == 0) @@ -1596,12 +1598,12 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } -static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { unsigned hash; struct rtable *rth; - u32 spec_dst; + __be32 spec_dst; struct in_device *in_dev = in_dev_get(dev); u32 itag = 0; @@ -1665,7 +1667,7 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5)); + hash = rt_hash(daddr, saddr, dev->ifindex); return rt_intern_hash(hash, rth, (struct rtable**) &skb->dst); e_nobufs: @@ -1681,8 +1683,8 @@ e_inval: static void ip_handle_martian_source(struct net_device *dev, struct in_device *in_dev, struct sk_buff *skb, - u32 daddr, - u32 saddr) + __be32 daddr, + __be32 saddr) { RT_CACHE_STAT_INC(in_martian_src); #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -1712,7 +1714,7 @@ static void ip_handle_martian_source(struct net_device *dev, static inline int __mkroute_input(struct sk_buff *skb, struct fib_result* res, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos, + __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { @@ -1720,7 +1722,8 @@ static inline int __mkroute_input(struct sk_buff *skb, int err; struct in_device *out_dev; unsigned flags = 0; - u32 spec_dst, itag; + __be32 spec_dst; + u32 itag; /* get a working reference to the output device */ out_dev = in_dev_get(FIB_RES_DEV(*res)); @@ -1813,7 +1816,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable* rth = NULL; int err; @@ -1830,7 +1833,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); + hash = rt_hash(daddr, saddr, fl->iif); return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); } @@ -1838,7 +1841,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED struct rtable* rth = NULL, *rtres; @@ -1871,7 +1874,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash_code(daddr, saddr ^ (fl->iif << 5)); + hash = rt_hash(daddr, saddr, fl->iif); err = rt_intern_hash(hash, rth, &rtres); if (err) return err; @@ -1901,7 +1904,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, * 2. IP spoofing attempts are filtered with 100% of guarantee. */ -static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, +static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; @@ -1920,7 +1923,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, u32 itag = 0; struct rtable * rth; unsigned hash; - u32 spec_dst; + __be32 spec_dst; int err = -EINVAL; int free_res = 0; @@ -1936,7 +1939,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, if (MULTICAST(saddr) || BADCLASS(saddr) || LOOPBACK(saddr)) goto martian_source; - if (daddr == 0xFFFFFFFF || (saddr == 0 && daddr == 0)) + if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0)) goto brd_input; /* Accept zero addresses only to limited broadcast; @@ -2048,7 +2051,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5)); + hash = rt_hash(daddr, saddr, fl.iif); err = rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); goto done; @@ -2087,7 +2090,7 @@ martian_source: goto e_inval; } -int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct rtable * rth; @@ -2095,7 +2098,7 @@ int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, int iif = dev->ifindex; tos &= IPTOS_RT_MASK; - hash = rt_hash_code(daddr, saddr ^ (iif << 5)); + hash = rt_hash(daddr, saddr, iif); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2169,7 +2172,7 @@ static inline int __mkroute_output(struct rtable **result, if (LOOPBACK(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK)) return -EINVAL; - if (fl->fl4_dst == 0xFFFFFFFF) + if (fl->fl4_dst == htonl(0xFFFFFFFF)) res->type = RTN_BROADCAST; else if (MULTICAST(fl->fl4_dst)) res->type = RTN_MULTICAST; @@ -2293,8 +2296,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - hash = rt_hash_code(oldflp->fl4_dst, - oldflp->fl4_src ^ (oldflp->oif << 5)); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); err = rt_intern_hash(hash, rth, rp); } @@ -2336,9 +2338,8 @@ static inline int ip_mkroute_output(struct rtable** rp, if (err != 0) goto cleanup; - hash = rt_hash_code(oldflp->fl4_dst, - oldflp->fl4_src ^ - (oldflp->oif << 5)); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, + oldflp->oif); err = rt_intern_hash(hash, rth, rp); /* forward hop information to multipath impl. */ @@ -2417,7 +2418,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) */ if (oldflp->oif == 0 - && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) { + && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2454,7 +2455,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) goto out; /* Wrong error code */ } - if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) { + if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF)) { if (!fl.fl4_src) fl.fl4_src = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); @@ -2567,7 +2568,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp) unsigned hash; struct rtable *rth; - hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5)); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2639,51 +2640,54 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, { struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + struct nlmsghdr *nlh; struct rta_cacheinfo ci; -#ifdef CONFIG_IP_MROUTE - struct rtattr *eptr; -#endif - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); - r = NLMSG_DATA(nlh); + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + if (nlh == NULL) + return -ENOBUFS; + + r = nlmsg_data(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; + NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); + + NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); + if (rt->fl.fl4_src) { r->rtm_src_len = 32; - RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); + NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); } if (rt->u.dst.dev) - RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); + NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); + NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { - __u32 alg = rt->rt_multipath_alg; - - RTA_PUT(skb, RTA_MP_ALGO, 4, &alg); - } + if (rt->rt_multipath_alg != IP_MP_ALG_NONE) + NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); #endif if (rt->fl.iif) - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) - RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); + if (rt->rt_dst != rt->rt_gateway) - RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); + NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); + if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto rtattr_failure; + goto nla_put_failure; + ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); ci.rta_used = rt->u.dst.__use; ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); @@ -2700,13 +2704,10 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } } -#ifdef CONFIG_IP_MROUTE - eptr = (struct rtattr*)skb->tail; -#endif - RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + if (rt->fl.iif) { #ifdef CONFIG_IP_MROUTE - u32 dst = rt->rt_dst; + __be32 dst = rt->rt_dst; if (MULTICAST(dst) && !LOCAL_MCAST(dst) && ipv4_devconf.mc_forwarding) { @@ -2715,41 +2716,48 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (!nowait) { if (err == 0) return 0; - goto nlmsg_failure; + goto nla_put_failure; } else { if (err == -EMSGSIZE) - goto nlmsg_failure; - ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; + goto nla_put_failure; + ci.rta_error = err; } } } else #endif - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); } - nlh->nlmsg_len = skb->tail - b; - return skb->len; + NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; - u32 dst = 0; - u32 src = 0; - int iif = 0; - int err = -ENOBUFS; + __be32 dst = 0; + __be32 src = 0; + u32 iif; + int err; struct sk_buff *skb; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - goto out; + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. @@ -2760,62 +2768,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->nh.iph->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - if (rta[RTA_SRC - 1]) - memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); - if (rta[RTA_DST - 1]) - memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); - if (rta[RTA_IIF - 1]) - memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); + src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; + iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; if (iif) { - struct net_device *dev = __dev_get_by_index(iif); - err = -ENODEV; - if (!dev) - goto out_free; + struct net_device *dev; + + dev = __dev_get_by_index(iif); + if (dev == NULL) { + err = -ENODEV; + goto errout_free; + } + skb->protocol = htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = (struct rtable*)skb->dst; - if (!err && rt->u.dst.error) + + rt = (struct rtable*) skb->dst; + if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos } } }; - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = dst, + .saddr = src, + .tos = rtm->rtm_tos, + }, + }, + .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + }; err = ip_route_output_key(&rt, &fl); } + if (err) - goto out_free; + goto errout_free; skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (!err) - goto out_free; - if (err < 0) { - err = -EMSGSIZE; - goto out_free; - } + if (err <= 0) + goto errout_free; - err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); - if (err > 0) - err = 0; -out: return err; + err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +errout: + return err; -out_free: +errout_free: kfree_skb(skb); - goto out; + goto errout; } int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -3143,13 +3150,9 @@ int __init ip_rt_init(void) } #endif - ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", - sizeof(struct rtable), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - - if (!ipv4_dst_ops.kmem_cachep) - panic("IP: failed to allocate ip_dst_cache\n"); + ipv4_dst_ops.kmem_cachep = + kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f6..661e0a4bca7 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -259,6 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; + security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 70cea9d08a3..e82a5be894b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -17,6 +17,7 @@ #include <net/ip.h> #include <net/route.h> #include <net/tcp.h> +#include <net/cipso_ipv4.h> /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -128,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, return ret; } +static int __init tcp_congestion_default(void) +{ + return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); +} + +late_initcall(tcp_congestion_default); ctl_table ipv4_table[] = { { @@ -697,6 +704,40 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, +#ifdef CONFIG_NETLABEL + { + .ctl_name = NET_CIPSOV4_CACHE_ENABLE, + .procname = "cipso_cache_enable", + .data = &cipso_v4_cache_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, + .procname = "cipso_cache_bucket_size", + .data = &cipso_v4_cache_bucketsize, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_OPTFMT, + .procname = "cipso_rbm_optfmt", + .data = &cipso_v4_rbm_optfmt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, + .procname = "cipso_rbm_strictvalid", + .data = &cipso_v4_rbm_strictvalid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* CONFIG_NETLABEL */ { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 934396bb137..66e9a729f6d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -268,7 +268,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> -int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; +int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; @@ -568,7 +568,7 @@ new_segment: skb->truesize += copy; sk->sk_wmem_queued += copy; sk->sk_forward_alloc -= copy; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; skb_shinfo(skb)->gso_segs = 0; @@ -723,7 +723,7 @@ new_segment: * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, tp, skb); copy = size_goal; @@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -2205,7 +2208,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) th->fin = th->psh = 0; th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2219,7 +2222,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2254,9 +2257,7 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!tcp_hashinfo.bind_bucket_cachep) - panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); /* Size and allocate the main established and bind bucket * hash tables. diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index b0134ab0837..5730333cd0a 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7ff2e4273a7..af0aca1e6be 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) printk(KERN_NOTICE "TCP %s already registered\n", ca->name); ret = -EEXIST; } else { - list_add_rcu(&ca->list, &tcp_cong_list); + list_add_tail_rcu(&ca->list, &tcp_cong_list); printk(KERN_INFO "TCP %s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 2be27980ca7..a60ef38d75c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = { static int __init cubictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index fa3e1aad660..c4fc811bf37 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 6edfe5e4510..682e7d5b6f2 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); return tcp_register_congestion_control(&htcp); } diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 7406e0c5fb8..59e691d26f6 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 159fa3f1ba6..3f884cea14f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -72,24 +72,24 @@ #include <asm/unaligned.h> #include <net/netdma.h> -int sysctl_tcp_timestamps = 1; -int sysctl_tcp_window_scaling = 1; -int sysctl_tcp_sack = 1; -int sysctl_tcp_fack = 1; -int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; -int sysctl_tcp_ecn; -int sysctl_tcp_dsack = 1; -int sysctl_tcp_app_win = 31; -int sysctl_tcp_adv_win_scale = 2; - -int sysctl_tcp_stdurg; -int sysctl_tcp_rfc1337; -int sysctl_tcp_max_orphans = NR_FILE; -int sysctl_tcp_frto; -int sysctl_tcp_nometrics_save; - -int sysctl_tcp_moderate_rcvbuf = 1; -int sysctl_tcp_abc; +int sysctl_tcp_timestamps __read_mostly = 1; +int sysctl_tcp_window_scaling __read_mostly = 1; +int sysctl_tcp_sack __read_mostly = 1; +int sysctl_tcp_fack __read_mostly = 1; +int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; +int sysctl_tcp_ecn __read_mostly; +int sysctl_tcp_dsack __read_mostly = 1; +int sysctl_tcp_app_win __read_mostly = 31; +int sysctl_tcp_adv_win_scale __read_mostly = 2; + +int sysctl_tcp_stdurg __read_mostly; +int sysctl_tcp_rfc1337 __read_mostly; +int sysctl_tcp_max_orphans __read_mostly = NR_FILE; +int sysctl_tcp_frto __read_mostly; +int sysctl_tcp_nometrics_save __read_mostly; + +int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; +int sysctl_tcp_abc __read_mostly; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ - len = skb->len; + len = skb_shinfo(skb)->gso_size ?: skb->len; if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = len; } else { @@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, return; } } + if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } @@ -933,7 +935,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; - struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); + struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; @@ -2237,13 +2239,12 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static u32 tcp_usrtt(const struct sk_buff *skb) +static u32 tcp_usrtt(struct timeval *tv) { - struct timeval tv, now; + struct timeval now; do_gettimeofday(&now); - skb_get_timestamp(skb, &tv); - return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec); + return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec); } /* Remove acknowledged frames from the retransmission queue. */ @@ -2258,6 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) u32 pkts_acked = 0; void (*rtt_sample)(struct sock *sk, u32 usrtt) = icsk->icsk_ca_ops->rtt_sample; + struct timeval tv; while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { @@ -2306,8 +2308,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) seq_rtt = -1; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - if (rtt_sample) - (*rtt_sample)(sk, tcp_usrtt(skb)); + skb_get_timestamp(skb, &tv); } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); @@ -2320,8 +2321,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - if (rtt_sample) - (*rtt_sample)(sk, tcp_usrtt(skb)); + skb_get_timestamp(skb, &tv); } tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); @@ -2333,6 +2333,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) if (acked&FLAG_ACKED) { tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk, tp); + if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) + (*rtt_sample)(sk, tcp_usrtt(&tv)); if (icsk->icsk_ca_ops->pkts_acked) icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); @@ -2627,7 +2629,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, switch(opcode) { case TCPOPT_MSS: if(opsize==TCPOLEN_MSS && th->syn && !estab) { - u16 in_mss = ntohs(get_unaligned((__u16 *)ptr)); + u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); if (in_mss) { if (opt_rx->user_mss && opt_rx->user_mss < in_mss) in_mss = opt_rx->user_mss; @@ -2655,8 +2657,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, if ((estab && opt_rx->tstamp_ok) || (!estab && sysctl_tcp_timestamps)) { opt_rx->saw_tstamp = 1; - opt_rx->rcv_tsval = ntohl(get_unaligned((__u32 *)ptr)); - opt_rx->rcv_tsecr = ntohl(get_unaligned((__u32 *)(ptr+4))); + opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); + opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); } } break; @@ -2693,8 +2695,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 0; } else if (tp->rx_opt.tstamp_ok && th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { - __u32 *ptr = (__u32 *)(th + 1); - if (*ptr == ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + __be32 *ptr = (__be32 *)(th + 1); + if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { tp->rx_opt.saw_tstamp = 1; ++ptr; @@ -3909,10 +3911,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Check timestamp */ if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - __u32 *ptr = (__u32 *)(th + 1); + __be32 *ptr = (__be32 *)(th + 1); /* No? Slow path! */ - if (*ptr != ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) goto slow_path; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3edd4a..c83938b8fcb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -78,8 +78,8 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> -int sysctl_tcp_tw_reuse; -int sysctl_tcp_low_latency; +int sysctl_tcp_tw_reuse __read_mostly; +int sysctl_tcp_low_latency __read_mostly; /* Check TCP sequence numbers in ICMP packets. */ #define ICMP_MIN_LENGTH 8 @@ -159,7 +159,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct tcp_sock *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; - u32 daddr, nexthop; + __be32 daddr, nexthop; int tmp; int err; @@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -734,8 +734,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct inet_request_sock *ireq; struct tcp_options_received tmp_opt; struct request_sock *req; - __u32 saddr = skb->nh.iph->saddr; - __u32 daddr = skb->nh.iph->daddr; + __be32 saddr = skb->nh.iph->saddr; + __be32 daddr = skb->nh.iph->daddr; __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES @@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; @@ -948,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, - th->source, skb->nh.iph->daddr, - ntohs(th->dest), inet_iif(skb)); + nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + th->dest, inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -970,7 +973,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static int tcp_v4_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1087,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), + skb->nh.iph->daddr, th->dest, inet_iif(skb)); if (!sk) @@ -1101,7 +1104,7 @@ process: goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; @@ -1165,7 +1168,7 @@ do_time_wait: case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, - ntohs(th->dest), + th->dest, inet_iif(skb)); if (sk2) { inet_twsk_deschedule((struct inet_timewait_sock *)sk, @@ -1760,7 +1763,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) { - unsigned int dest, src; + __be32 dest, src; __u16 destp, srcp; int ttd = tw->tw_ttd - jiffies; diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index 48f28d617ce..f0ebaf0e21c 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -31,11 +31,8 @@ * Hung Hing Lun, Mike <hlhung3i@gmail.com> * SourceForge project page: * http://tcp-lp-mod.sourceforge.net/ - * - * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ */ -#include <linux/config.h> #include <linux/module.h> #include <net/tcp.h> @@ -165,7 +162,7 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) out: /* record time for successful remote HZ calc */ - if (rhz > 0) + if ((rhz >> 6) > 0) lp->flag |= LP_VALID_RHZ; else lp->flag &= ~LP_VALID_RHZ; @@ -328,7 +325,7 @@ static struct tcp_congestion_ops tcp_lp = { static int __init tcp_lp_register(void) { - BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_lp); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624e2b2c7f5..0163d982690 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -34,8 +34,8 @@ #define SYNC_INIT 1 #endif -int sysctl_tcp_syncookies = SYNC_INIT; -int sysctl_tcp_abort_on_overflow; +int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; +int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4f3ffe1b3b..9a253faefc8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -43,24 +43,24 @@ #include <linux/smp_lock.h> /* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse = 1; +int sysctl_tcp_retrans_collapse __read_mostly = 1; /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ -int sysctl_tcp_workaround_signed_windows = 0; +int sysctl_tcp_workaround_signed_windows __read_mostly = 0; /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. */ -int sysctl_tcp_tso_win_divisor = 3; +int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing = 0; -int sysctl_tcp_base_mss = 512; +int sysctl_tcp_mtu_probing __read_mostly = 0; +int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle = 1; +int sysctl_tcp_slow_start_after_idle __read_mostly = 1; static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) @@ -269,7 +269,7 @@ static u16 tcp_select_window(struct sock *sk) return new_win; } -static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, +static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp) { if (tp->rx_opt.tstamp_ok) { @@ -305,7 +305,7 @@ static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, * MAX_SYN_SIZE to match the new maximum number of options that you * can generate. */ -static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, +static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) { @@ -424,7 +424,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->dest = inet->dport; th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); - *(((__u16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | + *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | tcb->flags); if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { @@ -445,7 +445,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { - tcp_syn_build_options((__u32 *)(th + 1), + tcp_syn_build_options((__be32 *)(th + 1), tcp_advertise_mss(sk), (sysctl_flags & SYSCTL_FLAG_TSTAMPS), (sysctl_flags & SYSCTL_FLAG_SACK), @@ -454,7 +454,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcb->when, tp->rx_opt.ts_recent); } else { - tcp_build_and_update_options((__u32 *)(th + 1), + tcp_build_and_update_options((__be32 *)(th + 1), tp, tcb->when); TCP_ECN_send(sk, tp, skb, tcp_header_size); } @@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), nsize, 0); @@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss skb->csum = csum_block_sub(skb->csum, buff->csum, len); } else { - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); } @@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = CHECKSUM_PARTIAL; skb->truesize -= len; sk->sk_wmem_queued -= len; @@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; - buff->ip_summed = skb->ip_summed = CHECKSUM_HW; + buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. */ @@ -1206,8 +1206,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - if (skb->ip_summed == CHECKSUM_HW) - nskb->ip_summed = CHECKSUM_HW; + nskb->ip_summed = skb->ip_summed; len = 0; while (len < probe_size) { @@ -1231,7 +1230,7 @@ static int tcp_mtu_probe(struct sock *sk) ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_partial(skb->data, skb->len, 0); } else { __pskb_trim_head(skb, copy); @@ -1572,10 +1571,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - if (next_skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_HW; + skb->ip_summed = next_skb->ip_summed; - if (skb->ip_summed != CHECKSUM_HW) + if (skb->ip_summed != CHECKSUM_PARTIAL) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. */ @@ -2072,7 +2070,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->window = htons(req->rcv_wnd); TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, + tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, TCP_SKB_CB(skb)->when, req->ts_recent); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7c1bde3cd6c..fb09ade5897 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -23,14 +23,14 @@ #include <linux/module.h> #include <net/tcp.h> -int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; -int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; -int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; -int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; -int sysctl_tcp_retries1 = TCP_RETR1; -int sysctl_tcp_retries2 = TCP_RETR2; -int sysctl_tcp_orphan_retries; +int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; +int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; +int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; +int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; +int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; +int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; +int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; +int sysctl_tcp_orphan_retries __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 490360b5b4b..a3b7aa015a2 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 11b42a7135c..ce57bf302f6 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -9,7 +9,6 @@ * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf */ -#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/skbuff.h> @@ -213,7 +212,7 @@ static struct tcp_congestion_ops tcp_veno = { static int __init tcp_veno_register(void) { - BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_veno); return 0; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 5446312ffd2..4f42a86c77f 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); + BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f136cec96d9..6d6142f9c47 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -118,14 +118,33 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -/* Shared by v4/v6 udp. */ -int udp_port_rover; +static int udp_port_rover; -static int udp_v4_get_port(struct sock *sk, unsigned short snum) +static inline int udp_lport_inuse(u16 num) +{ + struct sock *sk; + struct hlist_node *node; + + sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + if (inet_sk(sk)->num == num) + return 1; + return 0; +} + +/** + * udp_get_port - common port lookup for IPv4 and IPv6 + * + * @sk: socket struct in question + * @snum: port number to look up + * @saddr_comp: AF-dependent comparison of bound local IP addresses + */ +int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) { struct hlist_node *node; + struct hlist_head *head; struct sock *sk2; - struct inet_sock *inet = inet_sk(sk); + int error = 1; write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -137,11 +156,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct hlist_head *list; int size; - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { + head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -149,12 +167,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) goto gotit; } size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; + sk_for_each(sk2, node, head) + if (++size < best_size_so_far) { + best_size_so_far = size; + best = result; + } } result = best; for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { @@ -170,38 +187,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) gotit: udp_port_rover = snum = result; } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet2 = inet_sk(sk2); - - if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && - (!sk2->sk_reuse || !sk->sk_reuse)) + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + + sk_for_each(sk2, node, head) + if (inet_sk(sk2)->num == snum && + sk2 != sk && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_cmp)(sk, sk2) ) goto fail; - } } - inet->num = snum; + inet_sk(sk)->num = snum; if (sk_unhashed(sk)) { - struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - - sk_add_node(sk, h); + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } - write_unlock_bh(&udp_hash_lock); - return 0; - + error = 0; fail: write_unlock_bh(&udp_hash_lock); - return 1; + return error; +} + +static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); } +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + + static void udp_v4_hash(struct sock *sk) { BUG(); @@ -220,8 +243,8 @@ static void udp_v4_unhash(struct sock *sk) /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) +static struct sock *udp_v4_lookup_longway(__be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -265,8 +288,8 @@ static struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, return result; } -static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, - u32 daddr, u16 dport, int dif) +static __inline__ struct sock *udp_v4_lookup(__be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) { struct sock *sk; @@ -279,8 +302,8 @@ static __inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, } static inline struct sock *udp_v4_mcast_next(struct sock *sk, - u16 loc_port, u32 loc_addr, - u16 rmt_port, u32 rmt_addr, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, int dif) { struct hlist_node *node; @@ -429,7 +452,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) /* * Only one fragment on the socket. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { skb->csum = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, IPPROTO_UDP, 0); @@ -448,7 +471,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) * fragments on the socket so that all csums of sk_buffs * should be together. */ - if (skb->ip_summed == CHECKSUM_HW) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { int offset = (unsigned char *)uh - skb->data; skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -475,7 +498,7 @@ out: } -static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base) +static unsigned short udp_check(struct udphdr *uh, int len, __be32 saddr, __be32 daddr, unsigned long base) { return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base)); } @@ -490,8 +513,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct rtable *rt = NULL; int free = 0; int connected = 0; - u32 daddr, faddr, saddr; - u16 dport; + __be32 daddr, faddr, saddr; + __be16 dport; u8 tos; int err; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; @@ -603,6 +626,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -661,6 +685,16 @@ out: UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); + } return err; do_confirm: @@ -897,7 +931,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) int iphlen, len; __u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr); - __u32 *udpdata32 = (__u32 *)udpdata; + __be32 *udpdata32 = (__be32 *)udpdata; __u16 encap_type = up->encap_type; /* if we're overly short, let UDP handle it */ @@ -980,6 +1014,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); + int rc; /* * Charge it to the socket, dropping if the queue is full. @@ -1026,7 +1061,10 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (sock_queue_rcv_skb(sk,skb)<0) { + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; @@ -1042,7 +1080,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) * so we don't need to lock the hashes. */ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, - u32 saddr, u32 daddr) + __be32 saddr, __be32 daddr) { struct sock *sk; int dif; @@ -1083,11 +1121,11 @@ static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh, * including udp header and folding it to skb->csum. */ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, u32 saddr, u32 daddr) + unsigned short ulen, __be32 saddr, __be32 daddr) { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_HW) { + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -1108,8 +1146,8 @@ int udp_rcv(struct sk_buff *skb) struct udphdr *uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; - u32 saddr = skb->nh.iph->saddr; - u32 daddr = skb->nh.iph->daddr; + __be32 saddr = skb->nh.iph->saddr; + __be32 daddr = skb->nh.iph->daddr; int len = skb->len; /* @@ -1525,8 +1563,8 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) { struct inet_sock *inet = inet_sk(sp); - unsigned int dest = inet->daddr; - unsigned int src = inet->rcv_saddr; + __be32 dest = inet->daddr; + __be32 src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); @@ -1581,7 +1619,7 @@ EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_port_rover); +EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a..8655d038364 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,7 +23,7 @@ int xfrm4_rcv(struct sk_buff *skb) EXPORT_SYMBOL(xfrm4_rcv); -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { switch (nexthdr) { case IPPROTO_IPIP: @@ -55,7 +55,7 @@ drop: int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; - u32 spi, seq; + __be32 spi, seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index a9e6b3dd19c..92676b7e403 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -21,9 +21,8 @@ * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_transport_output(struct sk_buff *skb) +static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x; struct iphdr *iph; int ihl; @@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb) ihl = iph->ihl * 4; skb->h.raw += ihl; - x = skb->dst->xfrm; skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 13cafbe56ce..e23c21d31a5 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_tunnel_output(struct sk_buff *skb) +static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; int flags; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d16f863cf68..04403fb01a5 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_HW) { - err = skb_checksum_help(skb, 0); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); if (err) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8f50eae47d0..7a7a00147e5 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return __ip_route_output_key((struct rtable**)dst, fl); } +static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rtable *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip4_u = { + .daddr = daddr->a4, + }, + }, + }; + + if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + saddr->a4 = rt->rt_src; + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -33,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET)) { + xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } @@ -93,10 +112,11 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; @@ -135,6 +155,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = 0; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -200,7 +221,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_ESP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u32 *ehdr = (u32 *)xprth; + __be32 *ehdr = (__be32 *)xprth; fl->fl_ipsec_spi = ehdr[0]; } @@ -208,7 +229,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_AH: if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - u32 *ah_hdr = (u32*)xprth; + __be32 *ah_hdr = (__be32*)xprth; fl->fl_ipsec_spi = ah_hdr[1]; } @@ -216,7 +237,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_COMP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u16 *ipcomp_hdr = (u16 *)xprth; + __be16 *ipcomp_hdr = (__be16 *)xprth; fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } @@ -296,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, + .get_saddr = xfrm4_get_saddr, .find_bundle = __xfrm4_find_bundle, .bundle_create = __xfrm4_bundle_create, .decode_session = _decode_session4, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966..3cc3df0c6ec 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -29,9 +29,9 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.daddr.a4 = fl->fl4_dst; x->sel.saddr.a4 = fl->fl4_src; x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = ~0; + x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = ~0; + x->sel.sport_mask = htons(0xffff); x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; @@ -42,99 +42,15 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode && x->props.saddr.a4 == 0) { - struct rtable *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip4_u = { - .daddr = x->id.daddr.a4, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET)) { - x->props.saddr.a4 = rt->rt_src; - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET; } -static struct xfrm_state * -__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm4_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET && - spi == x->id.spi && - daddr->a4 == x->id.daddr.a4 && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -static struct xfrm_state * -__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm4_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET && - daddr->a4 == x->id.daddr.a4 && - mode == x->props.mode && - proto == x->id.proto && - saddr->a4 == x->props.saddr.a4 && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - x0->sel.daddr.a4 = daddr->a4; - x0->sel.saddr.a4 = saddr->a4; - x0->sel.prefixlen_d = 32; - x0->sel.prefixlen_s = 32; - x0->props.saddr.a4 = saddr->a4; - x0->km.state = XFRM_STATE_ACQ; - x0->id.daddr.a4 = daddr->a4; - x0->id.proto = proto; - x0->props.family = AF_INET; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->props.family = AF_INET; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, - .state_lookup = __xfrm4_state_lookup, - .find_acq = __xfrm4_find_acq, }; void __init xfrm4_state_init(void) diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index f8ceaa127c8..f110af5b131 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) |