diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 08:26:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-04 08:26:19 -0700 |
commit | d002ec481c24f325ed6cfcb7810d317c015dd1b5 (patch) | |
tree | 71fbdce6aab6ffb5f8590e7ddde7c095a7fa31ab | |
parent | 5a96c5d0c58ead9a0ece03ffe1c116dea6dafe9c (diff) | |
parent | 0a69452cb45add0841c2bc1e75c25f6bd4f1d8d9 (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
[XFRM]: BEET mode
[TCP]: Kill warning in tcp_clean_rtx_queue().
[NET_SCHED]: Remove old estimator implementation
[ATM]: [zatm] always *pcr in alloc_shaper()
[ATM]: [ambassador] Change the return type to reflect reality
[ATM]: kmalloc to kzalloc patches for drivers/atm
[TIPC]: fix printk warning
[XFRM]: Clearing xfrm_policy_count[] to zero during flush is incorrect.
[XFRM] STATE: Use destination address for src hash.
[NEIGH]: always use hash_mask under tbl lock
[UDP]: Fix MSG_PROBE crash
[UDP6]: Fix flowi clobbering
[NET_SCHED]: Revert "HTB: fix incorrect use of RB_EMPTY_NODE"
[NETFILTER]: ebt_mark: add or/and/xor action support to mark target
[NETFILTER]: ipt_REJECT: remove largely duplicate route_reverse function
[NETFILTER]: Honour source routing for LVS-NAT
[NETFILTER]: add type parameter to ip_route_me_harder
[NETFILTER]: Kconfig: fix xt_physdev dependencies
41 files changed, 462 insertions, 398 deletions
diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c index 6cc93de0b71..ac2c10822be 100644 --- a/drivers/atm/adummy.c +++ b/drivers/atm/adummy.c @@ -113,15 +113,13 @@ static int __init adummy_init(void) printk(KERN_ERR "adummy: version %s\n", DRV_VERSION); - adummy_dev = (struct adummy_dev *) kmalloc(sizeof(struct adummy_dev), + adummy_dev = kzalloc(sizeof(struct adummy_dev), GFP_KERNEL); if (!adummy_dev) { - printk(KERN_ERR DEV_LABEL ": kmalloc() failed\n"); + printk(KERN_ERR DEV_LABEL ": kzalloc() failed\n"); err = -ENOMEM; goto out; } - memset(adummy_dev, 0, sizeof(struct adummy_dev)); - atm_dev = atm_dev_register(DEV_LABEL, &adummy_ops, -1, NULL); if (!atm_dev) { printk(KERN_ERR DEV_LABEL ": atm_dev_register() failed\n"); diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 4521a249dd5..da599e6e9d3 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -915,8 +915,8 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id, /********** make rate (not quite as much fun as Horizon) **********/ -static unsigned int make_rate (unsigned int rate, rounding r, - u16 * bits, unsigned int * actual) { +static int make_rate (unsigned int rate, rounding r, + u16 * bits, unsigned int * actual) { unsigned char exp = -1; // hush gcc unsigned int man = -1; // hush gcc diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 38fc054bd67..5f25e5efefc 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1784,7 +1784,7 @@ static int __devinit fs_init (struct fs_dev *dev) write_fs (dev, RAM, (1 << (28 - FS155_VPI_BITS - FS155_VCI_BITS)) - 1); dev->nchannels = FS155_NR_CHANNELS; } - dev->atm_vccs = kmalloc (dev->nchannels * sizeof (struct atm_vcc *), + dev->atm_vccs = kcalloc (dev->nchannels, sizeof (struct atm_vcc *), GFP_KERNEL); fs_dprintk (FS_DEBUG_ALLOC, "Alloc atmvccs: %p(%Zd)\n", dev->atm_vccs, dev->nchannels * sizeof (struct atm_vcc *)); @@ -1794,9 +1794,8 @@ static int __devinit fs_init (struct fs_dev *dev) /* XXX Clean up..... */ return 1; } - memset (dev->atm_vccs, 0, dev->nchannels * sizeof (struct atm_vcc *)); - dev->tx_inuse = kmalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL); + dev->tx_inuse = kzalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL); fs_dprintk (FS_DEBUG_ALLOC, "Alloc tx_inuse: %p(%d)\n", dev->atm_vccs, dev->nchannels / 8); @@ -1805,8 +1804,6 @@ static int __devinit fs_init (struct fs_dev *dev) /* XXX Clean up..... */ return 1; } - memset (dev->tx_inuse, 0, dev->nchannels / 8); - /* -- RAS1 : FS155 and 50 differ. Default (0) should be OK for both */ /* -- RAS2 : FS50 only: Default is OK. */ @@ -1893,14 +1890,11 @@ static int __devinit firestream_init_one (struct pci_dev *pci_dev, if (pci_enable_device(pci_dev)) goto err_out; - fs_dev = kmalloc (sizeof (struct fs_dev), GFP_KERNEL); + fs_dev = kzalloc (sizeof (struct fs_dev), GFP_KERNEL); fs_dprintk (FS_DEBUG_ALLOC, "Alloc fs-dev: %p(%Zd)\n", fs_dev, sizeof (struct fs_dev)); if (!fs_dev) goto err_out; - - memset (fs_dev, 0, sizeof (struct fs_dev)); - atm_dev = atm_dev_register("fs", &ops, -1, NULL); if (!atm_dev) goto err_out_free_fs_dev; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index f2511b42dba..b22a9142b24 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -383,14 +383,12 @@ he_init_one(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent) } pci_set_drvdata(pci_dev, atm_dev); - he_dev = (struct he_dev *) kmalloc(sizeof(struct he_dev), + he_dev = kzalloc(sizeof(struct he_dev), GFP_KERNEL); if (!he_dev) { err = -ENOMEM; goto init_one_failure; } - memset(he_dev, 0, sizeof(struct he_dev)); - he_dev->pci_dev = pci_dev; he_dev->atm_dev = atm_dev; he_dev->atm_dev->dev_data = he_dev; diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index d1113e845f9..209dba1c70d 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2719,7 +2719,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_ goto out_disable; } - dev = kmalloc(sizeof(hrz_dev), GFP_KERNEL); + dev = kzalloc(sizeof(hrz_dev), GFP_KERNEL); if (!dev) { // perhaps we should be nice: deregister all adapters and abort? PRINTD(DBG_ERR, "out of memory"); @@ -2727,8 +2727,6 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_ goto out_release; } - memset(dev, 0, sizeof(hrz_dev)); - pci_set_drvdata(pci_dev, dev); // grab IRQ and install handler - move this someplace more sensible diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index b0369bb20f0..7487f0ad68e 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -642,11 +642,9 @@ alloc_scq(struct idt77252_dev *card, int class) { struct scq_info *scq; - scq = (struct scq_info *) kmalloc(sizeof(struct scq_info), GFP_KERNEL); + scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL); if (!scq) return NULL; - memset(scq, 0, sizeof(struct scq_info)); - scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE, &scq->paddr); if (scq->base == NULL) { @@ -2142,11 +2140,9 @@ idt77252_init_est(struct vc_map *vc, int pcr) { struct rate_estimator *est; - est = kmalloc(sizeof(struct rate_estimator), GFP_KERNEL); + est = kzalloc(sizeof(struct rate_estimator), GFP_KERNEL); if (!est) return NULL; - memset(est, 0, sizeof(*est)); - est->maxcps = pcr < 0 ? -pcr : pcr; est->cps = est->maxcps; est->avcps = est->cps << 5; @@ -2451,14 +2447,12 @@ idt77252_open(struct atm_vcc *vcc) index = VPCI2VC(card, vpi, vci); if (!card->vcs[index]) { - card->vcs[index] = kmalloc(sizeof(struct vc_map), GFP_KERNEL); + card->vcs[index] = kzalloc(sizeof(struct vc_map), GFP_KERNEL); if (!card->vcs[index]) { printk("%s: can't alloc vc in open()\n", card->name); up(&card->mutex); return -ENOMEM; } - memset(card->vcs[index], 0, sizeof(struct vc_map)); - card->vcs[index]->card = card; card->vcs[index]->index = index; @@ -2926,13 +2920,11 @@ open_card_oam(struct idt77252_dev *card) for (vci = 3; vci < 5; vci++) { index = VPCI2VC(card, vpi, vci); - vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL); + vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL); if (!vc) { printk("%s: can't alloc vc\n", card->name); return -ENOMEM; } - memset(vc, 0, sizeof(struct vc_map)); - vc->index = index; card->vcs[index] = vc; @@ -2995,12 +2987,11 @@ open_card_ubr0(struct idt77252_dev *card) { struct vc_map *vc; - vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL); + vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL); if (!vc) { printk("%s: can't alloc vc\n", card->name); return -ENOMEM; } - memset(vc, 0, sizeof(struct vc_map)); card->vcs[0] = vc; vc->class = SCHED_UBR0; @@ -3695,14 +3686,12 @@ idt77252_init_one(struct pci_dev *pcidev, const struct pci_device_id *id) goto err_out_disable_pdev; } - card = kmalloc(sizeof(struct idt77252_dev), GFP_KERNEL); + card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL); if (!card) { printk("idt77252-%d: can't allocate private data\n", index); err = -ENOMEM; goto err_out_disable_pdev; } - memset(card, 0, sizeof(struct idt77252_dev)); - card->revision = revision; card->index = index; card->pcidev = pcidev; diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index fe60a59b7fc..b9568e10965 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1482,16 +1482,10 @@ static inline void vcc_table_deallocate(const struct lanai_dev *lanai) static inline struct lanai_vcc *new_lanai_vcc(void) { struct lanai_vcc *lvcc; - lvcc = (struct lanai_vcc *) kmalloc(sizeof(*lvcc), GFP_KERNEL); + lvcc = kzalloc(sizeof(*lvcc), GFP_KERNEL); if (likely(lvcc != NULL)) { - lvcc->vbase = NULL; - lvcc->rx.atmvcc = lvcc->tx.atmvcc = NULL; - lvcc->nref = 0; - memset(&lvcc->stats, 0, sizeof lvcc->stats); - lvcc->rx.buf.start = lvcc->tx.buf.start = NULL; skb_queue_head_init(&lvcc->tx.backlog); #ifdef DEBUG - lvcc->tx.unqueue = NULL; lvcc->vci = -1; #endif } diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 2c65e82f0d6..083c5d3f2e1 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -603,9 +603,8 @@ static int start_rx(struct atm_dev *dev) DPRINTK("start_rx\n"); zatm_dev = ZATM_DEV(dev); size = sizeof(struct atm_vcc *)*zatm_dev->chans; - zatm_dev->rx_map = (struct atm_vcc **) kmalloc(size,GFP_KERNEL); + zatm_dev->rx_map = kzalloc(size,GFP_KERNEL); if (!zatm_dev->rx_map) return -ENOMEM; - memset(zatm_dev->rx_map,0,size); /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */ zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR); /* prepare free buffer pools */ @@ -801,6 +800,7 @@ static int alloc_shaper(struct atm_dev *dev,int *pcr,int min,int max,int ubr) i = m = 1; zatm_dev->ubr_ref_cnt++; zatm_dev->ubr = shaper; + *pcr = 0; } else { if (min) { @@ -951,9 +951,8 @@ static int open_tx_first(struct atm_vcc *vcc) skb_queue_head_init(&zatm_vcc->tx_queue); init_waitqueue_head(&zatm_vcc->tx_wait); /* initialize ring */ - zatm_vcc->ring = kmalloc(RING_SIZE,GFP_KERNEL); + zatm_vcc->ring = kzalloc(RING_SIZE,GFP_KERNEL); if (!zatm_vcc->ring) return -ENOMEM; - memset(zatm_vcc->ring,0,RING_SIZE); loop = zatm_vcc->ring+RING_ENTRIES*RING_WORDS; loop[0] = uPD98401_TXPD_V; loop[1] = loop[2] = 0; diff --git a/include/linux/in.h b/include/linux/in.h index d79fc75fa7c..2619859f6e1 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -40,6 +40,7 @@ enum { IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */ IPPROTO_AH = 51, /* Authentication Header protocol */ + IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */ IPPROTO_PIM = 103, /* Protocol Independent Multicast */ IPPROTO_COMP = 108, /* Compression Header protocol */ diff --git a/include/linux/ip.h b/include/linux/ip.h index 6b25d36fc54..ecee9bb27d0 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -80,6 +80,8 @@ #define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ #define IPOPT_TS_PRESPEC 3 /* specified modules only */ +#define IPV4_BEET_PHMAXLEN 8 + struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, @@ -123,4 +125,11 @@ struct ip_comp_hdr { __be16 cpi; }; +struct ip_beet_phdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 padlen; + __u8 reserved; +}; + #endif /* _LINUX_IP_H */ diff --git a/include/linux/ipsec.h b/include/linux/ipsec.h index d3c527616b5..d17a6302a0e 100644 --- a/include/linux/ipsec.h +++ b/include/linux/ipsec.h @@ -12,7 +12,8 @@ enum { IPSEC_MODE_ANY = 0, /* We do not support this for SA */ IPSEC_MODE_TRANSPORT = 1, - IPSEC_MODE_TUNNEL = 2 + IPSEC_MODE_TUNNEL = 2, + IPSEC_MODE_BEET = 3 }; enum { diff --git a/include/linux/netfilter_bridge/ebt_mark_t.h b/include/linux/netfilter_bridge/ebt_mark_t.h index 110fec6a40a..6270f6f3369 100644 --- a/include/linux/netfilter_bridge/ebt_mark_t.h +++ b/include/linux/netfilter_bridge/ebt_mark_t.h @@ -1,6 +1,18 @@ #ifndef __LINUX_BRIDGE_EBT_MARK_T_H #define __LINUX_BRIDGE_EBT_MARK_T_H +/* The target member is reused for adding new actions, the + * value of the real target is -1 to -NUM_STANDARD_TARGETS. + * For backward compatibility, the 4 lsb (2 would be enough, + * but let's play it safe) are kept to designate this target. + * The remaining bits designate the action. By making the set + * action 0xfffffff0, the result will look ok for older + * versions. [September 2006] */ +#define MARK_SET_VALUE (0xfffffff0) +#define MARK_OR_VALUE (0xffffffe0) +#define MARK_AND_VALUE (0xffffffd0) +#define MARK_XOR_VALUE (0xffffffc0) + struct ebt_mark_t_info { unsigned long mark; diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index ce02c984f3b..5b63a231a76 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -77,7 +77,7 @@ enum nf_ip_hook_priorities { #define SO_ORIGINAL_DST 80 #ifdef __KERNEL__ -extern int ip_route_me_harder(struct sk_buff **pskb); +extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type); extern int ip_xfrm_me_harder(struct sk_buff **pskb); extern unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 430afd05826..8ae7f744917 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -129,7 +129,8 @@ enum #define XFRM_MODE_TUNNEL 1 #define XFRM_MODE_ROUTEOPTIMIZATION 2 #define XFRM_MODE_IN_TRIGGER 3 -#define XFRM_MODE_MAX 4 +#define XFRM_MODE_BEET 4 +#define XFRM_MODE_MAX 5 /* Netlink configuration messages. */ enum { diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 770c0df972a..b54306a934e 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -22,24 +22,37 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, const void *data, unsigned int datalen) { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + int action = info->target & -16; - if ((*pskb)->nfmark != info->mark) + if (action == MARK_SET_VALUE) (*pskb)->nfmark = info->mark; + else if (action == MARK_OR_VALUE) + (*pskb)->nfmark |= info->mark; + else if (action == MARK_AND_VALUE) + (*pskb)->nfmark &= info->mark; + else + (*pskb)->nfmark ^= info->mark; - return info->target; + return info->target | -16; } static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, const struct ebt_entry *e, void *data, unsigned int datalen) { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + int tmp; if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) return -EINVAL; - if (BASE_CHAIN && info->target == EBT_RETURN) + tmp = info->target | -16; + if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; CLEAR_BASE_CHAIN_BIT; - if (INVALID_TARGET) + if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) + return -EINVAL; + tmp = info->target & -16; + if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && + tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) return -EINVAL; return 0; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8ce8c471d86..b4b478353b2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -344,12 +344,12 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, { struct neighbour *n; int key_len = tbl->key_len; - u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + u32 hash_val = tbl->hash(pkey, dev); NEIGH_CACHE_STAT_INC(tbl, lookups); read_lock_bh(&tbl->lock); - for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); @@ -364,12 +364,12 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) { struct neighbour *n; int key_len = tbl->key_len; - u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; + u32 hash_val = tbl->hash(pkey, NULL); NEIGH_CACHE_STAT_INC(tbl, lookups); read_lock_bh(&tbl->lock); - for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { if (!memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); @@ -1998,12 +1998,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; + read_lock_bh(&tbl->lock); for (h = 0; h <= tbl->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; - read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { if (idx < s_idx) continue; @@ -2016,8 +2016,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, goto out; } } - read_unlock_bh(&tbl->lock); } + read_unlock_bh(&tbl->lock); rc = skb->len; out: cb->args[1] = h; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index d172a980444..5572071af73 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL If unsure, say Y. +config INET_XFRM_MODE_BEET + tristate "IP: IPsec BEET mode" + default y + select XFRM + ---help--- + Support for IPsec BEET mode. + + If unsure, say Y. + config INET_DIAG tristate "INET: socket monitoring interface" default y diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f66049e28ae..15645c51520 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o +obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 13b29360d10..b5c205b5766 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (x->props.mode == XFRM_MODE_TRANSPORT || + x->props.mode == XFRM_MODE_BEET) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - - if (x->props.mode == XFRM_MODE_TUNNEL) { - mtu = ALIGN(mtu + 2, blksize); - } else { - /* The worst case. */ + int enclen = 0; + + switch (x->props.mode) { + case XFRM_MODE_TUNNEL: + mtu = ALIGN(mtu +2, blksize); + break; + default: + case XFRM_MODE_TRANSPORT: + /* The worst case */ mtu = ALIGN(mtu + 2, 4) + blksize - 4; + break; + case XFRM_MODE_BEET: + /* The worst case. */ + enclen = IPV4_BEET_PHMAXLEN; + mtu = ALIGN(mtu + enclen + 2, blksize); + break; } + if (esp->conf.padlen) mtu = ALIGN(mtu, esp->conf.padlen); - return mtu + x->props.header_len + esp->auth.icv_trunc_len; + return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen; } static void esp4_err(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 2017d36024d..3839b706142 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; + u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (t == NULL) @@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = XFRM_MODE_TUNNEL; + if (x->props.mode == XFRM_MODE_BEET) + mode = x->props.mode; + t->props.mode = mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 6dee03935f7..1445bb47fea 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, skb->nh.iph->saddr = cp->vaddr; ip_send_check(skb->nh.iph); + /* For policy routing, packets originating from this + * machine itself may be routed differently to packets + * passing through. We want this packet to be routed as + * if it came from this machine itself. So re-compute + * the routing information. + */ + if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) + goto drop; + skb = *pskb; + IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 5ac15379a0c..e2005c6810a 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -8,7 +8,7 @@ #include <net/ip.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff **pskb) +int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) { struct iphdr *iph = (*pskb)->nh.iph; struct rtable *rt; @@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb) struct dst_entry *odst; unsigned int hh_len; + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(iph->saddr); + /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. */ - if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + if (addr_type == RTN_LOCAL) { fl.nl_u.ip4_u.daddr = iph->daddr; fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); @@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(pskb); + return ip_route_me_harder(pskb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 021395b6746..d85d2de5044 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum, ct->tuplehash[!dir].tuple.src.u.all #endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; } return ret; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index fd0c05efed8..ad0312d0e4f 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module"); #define DEBUGP(format, args...) #endif -static inline struct rtable *route_reverse(struct sk_buff *skb, - struct tcphdr *tcph, int hook) -{ - struct iphdr *iph = skb->nh.iph; - struct dst_entry *odst; - struct flowi fl = {}; - struct rtable *rt; - - /* We don't require ip forwarding to be enabled to be able to - * send a RST reply for bridged traffic. */ - if (hook != NF_IP_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) { - fl.nl_u.ip4_u.daddr = iph->saddr; - if (hook == NF_IP_LOCAL_IN) - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->daddr; - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - - odst = skb->dst; - if (ip_route_input(skb, iph->saddr, iph->daddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return NULL; - } - dst_release(&rt->u.dst); - rt = (struct rtable *)skb->dst; - skb->dst = odst; - - fl.nl_u.ip4_u.daddr = iph->saddr; - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - } - - if (rt->u.dst.error) { - dst_release(&rt->u.dst); - return NULL; - } - - fl.proto = IPPROTO_TCP; - fl.fl_ip_sport = tcph->dest; - fl.fl_ip_dport = tcph->source; - security_skb_classify_flow(skb, &fl); - - xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); - - return rt; -} - /* Send RST reply */ static void send_reset(struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; struct iphdr *iph = oldskb->nh.iph; struct tcphdr _otcph, *oth, *tcph; - struct rtable *rt; __be16 tmp_port; __be32 tmp_addr; int needs_ack; - int hh_len; + unsigned int addr_type; /* IP header checks: fragment. */ if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) @@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP)) return; - if ((rt = route_reverse(oldskb, oth, hook)) == NULL) - return; - - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - /* We need a linear, writeable skb. We also need to expand headroom in case hh_len of incoming interface < hh_len of outgoing interface */ - nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb), + nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb), GFP_ATOMIC); - if (!nskb) { - dst_release(&rt->u.dst); + if (!nskb) return; - } - - dst_release(nskb->dst); - nskb->dst = &rt->u.dst; /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); @@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->window = 0; tcph->urg_ptr = 0; + /* Set DF, id = 0 */ + nskb->nh.iph->frag_off = htons(IP_DF); + nskb->nh.iph->id = 0; + + addr_type = RTN_UNSPEC; + if (hook != NF_IP_FORWARD +#ifdef CONFIG_BRIDGE_NETFILTER + || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED) +#endif + ) + addr_type = RTN_LOCAL; + + if (ip_route_me_harder(&nskb, addr_type)) + goto free_nskb; + /* Adjust TCP checksum */ nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; @@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook) nskb->nh.iph->daddr, csum_partial((char *)tcph, sizeof(struct tcphdr), 0)); - - /* Adjust IP TTL, DF */ + /* Adjust IP TTL */ nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); - /* Set DF, id = 0 */ - nskb->nh.iph->frag_off = htons(IP_DF); - nskb->nh.iph->id = 0; /* Adjust IP checksum */ nskb->nh.iph->check = 0; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index e62ea2bb9c0..b91f3582359 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook, || (*pskb)->nfmark != nfmark #endif || (*pskb)->nh.iph->tos != tos)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ip_route_me_harder(pskb, RTN_UNSPEC)) + ret = NF_DROP; return ret; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f884cea14f..cf06accbe68 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) u32 pkts_acked = 0; void (*rtt_sample)(struct sock *sk, u32 usrtt) = icsk->icsk_ca_ops->rtt_sample; - struct timeval tv; + struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6d6142f9c47..865d75214a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -675,6 +675,8 @@ do_append_data: udp_flush_pending_frames(sk); else if (!corkreq) err = udp_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; release_sock(sk); out: diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c new file mode 100644 index 00000000000..89cf59ea7bb --- /dev/null +++ b/net/ipv4/xfrm4_mode_beet.c @@ -0,0 +1,139 @@ +/* + * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4. + * + * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> + * Miika Komu <miika@iki.fi> + * Herbert Xu <herbert@gondor.apana.org.au> + * Abhinav Pathak <abhinav.pathak@hiit.fi> + * Jeff Ahrenholz <ahrenholz@gmail.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dst.h> +#include <net/ip.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + * The following fields in it shall be filled in by x->type->output: + * tot_len + * check + * + * On exit, skb->h will be set to the start of the payload to be processed + * by x->type->output and skb->nh will be set to the top IP header. + */ +static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph, *top_iph = NULL; + int hdrlen, optlen; + + iph = skb->nh.iph; + skb->h.ipiph = iph; + + hdrlen = 0; + optlen = iph->ihl * 4 - sizeof(*iph); + if (unlikely(optlen)) + hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); + + skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen); + top_iph = skb->nh.iph; + hdrlen = iph->ihl * 4 - optlen; + skb->h.raw += hdrlen; + + memmove(top_iph, iph, hdrlen); + if (unlikely(optlen)) { + struct ip_beet_phdr *ph; + + BUG_ON(optlen < 0); + + ph = (struct ip_beet_phdr *)skb->h.raw; + ph->padlen = 4 - (optlen & 4); + ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8; + ph->nexthdr = top_iph->protocol; + + top_iph->protocol = IPPROTO_BEETPH; + top_iph->ihl = sizeof(struct iphdr) / 4; + } + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + + return 0; +} + +static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct iphdr *iph = skb->nh.iph; + int phlen = 0; + int optlen = 0; + __u8 ph_nexthdr = 0, protocol = 0; + int err = -EINVAL; + + protocol = iph->protocol; + + if (unlikely(iph->protocol == IPPROTO_BEETPH)) { + struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1); + + if (!pskb_may_pull(skb, sizeof(*ph))) + goto out; + + phlen = ph->hdrlen * 8; + optlen = phlen - ph->padlen - sizeof(*ph); + if (optlen < 0 || optlen & 3 || optlen > 250) + goto out; + + if (!pskb_may_pull(skb, phlen)) + goto out; + + ph_nexthdr = ph->nexthdr; + } + + skb_push(skb, sizeof(*iph) - phlen + optlen); + memmove(skb->data, skb->nh.raw, sizeof(*iph)); + skb->nh.raw = skb->data; + + iph = skb->nh.iph; + iph->ihl = (sizeof(*iph) + optlen) / 4; + iph->tot_len = htons(skb->len); + iph->daddr = x->sel.daddr.a4; + iph->saddr = x->sel.saddr.a4; + if (ph_nexthdr) + iph->protocol = ph_nexthdr; + else + iph->protocol = protocol; + iph->check = 0; + iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); + err = 0; +out: + return err; +} + +static struct xfrm_mode xfrm4_beet_mode = { + .input = xfrm4_beet_input, + .output = xfrm4_beet_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_BEET, +}; + +static int __init xfrm4_beet_init(void) +{ + return xfrm_register_mode(&xfrm4_beet_mode, AF_INET); +} + +static void __exit xfrm4_beet_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET); + BUG_ON(err); +} + +module_init(xfrm4_beet_init); +module_exit(xfrm4_beet_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index a2d211da2ab..a460e8132b4 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -136,6 +136,16 @@ config INET6_XFRM_MODE_TUNNEL If unsure, say Y. +config INET6_XFRM_MODE_BEET + tristate "IPv6: IPsec BEET mode" + depends on IPV6 + default IPV6 + select XFRM + ---help--- + Support for IPsec BEET mode. + + If unsure, say Y. + config INET6_XFRM_MODE_ROUTEOPTIMIZATION tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" depends on IPV6 && EXPERIMENTAL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 0213c6612b5..87274e47fe3 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o +obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a2860e35efd..71f59f18ede 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -199,6 +199,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; + u8 mode = XFRM_MODE_TUNNEL; t = xfrm_state_alloc(); if (!t) @@ -212,7 +213,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = XFRM_MODE_TUNNEL; + if (x->props.mode == XFRM_MODE_BEET) + mode = x->props.mode; + t->props.mode = mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9662561701d..e0c3934a7e4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct in6_addr *daddr, *final_p = NULL, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi *fl = &inet->cork.fl; + struct flowi fl; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -626,19 +626,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(fl, 0, sizeof(*fl)); + memset(&fl, 0, sizeof(fl)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl->fl_ip_dport = sin6->sin6_port; + fl.fl_ip_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -656,32 +656,32 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl->oif = sin6->sin6_scope_id; + fl.oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl->fl_ip_dport = inet->dport; + fl.fl_ip_dport = inet->dport; daddr = &np->daddr; - fl->fl6_flowlabel = np->flow_label; + fl.fl6_flowlabel = np->flow_label; connected = 1; } - if (!fl->oif) - fl->oif = sk->sk_bound_dev_if; + if (!fl.oif) + fl.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -695,39 +695,39 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl->proto = IPPROTO_UDP; - ipv6_addr_copy(&fl->fl6_dst, daddr); - if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl->fl6_src, &np->saddr); - fl->fl_ip_sport = inet->sport; + fl.proto = IPPROTO_UDP; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl_ip_sport = inet->sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, rt0->addr); + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { - fl->oif = np->mcast_oif; + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { + fl.oif = np->mcast_oif; connected = 0; } - security_sk_classify_flow(sk, fl); + security_sk_classify_flow(sk, &fl); - err = ip6_sk_dst_lookup(sk, &dst, fl); + err = ip6_sk_dst_lookup(sk, &dst, &fl); if (err) goto out; if (final_p) - ipv6_addr_copy(&fl->fl6_dst, final_p); + ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) + if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out; if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl->fl6_dst)) + if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -763,21 +763,23 @@ back_from_confirm: do_append_data: up->len += ulen; err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, fl, + sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? &np->saddr : #endif NULL); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c new file mode 100644 index 00000000000..edcfffa9e87 --- /dev/null +++ b/net/ipv6/xfrm6_mode_beet.c @@ -0,0 +1,107 @@ +/* + * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6. + * + * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com> + * Miika Komu <miika@iki.fi> + * Herbert Xu <herbert@gondor.apana.org.au> + * Abhinav Pathak <abhinav.pathak@hiit.fi> + * Jeff Ahrenholz <ahrenholz@gmail.com> + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/stringify.h> +#include <net/dsfield.h> +#include <net/dst.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> +#include <net/xfrm.h> + +/* Add encapsulation header. + * + * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt. + * The following fields in it shall be filled in by x->type->output: + * payload_len + * + * On exit, skb->h will be set to the start of the encapsulation header to be + * filled in by x->type->output and skb->nh will be set to the nextheader field + * of the extension header directly preceding the encapsulation header, or in + * its absence, that of the top IP header. The value of skb->data will always + * point to the top IP header. + */ +static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph, *top_iph; + u8 *prevhdr; + int hdr_len; + + skb_push(skb, x->props.header_len); + iph = skb->nh.ipv6h; + + hdr_len = ip6_find_1stfragopt(skb, &prevhdr); + skb->nh.raw = prevhdr - x->props.header_len; + skb->h.raw = skb->data + hdr_len; + memmove(skb->data, iph, hdr_len); + + skb->nh.raw = skb->data; + top_iph = skb->nh.ipv6h; + skb->nh.raw = &top_iph->nexthdr; + skb->h.ipv6h = top_iph + 1; + + ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); + ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); + + return 0; +} + +static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *ip6h; + int size = sizeof(struct ipv6hdr); + int err = -EINVAL; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + + skb_push(skb, size); + memmove(skb->data, skb->nh.raw, size); + skb->nh.raw = skb->data; + + skb->mac.raw = memmove(skb->data - skb->mac_len, + skb->mac.raw, skb->mac_len); + + ip6h = skb->nh.ipv6h; + ip6h->payload_len = htons(skb->len - size); + ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6); + ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6); + err = 0; +out: + return err; +} + +static struct xfrm_mode xfrm6_beet_mode = { + .input = xfrm6_beet_input, + .output = xfrm6_beet_output, + .owner = THIS_MODULE, + .encap = XFRM_MODE_BEET, +}; + +static int __init xfrm6_beet_init(void) +{ + return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6); +} + +static void __exit xfrm6_beet_exit(void) +{ + int err; + + err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6); + BUG_ON(err); +} + +module_init(xfrm6_beet_init); +module_exit(xfrm6_beet_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0a28d2c5c44..ce94732b8e2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -365,7 +365,7 @@ config NETFILTER_XT_MATCH_MULTIPORT config NETFILTER_XT_MATCH_PHYSDEV tristate '"physdev" match support' - depends on NETFILTER_XTABLES && BRIDGE_NETFILTER + depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER help Physdev packet matching matches against the physical bridge ports the IP packet arrived on or will leave by. diff --git a/net/sched/estimator.c b/net/sched/estimator.c deleted file mode 100644 index 0ebc98e9be2..00000000000 --- a/net/sched/estimator.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * net/sched/estimator.c Simple rate estimator. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/jiffies.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/init.h> -#include <net/sock.h> -#include <net/pkt_sched.h> - -/* - This code is NOT intended to be used for statistics collection, - its purpose is to provide a base for statistical multiplexing - for controlled load service. - If you need only statistics, run a user level daemon which - periodically reads byte counters. - - Unfortunately, rate estimation is not a very easy task. - F.e. I did not find a simple way to estimate the current peak rate - and even failed to formulate the problem 8)8) - - So I preferred not to built an estimator into the scheduler, - but run this task separately. - Ideally, it should be kernel thread(s), but for now it runs - from timers, which puts apparent top bounds on the number of rated - flows, has minimal overhead on small, but is enough - to handle controlled load service, sets of aggregates. - - We measure rate over A=(1<<interval) seconds and evaluate EWMA: - - avrate = avrate*(1-W) + rate*W - - where W is chosen as negative power of 2: W = 2^(-ewma_log) - - The resulting time constant is: - - T = A/(-ln(1-W)) - - - NOTES. - - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - - * Minimal interval is HZ/4=250msec (it is the greatest common divisor - for HZ=100 and HZ=1024 8)), maximal interval - is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals - are too expensive, longer ones can be implemented - at user level painlessly. - */ - -#define EST_MAX_INTERVAL 5 - -struct qdisc_estimator -{ - struct qdisc_estimator *next; - struct tc_stats *stats; - spinlock_t *stats_lock; - unsigned interval; - int ewma_log; - u64 last_bytes; - u32 last_packets; - u32 avpps; - u32 avbps; -}; - -struct qdisc_estimator_head -{ - struct timer_list timer; - struct qdisc_estimator *list; -}; - -static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1]; - -/* Estimator array lock */ -static DEFINE_RWLOCK(est_lock); - -static void est_timer(unsigned long arg) -{ - int idx = (int)arg; - struct qdisc_estimator *e; - - read_lock(&est_lock); - for (e = elist[idx].list; e; e = e->next) { - struct tc_stats *st = e->stats; - u64 nbytes; - u32 npackets; - u32 rate; - - spin_lock(e->stats_lock); - nbytes = st->bytes; - npackets = st->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); - e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; - st->bps = (e->avbps+0xF)>>5; - - rate = (npackets - e->last_packets)<<(12 - idx); - e->last_packets = npackets; - e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; - e->stats->pps = (e->avpps+0x1FF)>>10; - spin_unlock(e->stats_lock); - } - - mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); - read_unlock(&est_lock); -} - -int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt) -{ - struct qdisc_estimator *est; - struct tc_estimator *parm = RTA_DATA(opt); - - if (RTA_PAYLOAD(opt) < sizeof(*parm)) - return -EINVAL; - - if (parm->interval < -2 || parm->interval > 3) - return -EINVAL; - - est = kzalloc(sizeof(*est), GFP_KERNEL); - if (est == NULL) - return -ENOBUFS; - - est->interval = parm->interval + 2; - est->stats = stats; - est->stats_lock = stats_lock; - est->ewma_log = parm->ewma_log; - est->last_bytes = stats->bytes; - est->avbps = stats->bps<<5; - est->last_packets = stats->packets; - est->avpps = stats->pps<<10; - - est->next = elist[est->interval].list; - if (est->next == NULL) { - init_timer(&elist[est->interval].timer); - elist[est->interval].timer.data = est->interval; - elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); - elist[est->interval].timer.function = est_timer; - add_timer(&elist[est->interval].timer); - } - write_lock_bh(&est_lock); - elist[est->interval].list = est; - write_unlock_bh(&est_lock); - return 0; -} - -void qdisc_kill_estimator(struct tc_stats *stats) -{ - int idx; - struct qdisc_estimator *est, **pest; - - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - int killed = 0; - pest = &elist[idx].list; - while ((est=*pest) != NULL) { - if (est->stats != stats) { - pest = &est->next; - continue; - } - - write_lock_bh(&est_lock); - *pest = est->next; - write_unlock_bh(&est_lock); - - kfree(est); - killed++; - } - if (killed && elist[idx].list == NULL) - del_timer(&elist[idx].timer); - } -} - -EXPORT_SYMBOL(qdisc_kill_estimator); -EXPORT_SYMBOL(qdisc_new_estimator); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 6c058e3660c..bb3ddd4784b 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -391,7 +391,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q, /* If this triggers, it is a bug in this code, but it need not be fatal */ static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) { - if (!RB_EMPTY_NODE(rb)) { + if (RB_EMPTY_NODE(rb)) { WARN_ON(1); } else { rb_erase(rb, root); diff --git a/net/tipc/link.c b/net/tipc/link.c index 693f02eca6d..53bc8cb5adb 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1666,8 +1666,9 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) char addr_string[16]; tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg)); - tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle); - + tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n", + (unsigned long) TIPC_SKB_CB(buf)->handle); + n_ptr = l_ptr->owner->next; tipc_node_lock(n_ptr); diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 6ac4e4f033a..d401dc8f05e 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -41,17 +41,18 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t return (h ^ (h >> 16)) & hmask; } -static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, +static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, unsigned short family, unsigned int hmask) { unsigned int h = family; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(saddr); + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(saddr); + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; }; return (h ^ (h >> 16)) & hmask; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b6e2e79d726..2a7861661f1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -778,8 +778,9 @@ void xfrm_policy_flush(u8 type) for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy *pol; struct hlist_node *entry; - int i; + int i, killed; + killed = 0; again1: hlist_for_each_entry(pol, entry, &xfrm_policy_inexact[dir], bydst) { @@ -790,6 +791,7 @@ void xfrm_policy_flush(u8 type) write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(pol); + killed++; write_lock_bh(&xfrm_policy_lock); goto again1; @@ -807,13 +809,14 @@ void xfrm_policy_flush(u8 type) write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(pol); + killed++; write_lock_bh(&xfrm_policy_lock); goto again2; } } - xfrm_policy_count[dir] = 0; + xfrm_policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f927b7330f0..39b8bf3a9de 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -63,10 +63,11 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } -static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, +static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, unsigned short family) { - return __xfrm_src_hash(addr, family, xfrm_state_hmask); + return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask); } static inline unsigned int @@ -92,7 +93,8 @@ static void xfrm_hash_transfer(struct hlist_head *list, nhashmask); hlist_add_head(&x->bydst, ndsttable+h); - h = __xfrm_src_hash(&x->props.saddr, x->props.family, + h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, + x->props.family, nhashmask); hlist_add_head(&x->bysrc, nsrctable+h); @@ -458,7 +460,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(saddr, family); + unsigned int h = xfrm_src_hash(daddr, saddr, family); struct xfrm_state *x; struct hlist_node *entry; @@ -587,7 +589,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(saddr, family); + h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); @@ -622,7 +624,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(&x->props.saddr, x->props.family); + h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); if (x->id.spi) { @@ -748,7 +750,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(saddr, family); + h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c59a78d2923..d54b3a70d5d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -211,6 +211,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: case XFRM_MODE_ROUTEOPTIMIZATION: + case XFRM_MODE_BEET: break; default: |