59 files changed, 2406 insertions, 1099 deletions
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 8438c68591f..31e7cedd9f8 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -81,6 +81,7 @@
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/types.h>
+#include <net/request_sock.h>
 #include <net/sock.h>
 #include <linux/igmp.h>
 #include <net/flow.h>
@@ -107,6 +108,26 @@ struct ip_options {
 
 #define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
 
+struct inet_request_sock {
+	struct request_sock	req;
+	u32			loc_addr;
+	u32			rmt_addr;
+	u16			rmt_port;
+	u16			snd_wscale : 4,
+				rcv_wscale : 4,
+				tstamp_ok  : 1,
+				sack_ok    : 1,
+				wscale_ok  : 1,
+				ecn_ok     : 1,
+				acked      : 1;
+	struct ip_options	*opt;
+};
+
+static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
+{
+	return (struct inet_request_sock *)sk;
+}
+
 struct ipv6_pinfo;
 
 struct inet_sock {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ab0d0efbf24..6fcd6a0ade2 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -193,6 +193,19 @@ struct inet6_skb_parm {
 
 #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
 
+struct tcp6_request_sock {
+	struct tcp_request_sock	req;
+	struct in6_addr		loc_addr;
+	struct in6_addr		rmt_addr;
+	struct sk_buff		*pktopts;
+	int			iif;
+};
+
+static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk)
+{
+	return (struct tcp6_request_sock *)sk;
+}
+
 /**
  * struct ipv6_pinfo - ipv6 private area
  *
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b2738ac8bc9..e38407a23d0 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -156,7 +156,7 @@ struct netlink_notify
 };
 
 static __inline__ struct nlmsghdr *
-__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
+__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 {
 	struct nlmsghdr *nlh;
 	int size = NLMSG_LENGTH(len);
@@ -164,15 +164,31 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len)
 	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
 	nlh->nlmsg_type = type;
 	nlh->nlmsg_len = size;
-	nlh->nlmsg_flags = 0;
+	nlh->nlmsg_flags = flags;
 	nlh->nlmsg_pid = pid;
 	nlh->nlmsg_seq = seq;
 	return nlh;
 }
 
+#define NLMSG_NEW(skb, pid, seq, type, len, flags) \
+({	if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) \
+		goto nlmsg_failure; \
+	__nlmsg_put(skb, pid, seq, type, len, flags); })
+
 #define NLMSG_PUT(skb, pid, seq, type, len) \
-({ if (skb_tailroom(skb) < (int)NLMSG_SPACE(len)) goto nlmsg_failure; \
-   __nlmsg_put(skb, pid, seq, type, len); })
+	NLMSG_NEW(skb, pid, seq, type, len, 0)
+
+#define NLMSG_NEW_ANSWER(skb, cb, type, len, flags) \
+	NLMSG_NEW(skb, NETLINK_CB((cb)->skb).pid, \
+		  (cb)->nlh->nlmsg_seq, type, len, flags)
+
+#define NLMSG_END(skb, nlh) \
+({	(nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \
+	(skb)->len; })
+
+#define NLMSG_CANCEL(skb, nlh) \
+({	skb_trim(skb, (unsigned char *) (nlh) - (skb)->data); \
+	-1; })
 
 extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 			      struct nlmsghdr *nlh,
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 91ac97c2077..e68dbf0bf57 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -89,6 +89,13 @@ enum {
 	RTM_GETANYCAST	= 62,
#define RTM_GETANYCAST	RTM_GETANYCAST
 
+	RTM_NEWNEIGHTBL	= 64,
+#define RTM_NEWNEIGHTBL	RTM_NEWNEIGHTBL
+	RTM_GETNEIGHTBL	= 66,
+#define RTM_GETNEIGHTBL	RTM_GETNEIGHTBL
+	RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL	RTM_SETNEIGHTBL
+
 	__RTM_MAX,
#define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
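The new NLMSG_NEW()/NLMSG_END()/NLMSG_CANCEL() trio gives message builders a uniform open/commit/abort pattern: NLMSG_NEW() reserves tailroom and writes the header (jumping to a local nlmsg_failure label when the skb is full), NLMSG_END() patches nlmsg_len to cover everything appended since the header, and NLMSG_CANCEL() trims a half-built message back out of the skb. A minimal sketch of the intended calling convention — the function itself is hypothetical, not part of this patch:

#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

static int example_fill(struct sk_buff *skb, u32 pid, u32 seq, int type)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *g;

	/* Reserve tailroom and write the header, or bail out. */
	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*g), NLM_F_MULTI);
	g = NLMSG_DATA(nlh);
	g->rtgen_family = AF_UNSPEC;

	/* Fix up nlmsg_len to cover everything appended since NLMSG_NEW. */
	return NLMSG_END(skb, nlh);

nlmsg_failure:
	/* NLMSG_NEW wrote nothing, so there is nothing to trim. */
	return -1;
}

NLMSG_NEW_ANSWER() is the same thing with the pid and sequence number lifted from the netlink_callback of a dump in progress, which is the common case for the fill routines added later in this patch.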
@@ -493,6 +500,106 @@ struct nda_cacheinfo
 	__u32		ndm_refcnt;
 };
 
+/*****************************************************************
+ *		Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all non-device-specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+	__u64		ndts_allocs;
+	__u64		ndts_destroys;
+	__u64		ndts_hash_grows;
+	__u64		ndts_res_failed;
+	__u64		ndts_lookups;
+	__u64		ndts_hits;
+	__u64		ndts_rcv_probes_mcast;
+	__u64		ndts_rcv_probes_ucast;
+	__u64		ndts_periodic_gc_runs;
+	__u64		ndts_forced_gc_runs;
+};
+
+enum {
+	NDTPA_UNSPEC,
+	NDTPA_IFINDEX,			/* u32, unchangeable */
+	NDTPA_REFCNT,			/* u32, read-only */
+	NDTPA_REACHABLE_TIME,		/* u64, read-only, msecs */
+	NDTPA_BASE_REACHABLE_TIME,	/* u64, msecs */
+	NDTPA_RETRANS_TIME,		/* u64, msecs */
+	NDTPA_GC_STALETIME,		/* u64, msecs */
+	NDTPA_DELAY_PROBE_TIME,		/* u64, msecs */
+	NDTPA_QUEUE_LEN,		/* u32 */
+	NDTPA_APP_PROBES,		/* u32 */
+	NDTPA_UCAST_PROBES,		/* u32 */
+	NDTPA_MCAST_PROBES,		/* u32 */
+	NDTPA_ANYCAST_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_DELAY,		/* u64, msecs */
+	NDTPA_PROXY_QLEN,		/* u32 */
+	NDTPA_LOCKTIME,			/* u64, msecs */
+	__NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+	__u8		ndtm_family;
+	__u8		ndtm_pad1;
+	__u16		ndtm_pad2;
+};
+
+struct ndt_config
+{
+	__u16		ndtc_key_len;
+	__u16		ndtc_entry_size;
+	__u32		ndtc_entries;
+	__u32		ndtc_last_flush;	/* delta to now in msecs */
+	__u32		ndtc_last_rand;		/* delta to now in msecs */
+	__u32		ndtc_hash_rnd;
+	__u32		ndtc_hash_mask;
+	__u32		ndtc_hash_chain_gc;
+	__u32		ndtc_proxy_qlen;
+};
+
+enum {
+	NDTA_UNSPEC,
+	NDTA_NAME,		/* char *, unchangeable */
+	NDTA_THRESH1,		/* u32 */
+	NDTA_THRESH2,		/* u32 */
+	NDTA_THRESH3,		/* u32 */
+	NDTA_CONFIG,		/* struct ndt_config, read-only */
+	NDTA_PARMS,		/* nested TLV NDTPA_* */
+	NDTA_STATS,		/* struct ndt_stats, read-only */
+	NDTA_GC_INTERVAL,	/* u64, msecs */
+	__NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
+		     NLMSG_ALIGN(sizeof(struct ndtmsg))))
+#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
+
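From userspace, the retrieval protocol described in the comment above boils down to a single dump request. A hedged sketch, assuming fd is an already-bound NETLINK_ROUTE socket; replies then arrive as a sequence of RTM_NEWNEIGHTBL messages, with the per-device ones distinguishable by NDTPA_IFINDEX as documented:

#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

static int request_neightbl_dump(int fd)
{
	struct {
		struct nlmsghdr	nlh;
		struct ndtmsg	ndtm;
	} req;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.ndtm));
	req.nlh.nlmsg_type  = RTM_GETNEIGHTBL;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.ndtm.ndtm_family = AF_UNSPEC;

	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0)
		return -1;
	return 0;
}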
 /****
  *		General form of address family dependent message.
  ****/
@@ -789,6 +896,75 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
 ({	if (unlikely(skb_tailroom(skb) < (int)(attrlen))) \
 		goto rtattr_failure; \
 	memcpy(skb_put(skb, RTA_ALIGN(attrlen)), data, attrlen); })
+
+#define RTA_PUT_U8(skb, attrtype, value) \
+({	u8 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u8), &_tmp); })
+
+#define RTA_PUT_U16(skb, attrtype, value) \
+({	u16 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u16), &_tmp); })
+
+#define RTA_PUT_U32(skb, attrtype, value) \
+({	u32 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u32), &_tmp); })
+
+#define RTA_PUT_U64(skb, attrtype, value) \
+({	u64 _tmp = (value); \
+	RTA_PUT(skb, attrtype, sizeof(u64), &_tmp); })
+
+#define RTA_PUT_SECS(skb, attrtype, value) \
+	RTA_PUT_U64(skb, attrtype, (value) / HZ)
+
+#define RTA_PUT_MSECS(skb, attrtype, value) \
+	RTA_PUT_U64(skb, attrtype, jiffies_to_msecs(value))
+
+#define RTA_PUT_STRING(skb, attrtype, value) \
+	RTA_PUT(skb, attrtype, strlen(value) + 1, value)
+
+#define RTA_PUT_FLAG(skb, attrtype) \
+	RTA_PUT(skb, attrtype, 0, NULL);
+
+#define RTA_NEST(skb, type) \
+({	struct rtattr *__start = (struct rtattr *) (skb)->tail; \
+	RTA_PUT(skb, type, 0, NULL); \
+	__start; })
+
+#define RTA_NEST_END(skb, start) \
+({	(start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \
+	(skb)->len; })
+
+#define RTA_NEST_CANCEL(skb, start) \
+({	if (start) \
+		skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
+	-1; })
+
+#define RTA_GET_U8(rta) \
+({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u8)) \
+		goto rtattr_failure; \
+	*(u8 *) RTA_DATA(rta); })
+
+#define RTA_GET_U16(rta) \
+({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u16)) \
+		goto rtattr_failure; \
+	*(u16 *) RTA_DATA(rta); })
+
+#define RTA_GET_U32(rta) \
+({	if (!rta || RTA_PAYLOAD(rta) < sizeof(u32)) \
+		goto rtattr_failure; \
+	*(u32 *) RTA_DATA(rta); })
+
+#define RTA_GET_U64(rta) \
+({	u64 _tmp; \
+	if (!rta || RTA_PAYLOAD(rta) < sizeof(u64)) \
+		goto rtattr_failure; \
+	memcpy(&_tmp, RTA_DATA(rta), sizeof(_tmp)); \
+	_tmp; })
+
+#define RTA_GET_FLAG(rta) (!!(rta))
+
+#define RTA_GET_SECS(rta) ((unsigned long) RTA_GET_U64(rta) * HZ)
+#define RTA_GET_MSECS(rta) (msecs_to_jiffies((unsigned long) RTA_GET_U64(rta)))
 
 static inline struct rtattr *
 __rta_reserve(struct sk_buff *skb, int attrtype, int attrlen)
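The RTA_PUT_*() and RTA_NEST*() helpers follow the same goto-on-failure convention as NLMSG_NEW(), which keeps fill functions linear. A sketch of how a nested NDTA_PARMS block would be emitted — the function is hypothetical (the real equivalent lives in net/core/neighbour.c later in this patch), and note that the macros assume the rtattr_failure label is only reachable once the nest pointer is meaningful:

#include <linux/rtnetlink.h>
#include <net/neighbour.h>

static int example_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	/* Open the nested TLV; attributes below become its payload. */
	struct rtattr *nest = RTA_NEST(skb, NDTA_PARMS);

	RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
	RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);

	/* Patch the nest's rta_len to cover the attributes above. */
	return RTA_NEST_END(skb, nest);

rtattr_failure:
	/* Trim the partially built nest back out of the skb. */
	return RTA_NEST_CANCEL(skb, nest);
}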
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7d66385ae75..76cf7e60216 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -64,6 +64,7 @@ extern int kmem_cache_shrink(kmem_cache_t *);
 extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
+extern const char *kmem_cache_name(kmem_cache_t *);
 extern kmem_cache_t *kmem_find_general_cachep(size_t size, int gfpflags);
 
 /* Size description struct for general caches.
  */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 14a55e3e3a5..97a7c9e03df 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,17 @@ struct tcp_options_received {
 	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
 };
 
+struct tcp_request_sock {
+	struct inet_request_sock req;
+	__u32			 rcv_isn;
+	__u32			 snt_isn;
+};
+
+static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
+{
+	return (struct tcp_request_sock *)req;
+}
+
 struct tcp_sock {
 	/* inet_sock has to be the first member of tcp_sock */
 	struct inet_sock	inet;
@@ -368,22 +379,7 @@ struct tcp_sock {
 
 	__u32	total_retrans;	/* Total retransmits for entire connection */
 
-	/* The syn_wait_lock is necessary only to avoid proc interface having
-	 * to grab the main lock sock while browsing the listening hash
-	 * (otherwise it's deadlock prone).
-	 * This lock is acquired in read mode only from listening_get_next()
-	 * and it's acquired in write mode _only_ from code that is actively
-	 * changing the syn_wait_queue. All readers that are holding
-	 * the master sock lock don't need to grab this lock in read mode
-	 * too as the syn_wait_queue writes are always protected from
-	 * the main sock lock.
-	 */
-	rwlock_t		syn_wait_lock;
-	struct tcp_listen_opt	*listen_opt;
-
-	/* FIFO of established children */
-	struct open_request	*accept_queue;
-	struct open_request	*accept_queue_tail;
+	struct request_sock_queue accept_queue; /* FIFO of established children */
 
 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index fd2ef742a9f..d68391a9b9f 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -174,6 +174,8 @@ enum xfrm_attr_type_t {
 	XFRMA_ALG_COMP,		/* struct xfrm_algo */
 	XFRMA_ENCAP,		/* struct xfrm_algo + struct xfrm_encap_tmpl */
 	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
+	XFRMA_SA,
+	XFRMA_POLICY,
 	__XFRMA_MAX
 
#define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -257,5 +259,7 @@ struct xfrm_usersa_flush {
 
#define XFRMGRP_ACQUIRE		1
#define XFRMGRP_EXPIRE		2
+#define XFRMGRP_SA		4
+#define XFRMGRP_POLICY		8
 
#endif /* _LINUX_XFRM_H */
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 4f33bbc21e7..89809891e5a 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -65,11 +65,10 @@ struct neighbour;
 
 struct neigh_parms
 {
+	struct net_device *dev;
 	struct neigh_parms *next;
 	int	(*neigh_setup)(struct neighbour *);
 	struct neigh_table *tbl;
-	int	entries;
-	void	*priv;
 
 	void	*sysctl_table;
 
@@ -192,7 +191,6 @@ struct neigh_table
 	atomic_t		entries;
 	rwlock_t		lock;
 	unsigned long		last_rand;
-	struct neigh_parms	*parms_list;
 	kmem_cache_t		*kmem_cachep;
 	struct neigh_statistics	*stats;
 	struct neighbour	**hash_buckets;
@@ -252,6 +250,9 @@ extern int			neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern int			neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern void			neigh_app_ns(struct neighbour *n);
 
+extern int			neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb);
+extern int			neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
+
 extern void			neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
 extern void			__neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *));
 extern void			pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
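The mini-socket layering above relies on each specialised request embedding its parent as the first member (request_sock → inet_request_sock → tcp_request_sock → tcp6_request_sock), which is what makes the inet_rsk()/tcp_rsk()/tcp6_rsk() casts layout-safe. A sketch of how code holding a generic struct request_sock reaches the protocol-specific fields — a hypothetical debug helper, assuming an IPv4 TCP request:

#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>

static void example_dump_req(const struct request_sock *req)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	const struct tcp_request_sock *treq = tcp_rsk(req);

	/* IPv4 addressing from the inet layer, ISNs from the TCP layer. */
	printk(KERN_DEBUG "req %u.%u.%u.%u:%u rcv_isn %u snt_isn %u\n",
	       NIPQUAD(ireq->rmt_addr), ntohs(ireq->rmt_port),
	       treq->rcv_isn, treq->snt_isn);
}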
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
new file mode 100644
index 00000000000..72fd6f5e86b
--- /dev/null
+++ b/include/net/request_sock.h
@@ -0,0 +1,255 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ *		Definitions for request_sock
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *		From code originally in include/net/tcp.h
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#ifndef _REQUEST_SOCK_H
+#define _REQUEST_SOCK_H
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <net/sock.h>
+
+struct request_sock;
+struct sk_buff;
+struct dst_entry;
+struct proto;
+
+struct request_sock_ops {
+	int		family;
+	kmem_cache_t	*slab;
+	int		obj_size;
+	int		(*rtx_syn_ack)(struct sock *sk,
+				       struct request_sock *req,
+				       struct dst_entry *dst);
+	void		(*send_ack)(struct sk_buff *skb,
+				    struct request_sock *req);
+	void		(*send_reset)(struct sk_buff *skb);
+	void		(*destructor)(struct request_sock *req);
+};
+
+/* struct request_sock - mini sock to represent a connection request
+ */
+struct request_sock {
+	struct request_sock	*dl_next; /* Must be first member! */
+	u16			mss;
+	u8			retrans;
+	u8			__pad;
+	/* The following two fields can be easily recomputed I think -AK */
+	u32			window_clamp;	/* window clamp at creation time */
+	u32			rcv_wnd;	/* rcv_wnd offered first time */
+	u32			ts_recent;
+	unsigned long		expires;
+	struct request_sock_ops	*rsk_ops;
+	struct sock		*sk;
+};
+
+static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops)
+{
+	struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC);
+
+	if (req != NULL)
+		req->rsk_ops = ops;
+
+	return req;
+}
+
+static inline void __reqsk_free(struct request_sock *req)
+{
+	kmem_cache_free(req->rsk_ops->slab, req);
+}
+
+static inline void reqsk_free(struct request_sock *req)
+{
+	req->rsk_ops->destructor(req);
+	__reqsk_free(req);
+}
+
+extern int sysctl_max_syn_backlog;
+
+/** struct listen_sock - listen state
+ *
+ * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
+ */
+struct listen_sock {
+	u8			max_qlen_log;
+	/* 3 bytes hole, try to use */
+	int			qlen;
+	int			qlen_young;
+	int			clock_hand;
+	u32			hash_rnd;
+	struct request_sock	*syn_table[0];
+};
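A protocol plugs into this by filling in a struct request_sock_ops and creating the backing slab cache; obj_size lets the generic allocator hand back the protocol's larger specialised structure while reqsk_alloc()/reqsk_free() stay protocol-agnostic. A hedged sketch under era-appropriate slab APIs — the names here are illustrative, TCP's real table is wired up in net/ipv4/tcp_ipv4.c:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/slab.h>
#include <net/request_sock.h>

static void example_destructor(struct request_sock *req)
{
	/* Release protocol-private state, e.g. parsed IP options. */
	if (inet_rsk(req)->opt != NULL)
		kfree(inet_rsk(req)->opt);
}

static struct request_sock_ops example_ops = {
	.family		= PF_INET,
	.obj_size	= sizeof(struct inet_request_sock),
	.destructor	= example_destructor,
	/* .slab is filled in once the cache exists, see below */
};

static int __init example_init(void)
{
	example_ops.slab = kmem_cache_create("example_request_sock",
					     example_ops.obj_size, 0,
					     SLAB_HWCACHE_ALIGN, NULL, NULL);
	return example_ops.slab != NULL ? 0 : -ENOMEM;
}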
+/** struct request_sock_queue - queue of request_socks
+ *
+ * @rskq_accept_head - FIFO head of established children
+ * @rskq_accept_tail - FIFO tail of established children
+ * @syn_wait_lock - serializer
+ *
+ * %syn_wait_lock is necessary only to avoid proc interface having to grab
+ * the main sock lock while browsing the listening hash (otherwise it's
+ * deadlock prone).
+ *
+ * This lock is acquired in read mode only from listening_get_next() seq_file
+ * op and it's acquired in write mode _only_ from code that is actively
+ * changing rskq_accept_head. All readers that are holding the master sock lock
+ * don't need to grab this lock in read mode too as rskq_accept_head writes
+ * are always protected from the main sock lock.
+ */
+struct request_sock_queue {
+	struct request_sock	*rskq_accept_head;
+	struct request_sock	*rskq_accept_tail;
+	rwlock_t		syn_wait_lock;
+	struct listen_sock	*listen_opt;
+};
+
+extern int reqsk_queue_alloc(struct request_sock_queue *queue,
+			     const int nr_table_entries);
+
+static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue)
+{
+	struct listen_sock *lopt;
+
+	write_lock_bh(&queue->syn_wait_lock);
+	lopt = queue->listen_opt;
+	queue->listen_opt = NULL;
+	write_unlock_bh(&queue->syn_wait_lock);
+
+	return lopt;
+}
+
+static inline void reqsk_queue_destroy(struct request_sock_queue *queue)
+{
+	kfree(reqsk_queue_yank_listen_sk(queue));
+}
+
+static inline struct request_sock *
+	reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
+{
+	struct request_sock *req = queue->rskq_accept_head;
+
+	queue->rskq_accept_head = NULL;
+	return req;
+}
+
+static inline int reqsk_queue_empty(struct request_sock_queue *queue)
+{
+	return queue->rskq_accept_head == NULL;
+}
+
+static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
+				      struct request_sock *req,
+				      struct request_sock **prev_req)
+{
+	write_lock(&queue->syn_wait_lock);
+	*prev_req = req->dl_next;
+	write_unlock(&queue->syn_wait_lock);
+}
+
+static inline void reqsk_queue_add(struct request_sock_queue *queue,
+				   struct request_sock *req,
+				   struct sock *parent,
+				   struct sock *child)
+{
+	req->sk = child;
+	sk_acceptq_added(parent);
+
+	if (queue->rskq_accept_head == NULL)
+		queue->rskq_accept_head = req;
+	else
+		queue->rskq_accept_tail->dl_next = req;
+
+	queue->rskq_accept_tail = req;
+	req->dl_next = NULL;
+}
+
+static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue)
+{
+	struct request_sock *req = queue->rskq_accept_head;
+
+	BUG_TRAP(req != NULL);
+
+	queue->rskq_accept_head = req->dl_next;
+	if (queue->rskq_accept_head == NULL)
+		queue->rskq_accept_tail = NULL;
+
+	return req;
+}
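The queue helpers pair up across the accept path: the handshake code calls reqsk_queue_add() once a child socket is established, and accept() drains the FIFO with reqsk_queue_remove(). A sketch of the consumer side, using only the helpers defined above — locking and accounting are elided (the real TCP code runs this under the listening socket's lock and also decrements the accept backlog via the parent):

#include <net/request_sock.h>

static struct sock *example_accept_dequeue(struct sock *parent,
					   struct request_sock_queue *queue)
{
	struct sock *child = NULL;

	if (!reqsk_queue_empty(queue)) {
		struct request_sock *req = reqsk_queue_remove(queue);

		/* The established child was parked in req->sk by
		 * reqsk_queue_add(); the request itself is finished,
		 * so return it straight to the slab. */
		child = req->sk;
		__reqsk_free(req);
	}

	return child;
}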