diff options
author | David S. Miller <davem@davemloft.net> | 2008-09-09 19:51:04 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-09-09 19:51:04 -0700 |
commit | dacc62dbf56e872ad96edde0393b9deb56d80cd5 (patch) | |
tree | 3d1b3e25aba9c5324bb0f6289033f502fa6ccb8c | |
parent | 47abf28d5b36521558a848a346064a3a3c82bd9e (diff) | |
parent | c051a0a2c9e283c1123ed3ce65e66e41d2ce5e24 (diff) |
Merge branch 'lvs-next-2.6' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6
-rw-r--r-- | include/net/ip_vs.h | 308 | ||||
-rw-r--r-- | net/ipv4/ipvs/Kconfig | 11 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_conn.c | 249 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_core.c | 806 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_ctl.c | 523 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_dh.c | 5 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_est.c | 40 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_ftp.c | 61 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_lblc.c | 7 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_lblcr.c | 11 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_lc.c | 11 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_nq.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_proto.c | 65 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_proto_ah_esp.c | 100 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_proto_tcp.c | 253 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_proto_udp.c | 226 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_rr.c | 13 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sed.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sh.c | 5 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sync.c | 40 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_wlc.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_wrr.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipvs/ip_vs_xmit.c | 471 |
23 files changed, 2469 insertions, 796 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a25ad243031..33e2ac6ceb3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -21,11 +21,103 @@ #include <linux/timer.h> #include <net/checksum.h> +#include <linux/netfilter.h> /* for union nf_inet_addr */ +#include <linux/ipv6.h> /* for struct ipv6hdr */ +#include <net/ipv6.h> /* for ipv6_addr_copy */ + +struct ip_vs_iphdr { + int len; + __u8 protocol; + union nf_inet_addr saddr; + union nf_inet_addr daddr; +}; + +static inline void +ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + const struct ipv6hdr *iph = nh; + iphdr->len = sizeof(struct ipv6hdr); + iphdr->protocol = iph->nexthdr; + ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr); + ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr); + } else +#endif + { + const struct iphdr *iph = nh; + iphdr->len = iph->ihl * 4; + iphdr->protocol = iph->protocol; + iphdr->saddr.ip = iph->saddr; + iphdr->daddr.ip = iph->daddr; + } +} + +static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst, + const union nf_inet_addr *src) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ipv6_addr_copy(&dst->in6, &src->in6); + else +#endif + dst->ip = src->ip; +} + +static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a, + const union nf_inet_addr *b) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_equal(&a->in6, &b->in6); +#endif + return a->ip == b->ip; +} #ifdef CONFIG_IP_VS_DEBUG #include <linux/net.h> extern int ip_vs_get_debug_level(void); + +static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, + const union nf_inet_addr *addr, + int *idx) +{ + int len; +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]", + NIP6(addr->in6)) + 1; + else +#endif + len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, + NIPQUAD(addr->ip)) + 1; + + *idx += len; + BUG_ON(*idx > buf_len + 1); + return &buf[*idx - len]; +} + +#define IP_VS_DBG_BUF(level, msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG "IPVS: " msg); \ + } while (0) +#define IP_VS_ERR_BUF(msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + printk(KERN_ERR "IPVS: " msg); \ + } while (0) + +/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */ +#define IP_VS_DBG_ADDR(af, addr) \ + ip_vs_dbg_addr(af, ip_vs_dbg_buf, \ + sizeof(ip_vs_dbg_buf), addr, \ + &ip_vs_dbg_idx) + #define IP_VS_DBG(level, msg...) \ do { \ if (level <= ip_vs_get_debug_level()) \ @@ -48,6 +140,8 @@ extern int ip_vs_get_debug_level(void); pp->debug_packet(pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ +#define IP_VS_DBG_BUF(level, msg...) do {} while (0) +#define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) #define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) @@ -160,27 +254,10 @@ struct ip_vs_estimator { struct ip_vs_stats { - __u32 conns; /* connections scheduled */ - __u32 inpkts; /* incoming packets */ - __u32 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ - - __u32 cps; /* current connection rate */ - __u32 inpps; /* current in packet rate */ - __u32 outpps; /* current out packet rate */ - __u32 inbps; /* current in byte rate */ - __u32 outbps; /* current out byte rate */ - - /* - * Don't add anything before the lock, because we use memcpy() to copy - * the members before the lock to struct ip_vs_stats_user in - * ip_vs_ctl.c. - */ + struct ip_vs_stats_user ustats; /* statistics */ + struct ip_vs_estimator est; /* estimator */ spinlock_t lock; /* spin lock */ - - struct ip_vs_estimator est; /* estimator */ }; struct dst_entry; @@ -202,21 +279,23 @@ struct ip_vs_protocol { void (*exit)(struct ip_vs_protocol *pp); - int (*conn_schedule)(struct sk_buff *skb, + int (*conn_schedule)(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp); struct ip_vs_conn * - (*conn_in_get)(const struct sk_buff *skb, + (*conn_in_get)(int af, + const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); struct ip_vs_conn * - (*conn_out_get)(const struct sk_buff *skb, + (*conn_out_get)(int af, + const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, + const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -226,7 +305,8 @@ struct ip_vs_protocol { int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); - int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp); + int (*csum_check)(int af, struct sk_buff *skb, + struct ip_vs_protocol *pp); const char *(*state_name)(int state); @@ -259,9 +339,10 @@ struct ip_vs_conn { struct list_head c_list; /* hashed list heads */ /* Protocol, addresses and port numbers */ - __be32 caddr; /* client address */ - __be32 vaddr; /* virtual address */ - __be32 daddr; /* destination address */ + u16 af; /* address family */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + union nf_inet_addr daddr; /* destination address */ __be16 cport; __be16 vport; __be16 dport; @@ -305,6 +386,45 @@ struct ip_vs_conn { /* + * Extended internal versions of struct ip_vs_service_user and + * ip_vs_dest_user for IPv6 support. + * + * We need these to conveniently pass around service and destination + * options, but unfortunately, we also need to keep the old definitions to + * maintain userspace backwards compatibility for the setsockopt interface. + */ +struct ip_vs_service_user_kern { + /* virtual service addresses */ + u16 af; + u16 protocol; + union nf_inet_addr addr; /* virtual ip address */ + u16 port; + u32 fwmark; /* firwall mark of service */ + + /* virtual service options */ + char *sched_name; + unsigned flags; /* virtual service flags */ + unsigned timeout; /* persistent timeout in sec */ + u32 netmask; /* persistent netmask */ +}; + + +struct ip_vs_dest_user_kern { + /* destination server address */ + union nf_inet_addr addr; + u16 port; + + /* real server options */ + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + + /* thresholds for active connections */ + u32 u_threshold; /* upper threshold */ + u32 l_threshold; /* lower threshold */ +}; + + +/* * The information about the virtual service offered to the net * and the forwarding entries */ @@ -314,8 +434,9 @@ struct ip_vs_service { atomic_t refcnt; /* reference counter */ atomic_t usecnt; /* use counter */ + u16 af; /* address family */ __u16 protocol; /* which protocol (TCP/UDP) */ - __be32 addr; /* IP address for virtual service */ + union nf_inet_addr addr; /* IP address for virtual service */ __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ unsigned flags; /* service status flags */ @@ -342,7 +463,8 @@ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ struct list_head d_list; /* for table with all the dests */ - __be32 addr; /* IP address of the server */ + u16 af; /* address family */ + union nf_inet_addr addr; /* IP address of the server */ __be16 port; /* port number of the server */ volatile unsigned flags; /* dest status flags */ atomic_t conn_flags; /* flags to copy to conn */ @@ -366,7 +488,7 @@ struct ip_vs_dest { /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ - __be32 vaddr; /* virtual IP address */ + union nf_inet_addr vaddr; /* virtual IP address */ __be16 vport; /* virtual port number */ __u32 vfwmark; /* firewall mark of service */ }; @@ -380,6 +502,9 @@ struct ip_vs_scheduler { char *name; /* scheduler name */ atomic_t refcnt; /* reference counter */ struct module *module; /* THIS_MODULE/NULL */ +#ifdef CONFIG_IP_VS_IPV6 + int supports_ipv6; /* scheduler has IPv6 support */ +#endif /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); @@ -479,16 +604,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows); #ifndef CONFIG_IP_VS_TAB_BITS #define CONFIG_IP_VS_TAB_BITS 12 #endif -/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */ -#if CONFIG_IP_VS_TAB_BITS < 8 -#define IP_VS_CONN_TAB_BITS 8 -#endif -#if CONFIG_IP_VS_TAB_BITS > 20 -#define IP_VS_CONN_TAB_BITS 20 -#endif -#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20 + #define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS -#endif #define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS) #define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1) @@ -500,11 +617,16 @@ enum { }; extern struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); + extern struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); + extern struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port); /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) @@ -515,8 +637,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); extern struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); @@ -532,24 +655,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp) { struct ip_vs_conn *ctl_cp = cp->control; if (!ctl_cp) { - IP_VS_ERR("request control DEL for uncontrolled: " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("request control DEL for uncontrolled: " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + return; } - IP_VS_DBG(7, "DELeting control for: " - "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport)); + IP_VS_DBG_BUF(7, "DELeting control for: " + "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), + ntohs(ctl_cp->cport)); cp->control = NULL; if (atomic_read(&ctl_cp->n_control) == 0) { - IP_VS_ERR("BUG control DEL with n=0 : " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("BUG control DEL with n=0 : " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + return; } atomic_dec(&ctl_cp->n_control); @@ -559,17 +690,22 @@ static inline void ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) { if (cp->control) { - IP_VS_ERR("request control ADD for already controlled: " - "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(cp->vaddr),ntohs(cp->vport)); + IP_VS_ERR_BUF("request control ADD for already controlled: " + "%s:%d to %s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport)); + ip_vs_control_del(cp); } - IP_VS_DBG(7, "ADDing control for: " - "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n", - NIPQUAD(cp->caddr),ntohs(cp->cport), - NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport)); + IP_VS_DBG_BUF(7, "ADDing control for: " + "cp.dst=%s:%d ctl_cp.dst=%s:%d\n", + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr), + ntohs(ctl_cp->cport)); cp->control = ctl_cp; atomic_inc(&ctl_cp->n_control); @@ -647,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; extern struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport); +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport); static inline void ip_vs_service_put(struct ip_vs_service *svc) { @@ -655,14 +792,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) } extern struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport); +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, __be16 dport); + extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); extern struct ip_vs_dest * -ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol); +ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, + const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); @@ -706,6 +845,19 @@ extern int ip_vs_icmp_xmit (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset); extern void ip_vs_dst_reset(struct ip_vs_dest *dest); +#ifdef CONFIG_IP_VS_IPV6 +extern int ip_vs_bypass_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_nat_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_tunnel_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_dr_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); +extern int ip_vs_icmp_xmit_v6 +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, + int offset); +#endif /* * This is a simple mechanism to ignore packets when @@ -750,7 +902,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) } extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, - struct ip_vs_conn *cp, int dir); + struct ip_vs_conn *cp, int dir); + +#ifdef CONFIG_IP_VS_IPV6 +extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int dir); +#endif extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset); @@ -761,6 +918,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) return csum_partial((char *) diff, sizeof(diff), oldsum); } +#ifdef CONFIG_IP_VS_IPV6 +static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, + __wsum oldsum) +{ + __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], + new[3], new[2], new[1], new[0] }; + + return csum_partial((char *) diff, sizeof(diff), oldsum); +} +#endif + static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig index 2e48a7e2722..de6004de80b 100644 --- a/net/ipv4/ipvs/Kconfig +++ b/net/ipv4/ipvs/Kconfig @@ -24,6 +24,14 @@ menuconfig IP_VS if IP_VS +config IP_VS_IPV6 + bool "IPv6 support for IPVS (DANGEROUS)" + depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + ---help--- + Add IPv6 support to IPVS. This is incomplete and might be dangerous. + + Say N if unsure. + config IP_VS_DEBUG bool "IP virtual server debugging" ---help--- @@ -33,7 +41,8 @@ config IP_VS_DEBUG config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" - default "12" + range 8 20 + default 12 ---help--- The IPVS connection hash table uses the chaining scheme to handle hash collisions. Using a big IPVS connection hash table will greatly diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 44a6872dc24..9a24332fbed 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) +static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, + const union nf_inet_addr *addr, + __be16 port) { - return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), + (__force u32)port, proto, ip_vs_conn_rnd) + & IP_VS_CONN_TAB_MASK; +#endif + return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) int ret; /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) int ret; /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { struct ip_vs_conn *cp; - cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) - cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, + d_port); - IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); goto out; @@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get out: ct_read_unlock(hash); - IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } @@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get /* * Check for "full" addressed entries */ - hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); + hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (d_addr == cp->caddr && d_port == cp->cport && - s_port == cp->dport && s_addr == cp->daddr && + if (cp->af == af && + ip_vs_addr_equal(af, d_addr, &cp->caddr) && + ip_vs_addr_equal(af, s_addr, &cp->daddr) && + d_port == cp->cport && s_port == cp->dport && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_unlock(hash); - IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - ret?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ret ? "hit" : "not hit"); return ret; } @@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) } } +#ifdef CONFIG_IP_VS_IPV6 +static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp) +{ + switch (IP_VS_FWD_METHOD(cp)) { + case IP_VS_CONN_F_MASQ: + cp->packet_xmit = ip_vs_nat_xmit_v6; + break; + + case IP_VS_CONN_F_TUNNEL: + cp->packet_xmit = ip_vs_tunnel_xmit_v6; + break; + + case IP_VS_CONN_F_DROUTE: + cp->packet_xmit = ip_vs_dr_xmit_v6; + break; + + case IP_VS_CONN_F_LOCALNODE: + cp->packet_xmit = ip_vs_null_xmit; + break; + + case IP_VS_CONN_F_BYPASS: + cp->packet_xmit = ip_vs_bypass_xmit_v6; + break; + } +} +#endif + static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) { @@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) cp->flags |= atomic_read(&dest->conn_flags); cp->dest = dest; - IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->daddr, cp->dport, - cp->vaddr, cp->vport, cp->protocol); + dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + &cp->vaddr, cp->vport, + cp->protocol); ip_vs_bind_dest(cp, dest); return dest; } else @@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) if (!dest) return; - IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct) !(dest->flags & IP_VS_DEST_F_AVAILABLE) || (sysctl_ip_vs_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { - IP_VS_DBG(9, "check_template: dest not available for " - "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "-> d:%u.%u.%u.%u:%d\n", - ip_vs_proto_name(ct->protocol), - NIPQUAD(ct->caddr), ntohs(ct->cport), - NIPQUAD(ct->vaddr), ntohs(ct->vport), - NIPQUAD(ct->daddr), ntohs(ct->dport)); + IP_VS_DBG_BUF(9, "check_template: dest not available for " + "protocol %s s:%s:%d v:%s:%d " + "-> d:%s:%d\n", + ip_vs_proto_name(ct->protocol), + IP_VS_DBG_ADDR(ct->af, &ct->caddr), + ntohs(ct->cport), + IP_VS_DBG_ADDR(ct->af, &ct->vaddr), + ntohs(ct->vport), + IP_VS_DBG_ADDR(ct->af, &ct->daddr), + ntohs(ct->dport)); /* * Invalidate the connection template @@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, |