aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-09-09 19:51:04 -0700
committerDavid S. Miller <davem@davemloft.net>2008-09-09 19:51:04 -0700
commitdacc62dbf56e872ad96edde0393b9deb56d80cd5 (patch)
tree3d1b3e25aba9c5324bb0f6289033f502fa6ccb8c
parent47abf28d5b36521558a848a346064a3a3c82bd9e (diff)
parentc051a0a2c9e283c1123ed3ce65e66e41d2ce5e24 (diff)
Merge branch 'lvs-next-2.6' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6
-rw-r--r--include/net/ip_vs.h308
-rw-r--r--net/ipv4/ipvs/Kconfig11
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c249
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c806
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c523
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c5
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c40
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c61
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c7
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c11
-rw-r--r--net/ipv4/ipvs/ip_vs_lc.c11
-rw-r--r--net/ipv4/ipvs/ip_vs_nq.c15
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c65
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah_esp.c100
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c253
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c226
-rw-r--r--net/ipv4/ipvs/ip_vs_rr.c13
-rw-r--r--net/ipv4/ipvs/ip_vs_sed.c15
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c5
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c40
-rw-r--r--net/ipv4/ipvs/ip_vs_wlc.c15
-rw-r--r--net/ipv4/ipvs/ip_vs_wrr.c15
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c471
23 files changed, 2469 insertions, 796 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a25ad243031..33e2ac6ceb3 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -21,11 +21,103 @@
#include <linux/timer.h>
#include <net/checksum.h>
+#include <linux/netfilter.h> /* for union nf_inet_addr */
+#include <linux/ipv6.h> /* for struct ipv6hdr */
+#include <net/ipv6.h> /* for ipv6_addr_copy */
+
+struct ip_vs_iphdr {
+ int len;
+ __u8 protocol;
+ union nf_inet_addr saddr;
+ union nf_inet_addr daddr;
+};
+
+static inline void
+ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ const struct ipv6hdr *iph = nh;
+ iphdr->len = sizeof(struct ipv6hdr);
+ iphdr->protocol = iph->nexthdr;
+ ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
+ ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
+ } else
+#endif
+ {
+ const struct iphdr *iph = nh;
+ iphdr->len = iph->ihl * 4;
+ iphdr->protocol = iph->protocol;
+ iphdr->saddr.ip = iph->saddr;
+ iphdr->daddr.ip = iph->daddr;
+ }
+}
+
+static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
+ const union nf_inet_addr *src)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ipv6_addr_copy(&dst->in6, &src->in6);
+ else
+#endif
+ dst->ip = src->ip;
+}
+
+static inline int ip_vs_addr_equal(int af, const union nf_inet_addr *a,
+ const union nf_inet_addr *b)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return ipv6_addr_equal(&a->in6, &b->in6);
+#endif
+ return a->ip == b->ip;
+}
#ifdef CONFIG_IP_VS_DEBUG
#include <linux/net.h>
extern int ip_vs_get_debug_level(void);
+
+static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len,
+ const union nf_inet_addr *addr,
+ int *idx)
+{
+ int len;
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]",
+ NIP6(addr->in6)) + 1;
+ else
+#endif
+ len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT,
+ NIPQUAD(addr->ip)) + 1;
+
+ *idx += len;
+ BUG_ON(*idx > buf_len + 1);
+ return &buf[*idx - len];
+}
+
+#define IP_VS_DBG_BUF(level, msg...) \
+ do { \
+ char ip_vs_dbg_buf[160]; \
+ int ip_vs_dbg_idx = 0; \
+ if (level <= ip_vs_get_debug_level()) \
+ printk(KERN_DEBUG "IPVS: " msg); \
+ } while (0)
+#define IP_VS_ERR_BUF(msg...) \
+ do { \
+ char ip_vs_dbg_buf[160]; \
+ int ip_vs_dbg_idx = 0; \
+ printk(KERN_ERR "IPVS: " msg); \
+ } while (0)
+
+/* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */
+#define IP_VS_DBG_ADDR(af, addr) \
+ ip_vs_dbg_addr(af, ip_vs_dbg_buf, \
+ sizeof(ip_vs_dbg_buf), addr, \
+ &ip_vs_dbg_idx)
+
#define IP_VS_DBG(level, msg...) \
do { \
if (level <= ip_vs_get_debug_level()) \
@@ -48,6 +140,8 @@ extern int ip_vs_get_debug_level(void);
pp->debug_packet(pp, skb, ofs, msg); \
} while (0)
#else /* NO DEBUGGING at ALL */
+#define IP_VS_DBG_BUF(level, msg...) do {} while (0)
+#define IP_VS_ERR_BUF(msg...) do {} while (0)
#define IP_VS_DBG(level, msg...) do {} while (0)
#define IP_VS_DBG_RL(msg...) do {} while (0)
#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0)
@@ -160,27 +254,10 @@ struct ip_vs_estimator {
struct ip_vs_stats
{
- __u32 conns; /* connections scheduled */
- __u32 inpkts; /* incoming packets */
- __u32 outpkts; /* outgoing packets */
- __u64 inbytes; /* incoming bytes */
- __u64 outbytes; /* outgoing bytes */
-
- __u32 cps; /* current connection rate */
- __u32 inpps; /* current in packet rate */
- __u32 outpps; /* current out packet rate */
- __u32 inbps; /* current in byte rate */
- __u32 outbps; /* current out byte rate */
-
- /*
- * Don't add anything before the lock, because we use memcpy() to copy
- * the members before the lock to struct ip_vs_stats_user in
- * ip_vs_ctl.c.
- */
+ struct ip_vs_stats_user ustats; /* statistics */
+ struct ip_vs_estimator est; /* estimator */
spinlock_t lock; /* spin lock */
-
- struct ip_vs_estimator est; /* estimator */
};
struct dst_entry;
@@ -202,21 +279,23 @@ struct ip_vs_protocol {
void (*exit)(struct ip_vs_protocol *pp);
- int (*conn_schedule)(struct sk_buff *skb,
+ int (*conn_schedule)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
- (*conn_in_get)(const struct sk_buff *skb,
+ (*conn_in_get)(int af,
+ const struct sk_buff *skb,
struct ip_vs_protocol *pp,
- const struct iphdr *iph,
+ const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
struct ip_vs_conn *
- (*conn_out_get)(const struct sk_buff *skb,
+ (*conn_out_get)(int af,
+ const struct sk_buff *skb,
struct ip_vs_protocol *pp,
- const struct iphdr *iph,
+ const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -226,7 +305,8 @@ struct ip_vs_protocol {
int (*dnat_handler)(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp);
- int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp);
+ int (*csum_check)(int af, struct sk_buff *skb,
+ struct ip_vs_protocol *pp);
const char *(*state_name)(int state);
@@ -259,9 +339,10 @@ struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
/* Protocol, addresses and port numbers */
- __be32 caddr; /* client address */
- __be32 vaddr; /* virtual address */
- __be32 daddr; /* destination address */
+ u16 af; /* address family */
+ union nf_inet_addr caddr; /* client address */
+ union nf_inet_addr vaddr; /* virtual address */
+ union nf_inet_addr daddr; /* destination address */
__be16 cport;
__be16 vport;
__be16 dport;
@@ -305,6 +386,45 @@ struct ip_vs_conn {
/*
+ * Extended internal versions of struct ip_vs_service_user and
+ * ip_vs_dest_user for IPv6 support.
+ *
+ * We need these to conveniently pass around service and destination
+ * options, but unfortunately, we also need to keep the old definitions to
+ * maintain userspace backwards compatibility for the setsockopt interface.
+ */
+struct ip_vs_service_user_kern {
+ /* virtual service addresses */
+ u16 af;
+ u16 protocol;
+ union nf_inet_addr addr; /* virtual ip address */
+ u16 port;
+ u32 fwmark; /* firwall mark of service */
+
+ /* virtual service options */
+ char *sched_name;
+ unsigned flags; /* virtual service flags */
+ unsigned timeout; /* persistent timeout in sec */
+ u32 netmask; /* persistent netmask */
+};
+
+
+struct ip_vs_dest_user_kern {
+ /* destination server address */
+ union nf_inet_addr addr;
+ u16 port;
+
+ /* real server options */
+ unsigned conn_flags; /* connection flags */
+ int weight; /* destination weight */
+
+ /* thresholds for active connections */
+ u32 u_threshold; /* upper threshold */
+ u32 l_threshold; /* lower threshold */
+};
+
+
+/*
* The information about the virtual service offered to the net
* and the forwarding entries
*/
@@ -314,8 +434,9 @@ struct ip_vs_service {
atomic_t refcnt; /* reference counter */
atomic_t usecnt; /* use counter */
+ u16 af; /* address family */
__u16 protocol; /* which protocol (TCP/UDP) */
- __be32 addr; /* IP address for virtual service */
+ union nf_inet_addr addr; /* IP address for virtual service */
__be16 port; /* port number for the service */
__u32 fwmark; /* firewall mark of the service */
unsigned flags; /* service status flags */
@@ -342,7 +463,8 @@ struct ip_vs_dest {
struct list_head n_list; /* for the dests in the service */
struct list_head d_list; /* for table with all the dests */
- __be32 addr; /* IP address of the server */
+ u16 af; /* address family */
+ union nf_inet_addr addr; /* IP address of the server */
__be16 port; /* port number of the server */
volatile unsigned flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
@@ -366,7 +488,7 @@ struct ip_vs_dest {
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
__u16 protocol; /* which protocol (TCP/UDP) */
- __be32 vaddr; /* virtual IP address */
+ union nf_inet_addr vaddr; /* virtual IP address */
__be16 vport; /* virtual port number */
__u32 vfwmark; /* firewall mark of service */
};
@@ -380,6 +502,9 @@ struct ip_vs_scheduler {
char *name; /* scheduler name */
atomic_t refcnt; /* reference counter */
struct module *module; /* THIS_MODULE/NULL */
+#ifdef CONFIG_IP_VS_IPV6
+ int supports_ipv6; /* scheduler has IPv6 support */
+#endif
/* scheduler initializing service */
int (*init_service)(struct ip_vs_service *svc);
@@ -479,16 +604,8 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows);
#ifndef CONFIG_IP_VS_TAB_BITS
#define CONFIG_IP_VS_TAB_BITS 12
#endif
-/* make sure that IP_VS_CONN_TAB_BITS is located in [8, 20] */
-#if CONFIG_IP_VS_TAB_BITS < 8
-#define IP_VS_CONN_TAB_BITS 8
-#endif
-#if CONFIG_IP_VS_TAB_BITS > 20
-#define IP_VS_CONN_TAB_BITS 20
-#endif
-#if 8 <= CONFIG_IP_VS_TAB_BITS && CONFIG_IP_VS_TAB_BITS <= 20
+
#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS
-#endif
#define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS)
#define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1)
@@ -500,11 +617,16 @@ enum {
};
extern struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
extern struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
+
extern struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port);
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port);
/* put back the conn without restarting its timer */
static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
@@ -515,8 +637,9 @@ extern void ip_vs_conn_put(struct ip_vs_conn *cp);
extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
extern struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
- __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
+ const union nf_inet_addr *vaddr, __be16 vport,
+ const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
@@ -532,24 +655,32 @@ static inline void ip_vs_control_del(struct ip_vs_conn *cp)
{
struct ip_vs_conn *ctl_cp = cp->control;
if (!ctl_cp) {
- IP_VS_ERR("request control DEL for uncontrolled: "
- "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(cp->vaddr),ntohs(cp->vport));
+ IP_VS_ERR_BUF("request control DEL for uncontrolled: "
+ "%s:%d to %s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport));
+
return;
}
- IP_VS_DBG(7, "DELeting control for: "
- "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+ IP_VS_DBG_BUF(7, "DELeting control for: "
+ "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+ ntohs(ctl_cp->cport));
cp->control = NULL;
if (atomic_read(&ctl_cp->n_control) == 0) {
- IP_VS_ERR("BUG control DEL with n=0 : "
- "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(cp->vaddr),ntohs(cp->vport));
+ IP_VS_ERR_BUF("BUG control DEL with n=0 : "
+ "%s:%d to %s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport));
+
return;
}
atomic_dec(&ctl_cp->n_control);
@@ -559,17 +690,22 @@ static inline void
ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
{
if (cp->control) {
- IP_VS_ERR("request control ADD for already controlled: "
- "%d.%d.%d.%d:%d to %d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(cp->vaddr),ntohs(cp->vport));
+ IP_VS_ERR_BUF("request control ADD for already controlled: "
+ "%s:%d to %s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport));
+
ip_vs_control_del(cp);
}
- IP_VS_DBG(7, "ADDing control for: "
- "cp.dst=%d.%d.%d.%d:%d ctl_cp.dst=%d.%d.%d.%d:%d\n",
- NIPQUAD(cp->caddr),ntohs(cp->cport),
- NIPQUAD(ctl_cp->caddr),ntohs(ctl_cp->cport));
+ IP_VS_DBG_BUF(7, "ADDing control for: "
+ "cp.dst=%s:%d ctl_cp.dst=%s:%d\n",
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &ctl_cp->caddr),
+ ntohs(ctl_cp->cport));
cp->control = ctl_cp;
atomic_inc(&ctl_cp->n_control);
@@ -647,7 +783,8 @@ extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern struct ip_vs_service *
-ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
+ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
{
@@ -655,14 +792,16 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
}
extern struct ip_vs_dest *
-ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport);
+ip_vs_lookup_real_service(int af, __u16 protocol,
+ const union nf_inet_addr *daddr, __be16 dport);
+
extern int ip_vs_use_count_inc(void);
extern void ip_vs_use_count_dec(void);
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
-ip_vs_find_dest(__be32 daddr, __be16 dport,
- __be32 vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
+ const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
@@ -706,6 +845,19 @@ extern int ip_vs_icmp_xmit
(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset);
extern void ip_vs_dst_reset(struct ip_vs_dest *dest);
+#ifdef CONFIG_IP_VS_IPV6
+extern int ip_vs_bypass_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_nat_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_tunnel_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_dr_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
+extern int ip_vs_icmp_xmit_v6
+(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp,
+ int offset);
+#endif
/*
* This is a simple mechanism to ignore packets when
@@ -750,7 +902,12 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp)
}
extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
- struct ip_vs_conn *cp, int dir);
+ struct ip_vs_conn *cp, int dir);
+
+#ifdef CONFIG_IP_VS_IPV6
+extern void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int dir);
+#endif
extern __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset);
@@ -761,6 +918,17 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum)
return csum_partial((char *) diff, sizeof(diff), oldsum);
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new,
+ __wsum oldsum)
+{
+ __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0],
+ new[3], new[2], new[1], new[0] };
+
+ return csum_partial((char *) diff, sizeof(diff), oldsum);
+}
+#endif
+
static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
{
__be16 diff[2] = { ~old, new };
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 2e48a7e2722..de6004de80b 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -24,6 +24,14 @@ menuconfig IP_VS
if IP_VS
+config IP_VS_IPV6
+ bool "IPv6 support for IPVS (DANGEROUS)"
+ depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+ ---help---
+ Add IPv6 support to IPVS. This is incomplete and might be dangerous.
+
+ Say N if unsure.
+
config IP_VS_DEBUG
bool "IP virtual server debugging"
---help---
@@ -33,7 +41,8 @@ config IP_VS_DEBUG
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
- default "12"
+ range 8 20
+ default 12
---help---
The IPVS connection hash table uses the chaining scheme to handle
hash collisions. Using a big IPVS connection hash table will greatly
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 44a6872dc24..9a24332fbed 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port)
+static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+ const union nf_inet_addr *addr,
+ __be16 port)
{
- return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd)
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+ (__force u32)port, proto, ip_vs_conn_rnd)
+ & IP_VS_CONN_TAB_MASK;
+#endif
+ return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+ ip_vs_conn_rnd)
& IP_VS_CONN_TAB_MASK;
}
@@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
int ret;
/* Hash by protocol, client address and port */
- hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
ct_write_lock(hash);
@@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
int ret;
/* unhash it and decrease its reference counter */
- hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport);
+ hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
ct_write_lock(hash);
@@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
* d_addr, d_port: pkt dest address (load balancer)
*/
static inline struct ip_vs_conn *__ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
- hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+ hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (s_addr==cp->caddr && s_port==cp->cport &&
- d_port==cp->vport && d_addr==cp->vaddr &&
+ if (cp->af == af &&
+ ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+ ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+ s_port == cp->cport && d_port == cp->vport &&
((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
- protocol==cp->protocol) {
+ protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
@@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
}
struct ip_vs_conn *ip_vs_conn_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
struct ip_vs_conn *cp;
- cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port);
+ cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port);
if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt))
- cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port);
+ cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr,
+ d_port);
- IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
- ip_vs_proto_name(protocol),
- NIPQUAD(s_addr), ntohs(s_port),
- NIPQUAD(d_addr), ntohs(d_port),
- cp?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+ IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+ cp ? "hit" : "not hit");
return cp;
}
/* Get reference to connection template */
struct ip_vs_conn *ip_vs_ct_in_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp;
- hash = ip_vs_conn_hashkey(protocol, s_addr, s_port);
+ hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (s_addr==cp->caddr && s_port==cp->cport &&
- d_port==cp->vport && d_addr==cp->vaddr &&
+ if (cp->af == af &&
+ ip_vs_addr_equal(af, s_addr, &cp->caddr) &&
+ ip_vs_addr_equal(af, d_addr, &cp->vaddr) &&
+ s_port == cp->cport && d_port == cp->vport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
- protocol==cp->protocol) {
+ protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
goto out;
@@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get
out:
ct_read_unlock(hash);
- IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
- ip_vs_proto_name(protocol),
- NIPQUAD(s_addr), ntohs(s_port),
- NIPQUAD(d_addr), ntohs(d_port),
- cp?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+ IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+ cp ? "hit" : "not hit");
return cp;
}
@@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get
* d_addr, d_port: pkt dest address (foreign host)
*/
struct ip_vs_conn *ip_vs_conn_out_get
-(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port)
+(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
+ const union nf_inet_addr *d_addr, __be16 d_port)
{
unsigned hash;
struct ip_vs_conn *cp, *ret=NULL;
@@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get
/*
* Check for "full" addressed entries
*/
- hash = ip_vs_conn_hashkey(protocol, d_addr, d_port);
+ hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port);
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
- if (d_addr == cp->caddr && d_port == cp->cport &&
- s_port == cp->dport && s_addr == cp->daddr &&
+ if (cp->af == af &&
+ ip_vs_addr_equal(af, d_addr, &cp->caddr) &&
+ ip_vs_addr_equal(af, s_addr, &cp->daddr) &&
+ d_port == cp->cport && s_port == cp->dport &&
protocol == cp->protocol) {
/* HIT */
atomic_inc(&cp->refcnt);
@@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get
ct_read_unlock(hash);
- IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n",
- ip_vs_proto_name(protocol),
- NIPQUAD(s_addr), ntohs(s_port),
- NIPQUAD(d_addr), ntohs(d_port),
- ret?"hit":"not hit");
+ IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
+ ip_vs_proto_name(protocol),
+ IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port),
+ IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port),
+ ret ? "hit" : "not hit");
return ret;
}
@@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
}
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
+{
+ switch (IP_VS_FWD_METHOD(cp)) {
+ case IP_VS_CONN_F_MASQ:
+ cp->packet_xmit = ip_vs_nat_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_TUNNEL:
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_DROUTE:
+ cp->packet_xmit = ip_vs_dr_xmit_v6;
+ break;
+
+ case IP_VS_CONN_F_LOCALNODE:
+ cp->packet_xmit = ip_vs_null_xmit;
+ break;
+
+ case IP_VS_CONN_F_BYPASS:
+ cp->packet_xmit = ip_vs_bypass_xmit_v6;
+ break;
+ }
+}
+#endif
+
static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
{
@@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
cp->flags |= atomic_read(&dest->conn_flags);
cp->dest = dest;
- IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
- "dest->refcnt:%d\n",
- ip_vs_proto_name(cp->protocol),
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- NIPQUAD(cp->daddr), ntohs(cp->dport),
- ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
+ "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
+ ip_vs_proto_name(cp->protocol),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ ip_vs_fwd_tag(cp), cp->state,
+ cp->flags, atomic_read(&cp->refcnt),
+ atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(cp->daddr, cp->dport,
- cp->vaddr, cp->vport, cp->protocol);
+ dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+ &cp->vaddr, cp->vport,
+ cp->protocol);
ip_vs_bind_dest(cp, dest);
return dest;
} else
@@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
if (!dest)
return;
- IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
- "dest->refcnt:%d\n",
- ip_vs_proto_name(cp->protocol),
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- NIPQUAD(cp->daddr), ntohs(cp->dport),
- ip_vs_fwd_tag(cp), cp->state,
- cp->flags, atomic_read(&cp->refcnt),
- atomic_read(&dest->refcnt));
+ IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d "
+ "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
+ "dest->refcnt:%d\n",
+ ip_vs_proto_name(cp->protocol),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ ip_vs_fwd_tag(cp), cp->state,
+ cp->flags, atomic_read(&cp->refcnt),
+ atomic_read(&dest->refcnt));
/* Update the connection counters */
if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
@@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
(sysctl_ip_vs_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
- IP_VS_DBG(9, "check_template: dest not available for "
- "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- "-> d:%u.%u.%u.%u:%d\n",
- ip_vs_proto_name(ct->protocol),
- NIPQUAD(ct->caddr), ntohs(ct->cport),
- NIPQUAD(ct->vaddr), ntohs(ct->vport),
- NIPQUAD(ct->daddr), ntohs(ct->dport));
+ IP_VS_DBG_BUF(9, "check_template: dest not available for "
+ "protocol %s s:%s:%d v:%s:%d "
+ "-> d:%s:%d\n",
+ ip_vs_proto_name(ct->protocol),
+ IP_VS_DBG_ADDR(ct->af, &ct->caddr),
+ ntohs(ct->cport),
+ IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
+ ntohs(ct->vport),
+ IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ ntohs(ct->dport));
/*
* Invalidate the connection template
@@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport,
- __be32 daddr, __be16 dport, unsigned flags,
+ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,