diff options
author | David S. Miller <davem@davemloft.net> | 2012-05-08 14:40:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-08 14:40:21 -0400 |
commit | 9bb862beb6e5839e92f709d33fda07678f062f20 (patch) | |
tree | a2c396712c5a2cda380034173fd07a67bfa0489f | |
parent | b44907e64cc1987153f6577306108379be1523b7 (diff) | |
parent | d16cf20e2f2f13411eece7f7fb72c17d141c4a84 (diff) |
Merge branch 'master' of git://1984.lsi.us.es/net-next
34 files changed, 856 insertions, 1708 deletions
diff --git a/Documentation/ABI/removed/ip_queue b/Documentation/ABI/removed/ip_queue new file mode 100644 index 00000000000..3243613bc2d --- /dev/null +++ b/Documentation/ABI/removed/ip_queue @@ -0,0 +1,9 @@ +What: ip_queue +Date: finally removed in kernel v3.5.0 +Contact: Pablo Neira Ayuso <pablo@netfilter.org> +Description: + ip_queue has been replaced by nfnetlink_queue which provides + more advanced queueing mechanism to user-space. The ip_queue + module was already announced to become obsolete years ago. + +Users: diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 90b0c4fd275..6f896b94abd 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN bridge-nf-filter-vlan-tagged - BOOLEAN 1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables. 0 : disable this. - Default: 1 + Default: 0 bridge-nf-filter-pppoe-tagged - BOOLEAN 1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables. 0 : disable this. - Default: 1 + Default: 0 +bridge-nf-pass-vlan-input-dev - BOOLEAN + 1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan + interface on the bridge and set the netfilter input device to the vlan. + This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT + target work with vlan-on-top-of-bridge interfaces. When no matching + vlan interface is found, or this switch is off, the input device is + set to the bridge interface. + 0: disable bridge netfilter vlan interface lookup. + Default: 0 proc/sys/net/sctp/* Variables: diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index be0ef3df4ac..8a2d438dc49 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -89,6 +89,7 @@ #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ +/* Initial bits allowed in backup server */ #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ IP_VS_CONN_F_NOOUTPUT | \ IP_VS_CONN_F_INACTIVE | \ @@ -97,6 +98,10 @@ IP_VS_CONN_F_TEMPLATE \ ) +/* Bits allowed to update in backup server */ +#define IP_VS_CONN_F_BACKUP_UPD_MASK (IP_VS_CONN_F_INACTIVE | \ + IP_VS_CONN_F_SEQ_MASK) + /* Flags that are not sent to backup server start from bit 16 */ #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 0d3dd66322e..d146872a0b9 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -83,6 +83,10 @@ enum ip_conntrack_status { /* Conntrack is a fake untracked entry */ IPS_UNTRACKED_BIT = 12, IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), + + /* Conntrack got a helper explicitly attached via CT target. */ + IPS_HELPER_BIT = 13, + IPS_HELPER = (1 << IPS_HELPER_BIT), }; /* Connection tracking event types */ diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 31f8bec9565..c61b8fb1a9e 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,4 +1,3 @@ -header-y += ip_queue.h header-y += ip_tables.h header-y += ipt_CLUSTERIP.h header-y += ipt_ECN.h diff --git a/include/linux/netfilter_ipv4/ip_queue.h b/include/linux/netfilter_ipv4/ip_queue.h deleted file mode 100644 index a03507f465f..00000000000 --- a/include/linux/netfilter_ipv4/ip_queue.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * This is a module which is used for queueing IPv4 packets and - * communicating with userspace via netlink. - * - * (C) 2000 James Morris, this code is GPL. - */ -#ifndef _IP_QUEUE_H -#define _IP_QUEUE_H - -#ifdef __KERNEL__ -#ifdef DEBUG_IPQ -#define QDEBUG(x...) printk(KERN_DEBUG ## x) -#else -#define QDEBUG(x...) -#endif /* DEBUG_IPQ */ -#else -#include <net/if.h> -#endif /* ! __KERNEL__ */ - -/* Messages sent from kernel */ -typedef struct ipq_packet_msg { - unsigned long packet_id; /* ID of queued packet */ - unsigned long mark; /* Netfilter mark value */ - long timestamp_sec; /* Packet arrival time (seconds) */ - long timestamp_usec; /* Packet arrvial time (+useconds) */ - unsigned int hook; /* Netfilter hook we rode in on */ - char indev_name[IFNAMSIZ]; /* Name of incoming interface */ - char outdev_name[IFNAMSIZ]; /* Name of outgoing interface */ - __be16 hw_protocol; /* Hardware protocol (network order) */ - unsigned short hw_type; /* Hardware type */ - unsigned char hw_addrlen; /* Hardware address length */ - unsigned char hw_addr[8]; /* Hardware address */ - size_t data_len; /* Length of packet data */ - unsigned char payload[0]; /* Optional packet data */ -} ipq_packet_msg_t; - -/* Messages sent from userspace */ -typedef struct ipq_mode_msg { - unsigned char value; /* Requested mode */ - size_t range; /* Optional range of packet requested */ -} ipq_mode_msg_t; - -typedef struct ipq_verdict_msg { - unsigned int value; /* Verdict to hand to netfilter */ - unsigned long id; /* Packet ID for this verdict */ - size_t data_len; /* Length of replacement data */ - unsigned char payload[0]; /* Optional replacement packet */ -} ipq_verdict_msg_t; - -typedef struct ipq_peer_msg { - union { - ipq_verdict_msg_t verdict; - ipq_mode_msg_t mode; - } msg; -} ipq_peer_msg_t; - -/* Packet delivery modes */ -enum { - IPQ_COPY_NONE, /* Initial mode, packets are dropped */ - IPQ_COPY_META, /* Copy metadata */ - IPQ_COPY_PACKET /* Copy metadata + packet (range) */ -}; -#define IPQ_COPY_MAX IPQ_COPY_PACKET - -/* Types of messages */ -#define IPQM_BASE 0x10 /* standard netlink messages below this */ -#define IPQM_MODE (IPQM_BASE + 1) /* Mode request from peer */ -#define IPQM_VERDICT (IPQM_BASE + 2) /* Verdict from peer */ -#define IPQM_PACKET (IPQM_BASE + 3) /* Packet from kernel */ -#define IPQM_MAX (IPQM_BASE + 4) - -#endif /*_IP_QUEUE_H*/ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a2092f582a7..0f628ffa420 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -7,7 +7,7 @@ #define NETLINK_ROUTE 0 /* Routing/device hook */ #define NETLINK_UNUSED 1 /* Unused number */ #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ -#define NETLINK_FIREWALL 3 /* Firewalling hook */ +#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */ #define NETLINK_SOCK_DIAG 4 /* socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 93b81aa7342..d6146b4811c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -504,6 +504,7 @@ struct ip_vs_conn { * state transition triggerd * synchronization */ + unsigned long sync_endtime; /* jiffies + sent_retries */ /* Control members */ struct ip_vs_conn *control; /* Master control connection */ @@ -783,6 +784,16 @@ struct ip_vs_app { void (*timeout_change)(struct ip_vs_app *app, int flags); }; +struct ipvs_master_sync_state { + struct list_head sync_queue; + struct ip_vs_sync_buff *sync_buff; + int sync_queue_len; + unsigned int sync_queue_delay; + struct task_struct *master_thread; + struct delayed_work master_wakeup_work; + struct netns_ipvs *ipvs; +}; + /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ @@ -869,10 +880,15 @@ struct netns_ipvs { #endif int sysctl_snat_reroute; int sysctl_sync_ver; + int sysctl_sync_ports; + int sysctl_sync_qlen_max; + int sysctl_sync_sock_size; int sysctl_cache_bypass; int sysctl_expire_nodest_conn; int sysctl_expire_quiescent_template; int sysctl_sync_threshold[2]; + unsigned int sysctl_sync_refresh_period; + int sysctl_sync_retries; int sysctl_nat_icmp_send; /* ip_vs_lblc */ @@ -888,13 +904,11 @@ struct netns_ipvs { spinlock_t est_lock; struct timer_list est_timer; /* Estimation timer */ /* ip_vs_sync */ - struct list_head sync_queue; spinlock_t sync_lock; - struct ip_vs_sync_buff *sync_buff; + struct ipvs_master_sync_state *ms; spinlock_t sync_buff_lock; - struct sockaddr_in sync_mcast_addr; - struct task_struct *master_thread; - struct task_struct *backup_thread; + struct task_struct **backup_threads; + int threads_mask; int send_mesg_maxlen; int recv_mesg_maxlen; volatile int sync_state; @@ -911,6 +925,14 @@ struct netns_ipvs { #define DEFAULT_SYNC_THRESHOLD 3 #define DEFAULT_SYNC_PERIOD 50 #define DEFAULT_SYNC_VER 1 +#define DEFAULT_SYNC_REFRESH_PERIOD (0U * HZ) +#define DEFAULT_SYNC_RETRIES 0 +#define IPVS_SYNC_WAKEUP_RATE 8 +#define IPVS_SYNC_QLEN_MAX (IPVS_SYNC_WAKEUP_RATE * 4) +#define IPVS_SYNC_SEND_DELAY (HZ / 50) +#define IPVS_SYNC_CHECK_PERIOD HZ +#define IPVS_SYNC_FLUSH_TIME (HZ * 2) +#define IPVS_SYNC_PORTS_MAX (1 << 6) #ifdef CONFIG_SYSCTL @@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) static inline int sysctl_sync_period(struct netns_ipvs *ipvs) { - return ipvs->sysctl_sync_threshold[1]; + return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]); +} + +static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) +{ + return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period); +} + +static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_sync_retries; } static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) @@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) return ipvs->sysctl_sync_ver; } +static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) +{ + return ACCESS_ONCE(ipvs->sysctl_sync_ports); +} + +static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_sync_qlen_max; +} + +static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_sync_sock_size; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs) return DEFAULT_SYNC_PERIOD; } +static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs) +{ + return DEFAULT_SYNC_REFRESH_PERIOD; +} + +static inline int sysctl_sync_retries(struct netns_ipvs *ipvs) +{ + return DEFAULT_SYNC_RETRIES & 3; +} + static inline int sysctl_sync_ver(struct netns_ipvs *ipvs) { return DEFAULT_SYNC_VER; } +static inline int sysctl_sync_ports(struct netns_ipvs *ipvs) +{ + return 1; +} + +static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs) +{ + return IPVS_SYNC_QLEN_MAX; +} + +static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs) +{ + return 0; +} + #endif /* @@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg); extern struct ip_vs_stats ip_vs_stats; extern int sysctl_ip_vs_sync_ver; -extern void ip_vs_sync_switch_mode(struct net *net, int mode); extern struct ip_vs_service * ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); @@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid); extern int stop_sync_thread(struct net *net, int state); -extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); +extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts); /* diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index ab86036bbf0..cce7f6a798b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); -#define NF_CT_STAT_INC(net, count) \ - __this_cpu_inc((net)->ct.stat->count) -#define NF_CT_STAT_INC_ATOMIC(net, count) \ -do { \ - local_bh_disable(); \ - __this_cpu_inc((net)->ct.stat->count); \ - local_bh_enable(); \ -} while (0) +#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 5767dc242de..1d1889409b9 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); } -extern int nf_conntrack_helper_init(void); -extern void nf_conntrack_helper_fini(void); +extern int nf_conntrack_helper_init(struct net *net); +extern void nf_conntrack_helper_fini(struct net *net); extern int nf_conntrack_broadcast_help(struct sk_buff *skb, unsigned int protoff, diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 7a911eca0f1..a053a19870c 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -26,11 +26,14 @@ struct netns_ct { int sysctl_tstamp; int sysctl_checksum; unsigned int sysctl_log_invalid; /* Log invalid packets */ + int sysctl_auto_assign_helper; + bool auto_assign_helper_warned; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; struct ctl_table_header *acct_sysctl_header; struct ctl_table_header *tstamp_sysctl_header; struct ctl_table_header *event_sysctl_header; + struct ctl_table_header *helper_sysctl_header; #endif char *slabname; }; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 53f083686ae..dce55d4ee83 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1; static int brnf_call_arptables __read_mostly = 1; static int brnf_filter_vlan_tagged __read_mostly = 0; static int brnf_filter_pppoe_tagged __read_mostly = 0; +static int brnf_pass_vlan_indev __read_mostly = 0; #else #define brnf_call_iptables 1 #define brnf_call_ip6tables 1 #define brnf_call_arptables 1 #define brnf_filter_vlan_tagged 0 #define brnf_filter_pppoe_tagged 0 +#define brnf_pass_vlan_indev 0 #endif #define IS_IP(skb) \ @@ -503,6 +505,19 @@ bridged_dnat: return 0; } +static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) +{ + struct net_device *vlan, *br; + + br = bridge_parent(dev); + if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + return br; + + vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); + + return vlan ? vlan : br; +} + /* Some common code for IPv4/IPv6 */ static struct net_device *setup_pre_routing(struct sk_buff *skb) { @@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; - skb->dev = bridge_parent(skb->dev); + skb->dev = brnf_get_logical_dev(skb, skb->dev); if (skb->protocol == htons(ETH_P_8021Q)) nf_bridge->mask |= BRNF_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) @@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent, + NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent, br_nf_forward_finish); return NF_STOLEN; @@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = { .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, + { + .procname = "bridge-nf-pass-vlan-input-dev", + .data = &brnf_pass_vlan_indev, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, + }, { } }; #endif diff --git a/net/core/sock.c b/net/core/sock.c index b8c818e69c2..26ed27fb2bf 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX]; /* Run time adjustable parameters. */ __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; +EXPORT_SYMBOL(sysctl_wmem_max); __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; +EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 240b68469a7..c20674dc945 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o # just filtering instance of ARP tables for now obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o - -obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o - diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c deleted file mode 100644 index 09775a1e134..00000000000 --- a/net/ipv4/netfilter/ip_queue.c +++ /dev/null @@ -1,639 +0,0 @@ -/* - * This is a module which is used for queueing IPv4 packets and - * communicating with userspace via netlink. - * - * (C) 2000-2002 James Morris <jmorris@intercode.com.au> - * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/init.h> -#include <linux/ip.h> -#include <linux/notifier.h> -#include <linux/netdevice.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_queue.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netlink.h> -#include <linux/spinlock.h> -#include <linux/sysctl.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/security.h> -#include <linux/net.h> -#include <linux/mutex.h> -#include <linux/slab.h> -#include <net/net_namespace.h> -#include <net/sock.h> -#include <net/route.h> -#include <net/netfilter/nf_queue.h> -#include <net/ip.h> - -#define IPQ_QMAX_DEFAULT 1024 -#define IPQ_PROC_FS_NAME "ip_queue" -#define NET_IPQ_QMAX 2088 -#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" - -typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); - -static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; -static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; -static DEFINE_SPINLOCK(queue_lock); -static int peer_pid __read_mostly; -static unsigned int copy_range __read_mostly; -static unsigned int queue_total; -static unsigned int queue_dropped = 0; -static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl __read_mostly; -static LIST_HEAD(queue_list); -static DEFINE_MUTEX(ipqnl_mutex); - -static inline void -__ipq_enqueue_entry(struct nf_queue_entry *entry) -{ - list_add_tail(&entry->list, &queue_list); - queue_total++; -} - -static inline int -__ipq_set_mode(unsigned char mode, unsigned int range) -{ - int status = 0; - - switch(mode) { - case IPQ_COPY_NONE: - case IPQ_COPY_META: - copy_mode = mode; - copy_range = 0; - break; - - case IPQ_COPY_PACKET: - if (range > 0xFFFF) - range = 0xFFFF; - copy_range = range; - copy_mode = mode; - break; - - default: - status = -EINVAL; - - } - return status; -} - -static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); - -static inline void -__ipq_reset(void) -{ - peer_pid = 0; - net_disable_timestamp(); - __ipq_set_mode(IPQ_COPY_NONE, 0); - __ipq_flush(NULL, 0); -} - -static struct nf_queue_entry * -ipq_find_dequeue_entry(unsigned long id) -{ - struct nf_queue_entry *entry = NULL, *i; - - spin_lock_bh(&queue_lock); - - list_for_each_entry(i, &queue_list, list) { - if ((unsigned long)i == id) { - entry = i; - break; - } - } - - if (entry) { - list_del(&entry->list); - queue_total--; - } - - spin_unlock_bh(&queue_lock); - return entry; -} - -static void -__ipq_flush(ipq_cmpfn cmpfn, unsigned long data) -{ - struct nf_queue_entry *entry, *next; - - list_for_each_entry_safe(entry, next, &queue_list, list) { - if (!cmpfn || cmpfn(entry, data)) { - list_del(&entry->list); - queue_total--; - nf_reinject(entry, NF_DROP); - } - } -} - -static void -ipq_flush(ipq_cmpfn cmpfn, unsigned long data) -{ - spin_lock_bh(&queue_lock); - __ipq_flush(cmpfn, data); - spin_unlock_bh(&queue_lock); -} - -static struct sk_buff * -ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) -{ - sk_buff_data_t old_tail; - size_t size = 0; - size_t data_len = 0; - struct sk_buff *skb; - struct ipq_packet_msg *pmsg; - struct nlmsghdr *nlh; - struct timeval tv; - - switch (ACCESS_ONCE(copy_mode)) { - case IPQ_COPY_META: - case IPQ_COPY_NONE: - size = NLMSG_SPACE(sizeof(*pmsg)); - break; - - case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_PARTIAL && - (*errp = skb_checksum_help(entry->skb))) - return NULL; - - data_len = ACCESS_ONCE(copy_range); - if (data_len == 0 || data_len > entry->skb->len) - data_len = entry->skb->len; - - size = NLMSG_SPACE(sizeof(*pmsg) + data_len); - break; - - default: - *errp = -EINVAL; - return NULL; - } - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - goto nlmsg_failure; - - old_tail = skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); - pmsg = NLMSG_DATA(nlh); - memset(pmsg, 0, sizeof(*pmsg)); - - pmsg->packet_id = (unsigned long )entry; - pmsg->data_len = data_len; - tv = ktime_to_timeval(entry->skb->tstamp); - pmsg->timestamp_sec = tv.tv_sec; - pmsg->timestamp_usec = tv.tv_usec; - pmsg->mark = entry->skb->mark; - pmsg->hook = entry->hook; - pmsg->hw_protocol = entry->skb->protocol; - - if (entry->indev) - strcpy(pmsg->indev_name, entry->indev->name); - else - pmsg->indev_name[0] = '\0'; - - if (entry->outdev) - strcpy(pmsg->outdev_name, entry->outdev->name); - else - pmsg->outdev_name[0] = '\0'; - - if (entry->indev && entry->skb->dev && - entry->skb->mac_header != entry->skb->network_header) { - pmsg->hw_type = entry->skb->dev->type; - pmsg->hw_addrlen = dev_parse_header(entry->skb, - pmsg->hw_addr); - } - - if (data_len) - if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) - BUG(); - - nlh->nlmsg_len = skb->tail - old_tail; - return skb; - -nlmsg_failure: - kfree_skb(skb); - *errp = -EINVAL; - printk(KERN_ERR "ip_queue: error creating packet message\n"); - return NULL; -} - -static int -ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) -{ - int status = -EINVAL; - struct sk_buff *nskb; - - if (copy_mode == IPQ_COPY_NONE) - return -EAGAIN; - - nskb = ipq_build_packet_message(entry, &status); - if (nskb == NULL) - return status; - - spin_lock_bh(&queue_lock); - - if (!peer_pid) - goto err_out_free_nskb; - - if (queue_total >= queue_maxlen) { - queue_dropped++; - status = -ENOSPC; - if (net_ratelimit()) - printk (KERN_WARNING "ip_queue: full at %d entries, " - "dropping packets(s). Dropped: %d\n", queue_total, - queue_dropped); - goto err_out_free_nskb; - } - - /* netlink_unicast will either free the nskb or attach it to a socket */ - status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); - if (status < 0) { - queue_user_dropped++; - goto err_out_unlock; - } - - __ipq_enqueue_entry(entry); - - spin_unlock_bh(&queue_lock); - return status; - -err_out_free_nskb: - kfree_skb(nskb); - -err_out_unlock: - spin_unlock_bh(&queue_lock); - return status; -} - -static int -ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) -{ - int diff; - struct iphdr *user_iph = (struct iphdr *)v->payload; - struct sk_buff *nskb; - - if (v->data_len < sizeof(*user_iph)) - return 0; - diff = v->data_len - e->skb->len; - if (diff < 0) { - if (pskb_trim(e->skb, v->data_len)) - return -ENOMEM; - } else if (diff > 0) { - if (v->data_len > 0xFFFF) - return -EINVAL; - if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), - diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "ip_queue: error " - "in mangle, dropping packet\n"); - return -ENOM |