diff options
Diffstat (limited to 'net/bridge')
49 files changed, 11593 insertions, 3407 deletions
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index db23d59746c..aa0d3b2f1bb 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -4,6 +4,9 @@ config BRIDGE tristate "802.1d Ethernet Bridging" + select LLC + select STP + depends on IPV6 || IPV6=n ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it @@ -29,3 +32,31 @@ config BRIDGE will be called bridge. If unsure, say N. + +config BRIDGE_IGMP_SNOOPING + bool "IGMP/MLD snooping" + depends on BRIDGE + depends on INET + default y + ---help--- + If you say Y here, then the Ethernet bridge will be able selectively + forward multicast traffic based on IGMP/MLD traffic received from + each port. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. + +config BRIDGE_VLAN_FILTERING + bool "VLAN filtering" + depends on BRIDGE + depends on VLAN_8021Q + default n + ---help--- + If you say Y here, then the Ethernet bridge will be able selectively + receive and forward traffic based on VLAN information in the packet + any VLAN information configured on the bridge port or bridge device. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 59556e40e14..8590b942bff 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -5,11 +5,15 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ - br_ioctl.o br_notify.o br_stp.o br_stp_bpdu.o \ - br_stp_if.o br_stp_timer.o + br_ioctl.o br_stp.o br_stp_bpdu.o \ + br_stp_if.o br_stp_timer.o br_netlink.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o + +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o + +obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br.c b/net/bridge/br.c index 188cc1ac49e..1a755a1e541 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -5,66 +5,216 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/init.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/stp.h> #include "br_private.h" -int (*br_should_route_hook) (struct sk_buff **pskb) = NULL; +/* + * Handle changes in state of network devices enslaved to a bridge. + * + * Note: don't care about up/down if bridge itself is down, because + * port state is checked when bridge is brought up. + */ +static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + bool changed_addr; + int err; + + /* register of bridge completed, add sysfs entries */ + if ((dev->priv_flags & IFF_EBRIDGE) && event == NETDEV_REGISTER) { + br_sysfs_addbr(dev); + return NOTIFY_DONE; + } + + /* not a port of a bridge */ + p = br_port_get_rtnl(dev); + if (!p) + return NOTIFY_DONE; + + br = p->br; + + switch (event) { + case NETDEV_CHANGEMTU: + dev_set_mtu(br->dev, br_min_mtu(br)); + break; + + case NETDEV_CHANGEADDR: + spin_lock_bh(&br->lock); + br_fdb_changeaddr(p, dev->dev_addr); + changed_addr = br_stp_recalculate_bridge_id(br); + spin_unlock_bh(&br->lock); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + break; + + case NETDEV_CHANGE: + br_port_carrier_check(p); + break; + + case NETDEV_FEAT_CHANGE: + netdev_update_features(br->dev); + break; + + case NETDEV_DOWN: + spin_lock_bh(&br->lock); + if (br->dev->flags & IFF_UP) + br_stp_disable_port(p); + spin_unlock_bh(&br->lock); + break; + + case NETDEV_UP: + if (netif_running(br->dev) && netif_oper_up(dev)) { + spin_lock_bh(&br->lock); + br_stp_enable_port(p); + spin_unlock_bh(&br->lock); + } + break; + + case NETDEV_UNREGISTER: + br_del_if(br, dev); + break; + + case NETDEV_CHANGENAME: + err = br_sysfs_renameif(p); + if (err) + return notifier_from_errno(err); + break; + + case NETDEV_PRE_TYPE_CHANGE: + /* Forbid underlaying device to change its type. */ + return NOTIFY_BAD; + + case NETDEV_RESEND_IGMP: + /* Propagate to master device */ + call_netdevice_notifiers(event, br->dev); + break; + } + + /* Events that may cause spanning tree to refresh */ + if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || + event == NETDEV_CHANGE || event == NETDEV_DOWN) + br_ifinfo_notify(RTM_NEWLINK, p); + + return NOTIFY_DONE; +} + +static struct notifier_block br_device_notifier = { + .notifier_call = br_device_event +}; + +static void __net_exit br_net_exit(struct net *net) +{ + struct net_device *dev; + LIST_HEAD(list); + + rtnl_lock(); + for_each_netdev(net, dev) + if (dev->priv_flags & IFF_EBRIDGE) + br_dev_delete(dev, &list); + + unregister_netdevice_many(&list); + rtnl_unlock(); + +} + +static struct pernet_operations br_net_ops = { + .exit = br_net_exit, +}; + +static const struct stp_proto br_stp_proto = { + .rcv = br_stp_rcv, +}; static int __init br_init(void) { - br_fdb_init(); + int err; -#ifdef CONFIG_BRIDGE_NETFILTER - if (br_netfilter_init()) - return 1; -#endif - brioctl_set(br_ioctl_deviceless_stub); - br_handle_frame_hook = br_handle_frame; + err = stp_proto_register(&br_stp_proto); + if (err < 0) { + pr_err("bridge: can't register sap for STP\n"); + return err; + } + + err = br_fdb_init(); + if (err) + goto err_out; - br_fdb_get_hook = br_fdb_get; - br_fdb_put_hook = br_fdb_put; + err = register_pernet_subsys(&br_net_ops); + if (err) + goto err_out1; - register_netdevice_notifier(&br_device_notifier); + err = br_netfilter_init(); + if (err) + goto err_out2; + + err = register_netdevice_notifier(&br_device_notifier); + if (err) + goto err_out3; + + err = br_netlink_init(); + if (err) + goto err_out4; + + brioctl_set(br_ioctl_deviceless_stub); + +#if IS_ENABLED(CONFIG_ATM_LANE) + br_fdb_test_addr_hook = br_fdb_test_addr; +#endif return 0; +err_out4: + unregister_netdevice_notifier(&br_device_notifier); +err_out3: + br_netfilter_fini(); +err_out2: + unregister_pernet_subsys(&br_net_ops); +err_out1: + br_fdb_fini(); +err_out: + stp_proto_unregister(&br_stp_proto); + return err; } static void __exit br_deinit(void) { -#ifdef CONFIG_BRIDGE_NETFILTER - br_netfilter_fini(); -#endif + stp_proto_unregister(&br_stp_proto); + + br_netlink_fini(); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); - br_cleanup_bridges(); + unregister_pernet_subsys(&br_net_ops); - synchronize_net(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ - br_fdb_get_hook = NULL; - br_fdb_put_hook = NULL; + br_netfilter_fini(); +#if IS_ENABLED(CONFIG_ATM_LANE) + br_fdb_test_addr_hook = NULL; +#endif - br_handle_frame_hook = NULL; br_fdb_fini(); } -EXPORT_SYMBOL(br_should_route_hook); - module_init(br_init) module_exit(br_deinit) MODULE_LICENSE("GPL"); MODULE_VERSION(BR_VERSION); +MODULE_ALIAS_RTNL_LINK("bridge"); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0b33a7b3a00..568cccd39a3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,39 +13,86 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> +#include <linux/list.h> +#include <linux/netfilter_bridge.h> #include <asm/uaccess.h> #include "br_private.h" -static struct net_device_stats *br_dev_get_stats(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - return &br->statistics; -} +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) -int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) +/* net device transmit always called with BH disabled */ +netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; + struct net_bridge_mdb_entry *mdst; + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + u16 vid = 0; + + rcu_read_lock(); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + br_nf_pre_routing_finish_bridge_slow(skb); + rcu_read_unlock(); + return NETDEV_TX_OK; + } +#endif + + u64_stats_update_begin(&brstats->syncp); + brstats->tx_packets++; + brstats->tx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); - br->statistics.tx_packets++; - br->statistics.tx_bytes += skb->len; + BR_INPUT_SKB_CB(skb)->brdev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - rcu_read_lock(); - if (dest[0] & 1) - br_flood_deliver(br, skb, 0); - else if ((dst = __br_fdb_get(br, dest)) != NULL) + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + + if (is_broadcast_ether_addr(dest)) + br_flood_deliver(br, skb, false); + else if (is_multicast_ether_addr(dest)) { + if (unlikely(netpoll_tx_running(dev))) { + br_flood_deliver(br, skb, false); + goto out; + } + if (br_multicast_rcv(br, NULL, skb, vid)) { + kfree_skb(skb); + goto out; + } + + mdst = br_mdb_get(br, skb, vid); + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br, eth_hdr(skb))) + br_multicast_deliver(mdst, skb); + else + br_flood_deliver(br, skb, false); + } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb, true); +out: rcu_read_unlock(); + return NETDEV_TX_OK; +} + +static int br_dev_init(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!br->stats) + return -ENOMEM; + return 0; } @@ -55,9 +100,10 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); - br_features_recompute(br); + netdev_update_features(dev); netif_start_queue(dev); br_stp_enable_bridge(br); + br_multicast_open(br); return 0; } @@ -66,125 +112,282 @@ static void br_dev_set_multicast_list(struct net_device *dev) { } +static void br_dev_change_rx_flags(struct net_device *dev, int change) +{ + if (change & IFF_PROMISC) + br_manage_promisc(netdev_priv(dev)); +} + static int br_dev_stop(struct net_device *dev) { - br_stp_disable_bridge(netdev_priv(dev)); + struct net_bridge *br = netdev_priv(dev); + + br_stp_disable_bridge(br); + br_multicast_stop(br); netif_stop_queue(dev); return 0; } +static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct net_bridge *br = netdev_priv(dev); + struct pcpu_sw_netstats tmp, sum = { 0 }; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + unsigned int start; + const struct pcpu_sw_netstats *bstats + = per_cpu_ptr(br->stats, cpu); + do { + start = u64_stats_fetch_begin_irq(&bstats->syncp); + memcpy(&tmp, bstats, sizeof(tmp)); + } while (u64_stats_fetch_retry_irq(&bstats->syncp, start)); + sum.tx_bytes += tmp.tx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.rx_packets += tmp.rx_packets; + } + + stats->tx_bytes = sum.tx_bytes; + stats->tx_packets = sum.tx_packets; + stats->rx_bytes = sum.rx_bytes; + stats->rx_packets = sum.rx_packets; + + return stats; +} + static int br_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev))) + struct net_bridge *br = netdev_priv(dev); + if (new_mtu < 68 || new_mtu > br_min_mtu(br)) return -EINVAL; dev->mtu = new_mtu; + +#ifdef CONFIG_BRIDGE_NETFILTER + /* remember the MTU in the rtable for PMTU */ + dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu); +#endif + return 0; } -/* Allow setting mac address of pseudo-bridge to be same as - * any of the bound interfaces - */ +/* Allow setting mac address to any valid ethernet address. */ static int br_set_mac_address(struct net_device *dev, void *p) { struct net_bridge *br = netdev_priv(dev); struct sockaddr *addr = p; - struct net_bridge_port *port; - int err = -EADDRNOTAVAIL; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; spin_lock_bh(&br->lock); - list_for_each_entry(port, &br->port_list, list) { - if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) { - br_stp_change_bridge_id(br, addr->sa_data); - err = 0; - break; - } + if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { + /* Mac address will be changed in br_stp_change_bridge_id(). */ + br_stp_change_bridge_id(br, addr->sa_data); } spin_unlock_bh(&br->lock); - return err; + return 0; } static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strcpy(info->driver, "bridge"); - strcpy(info->version, BR_VERSION); - strcpy(info->fw_version, "N/A"); - strcpy(info->bus_info, "N/A"); + strlcpy(info->driver, "bridge", sizeof(info->driver)); + strlcpy(info->version, BR_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, "N/A", sizeof(info->bus_info)); } -static int br_set_sg(struct net_device *dev, u32 data) +static netdev_features_t br_fix_features(struct net_device *dev, + netdev_features_t features) { struct net_bridge *br = netdev_priv(dev); - if (data) - br->feature_mask |= NETIF_F_SG; - else - br->feature_mask &= ~NETIF_F_SG; + return br_features_recompute(br, features); +} - br_features_recompute(br); - return 0; +#ifdef CONFIG_NET_POLL_CONTROLLER +static void br_poll_controller(struct net_device *br_dev) +{ } -static int br_set_tso(struct net_device *dev, u32 data) +static void br_netpoll_cleanup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; - if (data) - br->feature_mask |= NETIF_F_TSO; - else - br->feature_mask &= ~NETIF_F_TSO; + list_for_each_entry(p, &br->port_list, list) + br_netpoll_disable(p); +} - br_features_recompute(br); - return 0; +static int __br_netpoll_enable(struct net_bridge_port *p) +{ + struct netpoll *np; + int err; + + np = kzalloc(sizeof(*p->np), GFP_KERNEL); + if (!np) + return -ENOMEM; + + err = __netpoll_setup(np, p->dev); + if (err) { + kfree(np); + return err; + } + + p->np = np; + return err; +} + +int br_netpoll_enable(struct net_bridge_port *p) +{ + if (!p->br->dev->npinfo) + return 0; + + return __br_netpoll_enable(p); } -static int br_set_tx_csum(struct net_device *dev, u32 data) +static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + int err = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (!p->dev) + continue; + err = __br_netpoll_enable(p); + if (err) + goto fail; + } - if (data) - br->feature_mask |= NETIF_F_IP_CSUM; - else - br->feature_mask &= ~NETIF_F_IP_CSUM; +out: + return err; - br_features_recompute(br); - return 0; +fail: + br_netpoll_cleanup(dev); + goto out; +} + +void br_netpoll_disable(struct net_bridge_port *p) +{ + struct netpoll *np = p->np; + + if (!np) + return; + + p->np = NULL; + + __netpoll_free_async(np); } -static struct ethtool_ops br_ethtool_ops = { - .get_drvinfo = br_getinfo, - .get_link = ethtool_op_get_link, - .get_sg = ethtool_op_get_sg, - .set_sg = br_set_sg, - .get_tx_csum = ethtool_op_get_tx_csum, - .set_tx_csum = br_set_tx_csum, - .get_tso = ethtool_op_get_tso, - .set_tso = br_set_tso, +#endif + +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) + +{ + struct net_bridge *br = netdev_priv(dev); + + return br_add_if(br, slave_dev); +} + +static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_del_if(br, slave_dev); +} + +static const struct ethtool_ops br_ethtool_ops = { + .get_drvinfo = br_getinfo, + .get_link = ethtool_op_get_link, +}; + +static const struct net_device_ops br_netdev_ops = { + .ndo_open = br_dev_open, + .ndo_stop = br_dev_stop, + .ndo_init = br_dev_init, + .ndo_start_xmit = br_dev_xmit, + .ndo_get_stats64 = br_get_stats64, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_rx_mode = br_dev_set_multicast_list, + .ndo_change_rx_flags = br_dev_change_rx_flags, + .ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_netpoll_setup = br_netpoll_setup, + .ndo_netpoll_cleanup = br_netpoll_cleanup, + .ndo_poll_controller = br_poll_controller, +#endif + .ndo_add_slave = br_add_slave, + .ndo_del_slave = br_del_slave, + .ndo_fix_features = br_fix_features, + .ndo_fdb_add = br_fdb_add, + .ndo_fdb_del = br_fdb_delete, + .ndo_fdb_dump = br_fdb_dump, + .ndo_bridge_getlink = br_getlink, + .ndo_bridge_setlink = br_setlink, + .ndo_bridge_dellink = br_dellink, +}; + +static void br_dev_free(struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + + free_percpu(br->stats); + free_netdev(dev); +} + +static struct device_type br_type = { + .name = "bridge", }; void br_dev_setup(struct net_device *dev) { - memset(dev->dev_addr, 0, ETH_ALEN); + struct net_bridge *br = netdev_priv(dev); + eth_hw_addr_random(dev); ether_setup(dev); - dev->do_ioctl = br_dev_ioctl; - dev->get_stats = br_dev_get_stats; - dev->hard_start_xmit = br_dev_xmit; - dev->open = br_dev_open; - dev->set_multicast_list = br_dev_set_multicast_list; - dev->change_mtu = br_change_mtu; - dev->destructor = free_netdev; - SET_MODULE_OWNER(dev); - SET_ETHTOOL_OPS(dev, &br_ethtool_ops); - dev->stop = br_dev_stop; + dev->netdev_ops = &br_netdev_ops; + dev->destructor = br_dev_free; + dev->ethtool_ops = &br_ethtool_ops; + SET_NETDEV_DEVTYPE(dev, &br_type); dev->tx_queue_len = 0; - dev->set_mac_address = br_set_mac_address; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST - | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM; + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->vlan_features = COMMON_FEATURES; + + br->dev = dev; + spin_lock_init(&br->lock); + INIT_LIST_HEAD(&br->port_list); + spin_lock_init(&br->hash_lock); + + br->bridge_id.prio[0] = 0x80; + br->bridge_id.prio[1] = 0x00; + + ether_addr_copy(br->group_addr, eth_reserved_addr_base); + + br->stp_enabled = BR_NO_STP; + br->group_fwd_mask = BR_GROUPFWD_DEFAULT; + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; + + br->designated_root = br->bridge_id; + br->bridge_max_age = br->max_age = 20 * HZ; + br->bridge_hello_time = br->hello_time = 2 * HZ; + br->bridge_forward_delay = br->forward_delay = 15 * HZ; + br->ageing_time = 300 * HZ; + + br_netfilter_rtable_init(br); + br_stp_timer_init(br); + br_multicast_init(br); + br_vlan_init(br); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 1f08a59b51e..b524c36c127 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,27 +13,44 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <linux/rculist.h> #include <linux/spinlock.h> #include <linux/times.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/jhash.h> -#include <asm/atomic.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/atomic.h> +#include <asm/unaligned.h> +#include <linux/if_vlan.h> #include "br_private.h" -static kmem_cache_t *br_fdb_cache __read_mostly; +static struct kmem_cache *br_fdb_cache __read_mostly; +static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, + const unsigned char *addr, + __u16 vid); static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr); + const unsigned char *addr, u16 vid); +static void fdb_notify(struct net_bridge *br, + const struct net_bridge_fdb_entry *, int); + +static u32 fdb_salt __read_mostly; -void __init br_fdb_init(void) +int __init br_fdb_init(void) { br_fdb_cache = kmem_cache_create("bridge_fdb_cache", sizeof(struct net_bridge_fdb_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); + if (!br_fdb_cache) + return -ENOMEM; + + get_random_bytes(&fdb_salt, sizeof(fdb_salt)); + return 0; } -void __exit br_fdb_fini(void) +void br_fdb_fini(void) { kmem_cache_destroy(br_fdb_cache); } @@ -44,34 +59,141 @@ void __exit br_fdb_fini(void) /* if topology_changing then use forward_delay (default 15 sec) * otherwise keep longer (default 5 minutes) */ -static __inline__ unsigned long hold_time(const struct net_bridge *br) +static inline unsigned long hold_time(const struct net_bridge *br) { return br->topology_change ? br->forward_delay : br->ageing_time; } -static __inline__ int has_expired(const struct net_bridge *br, +static inline int has_expired(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { - return !fdb->is_static - && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); + return !fdb->is_static && + time_before_eq(fdb->updated + hold_time(br), jiffies); +} + +static inline int br_mac_hash(const unsigned char *mac, __u16 vid) +{ + /* use 1 byte of OUI and 3 bytes of NIC */ + u32 key = get_unaligned((u32 *)(mac + 2)); + return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); +} + +static void fdb_rcu_free(struct rcu_head *head) +{ + struct net_bridge_fdb_entry *ent + = container_of(head, struct net_bridge_fdb_entry, rcu); + kmem_cache_free(br_fdb_cache, ent); +} + +/* When a static FDB entry is added, the mac address from the entry is + * added to the bridge private HW address list and all required ports + * are then updated with the new information. + * Called under RTNL. + */ +static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr) +{ + int err; + struct net_bridge_port *p, *tmp; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) { + err = dev_uc_add(p->dev, addr); + if (err) + goto undo; + } + } + + return; +undo: + list_for_each_entry(tmp, &br->port_list, list) { + if (tmp == p) + break; + if (!br_promisc_port(tmp)) + dev_uc_del(tmp->dev, addr); + } } -static __inline__ int br_mac_hash(const unsigned char *mac) +/* When a static FDB entry is deleted, the HW address from that entry is + * also removed from the bridge private HW address list and updates all + * the ports with needed information. + * Called under RTNL. + */ +static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr) { - return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1); + struct net_bridge_port *p; + + ASSERT_RTNL(); + + list_for_each_entry(p, &br->port_list, list) { + if (!br_promisc_port(p)) + dev_uc_del(p->dev, addr); + } } -static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f) +static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { + if (f->is_static) + fdb_del_hw(br, f->addr.addr); + hlist_del_rcu(&f->hlist); - br_fdb_put(f); + fdb_notify(br, f, RTM_DELNEIGH); + call_rcu(&f->rcu, fdb_rcu_free); +} + +/* Delete a local entry if no other port had the same address. */ +static void fdb_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + struct net_bridge_fdb_entry *f) +{ + const unsigned char *addr = f->addr.addr; + u16 vid = f->vlan_id; + struct net_bridge_port *op; + + /* Maybe another port has same hw addr? */ + list_for_each_entry(op, &br->port_list, list) { + if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && + (!vid || nbp_vlan_find(op, vid))) { + f->dst = op; + f->added_by_user = 0; + return; + } + } + + /* Maybe bridge device has same hw addr? */ + if (p && ether_addr_equal(br->dev->dev_addr, addr) && + (!vid || br_vlan_find(br, vid))) { + f->dst = NULL; + f->added_by_user = 0; + return; + } + + fdb_delete(br, f); +} + +void br_fdb_find_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *f; + + spin_lock_bh(&br->hash_lock); + f = fdb_find(head, addr, vid); + if (f && f->is_local && !f->added_by_user && f->dst == p) + fdb_delete_local(br, p, f); + spin_unlock_bh(&br->hash_lock); } void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge *br = p->br; + struct net_port_vlans *pv = nbp_get_vlan_info(p); + bool no_vlan = !pv; int i; - + u16 vid; + spin_lock_bh(&br->hash_lock); /* Search all chains since old address/hash is unknown */ @@ -81,28 +203,68 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) struct net_bridge_fdb_entry *f; f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); - if (f->dst == p && f->is_local) { - /* maybe another port has same hw addr? */ - struct net_bridge_port *op; - list_for_each_entry(op, &br->port_list, list) { - if (op != p && - !compare_ether_addr(op->dev->dev_addr, - f->addr.addr)) { - f->dst = op; - goto insert; - } - } - + if (f->dst == p && f->is_local && !f->added_by_user) { /* delete old one */ - fdb_delete(f); - goto insert; + fdb_delete_local(br, p, f); + + /* if this port has no vlan information + * configured, we can safely be done at + * this point. + */ + if (no_vlan) + goto insert; } } } - insert: + +insert: /* insert new address, may fail if invalid address or dup. */ - fdb_insert(br, p, newaddr); + fdb_insert(br, p, newaddr, 0); + + if (no_vlan) + goto done; + + /* Now add entries for every VLAN configured on the port. + * This function runs under RTNL so the bitmap will not change + * from under us. + */ + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + fdb_insert(br, p, newaddr, vid); +done: + spin_unlock_bh(&br->hash_lock); +} + +void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) +{ + struct net_bridge_fdb_entry *f; + struct net_port_vlans *pv; + u16 vid = 0; + + spin_lock_bh(&br->hash_lock); + + /* If old entry was unassociated with any port, then delete it. */ + f = __br_fdb_get(br, br->dev->dev_addr, 0); + if (f && f->is_local && !f->dst) + fdb_delete_local(br, NULL, f); + + fdb_insert(br, NULL, newaddr, 0); + + /* Now remove and add entries for every VLAN configured on the + * bridge. This function runs under RTNL so the bitmap will not + * change from under us. + */ + pv = br_get_vlan_info(br); + if (!pv) + goto out; + + for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { + f = __br_fdb_get(br, br->dev->dev_addr, vid); + if (f && f->is_local && !f->dst) + fdb_delete_local(br, NULL, f); + fdb_insert(br, NULL, newaddr, vid); + } +out: spin_unlock_bh(&br->hash_lock); } @@ -110,71 +272,89 @@ void br_fdb_cleanup(unsigned long _data) { struct net_bridge *br = (struct net_bridge *)_data; unsigned long delay = hold_time(br); + unsigned long next_timer = jiffies + br->ageing_time; int i; - spin_lock_bh(&br->hash_lock); + spin_lock(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; - struct hlist_node *h, *n; + struct hlist_node *n; - hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) { - if (!f->is_static && - time_before_eq(f->ageing_timer + delay, jiffies)) - fdb_delete(f); + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { + unsigned long this_timer; + if (f->is_static) + continue; + this_timer = f->updated + delay; + if (time_before_eq(this_timer, jiffies)) + fdb_delete(br, f); + else if (time_before(this_timer, next_timer)) + next_timer = this_timer; } } - spin_unlock_bh(&br->hash_lock); + spin_unlock(&br->hash_lock); - mod_timer(&br->gc_timer, jiffies + HZ/10); + mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); } -void br_fdb_delete_by_port(struct net_bridge *br, struct net_bridge_port *p) +/* Completely flush all dynamic entries in forwarding database.*/ +void br_fdb_flush(struct net_bridge *br) +{ + int i; + + spin_lock_bh(&br->hash_lock); + for (i = 0; i < BR_HASH_SIZE; i++) { + struct net_bridge_fdb_entry *f; + struct hlist_node *n; + hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { + if (!f->is_static) + fdb_delete(br, f); + } + } + spin_unlock_bh(&br->hash_lock); +} + +/* Flush all entries referring to a specific port. + * if do_all is set also flush static entries + */ +void br_fdb_delete_by_port(struct net_bridge *br, + const struct net_bridge_port *p, + int do_all) { int i; spin_lock_bh(&br->hash_lock); for (i = 0; i < BR_HASH_SIZE; i++) { struct hlist_node *h, *g; - + hlist_for_each_safe(h, g, &br->hash[i]) { struct net_bridge_fdb_entry *f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); - if (f->dst != p) + if (f->dst != p) continue; - /* - * if multiple ports all have the same device address - * then when one port is deleted, assign - * the local entry to other port - */ - if (f->is_local) { - struct net_bridge_port *op; - list_for_each_entry(op, &br->port_list, list) { - if (op != p && - !compare_ether_addr(op->dev->dev_addr, - f->addr.addr)) { - f->dst = op; - goto skip_delete; - } - } - } + if (f->is_static && !do_all) + continue; - fdb_delete(f); - skip_delete: ; + if (f->is_local) + fdb_delete_local(br, p, f); + else + fdb_delete(br, f); } } spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */ +/* No locking or refcounting, assumes caller has rcu_read_lock */ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr) + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, &br->hash[br_mac_hash(addr)], hlist) { - if (!compare_ether_addr(fdb->addr.addr, addr)) { + hlist_for_each_entry_rcu(fdb, + &br->hash[br_mac_hash(addr, vid)], hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) { if (unlikely(has_expired(br, fdb))) break; return fdb; @@ -184,36 +364,32 @@ struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, return NULL; } -/* Interface used by ATM hook that keeps a ref count */ -struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br, - unsigned char *addr) +#if IS_ENABLED(CONFIG_ATM_LANE) +/* Interface used by ATM LANE hook to test + * if an addr is on some other bridge port */ +int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) { struct net_bridge_fdb_entry *fdb; + struct net_bridge_port *port; + int ret; rcu_read_lock(); - fdb = __br_fdb_get(br, addr); - if (fdb) - atomic_inc(&fdb->use_count); + port = br_port_get_rcu(dev); + if (!port) + ret = 0; + else { + fdb = __br_fdb_get(port->br, addr, 0); + ret = fdb && fdb->dst && fdb->dst->dev != dev && + fdb->dst->state == BR_STATE_FORWARDING; + } rcu_read_unlock(); - return fdb; -} - -static void fdb_rcu_free(struct rcu_head *head) -{ - struct net_bridge_fdb_entry *ent - = container_of(head, struct net_bridge_fdb_entry, rcu); - kmem_cache_free(br_fdb_cache, ent); -} -/* Set entry up for deletion with RCU */ -void br_fdb_put(struct net_bridge_fdb_entry *ent) -{ - if (atomic_dec_and_test(&ent->use_count)) - call_rcu(&ent->rcu, fdb_rcu_free); + return ret; } +#endif /* CONFIG_ATM_LANE */ /* - * Fill buffer with forwarding table records in + * Fill buffer with forwarding table records in * the API format. */ int br_fdb_fillbuf(struct net_bridge *br, void *buf, @@ -221,18 +397,21 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, { struct __fdb_entry *fe = buf; int i, num = 0; - struct hlist_node *h; struct net_bridge_fdb_entry *f; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); for (i = 0; i < BR_HASH_SIZE; i++) { - hlist_for_each_entry_rcu(f, h, &br->hash[i], hlist) { + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { if (num >= maxnum) goto out; - if (has_expired(br, f)) + if (has_expired(br, f)) + continue; + + /* ignore pseudo entry for local MAC address */ + if (!f->dst) continue; if (skip) { @@ -242,10 +421,14 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, /* convert from internal format to API */ memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN); + + /* due to ABI compat need to split into hi/lo */ fe->port_no = f->dst->port_no; + fe->port_hi = f->dst->port_no >> 8; + fe->is_local = f->is_local; if (!f->is_static) - fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer); + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); ++fe; ++num; } @@ -257,14 +440,29 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, return num; } -static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr) +static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, + const unsigned char *addr, + __u16 vid) { - struct hlist_node *h; struct net_bridge_fdb_entry *fdb; - hlist_for_each_entry_rcu(fdb, h, head, hlist) { - if (!compare_ether_addr(fdb->addr.addr, addr)) + hlist_for_each_entry(fdb, head, hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) + return fdb; + } + return NULL; +} + +static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *fdb; + + hlist_for_each_entry_rcu(fdb, head, hlist) { + if (ether_addr_equal(fdb->addr.addr, addr) && + fdb->vlan_id == vid) return fdb; } return NULL; @@ -272,97 +470,535 @@ static inline struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, - const unsigned char *addr, - int is_local) + const unsigned char *addr, + __u16 vid) { struct net_bridge_fdb_entry *fdb; fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (fdb) { memcpy(fdb->addr.addr, addr, ETH_ALEN); - atomic_set(&fdb->use_count, 1); - hlist_add_head_rcu(&fdb->hlist, head); - fdb->dst = source; - fdb->is_local = is_local; - fdb->is_static = is_local; - fdb->ageing_timer = jiffies; + fdb->vlan_id = vid; + fdb->is_local = 0; + fdb->is_static = 0; + fdb->added_by_user = 0; + fdb->updated = fdb->used = jiffies; + hlist_add_head_rcu(&fdb->hlist, head); } return fdb; } static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr); + fdb = fdb_find(head, addr, vid); if (fdb) { - /* it is okay to have multiple ports with same + /* it is okay to have multiple ports with same * address, just use the first one. */ - if (fdb->is_local) + if (fdb->is_local) return 0; - - printk(KERN_WARNING "%s adding interface with same address " + br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); - fdb_delete(fdb); - } + source ? source->dev->name : br->dev->name); + fdb_delete(br, fdb); + } - if (!fdb_create(head, source, addr, 1)) + fdb = fdb_create(head, source, addr, vid); + if (!fdb) return -ENOMEM; + fdb->is_local = fdb->is_static = 1; + fdb_add_hw(br, addr); + fdb_notify(br, fdb, RTM_NEWNEIGH); return 0; } +/* Add entry for local address of interface */ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid) { int ret; spin_lock_bh(&br->hash_lock); - ret = fdb_insert(br, source, addr); + ret = fdb_insert(br, source, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr) + const unsigned char *addr, u16 vid, bool added_by_user) { - struct hlist_head *head = &br->hash[br_mac_hash(addr)]; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; + bool fdb_modified = false; /* some users want to always flood. */ if (hold_time(br) == 0) return; - rcu_read_lock(); - fdb = fdb_find(head, addr); + /* ignore packets unless we are using this port */ + if (!(source->state == BR_STATE_LEARNING || + source->state == BR_STATE_FORWARDING)) + return; + + fdb = fdb_find_rcu(head, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(fdb->is_local)) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: received packet with " - " own address as source address\n", - source->dev->name); + if (net_ratelimit()) + br_warn(br, "received packet on %s with " + "own address as source address\n", + source->dev->name); } else { /* fastpath: update of existing entry */ - fdb->dst = source; - fdb->ageing_timer = jiffies; + if (unlikely(source != fdb->dst)) { + fdb->dst = source; + fdb_modified = true; + } + fdb->updated = jiffies; + if (unlikely(added_by_user)) + fdb->added_by_user = 1; + if (unlikely(fdb_modified)) + fdb_notify(br, fdb, RTM_NEWNEIGH); } } else { - spin_lock_bh(&br->hash_lock); - if (!fdb_find(head, addr)) - fdb_create(head, source, addr, 0); + spin_lock(&br->hash_lock); + if (likely(!fdb_find(head, addr, vid))) { + fdb = fdb_create(head, source, addr, vid); + if (fdb) { + if (unlikely(added_by_user)) + fdb->added_by_user = 1; + fdb_notify(br, fdb, RTM_NEWNEIGH); + } + } /* else we lose race and someone else inserts * it first, don't bother updating */ - spin_unlock_bh(&br->hash_lock); + spin_unlock(&br->hash_lock); + } +} + +static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) +{ + if (fdb->is_local) + return NUD_PERMANENT; + else if (fdb->is_static) + return NUD_NOARP; + else if (has_expired(fdb->dst->br, fdb)) + return NUD_STALE; + else + return NUD_REACHABLE; +} + +static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb, + u32 portid, u32 seq, int type, unsigned int flags) +{ + unsigned long now = jiffies; + struct nda_cacheinfo ci; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = 0; + ndm->ndm_type = 0; + ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex; + ndm->ndm_state = fdb_to_nud(fdb); + + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) + goto nla_put_failure; + if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) + goto nla_put_failure; + ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_confirmed = 0; + ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_refcnt = 0; + if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; + + if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t fdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + + nla_total_size(sizeof(struct nda_cacheinfo)); +} + +static void fdb_notify(struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb, int type) +{ + struct net *net = dev_net(br->dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +} + +/* Dump information about entries, in response to GETNEIGH */ +int br_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + int idx) +{ + struct net_bridge *br = netdev_priv(dev); + int i; + + if (!(dev->priv_flags & IFF_EBRIDGE)) + goto out; + + for (i = 0; i < BR_HASH_SIZE; i++) { + struct net_bridge_fdb_entry *f; + + hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { + if (idx < cb->args[0]) + goto skip; + + if (fdb_fill_info(skb, br, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI) < 0) + break; +skip: + ++idx; + } + } + +out: + return idx; +} + +/* Update (create or replace) forwarding database entry */ +static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, + __u16 state, __u16 flags, __u16 vid) +{ + struct net_bridge *br = source->br; + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *fdb; + bool modified = false; + + fdb = fdb_find(head, addr, vid); + if (fdb == NULL) { + if (!(flags & NLM_F_CREATE)) + return -ENOENT; + + fdb = fdb_create(head, source, addr, vid); + if (!fdb) + return -ENOMEM; + + modified = true; + } else { + if (flags & NLM_F_EXCL) + return -EEXIST; + + if (fdb->dst != source) { + fdb->dst = source; + modified = true; + } + } + + if (fdb_to_nud(fdb) != state) { + if (state & NUD_PERMANENT) { + fdb->is_local = 1; + if (!fdb->is_static) { + fdb->is_static = 1; + fdb_add_hw(br, addr); + } + } else if (state & NUD_NOARP) { + fdb->is_local = 0; + if (!fdb->is_static) { + fdb->is_static = 1; + fdb_add_hw(br, addr); + } + } else { + fdb->is_local = 0; + if (fdb->is_static) { + fdb->is_static = 0; + fdb_del_hw(br, addr); + } + } + + modified = true; + } + fdb->added_by_user = 1; + + fdb->used = jiffies; + if (modified) { + fdb->updated = jiffies; + fdb_notify(br, fdb, RTM_NEWNEIGH); + } + + return 0; +} + +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, + const unsigned char *addr, u16 nlh_flags, u16 vid) +{ + int err = 0; + + if (ndm->ndm_flags & NTF_USE) { + rcu_read_lock(); + br_fdb_update(p->br, p, addr, vid, true); + rcu_read_unlock(); + } else { + spin_lock_bh(&p->br->hash_lock); + err = fdb_add_entry(p, addr, ndm->ndm_state, + nlh_flags, vid); + spin_unlock_bh(&p->br->hash_lock); + } + + return err; +} + +/* Add new permanent fdb entry with RTM_NEWNEIGH */ +int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 nlh_flags) +{ + struct net_bridge_port *p; + int err = 0; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + + if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { + pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); + return -EINVAL; + } + + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (!vid || vid >= VLAN_VID_MASK) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + + if (is_zero_ether_addr(addr)) { + pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + + p = br_port_get_rtnl(dev); + if (p == NULL) { + pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + /* VID was specified, so use it. */ + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (err) + goto out; + } + } + +out: + return err; +} + +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; + struct net_bridge_fdb_entry *fdb; + + fdb = fdb_find(head, addr, vlan); + if (!fdb) + return -ENOENT; + + fdb_delete(br, fdb); + return 0; +} + +static int __br_fdb_delete(struct net_bridge_port *p, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&p->br->hash_lock); + err = fdb_delete_by_addr(p->br, addr, vid); + spin_unlock_bh(&p->br->hash_lock); + + return err; +} + +/* Remove neighbor entry with RTM_DELNEIGH */ +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr) +{ + struct net_bridge_port *p; + int err; + struct net_port_vlans *pv; + unsigned short vid = VLAN_N_VID; + + if (tb[NDA_VLAN]) { + if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n"); + return -EINVAL; + } + + vid = nla_get_u16(tb[NDA_VLAN]); + + if (!vid || vid >= VLAN_VID_MASK) { + pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", + vid); + return -EINVAL; + } + } + p = br_port_get_rtnl(dev); + if (p == NULL) { + pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + + pv = nbp_get_vlan_info(p); + if (vid != VLAN_N_VID) { + if (!pv || !test_bit(vid, pv->vlan_bitmap)) { + pr_info("bridge: RTM_DELNEIGH with unconfigured " + "vlan %d on port %s\n", vid, dev->name); + return -EINVAL; + } + + err = __br_fdb_delete(p, addr, vid); + } else { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { + err = __br_fdb_delete(p, addr, 0); + goto out; + } + + /* We have vlans configured on this port and user didn't + * specify a VLAN. To be nice, add/update entry for every + * vlan on this port. + */ + err = -ENOENT; + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + err &= __br_fdb_delete(p, addr, vid); + } + } +out: + return err; +} + +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *fdb, *tmp; + int i; + int err; + + ASSERT_RTNL(); + + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry(fdb, &br->hash[i], hlist) { + /* We only care for static entries */ + if (!fdb->is_static) + continue; + + err = dev_uc_add(p->dev, fdb->addr.addr); + if (err) + goto rollback; + } + } + return 0; + +rollback: + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry(tmp, &br->hash[i], hlist) { + /* If we reached the fdb that failed, we can stop */ + if (tmp == fdb) + break; + + /* We only care for static entries */ + if (!tmp->is_static) + continue; + + dev_uc_del(p->dev, tmp->addr.addr); + } + } + return err; +} + +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) +{ + struct net_bridge_fdb_entry *fdb; + int i; + + ASSERT_RTNL(); + + for (i = 0; i < BR_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) { + /* We only care for static entries */ + if (!fdb->is_static) + continue; + + dev_uc_del(p->dev, fdb->addr.addr); + } } - rcu_read_unlock(); } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2d24fb400e0..056b67b0e27 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -5,42 +5,45 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/err.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/netpoll.h> #include <linux/skbuff.h> +#include <linux/if_vlan.h> #include <linux/netfilter_bridge.h> #include "br_private.h" -static inline int should_deliver(const struct net_bridge_port *p, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)); + +/* Don't forward packets to originating port or forwarding disabled */ +static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { - if (skb->dev == p->dev || - p->state != BR_STATE_FORWARDING) - return 0; - - return 1; + return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && + br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && + p->state == BR_STATE_FORWARDING; } int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* drop mtu oversized packets except tso */ - if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) + /* ip_fragment doesn't copy the MAC header */ + if (nf_bridge_maybe_copy_header(skb) || + !is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); - else { -#ifdef CONFIG_BRIDGE_NETFILTER - /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ - nf_bridge_maybe_copy_header(skb); -#endif + } else { skb_push(skb, ETH_HLEN); - + br_drop_fake_rtable(skb); dev_queue_xmit(skb); } @@ -49,35 +52,58 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) int br_forward_finish(struct sk_buff *skb) { - NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, - br_dev_queue_push_xmit); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + br_dev_queue_push_xmit); - return 0; } static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + skb->dev = to->dev; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish); + + if (unlikely(netpoll_tx_running(to->br->dev))) { + if (!is_skb_forwardable(skb->dev, skb)) + kfree_skb(skb); + else { + skb_push(skb, ETH_HLEN); + br_netpoll_send_skb(to, skb); + } + return; + } + + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish); } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { struct net_device *indev; + if (skb_warn_if_lro(skb)) { + kfree_skb(skb); + return; + } + + skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + if (!skb) + return; + indev = skb->dev; skb->dev = to->dev; - skb->ip_summed = CHECKSUM_NONE; + skb_forward_csum(skb); - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, - br_forward_finish); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + br_forward_finish); } /* called with rcu_read_lock */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { - if (should_deliver(to, skb)) { + if (to && should_deliver(to, skb)) { __br_deliver(to, skb); return; } @@ -86,72 +112,169 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) } /* called with rcu_read_lock */ -void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) +void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { if (should_deliver(to, skb)) { - __br_forward(to, skb); + if (skb0) + deliver_clone(to, skb, __br_forward); + else + __br_forward(to, skb); return; } - kfree_skb(skb); + if (!skb0) + kfree_skb(skb); } -/* called under bridge lock */ -static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, - void (*__packet_hook)(const struct net_bridge_port *p, +static int deliver_clone(const struct net_bridge_port *prev, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) { + dev->stats.tx_dropped++; + return -ENOMEM; + } + + __packet_hook(prev, skb); + return 0; +} + +static struct net_bridge_port *maybe_deliver( + struct net_bridge_port *prev, struct net_bridge_port *p, + struct sk_buff *skb, + void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { - struct net_bridge_port *p; - struct net_bridge_port *prev; + int err; - if (clone) { - struct sk_buff *skb2; + if (!should_deliver(p, skb)) + return prev; - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; - return; - } + if (!prev) + goto out; - skb = skb2; - } + err = deliver_clone(prev, skb, __packet_hook); + if (err) + return ERR_PTR(err); + +out: + return p; +} + +/* called under bridge lock */ +static void br_flood(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb0, + void (*__packet_hook)(const struct net_bridge_port *p, + struct sk_buff *skb), + bool unicast) +{ + struct net_bridge_port *p; + struct net_bridge_port *prev; prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { - if (should_deliver(p, skb)) { - if (prev != NULL) { - struct sk_buff *skb2; + /* Do not flood unicast traffic to ports that turn it off */ + if (unicast && !(p->flags & BR_FLOOD)) + continue; + prev = maybe_deliver(prev, p, skb, __packet_hook); + if (IS_ERR(prev)) + goto out; + } - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; - kfree_skb(skb); - return; - } + if (!prev) + goto out; - __packet_hook(prev, skb2); - } + if (skb0) + deliver_clone(prev, skb, __packet_hook); + else + __packet_hook(prev, skb); + return; - prev = p; - } +out: + if (!skb0) + kfree_skb(skb); +} + + +/* called with rcu_read_lock */ +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) +{ + br_flood(br, skb, NULL, __br_deliver, unicast); +} + +/* called under bridge lock */ +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb2, bool unicast) +{ + br_flood(br, skb, skb2, __br_forward, unicast); +} + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* called with rcu_read_lock */ +static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb0, + void (*__packet_hook)( + const struct net_bridge_port *p, + struct sk_buff *skb)) +{ + struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *prev = NULL; + struct net_bridge_port_group *p; + struct hlist_node *rp; + + rp = rcu_dereference(hlist_first_rcu(&br->router_list)); + p = mdst ? rcu_dereference(mdst->ports) : NULL; + while (p || rp) { + struct net_bridge_port *port, *lport, *rport; + + lport = p ? p->port : NULL; + rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : + NULL; + + port = (unsigned long)lport > (unsigned long)rport ? + lport : rport; + + prev = maybe_deliver(prev, port, skb, __packet_hook); + if (IS_ERR(prev)) + goto out; + + if ((unsigned long)lport >= (unsigned long)port) + p = rcu_dereference(p->next); + if ((unsigned long)rport >= (unsigned long)port) + rp = rcu_dereference(hlist_next_rcu(rp)); } - if (prev != NULL) { + if (!prev) + goto out; + + if (skb0) + deliver_clone(prev, skb, __packet_hook); + else __packet_hook(prev, skb); - return; - } + return; - kfree_skb(skb); +out: + if (!skb0) + kfree_skb(skb); } - /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb) { - br_flood(br, skb, clone, __br_deliver); + br_multicast_flood(mdst, skb, NULL, __br_deliver); } -/* called under bridge lock */ -void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone) +/* called with rcu_read_lock */ +void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2) { - br_flood(br, skb, clone, __br_forward); + br_multicast_flood(mdst, skb, skb2, __br_forward); } +#endif diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ba442883e87..3eca3fdf8fe 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,13 +13,17 @@ #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/netpoll.h> #include <linux/ethtool.h> #include <linux/if_arp.h> #include <linux/module.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <linux/if_ether.h> +#include <linux/slab.h> #include <net/sock.h> +#include <linux/if_vlan.h> #include "br_private.h" @@ -29,33 +31,20 @@ * Determine initial path cost based on speed. * using recommendations from 802.1d standard * - * Need to simulate user ioctl because not all device's that support - * ethtool, use ethtool_ops. Also, since driver might sleep need to - * not be holding any locks. + * Since driver might sleep need to not be holding any locks. */ static int port_cost(struct net_device *dev) { - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; - struct ifreq ifr; - mm_segment_t old_fs; - int err; - - strncpy(ifr.ifr_name, dev->name, IFNAMSIZ); - ifr.ifr_data = (void __user *) &ecmd; + struct ethtool_cmd ecmd; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ethtool(&ifr); - set_fs(old_fs); - - if (!err) { - switch(ecmd.speed) { - case SPEED_100: - return 19; - case SPEED_1000: - return 4; + if (!__ethtool_get_settings(dev, &ecmd)) { + switch (ethtool_cmd_speed(&ecmd)) { case SPEED_10000: return 2; + case SPEED_1000: + return 4; + case SPEED_100: + return 19; case SPEED_10: return 100; } @@ -72,34 +61,149 @@ static int port_cost(struct net_device *dev) } -/* - * Check for port carrier transistions. - * Called from work queue to allow for calling functions that - * might sleep (such as speed check), and to debounce. - */ -static void port_carrier_check(void *arg) +/* Check for port carrier transitions. */ +void br_port_carrier_check(struct net_bridge_port *p) { - struct net_bridge_port *p = arg; + struct net_device *dev = p->dev; + struct net_bridge *br = p->br; - rtnl_lock(); - if (netif_carrier_ok(p->dev)) { - u32 cost = port_cost(p->dev); + if (!(p->flags & BR_ADMIN_COST) && + netif_running(dev) && netif_oper_up(dev)) + p->path_cost = port_cost(dev); - spin_lock_bh(&p->br->lock); - if (p->state == BR_STATE_DISABLED) { - p->path_cost = cost; + if (!netif_running(br->dev)) + return; + + spin_lock_bh(&br->lock); + if (netif_running(dev) && netif_oper_up(dev)) { + if (p->state == BR_STATE_DISABLED) br_stp_enable_port(p); - } - spin_unlock_bh(&p->br->lock); } else { - spin_lock_bh(&p->br->lock); if (p->state != BR_STATE_DISABLED) br_stp_disable_port(p); - spin_unlock_bh(&p->br->lock); } - rtnl_unlock(); + spin_unlock_bh(&br->lock); +} + +static void br_port_set_promisc(struct net_bridge_port *p) +{ + int err = 0; + + if (br_promisc_port(p)) + return; + + err = dev_set_promiscuity(p->dev, 1); + if (err) + return; + + br_fdb_unsync_static(p->br, p); + p->flags |= BR_PROMISC; +} + +static void br_port_clear_promisc(struct net_bridge_port *p) +{ + int err; + + /* Check if the port is already non-promisc or if it doesn't + * support UNICAST filtering. Without unicast filtering support + * we'll end up re-enabling promisc mode anyway, so just check for + * it here. + */ + if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT)) + return; + + /* Since we'll be clearing the promisc mode, program the port + * first so that we don't have interruption in traffic. + */ + err = br_fdb_sync_static(p->br, p); + if (err) + return; + + dev_set_promiscuity(p->dev, -1); + p->flags &= ~BR_PROMISC; +} + +/* When a port is added or removed or when certain port flags + * change, this function is called to automatically manage + * promiscuity setting of all the bridge ports. We are always called + * under RTNL so can skip using rcu primitives. + */ +void br_manage_promisc(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool set_all = false; + + /* If vlan filtering is disabled or bridge interface is placed + * into promiscuous mode, place all ports in promiscuous mode. + */ + if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br)) + set_all = true; + + list_for_each_entry(p, &br->port_list, list) { + if (set_all) { + br_port_set_promisc(p); + } else { + /* If the number of auto-ports is <= 1, then all other + * ports will have their output configuration + * statically specified through fdbs. Since ingress + * on the auto-port becomes forwarding/egress to other + * ports and egress configuration is statically known, + * we can say that ingress configuration of the + * auto-port is also statically known. + * This lets us disable promiscuous mode and write + * this config to hw. + */ + if (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p))) + br_port_clear_promisc(p); + else + br_port_set_promisc(p); + } + } +} + +static void nbp_update_port_count(struct net_bridge *br) +{ + struct net_bridge_port *p; + u32 cnt = 0; + + list_for_each_entry(p, &br->port_list, list) { + if (br_auto_port(p)) + cnt++; + } + if (br->auto_cnt != cnt) { + br->auto_cnt = cnt; + br_manage_promisc(br); + } +} + +static void nbp_delete_promisc(struct net_bridge_port *p) +{ + /* If port is currently promiscuous, unset promiscuity. + * Otherwise, it is a static port so remove all addresses + * from it. + */ + dev_set_allmulti(p->dev, -1); + if (br_promisc_port(p)) + dev_set_promiscuity(p->dev, -1); + else + br_fdb_unsync_static(p->br, p); +} + +static void release_nbp(struct kobject *kobj) +{ + struct net_bridge_port *p + = container_of(kobj, struct net_bridge_port, kobj); + kfree(p); } +static struct kobj_type brport_ktype = { +#ifdef CONFIG_SYSFS + .sysfs_ops = &brport_sysfs_ops, +#endif + .release = release_nbp, +}; + static void destroy_nbp(struct net_bridge_port *p) { struct net_device *dev = p->dev; @@ -108,7 +212,7 @@ static void destroy_nbp(struct net_bridge_port *p) p->dev = NULL; dev_put(dev); - br_sysfs_freeif(p); + kobject_put(&p->kobj); } static void destroy_nbp_rcu(struct rcu_head *head) @@ -118,87 +222,69 @@ static void destroy_nbp_rcu(struct rcu_head *head) destroy_nbp(p); } -/* called with RTNL */ +/* Delete port(interface) from bridge is done in two steps. + * via RCU. First step, marks device as down. That deletes + * all the timers and stops new packets from flowing through. + * + * Final cleanup doesn't occur until after all CPU's finished + * processing packets. + * + * Protected from multiple admin operations by RTNL mutex + */ static void del_nbp(struct net_bridge_port *p) { struct net_bridge *br = p->br; struct net_device *dev = p->dev; - dev->br_port = NULL; - dev_set_promiscuity(dev, -1); + sysfs_remove_link(br->ifobj, p->dev->name); - cancel_delayed_work(&p->carrier_check); - flush_scheduled_work(); + nbp_delete_promisc(p); spin_lock_bh(&br->lock); br_stp_disable_port(p); spin_unlock_bh(&br->lock); - br_fdb_delete_by_port(br, p); + br_ifinfo_notify(RTM_DELLINK, p); list_del_rcu(&p->list); - del_timer_sync(&p->message_age_timer); - del_timer_sync(&p->forward_delay_timer); - del_timer_sync(&p->hold_timer); - + nbp_vlan_flush(p); + br_fdb_delete_by_port(br, p, 1); + nbp_update_port_count(br); + + dev->priv_flags &= ~IFF_BRIDGE_PORT; + + netdev_rx_handler_unregister(dev); + + netdev_upper_dev_unlink(dev, br->dev); + + br_multicast_del_port(p); + + kobject_uevent(&p->kobj, KOBJ_REMOVE); + kobject_del(&p->kobj); + + br_netpoll_disable(p); + call_rcu(&p->rcu, destroy_nbp_rcu); } -/* called with RTNL */ -static void del_br(struct net_bridge *br) +/* Delete bridge device */ +void br_dev_delete(struct net_device *dev, struct list_head *head) { + struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p, *n; list_for_each_entry_safe(p, n, &br->port_list, list) { - br_sysfs_removeif(p); del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + + br_vlan_flush(br); del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); - unregister_netdevice(br->dev); -} - -static struct net_device *new_bridge_dev(const char *name) -{ - struct net_bridge *br; - struct net_device *dev; - - dev = alloc_netdev(sizeof(struct net_bridge), name, - br_dev_setup); - - if (!dev) - return NULL; - - br = netdev_priv(dev); - br->dev = dev; - - spin_lock_init(&br->lock); - INIT_LIST_HEAD(&br->port_list); - spin_lock_init(&br->hash_lock); - - br->bridge_id.prio[0] = 0x80; - br->bridge_id.prio[1] = 0x00; - memset(br->bridge_id.addr, 0, ETH_ALEN); - - br->feature_mask = dev->features; - br->stp_enabled = 0; - br->designated_root = br->bridge_id; - br->root_path_cost = 0; - br->root_port = 0; - br->bridge_max_age = br->max_age = 20 * HZ; - br->bridge_hello_time = br->hello_time = 2 * HZ; - br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->topology_change = 0; - br->topology_change_detected = 0; - br->ageing_time = 300 * HZ; - INIT_LIST_HEAD(&br->age_list); - - br_stp_timer_init(br); - - return dev; + unregister_netdevice_queue(br->dev, head); } /* find an available port number */ @@ -208,12 +294,11 @@ static int find_portno(struct net_bridge *br) struct net_bridge_port *p; unsigned long *inuse; - inuse = kmalloc(BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long), + inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long), GFP_KERNEL); if (!inuse) return -ENOMEM; - memset(inuse, 0, BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long)); set_bit(0, inuse); /* zero is reserved */ list_for_each_entry(p, &br->port_list, list) { set_bit(p->port_no, inuse); @@ -225,86 +310,63 @@ static int find_portno(struct net_bridge *br) } /* called with RTNL but without bridge lock */ -static struct net_bridge_port *new_nbp(struct net_bridge *br, +static struct net_bridge_port *new_nbp(struct net_bridge *br, struct net_device *dev) { int index; struct net_bridge_port *p; - + index = find_portno(br); if (index < 0) return ERR_PTR(index); - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return ERR_PTR(-ENOMEM); - memset(p, 0, sizeof(*p)); p->br = br; dev_hold(dev); p->dev = dev; p->path_cost = port_cost(dev); - p->priority = 0x8000 >> BR_PORT_BITS; - dev->br_port = p; + p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; + p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); p->state = BR_STATE_DISABLED; - INIT_WORK(&p->carrier_check, port_carrier_check, p); - kobject_init(&p->kobj); + br_stp_port_timer_init(p); + br_multicast_add_port(p); return p; } -int br_add_bridge(const char *name) +int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; - int ret; - - dev = new_bridge_dev(name); - if (!dev) - return -ENOMEM; - - rtnl_lock(); - if (strchr(dev->name, '%')) { - ret = dev_alloc_name(dev, dev->name); - if (ret < 0) - goto err1; - } - - ret = register_netdevice(dev); - if (ret) - goto err2; + int res; - /* network device kobject is not setup until - * after rtnl_unlock does it's hotplug magic. - * so hold reference to avoid race. - */ - dev_hold(dev); - rtnl_unlock(); + dev = alloc_netdev(sizeof(struct net_bridge), name, + br_dev_setup); - ret = br_sysfs_addbr(dev); - dev_put(dev); + if (!dev) + return -ENOMEM; - if (ret) - unregister_netdev(dev); - out: - return ret; + dev_net_set(dev, net); + dev->rtnl_link_ops = &br_link_ops; - err2: - free_netdev(dev); - err1: - rtnl_unlock(); - goto out; + res = register_netdev(dev); + if (res) + free_netdev(dev); + return res; } -int br_del_bridge(const char *name) +int br_del_bridge(struct net *net, const char *name) { struct net_device *dev; int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(name); - if (dev == NULL) + dev = __dev_get_by_name(net, name); + if (dev == NULL) ret = -ENXIO; /* Could not find device */ else if (!(dev->priv_flags & IFF_EBRIDGE)) { @@ -315,10 +377,10 @@ int br_del_bridge(const char *name) else if (dev->flags & IFF_UP) { /* Not shutdown yet. */ ret = -EBUSY; - } + } - else - del_br(netdev_priv(dev)); + else + br_dev_delete(dev, NULL); rtnl_unlock(); return ret; @@ -346,22 +408,24 @@ int br_min_mtu(const struct net_bridge *br) /* * Recomputes features using slave's features */ -void br_features_recompute(struct net_bridge *br) +netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features) { struct net_bridge_port *p; - unsigned long features, checksum; + netdev_features_t mask; - features = br->feature_mask &~ NETIF_F_IP_CSUM; - checksum = br->feature_mask & NETIF_F_IP_CSUM; + if (list_empty(&br->port_list)) + return features; + + mask = features; + features &= ~NETIF_F_ONE_FOR_ALL; list_for_each_entry(p, &br->port_list, list) { - if (!(p->dev->features - & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))) - checksum = 0; - features &= p->dev->features; + features = netdev_increment_features(features, + p->dev->features, mask); } - br->dev->features = features | checksum | NETIF_F_LLTX; + return features; } /* called with RTNL */ @@ -369,72 +433,141 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; int err = 0; + bool changed_addr; - if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) + /* Don't allow bridging non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || + dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || + !is_valid_ether_addr(dev->dev_addr)) return -EINVAL; - if (dev->hard_start_xmit == br_dev_xmit) + /* No bridging of bridges */ + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; - if (dev->br_port != NULL) + /* Device is already being bridged */ + if (br_port_exists(dev)) return -EBUSY; - if (IS_ERR(p = new_nbp(br, dev))) + /* No bridging devices that dislike that (e.g. wireless) */ + if (dev->priv_flags & IFF_DONT_BRIDGE) + return -EOPNOTSUPP; + + p = new_nbp(br, dev); + if (IS_ERR(p)) return PTR_ERR(p); - if ((err = br_fdb_insert(br, p, dev->dev_addr))) - destroy_nbp(p); - - else if ((err = br_sysfs_addif(p))) - del_nbp(p); - else { - dev_set_promiscuity(dev, 1); + call_netdevice_notifiers(NETDEV_JOIN, dev); - list_add_rcu(&p->list, &br->port_list); + err = dev_set_allmulti(dev, 1); + if (err) + goto put_back; - spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); - br_features_recompute(br); - if ((br->dev->flags & IFF_UP) - && (dev->flags & IFF_UP) && netif_carrier_ok(dev)) - br_stp_enable_port(p); - spin_unlock_bh(&br->lock); + err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), + SYSFS_BRIDGE_PORT_ATTR); + if (err) + goto err1; - dev_set_mtu(br->dev, br_min_mtu(br)); - } + err = br_sysfs_addif(p); + if (err) + goto err2; + + err = br_netpoll_enable(p); + if (err) + goto err3; + + err = netdev_master_upper_dev_link(dev, br->dev); + if (err) + goto err4; + + err = netdev_rx_handler_register(dev, br_handle_frame, p); + if (err) + goto err5; + + dev->priv_flags |= IFF_BRIDGE_PORT; + + dev_disable_lro(dev); + + list_add_rcu(&p->list, &br->port_list); + + nbp_update_port_count(br); + + netdev_update_features(br->dev); + + if (br->dev->needed_headroom < dev->needed_headroom) + br->dev->needed_headroom = dev->needed_headroom; + + if (br_fdb_insert(br, p, dev->dev_addr, 0)) + netdev_err(dev, "failed insert local address bridge forwarding table\n"); + + spin_lock_bh(&br->lock); + changed_addr = br_stp_recalculate_bridge_id(br); + + if (netif_running(dev) && netif_oper_up(dev) && + (br->dev->flags & IFF_UP)) + br_stp_enable_port(p); + spin_unlock_bh(&br->lock); + br_ifinfo_notify(RTM_NEWLINK, p); + + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + dev_set_mtu(br->dev, br_min_mtu(br)); + + kobject_uevent(&p->kobj, KOBJ_ADD); + + return 0; + +err5: + netdev_upper_dev_unlink(dev, br->dev); +err4: + br_netpoll_disable(p); +err3: + sysfs_remove_link(br->ifobj, p->dev->name); +err2: + kobject_put(&p->kobj); + p = NULL; /* kobject_put frees */ +err1: + dev_set_allmulti(dev, -1); +put_back: + dev_put(dev); + kfree(p); return err; } /* called with RTNL */ int br_del_if(struct net_bridge *br, struct net_device *dev) { - struct net_bridge_port *p = dev->br_port; - - if (!p || p->br != br) + struct net_bridge_port *p; + bool changed_addr; + + p = br_port_get_rtnl(dev); + if (!p || p->br != br) return -EINVAL; - br_sysfs_removeif(p); + /* Since more than one interface can be attached to a bridge, + * there still maybe an alternate path for netconsole to use; + * therefore there is no reason for a NETDEV_RELEASE event. + */ del_nbp(p); spin_lock_bh(&br->lock); - br_stp_recalculate_bridge_id(br); - br_features_recompute(br); + changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); + if (changed_addr) + call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); + + netdev_update_features(br->dev); + return 0; } -void __exit br_cleanup_bridges(void) +void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) { - struct net_device *dev, *nxt; - - rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { - nxt = dev->next; - if (dev->priv_flags & IFF_EBRIDGE) - del_br(dev->priv); - } - rtnl_unlock(); + struct net_bridge *br = p->br; + if (mask & BR_AUTO_MASK) + nbp_update_port_count(br); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index e3a73cead6b..366c4364907 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -5,141 +5,244 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/netfilter_bridge.h> +#include <linux/export.h> +#include <linux/rculist.h> #include "br_private.h" -const unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; - -static int br_pass_frame_up_finish(struct sk_buff *skb) -{ - netif_receive_skb(skb); - return 0; -} +/* Hook for brouter */ +br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; +EXPORT_SYMBOL(br_should_route_hook); -static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) +static int br_pass_frame_up(struct sk_buff *skb) { - struct net_device *indev; - - br->statistics.rx_packets++; - br->statistics.rx_bytes += skb->len; + struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; + struct net_bridge *br = netdev_priv(brdev); + struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + struct net_port_vlans *pv; + + u64_stats_update_begin(&brstats->syncp); + brstats->rx_packets++; + brstats->rx_bytes += skb->len; + u64_stats_update_end(&brstats->syncp); + + /* Bridge is just like any other port. Make sure the + * packet is allowed except in promisc modue when someone + * may be running packet capture. + */ + pv = br_get_vlan_info(br); + if (!(brdev->flags & IFF_PROMISC) && + !br_allowed_egress(br, pv, skb)) { + kfree_skb(skb); + return NET_RX_DROP; + } indev = skb->dev; - skb->dev = br->dev; + skb->dev = brdev; + skb = br_handle_vlan(br, pv, skb); + if (!skb) + return NET_RX_DROP; - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, - br_pass_frame_up_finish); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, + netif_receive_skb); } -/* note: already called with rcu_read_lock (preempt_disabled) */ +/* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; - struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + struct net_bridge *br; struct net_bridge_fdb_entry *dst; - int passedup = 0; + struct net_bridge_mdb_entry *mdst; + struct sk_buff *skb2; + bool unicast = true; + u16 vid = 0; - /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); - - if (p->state == BR_STATE_LEARNING) { - kfree_skb(skb); - goto out; - } - - if (br->dev->flags & IFF_PROMISC) { - struct sk_buff *skb2; - - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 != NULL) { - passedup = 1; - br_pass_frame_up(br, skb2); - } - } + if (!p || p->state == BR_STATE_DISABLED) + goto drop; - if (is_multicast_ether_addr(dest)) { - br_flood_forward(br, skb, !passedup); - if (!passedup) - br_pass_frame_up(br, skb); + if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) goto out; - } - dst = __br_fdb_get(br, dest); - if (dst != NULL && dst->is_local) { - if (!passedup) - br_pass_frame_up(br, skb); - else - kfree_skb(skb); - goto out; + /* insert into forwarding database after filtering to avoid spoofing */ + br = p->br; + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); + + if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && + br_multicast_rcv(br, p, skb, vid)) + goto drop; + + if (p->state == BR_STATE_LEARNING) + goto drop; + + BR_INPUT_SKB_CB(skb)->brdev = br->dev; + + /* The packet skb2 goes to the local host (NULL to skip). */ + skb2 = NULL; + + if (br->dev->flags & IFF_PROMISC) + skb2 = skb; + + dst = NULL; + + if (is_broadcast_ether_addr(dest)) { + skb2 = skb; + unicast = false; + } else if (is_multicast_ether_addr(dest)) { + mdst = br_mdb_get(br, skb, vid); + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br, eth_hdr(skb))) { + if ((mdst && mdst->mglist) || + br_multicast_is_router(br)) + skb2 = skb; + br_multicast_forward(mdst, skb, skb2); + skb = NULL; + if (!skb2) + goto out; + } else + skb2 = skb; + + unicast = false; + br->dev->stats.multicast++; + } else if ((dst = __br_fdb_get(br, dest, vid)) && + dst->is_local) { + skb2 = skb; + /* Do not forward the packet since it's local. */ + skb = NULL; } - if (dst != NULL) { - br_forward(dst->dst, skb); - goto out; + if (skb) { + if (dst) { + dst->used = jiffies; + br_forward(dst->dst, skb, skb2); + } else + br_flood_forward(br, skb, skb2, unicast); } - br_flood_forward(br, skb, 0); + if (skb2) + return br_pass_frame_up(skb2); out: return 0; +drop: + kfree_skb(skb); + goto out; +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + u16 vid = 0; + + /* check if vlan is allowed, to avoid spoofing */ + if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); + return 0; /* process further */ } /* - * Called via br_handle_frame_hook. - * Return 0 if *pskb should be processed furthur - * 1 if *pskb is handled - * note: already called with rcu_read_lock (preempt_disabled) + * Return NULL if skb is handled + * note: already called with rcu_read_lock */ -int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) +rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { + struct net_bridge_port *p; struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; + br_should_route_hook_t *rhook; - if (p->state == BR_STATE_DISABLED) - goto err; + if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + return RX_HANDLER_PASS; if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) - goto err; - - if (p->br->stp_enabled && - !memcmp(dest, bridge_ula, 5) && - !(dest[5] & 0xF0)) { - if (!dest[5]) { - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_stp_handle_bpdu); - return 1; + goto drop; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + return RX_HANDLER_CONSUMED; + + p = br_port_get_rcu(skb->dev); + + if (unlikely(is_link_local_ether_addr(dest))) { + u16 fwd_mask = p->br->group_fwd_mask_required; + + /* + * See IEEE 802.1D Table 7-10 Reserved addresses + * + * Assignment Value + * Bridge Group Address 01-80-C2-00-00-00 + * (MAC Control) 802.3 01-80-C2-00-00-01 + * (Link Aggregation) 802.3 01-80-C2-00-00-02 + * 802.1X PAE address 01-80-C2-00-00-03 + * + * 802.1AB LLDP 01-80-C2-00-00-0E + * + * Others reserved for future standardization + */ + switch (dest[5]) { + case 0x00: /* Bridge Group Address */ + /* If STP is turned off, + then must forward to keep loop detection */ + if (p->br->stp_enabled == BR_NO_STP || + fwd_mask & (1u << dest[5])) + goto forward; + break; + + case 0x01: /* IEEE MAC (Pause) */ + goto drop; + + default: + /* Allow selective forwarding for most other protocols */ + fwd_mask |= p->br->group_fwd_mask; + if (fwd_mask & (1u << dest[5])) + goto forward; + } + + /* Deliver packet to local host only */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, + NULL, br_handle_local_finish)) { + return RX_HANDLER_CONSUMED; /* consumed by filter */ + } else { + *pskb = skb; + return RX_HANDLER_PASS; /* continue processing */ } - goto err; } - if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) { - if (br_should_route_hook) { - if (br_should_route_hook(pskb)) - return 0; - skb = *pskb; +forward: + switch (p->state) { + case BR_STATE_FORWARDING: + rhook = rcu_dereference(br_should_route_hook); + if (rhook) { + if ((*rhook)(skb)) { + *pskb = skb; + return RX_HANDLER_PASS; + } dest = eth_hdr(skb)->h_dest; } - - if (!compare_ether_addr(p->br->dev->dev_addr, dest)) + /* fall through */ + case BR_STATE_LEARNING: + if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish); - return 1; + break; + default: +drop: + kfree_skb(skb); } - -err: - kfree_skb(skb); - return 1; + return RX_HANDLER_CONSUMED; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 159fb840982..a9a4a1b7863 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -17,18 +15,22 @@ #include <linux/kernel.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> +#include <linux/slab.h> #include <linux/times.h> +#include <net/net_namespace.h> #include <asm/uaccess.h> #include "br_private.h" /* called with RTNL */ -static int get_bridge_ifindices(int *indices, int num) +static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for (dev = dev_base; dev && i < num; dev = dev->next) { - if (dev->priv_flags & IFF_EBRIDGE) + for_each_netdev(net, dev) { + if (i >= num) + break; + if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } @@ -53,22 +55,23 @@ static void get_port_ifindices(struct net_bridge *br, int *ifindices, int num) * (limited to a page for sanity) * offset -- number of records to skip */ -static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, +static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, unsigned long maxnum, unsigned long offset) { int num; void *buf; - size_t size = maxnum * sizeof(struct __fdb_entry); + size_t size; - if (size > PAGE_SIZE) { - size = PAGE_SIZE; + /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */ + if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry)) maxnum = PAGE_SIZE/sizeof(struct __fdb_entry); - } + + size = maxnum * sizeof(struct __fdb_entry); buf = kmalloc(size, GFP_USER); if (!buf) return -ENOMEM; - + num = br_fdb_fillbuf(br, buf, maxnum, offset); if (num > 0) { if (copy_to_user(userbuf, buf, num*sizeof(struct __fdb_entry))) @@ -79,37 +82,38 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, return num; } +/* called with RTNL */ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) { + struct net *net = dev_net(br->dev); struct net_device *dev; int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev == NULL) return -EINVAL; - + if (isadd) ret = br_add_if(br, dev); else ret = br_del_if(br, dev); - dev_put(dev); return ret; } /* * Legacy ioctl's through SIOCDEVPRIVATE * This interface is deprecated because it was too difficult to - * to do the translation for 32/64bit ioctl compatability. + * to do the translation for 32/64bit ioctl compatibility. */ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); unsigned long args[4]; - + if (copy_from_user(args, rq->ifr_data, sizeof(args))) return -EFAULT; @@ -136,13 +140,14 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) b.topology_change = br->topology_change; b.topology_change_detected = br->topology_change_detected; b.root_port = br->root_port; - b.stp_enabled = br->stp_enabled; + + b.stp_enabled = (br->stp_enabled != BR_NO_STP); b.ageing_time = jiffies_to_clock_t(br->ageing_time); b.hello_timer_value = br_timer_value(&br->hello_timer); b.tcn_timer_value = br_timer_value(&br->tcn_timer); b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); b.gc_timer_value = br_timer_value(&br->gc_timer); - rcu_read_unlock(); + rcu_read_unlock(); if (copy_to_user((void __user *)args[1], &b, sizeof(b))) return -EFAULT; @@ -162,12 +167,10 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (num > BR_MAX_PORTS) num = BR_MAX_PORTS; - indices = kmalloc(num*sizeof(int), GFP_KERNEL); + indices = kcalloc(num, sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; - memset(indices, 0, num*sizeof(int)); - get_port_ifindices(br, indices, num); if (copy_to_user((void __user *)args[1], indices, num*sizeof(int))) num = -EFAULT; @@ -176,40 +179,25 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } case BRCTL_SET_BRIDGE_FORWARD_DELAY: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); - br->bridge_forward_delay = clock_t_to_jiffies(args[1]); - if (br_is_root_bridge(br)) - br->forward_delay = br->bridge_forward_delay; - spin_unlock_bh(&br->lock); - return 0; + return br_set_forward_delay(br, args[1]); case BRCTL_SET_BRIDGE_HELLO_TIME: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); - br->bridge_hello_time = clock_t_to_jiffies(args[1]); - if (br_is_root_bridge(br)) - br->hello_time = br->bridge_hello_time; - spin_unlock_bh(&br->lock); - return 0; + return br_set_hello_time(br, args[1]); case BRCTL_SET_BRIDGE_MAX_AGE: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - spin_lock_bh(&br->lock); - br->bridge_max_age = clock_t_to_jiffies(args[1]); - if (br_is_root_bridge(br)) - br->max_age = br->bridge_max_age; - spin_unlock_bh(&br->lock); - return 0; + return br_set_max_age(br, args[1]); case BRCTL_SET_AGEING_TIME: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; br->ageing_time = clock_t_to_jiffies(args[1]); @@ -249,14 +237,14 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } case BRCTL_SET_BRIDGE_STP_STATE: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - br->stp_enabled = args[1]?1:0; + br_stp_set_enabled(br, args[1]); return 0; case BRCTL_SET_BRIDGE_PRIORITY: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; spin_lock_bh(&br->lock); @@ -267,19 +255,16 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case BRCTL_SET_PORT_PRIORITY: { struct net_bridge_port *p; - int ret = 0; + int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (args[2] >= (1<<(16-BR_PORT_BITS))) - return -ERANGE; - spin_lock_bh(&br->lock); - if ((p = br_get_port(br, args[1])) == NULL) + if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else - br_stp_set_port_priority(p, args[2]); + ret = br_stp_set_port_priority(p, args[2]); spin_unlock_bh(&br->lock); return ret; } @@ -287,29 +272,30 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case BRCTL_SET_PATH_COST: { struct net_bridge_port *p; - int ret = 0; + int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; spin_lock_bh(&br->lock); if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else - br_stp_set_path_cost(p, args[2]); + ret = br_stp_set_path_cost(p, args[2]); spin_unlock_bh(&br->lock); + return ret; } case BRCTL_GET_FDB_ENTRIES: - return get_fdb_entries(br, (void __user *)args[1], + return get_fdb_entries(br, (void __user *)args[1], args[2], args[3]); } return -EOPNOTSUPP; } -static int old_deviceless(void __user *uarg) +static int old_deviceless(struct net *net, void __user *uarg) { unsigned long args[3]; @@ -327,12 +313,11 @@ static int old_deviceless(void __user *uarg) if (args[2] >= 2048) return -ENOMEM; - indices = kmalloc(args[2]*sizeof(int), GFP_KERNEL); + indices = kcalloc(args[2], sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; - memset(indices, 0, args[2]*sizeof(int)); - args[2] = get_bridge_ifindices(indices, args[2]); + args[2] = get_bridge_ifindices(net, indices, args[2]); ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) ? -EFAULT : args[2]; @@ -346,7 +331,7 @@ static int old_deviceless(void __user *uarg) { char buf[IFNAMSIZ]; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ)) @@ -355,28 +340,28 @@ static int old_deviceless(void __user *uarg) buf[IFNAMSIZ-1] = 0; if (args[0] == BRCTL_ADD_BRIDGE) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } return -EOPNOTSUPP; } -int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) { switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(uarg); - + return old_deviceless(net, uarg); + case SIOCBRADDBR: case SIOCBRDELBR: { char buf[IFNAMSIZ]; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(buf, uarg, IFNAMSIZ)) @@ -384,9 +369,9 @@ int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } return -EOPNOTSUPP; @@ -396,7 +381,7 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct net_bridge *br = netdev_priv(dev); - switch(cmd) { + switch (cmd) { case SIOCDEVPRIVATE: return old_dev_ioctl(dev, rq, cmd); @@ -406,6 +391,6 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } - pr_debug("Bridge does not support ioctl 0x%x\n", cmd); + br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd); return -EOPNOTSUPP; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c new file mode 100644 index 00000000000..5df05269d17 --- /dev/null +++ b/net/bridge/br_mdb.c @@ -0,0 +1,498 @@ +#include <linux/err.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <net/ip.h> +#include <net/netlink.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/addrconf.h> +#endif + +#include "br_private.h" + +static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + struct nlattr *nest; + + if (!br->multicast_router || hlist_empty(&br->router_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_ROUTER); + if (nest == NULL) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(p, &br->router_list, rlist) { + if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) + goto fail; + } + + nla_nest_end(skb, nest); + return 0; +fail: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mdb_htable *mdb; + struct nlattr *nest, *nest2; + int i, err = 0; + int idx = 0, s_idx = cb->args[1]; + + if (br->multicast_disabled) + return 0; + + mdb = rcu_dereference(br->mdb); + if (!mdb) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + return -EMSGSIZE; + + for (i = 0; i < mdb->max; i++) { + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port *port; + + hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) { + if (idx < s_idx) + goto skip; + + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) { + err = -EMSGSIZE; + goto out; + } + + for (pp = &mp->ports; + (p = rcu_dereference(*pp)) != NULL; + pp = &p->next) { + port = p->port; + if (port) { + struct br_mdb_entry e; + memset(&e, 0, sizeof(e)); + e.ifindex = port->dev->ifindex; + e.state = p->state; + if (p->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = p->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + if (p->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = p->addr.u.ip6; +#endif + e.addr.proto = p->addr.proto; + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; + } + } + } + nla_nest_end(skb, nest2); + skip: + idx++; + } + } + +out: + cb->args[1] = idx; + nla_nest_end(skb, nest); + return err; +} + +static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net_device *dev; + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + int idx = 0, s_idx; + + s_idx = cb->args[0]; + + rcu_read_lock(); + + /* In theory this could be wrapped to 0... */ + cb->seq = net->dev_base_seq + br_mdb_rehash_seq; + + for_each_netdev_rcu(net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { + struct br_port_msg *bpm; + + if (idx < s_idx) + goto skip; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, + sizeof(*bpm), NLM_F_MULTI); + if (nlh == NULL) + break; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->ifindex = dev->ifindex; + if (br_mdb_fill_info(skb, cb, dev) < 0) + goto out; + if (br_rports_fill_info(skb, cb, dev) < 0) + goto out; + + cb->args[1] = 0; + nlmsg_end(skb, nlh); + skip: + idx++; + } + } + +out: + if (nlh) + nlmsg_end(skb, nlh); + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +static int nlmsg_populate_mdb_fill(struct sk_buff *skb, + struct net_device *dev, + struct br_mdb_entry *entry, u32 pid, + u32 seq, int type, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct br_port_msg *bpm; + struct nlattr *nest, *nest2; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + memset(bpm, 0, sizeof(*bpm)); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + goto cancel; + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) + goto end; + + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry)) + goto end; + + nla_nest_end(skb, nest2); + nla_nest_end(skb, nest); + return nlmsg_end(skb, nlh); + +end: + nla_nest_end(skb, nest); +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t rtnl_mdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(struct br_mdb_entry)); +} + +static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, + int type) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type) +{ + struct br_mdb_entry entry; + + memset(&entry, 0, sizeof(entry)); + entry.ifindex = port->dev->ifindex; + entry.addr.proto = group->proto; + entry.addr.u.ip4 = group->u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + entry.addr.u.ip6 = group->u.ip6; +#endif + __br_mdb_notify(dev, &entry, type); +} + +static bool is_valid_mdb_entry(struct br_mdb_entry *entry) +{ + if (entry->ifindex == 0) + return false; + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4)) + return false; + if (ipv4_is_local_multicast(entry->addr.u.ip4)) + return false; +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) + return false; +#endif + } else + return false; + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) + return false; + + return true; +} + +static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net_device **pdev, struct br_mdb_entry **pentry) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct br_port_msg *bpm; + struct nlattr *tb[MDBA_SET_ENTRY_MAX+1]; + struct net_device *dev; + int err; + + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + if (err < 0) + return err; + + bpm = nlmsg_data(nlh); + if (bpm->ifindex == 0) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (dev == NULL) { + pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n"); + return -ENODEV; + } + + if (!(dev->priv_flags & IFF_EBRIDGE)) { + pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n"); + return -EOPNOTSUPP; + } + + *pdev = dev; + + if (!tb[MDBA_SET_ENTRY] || + nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n"); + return -EINVAL; + } + + entry = nla_data(tb[MDBA_SET_ENTRY]); + if (!is_valid_mdb_entry(entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n"); + return -EINVAL; + } + + *pentry = entry; + return 0; +} + +static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group, unsigned char state) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, group); + if (!mp) { + mp = br_multicast_new_group(br, port, group); + err = PTR_ERR(mp); + if (IS_ERR(mp)) + return err; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port == port) + return -EEXIST; + if ((unsigned long)p->port < (unsigned long)port) + break; + } + + p = br_multicast_new_port_group(port, group, *pp, state); + if (unlikely(!p)) + return -ENOMEM; + rcu_assign_pointer(*pp, p); + + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + return 0; +} + +static int __br_mdb_add(struct net *net, struct net_bridge *br, + struct br_mdb_entry *entry) +{ + struct br_ip ip; + struct net_device *dev; + struct net_bridge_port *p; + int ret; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + dev = __dev_get_by_index(net, entry->ifindex); + if (!dev) + return -ENODEV; + + p = br_port_get_rtnl(dev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + ret = br_mdb_add_group(br, p, &ip, entry->state); + spin_unlock_bh(&br->multicast_lock); + return ret; +} + +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct net_device *dev; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_add(net, br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_NEWMDB); + return err; +} + +static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip ip; + int err = -EINVAL; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) { + if (timer_pending(&br->ip4_other_query.timer)) + return -EBUSY; + + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (timer_pending(&br->ip6_other_query.timer)) + return -EBUSY; + + ip.u.ip6 = entry->addr.u.ip6; +#endif + } + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + + mp = br_mdb_ip_get(mdb, &ip); + if (!mp) + goto unlock; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (!p->port || p->port->dev->ifindex != entry->ifindex) + continue; + + if (p->port->state == BR_STATE_DISABLED) + goto unlock; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + err = 0; + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + return err; +} + +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net_device *dev; + struct br_mdb_entry *entry; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_del(br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_DELMDB); + return err; +} + +void br_mdb_init(void) +{ + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); +} + +void br_mdb_uninit(void) +{ + rtnl_unregister(PF_BRIDGE, RTM_GETMDB); + rtnl_unregister(PF_BRIDGE, RTM_NEWMDB); + rtnl_unregister(PF_BRIDGE, RTM_DELMDB); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c new file mode 100644 index 00000000000..abfa0b65a11 --- /dev/null +++ b/net/bridge/br_multicast.c @@ -0,0 +1,2265 @@ +/* + * Bridge multicast support. + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <linux/err.h> +#include <linux/export.h> +#include <linux/if_ether.h> +#include <linux/igmp.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/netdevice.h> +#include <linux/netfilter_bridge.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/inetdevice.h> +#include <net/ip.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/ip6_checksum.h> +#include <net/addrconf.h> +#endif + +#include "br_private.h" + +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_own_query *query); +unsigned int br_mdb_rehash_seq; + +static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) +{ + if (a->proto != b->proto) + return 0; + if (a->vid != b->vid) + return 0; + switch (a->proto) { + case htons(ETH_P_IP): + return a->u.ip4 == b->u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return ipv6_addr_equal(&a->u.ip6, &b->u.ip6); +#endif + } + return 0; +} + +static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip, + __u16 vid) +{ + return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb, + const struct in6_addr *ip, + __u16 vid) +{ + return jhash_2words(ipv6_addr_hash(ip), vid, + mdb->secret) & (mdb->max - 1); +} +#endif + +static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, + struct br_ip *ip) +{ + switch (ip->proto) { + case htons(ETH_P_IP): + return __br_ip4_hash(mdb, ip->u.ip4, ip->vid); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid); +#endif + } + return 0; +} + +static struct net_bridge_mdb_entry *__br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash) +{ + struct net_bridge_mdb_entry *mp; + + hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) { + if (br_ip_equal(&mp->addr, dst)) + return mp; + } + + return NULL; +} + +struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, + struct br_ip *dst) +{ + if (!mdb) + return NULL; + + return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); +} + +static struct net_bridge_mdb_entry *br_mdb_ip4_get( + struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid) +{ + struct br_ip br_dst; + + br_dst.u.ip4 = dst; + br_dst.proto = htons(ETH_P_IP); + br_dst.vid = vid; + + return br_mdb_ip_get(mdb, &br_dst); +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct net_bridge_mdb_entry *br_mdb_ip6_get( + struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst, + __u16 vid) +{ + struct br_ip br_dst; + + br_dst.u.ip6 = *dst; + br_dst.proto = htons(ETH_P_IPV6); + br_dst.vid = vid; + + return br_mdb_ip_get(mdb, &br_dst); +} +#endif + +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid) +{ + struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb); + struct br_ip ip; + + if (br->multicast_disabled) + return NULL; + + if (BR_INPUT_SKB_CB(skb)->igmp) + return NULL; + + ip.proto = skb->protocol; + ip.vid = vid; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ip.u.ip4 = ip_hdr(skb)->daddr; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ip.u.ip6 = ipv6_hdr(skb)->daddr; + break; +#endif + default: + return NULL; + } + + return br_mdb_ip_get(mdb, &ip); +} + +static void br_mdb_free(struct rcu_head *head) +{ + struct net_bridge_mdb_htable *mdb = + container_of(head, struct net_bridge_mdb_htable, rcu); + struct net_bridge_mdb_htable *old = mdb->old; + + mdb->old = NULL; + kfree(old->mhash); + kfree(old); +} + +static int br_mdb_copy(struct net_bridge_mdb_htable *new, + struct net_bridge_mdb_htable *old, + int elasticity) +{ + struct net_bridge_mdb_entry *mp; + int maxlen; + int len; + int i; + + for (i = 0; i < old->max; i++) + hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver]) + hlist_add_head(&mp->hlist[new->ver], + &new->mhash[br_ip_hash(new, &mp->addr)]); + + if (!elasticity) + return 0; + + maxlen = 0; + for (i = 0; i < new->max; i++) { + len = 0; + hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver]) + len++; + if (len > maxlen) + maxlen = len; + } + + return maxlen > elasticity ? -EINVAL : 0; +} + +void br_multicast_free_pg(struct rcu_head *head) +{ + struct net_bridge_port_group *p = + container_of(head, struct net_bridge_port_group, rcu); + + kfree(p); +} + +static void br_multicast_free_group(struct rcu_head *head) +{ + struct net_bridge_mdb_entry *mp = + container_of(head, struct net_bridge_mdb_entry, rcu); + + kfree(mp); +} + +static void br_multicast_group_expired(unsigned long data) +{ + struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge *br = mp->br; + struct net_bridge_mdb_htable *mdb; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&mp->timer)) + goto out; + + mp->mglist = false; + + if (mp->ports) + goto out; + + mdb = mlock_dereference(br->mdb, br); + + hlist_del_rcu(&mp->hlist[mdb->ver]); + mdb->size--; + + call_rcu_bh(&mp->rcu, br_multicast_free_group); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_del_pg(struct net_bridge *br, + struct net_bridge_port_group *pg) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + + mdb = mlock_dereference(br->mdb, br); + + mp = br_mdb_ip_get(mdb, &pg->addr); + if (WARN_ON(!mp)) + return; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p != pg) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + + return; + } + + WARN_ON(1); +} + +static void br_multicast_port_group_expired(unsigned long data) +{ + struct net_bridge_port_group *pg = (void *)data; + struct net_bridge *br = pg->port->br; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || timer_pending(&pg->timer) || + hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) + goto out; + + br_multicast_del_pg(br, pg); + +out: + spin_unlock(&br->multicast_lock); +} + +static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, + int elasticity) +{ + struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1); + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC); + if (!mdb) + return -ENOMEM; + + mdb->max = max; + mdb->old = old; + + mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC); + if (!mdb->mhash) { + kfree(mdb); + return -ENOMEM; + } + + mdb->size = old ? old->size : 0; + mdb->ver = old ? old->ver ^ 1 : 0; + + if (!old || elasticity) + get_random_bytes(&mdb->secret, sizeof(mdb->secret)); + else + mdb->secret = old->secret; + + if (!old) + goto out; + + err = br_mdb_copy(mdb, old, elasticity); + if (err) { + kfree(mdb->mhash); + kfree(mdb); + return err; + } + + br_mdb_rehash_seq++; + call_rcu_bh(&mdb->rcu, br_mdb_free); + +out: + rcu_assign_pointer(*mdbp, mdb); + + return 0; +} + +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, + __be32 group) +{ + struct sk_buff *skb; + struct igmphdr *ih; + struct ethhdr *eth; + struct iphdr *iph; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + + sizeof(*ih) + 4); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IP); + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + ether_addr_copy(eth->h_source, br->dev->dev_addr); + eth->h_dest[0] = 1; + eth->h_dest[1] = 0; + eth->h_dest[2] = 0x5e; + eth->h_dest[3] = 0; + eth->h_dest[4] = 0; + eth->h_dest[5] = 1; + eth->h_proto = htons(ETH_P_IP); + skb_put(skb, sizeof(*eth)); + + skb_set_network_header(skb, skb->len); + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = 6; + iph->tos = 0xc0; + iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4); + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = 1; + iph->protocol = IPPROTO_IGMP; + iph->saddr = br->multicast_query_use_ifaddr ? + inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; + iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); + ((u8 *)&iph[1])[0] = IPOPT_RA; + ((u8 *)&iph[1])[1] = 4; + ((u8 *)&iph[1])[2] = 0; + ((u8 *)&iph[1])[3] = 0; + ip_send_check(iph); + skb_put(skb, 24); + + skb_set_transport_header(skb, skb->len); + ih = igmp_hdr(skb); + ih->type = IGMP_HOST_MEMBERSHIP_QUERY; + ih->code = (group ? br->multicast_last_member_interval : + br->multicast_query_response_interval) / + (HZ / IGMP_TIMER_SCALE); + ih->group = group; + ih->csum = 0; + ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); + skb_put(skb, sizeof(*ih)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} + +#if IS_ENABLED(CONFIG_IPV6) +static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, + const struct in6_addr *group) +{ + struct sk_buff *skb; + struct ipv6hdr *ip6h; + struct mld_msg *mldq; + struct ethhdr *eth; + u8 *hopopt; + unsigned long interval; + + skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + + 8 + sizeof(*mldq)); + if (!skb) + goto out; + + skb->protocol = htons(ETH_P_IPV6); + + /* Ethernet header */ + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + ether_addr_copy(eth->h_source, br->dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + skb_put(skb, sizeof(*eth)); + + /* IPv6 header + HbH option */ + skb_set_network_header(skb, skb->len); + ip6h = ipv6_hdr(skb); + + *(__force __be32 *)ip6h = htonl(0x60000000); + ip6h->payload_len = htons(8 + sizeof(*mldq)); + ip6h->nexthdr = IPPROTO_HOPOPTS; + ip6h->hop_limit = 1; + ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr)) { + kfree_skb(skb); + return NULL; + } + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); + + hopopt = (u8 *)(ip6h + 1); + hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ + hopopt[1] = 0; /* length of HbH */ + hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */ + hopopt[3] = 2; /* Length of RA Option */ + hopopt[4] = 0; /* Type = 0x0000 (MLD) */ + hopopt[5] = 0; + hopopt[6] = IPV6_TLV_PAD1; /* Pad1 */ + hopopt[7] = IPV6_TLV_PAD1; /* Pad1 */ + + skb_put(skb, sizeof(*ip6h) + 8); + + /* ICMPv6 */ + skb_set_transport_header(skb, skb->len); + mldq = (struct mld_msg *) icmp6_hdr(skb); + + interval = ipv6_addr_any(group) ? + br->multicast_query_response_interval : + br->multicast_last_member_interval; + + mldq->mld_type = ICMPV6_MGM_QUERY; + mldq->mld_code = 0; + mldq->mld_cksum = 0; + mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); + mldq->mld_reserved = 0; + mldq->mld_mca = *group; + + /* checksum */ + mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + sizeof(*mldq), IPPROTO_ICMPV6, + csum_partial(mldq, + sizeof(*mldq), 0)); + skb_put(skb, sizeof(*mldq)); + + __skb_pull(skb, sizeof(*eth)); + +out: + return skb; +} +#endif + +static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, + struct br_ip *addr) +{ + switch (addr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_alloc_query(br, addr->u.ip4); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return br_ip6_multicast_alloc_query(br, &addr->u.ip6); +#endif + } + return NULL; +} + +static struct net_bridge_mdb_entry *br_multicast_get_group( + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group, int hash) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + unsigned int count = 0; + unsigned int max; + int elasticity; + int err; + + mdb = rcu_dereference_protected(br->mdb, 1); + hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) { + count++; + if (unlikely(br_ip_equal(group, &mp->addr))) + return mp; + } + + elasticity = 0; + max = mdb->max; + + if (unlikely(count > br->hash_elasticity && count)) { + if (net_ratelimit()) + br_info(br, "Multicast hash table " + "chain limit reached: %s\n", + port ? port->dev->name : br->dev->name); + + elasticity = br->hash_elasticity; + } + + if (mdb->size >= max) { + max *= 2; + if (unlikely(max > br->hash_max)) { + br_warn(br, "Multicast hash table maximum of %d " + "reached, disabling snooping: %s\n", + br->hash_max, + port ? port->dev->name : br->dev->name); + err = -E2BIG; +disable: + br->multicast_disabled = 1; + goto err; + } + } + + if (max > mdb->max || elasticity) { + if (mdb->old) { + if (net_ratelimit()) + br_info(br, "Multicast hash table " + "on fire: %s\n", + port ? port->dev->name : br->dev->name); + err = -EEXIST; + goto err; + } + + err = br_mdb_rehash(&br->mdb, max, elasticity); + if (err) { + br_warn(br, "Cannot rehash multicast " + "hash table, disabling snooping: %s, %d, %d\n", + port ? port->dev->name : br->dev->name, + mdb->size, err); + goto disable; + } + + err = -EAGAIN; + goto err; + } + + return NULL; + +err: + mp = ERR_PTR(err); + return mp; +} + +struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + int hash; + int err; + + mdb = rcu_dereference_protected(br->mdb, 1); + if (!mdb) { + err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0); + if (err) + return ERR_PTR(err); + goto rehash; + } + + hash = br_ip_hash(mdb, group); + mp = br_multicast_get_group(br, port, group, hash); + switch (PTR_ERR(mp)) { + case 0: + break; + + case -EAGAIN: +rehash: + mdb = rcu_dereference_protected(br->mdb, 1); + hash = br_ip_hash(mdb, group); + break; + + default: + goto out; + } + + mp = kzalloc(sizeof(*mp), GFP_ATOMIC); + if (unlikely(!mp)) + return ERR_PTR(-ENOMEM); + + mp->br = br; + mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); + + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); + mdb->size++; + +out: + return mp; +} + +struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group __rcu *next, + unsigned char state) +{ + struct net_bridge_port_group *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + if (unlikely(!p)) + return NULL; + + p->addr = *group; + p->port = port; + p->state = state; + rcu_assign_pointer(p->next, next); + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); + return p; +} + +static int br_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + unsigned long now = jiffies; + int err; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + mp = br_multicast_new_group(br, port, group); + err = PTR_ERR(mp); + if (IS_ERR(mp)) + goto err; + + if (!port) { + mp->mglist = true; + mod_timer(&mp->timer, now + br->multicast_membership_interval); + goto out; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port == port) + goto found; + if ((unsigned long)p->port < (unsigned long)port) + break; + } + + p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); + if (unlikely(!p)) + goto err; + rcu_assign_pointer(*pp, p); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + +found: + mod_timer(&p->timer, now + br->multicast_membership_interval); +out: + err = 0; + +err: + spin_unlock(&br->multicast_lock); + return err; +} + +static int br_ip4_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid) +{ + struct br_ip br_group; + + if (ipv4_is_local_multicast(group)) + return 0; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; + + return br_multicast_add_group(br, port, &br_group); +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_add_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid) +{ + struct br_ip br_group; + + if (ipv6_addr_is_ll_all_nodes(group)) + return 0; + + br_group.u.ip6 = *group; + br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; + + return br_multicast_add_group(br, port, &br_group); +} +#endif + +static void br_multicast_router_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->multicast_router != 1 || + timer_pending(&port->multicast_router_timer) || + hlist_unhashed(&port->rlist)) + goto out; + + hlist_del_init_rcu(&port->rlist); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_local_router_expired(unsigned long data) +{ +} + +static void br_multicast_querier_expired(struct net_bridge *br, + struct bridge_mcast_own_query *query) +{ + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || br->multicast_disabled) + goto out; + + br_multicast_start_querier(br, query); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_querier_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_querier_expired(br, &br->ip4_own_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_querier_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_querier_expired(br, &br->ip6_own_query); +} +#endif + +static void br_multicast_select_own_querier(struct net_bridge *br, + struct br_ip *ip, + struct sk_buff *skb) +{ + if (ip->proto == htons(ETH_P_IP)) + br->ip4_querier.addr.u.ip4 = ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + else + br->ip6_querier.addr.u.ip6 = ipv6_hdr(skb)->saddr; +#endif +} + +static void __br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *ip) +{ + struct sk_buff *skb; + + skb = br_multicast_alloc_query(br, ip); + if (!skb) + return; + + if (port) { + __skb_push(skb, sizeof(struct ethhdr)); + skb->dev = port->dev; + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + dev_queue_xmit); + } else { + br_multicast_select_own_querier(br, ip, skb); + netif_rx(skb); + } +} + +static void br_multicast_send_query(struct net_bridge *br, + struct net_bridge_port *port, + struct bridge_mcast_own_query *own_query) +{ + unsigned long time; + struct br_ip br_group; + struct bridge_mcast_other_query *other_query = NULL; + + if (!netif_running(br->dev) || br->multicast_disabled || + !br->multicast_querier) + return; + + memset(&br_group.u, 0, sizeof(br_group.u)); + + if (port ? (own_query == &port->ip4_own_query) : + (own_query == &br->ip4_own_query)) { + other_query = &br->ip4_other_query; + br_group.proto = htons(ETH_P_IP); +#if IS_ENABLED(CONFIG_IPV6) + } else { + other_query = &br->ip6_other_query; + br_group.proto = htons(ETH_P_IPV6); +#endif + } + + if (!other_query || timer_pending(&other_query->timer)) + return; + + __br_multicast_send_query(br, port, &br_group); + + time = jiffies; + time += own_query->startup_sent < br->multicast_startup_query_count ? + br->multicast_startup_query_interval : + br->multicast_query_interval; + mod_timer(&own_query->timer, time); +} + +static void +br_multicast_port_query_expired(struct net_bridge_port *port, + struct bridge_mcast_own_query *query) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + goto out; + + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; + + br_multicast_send_query(port->br, port, query); + +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + + br_multicast_port_query_expired(port, &port->ip4_own_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_port_query_expired(unsigned long data) +{ + struct net_bridge_port *port = (void *)data; + + br_multicast_port_query_expired(port, &port->ip6_own_query); +} +#endif + +void br_multicast_add_port(struct net_bridge_port *port) +{ + port->multicast_router = 1; + + setup_timer(&port->multicast_router_timer, br_multicast_router_expired, + (unsigned long)port); + setup_timer(&port->ip4_own_query.timer, + br_ip4_multicast_port_query_expired, (unsigned long)port); +#if IS_ENABLED(CONFIG_IPV6) + setup_timer(&port->ip6_own_query.timer, + br_ip6_multicast_port_query_expired, (unsigned long)port); +#endif +} + +void br_multicast_del_port(struct net_bridge_port *port) +{ + del_timer_sync(&port->multicast_router_timer); +} + +static void br_multicast_enable(struct bridge_mcast_own_query *query) +{ + query->startup_sent = 0; + + if (try_to_del_timer_sync(&query->timer) >= 0 || + del_timer(&query->timer)) + mod_timer(&query->timer, jiffies); +} + +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + if (br->multicast_disabled || !netif_running(br->dev)) + goto out; + + br_multicast_enable(&port->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_enable(&port->ip6_own_query); +#endif + +out: + spin_unlock(&br->multicast_lock); +} + +void br_multicast_disable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + struct net_bridge_port_group *pg; + struct hlist_node *n; + + spin_lock(&br->multicast_lock); + hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) + br_multicast_del_pg(br, pg); + + if (!hlist_unhashed(&port->rlist)) + hlist_del_init_rcu(&port->rlist); + del_timer(&port->multicast_router_timer); + del_timer(&port->ip4_own_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer(&port->ip6_own_query.timer); +#endif + spin_unlock(&br->multicast_lock); +} + +static int br_ip4_multicast_igmp3_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + struct igmpv3_report *ih; + struct igmpv3_grec *grec; + int i; + int len; + int num; + int type; + int err = 0; + __be32 group; + + if (!pskb_may_pull(skb, sizeof(*ih))) + return -EINVAL; + + ih = igmpv3_report_hdr(skb); + num = ntohs(ih->ngrec); + len = sizeof(*ih); + + for (i = 0; i < num; i++) { + len += sizeof(*grec); + if (!pskb_may_pull(skb, len)) + return -EINVAL; + + grec = (void *)(skb->data + len - sizeof(*grec)); + group = grec->grec_mca; + type = grec->grec_type; + + len += ntohs(grec->grec_nsrcs) * 4; + if (!pskb_may_pull(skb, len)) + return -EINVAL; + + /* We treat this as an IGMPv2 report for now. */ + switch (type) { + case IGMPV3_MODE_IS_INCLUDE: + case IGMPV3_MODE_IS_EXCLUDE: + case IGMPV3_CHANGE_TO_INCLUDE: + case IGMPV3_CHANGE_TO_EXCLUDE: + case IGMPV3_ALLOW_NEW_SOURCES: + case IGMPV3_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_ip4_multicast_add_group(br, port, group, vid); + if (err) + break; + } + + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_mld2_report(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + struct icmp6hdr *icmp6h; + struct mld2_grec *grec; + int i; + int len; + int num; + int err = 0; + + if (!pskb_may_pull(skb, sizeof(*icmp6h))) + return -EINVAL; + + icmp6h = icmp6_hdr(skb); + num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); + len = sizeof(*icmp6h); + + for (i = 0; i < num; i++) { + __be16 *nsrcs, _nsrcs; + + nsrcs = skb_header_pointer(skb, + len + offsetof(struct mld2_grec, + grec_nsrcs), + sizeof(_nsrcs), &_nsrcs); + if (!nsrcs) + return -EINVAL; + + if (!pskb_may_pull(skb, + len + sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs))) + return -EINVAL; + + grec = (struct mld2_grec *)(skb->data + len); + len += sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs); + + /* We treat these as MLDv1 reports for now. */ + switch (grec->grec_type) { + case MLD2_MODE_IS_INCLUDE: + case MLD2_MODE_IS_EXCLUDE: + case MLD2_CHANGE_TO_INCLUDE: + case MLD2_CHANGE_TO_EXCLUDE: + case MLD2_ALLOW_NEW_SOURCES: + case MLD2_BLOCK_OLD_SOURCES: + break; + + default: + continue; + } + + err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, + vid); + if (!err) + break; + } + + return err; +} +#endif + +static bool br_ip4_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + __be32 saddr) +{ + if (!timer_pending(&br->ip4_own_query.timer) && + !timer_pending(&br->ip4_other_query.timer)) + goto update; + + if (!br->ip4_querier.addr.u.ip4) + goto update; + + if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.u.ip4)) + goto update; + + return false; + +update: + br->ip4_querier.addr.u.ip4 = saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip4_querier.port, port); + + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool br_ip6_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct in6_addr *saddr) +{ + if (!timer_pending(&br->ip6_own_query.timer) && + !timer_pending(&br->ip6_other_query.timer)) + goto update; + + if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.u.ip6) <= 0) + goto update; + + return false; + +update: + br->ip6_querier.addr.u.ip6 = *saddr; + + /* update protected by general multicast_lock by caller */ + rcu_assign_pointer(br->ip6_querier.port, port); + + return true; +} +#endif + +static bool br_multicast_select_querier(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *saddr) +{ + switch (saddr->proto) { + case htons(ETH_P_IP): + return br_ip4_multicast_select_querier(br, port, saddr->u.ip4); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6); +#endif + } + + return false; +} + +static void +br_multicast_update_query_timer(struct net_bridge *br, + struct bridge_mcast_other_query *query, + unsigned long max_delay) +{ + if (!timer_pending(&query->timer)) + query->delay_time = jiffies + max_delay; + + mod_timer(&query->timer, jiffies + br->multicast_querier_interval); +} + +/* + * Add port to router_list + * list is maintained ordered by pointer value + * and locked by br->multicast_lock and RCU + */ +static void br_multicast_add_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + struct net_bridge_port *p; + struct hlist_node *slot = NULL; + + hlist_for_each_entry(p, &br->router_list, rlist) { + if ((unsigned long) port >= (unsigned long) p) + break; + slot = &p->rlist; + } + + if (slot) + hlist_add_after_rcu(slot, &port->rlist); + else + hlist_add_head_rcu(&port->rlist, &br->router_list); +} + +static void br_multicast_mark_router(struct net_bridge *br, + struct net_bridge_port *port) +{ + unsigned long now = jiffies; + + if (!port) { + if (br->multicast_router == 1) + mod_timer(&br->multicast_router_timer, + now + br->multicast_querier_interval); + return; + } + + if (port->multicast_router != 1) + return; + + if (!hlist_unhashed(&port->rlist)) + goto timer; + + br_multicast_add_router(br, port); + +timer: + mod_timer(&port->multicast_router_timer, + now + br->multicast_querier_interval); +} + +static void br_multicast_query_received(struct net_bridge *br, + struct net_bridge_port *port, + struct bridge_mcast_other_query *query, + struct br_ip *saddr, + unsigned long max_delay) +{ + if (!br_multicast_select_querier(br, port, saddr)) + return; + + br_multicast_update_query_timer(br, query, max_delay); + br_multicast_mark_router(br, port); +} + +static int br_ip4_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + const struct iphdr *iph = ip_hdr(skb); + struct igmphdr *ih = igmp_hdr(skb); + struct net_bridge_mdb_entry *mp; + struct igmpv3_query *ih3; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; + unsigned long max_delay; + unsigned long now = jiffies; + __be32 group; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + group = ih->group; + + if (skb->len == sizeof(*ih)) { + max_delay = ih->code * (HZ / IGMP_TIMER_SCALE); + + if (!max_delay) { + max_delay = 10 * HZ; + group = 0; + } + } else { + if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) { + err = -EINVAL; + goto out; + } + + ih3 = igmpv3_query_hdr(skb); + if (ih3->nsrcs) + goto out; + + max_delay = ih3->code ? + IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; + } + + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { + err = -EINVAL; + goto out; + } + + if (!group) { + saddr.proto = htons(ETH_P_IP); + saddr.u.ip4 = iph->saddr; + + br_multicast_query_received(br, port, &br->ip4_other_query, + &saddr, max_delay); + goto out; + } + + mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + + if (mp->mglist && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (timer_pending(&p->timer) ? + time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + mod_timer(&p->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_ip6_multicast_query(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct mld_msg *mld; + struct net_bridge_mdb_entry *mp; + struct mld2_query *mld2q; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip saddr; + unsigned long max_delay; + unsigned long now = jiffies; + const struct in6_addr *group = NULL; + bool is_general_query; + int err = 0; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED)) + goto out; + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + err = -EINVAL; + goto out; + } + + if (skb->len == sizeof(*mld)) { + if (!pskb_may_pull(skb, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *) icmp6_hdr(skb); + max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); + if (max_delay) + group = &mld->mld_mca; + } else { + if (!pskb_may_pull(skb, sizeof(*mld2q))) { + err = -EINVAL; + goto out; + } + mld2q = (struct mld2_query *)icmp6_hdr(skb); + if (!mld2q->mld2q_nsrcs) + group = &mld2q->mld2q_mca; + + max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); + } + + is_general_query = group && ipv6_addr_any(group); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { + err = -EINVAL; + goto out; + } + + if (is_general_query) { + saddr.proto = htons(ETH_P_IPV6); + saddr.u.ip6 = ip6h->saddr; + + br_multicast_query_received(br, port, &br->ip6_other_query, + &saddr, max_delay); + goto out; + } else if (!group) { + goto out; + } + + mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); + if (!mp) + goto out; + + max_delay *= br->multicast_last_member_count; + if (mp->mglist && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, now + max_delay) : + try_to_del_timer_sync(&mp->timer) >= 0)) + mod_timer(&mp->timer, now + max_delay); + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (timer_pending(&p->timer) ? + time_after(p->timer.expires, now + max_delay) : + try_to_del_timer_sync(&p->timer) >= 0) + mod_timer(&p->timer, now + max_delay); + } + +out: + spin_unlock(&br->multicast_lock); + return err; +} +#endif + +static void +br_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + struct br_ip *group, + struct bridge_mcast_other_query *other_query, + struct bridge_mcast_own_query *own_query) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + unsigned long now; + unsigned long time; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || + (port && port->state == BR_STATE_DISABLED) || + timer_pending(&other_query->timer)) + goto out; + + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, group); + if (!mp) + goto out; + + if (br->multicast_querier) { + __br_multicast_send_query(br, port, &mp->addr); + + time = jiffies + br->multicast_last_member_count * + br->multicast_last_member_interval; + + mod_timer(&own_query->timer, time); + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } + } + + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } + goto out; + } + + now = jiffies; + time = now + br->multicast_last_member_count * + br->multicast_last_member_interval; + + if (!port) { + if (mp->mglist && + (timer_pending(&mp->timer) ? + time_after(mp->timer.expires, time) : + try_to_del_timer_sync(&mp->timer) >= 0)) { + mod_timer(&mp->timer, time); + } + + goto out; + } + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } +out: + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + __be32 group, + __u16 vid) +{ + struct br_ip br_group; + struct bridge_mcast_own_query *own_query; + + if (ipv4_is_local_multicast(group)) + return; + + own_query = port ? &port->ip4_own_query : &br->ip4_own_query; + + br_group.u.ip4 = group; + br_group.proto = htons(ETH_P_IP); + br_group.vid = vid; + + br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query, + own_query); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_leave_group(struct net_bridge *br, + struct net_bridge_port *port, + const struct in6_addr *group, + __u16 vid) +{ + struct br_ip br_group; + struct bridge_mcast_own_query *own_query; + + if (ipv6_addr_is_ll_all_nodes(group)) + return; + + own_query = port ? &port->ip6_own_query : &br->ip6_own_query; + + br_group.u.ip6 = *group; + br_group.proto = htons(ETH_P_IPV6); + br_group.vid = vid; + + br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query, + own_query); +} +#endif + +static int br_multicast_ipv4_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + struct sk_buff *skb2 = skb; + const struct iphdr *iph; + struct igmphdr *ih; + unsigned int len; + unsigned int offset; + int err; + + /* We treat OOM as packet loss for now. */ + if (!pskb_may_pull(skb, sizeof(*iph))) + return -EINVAL; + + iph = ip_hdr(skb); + + if (iph->ihl < 5 || iph->version != 4) + return -EINVAL; + + if (!pskb_may_pull(skb, ip_hdrlen(skb))) + return -EINVAL; + + iph = ip_hdr(skb); + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + return -EINVAL; + + if (iph->protocol != IPPROTO_IGMP) { + if (!ipv4_is_local_multicast(iph->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + return 0; + } + + len = ntohs(iph->tot_len); + if (skb->len < len || len < ip_hdrlen(skb)) + return -EINVAL; + + if (skb->len > len) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + err = pskb_trim_rcsum(skb2, len); + if (err) + goto err_out; + } + + len -= ip_hdrlen(skb2); + offset = skb_network_offset(skb2) + ip_hdrlen(skb2); + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + + err = -EINVAL; + if (!pskb_may_pull(skb2, sizeof(*ih))) + goto out; + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_fold(skb2->csum)) + break; + /* fall through */ + case CHECKSUM_NONE: + skb2->csum = 0; + if (skb_checksum_complete(skb2)) + goto out; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + ih = igmp_hdr(skb2); + + switch (ih->type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = br_ip4_multicast_add_group(br, port, ih->group, vid); + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + err = br_ip4_multicast_query(br, port, skb2, vid); + break; + case IGMP_HOST_LEAVE_MESSAGE: + br_ip4_multicast_leave_group(br, port, ih->group, vid); + break; + } + +out: + __skb_push(skb2, offset); +err_out: + if (skb2 != skb) + kfree_skb(skb2); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int br_multicast_ipv6_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + struct sk_buff *skb2; + const struct ipv6hdr *ip6h; + u8 icmp6_type; + u8 nexthdr; + __be16 frag_off; + unsigned int len; + int offset; + int err; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + /* + * We're interested in MLD messages only. + * - Version is 6 + * - MLD has always Router Alert hop-by-hop option + * - But we do not support jumbrograms. + */ + if (ip6h->version != 6) + return 0; + + /* Prevent flooding this packet if there is no listener present */ + if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + + if (ip6h->nexthdr != IPPROTO_HOPOPTS || + ip6h->payload_len == 0) + return 0; + + len = ntohs(ip6h->payload_len) + sizeof(*ip6h); + if (skb->len < len) + return -EINVAL; + + nexthdr = ip6h->nexthdr; + offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off); + + if (offset < 0 || nexthdr != IPPROTO_ICMPV6) + return 0; + + /* Okay, we found ICMPv6 header */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + return -ENOMEM; + + err = -EINVAL; + if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr))) + goto out; + + len -= offset - skb_network_offset(skb2); + + __skb_pull(skb2, offset); + skb_reset_transport_header(skb2); + skb_postpull_rcsum(skb2, skb_network_header(skb2), + skb_network_header_len(skb2)); + + icmp6_type = icmp6_hdr(skb2)->icmp6_type; + + switch (icmp6_type) { + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MLD2_REPORT: + break; + default: + err = 0; + goto out; + } + + /* Okay, we found MLD message. Check further. */ + if (skb2->len > len) { + err = pskb_trim_rcsum(skb2, len); + if (err) + goto out; + err = -EINVAL; + } + + ip6h = ipv6_hdr(skb2); + + switch (skb2->ip_summed) { + case CHECKSUM_COMPLETE: + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, + IPPROTO_ICMPV6, skb2->csum)) + break; + /*FALLTHROUGH*/ + case CHECKSUM_NONE: + skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb2->len, + IPPROTO_ICMPV6, 0)); + if (__skb_checksum_complete(skb2)) + goto out; + } + + err = 0; + + BR_INPUT_SKB_CB(skb)->igmp = 1; + + switch (icmp6_type) { + case ICMPV6_MGM_REPORT: + { + struct mld_msg *mld; + if (!pskb_may_pull(skb2, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *)skb_transport_header(skb2); + BR_INPUT_SKB_CB(skb)->mrouters_only = 1; + err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid); + break; + } + case ICMPV6_MLD2_REPORT: + err = br_ip6_multicast_mld2_report(br, port, skb2, vid); + break; + case ICMPV6_MGM_QUERY: + err = br_ip6_multicast_query(br, port, skb2, vid); + break; + case ICMPV6_MGM_REDUCTION: + { + struct mld_msg *mld; + if (!pskb_may_pull(skb2, sizeof(*mld))) { + err = -EINVAL; + goto out; + } + mld = (struct mld_msg *)skb_transport_header(skb2); + br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid); + } + } + +out: + kfree_skb(skb2); + return err; +} +#endif + +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb, u16 vid) +{ + BR_INPUT_SKB_CB(skb)->igmp = 0; + BR_INPUT_SKB_CB(skb)->mrouters_only = 0; + + if (br->multicast_disabled) + return 0; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return br_multicast_ipv4_rcv(br, port, skb, vid); +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + return br_multicast_ipv6_rcv(br, port, skb, vid); +#endif + } + + return 0; +} + +static void br_multicast_query_expired(struct net_bridge *br, + struct bridge_mcast_own_query *query, + struct bridge_mcast_querier *querier) +{ + spin_lock(&br->multicast_lock); + if (query->startup_sent < br->multicast_startup_query_count) + query->startup_sent++; + + rcu_assign_pointer(querier, NULL); + br_multicast_send_query(br, NULL, query); + spin_unlock(&br->multicast_lock); +} + +static void br_ip4_multicast_query_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); +} + +#if IS_ENABLED(CONFIG_IPV6) +static void br_ip6_multicast_query_expired(unsigned long data) +{ + struct net_bridge *br = (void *)data; + + br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); +} +#endif + +void br_multicast_init(struct net_bridge *br) +{ + br->hash_elasticity = 4; + br->hash_max = 512; + + br->multicast_router = 1; + br->multicast_querier = 0; + br->multicast_query_use_ifaddr = 0; + br->multicast_last_member_count = 2; + br->multicast_startup_query_count = 2; + + br->multicast_last_member_interval = HZ; + br->multicast_query_response_interval = 10 * HZ; + br->multicast_startup_query_interval = 125 * HZ / 4; + br->multicast_query_interval = 125 * HZ; + br->multicast_querier_interval = 255 * HZ; + br->multicast_membership_interval = 260 * HZ; + + br->ip4_other_query.delay_time = 0; + br->ip4_querier.port = NULL; +#if IS_ENABLED(CONFIG_IPV6) + br->ip6_other_query.delay_time = 0; + br->ip6_querier.port = NULL; +#endif + + spin_lock_init(&br->multicast_lock); + setup_timer(&br->multicast_router_timer, + br_multicast_local_router_expired, 0); + setup_timer(&br->ip4_other_query.timer, + br_ip4_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, + (unsigned long)br); +#if IS_ENABLED(CONFIG_IPV6) + setup_timer(&br->ip6_other_query.timer, + br_ip6_multicast_querier_expired, (unsigned long)br); + setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, + (unsigned long)br); +#endif +} + +static void __br_multicast_open(struct net_bridge *br, + struct bridge_mcast_own_query *query) +{ + query->startup_sent = 0; + + if (br->multicast_disabled) + return; + + mod_timer(&query->timer, jiffies); +} + +void br_multicast_open(struct net_bridge *br) +{ + __br_multicast_open(br, &br->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + __br_multicast_open(br, &br->ip6_own_query); +#endif +} + +void br_multicast_stop(struct net_bridge *br) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct hlist_node *n; + u32 ver; + int i; + + del_timer_sync(&br->multicast_router_timer); + del_timer_sync(&br->ip4_other_query.timer); + del_timer_sync(&br->ip4_own_query.timer); +#if IS_ENABLED(CONFIG_IPV6) + del_timer_sync(&br->ip6_other_query.timer); + del_timer_sync(&br->ip6_own_query.timer); +#endif + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + if (!mdb) + goto out; + + br->mdb = NULL; + + ver = mdb->ver; + for (i = 0; i < mdb->max; i++) { + hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], + hlist[ver]) { + del_timer(&mp->timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + } + } + + if (mdb->old) { + spin_unlock_bh(&br->multicast_lock); + rcu_barrier_bh(); + spin_lock_bh(&br->multicast_lock); + WARN_ON(mdb->old); + } + + mdb->old = mdb; + call_rcu_bh(&mdb->rcu, br_mdb_free); + +out: + spin_unlock_bh(&br->multicast_lock); +} + +int br_multicast_set_router(struct net_bridge *br, unsigned long val) +{ + int err = -ENOENT; + + spin_lock_bh(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + switch (val) { + case 0: + case 2: + del_timer(&br->multicast_router_timer); + /* fall through */ + case 1: + br->multicast_router = val; + err = 0; + break; + + default: + err = -EINVAL; + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return err; +} + +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) +{ + struct net_bridge *br = p->br; + int err = -ENOENT; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED) + goto unlock; + + switch (val) { + case 0: + case 1: + case 2: + p->multicast_router = val; + err = 0; + + if (val < 2 && !hlist_unhashed(&p->rlist)) + hlist_del_init_rcu(&p->rlist); + + if (val == 1) + break; + + del_timer(&p->multicast_router_timer); + + if (val == 0) + break; + + br_multicast_add_router(br, p); + break; + + default: + err = -EINVAL; + break; + } + +unlock: + spin_unlock(&br->multicast_lock); + + return err; +} + +static void br_multicast_start_querier(struct net_bridge *br, + struct bridge_mcast_own_query *query) +{ + struct net_bridge_port *port; + + __br_multicast_open(br, query); + + list_for_each_entry(port, &br->port_list, list) { + if (port->state == BR_STATE_DISABLED || + port->state == BR_STATE_BLOCKING) + continue; + + if (query == &br->ip4_own_query) + br_multicast_enable(&port->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + else + br_multicast_enable(&port->ip6_own_query); +#endif + } +} + +int br_multicast_toggle(struct net_bridge *br, unsigned long val) +{ + int err = 0; + struct net_bridge_mdb_htable *mdb; + + spin_lock_bh(&br->multicast_lock); + if (br->multicast_disabled == !val) + goto unlock; + + br->multicast_disabled = !val; + if (br->multicast_disabled) + goto unlock; + + if (!netif_running(br->dev)) + goto unlock; + + mdb = mlock_dereference(br->mdb, br); + if (mdb) { + if (mdb->old) { + err = -EEXIST; +rollback: + br->multicast_disabled = !!val; + goto unlock; + } + + err = br_mdb_rehash(&br->mdb, mdb->max, + br->hash_elasticity); + if (err) + goto rollback; + } + + br_multicast_start_querier(br, &br->ip4_own_query); +#if IS_ENABLED(CONFIG_IPV6) + br_multicast_start_querier(br, &br->ip6_own_query); +#endif + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return err; +} + +int br_multicast_set_querier(struct net_bridge *br, unsigned long val) +{ + unsigned long max_delay; + + val = !!val; + + spin_lock_bh(&br->multicast_lock); + if (br->multicast_querier == val) + goto unlock; + + br->multicast_querier = val; + if (!val) + goto unlock; + + max_delay = br->multicast_query_response_interval; + + if (!timer_pending(&br->ip4_other_query.timer)) + br->ip4_other_query.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip4_own_query); + +#if IS_ENABLED(CONFIG_IPV6) + if (!timer_pending(&br->ip6_other_query.timer)) + br->ip6_other_query.delay_time = jiffies + max_delay; + + br_multicast_start_querier(br, &br->ip6_own_query); +#endif + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return 0; +} + +int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) +{ + int err = -ENOENT; + u32 old; + struct net_bridge_mdb_htable *mdb; + + spin_lock_bh(&br->multicast_lock); + if (!netif_running(br->dev)) + goto unlock; + + err = -EINVAL; + if (!is_power_of_2(val)) + goto unlock; + + mdb = mlock_dereference(br->mdb, br); + if (mdb && val < mdb->size) + goto unlock; + + err = 0; + + old = br->hash_max; + br->hash_max = val; + + if (mdb) { + if (mdb->old) { + err = -EEXIST; +rollback: + br->hash_max = old; + goto unlock; + } + + err = br_mdb_rehash(&br->mdb, br->hash_max, + br->hash_elasticity); + if (err) + goto rollback; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + + return err; +} + +/** + * br_multicast_list_adjacent - Returns snooped multicast addresses + * @dev: The bridge port adjacent to which to retrieve addresses + * @br_ip_list: The list to store found, snooped multicast IP addresses in + * + * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast + * snooping feature on all bridge ports of dev's bridge device, excluding + * the addresses from dev itself. + * + * Returns the number of items added to br_ip_list. + * + * Notes: + * - br_ip_list needs to be initialized by caller + * - br_ip_list might contain duplicates in the end + * (needs to be taken care of by caller) + * - br_ip_list needs to be freed by caller + */ +int br_multicast_list_adjacent(struct net_device *dev, + struct list_head *br_ip_list) +{ + struct net_bridge *br; + struct net_bridge_port *port; + struct net_bridge_port_group *group; + struct br_ip_list *entry; + int count = 0; + + rcu_read_lock(); + if (!br_ip_list || !br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + list_for_each_entry_rcu(port, &br->port_list, list) { + if (!port->dev || port->dev == dev) + continue; + + hlist_for_each_entry_rcu(group, &port->mglist, mglist) { + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + goto unlock; + + entry->addr = group->addr; + list_add(&entry->list, br_ip_list); + count++; + } + } + +unlock: + rcu_read_unlock(); + return count; +} +EXPORT_SYMBOL_GPL(br_multicast_list_adjacent); + +/** + * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port + * @dev: The bridge port adjacent to which to check for a querier + * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6 + * + * Checks whether the given interface has a bridge on top and if so returns + * true if a selected querier is behind one of the other ports of this + * bridge. Otherwise returns false. + */ +bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) +{ + struct net_bridge *br; + struct net_bridge_port *port; + bool ret = false; + + rcu_read_lock(); + if (!br_port_exists(dev)) + goto unlock; + + port = br_port_get_rcu(dev); + if (!port || !port->br) + goto unlock; + + br = port->br; + + switch (proto) { + case ETH_P_IP: + if (!timer_pending(&br->ip4_other_query.timer) || + rcu_dereference(br->ip4_querier.port) == port) + goto unlock; + break; +#if IS_ENABLED(CONFIG_IPV6) + case ETH_P_IPV6: + if (!timer_pending(&br->ip6_other_query.timer) || + rcu_dereference(br->ip6_querier.port) == port) + goto unlock; + break; +#endif + default: + goto unlock; + } + + ret = true; +unlock: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 7cac3fb9f80..a615264cf01 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -3,15 +3,8 @@ * Linux ethernet bridge * * Authors: - * Lennert Buytenhek <buytenh@gnu.org> - * Bart De Schuymer (maintainer) <bdschuym@pandora.be> - * - * Changes: - * Apr 29 2003: physdev module support (bdschuym) - * Jun 19 2003: let arptables see bridged ARP traffic (bdschuym) - * Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge - * (bdschuym) - * Sep 01 2004: add IPv6 filtering (bdschuym) + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer <bdschuym@pandora.be> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -23,24 +16,27 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> +#include <linux/if_pppox.h> +#include <linux/ppp_defs.h> #include <linux/netfilter_bridge.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_arp.h> #include <linux/in_route.h> +#include <linux/inetdevice.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/route.h> #include <asm/uaccess.h> -#include <asm/checksum.h> #include "br_private.h" #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -48,56 +44,301 @@ #define skb_origaddr(skb) (((struct bridge_skb_cb *) \ (skb->nf_bridge->data))->daddr.ipv4) -#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) -#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) - -#define has_bridge_parent(device) ((device)->br_port != NULL) -#define bridge_parent(device) ((device)->br_port->br->dev) +#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) +#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; -static int brnf_call_iptables = 1; -static int brnf_call_ip6tables = 1; -static int brnf_call_arptables = 1; -static int brnf_filter_vlan_tagged = 1; +static int brnf_call_iptables __read_mostly = 1; +static int brnf_call_ip6tables __read_mostly = 1; +static int brnf_call_arptables __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 0; +static int brnf_filter_pppoe_tagged __read_mostly = 0; +static int brnf_pass_vlan_indev __read_mostly = 0; #else -#define brnf_filter_vlan_tagged 1 +#define brnf_call_iptables 1 +#define brnf_call_ip6tables 1 +#define brnf_call_arptables 1 +#define brnf_filter_vlan_tagged 0 +#define brnf_filter_pppoe_tagged 0 +#define brnf_pass_vlan_indev 0 #endif -#define IS_VLAN_IP (skb->protocol == __constant_htons(ETH_P_8021Q) && \ - hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IP) && \ - brnf_filter_vlan_tagged) -#define IS_VLAN_IPV6 (skb->protocol == __constant_htons(ETH_P_8021Q) && \ - hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IPV6) && \ - brnf_filter_vlan_tagged) -#define IS_VLAN_ARP (skb->protocol == __constant_htons(ETH_P_8021Q) && \ - hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_ARP) && \ - brnf_filter_vlan_tagged) - -/* We need these fake structures to make netfilter happy -- - * lots of places assume that skb->dst != NULL, which isn't - * all that unreasonable. - * +#define IS_IP(skb) \ + (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) + +#define IS_IPV6(skb) \ + (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + +#define IS_ARP(skb) \ + (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + +static inline __be16 vlan_proto(const struct sk_buff *skb) +{ + if (vlan_tx_tag_present(skb)) + return skb->protocol; + else if (skb->protocol == htons(ETH_P_8021Q)) + return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; + else + return 0; +} + +#define IS_VLAN_IP(skb) \ + (vlan_proto(skb) == htons(ETH_P_IP) && \ + brnf_filter_vlan_tagged) + +#define IS_VLAN_IPV6(skb) \ + (vlan_proto(skb) == htons(ETH_P_IPV6) && \ + brnf_filter_vlan_tagged) + +#define IS_VLAN_ARP(skb) \ + (vlan_proto(skb) == htons(ETH_P_ARP) && \ + brnf_filter_vlan_tagged) + +static inline __be16 pppoe_proto(const struct sk_buff *skb) +{ + return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + + sizeof(struct pppoe_hdr))); +} + +#define IS_PPPOE_IP(skb) \ + (skb->protocol == htons(ETH_P_PPP_SES) && \ + pppoe_proto(skb) == htons(PPP_IP) && \ + brnf_filter_pppoe_tagged) + +#define IS_PPPOE_IPV6(skb) \ + (skb->protocol == htons(ETH_P_PPP_SES) && \ + pppoe_proto(skb) == htons(PPP_IPV6) && \ + brnf_filter_pppoe_tagged) + +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ +} + +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) +{ +} + +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + return NULL; +} + +static unsigned int fake_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .protocol = cpu_to_be16(ETH_P_IP), + .update_pmtu = fake_update_pmtu, + .redirect = fake_redirect, + .cow_metrics = fake_cow_metrics, + .neigh_lookup = fake_neigh_lookup, + .mtu = fake_mtu, +}; + +/* + * Initialize bogus route table used to keep netfilter happy. * Currently, we fill in the PMTU entry because netfilter * refragmentation needs it, and the rt_flags entry because * ipt_REJECT needs it. Future netfilter modules might - * require us to fill additional fields. */ -static struct net_device __fake_net_device = { - .hard_header_len = ETH_HLEN + * require us to fill additional fields. + */ +static const u32 br_dst_default_metrics[RTAX_MAX] = { + [RTAX_MTU - 1] = 1500, }; -static struct rtable __fake_rtable = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .dev = &__fake_net_device, - .path = &__fake_rtable.u.dst, - .metrics = {[RTAX_MTU - 1] = 1500}, +void br_netfilter_rtable_init(struct net_bridge *br) +{ + struct rtable *rt = &br->fake_rtable; + + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + rt->dst.path = &rt->dst; + dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; + rt->dst.ops = &fake_dst_ops; +} + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? &port->br->fake_rtable : NULL; +} + +static inline struct net_device *bridge_parent(const struct net_device *dev) +{ + struct net_bridge_port *port; + + port = br_port_get_rcu(dev); + return port ? port->br->dev : NULL; +} + +static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) +{ + skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); + if (likely(skb->nf_bridge)) + atomic_set(&(skb->nf_bridge->use), 1); + + return skb->nf_bridge; +} + +static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + + if (atomic_read(&nf_bridge->use) > 1) { + struct nf_bridge_info *tmp = nf_bridge_alloc(skb); + + if (tmp) { + memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); + atomic_set(&tmp->use, 1); } - }, - .rt_flags = 0, -}; + nf_bridge_put(nf_bridge); + nf_bridge = tmp; + } + return nf_bridge; +} + +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull(skb, len); + skb->network_header += len; +} + +static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull_rcsum(skb, len); + skb->network_header += len; +} + +static inline void nf_bridge_save_header(struct sk_buff *skb) +{ + int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + + skb_copy_from_linear_data_offset(skb, -header_size, + skb->nf_bridge->data, header_size); +} + +static inline void nf_bridge_update_protocol(struct sk_buff *skb) +{ + if (skb->nf_bridge->mask & BRNF_8021Q) + skb->protocol = htons(ETH_P_8021Q); + else if (skb->nf_bridge->mask & BRNF_PPPoE) + skb->protocol = htons(ETH_P_PPP_SES); +} + +/* When handing a packet over to the IP layer + * check whether we have a skb that is in the + * expected format + */ +static int br_parse_ip_options(struct sk_buff *skb) +{ + struct ip_options *opt; + const struct iphdr *iph; + struct net_device *dev = skb->dev; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto inhdr_error; + + iph = ip_hdr(skb); + opt = &(IPCB(skb)->opt); + + /* Basic sanity checks */ + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + if (!pskb_may_pull(skb, iph->ihl*4)) + goto inhdr_error; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto inhdr_error; + + len = ntohs(iph->tot_len); + if (skb->len < len) { + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; + } else if (len < (iph->ihl*4)) + goto inhdr_error; + + if (pskb_trim_rcsum(skb, len)) { + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + goto drop; + } + + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (iph->ihl == 5) + return 0; + + opt->optlen = iph->ihl*4 - sizeof(struct iphdr); + if (ip_options_compile(dev_net(dev), opt, skb)) + goto inhdr_error; + + /* Check correct handling of SRR option */ + if (unlikely(opt->srr)) { + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) + goto drop; + + if (ip_options_rcv_srr(skb)) + goto drop; + } + + return 0; + +inhdr_error: + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); +drop: + return -1; +} + +/* Fill in the header for fragmented IP packets handled by + * the IPv4 connection tracking code. + */ +int nf_bridge_copy_header(struct sk_buff *skb) +{ + int err; + unsigned int header_size; + + nf_bridge_update_protocol(skb); + header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); + err = skb_cow_head(skb, header_size); + if (err) + return err; + + skb_copy_to_linear_data_offset(skb, -header_size, + skb->nf_bridge->data, header_size); + __skb_push(skb, nf_bridge_encap_header_len(skb)); + return 0; +} /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the @@ -105,6 +346,7 @@ static struct rtable __fake_rtable = { static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct rtable *rt; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; @@ -112,34 +354,68 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - skb->dst = (struct dst_entry *)&__fake_rtable; - dst_hold(skb->dst); + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); skb->dev = nf_bridge->physindev; - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; } -static void __br_dnat_complain(void) +/* Obtain the correct destination MAC address, while preserving the original + * source MAC address. If we already know this address, we just copy it. If we + * don't, we use the neighbour framework to find out. In both cases, we make + * sure that br_handle_frame_finish() is called afterwards. + */ +static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) { - static unsigned long last_complaint; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct neighbour *neigh; + struct dst_entry *dst; + + skb->dev = bridge_parent(skb->dev); + if (!skb->dev) + goto free_skb; + dst = skb_dst(skb); + neigh = dst_neigh_lookup_skb(dst, skb); + if (neigh) { + int ret; - if (jiffies - last_complaint >= 5 * HZ) { - printk(KERN_WARNING "Performing cross-bridge DNAT requires IP " - "forwarding to be enabled\n"); - last_complaint = jiffies; + if (neigh->hh.hh_len) { + neigh_hh_bridge(&neigh->hh, skb); + skb->dev = nf_bridge->physindev; + ret = br_handle_frame_finish(skb); + } else { + /* the neighbour function below overwrites the complete + * MAC header, so we save the Ethernet source address and + * protocol number. + */ + skb_copy_from_linear_data_offset(skb, + -(ETH_HLEN-ETH_ALEN), + skb->nf_bridge->data, + ETH_HLEN-ETH_ALEN); + /* tell br_dev_xmit to continue with forwarding */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + ret = neigh->output(neigh, skb); + } + neigh_release(neigh); + return ret; } +free_skb: + kfree_skb(skb); + return 0; } /* This requires some explaining. If DNAT has taken place, - * we will need to fix up the destination Ethernet address, - * and this is a tricky process. + * we will need to fix up the destination Ethernet address. * * There are two cases to consider: * 1. The packet was DNAT'ed to a device in the same bridge @@ -153,124 +429,120 @@ static void __br_dnat_complain(void) * call ip_route_input() and to look at skb->dst->dev, which is * changed to the destination device if ip_route_input() succeeds. * - * Let us first consider the case that ip_route_input() succeeds: - * - * If skb->dst->dev equals the logical bridge device the packet - * came in on, we can consider this bridging. We then call - * skb->dst->output() which will make the packet enter br_nf_local_out() - * not much later. In that function it is assured that the iptables - * FORWARD chain is traversed for the packet. + * Let's first consider the case that ip_route_input() succeeds: * + * If the output device equals the logical bridge device the packet + * came in on, we can consider this bridging. The corresponding MAC + * address will be obtained in br_nf_pre_routing_finish_bridge. * Otherwise, the packet is considered to be routed and we just * change the destination MAC address so that the packet will - * later be passed up to the IP stack to be routed. + * later be passed up to the IP stack to be routed. For a redirected + * packet, ip_route_input() will give back the localhost as output device, + * which differs from the bridge device. * - * Let us now consider the case that ip_route_input() fails: + * Let's now consider the case that ip_route_input() fails: * - * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() - * will fail, while __ip_route_output_key() will return success. The source - * address for __ip_route_output_key() is set to zero, so __ip_route_output_key + * This can be because the destination address is martian, in which case + * the packet will be dropped. + * If IP forwarding is disabled, ip_route_input() will fail, while + * ip_route_output_key() can return success. The source + * address for ip_route_output_key() is set to zero, so ip_route_output_key() * thinks we're handling a locally generated packet and won't care - * if IP forwarding is allowed. We send a warning message to the users's - * log telling her to put IP forwarding on. - * - * ip_route_input() will also fail if there is no route available. - * In that case we just drop the packet. - * - * --Lennert, 20020411 - * --Bart, 20020416 (updated) - * --Bart, 20021007 (updated) */ -static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) -{ - if (skb->pkt_type == PACKET_OTHERHOST) { - skb->pkt_type = PACKET_HOST; - skb->nf_bridge->mask |= BRNF_PKT_TYPE; - } - skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - - skb->dev = bridge_parent(skb->dev); - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->nh.raw += VLAN_HLEN; - } - skb->dst->output(skb); - return 0; -} - + * if IP forwarding is enabled. If the output device equals the logical bridge + * device, we proceed as if ip_route_input() succeeded. If it differs from the + * logical bridge port or if ip_route_output_key() fails we drop the packet. + */ static int br_nf_pre_routing_finish(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct iphdr *iph = skb->nh.iph; + struct iphdr *iph = ip_hdr(skb); struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct rtable *rt; + int err; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - if (dnat_took_place(skb)) { - if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, - dev)) { - struct rtable *rt; - struct flowi fl = { .nl_u = - { .ip4_u = { .daddr = iph->daddr, .saddr = 0 , - .tos = RT_TOS(iph->tos)} }, .proto = 0}; - - if (!ip_route_output_key(&rt, &fl)) { + if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + /* If err equals -EHOSTUNREACH the error is due to a + * martian destination or due to the fact that + * forwarding is disabled. For most martian packets, + * ip_route_output_key() will fail. It won't fail for 2 types of + * martian destinations: loopback destinations and destination + * 0.0.0.0. In both cases the packet will be dropped because the + * destination is the loopback device and not the bridge. */ + if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) + goto free_skb; + + rt = ip_route_output(dev_net(dev), iph->daddr, 0, + RT_TOS(iph->tos), 0); + if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. - * - Deal with redirected traffic. */ - if (((struct dst_entry *)rt)->dev == dev || - rt->rt_type == RTN_LOCAL) { - skb->dst = (struct dst_entry *)rt; + * require ip_forwarding. */ + if (rt->dst.dev == dev) { + skb_dst_set(skb, &rt->dst); goto bridged_dnat; } - __br_dnat_complain(); - dst_release((struct dst_entry *)rt); + ip_rt_put(rt); } +free_skb: kfree_skb(skb); return 0; } else { - if (skb->dst->dev == dev) { + if (skb_dst(skb)->dev == dev) { bridged_dnat: - /* Tell br_nf_local_out this is a - * bridged frame */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; skb->dev = nf_bridge->physindev; - if (skb->protocol == - __constant_htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, + NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; } - memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, - ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); skb->pkt_type = PACKET_HOST; } } else { - skb->dst = (struct dst_entry *)&__fake_rtable; - dst_hold(skb->dst); + rt = bridge_parent_rtable(nf_bridge->physindev); + if (!rt) { + kfree_skb(skb); + return 0; + } + skb_dst_set_noref(skb, &rt->dst); } skb->dev = nf_bridge->physindev; - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); return 0; } +static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev) +{ + struct net_device *vlan, *br; + + br = bridge_parent(dev); + if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + return br; + + vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, + vlan_tx_tag_get(skb) & VLAN_VID_MASK); + + return vlan ? vlan : br; +} + /* Some common code for IPv4/IPv6 */ -static void setup_pre_routing(struct sk_buff *skb) +static struct net_device *setup_pre_routing(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; @@ -281,16 +553,25 @@ static void setup_pre_routing(struct sk_buff *skb) nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; - skb->dev = bridge_parent(skb->dev); + skb->dev = brnf_get_logical_dev(skb, skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) + nf_bridge->mask |= BRNF_8021Q; + else if (skb->protocol == htons(ETH_P_PPP_SES)) + nf_bridge->mask |= BRNF_PPPoE; + + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return skb->dev; } /* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */ static int check_hbh_len(struct sk_buff *skb) { - unsigned char *raw = (u8*)(skb->nh.ipv6h+1); + unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); u32 pkt_len; - int off = raw - skb->nh.raw; - int len = (raw[1]+1)<<3; + const unsigned char *nh = skb_network_header(skb); + int off = raw - nh; + int len = (raw[1] + 1) << 3; if ((raw + len) - skb->data > skb_headlen(skb)) goto bad; @@ -299,10 +580,10 @@ static int check_hbh_len(struct sk_buff *skb) len -= 2; while (len > 0) { - int optlen = skb->nh.raw[off+1]+2; + int optlen = nh[off + 1] + 2; - switch (skb->nh.raw[off]) { - case IPV6_TLV_PAD0: + switch (nh[off]) { + case IPV6_TLV_PAD1: optlen = 1; break; @@ -310,17 +591,18 @@ static int check_hbh_len(struct sk_buff *skb) break; case IPV6_TLV_JUMBO: - if (skb->nh.raw[off+1] != 4 || (off&3) != 2) + if (nh[off + 1] != 4 || (off & 3) != 2) goto bad; - pkt_len = ntohl(*(u32*)(skb->nh.raw+off+2)); + pkt_len = ntohl(*(__be32 *) (nh + off + 2)); if (pkt_len <= IPV6_MAXPLEN || - skb->nh.ipv6h->payload_len) + ipv6_hdr(skb)->payload_len) goto bad; if (pkt_len > skb->len - sizeof(struct ipv6hdr)) goto bad; if (pskb_trim_rcsum(skb, - pkt_len+sizeof(struct ipv6hdr))) + pkt_len + sizeof(struct ipv6hdr))) goto bad; + nh = skb_network_header(skb); break; default: if (optlen > len) @@ -339,52 +621,48 @@ bad: /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ -static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, - struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct ipv6hdr *hdr; + const struct ipv6hdr *hdr; u32 pkt_len; - struct nf_bridge_info *nf_bridge; if (skb->len < sizeof(struct ipv6hdr)) - goto inhdr_error; + return NF_DROP; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto inhdr_error; + return NF_DROP; - hdr = skb->nh.ipv6h; + hdr = ipv6_hdr(skb); if (hdr->version != 6) - goto inhdr_error; + return NF_DROP; pkt_len = ntohs(hdr->payload_len); if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - goto inhdr_error; - if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))) - goto inhdr_error; - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + return NF_DROP; + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + return NF_DROP; } if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) - goto inhdr_error; + return NF_DROP; - nf_bridge_put(skb->nf_bridge); - if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb)) return NF_DROP; - setup_pre_routing(skb); - NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL, + skb->protocol = htons(ETH_P_IPV6); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); return NF_STOLEN; - -inhdr_error: - return NF_DROP; } /* Direct IPv6 traffic to br_nf_pre_routing_ipv6. @@ -393,90 +671,55 @@ inhdr_error: * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; - __u32 len; - struct sk_buff *skb = *pskb; - struct nf_bridge_info *nf_bridge; - struct vlan_ethhdr *hdr = vlan_eth_hdr(*pskb); + struct net_bridge_port *p; + struct net_bridge *br; + __u32 len = nf_bridge_encap_header_len(skb); - if (skb->protocol == __constant_htons(ETH_P_IPV6) || IS_VLAN_IPV6) { -#ifdef CONFIG_SYSCTL - if (!brnf_call_ip6tables) - return NF_ACCEPT; -#endif - if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) - goto out; - - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - u8 *vhdr = skb->data; - skb_pull(skb, VLAN_HLEN); - skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); - skb->nh.raw += VLAN_HLEN; - } - return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); - } -#ifdef CONFIG_SYSCTL - if (!brnf_call_iptables) - return NF_ACCEPT; -#endif + if (unlikely(!pskb_may_pull(skb, len))) + return NF_DROP; - if (skb->protocol != __constant_htons(ETH_P_IP) && !IS_VLAN_IP) - return NF_ACCEPT; + p = br_port_get_rcu(in); + if (p == NULL) + return NF_DROP; + br = p->br; - if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) - goto out; + if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) { + if (!brnf_call_ip6tables && !br->nf_call_ip6tables) + return NF_ACCEPT; - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - u8 *vhdr = skb->data; - skb_pull(skb, VLAN_HLEN); - skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); - skb->nh.raw += VLAN_HLEN; + nf_bridge_pull_encap_header_rcsum(skb); + return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); } - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto inhdr_error; - - iph = skb->nh.iph; - if (iph->ihl < 5 || iph->version != 4) - goto inhdr_error; - - if (!pskb_may_pull(skb, 4*iph->ihl)) - goto inhdr_error; + if (!brnf_call_iptables && !br->nf_call_iptables) + return NF_ACCEPT; - iph = skb->nh.iph; - if (ip_fast_csum((__u8 *)iph, iph->ihl) != 0) - goto inhdr_error; + if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb)) + return NF_ACCEPT; - len = ntohs(iph->tot_len); - if (skb->len < len || len < 4*iph->ihl) - goto inhdr_error; + nf_bridge_pull_encap_header_rcsum(skb); - if (skb->len > len) { - __pskb_trim(skb, len); - if (skb->ip_summed == CHECKSUM_HW) - skb->ip_summed = CHECKSUM_NONE; - } + if (br_parse_ip_options(skb)) + return NF_DROP; - nf_bridge_put(skb->nf_bridge); - if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) + nf_bridge_put(skb->nf_bridge); + if (!nf_bridge_alloc(skb)) + return NF_DROP; + if (!setup_pre_routing(skb)) return NF_DROP; - setup_pre_routing(skb); store_orig_dstaddr(skb); + skb->protocol = htons(ETH_P_IP); - NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; - -inhdr_error: -// IP_INC_STATS_BH(IpInHdrErrors); -out: - return NF_DROP; } @@ -487,72 +730,75 @@ out: * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - - if (skb->dst == (struct dst_entry *)&__fake_rtable) { - dst_release(skb->dst); - skb->dst = NULL; - } - + br_drop_fake_rtable(skb); return NF_ACCEPT; } - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *in; - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - if (skb->protocol != __constant_htons(ETH_P_ARP) && !IS_VLAN_ARP) { + if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { in = nf_bridge->physindev; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } + nf_bridge_update_protocol(skb); } else { in = *((struct net_device **)(skb->cb)); } - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, - skb->dev, br_forward_finish, 1); + nf_bridge_push_encap_header(skb); + + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, + skb->dev, br_forward_finish, 1); return 0; } + /* This is the 'purely bridged' case. For IP, we pass the packet to * netfilter with indev and outdev set to the bridge device, * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - int pf; + struct net_device *parent; + u_int8_t pf; if (!skb->nf_bridge) return NF_ACCEPT; - if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) - pf = PF_INET; + /* Need exclusive nf_bridge_info since we might have multiple + * different physoutdevs. */ + if (!nf_bridge_unshare(skb)) + return NF_DROP; + + parent = bridge_parent(out); + if (!parent) + return NF_DROP; + + if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) + pf = NFPROTO_IPV4; + else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) + pf = NFPROTO_IPV6; else - pf = PF_INET6; + return NF_ACCEPT; - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(*pskb, VLAN_HLEN); - (*pskb)->nh.raw += VLAN_HLEN; - } + nf_bridge_pull_encap_header(skb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -560,200 +806,106 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, nf_bridge->mask |= BRNF_PKT_TYPE; } + if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) + return NF_DROP; + /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; + if (pf == NFPROTO_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), - bridge_parent(out), br_nf_forward_finish); + NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent, + br_nf_forward_finish); return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct net_bridge_port *p; + struct net_bridge *br; struct net_device **d = (struct net_device **)(skb->cb); -#ifdef CONFIG_SYSCTL - if (!brnf_call_arptables) + p = br_port_get_rcu(out); + if (p == NULL) + return NF_ACCEPT; + br = p->br; + + if (!brnf_call_arptables && !br->nf_call_arptables) return NF_ACCEPT; -#endif - if (skb->protocol != __constant_htons(ETH_P_ARP)) { - if (!IS_VLAN_ARP) + if (!IS_ARP(skb)) { + if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - skb_pull(*pskb, VLAN_HLEN); - (*pskb)->nh.raw += VLAN_HLEN; + nf_bridge_pull_encap_header(skb); } - if (skb->nh.arph->ar_pln != 4) { - if (IS_VLAN_ARP) { - skb_push(*pskb, VLAN_HLEN); - (*pskb)->nh.raw -= VLAN_HLEN; - } + if (arp_hdr(skb)->ar_pln != 4) { + if (IS_VLAN_ARP(skb)) + nf_bridge_push_encap_header(skb); return NF_ACCEPT; } *d = (struct net_device *)in; - NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, (struct net_device *)out, br_nf_forward_finish); return NF_STOLEN; } - -/* PF_BRIDGE/LOCAL_OUT ***********************************************/ -static int br_nf_local_out_finish(struct sk_buff *skb) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } + int ret; - NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - br_forward_finish, NF_BR_PRI_FIRST + 1); + if (skb->protocol == htons(ETH_P_IP) && + skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && + !skb_is_gso(skb)) { + if (br_parse_ip_options(skb)) + /* Drop invalid packet */ + return NF_DROP; + ret = ip_fragment(skb, br_dev_queue_push_xmit); + } else + ret = br_dev_queue_push_xmit(skb); - return 0; + return ret; } - -/* This function sees both locally originated IP packets and forwarded - * IP packets (in both cases the destination device is a bridge - * device). It also sees bridged-and-DNAT'ed packets. - * To be able to filter on the physical bridge devices (with the physdev - * module), we steal packets destined to a bridge device away from the - * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, - * when we have determined the real output device. This is done in here. - * - * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged - * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() - * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority - * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor - * will be executed. - * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched - * this packet before, and so the packet was locally originated. We fake - * the PF_INET/LOCAL_OUT hook. - * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, - * so we fake the PF_INET/FORWARD hook. ip_sabotage_out() makes sure - * even routed packets that didn't arrive on a bridge interface have their - * nf_bridge->physindev set. */ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +#else +static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - struct net_device *realindev, *realoutdev; - struct sk_buff *skb = *pskb; - struct nf_bridge_info *nf_bridge; - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); - int pf; - - if (!skb->nf_bridge) - return NF_ACCEPT; - - if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) - pf = PF_INET; - else - pf = PF_INET6; - -#ifdef CONFIG_NETFILTER_DEBUG - /* Sometimes we get packets with NULL ->dst here (for example, - * running a dhcp client daemon triggers this). This should now - * be fixed, but let's keep the check around. */ - if (skb->dst == NULL) { - printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); - return NF_ACCEPT; - } -#endif - - nf_bridge = skb->nf_bridge; - nf_bridge->physoutdev = skb->dev; - realindev = nf_bridge->physindev; - - /* Bridged, take PF_BRIDGE/FORWARD. - * (see big note in front of br_nf_pre_routing_finish) */ - if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { - if (nf_bridge->mask & BRNF_PKT_TYPE) { - skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; - } - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->nh.raw -= VLAN_HLEN; - } - - NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, - skb->dev, br_forward_finish); - goto out; - } - realoutdev = bridge_parent(skb->dev); - -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* iptables should match -o br0.x */ - if (nf_bridge->netoutdev) - realoutdev = nf_bridge->netoutdev; -#endif - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - (*pskb)->nh.raw += VLAN_HLEN; - } - /* IP forwarded traffic has a physindev, locally - * generated traffic hasn't. */ - if (realindev != NULL) { - if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT) && - has_bridge_parent(realindev)) - realindev = bridge_parent(realindev); - - NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); - } else { - NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev, - realoutdev, br_nf_local_out_finish, - NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); - } - -out: - return NF_STOLEN; + return br_dev_queue_push_xmit(skb); } - +#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct sk_buff *skb = *pskb; - struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; - struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); - int pf; - -#ifdef CONFIG_NETFILTER_DEBUG - /* Be very paranoid. This probably won't happen anymore, but let's - * keep the check just to be sure... */ - if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { - printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " - "bad mac.raw pointer."); - goto print_error; - } -#endif + u_int8_t pf; - if (!nf_bridge) + if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) return NF_ACCEPT; - if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) - pf = PF_INET; - else - pf = PF_INET6; + if (!realoutdev) + return NF_DROP; -#ifdef CONFIG_NETFILTER_DEBUG - if (skb->dst == NULL) { - printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); - goto print_error; - } -#endif + if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) + pf = NFPROTO_IPV4; + else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) + pf = NFPROTO_IPV6; + else + return NF_ACCEPT; /* We assume any code from br_dev_queue_push_xmit onwards doesn't care * about the value of skb->pkt_type. */ @@ -762,298 +914,183 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (skb->protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->nh.raw += VLAN_HLEN; - } - + nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); + if (pf == NFPROTO_IPV4) + skb->protocol = htons(ETH_P_IP); + else + skb->protocol = htons(ETH_P_IPV6); -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - if (nf_bridge->netoutdev) - realoutdev = nf_bridge->netoutdev; -#endif - NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev, - br_dev_queue_push_xmit); + NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, + br_nf_dev_queue_xmit); return NF_STOLEN; - -#ifdef CONFIG_NETFILTER_DEBUG -print_error: - if (skb->dev != NULL) { - printk("[%s]", skb->dev->name); - if (has_bridge_parent(skb->dev)) - printk("[%s]", bridge_parent(skb->dev)->name); - } - printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, - skb->data); - return NF_ACCEPT; -#endif } - /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. */ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - if ((*pskb)->nf_bridge && - !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + if (skb->nf_bridge && + !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; } return NF_ACCEPT; } -/* Postpone execution of PF_INET(6)/FORWARD, PF_INET(6)/LOCAL_OUT - * and PF_INET(6)/POST_ROUTING until we have done the forwarding - * decision in the bridge code and have determined nf_bridge->physoutdev. */ -static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *skb = *pskb; - - if ((out->hard_start_xmit == br_dev_xmit && - okfn != br_nf_forward_finish && - okfn != br_nf_local_out_finish && - okfn != br_dev_queue_push_xmit) -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - || ((out->priv_flags & IFF_802_1Q_VLAN) && - VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) -#endif - ) { - struct nf_bridge_info *nf_bridge; - - if (!skb->nf_bridge) { -#ifdef CONFIG_SYSCTL - /* This code is executed while in the IP(v6) stack, - the version should be 4 or 6. We can't use - skb->protocol because that isn't set on - PF_INET(6)/LOCAL_OUT. */ - struct iphdr *ip = skb->nh.iph; - - if (ip->version == 4 && !brnf_call_iptables) - return NF_ACCEPT; - else if (ip->version == 6 && !brnf_call_ip6tables) - return NF_ACCEPT; -#endif - if (hook == NF_IP_POST_ROUTING) - return NF_ACCEPT; - if (!nf_bridge_alloc(skb)) - return NF_DROP; - } - - nf_bridge = skb->nf_bridge; - - /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we - * will need the indev then. For a brouter, the real indev - * can be a bridge port, so we make sure br_nf_local_out() - * doesn't use the bridge parent of the indev by using - * the BRNF_DONT_TAKE_PARENT mask. */ - if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { - nf_bridge->mask |= BRNF_DONT_TAKE_PARENT; - nf_bridge->physindev = (struct net_device *)in; - } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - /* the iptables outdev is br0.x, not br0 */ - if (out->priv_flags & IFF_802_1Q_VLAN) - nf_bridge->netoutdev = (struct net_device *)out; -#endif - return NF_STOP; - } - - return NF_ACCEPT; -} - -/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent - * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. - * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because - * ip_refrag() can return NF_STOLEN. */ -static struct nf_hook_ops br_nf_ops[] = { - { .hook = br_nf_pre_routing, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_PRE_ROUTING, - .priority = NF_BR_PRI_BRNF, }, - { .hook = br_nf_local_in, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, }, - { .hook = br_nf_forward_ip, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_FORWARD, - .priority = NF_BR_PRI_BRNF - 1, }, - { .hook = br_nf_forward_arp, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_FORWARD, - .priority = NF_BR_PRI_BRNF, }, - { .hook = br_nf_local_out, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_LOCAL_OUT, - .priority = NF_BR_PRI_FIRST, }, - { .hook = br_nf_post_routing, - .owner = THIS_MODULE, - .pf = PF_BRIDGE, - .hooknum = NF_BR_POST_ROUTING, - .priority = NF_BR_PRI_LAST, }, - { .hook = ip_sabotage_in, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_FIRST, }, - { .hook = ip_sabotage_in, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_PRE_ROUTING, - .priority = NF_IP6_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_FORWARD, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_FORWARD, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_LOCAL_OUT, - .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_FIRST, }, - { .hook = ip_sabotage_out, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_IP6_POST_ROUTING, - .priority = NF_IP6_PRI_FIRST, }, +/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * br_dev_queue_push_xmit is called afterwards */ +static struct nf_hook_ops br_nf_ops[] __read_mostly = { + { + .hook = br_nf_pre_routing, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_local_in, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_forward_ip, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF - 1, + }, + { + .hook = br_nf_forward_arp, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF, + }, + { + .hook = br_nf_post_routing, + .owner = THIS_MODULE, + .pf = NFPROTO_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_LAST, + }, + { + .hook = ip_sabotage_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_FIRST, + }, + { + .hook = ip_sabotage_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_FIRST, + }, }; #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table *ctl, int write, struct file * filp, - void __user *buffer, size_t *lenp, loff_t *ppos) +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write && *(int *)(ctl->data)) *(int *)(ctl->data) = 1; return ret; } -static ctl_table brnf_table[] = { +static struct ctl_table brnf_table[] = { { - .ctl_name = NET_BRIDGE_NF_CALL_ARPTABLES, .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_CALL_IPTABLES, .procname = "bridge-nf-call-iptables", .data = &brnf_call_iptables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_CALL_IP6TABLES, .procname = "bridge-nf-call-ip6tables", .data = &brnf_call_ip6tables, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, { - .ctl_name = NET_BRIDGE_NF_FILTER_VLAN_TAGGED, .procname = "bridge-nf-filter-vlan-tagged", .data = &brnf_filter_vlan_tagged, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &brnf_sysctl_call_tables, + .proc_handler = brnf_sysctl_call_tables, }, - { .ctl_name = 0 } -}; - -static ctl_table brnf_bridge_table[] = { { - .ctl_name = NET_BRIDGE, - .procname = "bridge", - .mode = 0555, - .child = brnf_table, + .procname = "bridge-nf-filter-pppoe-tagged", + .data = &brnf_filter_pppoe_tagged, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, }, - { .ctl_name = 0 } -}; - -static ctl_table brnf_net_table[] = { { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = brnf_bridge_table, + .procname = "bridge-nf-pass-vlan-input-dev", + .data = &brnf_pass_vlan_indev, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = brnf_sysctl_call_tables, }, - { .ctl_name = 0 } + { } }; #endif -int br_netfilter_init(void) +int __init br_netfilter_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) { - int ret; - - if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0) - continue; + int ret; - while (i--) - nf_unregister_hook(&br_nf_ops[i]); + ret = dst_entries_init(&fake_dst_ops); + if (ret < 0) + return ret; + ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret < 0) { + dst_entries_destroy(&fake_dst_ops); return ret; } - #ifdef CONFIG_SYSCTL - brnf_sysctl_header = register_sysctl_table(brnf_net_table, 0); + brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); if (brnf_sysctl_header == NULL) { - printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) - nf_unregister_hook(&br_nf_ops[i]); - return -EFAULT; + printk(KERN_WARNING + "br_netfilter: can't register to sysctl.\n"); + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + dst_entries_destroy(&fake_dst_ops); + return -ENOMEM; } #endif - printk(KERN_NOTICE "Bridge firewalling registered\n"); - return 0; } void br_netfilter_fini(void) { - int i; - - for (i = ARRAY_SIZE(br_nf_ops) - 1; i >= 0; i--) - nf_unregister_hook(&br_nf_ops[i]); + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); #ifdef CONFIG_SYSCTL - unregister_sysctl_table(brnf_sysctl_header); + unregister_net_sysctl_table(brnf_sysctl_header); #endif + dst_entries_destroy(&fake_dst_ops); } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c new file mode 100644 index 00000000000..26edb518b83 --- /dev/null +++ b/net/bridge/br_netlink.c @@ -0,0 +1,521 @@ +/* + * Bridge netlink control interface + * + * Authors: + * Stephen Hemminger <shemminger@osdl.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/etherdevice.h> +#include <net/rtnetlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <uapi/linux/if_bridge.h> + +#include "br_private.h" +#include "br_private_stp.h" + +static inline size_t br_port_info_size(void) +{ + return nla_total_size(1) /* IFLA_BRPORT_STATE */ + + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + + nla_total_size(4) /* IFLA_BRPORT_COST */ + + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + 0; +} + +static inline size_t br_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */ +} + +static int br_port_fill_attrs(struct sk_buff *skb, + const struct net_bridge_port *p) +{ + u8 mode = !!(p->flags & BR_HAIRPIN_MODE); + + if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || + nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || + nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || + nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) + return -EMSGSIZE; + + return 0; +} + +/* + * Create one netlink message for one interface + * Contains port and master info as well as carrier and bridge state. + */ +static int br_fill_ifinfo(struct sk_buff *skb, + const struct net_bridge_port *port, + u32 pid, u32 seq, int event, unsigned int flags, + u32 filter_mask, const struct net_device *dev) +{ + const struct net_bridge *br; + struct ifinfomsg *hdr; + struct nlmsghdr *nlh; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + + if (port) + br = port->br; + else + br = netdev_priv(dev); + + br_debug(br, "br_fill_info event %d port %s master %s\n", + event, dev->name, br->dev->name); + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + if (nlh == NULL) + return -EMSGSIZE; + + hdr = nlmsg_data(nlh); + hdr->ifi_family = AF_BRIDGE; + hdr->__ifi_pad = 0; + hdr->ifi_type = dev->type; + hdr->ifi_index = dev->ifindex; + hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_change = 0; + + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_MASTER, br->dev->ifindex) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u8(skb, IFLA_OPERSTATE, operstate) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink))) + goto nla_put_failure; + + if (event == RTM_NEWLINK && port) { + struct nlattr *nest + = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + + if (nest == NULL || br_port_fill_attrs(skb, port) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + + /* Check if the VID information is requested */ + if (filter_mask & RTEXT_FILTER_BRVLAN) { + struct nlattr *af; + const struct net_port_vlans *pv; + struct bridge_vlan_info vinfo; + u16 vid; + u16 pvid; + + if (port) + pv = nbp_get_vlan_info(port); + else + pv = br_get_vlan_info(br); + + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) + goto done; + + af = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af) + goto nla_put_failure; + + pvid = br_get_pvid(pv); + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + vinfo.vid = vid; + vinfo.flags = 0; + if (vid == pvid) + vinfo.flags |= BRIDGE_VLAN_INFO_PVID; + + if (test_bit(vid, pv->untagged_bitmap)) + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; + + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, + sizeof(vinfo), &vinfo)) + goto nla_put_failure; + } + + nla_nest_end(skb, af); + } + +done: + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +/* + * Notify listeners of a change in port information + */ +void br_ifinfo_notify(int event, struct net_bridge_port *port) +{ + struct net *net; + struct sk_buff *skb; + int err = -ENOBUFS; + + if (!port) + return; + + net = dev_net(port->dev); + br_debug(port->br, "port %u(%s) event %d\n", + (unsigned int)port->port_no, port->dev->name, event); + + skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, 0, port->dev); + if (err < 0) { + /* -EMSGSIZE implies BUG in br_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_LINK, err); +} + + +/* + * Dump information about all ports, in response to GETLINK + */ +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask) +{ + int err = 0; + struct net_bridge_port *port = br_port_get_rtnl(dev); + + /* not a bridge port and */ + if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) + goto out; + + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI, + filter_mask, dev); +out: + return err; +} + +static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { + [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, + [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, + .len = sizeof(struct bridge_vlan_info), }, +}; + +static int br_afspec(struct net_bridge *br, + struct net_bridge_port *p, + struct nlattr *af_spec, + int cmd) +{ + struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + int err = 0; + + err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); + if (err) + return err; + + if (tb[IFLA_BRIDGE_VLAN_INFO]) { + struct bridge_vlan_info *vinfo; + + vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + + if (err) + break; + + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else + br_vlan_delete(br, vinfo->vid); + break; + } + } + + return err; +} + +static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { + [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, + [IFLA_BRPORT_COST] = { .type = NLA_U32 }, + [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, +}; + +/* Change the state of the port and notify spanning tree */ +static int br_set_port_state(struct net_bridge_port *p, u8 state) +{ + if (state > BR_STATE_BLOCKING) + return -EINVAL; + + /* if kernel STP is running, don't allow changes */ + if (p->br->stp_enabled == BR_KERNEL_STP) + return -EBUSY; + + /* if device is not up, change is not allowed + * if link is not present, only allowable state is disabled + */ + if (!netif_running(p->dev) || + (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) + return -ENETDOWN; + + p->state = state; + br_log_state(p); + br_port_state_selection(p->br); + return 0; +} + +/* Set/clear or port flags based on attribute */ +static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], + int attrtype, unsigned long mask) +{ + if (tb[attrtype]) { + u8 flag = nla_get_u8(tb[attrtype]); + if (flag) + p->flags |= mask; + else + p->flags &= ~mask; + } +} + +/* Process bridge protocol info on port */ +static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) +{ + int err; + unsigned long old_flags = p->flags; + + br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); + br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + + if (tb[IFLA_BRPORT_COST]) { + err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_PRIORITY]) { + err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_STATE]) { + err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + if (err) + return err; + } + + br_port_flags_change(p, old_flags ^ p->flags); + return 0; +} + +/* Change state and parameters on port. */ +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct nlattr *protinfo; + struct nlattr *afspec; + struct net_bridge_port *p; + struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + int err = 0; + + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!protinfo && !afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself if the AF_SPEC + * is set to see if someone is setting vlan info on the bridge + */ + if (!p && !afspec) + return -EINVAL; + + if (p && protinfo) { + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; + + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatibility with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } + if (err) + goto out; + } + + if (afspec) { + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_SETLINK); + } + + if (err == 0) + br_ifinfo_notify(RTM_NEWLINK, p); + +out: + return err; +} + +/* Delete port information */ +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +{ + struct nlattr *afspec; + struct net_bridge_port *p; + int err; + + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!afspec) + return 0; + + p = br_port_get_rtnl(dev); + /* We want to accept dev as bridge itself as well */ + if (!p && !(dev->priv_flags & IFF_EBRIDGE)) + return -EINVAL; + + err = br_afspec((struct net_bridge *)netdev_priv(dev), p, + afspec, RTM_DELLINK); + + return err; +} +static int br_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + return 0; +} + +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + return register_netdevice(dev); +} + +static size_t br_get_link_af_size(const struct net_device *dev) +{ + struct net_port_vlans *pv; + + if (br_port_exists(dev)) + pv = nbp_get_vlan_info(br_port_get_rtnl(dev)); + else if (dev->priv_flags & IFF_EBRIDGE) + pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); + else + return 0; + + if (!pv) + return 0; + + /* Each VLAN is returned in bridge_vlan_info along with flags */ + return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); +} + +static struct rtnl_af_ops br_af_ops = { + .family = AF_BRIDGE, + .get_link_af_size = br_get_link_af_size, +}; + +struct rtnl_link_ops br_link_ops __read_mostly = { + .kind = "bridge", + .priv_size = sizeof(struct net_bridge), + .setup = br_dev_setup, + .validate = br_validate, + .newlink = br_dev_newlink, + .dellink = br_dev_delete, +}; + +int __init br_netlink_init(void) +{ + int err; + + br_mdb_init(); + rtnl_af_register(&br_af_ops); + + err = rtnl_link_register(&br_link_ops); + if (err) + goto out_af; + + return 0; + +out_af: + rtnl_af_unregister(&br_af_ops); + br_mdb_uninit(); + return err; +} + +void __exit br_netlink_fini(void) +{ + br_mdb_uninit(); + rtnl_af_unregister(&br_af_ops); + rtnl_link_unregister(&br_link_ops); +} diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c deleted file mode 100644 index a43a9c1d50d..00000000000 --- a/net/bridge/br_notify.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Device event handling - * Linux ethernet bridge - * - * Authors: - * Lennert Buytenhek <buytenh@gnu.org> - * - * $Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> - -#include "br_private.h" - -static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr); - -struct notifier_block br_device_notifier = { - .notifier_call = br_device_event -}; - -/* - * Handle changes in state of network devices enslaved to a bridge. - * - * Note: don't care about up/down if bridge itself is down, because - * port state is checked when bridge is brought up. - */ -static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - struct net_bridge_port *p = dev->br_port; - struct net_bridge *br; - - /* not a port of a bridge */ - if (p == NULL) - return NOTIFY_DONE; - - br = p->br; - - spin_lock_bh(&br->lock); - switch (event) { - case NETDEV_CHANGEMTU: - dev_set_mtu(br->dev, br_min_mtu(br)); - break; - - case NETDEV_CHANGEADDR: - br_fdb_changeaddr(p, dev->dev_addr); - br_stp_recalculate_bridge_id(br); - break; - - case NETDEV_CHANGE: - if (br->dev->flags & IFF_UP) - schedule_delayed_work(&p->carrier_check, BR_PORT_DEBOUNCE); - break; - - case NETDEV_FEAT_CHANGE: - if (br->dev->flags & IFF_UP) - br_features_recompute(br); - - /* could do recursive feature change notification - * but who would care?? - */ - break; - - case NETDEV_DOWN: - if (br->dev->flags & IFF_UP) - br_stp_disable_port(p); - break; - - case NETDEV_UP: - if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) - br_stp_enable_port(p); - break; - - case NETDEV_UNREGISTER: - spin_unlock_bh(&br->lock); - br_del_if(br, dev); - goto done; - } - spin_unlock_bh(&br->lock); - - done: - return NOTIFY_DONE; -} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c5bd631ffcd..23caf5b0309 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -4,8 +4,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -16,8 +14,11 @@ #define _BR_PRIVATE_H #include <linux/netdevice.h> -#include <linux/miscdevice.h> #include <linux/if_bridge.h> +#include <linux/netpoll.h> +#include <linux/u64_stats_sync.h> +#include <net/route.h> +#include <linux/if_vlan.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -26,10 +27,19 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) +#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) + +#define BR_VERSION "2.3" -#define BR_PORT_DEBOUNCE (HZ/10) +/* Control of forwarding link local multicast */ +#define BR_GROUPFWD_DEFAULT 0 +/* Don't allow forwarding control protocols like STP and LLDP */ +#define BR_GROUPFWD_RESTRICTED 0x4007u +/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ +#define BR_GROUPFWD_8021AD 0xB801u -#define BR_VERSION "2.1" +/* Path to usermode spanning tree program */ +#define BR_STP_PROG "/sbin/bridge-stp" typedef struct bridge_id bridge_id; typedef struct mac_addr mac_addr; @@ -38,12 +48,45 @@ typedef __u16 port_id; struct bridge_id { unsigned char prio[2]; - unsigned char addr[6]; + unsigned char addr[ETH_ALEN]; }; struct mac_addr { - unsigned char addr[6]; + unsigned char addr[ETH_ALEN]; +}; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +/* our own querier */ +struct bridge_mcast_own_query { + struct timer_list timer; + u32 startup_sent; +}; + +/* other querier */ +struct bridge_mcast_other_query { + struct timer_list timer; + unsigned long delay_time; +}; + +/* selected querier */ +struct bridge_mcast_querier { + struct br_ip addr; + struct net_bridge_port __rcu *port; +}; +#endif + +struct net_port_vlans { + u16 port_idx; + u16 pvid; + union { + struct net_bridge_port *port; + struct net_bridge *br; + } parent; + struct rcu_head rcu; + unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; + unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; + u16 num_vlans; }; struct net_bridge_fdb_entry @@ -52,11 +95,45 @@ struct net_bridge_fdb_entry struct net_bridge_port *dst; struct rcu_head rcu; - atomic_t use_count; - unsigned long ageing_timer; + unsigned long updated; + unsigned long used; mac_addr addr; unsigned char is_local; unsigned char is_static; + unsigned char added_by_user; + __u16 vlan_id; +}; + +struct net_bridge_port_group { + struct net_bridge_port *port; + struct net_bridge_port_group __rcu *next; + struct hlist_node mglist; + struct rcu_head rcu; + struct timer_list timer; + struct br_ip addr; + unsigned char state; +}; + +struct net_bridge_mdb_entry +{ + struct hlist_node hlist[2]; + struct net_bridge *br; + struct net_bridge_port_group __rcu *ports; + struct rcu_head rcu; + struct timer_list timer; + struct br_ip addr; + bool mglist; +}; + +struct net_bridge_mdb_htable +{ + struct hlist_head *mhash; + struct rcu_head rcu; + struct net_bridge_mdb_htable *old; + u32 size; + u32 max; + u32 secret; + u32 ver; }; struct net_bridge_port @@ -77,25 +154,81 @@ struct net_bridge_port bridge_id designated_bridge; u32 path_cost; u32 designated_cost; + unsigned long designated_age; struct timer_list forward_delay_timer; struct timer_list hold_timer; struct timer_list message_age_timer; struct kobject kobj; - struct work_struct carrier_check; struct rcu_head rcu; + + unsigned long flags; +#define BR_HAIRPIN_MODE 0x00000001 +#define BR_BPDU_GUARD 0x00000002 +#define BR_ROOT_BLOCK 0x00000004 +#define BR_MULTICAST_FAST_LEAVE 0x00000008 +#define BR_ADMIN_COST 0x00000010 +#define BR_LEARNING 0x00000020 +#define BR_FLOOD 0x00000040 +#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING) +#define BR_PROMISC 0x00000080 + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + struct bridge_mcast_own_query ip4_own_query; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_own_query ip6_own_query; +#endif /* IS_ENABLED(CONFIG_IPV6) */ + unsigned char multicast_router; + struct timer_list multicast_router_timer; + struct hlist_head mglist; + struct hlist_node rlist; +#endif + +#ifdef CONFIG_SYSFS + char sysfs_name[IFNAMSIZ]; +#endif + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *np; +#endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_port_vlans __rcu *vlan_info; +#endif }; +#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) +#define br_promisc_port(p) ((p)->flags & BR_PROMISC) + +#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT) + +static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler_data); +} + +static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev) +{ + return br_port_exists(dev) ? + rtnl_dereference(dev->rx_handler_data) : NULL; +} + struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct net_device_stats statistics; + + struct pcpu_sw_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - struct list_head age_list; - unsigned long feature_mask; +#ifdef CONFIG_BRIDGE_NETFILTER + struct rtable fake_rtable; + bool nf_call_iptables; + bool nf_call_ip6tables; + bool nf_call_arptables; +#endif + u16 group_fwd_mask; + u16 group_fwd_mask_required; /* STP */ bridge_id designated_root; @@ -109,20 +242,97 @@ struct net_bridge unsigned long bridge_hello_time; unsigned long bridge_forward_delay; + u8 group_addr[ETH_ALEN]; + bool group_addr_set; u16 root_port; - unsigned char stp_enabled; + + enum { + BR_NO_STP, /* no spanning tree */ + BR_KERNEL_STP, /* old STP in kernel */ + BR_USER_STP, /* new RSTP in userspace */ + } stp_enabled; + unsigned char topology_change; unsigned char topology_change_detected; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + unsigned char multicast_router; + + u8 multicast_disabled:1; + u8 multicast_querier:1; + u8 multicast_query_use_ifaddr:1; + + u32 hash_elasticity; + u32 hash_max; + + u32 multicast_last_member_count; + u32 multicast_startup_query_count; + + unsigned long multicast_last_member_interval; + unsigned long multicast_membership_interval; + unsigned long multicast_querier_interval; + unsigned long multicast_query_interval; + unsigned long multicast_query_response_interval; + unsigned long multicast_startup_query_interval; + + spinlock_t multicast_lock; + struct net_bridge_mdb_htable __rcu *mdb; + struct hlist_head router_list; + + struct timer_list multicast_router_timer; + struct bridge_mcast_other_query ip4_other_query; + struct bridge_mcast_own_query ip4_own_query; + struct bridge_mcast_querier ip4_querier; +#if IS_ENABLED(CONFIG_IPV6) + struct bridge_mcast_other_query ip6_other_query; + struct bridge_mcast_own_query ip6_own_query; + struct bridge_mcast_querier ip6_querier; +#endif /* IS_ENABLED(CONFIG_IPV6) */ +#endif + struct timer_list hello_timer; struct timer_list tcn_timer; struct timer_list topology_change_timer; struct timer_list gc_timer; - struct kobject ifobj; + struct kobject *ifobj; + u32 auto_cnt; +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + u8 vlan_enabled; + __be16 vlan_proto; + struct net_port_vlans __rcu *vlan_info; +#endif +}; + +struct br_input_skb_cb { + struct net_device *brdev; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + int igmp; + int mrouters_only; +#endif }; -extern struct notifier_block br_device_notifier; -extern const unsigned char bridge_ula[6]; +#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (BR_INPUT_SKB_CB(__skb)->mrouters_only) +#else +# define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb) (0) +#endif + +#define br_printk(level, br, format, args...) \ + printk(level "%s: " format, (br)->dev->name, ##args) + +#define br_err(__br, format, args...) \ + br_printk(KERN_ERR, __br, format, ##args) +#define br_warn(__br, format, args...) \ + br_printk(KERN_WARNING, __br, format, ##args) +#define br_notice(__br, format, args...) \ + br_printk(KERN_NOTICE, __br, format, ##args) +#define br_info(__br, format, args...) \ + br_printk(KERN_INFO, __br, format, ##args) + +#define br_debug(br, format, args...) \ + pr_debug("%s: " format, (br)->dev->name, ##args) /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) @@ -130,123 +340,493 @@ static inline int br_is_root_bridge(const struct net_bridge *br) return !memcmp(&br->bridge_id, &br->designated_root, 8); } - /* br_device.c */ -extern void br_dev_setup(struct net_device *dev); -extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +void br_dev_setup(struct net_device *dev); +void br_dev_delete(struct net_device *dev, struct list_head *list); +netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev); +#ifdef CONFIG_NET_POLL_CONTROLLER +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + struct netpoll *np = p->np; + + if (np) + netpoll_send_skb(np, skb); +} + +int br_netpoll_enable(struct net_bridge_port *p); +void br_netpoll_disable(struct net_bridge_port *p); +#else +static inline void br_netpoll_send_skb(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline int br_netpoll_enable(struct net_bridge_port *p) +{ + return 0; +} + +static inline void br_netpoll_disable(struct net_bridge_port *p) +{ +} +#endif /* br_fdb.c */ -extern void br_fdb_init(void); -extern void br_fdb_fini(void); -extern void br_fdb_changeaddr(struct net_bridge_port *p, - const unsigned char *newaddr); -extern void br_fdb_cleanup(unsigned long arg); -extern void br_fdb_delete_by_port(struct net_bridge *br, - struct net_bridge_port *p); -extern struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr); -extern struct net_bridge_fdb_entry *br_fdb_get(struct net_bridge *br, - unsigned char *addr); -extern void br_fdb_put(struct net_bridge_fdb_entry *ent); -extern int br_fdb_fillbuf(struct net_bridge *br, void *buf, - unsigned long count, unsigned long off); -extern int br_fdb_insert(struct net_bridge *br, - struct net_bridge_port *source, - const unsigned char *addr); -extern void br_fdb_update(struct net_bridge *br, - struct net_bridge_port *source, - const unsigned char *addr); +int br_fdb_init(void); +void br_fdb_fini(void); +void br_fdb_flush(struct net_bridge *br); +void br_fdb_find_delete_local(struct net_bridge *br, + const struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); +void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); +void br_fdb_cleanup(unsigned long arg); +void br_fdb_delete_by_port(struct net_bridge *br, + const struct net_bridge_port *p, int do_all); +struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, + const unsigned char *addr, __u16 vid); +int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); +int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, + unsigned long off); +int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid); +void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, bool added_by_user); + +int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, const unsigned char *addr); +int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, + const unsigned char *addr, u16 nlh_flags); +int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, int idx); +int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); +void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); /* br_forward.c */ -extern void br_deliver(const struct net_bridge_port *to, - struct sk_buff *skb); -extern int br_dev_queue_push_xmit(struct sk_buff *skb); -extern void br_forward(const struct net_bridge_port *to, - struct sk_buff *skb); -extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, - struct sk_buff *skb, - int clone); -extern void br_flood_forward(struct net_bridge *br, - struct sk_buff *skb, - int clone); +void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); +int br_dev_queue_push_xmit(struct sk_buff *skb); +void br_forward(const struct net_bridge_port *to, + struct sk_buff *skb, struct sk_buff *skb0); +int br_forward_finish(struct sk_buff *skb); +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, + struct sk_buff *skb2, bool unicast); /* br_if.c */ -extern int br_add_bridge(const char *name); -extern int br_del_bridge(const char *name); -extern void br_cleanup_bridges(void); -extern int br_add_if(struct net_bridge *br, - struct net_device *dev); -extern int br_del_if(struct net_bridge *br, - struct net_device *dev); -extern int br_min_mtu(const struct net_bridge *br); -extern void br_features_recompute(struct net_bridge *br); +void br_port_carrier_check(struct net_bridge_port *p); +int br_add_bridge(struct net *net, const char *name); +int br_del_bridge(struct net *net, const char *name); +int br_add_if(struct net_bridge *br, struct net_device *dev); +int br_del_if(struct net_bridge *br, struct net_device *dev); +int br_min_mtu(const struct net_bridge *br); +netdev_features_t br_features_recompute(struct net_bridge *br, + netdev_features_t features); +void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); +void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -extern int br_handle_frame_finish(struct sk_buff *skb); -extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb); +int br_handle_frame_finish(struct sk_buff *skb); +rx_handler_result_t br_handle_frame(struct sk_buff **pskb); + +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_handle_frame; +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} /* br_ioctl.c */ -extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); +int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, + void __user *arg); + +/* br_multicast.c */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +extern unsigned int br_mdb_rehash_seq; +int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, + struct sk_buff *skb, u16 vid); +struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid); +void br_multicast_add_port(struct net_bridge_port *port); +void br_multicast_del_port(struct net_bridge_port *port); +void br_multicast_enable_port(struct net_bridge_port *port); +void br_multicast_disable_port(struct net_bridge_port *port); +void br_multicast_init(struct net_bridge *br); +void br_multicast_open(struct net_bridge *br); +void br_multicast_stop(struct net_bridge *br); +void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb); +void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, struct sk_buff *skb2); +int br_multicast_set_router(struct net_bridge *br, unsigned long val); +int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); +int br_multicast_toggle(struct net_bridge *br, unsigned long val); +int br_multicast_set_querier(struct net_bridge *br, unsigned long val); +int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +struct net_bridge_mdb_entry * +br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst); +struct net_bridge_mdb_entry * +br_multicast_new_group(struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group); +void br_multicast_free_pg(struct rcu_head *head); +struct net_bridge_port_group * +br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, + struct net_bridge_port_group __rcu *next, + unsigned char state); +void br_mdb_init(void); +void br_mdb_uninit(void); +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type); + +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return br->multicast_router == 2 || + (br->multicast_router == 1 && + timer_pending(&br->multicast_router_timer)); +} + +static inline bool +__br_multicast_querier_exists(struct net_bridge *br, + struct bridge_mcast_other_query *querier) +{ + return time_is_before_jiffies(querier->delay_time) && + (br->multicast_querier || timer_pending(&querier->timer)); +} + +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) +{ + switch (eth->h_proto) { + case (htons(ETH_P_IP)): + return __br_multicast_querier_exists(br, &br->ip4_other_query); +#if IS_ENABLED(CONFIG_IPV6) + case (htons(ETH_P_IPV6)): + return __br_multicast_querier_exists(br, &br->ip6_other_query); +#endif + default: + return false; + } +} +#else +static inline int br_multicast_rcv(struct net_bridge *br, + struct net_bridge_port *port, + struct sk_buff *skb, + u16 vid) +{ + return 0; +} + +static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, + struct sk_buff *skb, u16 vid) +{ + return NULL; +} + +static inline void br_multicast_add_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_del_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_enable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_disable_port(struct net_bridge_port *port) +{ +} + +static inline void br_multicast_init(struct net_bridge *br) +{ +} + +static inline void br_multicast_open(struct net_bridge *br) +{ +} + +static inline void br_multicast_stop(struct net_bridge *br) +{ +} + +static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb) +{ +} + +static inline void br_multicast_forward(struct net_bridge_mdb_entry *mdst, + struct sk_buff *skb, + struct sk_buff *skb2) +{ +} +static inline bool br_multicast_is_router(struct net_bridge *br) +{ + return 0; +} +static inline bool br_multicast_querier_exists(struct net_bridge *br, + struct ethhdr *eth) +{ + return false; +} +static inline void br_mdb_init(void) +{ +} +static inline void br_mdb_uninit(void) +{ +} +#endif + +/* br_vlan.c */ +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid); +bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, + const struct sk_buff *skb); +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb); +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +int br_vlan_delete(struct net_bridge *br, u16 vid); +void br_vlan_flush(struct net_bridge *br); +bool br_vlan_find(struct net_bridge *br, u16 vid); +void br_recalculate_fwd_mask(struct net_bridge *br); +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); +int br_vlan_set_proto(struct net_bridge *br, unsigned long val); +void br_vlan_init(struct net_bridge *br); +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); +void nbp_vlan_flush(struct net_bridge_port *port); +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return rcu_dereference_rtnl(br->vlan_info); +} + +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return rcu_dereference_rtnl(p->vlan_info); +} + +/* Since bridge now depends on 8021Q module, but the time bridge sees the + * skb, the vlan tag will always be present if the frame was tagged. + */ +static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) +{ + int err = 0; + + if (vlan_tx_tag_present(skb)) + *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + else { + *vid = 0; + err = -EINVAL; + } + + return err; +} + +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if + * vid wasn't set + */ + smp_rmb(); + return v->pvid ?: VLAN_N_VID; +} + +static inline int br_vlan_enabled(struct net_bridge *br) +{ + return br->vlan_enabled; +} +#else +static inline bool br_allowed_ingress(struct net_bridge *br, + struct net_port_vlans *v, + struct sk_buff *skb, + u16 *vid) +{ + return true; +} + +static inline bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + return true; +} + +static inline bool br_should_learn(struct net_bridge_port *p, + struct sk_buff *skb, u16 *vid) +{ + return true; +} + +static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *v, + struct sk_buff *skb) +{ + return skb; +} + +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void br_vlan_flush(struct net_bridge *br) +{ +} + +static inline bool br_vlan_find(struct net_bridge *br, u16 vid) +{ + return false; +} + +static inline void br_recalculate_fwd_mask(struct net_bridge *br) +{ +} + +static inline void br_vlan_init(struct net_bridge *br) +{ +} + +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + return -EOPNOTSUPP; +} + +static inline void nbp_vlan_flush(struct net_bridge_port *port) +{ +} + +static inline struct net_port_vlans *br_get_vlan_info( + const struct net_bridge *br) +{ + return NULL; +} +static inline struct net_port_vlans *nbp_get_vlan_info( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + return false; +} + +static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) +{ + return 0; +} +static inline u16 br_get_pvid(const struct net_port_vlans *v) +{ + return VLAN_N_VID; /* Returns invalid vid */ +} + +static inline int br_vlan_enabled(struct net_bridge *br) +{ + return 0; +} +#endif /* br_netfilter.c */ -extern int br_netfilter_init(void); -extern void br_netfilter_fini(void); +#ifdef CONFIG_BRIDGE_NETFILTER +int br_netfilter_init(void); +void br_netfilter_fini(void); +void br_netfilter_rtable_init(struct net_bridge *); +#else +#define br_netfilter_init() (0) +#define br_netfilter_fini() do { } while (0) +#define br_netfilter_rtable_init(x) +#endif /* br_stp.c */ -extern void br_log_state(const struct net_bridge_port *p); -extern struct net_bridge_port *br_get_port(struct net_bridge *br, - u16 port_no); -extern void br_init_port(struct net_bridge_port *p); -extern void br_become_designated_port(struct net_bridge_port *p); +void br_log_state(const struct net_bridge_port *p); +struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); +void br_init_port(struct net_bridge_port *p); +void br_become_designated_port(struct net_bridge_port *p); + +void __br_set_forward_delay(struct net_bridge *br, unsigned long t); +int br_set_forward_delay(struct net_bridge *br, unsigned long x); +int br_set_hello_time(struct net_bridge *br, unsigned long x); +int br_set_max_age(struct net_bridge *br, unsigned long x); + /* br_stp_if.c */ -extern void br_stp_enable_bridge(struct net_bridge *br); -extern void br_stp_disable_bridge(struct net_bridge *br); -extern void br_stp_enable_port(struct net_bridge_port *p); -extern void br_stp_disable_port(struct net_bridge_port *p); -extern void br_stp_recalculate_bridge_id(struct net_bridge *br); -extern void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); -extern void br_stp_set_bridge_priority(struct net_bridge *br, - u16 newprio); -extern void br_stp_set_port_priority(struct net_bridge_port *p, - u8 newprio); -extern void br_stp_set_path_cost(struct net_bridge_port *p, - u32 path_cost); -extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); +void br_stp_enable_bridge(struct net_bridge *br); +void br_stp_disable_bridge(struct net_bridge *br); +void br_stp_set_enabled(struct net_bridge *br, unsigned long val); +void br_stp_enable_port(struct net_bridge_port *p); +void br_stp_disable_port(struct net_bridge_port *p); +bool br_stp_recalculate_bridge_id(struct net_bridge *br); +void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *a); +void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio); +int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio); +int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost); +ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ -extern int br_stp_handle_bpdu(struct sk_buff *skb); +struct stp_proto; +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev); /* br_stp_timer.c */ -extern void br_stp_timer_init(struct net_bridge *br); -extern void br_stp_port_timer_init(struct net_bridge_port *p); -extern unsigned long br_timer_value(const struct timer_list *timer); +void br_stp_timer_init(struct net_bridge *br); +void br_stp_port_timer_init(struct net_bridge_port *p); +unsigned long br_timer_value(const struct timer_list *timer); /* br.c */ -extern struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, - unsigned char *addr); -extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); +#if IS_ENABLED(CONFIG_ATM_LANE) +extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr); +#endif +/* br_netlink.c */ +extern struct rtnl_link_ops br_link_ops; +int br_netlink_init(void); +void br_netlink_fini(void); +void br_ifinfo_notify(int event, struct net_bridge_port *port); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, + u32 filter_mask); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ -extern int br_sysfs_addif(struct net_bridge_port *p); -extern void br_sysfs_removeif(struct net_bridge_port *p); -extern void br_sysfs_freeif(struct net_bridge_port *p); +extern const struct sysfs_ops brport_sysfs_ops; +int br_sysfs_addif(struct net_bridge_port *p); +int br_sysfs_renameif(struct net_bridge_port *p); /* br_sysfs_br.c */ -extern int br_sysfs_addbr(struct net_device *dev); -extern void br_sysfs_delbr(struct net_device *dev); +int br_sysfs_addbr(struct net_device *dev); +void br_sysfs_delbr(struct net_device *dev); #else -#define br_sysfs_addif(p) (0) -#define br_sysfs_removeif(p) do { } while(0) -#define br_sysfs_freeif(p) kfree(p) -#define br_sysfs_addbr(dev) (0) -#define br_sysfs_delbr(dev) do { } while(0) +static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } +static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ #endif diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h index e29f01ac1ad..2fe910c4e17 100644 --- a/net/bridge/br_private_stp.h +++ b/net/bridge/br_private_stp.h @@ -4,8 +4,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -18,10 +16,22 @@ #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 -struct br_config_bpdu -{ - unsigned topology_change:1; - unsigned topology_change_ack:1; +/* IEEE 802.1D-1998 timer values */ +#define BR_MIN_HELLO_TIME (1*HZ) +#define BR_MAX_HELLO_TIME (10*HZ) + +#define BR_MIN_FORWARD_DELAY (2*HZ) +#define BR_MAX_FORWARD_DELAY (30*HZ) + +#define BR_MIN_MAX_AGE (6*HZ) +#define BR_MAX_MAX_AGE (40*HZ) + +#define BR_MIN_PATH_COST 1 +#define BR_MAX_PATH_COST 65535 + +struct br_config_bpdu { + unsigned int topology_change:1; + unsigned int topology_change_ack:1; bridge_id root; int root_path_cost; bridge_id bridge_id; @@ -41,18 +51,19 @@ static inline int br_is_designated_port(const struct net_bridge_port *p) /* br_stp.c */ -extern void br_become_root_bridge(struct net_bridge *br); -extern void br_config_bpdu_generation(struct net_bridge *); -extern void br_configuration_update(struct net_bridge *); -extern void br_port_state_selection(struct net_bridge *); -extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu); -extern void br_received_tcn_bpdu(struct net_bridge_port *p); -extern void br_transmit_config(struct net_bridge_port *p); -extern void br_transmit_tcn(struct net_bridge *br); -extern void br_topology_change_detection(struct net_bridge *br); +void br_become_root_bridge(struct net_bridge *br); +void br_config_bpdu_generation(struct net_bridge *); +void br_configuration_update(struct net_bridge *); +void br_port_state_selection(struct net_bridge *); +void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu); +void br_received_tcn_bpdu(struct net_bridge_port *p); +void br_transmit_config(struct net_bridge_port *p); +void br_transmit_tcn(struct net_bridge *br); +void br_topology_change_detection(struct net_bridge *br); /* br_stp_bpdu.c */ -extern void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); -extern void br_send_tcn_bpdu(struct net_bridge_port *); +void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); +void br_send_tcn_bpdu(struct net_bridge_port *); #endif diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 04ca0639a95..3c86f0538cb 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -5,38 +5,35 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include <linux/kernel.h> -#include <linux/smp_lock.h> +#include <linux/rculist.h> #include "br_private.h" #include "br_private_stp.h" /* since time values in bpdu are in jiffies and then scaled (1/256) - * before sending, make sure that is at least one. + * before sending, make sure that is at least one STP tick. */ -#define MESSAGE_AGE_INCR ((HZ < 256) ? 1 : (HZ/256)) +#define MESSAGE_AGE_INCR ((HZ / 256) + 1) -static const char *br_port_state_names[] = { - [BR_STATE_DISABLED] = "disabled", +static const char *const br_port_state_names[] = { + [BR_STATE_DISABLED] = "disabled", [BR_STATE_LISTENING] = "listening", - [BR_STATE_LEARNING] = "learning", - [BR_STATE_FORWARDING] = "forwarding", + [BR_STATE_LEARNING] = "learning", + [BR_STATE_FORWARDING] = "forwarding", [BR_STATE_BLOCKING] = "blocking", }; void br_log_state(const struct net_bridge_port *p) { - pr_info("%s: port %d(%s) entering %s state\n", - p->br->dev->name, p->port_no, p->dev->name, + br_info(p->br, "port %u(%s) entered %s state\n", + (unsigned int) p->port_no, p->dev->name, br_port_state_names[p->state]); - } /* called under bridge lock */ @@ -53,7 +50,7 @@ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) } /* called under bridge lock */ -static int br_should_become_root_port(const struct net_bridge_port *p, +static int br_should_become_root_port(const struct net_bridge_port *p, u16 root_port) { struct net_bridge *br; @@ -103,6 +100,21 @@ static int br_should_become_root_port(const struct net_bridge_port *p, return 0; } +static void br_root_port_block(const struct net_bridge *br, + struct net_bridge_port *p) +{ + + br_notice(br, "port %u(%s) tried to become root port (blocked)", + (unsigned int) p->port_no, p->dev->name); + + p->state = BR_STATE_LISTENING; + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + + if (br->forward_delay > 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); +} + /* called under bridge lock */ static void br_root_selection(struct net_bridge *br) { @@ -110,9 +122,13 @@ static void br_root_selection(struct net_bridge *br) u16 root_port = 0; list_for_each_entry(p, &br->port_list, list) { - if (br_should_become_root_port(p, root_port)) - root_port = p->port_no; + if (!br_should_become_root_port(p, root_port)) + continue; + if (p->flags & BR_ROOT_BLOCK) + br_root_port_block(br, p); + else + root_port = p->port_no; } br->root_port = root_port; @@ -148,7 +164,6 @@ void br_transmit_config(struct net_bridge_port *p) struct br_config_bpdu bpdu; struct net_bridge *br; - if (timer_pending(&p->hold_timer)) { p->config_pending = 1; return; @@ -167,8 +182,7 @@ void br_transmit_config(struct net_bridge_port *p) else { struct net_bridge_port *root = br_get_port(br, br->root_port); - bpdu.message_age = br->max_age - - (root->message_age_timer.expires - jiffies) + bpdu.message_age = (jiffies - root->designated_age) + MESSAGE_AGE_INCR; } bpdu.max_age = br->max_age; @@ -179,25 +193,27 @@ void br_transmit_config(struct net_bridge_port *p) br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; - mod_timer(&p->hold_timer, jiffies + BR_HOLD_TIME); + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); } } /* called under bridge lock */ -static inline void br_record_config_information(struct net_bridge_port *p, - const struct br_config_bpdu *bpdu) +static void br_record_config_information(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { p->designated_root = bpdu->root; p->designated_cost = bpdu->root_path_cost; p->designated_bridge = bpdu->bridge_id; p->designated_port = bpdu->port_id; + p->designated_age = jiffies - bpdu->message_age; - mod_timer(&p->message_age_timer, jiffies - + (p->br->max_age - bpdu->message_age)); + mod_timer(&p->message_age_timer, jiffies + + (bpdu->max_age - bpdu->message_age)); } /* called under bridge lock */ -static inline void br_record_config_timeout_values(struct net_bridge *br, +static void br_record_config_timeout_values(struct net_bridge *br, const struct br_config_bpdu *bpdu) { br->max_age = bpdu->max_age; @@ -209,7 +225,14 @@ static inline void br_record_config_timeout_values(struct net_bridge *br, /* called under bridge lock */ void br_transmit_tcn(struct net_bridge *br) { - br_send_tcn_bpdu(br_get_port(br, br->root_port)); + struct net_bridge_port *p; + + p = br_get_port(br, br->root_port); + if (p) + br_send_tcn_bpdu(p); + else + br_notice(br, "root port %u not found for topology notice\n", + br->root_port); } /* called under bridge lock */ @@ -256,7 +279,8 @@ static void br_designated_port_selection(struct net_bridge *br) } /* called under bridge lock */ -static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +static int br_supersedes_port_info(const struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { int t; @@ -287,7 +311,7 @@ static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_b } /* called under bridge lock */ -static inline void br_topology_change_acknowledged(struct net_bridge *br) +static void br_topology_change_acknowledged(struct net_bridge *br) { br->topology_change_detected = 0; del_timer(&br->tcn_timer); @@ -298,7 +322,10 @@ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); - pr_info("%s: topology change detected, %s\n", br->dev->name, + if (br->stp_enabled != BR_KERNEL_STP) + return; + + br_info(br, "topology change detected, %s\n", isroot ? "propagating" : "sending tcn bpdu"); if (isroot) { @@ -326,7 +353,7 @@ void br_config_bpdu_generation(struct net_bridge *br) } /* called under bridge lock */ -static inline void br_reply(struct net_bridge_port *p) +static void br_reply(struct net_bridge_port *p) { br_transmit_config(p); } @@ -362,6 +389,8 @@ static void br_make_blocking(struct net_bridge_port *p) p->state = BR_STATE_BLOCKING; br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + del_timer(&p->forward_delay_timer); } } @@ -369,23 +398,40 @@ static void br_make_blocking(struct net_bridge_port *p) /* called under bridge lock */ static void br_make_forwarding(struct net_bridge_port *p) { - if (p->state == BR_STATE_BLOCKING) { - if (p->br->stp_enabled) { - p->state = BR_STATE_LISTENING; - } else { - p->state = BR_STATE_LEARNING; - } - br_log_state(p); - mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); } + struct net_bridge *br = p->br; + + if (p->state != BR_STATE_BLOCKING) + return; + + if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { + p->state = BR_STATE_FORWARDING; + br_topology_change_detection(br); + del_timer(&p->forward_delay_timer); + } else if (br->stp_enabled == BR_KERNEL_STP) + p->state = BR_STATE_LISTENING; + else + p->state = BR_STATE_LEARNING; + + br_multicast_enable_port(p); + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + + if (br->forward_delay != 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } /* called under bridge lock */ void br_port_state_selection(struct net_bridge *br) { struct net_bridge_port *p; + unsigned int liveports = 0; list_for_each_entry(p, &br->port_list, list) { - if (p->state != BR_STATE_DISABLED) { + if (p->state == BR_STATE_DISABLED) + continue; + + /* Don't change port states if userspace is handling STP */ + if (br->stp_enabled != BR_USER_STP) { if (p->port_no == br->root_port) { p->config_pending = 0; p->topology_change_ack = 0; @@ -400,22 +446,30 @@ void br_port_state_selection(struct net_bridge *br) } } + if (p->state == BR_STATE_FORWARDING) + ++liveports; } + + if (liveports == 0) + netif_carrier_off(br->dev); + else + netif_carrier_on(br->dev); } /* called under bridge lock */ -static inline void br_topology_change_acknowledge(struct net_bridge_port *p) +static void br_topology_change_acknowledge(struct net_bridge_port *p) { p->topology_change_ack = 1; br_transmit_config(p); } /* called under bridge lock */ -void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +void br_received_config_bpdu(struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { struct net_bridge *br; int was_root; - + br = p->br; was_root = br_is_root_bridge(br); @@ -430,7 +484,7 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b del_timer(&br->topology_change_timer); br_transmit_tcn(br); - mod_timer(&br->tcn_timer, + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); } } @@ -441,8 +495,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b if (bpdu->topology_change_ack) br_topology_change_acknowledged(br); } - } else if (br_is_designated_port(p)) { - br_reply(p); + } else if (br_is_designated_port(p)) { + br_reply(p); } } @@ -450,10 +504,67 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b void br_received_tcn_bpdu(struct net_bridge_port *p) { if (br_is_designated_port(p)) { - pr_info("%s: received tcn bpdu on port %i(%s)\n", - p->br->dev->name, p->port_no, p->dev->name); + br_info(p->br, "port %u(%s) received tcn bpdu\n", + (unsigned int) p->port_no, p->dev->name); br_topology_change_detection(p->br); br_topology_change_acknowledge(p); } } + +/* Change bridge STP parameter */ +int br_set_hello_time(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (t < BR_MIN_HELLO_TIME || t > BR_MAX_HELLO_TIME) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_hello_time = t; + if (br_is_root_bridge(br)) + br->hello_time = br->bridge_hello_time; + spin_unlock_bh(&br->lock); + return 0; +} + +int br_set_max_age(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + + if (t < BR_MIN_MAX_AGE || t > BR_MAX_MAX_AGE) + return -ERANGE; + + spin_lock_bh(&br->lock); + br->bridge_max_age = t; + if (br_is_root_bridge(br)) + br->max_age = br->bridge_max_age; + spin_unlock_bh(&br->lock); + return 0; + +} + +void __br_set_forward_delay(struct net_bridge *br, unsigned long t) +{ + br->bridge_forward_delay = t; + if (br_is_root_bridge(br)) + br->forward_delay = br->bridge_forward_delay; +} + +int br_set_forward_delay(struct net_bridge *br, unsigned long val) +{ + unsigned long t = clock_t_to_jiffies(val); + int err = -ERANGE; + + spin_lock_bh(&br->lock); + if (br->stp_enabled != BR_NO_STP && + (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) + goto unlock; + + __br_set_forward_delay(br, t); + err = 0; + +unlock: + spin_unlock_bh(&br->lock); + return err; +} diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index d071f1c9ad0..bdb459d21ad 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,152 +13,179 @@ #include <linux/kernel.h> #include <linux/netfilter_bridge.h> +#include <linux/etherdevice.h> +#include <linux/llc.h> +#include <linux/slab.h> +#include <linux/pkt_sched.h> +#include <net/net_namespace.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/stp.h> +#include <asm/unaligned.h> #include "br_private.h" #include "br_private_stp.h" -#define JIFFIES_TO_TICKS(j) (((j) << 8) / HZ) -#define TICKS_TO_JIFFIES(j) (((j) * HZ) >> 8) +#define STP_HZ 256 + +#define LLC_RESERVE sizeof(struct llc_pdu_un) -static void br_send_bpdu(struct net_bridge_port *p, unsigned char *data, int length) +static void br_send_bpdu(struct net_bridge_port *p, + const unsigned char *data, int length) { - struct net_device *dev; struct sk_buff *skb; - int size; - if (!p->br->stp_enabled) + skb = dev_alloc_skb(length+LLC_RESERVE); + if (!skb) return; - size = length + 2*ETH_ALEN + 2; - if (size < 60) - size = 60; + skb->dev = p->dev; + skb->protocol = htons(ETH_P_802_2); + skb->priority = TC_PRIO_CONTROL; - dev = p->dev; + skb_reserve(skb, LLC_RESERVE); + memcpy(__skb_put(skb, length), data, length); - if ((skb = dev_alloc_skb(size)) == NULL) { - printk(KERN_INFO "br: memory squeeze!\n"); - return; - } + llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, + LLC_SAP_BSPAN, LLC_PDU_CMD); + llc_pdu_init_as_ui_cmd(skb); - skb->dev = dev; - skb->protocol = htons(ETH_P_802_2); - skb->mac.raw = skb_put(skb, size); - memcpy(skb->mac.raw, bridge_ula, ETH_ALEN); - memcpy(skb->mac.raw+ETH_ALEN, dev->dev_addr, ETH_ALEN); - skb->mac.raw[2*ETH_ALEN] = 0; - skb->mac.raw[2*ETH_ALEN+1] = length; - skb->nh.raw = skb->mac.raw + 2*ETH_ALEN + 2; - memcpy(skb->nh.raw, data, length); - memset(skb->nh.raw + length, 0xa5, size - length - 2*ETH_ALEN - 2); - - NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr); + + skb_reset_mac_header(skb); + + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); } -static __inline__ void br_set_ticks(unsigned char *dest, int jiff) +static inline void br_set_ticks(unsigned char *dest, int j) { - __u16 ticks; + unsigned long ticks = (STP_HZ * j)/ HZ; - ticks = JIFFIES_TO_TICKS(jiff); - dest[0] = (ticks >> 8) & 0xFF; - dest[1] = ticks & 0xFF; + put_unaligned_be16(ticks, dest); } -static __inline__ int br_get_ticks(unsigned char *dest) +static inline int br_get_ticks(const unsigned char *src) { - return TICKS_TO_JIFFIES((dest[0] << 8) | dest[1]); + unsigned long ticks = get_unaligned_be16(src); + + return DIV_ROUND_UP(ticks * HZ, STP_HZ); } /* called under bridge lock */ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) { - unsigned char buf[38]; - - buf[0] = 0x42; - buf[1] = 0x42; - buf[2] = 0x03; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = BPDU_TYPE_CONFIG; - buf[7] = (bpdu->topology_change ? 0x01 : 0) | + unsigned char buf[35]; + + if (p->br->stp_enabled != BR_KERNEL_STP) + return; + + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = BPDU_TYPE_CONFIG; + buf[4] = (bpdu->topology_change ? 0x01 : 0) | (bpdu->topology_change_ack ? 0x80 : 0); - buf[8] = bpdu->root.prio[0]; - buf[9] = bpdu->root.prio[1]; - buf[10] = bpdu->root.addr[0]; - buf[11] = bpdu->root.addr[1]; - buf[12] = bpdu->root.addr[2]; - buf[13] = bpdu->root.addr[3]; - buf[14] = bpdu->root.addr[4]; - buf[15] = bpdu->root.addr[5]; - buf[16] = (bpdu->root_path_cost >> 24) & 0xFF; - buf[17] = (bpdu->root_path_cost >> 16) & 0xFF; - buf[18] = (bpdu->root_path_cost >> 8) & 0xFF; - buf[19] = bpdu->root_path_cost & 0xFF; - buf[20] = bpdu->bridge_id.prio[0]; - buf[21] = bpdu->bridge_id.prio[1]; - buf[22] = bpdu->bridge_id.addr[0]; - buf[23] = bpdu->bridge_id.addr[1]; - buf[24] = bpdu->bridge_id.addr[2]; - buf[25] = bpdu->bridge_id.addr[3]; - buf[26] = bpdu->bridge_id.addr[4]; - buf[27] = bpdu->bridge_id.addr[5]; - buf[28] = (bpdu->port_id >> 8) & 0xFF; - buf[29] = bpdu->port_id & 0xFF; - - br_set_ticks(buf+30, bpdu->message_age); - br_set_ticks(buf+32, bpdu->max_age); - br_set_ticks(buf+34, bpdu->hello_time); - br_set_ticks(buf+36, bpdu->forward_delay); - - br_send_bpdu(p, buf, 38); + buf[5] = bpdu->root.prio[0]; + buf[6] = bpdu->root.prio[1]; + buf[7] = bpdu->root.addr[0]; + buf[8] = bpdu->root.addr[1]; + buf[9] = bpdu->root.addr[2]; + buf[10] = bpdu->root.addr[3]; + buf[11] = bpdu->root.addr[4]; + buf[12] = bpdu->root.addr[5]; + buf[13] = (bpdu->root_path_cost >> 24) & 0xFF; + buf[14] = (bpdu->root_path_cost >> 16) & 0xFF; + buf[15] = (bpdu->root_path_cost >> 8) & 0xFF; + buf[16] = bpdu->root_path_cost & 0xFF; + buf[17] = bpdu->bridge_id.prio[0]; + buf[18] = bpdu->bridge_id.prio[1]; + buf[19] = bpdu->bridge_id.addr[0]; + buf[20] = bpdu->bridge_id.addr[1]; + buf[21] = bpdu->bridge_id.addr[2]; + buf[22] = bpdu->bridge_id.addr[3]; + buf[23] = bpdu->bridge_id.addr[4]; + buf[24] = bpdu->bridge_id.addr[5]; + buf[25] = (bpdu->port_id >> 8) & 0xFF; + buf[26] = bpdu->port_id & 0xFF; + + br_set_ticks(buf+27, bpdu->message_age); + br_set_ticks(buf+29, bpdu->max_age); + br_set_ticks(buf+31, bpdu->hello_time); + br_set_ticks(buf+33, bpdu->forward_delay); + + br_send_bpdu(p, buf, 35); } /* called under bridge lock */ void br_send_tcn_bpdu(struct net_bridge_port *p) { - unsigned char buf[7]; - - buf[0] = 0x42; - buf[1] = 0x42; - buf[2] = 0x03; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = BPDU_TYPE_TCN; - br_send_bpdu(p, buf, 7); -} + unsigned char buf[4]; -static const unsigned char header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + if (p->br->stp_enabled != BR_KERNEL_STP) + return; -/* NO locks */ -int br_stp_handle_bpdu(struct sk_buff *skb) + buf[0] = 0; + buf[1] = 0; + buf[2] = 0; + buf[3] = BPDU_TYPE_TCN; + br_send_bpdu(p, buf, 4); +} + +/* + * Called from llc. + * + * NO locks, but rcu_read_lock + */ +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) { - struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; - unsigned char *buf; + const unsigned char *dest = eth_hdr(skb)->h_dest; + struct net_bridge_port *p; + struct net_bridge *br; + const unsigned char *buf; - /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + if (!pskb_may_pull(skb, 4)) + goto err; - /* need at least the 802 and STP headers */ - if (!pskb_may_pull(skb, sizeof(header)+1) || - memcmp(skb->data, header, sizeof(header))) + /* compare of protocol id and version */ + buf = skb->data; + if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; - buf = skb_pull(skb, sizeof(header)); + p = br_port_get_check_rcu(dev); + if (!p) + goto err; - spin_lock_bh(&br->lock); - if (p->state == BR_STATE_DISABLED - || !(br->dev->flags & IFF_UP) - || !br->stp_enabled) + br = p->br; + spin_lock(&br->lock); + + if (br->stp_enabled != BR_KERNEL_STP) goto out; + if (!(br->dev->flags & IFF_UP)) + goto out; + + if (p->state == BR_STATE_DISABLED) + goto out; + + if (!ether_addr_equal(dest, br->group_addr)) + goto out; + + if (p->flags & BR_BPDU_GUARD) { + br_notice(br, "BPDU received on blocked port %u(%s)\n", + (unsigned int) p->port_no, p->dev->name); + br_stp_disable_port(p); + goto out; + } + + buf = skb_pull(skb, 3); + if (buf[0] == BPDU_TYPE_CONFIG) { struct br_config_bpdu bpdu; if (!pskb_may_pull(skb, 32)) - goto out; + goto out; buf = skb->data; bpdu.topology_change = (buf[1] & 0x01) ? 1 : 0; @@ -194,15 +219,23 @@ int br_stp_handle_bpdu(struct sk_buff *skb) bpdu.hello_time = br_get_ticks(buf+28); bpdu.forward_delay = br_get_ticks(buf+30); - br_received_config_bpdu(p, &bpdu); - } + if (bpdu.message_age > bpdu.max_age) { + if (net_ratelimit()) + br_notice(p->br, + "port %u config from %pM" + " (message_age %ul > max_age %ul)\n", + p->port_no, + eth_hdr(skb)->h_source, + bpdu.message_age, bpdu.max_age); + goto out; + } - else if (buf[0] == BPDU_TYPE_TCN) { + br_received_config_bpdu(p, &bpdu); + } else if (buf[0] == BPDU_TYPE_TCN) { br_received_tcn_bpdu(p); } out: - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); err: kfree_skb(skb); - return 0; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index cc047f7fb6e..189ba1e7d85 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -14,23 +12,26 @@ */ #include <linux/kernel.h> -#include <linux/smp_lock.h> +#include <linux/kmod.h> #include <linux/etherdevice.h> +#include <linux/rtnetlink.h> #include "br_private.h" #include "br_private_stp.h" /* Port id is composed of priority and port number. - * NB: least significant bits of priority are dropped to + * NB: some bits of priority are dropped to * make room for more ports. */ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) { - return ((u16)priority << BR_PORT_BITS) + return ((u16)priority << BR_PORT_BITS) | (port_no & ((1<<BR_PORT_BITS)-1)); } +#define BR_MAX_PORT_PRIORITY ((u16)~0 >> BR_PORT_BITS) + /* called under bridge lock */ void br_init_port(struct net_bridge_port *p) { @@ -39,8 +40,6 @@ void br_init_port(struct net_bridge_port *p) p->state = BR_STATE_BLOCKING; p->topology_change_ack = 0; p->config_pending = 0; - - br_stp_port_timer_init(p); } /* called under bridge lock */ @@ -51,11 +50,11 @@ void br_stp_enable_bridge(struct net_bridge *br) spin_lock_bh(&br->lock); mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_timer(&br->gc_timer, jiffies + HZ/10); - + br_config_bpdu_generation(br); list_for_each_entry(p, &br->port_list, list) { - if ((p->dev->flags & IFF_UP) && netif_carrier_ok(p->dev)) + if (netif_running(p->dev) && netif_oper_up(p->dev)) br_stp_enable_port(p); } @@ -67,7 +66,7 @@ void br_stp_disable_bridge(struct net_bridge *br) { struct net_bridge_port *p; - spin_lock(&br->lock); + spin_lock_bh(&br->lock); list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED) br_stp_disable_port(p); @@ -76,7 +75,7 @@ void br_stp_disable_bridge(struct net_bridge *br) br->topology_change = 0; br->topology_change_detected = 0; - spin_unlock(&br->lock); + spin_unlock_bh(&br->lock); del_timer_sync(&br->hello_timer); del_timer_sync(&br->topology_change_timer); @@ -89,28 +88,32 @@ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); } /* called under bridge lock */ void br_stp_disable_port(struct net_bridge_port *p) { - struct net_bridge *br; + struct net_bridge *br = p->br; int wasroot; - br = p->br; - printk(KERN_INFO "%s: port %i(%s) entering %s state\n", - br->dev->name, p->port_no, p->dev->name, "disabled"); - wasroot = br_is_root_bridge(br); br_become_designated_port(p); p->state = BR_STATE_DISABLED; p->topology_change_ack = 0; p->config_pending = 0; + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + del_timer(&p->message_age_timer); del_timer(&p->forward_delay_timer); del_timer(&p->hold_timer); + br_fdb_delete_by_port(br, p, 0); + br_multicast_disable_port(p); + br_configuration_update(br); br_port_state_selection(br); @@ -119,26 +122,90 @@ void br_stp_disable_port(struct net_bridge_port *p) br_become_root_bridge(br); } +static void br_stp_start(struct net_bridge *br) +{ + int r; + char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; + char *envp[] = { NULL }; + + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + + spin_lock_bh(&br->lock); + + if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); + else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); + + if (r == 0) { + br->stp_enabled = BR_USER_STP; + br_debug(br, "userspace STP started\n"); + } else { + br->stp_enabled = BR_KERNEL_STP; + br_debug(br, "using kernel STP\n"); + + /* To start timers on any ports left in blocking */ + br_port_state_selection(br); + } + + spin_unlock_bh(&br->lock); +} + +static void br_stp_stop(struct net_bridge *br) +{ + int r; + char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; + char *envp[] = { NULL }; + + if (br->stp_enabled == BR_USER_STP) { + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + br_info(br, "userspace STP stopped, return code %d\n", r); + + /* To start timers on any ports left in blocking */ + spin_lock_bh(&br->lock); + br_port_state_selection(br); + spin_unlock_bh(&br->lock); + } + + br->stp_enabled = BR_NO_STP; +} + +void br_stp_set_enabled(struct net_bridge *br, unsigned long val) +{ + ASSERT_RTNL(); + + if (val) { + if (br->stp_enabled == BR_NO_STP) + br_stp_start(br); + } else { + if (br->stp_enabled != BR_NO_STP) + br_stp_stop(br); + } +} + /* called under bridge lock */ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) { - unsigned char oldaddr[6]; + /* should be aligned on 2 bytes for ether_addr_equal() */ + unsigned short oldaddr_aligned[ETH_ALEN >> 1]; + unsigned char *oldaddr = (unsigned char *)oldaddr_aligned; struct net_bridge_port *p; int wasroot; wasroot = br_is_root_bridge(br); + br_fdb_change_mac_address(br, addr); + memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN); memcpy(br->bridge_id.addr, addr, ETH_ALEN); memcpy(br->dev->dev_addr, addr, ETH_ALEN); list_for_each_entry(p, &br->port_list, list) { - if (!compare_ether_addr(p->designated_bridge.addr, oldaddr)) + if (ether_addr_equal(p->designated_bridge.addr, oldaddr)) memcpy(p->designated_bridge.addr, addr, ETH_ALEN); - if (!compare_ether_addr(p->designated_root.addr, oldaddr)) + if (ether_addr_equal(p->designated_root.addr, oldaddr)) memcpy(p->designated_root.addr, addr, ETH_ALEN); - } br_configuration_update(br); @@ -147,14 +214,21 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr) br_become_root_bridge(br); } -static const unsigned char br_mac_zero[6]; +/* should be aligned on 2 bytes for ether_addr_equal() */ +static const unsigned short br_mac_zero_aligned[ETH_ALEN >> 1]; /* called under bridge lock */ -void br_stp_recalculate_bridge_id(struct net_bridge *br) +bool br_stp_recalculate_bridge_id(struct net_bridge *br) { + const unsigned char *br_mac_zero = + (const unsigned char *)br_mac_zero_aligned; const unsigned char *addr = br_mac_zero; struct net_bridge_port *p; + /* user has chosen a value so keep it */ + if (br->dev->addr_assign_type == NET_ADDR_SET) + return false; + list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) @@ -162,8 +236,11 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br) } - if (compare_ether_addr(br->bridge_id.addr, addr)) - br_stp_change_bridge_id(br, addr); + if (ether_addr_equal(br->bridge_id.addr, addr)) + return false; /* no change */ + + br_stp_change_bridge_id(br, addr); + return true; } /* called under bridge lock */ @@ -192,10 +269,14 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) } /* called under bridge lock */ -void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) +int br_stp_set_port_priority(struct net_bridge_port *p, unsigned long newprio) { - port_id new_port_id = br_make_port_id(newprio, p->port_no); + port_id new_port_id; + + if (newprio > BR_MAX_PORT_PRIORITY) + return -ERANGE; + new_port_id = br_make_port_id(newprio, p->port_no); if (br_is_designated_port(p)) p->designated_port = new_port_id; @@ -206,14 +287,22 @@ void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) br_become_designated_port(p); br_port_state_selection(p->br); } + + return 0; } /* called under bridge lock */ -void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost) +int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost) { + if (path_cost < BR_MIN_PATH_COST || + path_cost > BR_MAX_PATH_COST) + return -ERANGE; + + p->flags |= BR_ADMIN_COST; p->path_cost = path_cost; br_configuration_update(p->br); br_port_state_selection(p->br); + return 0; } ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id) diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 9bef55f5642..558c46d19e0 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -5,8 +5,6 @@ * Authors: * Lennert Buytenhek <buytenh@gnu.org> * - * $Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $ - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -15,7 +13,6 @@ #include <linux/kernel.h> #include <linux/times.h> -#include <linux/smp_lock.h> #include "br_private.h" #include "br_private_stp.h" @@ -27,7 +24,7 @@ static int br_is_designated_for_some_port(const struct net_bridge *br) list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED && - !memcmp(&p->designated_bridge, &br->bridge_id, 8)) + !memcmp(&p->designated_bridge, &br->bridge_id, 8)) return 1; } @@ -37,15 +34,15 @@ static int br_is_designated_for_some_port(const struct net_bridge *br) static void br_hello_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *)arg; - - pr_debug("%s: hello timer expired\n", br->dev->name); - spin_lock_bh(&br->lock); + + br_debug(br, "hello timer expired\n"); + spin_lock(&br->lock); if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - mod_timer(&br->hello_timer, jiffies + br->hello_time); + mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); } - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); } static void br_message_age_timer_expired(unsigned long arg) @@ -58,20 +55,16 @@ static void br_message_age_timer_expired(unsigned long arg) if (p->state == BR_STATE_DISABLED) return; - - pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n", - br->dev->name, - id->prio[0], id->prio[1], - id->addr[0], id->addr[1], id->addr[2], - id->addr[3], id->addr[4], id->addr[5], - p->port_no, p->dev->name); + br_info(br, "port %u(%s) neighbor %.2x%.2x.%pM lost\n", + (unsigned int) p->port_no, p->dev->name, + id->prio[0], id->prio[1], &id->addr); /* * According to the spec, the message age timer cannot be * running when we are the root bridge. So.. this was_root * check is redundant. I'm leaving it in for now, though. */ - spin_lock_bh(&br->lock); + spin_lock(&br->lock); if (p->state == BR_STATE_DISABLED) goto unlock; was_root = br_is_root_bridge(br); @@ -82,7 +75,7 @@ static void br_message_age_timer_expired(unsigned long arg) if (br_is_root_bridge(br) && !was_root) br_become_root_bridge(br); unlock: - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); } static void br_forward_delay_timer_expired(unsigned long arg) @@ -90,9 +83,9 @@ static void br_forward_delay_timer_expired(unsigned long arg) struct net_bridge_port *p = (struct net_bridge_port *) arg; struct net_bridge *br = p->br; - pr_debug("%s: %d(%s) forward delay timer\n", - br->dev->name, p->port_no, p->dev->name); - spin_lock_bh(&br->lock); + br_debug(br, "port %u(%s) forward delay timer\n", + (unsigned int) p->port_no, p->dev->name); + spin_lock(&br->lock); if (p->state == BR_STATE_LISTENING) { p->state = BR_STATE_LEARNING; mod_timer(&p->forward_delay_timer, @@ -101,88 +94,81 @@ static void br_forward_delay_timer_expired(unsigned long arg) p->state = BR_STATE_FORWARDING; if (br_is_designated_for_some_port(br)) br_topology_change_detection(br); + netif_carrier_on(br->dev); } br_log_state(p); - spin_unlock_bh(&br->lock); + br_ifinfo_notify(RTM_NEWLINK, p); + spin_unlock(&br->lock); } static void br_tcn_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *) arg; - pr_debug("%s: tcn timer expired\n", br->dev->name); - spin_lock_bh(&br->lock); - if (br->dev->flags & IFF_UP) { + br_debug(br, "tcn timer expired\n"); + spin_lock(&br->lock); + if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { br_transmit_tcn(br); - - mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); + + mod_timer(&br->tcn_timer, jiffies + br->bridge_hello_time); } - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); } static void br_topology_change_timer_expired(unsigned long arg) { struct net_bridge *br = (struct net_bridge *) arg; - pr_debug("%s: topo change timer expired\n", br->dev->name); - spin_lock_bh(&br->lock); + br_debug(br, "topo change timer expired\n"); + spin_lock(&br->lock); br->topology_change_detected = 0; br->topology_change = 0; - spin_unlock_bh(&br->lock); + spin_unlock(&br->lock); } static void br_hold_timer_expired(unsigned long arg) { struct net_bridge_port *p = (struct net_bridge_port *) arg; - pr_debug("%s: %d(%s) hold timer expired\n", - p->br->dev->name, p->port_no, p->dev->name); + br_debug(p->br, "port %u(%s) hold timer expired\n", + (unsigned int) p->port_no, p->dev->name); - spin_lock_bh(&p->br->lock); + spin_lock(&p->br->lock); if (p->config_pending) br_transmit_config(p); - spin_unlock_bh(&p->br->lock); -} - -static inline void br_timer_init(struct timer_list *timer, - void (*_function)(unsigned long), - unsigned long _data) -{ - init_timer(timer); - timer->function = _function; - timer->data = _data; + spin_unlock(&p->br->lock); } void br_stp_timer_init(struct net_bridge *br) { - br_timer_init(&br->hello_timer, br_hello_timer_expired, + setup_timer(&br->hello_timer, br_hello_timer_expired, (unsigned long) br); - br_timer_init(&br->tcn_timer, br_tcn_timer_expired, + setup_timer(&br->tcn_timer, br_tcn_timer_expired, (unsigned long) br); - br_timer_init(&br->topology_change_timer, + setup_timer(&br->topology_change_timer, br_topology_change_timer_expired, (unsigned long) br); - br_timer_init(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); + setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); } void br_stp_port_timer_init(struct net_bridge_port *p) { - br_timer_init(&p->message_age_timer, br_message_age_timer_expired, + setup_timer(&p->message_age_timer, br_message_age_timer_expired, (unsigned long) p); - br_timer_init(&p->forward_delay_timer, br_forward_delay_timer_expired, + setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired, (unsigned long) p); - - br_timer_init(&p->hold_timer, br_hold_timer_expired, + + setup_timer(&p->hold_timer, br_hold_timer_expired, (unsigned long) p); -} +} /* Report ticks left (in USER_HZ) used for API */ unsigned long br_timer_value(const struct timer_list *timer) { return timer_pending(timer) - ? jiffies_to_clock_t(timer->expires - jiffies) : 0; + ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0; } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 6f577f16c4c..c9e2572b15f 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -1,5 +1,5 @@ /* - * Sysfs attributes of bridge ports + * Sysfs attributes of bridge * Linux ethernet bridge * * Authors: @@ -14,6 +14,7 @@ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> @@ -21,244 +22,756 @@ #include "br_private.h" -#define to_class_dev(obj) container_of(obj,struct class_device,kobj) -#define to_net_dev(class) container_of(class, struct net_device, class_dev) -#define to_bridge(cd) ((struct net_bridge *)(to_net_dev(cd)->priv)) +#define to_dev(obj) container_of(obj, struct device, kobj) +#define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* * Common code for storing bridge parameters. */ -static ssize_t store_bridge_parm(struct class_device *cd, +static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, - void (*set)(struct net_bridge *, unsigned long)) + int (*set)(struct net_bridge *, unsigned long)) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); char *endp; unsigned long val; + int err; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; val = simple_strtoul(buf, &endp, 0); if (endp == buf) return -EINVAL; - spin_lock_bh(&br->lock); - (*set)(br, val); - spin_unlock_bh(&br->lock); - return len; + err = (*set)(br, val); + return err ? err : len; } -static ssize_t show_forward_delay(struct class_device *cd, char *buf) +static ssize_t forward_delay_show(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static void set_forward_delay(struct net_bridge *br, unsigned long val) +static ssize_t forward_delay_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { - unsigned long delay = clock_t_to_jiffies(val); - br->forward_delay = delay; - if (br_is_root_bridge(br)) - br->bridge_forward_delay = delay; + return store_bridge_parm(d, buf, len, br_set_forward_delay); } +static DEVICE_ATTR_RW(forward_delay); -static ssize_t store_forward_delay(struct class_device *cd, const char *buf, - size_t len) -{ - return store_bridge_parm(cd, buf, len, set_forward_delay); -} -static CLASS_DEVICE_ATTR(forward_delay, S_IRUGO | S_IWUSR, - show_forward_delay, store_forward_delay); - -static ssize_t show_hello_time(struct class_device *cd, char *buf) +static ssize_t hello_time_show(struct device *d, struct device_attribute *attr, + char *buf) { return sprintf(buf, "%lu\n", - jiffies_to_clock_t(to_bridge(cd)->hello_time)); -} - -static void set_hello_time(struct net_bridge *br, unsigned long val) -{ - unsigned long t = clock_t_to_jiffies(val); - br->hello_time = t; - if (br_is_root_bridge(br)) - br->bridge_hello_time = t; + jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static ssize_t store_hello_time(struct class_device *cd, const char *buf, +static ssize_t hello_time_store(struct device *d, + struct device_attribute *attr, const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_hello_time); + return store_bridge_parm(d, buf, len, br_set_hello_time); } +static DEVICE_ATTR_RW(hello_time); -static CLASS_DEVICE_ATTR(hello_time, S_IRUGO | S_IWUSR, show_hello_time, - store_hello_time); - -static ssize_t show_max_age(struct class_device *cd, char *buf) +static ssize_t max_age_show(struct device *d, struct device_attribute *attr, + char *buf) { return sprintf(buf, "%lu\n", - jiffies_to_clock_t(to_bridge(cd)->max_age)); -} - -static void set_max_age(struct net_bridge *br, unsigned long val) -{ - unsigned long t = clock_t_to_jiffies(val); - br->max_age = t; - if (br_is_root_bridge(br)) - br->bridge_max_age = t; + jiffies_to_clock_t(to_bridge(d)->max_age)); } -static ssize_t store_max_age(struct class_device *cd, const char *buf, - size_t len) +static ssize_t max_age_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_max_age); + return store_bridge_parm(d, buf, len, br_set_max_age); } +static DEVICE_ATTR_RW(max_age); -static CLASS_DEVICE_ATTR(max_age, S_IRUGO | S_IWUSR, show_max_age, - store_max_age); - -static ssize_t show_ageing_time(struct class_device *cd, char *buf) +static ssize_t ageing_time_show(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } -static void set_ageing_time(struct net_bridge *br, unsigned long val) +static int set_ageing_time(struct net_bridge *br, unsigned long val) { br->ageing_time = clock_t_to_jiffies(val); + return 0; } -static ssize_t store_ageing_time(struct class_device *cd, const char *buf, - size_t len) +static ssize_t ageing_time_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_ageing_time); + return store_bridge_parm(d, buf, len, set_ageing_time); } +static DEVICE_ATTR_RW(ageing_time); -static CLASS_DEVICE_ATTR(ageing_time, S_IRUGO | S_IWUSR, show_ageing_time, - store_ageing_time); -static ssize_t show_stp_state(struct class_device *cd, char *buf) +static ssize_t stp_state_show(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->stp_enabled); } -static void set_stp_state(struct net_bridge *br, unsigned long val) + +static ssize_t stp_state_store(struct device *d, + struct device_attribute *attr, const char *buf, + size_t len) { - br->stp_enabled = val; + struct net_bridge *br = to_bridge(d); + char *endp; + unsigned long val; + + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + br_stp_set_enabled(br, val); + rtnl_unlock(); + + return len; } +static DEVICE_ATTR_RW(stp_state); -static ssize_t store_stp_state(struct class_device *cd, - const char *buf, size_t len) +static ssize_t group_fwd_mask_show(struct device *d, + struct device_attribute *attr, + char *buf) { - return store_bridge_parm(cd, buf, len, set_stp_state); + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#x\n", br->group_fwd_mask); } -static CLASS_DEVICE_ATTR(stp_state, S_IRUGO | S_IWUSR, show_stp_state, - store_stp_state); -static ssize_t show_priority(struct class_device *cd, char *buf) +static ssize_t group_fwd_mask_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct net_bridge *br = to_bridge(d); + char *endp; + unsigned long val; + + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + val = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EINVAL; + + if (val & BR_GROUPFWD_RESTRICTED) + return -EINVAL; + + br->group_fwd_mask = val; + + return len; +} +static DEVICE_ATTR_RW(group_fwd_mask); + +static ssize_t priority_show(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } -static void set_priority(struct net_bridge *br, unsigned long val) +static int set_priority(struct net_bridge *br, unsigned long val) { br_stp_set_bridge_priority(br, (u16) val); + return 0; } -static ssize_t store_priority(struct class_device *cd, - const char *buf, size_t len) +static ssize_t priority_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) { - return store_bridge_parm(cd, buf, len, set_priority); + return store_bridge_parm(d, buf, len, set_priority); } -static CLASS_DEVICE_ATTR(priority, S_IRUGO | S_IWUSR, show_priority, - store_priority); +static DEVICE_ATTR_RW(priority); -static ssize_t show_root_id(struct class_device *cd, char *buf) +static ssize_t root_id_show(struct device *d, struct device_attribute *attr, + char *buf) { - return br_show_bridge_id(buf, &to_bridge(cd)->designated_root); + return br_show_bridge_id(buf, &to_bridge(d)->designated_root); } -static CLASS_DEVICE_ATTR(root_id, S_IRUGO, show_root_id, NULL); +static DEVICE_ATTR_RO(root_id); -static ssize_t show_bridge_id(struct class_device *cd, char *buf) +static ssize_t bridge_id_show(struct device *d, struct device_attribute *attr, + char *buf) { - return br_show_bridge_id(buf, &to_bridge(cd)->bridge_id); + return br_show_bridge_id(buf, &to_bridge(d)->bridge_id); } -static CLASS_DEVICE_ATTR(bridge_id, S_IRUGO, show_bridge_id, NULL); +static DEVICE_ATTR_RO(bridge_id); -static ssize_t show_root_port(struct class_device *cd, char *buf) +static ssize_t root_port_show(struct device *d, struct device_attribute *attr, + char *buf) { - return sprintf(buf, "%d\n", to_bridge(cd)->root_port); + return sprintf(buf, "%d\n", to_bridge(d)->root_port); } -static CLASS_DEVICE_ATTR(root_port, S_IRUGO, show_root_port, NULL); +static DEVICE_ATTR_RO(root_port); -static ssize_t show_root_path_cost(struct class_device *cd, char *buf) +static ssize_t root_path_cost_show(struct device *d, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_bridge(cd)->root_path_cost); + return sprintf(buf, "%d\n", to_bridge(d)->root_path_cost); } -static CLASS_DEVICE_ATTR(root_path_cost, S_IRUGO, show_root_path_cost, NULL); +static DEVICE_ATTR_RO(root_path_cost); -static ssize_t show_topology_change(struct class_device *cd, char *buf) +static ssize_t topology_change_show(struct device *d, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", to_bridge(cd)->topology_change); + return sprintf(buf, "%d\n", to_bridge(d)->topology_change); } -static CLASS_DEVICE_ATTR(topology_change, S_IRUGO, show_topology_change, NULL); +static DEVICE_ATTR_RO(topology_change); -static ssize_t show_topology_change_detected(struct class_device *cd, char *buf) +static ssize_t topology_change_detected_show(struct device *d, + struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%d\n", br->topology_change_detected); } -static CLASS_DEVICE_ATTR(topology_change_detected, S_IRUGO, show_topology_change_detected, NULL); +static DEVICE_ATTR_RO(topology_change_detected); -static ssize_t show_hello_timer(struct class_device *cd, char *buf) +static ssize_t hello_timer_show(struct device *d, + struct device_attribute *attr, char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->hello_timer)); } -static CLASS_DEVICE_ATTR(hello_timer, S_IRUGO, show_hello_timer, NULL); +static DEVICE_ATTR_RO(hello_timer); -static ssize_t show_tcn_timer(struct class_device *cd, char *buf) +static ssize_t tcn_timer_show(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->tcn_timer)); } -static CLASS_DEVICE_ATTR(tcn_timer, S_IRUGO, show_tcn_timer, NULL); +static DEVICE_ATTR_RO(tcn_timer); -static ssize_t show_topology_change_timer(struct class_device *cd, char *buf) +static ssize_t topology_change_timer_show(struct device *d, + struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->topology_change_timer)); } -static CLASS_DEVICE_ATTR(topology_change_timer, S_IRUGO, show_topology_change_timer, NULL); +static DEVICE_ATTR_RO(topology_change_timer); -static ssize_t show_gc_timer(struct class_device *cd, char *buf) +static ssize_t gc_timer_show(struct device *d, struct device_attribute *attr, + char *buf) { - struct net_bridge *br = to_bridge(cd); + struct net_bridge *br = to_bridge(d); return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); } -static CLASS_DEVICE_ATTR(gc_timer, S_IRUGO, show_gc_timer, NULL); +static DEVICE_ATTR_RO(gc_timer); + +static ssize_t group_addr_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%x:%x:%x:%x:%x:%x\n", + br->group_addr[0], br->group_addr[1], + br->group_addr[2], br->group_addr[3], + br->group_addr[4], br->group_addr[5]); +} + +static ssize_t group_addr_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_bridge *br = to_bridge(d); + u8 new_addr[6]; + int i; + + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &new_addr[0], &new_addr[1], &new_addr[2], + &new_addr[3], &new_addr[4], &new_addr[5]) != 6) + return -EINVAL; + + if (!is_link_local_ether_addr(new_addr)) + return -EINVAL; + + if (new_addr[5] == 1 || /* 802.3x Pause address */ + new_addr[5] == 2 || /* 802.3ad Slow protocols */ + new_addr[5] == 3) /* 802.1X PAE address */ + return -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + spin_lock_bh(&br->lock); + for (i = 0; i < 6; i++) + br->group_addr[i] = new_addr[i]; + spin_unlock_bh(&br->lock); + + br->group_addr_set = true; + br_recalculate_fwd_mask(br); + + rtnl_unlock(); + + return len; +} + +static DEVICE_ATTR_RW(group_addr); + +static ssize_t flush_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_bridge *br = to_bridge(d); + + if (!ns_capable(dev_net(br->dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + br_fdb_flush(br); + return len; +} +static DEVICE_ATTR_WO(flush); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t multicast_router_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_router); +} + +static ssize_t multicast_router_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_router); +} +static DEVICE_ATTR_RW(multicast_router); + +static ssize_t multicast_snooping_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", !br->multicast_disabled); +} + +static ssize_t multicast_snooping_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_toggle); +} +static DEVICE_ATTR_RW(multicast_snooping); + +static ssize_t multicast_query_use_ifaddr_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); +} + +static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_use_ifaddr = !!val; + return 0; +} + +static ssize_t +multicast_query_use_ifaddr_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_use_ifaddr); +} +static DEVICE_ATTR_RW(multicast_query_use_ifaddr); + +static ssize_t multicast_querier_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_querier); +} + +static ssize_t multicast_querier_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_querier); +} +static DEVICE_ATTR_RW(multicast_querier); + +static ssize_t hash_elasticity_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_elasticity); +} + +static int set_elasticity(struct net_bridge *br, unsigned long val) +{ + br->hash_elasticity = val; + return 0; +} + +static ssize_t hash_elasticity_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_elasticity); +} +static DEVICE_ATTR_RW(hash_elasticity); + +static ssize_t hash_max_show(struct device *d, struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->hash_max); +} + +static ssize_t hash_max_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_multicast_set_hash_max); +} +static DEVICE_ATTR_RW(hash_max); + +static ssize_t multicast_last_member_count_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_last_member_count); +} + +static int set_last_member_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_count = val; + return 0; +} + +static ssize_t multicast_last_member_count_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_count); +} +static DEVICE_ATTR_RW(multicast_last_member_count); + +static ssize_t multicast_startup_query_count_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->multicast_startup_query_count); +} + +static int set_startup_query_count(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_count = val; + return 0; +} + +static ssize_t multicast_startup_query_count_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_count); +} +static DEVICE_ATTR_RW(multicast_startup_query_count); + +static ssize_t multicast_last_member_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_last_member_interval)); +} + +static int set_last_member_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_last_member_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_last_member_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_last_member_interval); +} +static DEVICE_ATTR_RW(multicast_last_member_interval); + +static ssize_t multicast_membership_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_membership_interval)); +} + +static int set_membership_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_membership_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_membership_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_membership_interval); +} +static DEVICE_ATTR_RW(multicast_membership_interval); + +static ssize_t multicast_querier_interval_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_querier_interval)); +} + +static int set_querier_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_querier_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_querier_interval_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_querier_interval); +} +static DEVICE_ATTR_RW(multicast_querier_interval); + +static ssize_t multicast_query_interval_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_interval)); +} + +static int set_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_query_interval_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_interval); +} +static DEVICE_ATTR_RW(multicast_query_interval); + +static ssize_t multicast_query_response_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_query_response_interval)); +} + +static int set_query_response_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_response_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_query_response_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_response_interval); +} +static DEVICE_ATTR_RW(multicast_query_response_interval); + +static ssize_t multicast_startup_query_interval_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf( + buf, "%lu\n", + jiffies_to_clock_t(br->multicast_startup_query_interval)); +} + +static int set_startup_query_interval(struct net_bridge *br, unsigned long val) +{ + br->multicast_startup_query_interval = clock_t_to_jiffies(val); + return 0; +} + +static ssize_t multicast_startup_query_interval_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_startup_query_interval); +} +static DEVICE_ATTR_RW(multicast_startup_query_interval); +#endif +#ifdef CONFIG_BRIDGE_NETFILTER +static ssize_t nf_call_iptables_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_iptables); +} + +static int set_nf_call_iptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_iptables = val ? true : false; + return 0; +} + +static ssize_t nf_call_iptables_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_iptables); +} +static DEVICE_ATTR_RW(nf_call_iptables); + +static ssize_t nf_call_ip6tables_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_ip6tables); +} + +static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_ip6tables = val ? true : false; + return 0; +} + +static ssize_t nf_call_ip6tables_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_ip6tables); +} +static DEVICE_ATTR_RW(nf_call_ip6tables); + +static ssize_t nf_call_arptables_show( + struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%u\n", br->nf_call_arptables); +} + +static int set_nf_call_arptables(struct net_bridge *br, unsigned long val) +{ + br->nf_call_arptables = val ? true : false; + return 0; +} + +static ssize_t nf_call_arptables_store( + struct device *d, struct device_attribute *attr, const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_nf_call_arptables); +} +static DEVICE_ATTR_RW(nf_call_arptables); +#endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +static ssize_t vlan_filtering_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->vlan_enabled); +} + +static ssize_t vlan_filtering_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_filter_toggle); +} +static DEVICE_ATTR_RW(vlan_filtering); + +static ssize_t vlan_protocol_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%#06x\n", ntohs(br->vlan_proto)); +} + +static ssize_t vlan_protocol_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_proto); +} +static DEVICE_ATTR_RW(vlan_protocol); +#endif static struct attribute *bridge_attrs[] = { - &class_device_attr_forward_delay.attr, - &class_device_attr_hello_time.attr, - &class_device_attr_max_age.attr, - &class_device_attr_ageing_time.attr, - &class_device_attr_stp_state.attr, - &class_device_attr_priority.attr, - &class_device_attr_bridge_id.attr, - &class_device_attr_root_id.attr, - &class_device_attr_root_path_cost.attr, - &class_device_attr_root_port.attr, - &class_device_attr_topology_change.attr, - &class_device_attr_topology_change_detected.attr, - &class_device_attr_hello_timer.attr, - &class_device_attr_tcn_timer.attr, - &class_device_attr_topology_change_timer.attr, - &class_device_attr_gc_timer.attr, + &dev_attr_forward_delay.attr, + &dev_attr_hello_time.attr, + &dev_attr_max_age.attr, + &dev_attr_ageing_time.attr, + &dev_attr_stp_state.attr, + &dev_attr_group_fwd_mask.attr, + &dev_attr_priority.attr, + &dev_attr_bridge_id.attr, + &dev_attr_root_id.attr, + &dev_attr_root_path_cost.attr, + &dev_attr_root_port.attr, + &dev_attr_topology_change.attr, + &dev_attr_topology_change_detected.attr, + &dev_attr_hello_timer.attr, + &dev_attr_tcn_timer.attr, + &dev_attr_topology_change_timer.attr, + &dev_attr_gc_timer.attr, + &dev_attr_group_addr.attr, + &dev_attr_flush.attr, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &dev_attr_multicast_router.attr, + &dev_attr_multicast_snooping.attr, + &dev_attr_multicast_querier.attr, + &dev_attr_multicast_query_use_ifaddr.attr, + &dev_attr_hash_elasticity.attr, + &dev_attr_hash_max.attr, + &dev_attr_multicast_last_member_count.attr, + &dev_attr_multicast_startup_query_count.attr, + &dev_attr_multicast_last_member_interval.attr, + &dev_attr_multicast_membership_interval.attr, + &dev_attr_multicast_querier_interval.attr, + &dev_attr_multicast_query_interval.attr, + &dev_attr_multicast_query_response_interval.attr, + &dev_attr_multicast_startup_query_interval.attr, +#endif +#ifdef CONFIG_BRIDGE_NETFILTER + &dev_attr_nf_call_iptables.attr, + &dev_attr_nf_call_ip6tables.attr, + &dev_attr_nf_call_arptables.attr, +#endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + &dev_attr_vlan_filtering.attr, + &dev_attr_vlan_protocol.attr, +#endif NULL }; @@ -273,31 +786,31 @@ static struct attribute_group bridge_group = { * * Returns the number of bytes read. */ -static ssize_t brforward_read(struct kobject *kobj, char *buf, - loff_t off, size_t count) +static ssize_t brforward_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { - struct class_device *cdev = to_class_dev(kobj); - struct net_bridge *br = to_bridge(cdev); + struct device *dev = to_dev(kobj); + struct net_bridge *br = to_bridge(dev); int n; /* must read whole records */ if (off % sizeof(struct __fdb_entry) != 0) return -EINVAL; - n = br_fdb_fillbuf(br, buf, + n = br_fdb_fillbuf(br, buf, count / sizeof(struct __fdb_entry), off / sizeof(struct __fdb_entry)); if (n > 0) n *= sizeof(struct __fdb_entry); - + return n; } static struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, - .mode = S_IRUGO, - .owner = THIS_MODULE, }, + .mode = S_IRUGO, }, .read = brforward_read, }; @@ -314,41 +827,35 @@ static struct bin_attribute bridge_forward = { */ int br_sysfs_addbr(struct net_device *dev) { - struct kobject *brobj = &dev->class_dev.kobj; + struct kobject *brobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); int err; err = sysfs_create_group(brobj, &bridge_group); if (err) { pr_info("%s: can't create group %s/%s\n", - __FUNCTION__, dev->name, bridge_group.name); + __func__, dev->name, bridge_group.name); goto out1; } err = sysfs_create_bin_file(brobj, &bridge_forward); if (err) { - pr_info("%s: can't create attribue file %s/%s\n", - __FUNCTION__, dev->name, bridge_forward.attr.name); + pr_info("%s: can't create attribute file %s/%s\n", + __func__, dev->name, bridge_forward.attr.name); goto out2; } - - kobject_set_name(&br->ifobj, SYSFS_BRIDGE_PORT_SUBDIR); - br->ifobj.ktype = NULL; - br->ifobj.kset = NULL; - br->ifobj.parent = brobj; - - err = kobject_register(&br->ifobj); - if (err) { + br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj); + if (!br->ifobj) { pr_info("%s: can't add kobject (directory) %s/%s\n", - __FUNCTION__, dev->name, br->ifobj.name); + __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); goto out3; } return 0; out3: - sysfs_remove_bin_file(&dev->class_dev.kobj, &bridge_forward); + sysfs_remove_bin_file(&dev->dev.kobj, &bridge_forward); out2: - sysfs_remove_group(&dev->class_dev.kobj, &bridge_group); + sysfs_remove_group(&dev->dev.kobj, &bridge_group); out1: return err; @@ -356,10 +863,10 @@ int br_sysfs_addbr(struct net_device *dev) void br_sysfs_delbr(struct net_device *dev) { - struct kobject *kobj = &dev->class_dev.kobj; + struct kobject *kobj = &dev->dev.kobj; struct net_bridge *br = netdev_priv(dev); - kobject_unregister(&br->ifobj); + kobject_put(br->ifobj); sysfs_remove_bin_file(kobj, &bridge_forward); sysfs_remove_group(kobj, &bridge_group); } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 0ac0355d16d..e561cd59b8a 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -23,43 +23,64 @@ struct brport_attribute { struct attribute attr; ssize_t (*show)(struct net_bridge_port *, char *); - ssize_t (*store)(struct net_bridge_port *, unsigned long); + int (*store)(struct net_bridge_port *, unsigned long); }; -#define BRPORT_ATTR(_name,_mode,_show,_store) \ -struct brport_attribute brport_attr_##_name = { \ +#define BRPORT_ATTR(_name, _mode, _show, _store) \ +const struct brport_attribute brport_attr_##_name = { \ .attr = {.name = __stringify(_name), \ - .mode = _mode, \ - .owner = THIS_MODULE, }, \ + .mode = _mode }, \ .show = _show, \ .store = _store, \ }; -static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) +#define BRPORT_ATTR_FLAG(_name, _mask) \ +static ssize_t show_##_name(struct net_bridge_port *p, char *buf) \ +{ \ + return sprintf(buf, "%d\n", !!(p->flags & _mask)); \ +} \ +static int store_##_name(struct net_bridge_port *p, unsigned long v) \ +{ \ + return store_flag(p, v, _mask); \ +} \ +static BRPORT_ATTR(_name, S_IRUGO | S_IWUSR, \ + show_##_name, store_##_name) + +static int store_flag(struct net_bridge_port *p, unsigned long v, + unsigned long mask) { - return sprintf(buf, "%d\n", p->path_cost); + unsigned long flags; + + flags = p->flags; + + if (v) + flags |= mask; + else + flags &= ~mask; + + if (flags != p->flags) { + p->flags = flags; + br_port_flags_change(p, mask); + br_ifinfo_notify(RTM_NEWLINK, p); + } + return 0; } -static ssize_t store_path_cost(struct net_bridge_port *p, unsigned long v) + +static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) { - br_stp_set_path_cost(p, v); - return 0; + return sprintf(buf, "%d\n", p->path_cost); } + static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR, - show_path_cost, store_path_cost); + show_path_cost, br_stp_set_path_cost); static ssize_t show_priority(struct net_bridge_port *p, char *buf) { return sprintf(buf, "%d\n", p->priority); } -static ssize_t store_priority(struct net_bridge_port *p, unsigned long v) -{ - if (v >= (1<<(16-BR_PORT_BITS))) - return -ERANGE; - br_stp_set_port_priority(p, v); - return 0; -} + static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR, - show_priority, store_priority); + show_priority, br_stp_set_port_priority); static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) { @@ -137,7 +158,37 @@ static ssize_t show_hold_timer(struct net_bridge_port *p, } static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL); -static struct brport_attribute *brport_attrs[] = { +static int store_flush(struct net_bridge_port *p, unsigned long v) +{ + br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry + return 0; +} +static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); + +BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); +BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); +BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); +BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%d\n", p->multicast_router); +} + +static int store_multicast_router(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_set_port_router(p, v); +} +static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, + store_multicast_router); + +BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); +#endif + +static const struct brport_attribute *brport_attrs[] = { &brport_attr_path_cost, &brport_attr_priority, &brport_attr_port_id, @@ -152,37 +203,48 @@ static struct brport_attribute *brport_attrs[] = { &brport_attr_message_age_timer, &brport_attr_forward_delay_timer, &brport_attr_hold_timer, + &brport_attr_flush, + &brport_attr_hairpin_mode, + &brport_attr_bpdu_guard, + &brport_attr_root_block, + &brport_attr_learning, + &brport_attr_unicast_flood, +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + &brport_attr_multicast_router, + &brport_attr_multicast_fast_leave, +#endif NULL }; #define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr) #define to_brport(obj) container_of(obj, struct net_bridge_port, kobj) -static ssize_t brport_show(struct kobject * kobj, - struct attribute * attr, char * buf) +static ssize_t brport_show(struct kobject *kobj, + struct attribute *attr, char *buf) { - struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = to_brport(kobj); return brport_attr->show(p, buf); } -static ssize_t brport_store(struct kobject * kobj, - struct attribute * attr, - const char * buf, size_t count) +static ssize_t brport_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t count) { - struct brport_attribute * brport_attr = to_brport_attr(attr); - struct net_bridge_port * p = to_brport(kobj); + struct brport_attribute *brport_attr = to_brport_attr(attr); + struct net_bridge_port *p = to_brport(kobj); ssize_t ret = -EINVAL; char *endp; unsigned long val; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(dev_net(p->dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; val = simple_strtoul(buf, &endp, 0); if (endp != buf) { - rtnl_lock(); + if (!rtnl_trylock()) + return restart_syscall(); if (p->dev && p->br && brport_attr->store) { spin_lock_bh(&p->br->lock); ret = brport_attr->store(p, val); @@ -195,78 +257,56 @@ static ssize_t brport_store(struct kobject * kobj, return ret; } -/* called from kobject_put when port ref count goes to zero. */ -static void brport_release(struct kobject *kobj) -{ - kfree(container_of(kobj, struct net_bridge_port, kobj)); -} - -static struct sysfs_ops brport_sysfs_ops = { +const struct sysfs_ops brport_sysfs_ops = { .show = brport_show, .store = brport_store, }; -static struct kobj_type brport_ktype = { - .sysfs_ops = &brport_sysfs_ops, - .release = brport_release, -}; - - /* * Add sysfs entries to ethernet device added to a bridge. * Creates a brport subdirectory with bridge attributes. - * Puts symlink in bridge's brport subdirectory + * Puts symlink in bridge's brif subdirectory */ int br_sysfs_addif(struct net_bridge_port *p) { struct net_bridge *br = p->br; - struct brport_attribute **a; + const struct brport_attribute **a; int err; - ASSERT_RTNL(); - - kobject_set_name(&p->kobj, SYSFS_BRIDGE_PORT_ATTR); - p->kobj.ktype = &brport_ktype; - p->kobj.parent = &(p->dev->class_dev.kobj); - p->kobj.kset = NULL; - - err = kobject_add(&p->kobj); - if(err) - goto out1; - - err = sysfs_create_link(&p->kobj, &br->dev->class_dev.kobj, + err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, SYSFS_BRIDGE_PORT_LINK); if (err) - goto out2; + return err; for (a = brport_attrs; *a; ++a) { err = sysfs_create_file(&p->kobj, &((*a)->attr)); if (err) - goto out2; + return err; } - err = sysfs_create_link(&br->ifobj, &p->kobj, p->dev->name); - if (err) - goto out2; - - kobject_uevent(&p->kobj, KOBJ_ADD); - return 0; - out2: - kobject_del(&p->kobj); - out1: - return err; + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name); } -void br_sysfs_removeif(struct net_bridge_port *p) +/* Rename bridge's brif symlink */ +int br_sysfs_renameif(struct net_bridge_port *p) { - pr_debug("br_sysfs_removeif\n"); - sysfs_remove_link(&p->br->ifobj, p->dev->name); - kobject_uevent(&p->kobj, KOBJ_REMOVE); - kobject_del(&p->kobj); -} + struct net_bridge *br = p->br; + int err; -void br_sysfs_freeif(struct net_bridge_port *p) -{ - pr_debug("br_sysfs_freeif\n"); - kobject_put(&p->kobj); + /* If a rename fails, the rollback will cause another + * rename call with the existing name. + */ + if (!strncmp(p->sysfs_name, p->dev->name, IFNAMSIZ)) + return 0; + + err = sysfs_rename_link(br->ifobj, &p->kobj, + p->sysfs_name, p->dev->name); + if (err) + netdev_notice(br->dev, "unable to rename link %s to %s", + p->sysfs_name, p->dev->name); + else + strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ); + + return err; } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c new file mode 100644 index 00000000000..2b2774fe070 --- /dev/null +++ b/net/bridge/br_vlan.c @@ -0,0 +1,587 @@ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> + +#include "br_private.h" + +static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid == vid) + return; + + smp_wmb(); + v->pvid = vid; +} + +static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +{ + if (v->pvid != vid) + return; + + smp_wmb(); + v->pvid = 0; +} + +static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +{ + if (flags & BRIDGE_VLAN_INFO_PVID) + __vlan_add_pvid(v, vid); + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) + set_bit(vid, v->untagged_bitmap); +} + +static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +{ + struct net_bridge_port *p = NULL; + struct net_bridge *br; + struct net_device *dev; + int err; + + if (test_bit(vid, v->vlan_bitmap)) { + __vlan_add_flags(v, vid, flags); + return 0; + } + + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + + if (p) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = vlan_vid_add(dev, br->vlan_proto, vid); + if (err) + return err; + } + + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; + } + + set_bit(vid, v->vlan_bitmap); + v->num_vlans++; + __vlan_add_flags(v, vid, flags); + + return 0; + +out_filt: + if (p) + vlan_vid_del(dev, br->vlan_proto, vid); + return err; +} + +static int __vlan_del(struct net_port_vlans *v, u16 vid) +{ + if (!test_bit(vid, v->vlan_bitmap)) + return -EINVAL; + + __vlan_delete_pvid(v, vid); + clear_bit(vid, v->untagged_bitmap); + + if (v->port_idx) { + struct net_bridge_port *p = v->parent.port; + vlan_vid_del(p->dev, p->br->vlan_proto, vid); + } + + clear_bit(vid, v->vlan_bitmap); + v->num_vlans--; + if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { + if (v->port_idx) + RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); + else + RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); + } + return 0; +} + +static void __vlan_flush(struct net_port_vlans *v) +{ + smp_wmb(); + v->pvid = 0; + bitmap_zero(v->vlan_bitmap, VLAN_N_VID); + if (v->port_idx) + RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); + else + RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); + kfree_rcu(v, rcu); +} + +struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_port_vlans *pv, + struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + goto out; + + /* Vlan filter table must be configured at this point. The + * only exception is the bridge is set in promisc mode and the + * packet is destined for the bridge device. In this case + * pass the packet as is. + */ + if (!pv) { + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { + goto out; + } else { + kfree_skb(skb); + return NULL; + } + } + + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id is set in the untagged bitmap, + * send untagged; otherwise, send tagged. + */ + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, pv->untagged_bitmap)) + skb->vlan_tci = 0; + +out: + return skb; +} + +/* Called under RCU */ +bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, + struct sk_buff *skb, u16 *vid) +{ + bool tagged; + __be16 proto; + + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) + return true; + + /* If there are no vlan in the permitted list, all packets are + * rejected. + */ + if (!v) + goto drop; + + proto = br->vlan_proto; + + /* If vlan tx offload is disabled on bridge device and frame was + * sent from vlan device on the bridge device, it does not have + * HW accelerated vlan tag. + */ + if (unlikely(!vlan_tx_tag_present(skb) && + skb->protocol == proto)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + + if (!br_vlan_get_tag(skb, vid)) { + /* Tagged frame */ + if (skb->vlan_proto != proto) { + /* Protocol-mismatch, empty out vlan_tci for new tag */ + skb_push(skb, ETH_HLEN); + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (unlikely(!skb)) + return false; + + skb_pull(skb, ETH_HLEN); + skb_reset_mac_len(skb); + *vid = 0; + tagged = false; + } else { + tagged = true; + } + } else { + /* Untagged frame */ + tagged = false; + } + + if (!*vid) { + u16 pvid = br_get_pvid(v); + + /* Frame had a tag with VID 0 or did not have a tag. + * See if pvid is set on this port. That tells us which + * vlan untagged or priority-tagged traffic belongs to. + */ + if (pvid == VLAN_N_VID) + goto drop; + + /* PVID is set on this port. Any untagged or priority-tagged + * ingress frame is considered to belong to this vlan. + */ + *vid = pvid; + if (likely(!tagged)) + /* Untagged Frame. */ + __vlan_hwaccel_put_tag(skb, proto, pvid); + else + /* Priority-tagged Frame. + * At this point, We know that skb->vlan_tci had + * VLAN_TAG_PRESENT bit and its VID field was 0x000. + * We update only VID field and preserve PCP field. + */ + skb->vlan_tci |= pvid; + + return true; + } + + /* Frame had a valid vlan tag. See if vlan is allowed */ + if (test_bit(*vid, v->vlan_bitmap)) + return true; +drop: + kfree_skb(skb); + return false; +} + +/* Called under RCU. */ +bool br_allowed_egress(struct net_bridge *br, + const struct net_port_vlans *v, + const struct sk_buff *skb) +{ + u16 vid; + + if (!br->vlan_enabled) + return true; + + if (!v) + return false; + + br_vlan_get_tag(skb, &vid); + if (test_bit(vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Called under RCU */ +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) +{ + struct net_bridge *br = p->br; + struct net_port_vlans *v; + + if (!br->vlan_enabled) + return true; + + v = rcu_dereference(p->vlan_info); + if (!v) + return false; + + if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) + *vid = 0; + + if (!*vid) { + *vid = br_get_pvid(v); + if (*vid == VLAN_N_VID) + return false; + + return true; + } + + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) + return -ENOMEM; + + pv->parent.br = br; + err = __vlan_add(pv, vid, flags); + if (err) + goto out; + + rcu_assign_pointer(br->vlan_info, pv); + return 0; +out: + kfree(pv); + return err; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int br_vlan_delete(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return -EINVAL; + + br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); + + __vlan_del(pv, vid); + return 0; +} + +void br_vlan_flush(struct net_bridge *br) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + pv = rtnl_dereference(br->vlan_info); + if (!pv) + return; + + __vlan_flush(pv); +} + +bool br_vlan_find(struct net_bridge *br, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(br->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} + +/* Must be protected by RTNL. */ +static void recalculate_group_addr(struct net_bridge *br) +{ + if (br->group_addr_set) + return; + + spin_lock_bh(&br->lock); + if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) { + /* Bridge Group Address */ + br->group_addr[5] = 0x00; + } else { /* vlan_enabled && ETH_P_8021AD */ + /* Provider Bridge Group Address */ + br->group_addr[5] = 0x08; + } + spin_unlock_bh(&br->lock); +} + +/* Must be protected by RTNL. */ +void br_recalculate_fwd_mask(struct net_bridge *br) +{ + if (!br->vlan_enabled || br->vlan_proto == htons(ETH_P_8021Q)) + br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; + else /* vlan_enabled && ETH_P_8021AD */ + br->group_fwd_mask_required = BR_GROUPFWD_8021AD & + ~(1u << br->group_addr[5]); +} + +int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val) +{ + if (!rtnl_trylock()) + return restart_syscall(); + + if (br->vlan_enabled == val) + goto unlock; + + br->vlan_enabled = val; + br_manage_promisc(br); + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); + +unlock: + rtnl_unlock(); + return 0; +} + +int br_vlan_set_proto(struct net_bridge *br, unsigned long val) +{ + int err = 0; + struct net_bridge_port *p; + struct net_port_vlans *pv; + __be16 proto, oldproto; + u16 vid, errvid; + + if (val != ETH_P_8021Q && val != ETH_P_8021AD) + return -EPROTONOSUPPORT; + + if (!rtnl_trylock()) + return restart_syscall(); + + proto = htons(val); + if (br->vlan_proto == proto) + goto unlock; + + /* Add VLANs for the new proto to the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + err = vlan_vid_add(p->dev, proto, vid); + if (err) + goto err_filt; + } + } + + oldproto = br->vlan_proto; + br->vlan_proto = proto; + + recalculate_group_addr(br); + br_recalculate_fwd_mask(br); + + /* Delete VLANs for the old proto from the device filter. */ + list_for_each_entry(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(p->dev, oldproto, vid); + } + +unlock: + rtnl_unlock(); + return err; + +err_filt: + errvid = vid; + for_each_set_bit(vid, pv->vlan_bitmap, errvid) + vlan_vid_del(p->dev, proto, vid); + + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + pv = rtnl_dereference(p->vlan_info); + if (!pv) + continue; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(p->dev, proto, vid); + } + + goto unlock; +} + +void br_vlan_init(struct net_bridge *br) +{ + br->vlan_proto = htons(ETH_P_8021Q); +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +{ + struct net_port_vlans *pv = NULL; + int err; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (pv) + return __vlan_add(pv, vid, flags); + + /* Create port vlan infomration + */ + pv = kzalloc(sizeof(*pv), GFP_KERNEL); + if (!pv) { + err = -ENOMEM; + goto clean_up; + } + + pv->port_idx = port->port_no; + pv->parent.port = port; + err = __vlan_add(pv, vid, flags); + if (err) + goto clean_up; + + rcu_assign_pointer(port->vlan_info, pv); + return 0; + +clean_up: + kfree(pv); + return err; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return -EINVAL; + + br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); + + return __vlan_del(pv, vid); +} + +void nbp_vlan_flush(struct net_bridge_port *port) +{ + struct net_port_vlans *pv; + u16 vid; + + ASSERT_RTNL(); + + pv = rtnl_dereference(port->vlan_info); + if (!pv) + return; + + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) + vlan_vid_del(port->dev, port->br->vlan_proto, vid); + + __vlan_flush(pv); +} + +bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) +{ + struct net_port_vlans *pv; + bool found = false; + + rcu_read_lock(); + pv = rcu_dereference(port->vlan_info); + + if (!pv) + goto out; + + if (test_bit(vid, pv->vlan_bitmap)) + found = true; + +out: + rcu_read_unlock(); + return found; +} diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index b84fc6075fe..629dc77874a 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -1,22 +1,36 @@ # # Bridge netfilter configuration # +# +menuconfig NF_TABLES_BRIDGE + depends on BRIDGE && NETFILTER && NF_TABLES + tristate "Ethernet Bridge nf_tables support" + +if NF_TABLES_BRIDGE + +config NFT_BRIDGE_META + tristate "Netfilter nf_table bridge meta support" + depends on NFT_META + help + Add support for bridge dedicated meta key. -menu "Bridge: Netfilter Configuration" - depends on BRIDGE && NETFILTER +endif # NF_TABLES_BRIDGE -config BRIDGE_NF_EBTABLES +menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" + depends on BRIDGE && NETFILTER && NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet filtering/NAT/brouting on the Ethernet bridge. + +if BRIDGE_NF_EBTABLES + # # tables # config BRIDGE_EBT_BROUTE tristate "ebt: broute table support" - depends on BRIDGE_NF_EBTABLES help The ebtables broute table is used to define rules that decide between bridging and routing frames, giving Linux the functionality of a @@ -27,7 +41,6 @@ config BRIDGE_EBT_BROUTE config BRIDGE_EBT_T_FILTER tristate "ebt: filter table support" - depends on BRIDGE_NF_EBTABLES help The ebtables filter table is used to define frame filtering rules at local input, forwarding and local output. See the man page for @@ -37,7 +50,6 @@ config BRIDGE_EBT_T_FILTER config BRIDGE_EBT_T_NAT tristate "ebt: nat table support" - depends on BRIDGE_NF_EBTABLES help The ebtables nat table is used to define rules that alter the MAC source address (MAC SNAT) or the MAC destination address (MAC DNAT). @@ -49,7 +61,6 @@ config BRIDGE_EBT_T_NAT # config BRIDGE_EBT_802_3 tristate "ebt: 802.3 filter support" - depends on BRIDGE_NF_EBTABLES help This option adds matching support for 802.3 Ethernet frames. @@ -57,7 +68,6 @@ config BRIDGE_EBT_802_3 config BRIDGE_EBT_AMONG tristate "ebt: among filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the among match, which allows matching the MAC source and/or destination address on a list of addresses. Optionally, @@ -67,7 +77,6 @@ config BRIDGE_EBT_AMONG config BRIDGE_EBT_ARP tristate "ebt: ARP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the ARP match, which allows ARP and RARP header field filtering. @@ -76,16 +85,23 @@ config BRIDGE_EBT_ARP config BRIDGE_EBT_IP tristate "ebt: IP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the IP match, which allows basic IP header field filtering. To compile it as a module, choose M here. If unsure, say N. +config BRIDGE_EBT_IP6 + tristate "ebt: IP6 filter support" + depends on BRIDGE_NF_EBTABLES && IPV6 + help + This option adds the IP6 match, which allows basic IPV6 header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + config BRIDGE_EBT_LIMIT tristate "ebt: limit match support" - depends on BRIDGE_NF_EBTABLES help This option adds the limit match, which allows you to control the rate at which a rule can be matched. This match is the @@ -96,7 +112,6 @@ config BRIDGE_EBT_LIMIT config BRIDGE_EBT_MARK tristate "ebt: mark filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark match, which allows matching frames based on the 'nfmark' value in the frame. This can be set by the mark target. @@ -107,7 +122,6 @@ config BRIDGE_EBT_MARK config BRIDGE_EBT_PKTTYPE tristate "ebt: packet type filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the packet type match, which allows matching on the type of packet based on its Ethernet "class" (as determined by @@ -118,7 +132,6 @@ config BRIDGE_EBT_PKTTYPE config BRIDGE_EBT_STP tristate "ebt: STP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the Spanning Tree Protocol match, which allows STP header field filtering. @@ -127,7 +140,6 @@ config BRIDGE_EBT_STP config BRIDGE_EBT_VLAN tristate "ebt: 802.1Q VLAN filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the 802.1Q vlan match, which allows the filtering of 802.1Q vlan fields. @@ -147,7 +159,6 @@ config BRIDGE_EBT_ARPREPLY config BRIDGE_EBT_DNAT tristate "ebt: dnat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC DNAT target, which allows altering the MAC destination address of frames. @@ -156,7 +167,6 @@ config BRIDGE_EBT_DNAT config BRIDGE_EBT_MARK_T tristate "ebt: mark target support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark target, which allows marking frames by setting the 'nfmark' value in the frame. @@ -167,7 +177,6 @@ config BRIDGE_EBT_MARK_T config BRIDGE_EBT_REDIRECT tristate "ebt: redirect target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC redirect target, which allows altering the MAC destination address of a frame to that of the device it arrived on. @@ -176,7 +185,6 @@ config BRIDGE_EBT_REDIRECT config BRIDGE_EBT_SNAT tristate "ebt: snat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC SNAT target, which allows altering the MAC source address of frames. @@ -187,7 +195,6 @@ config BRIDGE_EBT_SNAT # config BRIDGE_EBT_LOG tristate "ebt: log support" - depends on BRIDGE_NF_EBTABLES help This option adds the log watcher, that you can use in any rule in any ebtables table. It records info about the frame header @@ -197,7 +204,6 @@ config BRIDGE_EBT_LOG config BRIDGE_EBT_ULOG tristate "ebt: ulog support (OBSOLETE)" - depends on BRIDGE_NF_EBTABLES help This option enables the old bridge-specific "ebt_ulog" implementation which has been obsoleted by the new "nfnetlink_log" code (see @@ -212,4 +218,17 @@ config BRIDGE_EBT_ULOG To compile it as a module, choose M here. If unsure, say N. -endmenu +config BRIDGE_EBT_NFLOG + tristate "ebt: nflog support" + help + This option enables the nflog watcher, which allows to LOG + messages through the netfilter logging API, which can use + either the old LOG target, the old ULOG target or nfnetlink_log + as backend. + + This option adds the nflog watcher, that you can use in any rule + in any ebtables table. + + To compile it as a module, choose M here. If unsure, say N. + +endif # BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 8bf6d9f6e9d..6f2f3943d66 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -2,6 +2,9 @@ # Makefile for the netfilter modules for Link Layer filtering on a bridge. # +obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o +obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o + obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o # tables @@ -14,6 +17,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o +obj-$(CONFIG_BRIDGE_EBT_IP6) += ebt_ip6.o obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o @@ -29,4 +33,5 @@ obj-$(CONFIG_BRIDGE_EBT_SNAT) += ebt_snat.o # watchers obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_log.o -obj-$(CONFIG_BRIDGE_EBT_LOG) += ebt_ulog.o +obj-$(CONFIG_BRIDGE_EBT_ULOG) += ebt_ulog.o +obj-$(CONFIG_BRIDGE_EBT_NFLOG) += ebt_nflog.o diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 468ebdf4bc1..2a449b7ab8f 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -5,69 +5,68 @@ * Chris Vitale csv@bluetail.com * * May 2003 - * + * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_802_3.h> -#include <linux/module.h> -static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; - struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); - uint16_t type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; + const struct ebt_802_3_info *info = par->matchinfo; + const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); + __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { - if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) - return EBT_NOMATCH; + if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) + return false; if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) - return EBT_NOMATCH; - if (FWINV(info->type != type, EBT_802_3_TYPE)) - return EBT_NOMATCH; + return false; + if (FWINV(info->type != type, EBT_802_3_TYPE)) + return false; } - return EBT_MATCH; + return true; } -static struct ebt_match filter_802_3; -static int ebt_802_3_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_802_3_mt_check(const struct xt_mtchk_param *par) { - struct ebt_802_3_info *info = (struct ebt_802_3_info *)data; + const struct ebt_802_3_info *info = par->matchinfo; - if (datalen < sizeof(struct ebt_802_3_info)) - return -EINVAL; if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) return -EINVAL; return 0; } -static struct ebt_match filter_802_3 = -{ - .name = EBT_802_3_MATCH, - .match = ebt_filter_802_3, - .check = ebt_802_3_check, +static struct xt_match ebt_802_3_mt_reg __read_mostly = { + .name = "802_3", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_802_3_mt, + .checkentry = ebt_802_3_mt_check, + .matchsize = sizeof(struct ebt_802_3_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_802_3_init(void) { - return ebt_register_match(&filter_802_3); + return xt_register_match(&ebt_802_3_mt_reg); } -static void __exit fini(void) +static void __exit ebt_802_3_fini(void) { - ebt_unregister_match(&filter_802_3); + xt_unregister_match(&ebt_802_3_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_802_3_init); +module_exit(ebt_802_3_fini); +MODULE_DESCRIPTION("Ebtables: DSAP/SSAP field and SNAP type matching"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 5a1f5e3bff1..9024283d2bc 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -7,49 +7,46 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_among.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ip.h> #include <linux/if_arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_among.h> -static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, - const char *mac, uint32_t ip) +static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, + const char *mac, __be32 ip) { /* You may be puzzled as to how this code works. - * Some tricks were used, refer to + * Some tricks were used, refer to * include/linux/netfilter_bridge/ebt_among.h * as there you can find a solution of this mystery. */ const struct ebt_mac_wormhash_tuple *p; int start, limit, i; uint32_t cmp[2] = { 0, 0 }; - int key = (const unsigned char) mac[5]; + int key = ((const unsigned char *)mac)[5]; - memcpy(((char *) cmp) + 2, mac, 6); + ether_addr_copy(((char *) cmp) + 2, mac); start = wh->table[key]; limit = wh->table[key + 1]; if (ip) { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0 || p->ip == ip) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0 || p->ip == ip) + return true; } } else { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0) + return true; } } - return 0; + return false; } static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash @@ -70,27 +67,30 @@ static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash return 0; } -static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) +static int get_ip_dst(const struct sk_buff *skb, __be32 *addr) { if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { - struct iphdr _iph, *ih; + const struct iphdr *ih; + struct iphdr _iph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return -1; *addr = ih->daddr; } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { - struct arphdr _arph, *ah; - uint32_t buf, *bp; + const struct arphdr *ah; + struct arphdr _arph; + const __be32 *bp; + __be32 buf; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL || - ah->ar_pln != sizeof(uint32_t) || + ah->ar_pln != sizeof(__be32) || ah->ar_hln != ETH_ALEN) return -1; bp = skb_header_pointer(skb, sizeof(struct arphdr) + - 2 * ETH_ALEN + sizeof(uint32_t), - sizeof(uint32_t), &buf); + 2 * ETH_ALEN + sizeof(__be32), + sizeof(__be32), &buf); if (bp == NULL) return -1; *addr = *bp; @@ -98,26 +98,29 @@ static int get_ip_dst(const struct sk_buff *skb, uint32_t *addr) return 0; } -static int get_ip_src(const struct sk_buff *skb, uint32_t *addr) +static int get_ip_src(const struct sk_buff *skb, __be32 *addr) { if (eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { - struct iphdr _iph, *ih; + const struct iphdr *ih; + struct iphdr _iph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return -1; *addr = ih->saddr; } else if (eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { - struct arphdr _arph, *ah; - uint32_t buf, *bp; + const struct arphdr *ah; + struct arphdr _arph; + const __be32 *bp; + __be32 buf; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL || - ah->ar_pln != sizeof(uint32_t) || + ah->ar_pln != sizeof(__be32) || ah->ar_hln != ETH_ALEN) return -1; bp = skb_header_pointer(skb, sizeof(struct arphdr) + - ETH_ALEN, sizeof(uint32_t), &buf); + ETH_ALEN, sizeof(__be32), &buf); if (bp == NULL) return -1; *addr = *bp; @@ -125,15 +128,13 @@ static int get_ip_src(const struct sk_buff *skb, uint32_t *addr) return 0; } -static int ebt_filter_among(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_among_info *info = (struct ebt_among_info *) data; + const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; const struct ebt_mac_wormhash *wh_dst, *wh_src; - uint32_t dip = 0, sip = 0; + __be32 dip = 0, sip = 0; wh_dst = ebt_among_wh_dst(info); wh_src = ebt_among_wh_src(info); @@ -141,41 +142,41 @@ static int ebt_filter_among(const struct sk_buff *skb, if (wh_src) { smac = eth_hdr(skb)->h_source; if (get_ip_src(skb, &sip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_SRC_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } } if (wh_dst) { dmac = eth_hdr(skb)->h_dest; if (get_ip_dst(skb, &dip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_DST_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_among_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, - unsigned int datalen) +static int ebt_among_mt_check(const struct xt_mtchk_param *par) { - struct ebt_among_info *info = (struct ebt_among_info *) data; + const struct ebt_among_info *info = par->matchinfo; + const struct ebt_entry_match *em = + container_of(par->matchinfo, const struct ebt_entry_match, data); int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; @@ -185,44 +186,44 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask, expected_length += ebt_mac_wormhash_size(wh_dst); expected_length += ebt_mac_wormhash_size(wh_src); - if (datalen != EBT_ALIGN(expected_length)) { - printk(KERN_WARNING - "ebtables: among: wrong size: %d" - "against expected %d, rounded to %Zd\n", - datalen, expected_length, - EBT_ALIGN(expected_length)); + if (em->match_size != EBT_ALIGN(expected_length)) { + pr_info("wrong size: %d against expected %d, rounded to %Zd\n", + em->match_size, expected_length, + EBT_ALIGN(expected_length)); return -EINVAL; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { - printk(KERN_WARNING - "ebtables: among: dst integrity fail: %x\n", -err); + pr_info("dst integrity fail: %x\n", -err); return -EINVAL; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { - printk(KERN_WARNING - "ebtables: among: src integrity fail: %x\n", -err); + pr_info("src integrity fail: %x\n", -err); return -EINVAL; } return 0; } -static struct ebt_match filter_among = { - .name = EBT_AMONG_MATCH, - .match = ebt_filter_among, - .check = ebt_among_check, +static struct xt_match ebt_among_mt_reg __read_mostly = { + .name = "among", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_among_mt, + .checkentry = ebt_among_mt_check, + .matchsize = -1, /* special case */ .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_among_init(void) { - return ebt_register_match(&filter_among); + return xt_register_match(&ebt_among_mt_reg); } -static void __exit fini(void) +static void __exit ebt_among_fini(void) { - ebt_unregister_match(&filter_among); + xt_unregister_match(&ebt_among_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_among_init); +module_exit(ebt_among_fini); +MODULE_DESCRIPTION("Ebtables: Combined MAC/IP address list matching"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index b94c48cb6e4..cd457b891b2 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -8,79 +8,78 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arp.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arp.h> -static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_arp_info *info = (struct ebt_arp_info *)data; - struct arphdr _arph, *ah; + const struct ebt_arp_info *info = par->matchinfo; + const struct arphdr *ah; + struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != ah->ar_op, EBT_ARP_OPCODE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != ah->ar_hrd, EBT_ARP_HTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != ah->ar_pro, EBT_ARP_PTYPE)) - return EBT_NOMATCH; - - if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { - uint32_t _addr, *ap; - - /* IPv4 addresses are always 4 bytes */ - if (ah->ar_pln != sizeof(uint32_t)) - return EBT_NOMATCH; - if (info->bitmask & EBT_ARP_SRC_IP) { - ap = skb_header_pointer(skb, sizeof(struct arphdr) + - ah->ar_hln, sizeof(_addr), - &_addr); - if (ap == NULL) - return EBT_NOMATCH; - if (FWINV(info->saddr != (*ap & info->smsk), - EBT_ARP_SRC_IP)) - return EBT_NOMATCH; - } - - if (info->bitmask & EBT_ARP_DST_IP) { - ap = skb_header_pointer(skb, sizeof(struct arphdr) + - 2*ah->ar_hln+sizeof(uint32_t), - sizeof(_addr), &_addr); - if (ap == NULL) - return EBT_NOMATCH; - if (FWINV(info->daddr != (*ap & info->dmsk), - EBT_ARP_DST_IP)) - return EBT_NOMATCH; - } + return false; + + if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { + const __be32 *sap, *dap; + __be32 saddr, daddr; + + if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) + return false; + sap = skb_header_pointer(skb, sizeof(struct arphdr) + + ah->ar_hln, sizeof(saddr), + &saddr); + if (sap == NULL) + return false; + dap = skb_header_pointer(skb, sizeof(struct arphdr) + + 2*ah->ar_hln+sizeof(saddr), + sizeof(daddr), &daddr); + if (dap == NULL) + return false; + if (info->bitmask & EBT_ARP_SRC_IP && + FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) + return false; + if (info->bitmask & EBT_ARP_DST_IP && + FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) + return false; + if (info->bitmask & EBT_ARP_GRAT && + FWINV(*dap != *sap, EBT_ARP_GRAT)) + return false; } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { - unsigned char _mac[ETH_ALEN], *mp; + const unsigned char *mp; + unsigned char _mac[ETH_ALEN]; uint8_t verdict, i; - /* MAC addresses are 6 bytes */ - if (ah->ar_hln != ETH_ALEN) - return EBT_NOMATCH; + if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) + return false; if (info->bitmask & EBT_ARP_SRC_MAC) { mp = skb_header_pointer(skb, sizeof(struct arphdr), sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->smaddr[i]) & info->smmsk[i]; if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_ARP_DST_MAC) { @@ -88,26 +87,24 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in ah->ar_hln + ah->ar_pln, sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->dmaddr[i]) & info->dmmsk[i]; if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_arp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_arp_mt_check(const struct xt_mtchk_param *par) { - struct ebt_arp_info *info = (struct ebt_arp_info *)data; + const struct ebt_arp_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) - return -EINVAL; if ((e->ethproto != htons(ETH_P_ARP) && e->ethproto != htons(ETH_P_RARP)) || e->invflags & EBT_IPROTO) @@ -117,24 +114,27 @@ static int ebt_arp_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_match filter_arp = -{ - .name = EBT_ARP_MATCH, - .match = ebt_filter_arp, - .check = ebt_arp_check, +static struct xt_match ebt_arp_mt_reg __read_mostly = { + .name = "arp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_arp_mt, + .checkentry = ebt_arp_mt_check, + .matchsize = sizeof(struct ebt_arp_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_arp_init(void) { - return ebt_register_match(&filter_arp); + return xt_register_match(&ebt_arp_mt_reg); } -static void __exit fini(void) +static void __exit ebt_arp_fini(void) { - ebt_unregister_match(&filter_arp); + xt_unregister_match(&ebt_arp_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_arp_init); +module_exit(ebt_arp_fini); +MODULE_DESCRIPTION("Ebtables: ARP protocol packet match"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index b934de90f7c..070cf134a22 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -8,22 +8,23 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arpreply.h> #include <linux/if_arp.h> #include <net/arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arpreply.h> -static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; - u32 _sip, *siptr, _dip, *diptr; - struct arphdr _ah, *ap; - unsigned char _sha[ETH_ALEN], *shp; - struct sk_buff *skb = *pskb; + const struct ebt_arpreply_info *info = par->targinfo; + const __be32 *siptr, *diptr; + __be32 _sip, _dip; + const struct arphdr *ap; + struct arphdr _ah; + const unsigned char *shp; + unsigned char _sha[ETH_ALEN]; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) @@ -50,48 +51,48 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in, - *diptr, shp, info->mac, shp); + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, + *diptr, shp, info->mac, shp); return info->target; } -static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par) { - struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; + const struct ebt_arpreply_info *info = par->targinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) return -EINVAL; if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) - return -EINVAL; return 0; } -static struct ebt_target reply_target = -{ - .name = EBT_ARPREPLY_TARGET, - .target = ebt_target_reply, - .check = ebt_target_reply_check, +static struct xt_target ebt_arpreply_tg_reg __read_mostly = { + .name = "arpreply", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), + .target = ebt_arpreply_tg, + .checkentry = ebt_arpreply_tg_check, + .targetsize = sizeof(struct ebt_arpreply_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_arpreply_init(void) { - return ebt_register_target(&reply_target); + return xt_register_target(&ebt_arpreply_tg_reg); } -static void __exit fini(void) +static void __exit ebt_arpreply_fini(void) { - ebt_unregister_target(&reply_target); + xt_unregister_target(&ebt_arpreply_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_arpreply_init); +module_exit(ebt_arpreply_fini); +MODULE_DESCRIPTION("Ebtables: ARP reply target"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index f5463086c7b..4e0b0c35932 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -7,70 +7,68 @@ * June, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> -static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_nat_info *info = (struct ebt_nat_info *)data; + const struct ebt_nat_info *info = par->targinfo; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, 0)) + return EBT_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN); + ether_addr_copy(eth_hdr(skb)->h_dest, info->mac); return info->target; } -static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_dnat_tg_check(const struct xt_tgchk_param *par) { - struct ebt_nat_info *info = (struct ebt_nat_info *)data; + const struct ebt_nat_info *info = par->targinfo; + unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || - (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + (hook_mask & ~((1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT)))) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; if (INVALID_TARGET) return -EINVAL; return 0; } -static struct ebt_target dnat = -{ - .name = EBT_DNAT_TARGET, - .target = ebt_target_dnat, - .check = ebt_target_dnat_check, +static struct xt_target ebt_dnat_tg_reg __read_mostly = { + .name = "dnat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), + .target = ebt_dnat_tg, + .checkentry = ebt_dnat_tg_check, + .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_dnat_init(void) { - return ebt_register_target(&dnat); + return xt_register_target(&ebt_dnat_tg_reg); } -static void __exit fini(void) +static void __exit ebt_dnat_fini(void) { - ebt_unregister_target(&dnat); + xt_unregister_target(&ebt_dnat_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_dnat_init); +module_exit(ebt_dnat_fini); +MODULE_DESCRIPTION("Ebtables: Destination MAC address translation"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index dc5d0b2427c..23bca62d58d 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -11,78 +11,77 @@ * Innominate Security Technologies AG <mhopf@innominate.com> * September, 2002 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ip.h> #include <linux/ip.h> #include <net/ip.h> #include <linux/in.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip.h> struct tcpudphdr { - uint16_t src; - uint16_t dst; + __be16 src; + __be16 dst; }; -static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_ip_info *info = (struct ebt_ip_info *)data; - struct iphdr _iph, *ih; - struct tcpudphdr _ports, *pptr; + const struct ebt_ip_info *info = par->matchinfo; + const struct iphdr *ih; + struct iphdr _iph; + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_TOS && FWINV(info->tos != ih->tos, EBT_IP_TOS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_SOURCE && FWINV((ih->saddr & info->smsk) != info->saddr, EBT_IP_SOURCE)) - return EBT_NOMATCH; + return false; if ((info->bitmask & EBT_IP_DEST) && FWINV((ih->daddr & info->dmsk) != info->daddr, EBT_IP_DEST)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_PROTO) { if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) - return EBT_MATCH; + return true; if (ntohs(ih->frag_off) & IP_OFFSET) - return EBT_NOMATCH; + return false; pptr = skb_header_pointer(skb, ih->ihl*4, sizeof(_ports), &_ports); if (pptr == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); if (FWINV(dst < info->dport[0] || - dst > info->dport[1], - EBT_IP_DPORT)) - return EBT_NOMATCH; + dst > info->dport[1], + EBT_IP_DPORT)) + return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); if (FWINV(src < info->sport[0] || - src > info->sport[1], - EBT_IP_SPORT)) - return EBT_NOMATCH; + src > info->sport[1], + EBT_IP_SPORT)) + return false; } } - return EBT_MATCH; + return true; } -static int ebt_ip_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_ip_mt_check(const struct xt_mtchk_param *par) { - struct ebt_ip_info *info = (struct ebt_ip_info *)data; + const struct ebt_ip_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) - return -EINVAL; if (e->ethproto != htons(ETH_P_IP) || e->invflags & EBT_IPROTO) return -EINVAL; @@ -93,6 +92,7 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask, return -EINVAL; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) return -EINVAL; @@ -104,24 +104,27 @@ static int ebt_ip_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_match filter_ip = -{ - .name = EBT_IP_MATCH, - .match = ebt_filter_ip, - .check = ebt_ip_check, +static struct xt_match ebt_ip_mt_reg __read_mostly = { + .name = "ip", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip_mt, + .checkentry = ebt_ip_mt_check, + .matchsize = sizeof(struct ebt_ip_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_ip_init(void) { - return ebt_register_match(&filter_ip); + return xt_register_match(&ebt_ip_mt_reg); } -static void __exit fini(void) +static void __exit ebt_ip_fini(void) { - ebt_unregister_match(&filter_ip); + xt_unregister_match(&ebt_ip_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_ip_init); +module_exit(ebt_ip_fini); +MODULE_DESCRIPTION("Ebtables: IPv4 protocol packet match"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c new file mode 100644 index 00000000000..17fd5f2cb4b --- /dev/null +++ b/net/bridge/netfilter/ebt_ip6.c @@ -0,0 +1,158 @@ +/* + * ebt_ip6 + * + * Authors: + * Manohar Castelino <manohar.r.castelino@intel.com> + * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> + * Jan Engelhardt <jengelh@medozas.de> + * + * Summary: + * This is just a modification of the IPv4 code written by + * Bart De Schuymer <bdschuym@pandora.be> + * with the changes required to support IPv6 + * + * Jan, 2008 + */ +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in.h> +#include <linux/module.h> +#include <net/dsfield.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> + +union pkthdr { + struct { + __be16 src; + __be16 dst; + } tcpudphdr; + struct { + u8 type; + u8 code; + } icmphdr; +}; + +static bool +ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct ebt_ip6_info *info = par->matchinfo; + const struct ipv6hdr *ih6; + struct ipv6hdr _ip6h; + const union pkthdr *pptr; + union pkthdr _pkthdr; + + ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); + if (ih6 == NULL) + return false; + if (info->bitmask & EBT_IP6_TCLASS && + FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + return false; + if ((info->bitmask & EBT_IP6_SOURCE && + FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr), EBT_IP6_SOURCE)) || + (info->bitmask & EBT_IP6_DEST && + FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr), EBT_IP6_DEST))) + return false; + if (info->bitmask & EBT_IP6_PROTO) { + uint8_t nexthdr = ih6->nexthdr; + __be16 frag_off; + int offset_ph; + + offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); + if (offset_ph == -1) + return false; + if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + return false; + if (!(info->bitmask & ( EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) + return true; + + /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */ + pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), + &_pkthdr); + if (pptr == NULL) + return false; + if (info->bitmask & EBT_IP6_DPORT) { + u16 dst = ntohs(pptr->tcpudphdr.dst); + if (FWINV(dst < info->dport[0] || + dst > info->dport[1], EBT_IP6_DPORT)) + return false; + } + if (info->bitmask & EBT_IP6_SPORT) { + u16 src = ntohs(pptr->tcpudphdr.src); + if (FWINV(src < info->sport[0] || + src > info->sport[1], EBT_IP6_SPORT)) + return false; + } + if ((info->bitmask & EBT_IP6_ICMP6) && + FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1], + EBT_IP6_ICMP6)) + return false; + } + return true; +} + +static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) +{ + const struct ebt_entry *e = par->entryinfo; + struct ebt_ip6_info *info = par->matchinfo; + + if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) + return -EINVAL; + if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { + if (info->invflags & EBT_IP6_PROTO) + return -EINVAL; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && + info->protocol != IPPROTO_SCTP && + info->protocol != IPPROTO_DCCP) + return -EINVAL; + } + if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP6_ICMP6) { + if ((info->invflags & EBT_IP6_PROTO) || + info->protocol != IPPROTO_ICMPV6) + return -EINVAL; + if (info->icmpv6_type[0] > info->icmpv6_type[1] || + info->icmpv6_code[0] > info->icmpv6_code[1]) + return -EINVAL; + } + return 0; +} + +static struct xt_match ebt_ip6_mt_reg __read_mostly = { + .name = "ip6", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip6_mt, + .checkentry = ebt_ip6_mt_check, + .matchsize = sizeof(struct ebt_ip6_info), + .me = THIS_MODULE, +}; + +static int __init ebt_ip6_init(void) +{ + return xt_register_match(&ebt_ip6_mt_reg); +} + +static void __exit ebt_ip6_fini(void) +{ + xt_unregister_match(&ebt_ip6_mt_reg); +} + +module_init(ebt_ip6_init); +module_exit(ebt_ip6_fini); +MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 637c8844cd5..517e78befcb 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -10,13 +10,13 @@ * September, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_limit.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> - #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_limit.h> static DEFINE_SPINLOCK(limit_lock); @@ -31,11 +31,10 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) -static int ebt_limit_match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_limit_info *info = (struct ebt_limit_info *)data; + struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -47,11 +46,11 @@ static int ebt_limit_match(const struct sk_buff *skb, /* We're not limited. */ info->credit -= info->cost; spin_unlock_bh(&limit_lock); - return EBT_MATCH; + return true; } spin_unlock_bh(&limit_lock); - return EBT_NOMATCH; + return false; } /* Precision saver. */ @@ -66,18 +65,14 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; } -static int ebt_limit_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_limit_mt_check(const struct xt_mtchk_param *par) { - struct ebt_limit_info *info = (struct ebt_limit_info *)data; - - if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) - return -EINVAL; + struct ebt_limit_info *info = par->matchinfo; /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { - printk("Overflow in ebt_limit, try lower: %u/%u\n", + pr_info("overflow, try lower: %u/%u\n", info->avg, info->burst); return -EINVAL; } @@ -90,24 +85,43 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_match ebt_limit_reg = -{ - .name = EBT_LIMIT_MATCH, - .match = ebt_limit_match, - .check = ebt_limit_check, + +#ifdef CONFIG_COMPAT +/* + * no conversion function needed -- + * only avg/burst have meaningful values in userspace. + */ +struct ebt_compat_limit_info { + compat_uint_t avg, burst; + compat_ulong_t prev; + compat_uint_t credit, credit_cap, cost; +}; +#endif + +static struct xt_match ebt_limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_limit_mt, + .checkentry = ebt_limit_mt_check, + .matchsize = sizeof(struct ebt_limit_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct ebt_compat_limit_info), +#endif .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_limit_init(void) { - return ebt_register_match(&ebt_limit_reg); + return xt_register_match(&ebt_limit_mt_reg); } -static void __exit fini(void) +static void __exit ebt_limit_fini(void) { - ebt_unregister_match(&ebt_limit_reg); + xt_unregister_match(&ebt_limit_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_limit_init); +module_exit(ebt_limit_fini); +MODULE_DESCRIPTION("Ebtables: Rate-limit match"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0128fbbe232..5322a36867a 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -8,26 +8,26 @@ * April, 2002 * */ - -#include <linux/in.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_log.h> -#include <linux/netfilter.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/spinlock.h> +#include <net/netfilter/nf_log.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in6.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_log.h> +#include <linux/netfilter.h> static DEFINE_SPINLOCK(ebt_log_lock); -static int ebt_log_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_log_tg_check(const struct xt_tgchk_param *par) { - struct ebt_log_info *info = (struct ebt_log_info *)data; + struct ebt_log_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) - return -EINVAL; if (info->bitmask & ~EBT_LOG_MASK) return -EINVAL; if (info->loglevel >= 8) @@ -38,8 +38,8 @@ static int ebt_log_check(const char *tablename, unsigned int hookmask, struct tcpudphdr { - uint16_t src; - uint16_t dst; + __be16 src; + __be16 dst; }; struct arppayload @@ -50,32 +50,45 @@ struct arppayload unsigned char ip_dst[4]; }; -static void print_MAC(unsigned char *p) +static void +print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) { - int i; + if (protocol == IPPROTO_TCP || + protocol == IPPROTO_UDP || + protocol == IPPROTO_UDPLITE || + protocol == IPPROTO_SCTP || + protocol == IPPROTO_DCCP) { + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; - for (i = 0; i < ETH_ALEN; i++, p++) - printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':'); + pptr = skb_header_pointer(skb, offset, + sizeof(_ports), &_ports); + if (pptr == NULL) { + printk(" INCOMPLETE TCP/UDP header"); + return; + } + printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + } } -#define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void -ebt_log_packet(unsigned int pf, unsigned int hooknum, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct nf_loginfo *loginfo, - const char *prefix) +ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const struct nf_loginfo *loginfo, + const char *prefix) { unsigned int bitmask; - spin_lock_bh(&ebt_log_lock); - printk("<%c>%s IN=%s OUT=%s MAC source = ", '0' + loginfo->u.log.level, - prefix, in ? in->name : "", out ? out->name : ""); - - print_MAC(eth_hdr(skb)->h_source); - printk("MAC dest = "); - print_MAC(eth_hdr(skb)->h_dest); + /* FIXME: Disabled from containers until syslog ns is supported */ + if (!net_eq(net, &init_net)) + return; - printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto)); + spin_lock_bh(&ebt_log_lock); + printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", + '0' + loginfo->u.log.level, prefix, + in ? in->name : "", out ? out->name : "", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); if (loginfo->type == NF_LOG_TYPE_LOG) bitmask = loginfo->u.log.logflags; @@ -83,39 +96,51 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, bitmask = NF_LOG_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == - htons(ETH_P_IP)){ - struct iphdr _iph, *ih; + htons(ETH_P_IP)) { + const struct iphdr *ih; + struct iphdr _iph; ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) { printk(" INCOMPLETE IP header"); goto out; } - printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " - "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr), ih->tos, ih->protocol); - if (ih->protocol == IPPROTO_TCP || - ih->protocol == IPPROTO_UDP || - ih->protocol == IPPROTO_SCTP || - ih->protocol == IPPROTO_DCCP) { - struct tcpudphdr _ports, *pptr; - - pptr = skb_header_pointer(skb, ih->ihl*4, - sizeof(_ports), &_ports); - if (pptr == NULL) { - printk(" INCOMPLETE TCP/UDP header"); - goto out; - } - printk(" SPT=%u DPT=%u", ntohs(pptr->src), - ntohs(pptr->dst)); + printk(" IP SRC=%pI4 IP DST=%pI4, IP tos=0x%02X, IP proto=%d", + &ih->saddr, &ih->daddr, ih->tos, ih->protocol); + print_ports(skb, ih->protocol, ih->ihl*4); + goto out; + } + +#if IS_ENABLED(CONFIG_BRIDGE_EBT_IP6) + if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto == + htons(ETH_P_IPV6)) { + const struct ipv6hdr *ih; + struct ipv6hdr _iph; + uint8_t nexthdr; + __be16 frag_off; + int offset_ph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + printk(" INCOMPLETE IPv6 header"); + goto out; } + printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", + &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); + nexthdr = ih->nexthdr; + offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr, &frag_off); + if (offset_ph == -1) + goto out; + print_ports(skb, nexthdr, offset_ph); goto out; } +#endif if ((bitmask & EBT_LOG_ARP) && ((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) || (eth_hdr(skb)->h_proto == htons(ETH_P_RARP)))) { - struct arphdr _arph, *ah; + const struct arphdr *ah; + struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) { @@ -130,8 +155,9 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, * then log the ARP payload */ if (ah->ar_hrd == htons(1) && ah->ar_hln == ETH_ALEN && - ah->ar_pln == sizeof(uint32_t)) { - struct arppayload _arpp, *ap; + ah->ar_pln == sizeof(__be32)) { + const struct arppayload *ap; + struct arppayload _arpp; ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp); @@ -139,14 +165,8 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, printk(" INCOMPLETE ARP payload"); goto out; } - printk(" ARP MAC SRC="); - print_MAC(ap->mac_src); - printk(" ARP IP SRC=%u.%u.%u.%u", - myNIPQUAD(ap->ip_src)); - printk(" ARP MAC DST="); - print_MAC(ap->mac_dst); - printk(" ARP IP DST=%u.%u.%u.%u", - myNIPQUAD(ap->ip_dst)); + printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4", + ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); } } out: @@ -155,57 +175,88 @@ out: } -static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_log_info *info = (struct ebt_log_info *)data; + const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix); + if (info->bitmask & EBT_LOG_NFLOG) + nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, + par->in, par->out, &li, "%s", info->prefix); + else + ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, info->prefix); + return EBT_CONTINUE; } -static struct ebt_watcher log = -{ - .name = EBT_LOG_WATCHER, - .watcher = ebt_log, - .check = ebt_log_check, +static struct xt_target ebt_log_tg_reg __read_mostly = { + .name = "log", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_log_tg, + .checkentry = ebt_log_tg_check, + .targetsize = sizeof(struct ebt_log_info), .me = THIS_MODULE, }; -static struct nf_logger ebt_log_logger = { +static struct nf_logger ebt_log_logger __read_mostly = { .name = "ebt_log", .logfn = &ebt_log_packet, .me = THIS_MODULE, }; -static int __init init(void) +static int __net_init ebt_log_net_init(struct net *net) +{ + nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger); + return 0; +} + +static void __net_exit ebt_log_net_fini(struct net *net) +{ + nf_log_unset(net, &ebt_log_logger); +} + +static struct pernet_operations ebt_log_net_ops = { + .init = ebt_log_net_init, + .exit = ebt_log_net_fini, +}; + +static int __init ebt_log_init(void) { int ret; - ret = ebt_register_watcher(&log); + ret = register_pernet_subsys(&ebt_log_net_ops); if (ret < 0) - return ret; - if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) { - printk(KERN_WARNING "ebt_log: not logging via system console " - "since somebody else already registered for PF_INET\n"); - /* we cannot make module load fail here, since otherwise - * ebtables userspace would abort */ - } + goto err_pernet; - return 0; + ret = xt_register_target(&ebt_log_tg_reg); + if (ret < 0) + goto err_target; + + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); + + return ret; + +err_target: + unregister_pernet_subsys(&ebt_log_net_ops); +err_pernet: + return ret; } -static void __exit fini(void) +static void __exit ebt_log_fini(void) { - nf_log_unregister_logger(&ebt_log_logger); - ebt_unregister_watcher(&log); + unregister_pernet_subsys(&ebt_log_net_ops); + nf_log_unregister(&ebt_log_logger); + xt_unregister_target(&ebt_log_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_log_init); +module_exit(ebt_log_fini); +MODULE_DESCRIPTION("Ebtables: Packet logging to syslog"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index c93d35ab95c..66697cbd0a8 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -13,55 +13,98 @@ * Marking a frame doesn't really change anything in the frame anyway. */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_t.h> -#include <linux/module.h> -static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + const struct ebt_mark_t_info *info = par->targinfo; + int action = info->target & -16; - if ((*pskb)->nfmark != info->mark) - (*pskb)->nfmark = info->mark; + if (action == MARK_SET_VALUE) + skb->mark = info->mark; + else if (action == MARK_OR_VALUE) + skb->mark |= info->mark; + else if (action == MARK_AND_VALUE) + skb->mark &= info->mark; + else + skb->mark ^= info->mark; - return info->target; + return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_mark_tg_check(const struct xt_tgchk_param *par) { - struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; + const struct ebt_mark_t_info *info = par->targinfo; + int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) + tmp = info->target | ~EBT_VERDICT_BITS; + if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; - if (BASE_CHAIN && info->target == EBT_RETURN) + if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (INVALID_TARGET) + tmp = info->target & ~EBT_VERDICT_BITS; + if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && + tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) return -EINVAL; return 0; } +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_t_info { + compat_ulong_t mark; + compat_uint_t target; +}; -static struct ebt_target mark_target = +static void mark_tg_compat_from_user(void *dst, const void *src) { - .name = EBT_MARK_TARGET, - .target = ebt_target_mark, - .check = ebt_target_mark_check, + const struct compat_ebt_mark_t_info *user = src; + struct ebt_mark_t_info *kern = dst; + + kern->mark = user->mark; + kern->target = user->target; +} + +static int mark_tg_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_t_info __user *user = dst; + const struct ebt_mark_t_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->target, &user->target)) + return -EFAULT; + return 0; +} +#endif + +static struct xt_target ebt_mark_tg_reg __read_mostly = { + .name = "mark", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_mark_tg, + .checkentry = ebt_mark_tg_check, + .targetsize = sizeof(struct ebt_mark_t_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_t_info), + .compat_from_user = mark_tg_compat_from_user, + .compat_to_user = mark_tg_compat_to_user, +#endif .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_mark_init(void) { - return ebt_register_target(&mark_target); + return xt_register_target(&ebt_mark_tg_reg); } -static void __exit fini(void) +static void __exit ebt_mark_fini(void) { - ebt_unregister_target(&mark_target); + xt_unregister_target(&ebt_mark_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_mark_init); +module_exit(ebt_mark_fini); +MODULE_DESCRIPTION("Ebtables: Packet mark modification"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 625102de149..d98baefc4c7 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -7,29 +7,25 @@ * July, 2002 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_m.h> -#include <linux/module.h> -static int ebt_filter_mark(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; + const struct ebt_mark_m_info *info = par->matchinfo; if (info->bitmask & EBT_MARK_OR) - return !(!!(skb->nfmark & info->mask) ^ info->invert); - return !(((skb->nfmark & info->mask) == info->mark) ^ info->invert); + return !!(skb->mark & info->mask) ^ info->invert; + return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static int ebt_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_mark_mt_check(const struct xt_mtchk_param *par) { - struct ebt_mark_m_info *info = (struct ebt_mark_m_info *) data; + const struct ebt_mark_m_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) - return -EINVAL; if (info->bitmask & ~EBT_MARK_MASK) return -EINVAL; if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) @@ -39,24 +35,64 @@ static int ebt_mark_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_match filter_mark = + +#ifdef CONFIG_COMPAT +struct compat_ebt_mark_m_info { + compat_ulong_t mark, mask; + uint8_t invert, bitmask; +}; + +static void mark_mt_compat_from_user(void *dst, const void *src) { - .name = EBT_MARK_MATCH, - .match = ebt_filter_mark, - .check = ebt_mark_check, + const struct compat_ebt_mark_m_info *user = src; + struct ebt_mark_m_info *kern = dst; + + kern->mark = user->mark; + kern->mask = user->mask; + kern->invert = user->invert; + kern->bitmask = user->bitmask; +} + +static int mark_mt_compat_to_user(void __user *dst, const void *src) +{ + struct compat_ebt_mark_m_info __user *user = dst; + const struct ebt_mark_m_info *kern = src; + + if (put_user(kern->mark, &user->mark) || + put_user(kern->mask, &user->mask) || + put_user(kern->invert, &user->invert) || + put_user(kern->bitmask, &user->bitmask)) + return -EFAULT; + return 0; +} +#endif + +static struct xt_match ebt_mark_mt_reg __read_mostly = { + .name = "mark_m", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_mark_mt, + .checkentry = ebt_mark_mt_check, + .matchsize = sizeof(struct ebt_mark_m_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ebt_mark_m_info), + .compat_from_user = mark_mt_compat_from_user, + .compat_to_user = mark_mt_compat_to_user, +#endif .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_mark_m_init(void) { - return ebt_register_match(&filter_mark); + return xt_register_match(&ebt_mark_mt_reg); } -static void __exit fini(void) +static void __exit ebt_mark_m_fini(void) { - ebt_unregister_match(&filter_mark); + xt_unregister_match(&ebt_mark_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_mark_m_init); +module_exit(ebt_mark_m_fini); +MODULE_DESCRIPTION("Ebtables: Packet mark match"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c new file mode 100644 index 00000000000..59ac7952010 --- /dev/null +++ b/net/bridge/netfilter/ebt_nflog.c @@ -0,0 +1,73 @@ +/* + * ebt_nflog + * + * Author: + * Peter Warasin <peter@endian.com> + * + * February, 2008 + * + * Based on: + * xt_NFLOG.c, (C) 2006 by Patrick McHardy <kaber@trash.net> + * ebt_ulog.c, (C) 2004 by Bart De Schuymer <bdschuym@pandora.be> + * + */ + +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nflog.h> +#include <net/netfilter/nf_log.h> + +static unsigned int +ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ebt_nflog_info *info = par->targinfo; + struct nf_loginfo li; + struct net *net = dev_net(par->in ? par->in : par->out); + + li.type = NF_LOG_TYPE_ULOG; + li.u.ulog.copy_len = info->len; + li.u.ulog.group = info->group; + li.u.ulog.qthreshold = info->threshold; + + nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); + return EBT_CONTINUE; +} + +static int ebt_nflog_tg_check(const struct xt_tgchk_param *par) +{ + struct ebt_nflog_info *info = par->targinfo; + + if (info->flags & ~EBT_NFLOG_MASK) + return -EINVAL; + info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; + return 0; +} + +static struct xt_target ebt_nflog_tg_reg __read_mostly = { + .name = "nflog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_nflog_tg, + .checkentry = ebt_nflog_tg_check, + .targetsize = sizeof(struct ebt_nflog_info), + .me = THIS_MODULE, +}; + +static int __init ebt_nflog_init(void) +{ + return xt_register_target(&ebt_nflog_tg_reg); +} + +static void __exit ebt_nflog_fini(void) +{ + xt_unregister_target(&ebt_nflog_tg_reg); +} + +module_init(ebt_nflog_init); +module_exit(ebt_nflog_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Peter Warasin <peter@endian.com>"); +MODULE_DESCRIPTION("ebtables NFLOG netfilter logging module"); diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index ecd3b42b19b..496a5651530 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -7,53 +7,50 @@ * April, 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_pkttype.h> -#include <linux/module.h> -static int ebt_filter_pkttype(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, - unsigned int datalen) +static bool +ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; + const struct ebt_pkttype_info *info = par->matchinfo; - return (skb->pkt_type != info->pkt_type) ^ info->invert; + return (skb->pkt_type == info->pkt_type) ^ info->invert; } -static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { - struct ebt_pkttype_info *info = (struct ebt_pkttype_info *)data; + const struct ebt_pkttype_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) - return -EINVAL; if (info->invert != 0 && info->invert != 1) return -EINVAL; /* Allow any pkt_type value */ return 0; } -static struct ebt_match filter_pkttype = -{ - .name = EBT_PKTTYPE_MATCH, - .match = ebt_filter_pkttype, - .check = ebt_pkttype_check, +static struct xt_match ebt_pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_pkttype_mt, + .checkentry = ebt_pkttype_mt_check, + .matchsize = sizeof(struct ebt_pkttype_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_pkttype_init(void) { - return ebt_register_match(&filter_pkttype); + return xt_register_match(&ebt_pkttype_mt_reg); } -static void __exit fini(void) +static void __exit ebt_pkttype_fini(void) { - ebt_unregister_match(&filter_pkttype); + xt_unregister_match(&ebt_pkttype_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_pkttype_init); +module_exit(ebt_pkttype_fini); +MODULE_DESCRIPTION("Ebtables: Link layer packet type match"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 1538b438666..203964997a5 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -7,75 +7,74 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_redirect.h> -static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; + const struct ebt_redirect_info *info = par->targinfo; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, 0)) + return EBT_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - if (hooknr != NF_BR_BROUTING) - memcpy(eth_hdr(*pskb)->h_dest, - in->br_port->br->dev->dev_addr, ETH_ALEN); + if (par->hooknum != NF_BR_BROUTING) + /* rcu_read_lock()ed by nf_hook_slow */ + ether_addr_copy(eth_hdr(skb)->h_dest, + br_port_get_rcu(par->in)->br->dev->dev_addr); else - memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN); - (*pskb)->pkt_type = PACKET_HOST; + ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); + skb->pkt_type = PACKET_HOST; return info->target; } -static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_redirect_tg_check(const struct xt_tgchk_param *par) { - struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; + const struct ebt_redirect_info *info = par->targinfo; + unsigned int hook_mask; - if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) return -EINVAL; if (INVALID_TARGET) return -EINVAL; return 0; } -static struct ebt_target redirect_target = -{ - .name = EBT_REDIRECT_TARGET, - .target = ebt_target_redirect, - .check = ebt_target_redirect_check, +static struct xt_target ebt_redirect_tg_reg __read_mostly = { + .name = "redirect", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_BROUTING), + .target = ebt_redirect_tg, + .checkentry = ebt_redirect_tg_check, + .targetsize = sizeof(struct ebt_redirect_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_redirect_init(void) { - return ebt_register_target(&redirect_target); + return xt_register_target(&ebt_redirect_tg_reg); } -static void __exit fini(void) +static void __exit ebt_redirect_fini(void) { - ebt_unregister_target(&redirect_target); + xt_unregister_target(&ebt_redirect_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_redirect_init); +module_exit(ebt_redirect_fini); +MODULE_DESCRIPTION("Ebtables: Packet redirection to localhost"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 1529bdcb9a4..e56ccd060d2 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -7,70 +7,81 @@ * June, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> +#include <linux/if_arp.h> +#include <net/arp.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> -static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_nat_info *info = (struct ebt_nat_info *) data; + const struct ebt_nat_info *info = par->targinfo; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, 0)) + return EBT_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; + ether_addr_copy(eth_hdr(skb)->h_source, info->mac); + if (!(info->target & NAT_ARP_BIT) && + eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + const struct arphdr *ap; + struct arphdr _ah; + + ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); + if (ap == NULL) + return EBT_DROP; + if (ap->ar_hln != ETH_ALEN) + goto out; + if (skb_store_bits(skb, sizeof(_ah), info->mac, ETH_ALEN)) + return EBT_DROP; } - memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); - return info->target; +out: + return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_snat_tg_check(const struct xt_tgchk_param *par) { - struct ebt_nat_info *info = (struct ebt_nat_info *) data; + const struct ebt_nat_info *info = par->targinfo; + int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; - if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat")) + tmp = info->target | ~EBT_VERDICT_BITS; + if (BASE_CHAIN && tmp == EBT_RETURN) return -EINVAL; - if (hookmask & ~(1 << NF_BR_POST_ROUTING)) + + if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) return -EINVAL; - if (INVALID_TARGET) + tmp = info->target | EBT_VERDICT_BITS; + if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) return -EINVAL; return 0; } -static struct ebt_target snat = -{ - .name = EBT_SNAT_TARGET, - .target = ebt_target_snat, - .check = ebt_target_snat_check, +static struct xt_target ebt_snat_tg_reg __read_mostly = { + .name = "snat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), + .target = ebt_snat_tg, + .checkentry = ebt_snat_tg_check, + .targetsize = sizeof(struct ebt_nat_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_snat_init(void) { - return ebt_register_target(&snat); + return xt_register_target(&ebt_snat_tg_reg); } -static void __exit fini(void) +static void __exit ebt_snat_fini(void) { - ebt_unregister_target(&snat); + xt_unregister_target(&ebt_snat_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_snat_init); +module_exit(ebt_snat_fini); +MODULE_DESCRIPTION("Ebtables: Source MAC address translation"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 0248c67277e..071d87214dd 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -7,11 +7,11 @@ * * July, 2003 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_stp.h> #include <linux/etherdevice.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_stp.h> #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 @@ -40,10 +40,10 @@ struct stp_config_pdu { #define NR16(p) (p[0] << 8 | p[1]) #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) -static int ebt_filter_config(struct ebt_stp_info *info, - struct stp_config_pdu *stpc) +static bool ebt_filter_config(const struct ebt_stp_info *info, + const struct stp_config_pdu *stpc) { - struct ebt_stp_config_info *c; + const struct ebt_stp_config_info *c; uint16_t v16; uint32_t v32; int verdict, i; @@ -51,145 +51,147 @@ static int ebt_filter_config(struct ebt_stp_info *info, c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); if (FWINV(v16 < c->root_priol || v16 > c->root_priou, EBT_STP_ROOTPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTADDR) { verdict = 0; for (i = 0; i < 6; i++) verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & - c->root_addrmsk[i]; + c->root_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); if (FWINV(v32 < c->root_costl || v32 > c->root_costu, EBT_STP_ROOTCOST)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); if (FWINV(v16 < c->sender_priol || v16 > c->sender_priou, EBT_STP_SENDERPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERADDR) { verdict = 0; for (i = 0; i < 6; i++) verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & - c->sender_addrmsk[i]; + c->sender_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_PORT) { v16 = NR16(stpc->port); if (FWINV(v16 < c->portl || v16 > c->portu, EBT_STP_PORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); if (FWINV(v16 < c->msg_agel || v16 > c->msg_ageu, EBT_STP_MSGAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); if (FWINV(v16 < c->max_agel || v16 > c->max_ageu, EBT_STP_MAXAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); if (FWINV(v16 < c->hello_timel || v16 > c->hello_timeu, EBT_STP_HELLOTIME)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); if (FWINV(v16 < c->forward_delayl || v16 > c->forward_delayu, EBT_STP_FWDD)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_stp_info *info = (struct ebt_stp_info *)data; - struct stp_header _stph, *sp; - uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + const struct ebt_stp_info *info = par->matchinfo; + const struct stp_header *sp; + struct stp_header _stph; + const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) - return EBT_NOMATCH; + return false; /* The stp code only considers these */ if (memcmp(sp, header, sizeof(header))) - return EBT_NOMATCH; + return false; - if (info->bitmask & EBT_STP_TYPE - && FWINV(info->type != sp->type, EBT_STP_TYPE)) - return EBT_NOMATCH; + if (info->bitmask & EBT_STP_TYPE && + FWINV(info->type != sp->type, EBT_STP_TYPE)) + return false; if (sp->type == BPDU_TYPE_CONFIG && info->bitmask & EBT_STP_CONFIG_MASK) { - struct stp_config_pdu _stpc, *st; + const struct stp_config_pdu *st; + struct stp_config_pdu _stpc; st = skb_header_pointer(skb, sizeof(_stph), sizeof(_stpc), &_stpc); if (st == NULL) - return EBT_NOMATCH; + return false; return ebt_filter_config(info, st); } - return EBT_MATCH; + return true; } -static int ebt_stp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_stp_mt_check(const struct xt_mtchk_param *par) { - struct ebt_stp_info *info = (struct ebt_stp_info *)data; - int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); - uint8_t bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; - uint8_t msk[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + const struct ebt_stp_info *info = par->matchinfo; + const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; + const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) return -EINVAL; - if (datalen != len) - return -EINVAL; /* Make sure the match only receives stp frames */ - if (compare_ether_addr(e->destmac, bridge_ula) || - compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) + if (!ether_addr_equal(e->destmac, bridge_ula) || + !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) return -EINVAL; return 0; } -static struct ebt_match filter_stp = -{ - .name = EBT_STP_MATCH, - .match = ebt_filter_stp, - .check = ebt_stp_check, +static struct xt_match ebt_stp_mt_reg __read_mostly = { + .name = "stp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_stp_mt, + .checkentry = ebt_stp_mt_check, + .matchsize = sizeof(struct ebt_stp_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_stp_init(void) { - return ebt_register_match(&filter_stp); + return xt_register_match(&ebt_stp_mt_reg); } -static void __exit fini(void) +static void __exit ebt_stp_fini(void) { - ebt_unregister_match(&filter_stp); + xt_unregister_match(&ebt_stp_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_stp_init); +module_exit(ebt_stp_fini); +MODULE_DESCRIPTION("Ebtables: Spanning Tree Protocol packet match"); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ce617b3dbbb..7c470c371e1 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -10,8 +10,8 @@ * Based on ipt_ULOG.c, which is * (C) 2000-2002 by Harald Welte <laforge@netfilter.org> * - * This module accepts two parameters: - * + * This module accepts two parameters: + * * nlbufsiz: * The parameter specifies how big the buffer for each netlink multicast * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will @@ -27,34 +27,33 @@ * flushed even if it is not full yet. * */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> -#include <linux/config.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> -#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> +#include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include "../br_private.h" -#define PRINTR(format, args...) do { if (net_ratelimit()) \ - printk(format , ## args); } while (0) - -static unsigned int nlbufsiz = 4096; +static unsigned int nlbufsiz = NLMSG_GOODSIZE; module_param(nlbufsiz, uint, 0600); MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " - "(defaults to 4096)"); + "(defaults to 4096)"); static unsigned int flushtimeout = 10; module_param(flushtimeout, uint, 0600); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) " - "(defaults to 10)"); + "(defaults to 10)"); typedef struct { unsigned int qlen; /* number of nlmsgs' in the skb */ @@ -64,23 +63,34 @@ typedef struct { spinlock_t lock; /* the per-queue lock */ } ebt_ulog_buff_t; -static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; -static struct sock *ebtulognl; +static int ebt_ulog_net_id __read_mostly; +struct ebt_ulog_net { + unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; + ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; + struct sock *ebtulognl; +}; + +static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) +{ + return net_generic(net, ebt_ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroup) +static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup) { - ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); + del_timer(&ub->timer); + + if (!ub->skb) + return; /* last nlmsg needs NLMSG_DONE */ if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroup + 1; - netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); + netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; @@ -89,43 +99,53 @@ static void ulog_send(unsigned int nlgroup) /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - spin_lock_bh(&ulog_buffers[data].lock); - if (ulog_buffers[data].skb) - ulog_send(data); - spin_unlock_bh(&ulog_buffers[data].lock); + struct ebt_ulog_net *ebt = container_of((void *)data, + struct ebt_ulog_net, + nlgroup[*(unsigned int *)data]); + + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; + spin_lock_bh(&ub->lock); + if (ub->skb) + ulog_send(ebt, *(unsigned int *)data); + spin_unlock_bh(&ub->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) { struct sk_buff *skb; + unsigned int n; - skb = alloc_skb(nlbufsiz, GFP_ATOMIC); + n = max(size, nlbufsiz); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { - PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer " - "of size %ub!\n", nlbufsiz); - if (size < nlbufsiz) { + if (n > size) { /* try to allocate only as much as we need for * current packet */ skb = alloc_skb(size, GFP_ATOMIC); if (!skb) - PRINTR(KERN_ERR "ebt_ulog: can't even allocate " - "buffer of size %ub\n", size); + pr_debug("cannot even allocate buffer of size %ub\n", + size); } } return skb; } -static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const struct ebt_ulog_info *uloginfo, const char *prefix) +static void ebt_ulog_packet(struct net *net, unsigned int hooknr, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct ebt_ulog_info *uloginfo, + const char *prefix) { ebt_ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); unsigned int group = uloginfo->nlgroup; - ebt_ulog_buff_t *ub = &ulog_buffers[group]; + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group]; spinlock_t *lock = &ub->lock; + ktime_t kt; if ((uloginfo->cprange == 0) || (uloginfo->cprange > skb->len + ETH_HLEN)) @@ -133,10 +153,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, else copy_len = uloginfo->cprange; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); if (size > nlbufsiz) { - PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n", - size, nlbufsiz); + pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); return; } @@ -144,49 +163,54 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (!ub->skb) { if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; + goto unlock; } else if (size > skb_tailroom(ub->skb)) { - ulog_send(group); + ulog_send(ebt, group); if (!(ub->skb = ulog_alloc_skb(size))) - goto alloc_failure; + goto unlock; } - nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, 0, - size - NLMSG_ALIGN(sizeof(*nlh))); + nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0, + size - NLMSG_ALIGN(sizeof(*nlh)), 0); + if (!nlh) { + kfree_skb(ub->skb); + ub->skb = NULL; + goto unlock; + } ub->qlen++; - pm = NLMSG_DATA(nlh); + pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; - do_gettimeofday(&pm->stamp); + kt = ktime_get_real(); + pm->stamp = ktime_to_timeval(kt); if (ub->qlen == 1) - skb_set_timestamp(ub->skb, &pm->stamp); + ub->skb->tstamp = kt; pm->data_len = copy_len; - pm->mark = skb->nfmark; + pm->mark = skb->mark; pm->hook = hooknr; if (uloginfo->prefix != NULL) strcpy(pm->prefix, uloginfo->prefix); - else - *(pm->prefix) = '\0'; if (in) { strcpy(pm->physindev, in->name); /* If in isn't a bridge, then physindev==indev */ - if (in->br_port) - strcpy(pm->indev, in->br_port->br->dev->name); + if (br_port_exists(in)) + /* rcu_read_lock()ed by nf_hook_slow */ + strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); - } else - pm->indev[0] = pm->physindev[0] = '\0'; + } if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); - strcpy(pm->outdev, out->br_port->br->dev->name); - } else - pm->outdev[0] = pm->physoutdev[0] = '\0'; + /* rcu_read_lock()ed by nf_hook_slow */ + strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); + } if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) BUG(); @@ -197,7 +221,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, ub->lastnlh = nlh; if (ub->qlen >= uloginfo->qthreshold) - ulog_send(group); + ulog_send(ebt, group); else if (!timer_pending(&ub->timer)) { ub->timer.expires = jiffies + flushtimeout * HZ / 100; add_timer(&ub->timer); @@ -205,19 +229,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, unlock: spin_unlock_bh(lock); - - return; - -nlmsg_failure: - printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should " - "not happen, please report to author.\n"); - goto unlock; -alloc_failure: - goto unlock; } /* this function is registered with the netfilter core */ -static void ebt_log_packet(unsigned int pf, unsigned int hooknum, +static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, const char *prefix) @@ -236,26 +251,30 @@ static void ebt_log_packet(unsigned int pf, unsigned int hooknum, strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); } - ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); + ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); } -static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + struct net *net = dev_net(par->in ? par->in : par->out); - ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); + ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); + return EBT_CONTINUE; } - -static int ebt_ulog_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_ulog_info *uloginfo = (struct ebt_ulog_info *)data; + struct ebt_ulog_info *uloginfo = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || - uloginfo->nlgroup > 31) + if (!par->net->xt.ebt_ulog_warn_deprecated) { + pr_info("ebt_ulog is deprecated and it will be removed soon, " + "use ebt_nflog instead\n"); + par->net->xt.ebt_ulog_warn_deprecated = true; + } + + if (uloginfo->nlgroup > 31) return -EINVAL; uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; @@ -266,78 +285,109 @@ static int ebt_ulog_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_watcher ulog = { - .name = EBT_ULOG_WATCHER, - .watcher = ebt_ulog, - .check = ebt_ulog_check, +static struct xt_target ebt_ulog_tg_reg __read_mostly = { + .name = "ulog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_ulog_tg, + .checkentry = ebt_ulog_tg_check, + .targetsize = sizeof(struct ebt_ulog_info), .me = THIS_MODULE, }; -static struct nf_logger ebt_ulog_logger = { - .name = EBT_ULOG_WATCHER, +static struct nf_logger ebt_ulog_logger __read_mostly = { + .name = "ebt_ulog", .logfn = &ebt_log_packet, .me = THIS_MODULE, }; -static int __init init(void) +static int __net_init ebt_ulog_net_init(struct net *net) { - int i, ret = 0; + int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - if (nlbufsiz >= 128*1024) { - printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," - " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; - } + struct netlink_kernel_cfg cfg = { + .groups = EBT_ULOG_MAXNLGROUPS, + }; /* initialize ulog_buffers */ for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - init_timer(&ulog_buffers[i].timer); - ulog_buffers[i].timer.function = ulog_timer; - ulog_buffers[i].timer.data = i; - spin_lock_init(&ulog_buffers[i].lock); + ebt->nlgroup[i] = i; + setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, + (unsigned long)&ebt->nlgroup[i]); + spin_lock_init(&ebt->ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, - NULL, THIS_MODULE); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = ebt_register_watcher(&ulog))) - sock_release(ebtulognl->sk_socket); - - if (nf_log_register(PF_BRIDGE, &ebt_ulog_logger) < 0) { - printk(KERN_WARNING "ebt_ulog: not logging via ulog " - "since somebody else already registered for PF_BRIDGE\n"); - /* we cannot make module load fail here, since otherwise - * ebtables userspace would abort */ - } + ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ebt->ebtulognl) + return -ENOMEM; - return ret; + nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); + return 0; } -static void __exit fini(void) +static void __net_exit ebt_ulog_net_fini(struct net *net) { - ebt_ulog_buff_t *ub; int i; + struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); - nf_log_unregister_logger(&ebt_ulog_logger); - ebt_unregister_watcher(&ulog); + nf_log_unset(net, &ebt_ulog_logger); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) - del_timer(&ub->timer); - spin_lock_bh(&ub->lock); + ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i]; + del_timer(&ub->timer); + if (ub->skb) { kfree_skb(ub->skb); ub->skb = NULL; } - spin_unlock_bh(&ub->lock); } - sock_release(ebtulognl->sk_socket); + netlink_kernel_release(ebt->ebtulognl); +} + +static struct pernet_operations ebt_ulog_net_ops = { + .init = ebt_ulog_net_init, + .exit = ebt_ulog_net_fini, + .id = &ebt_ulog_net_id, + .size = sizeof(struct ebt_ulog_net), +}; + +static int __init ebt_ulog_init(void) +{ + int ret; + + if (nlbufsiz >= 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB," + "please try a smaller nlbufsiz parameter.\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ebt_ulog_net_ops); + if (ret) + goto out_pernet; + + ret = xt_register_target(&ebt_ulog_tg_reg); + if (ret) + goto out_target; + + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ebt_ulog_net_ops); +out_pernet: + return ret; +} + +static void __exit ebt_ulog_fini(void) +{ + nf_log_unregister(&ebt_ulog_logger); + xt_unregister_target(&ebt_ulog_tg_reg); + unregister_pernet_subsys(&ebt_ulog_net_ops); } -module_init(init); -module_exit(fini); +module_init(ebt_ulog_init); +module_exit(ebt_ulog_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); -MODULE_DESCRIPTION("ebtables userspace logging module for bridged Ethernet" - " frames"); +MODULE_DESCRIPTION("Ebtables: Packet logging to netlink using ULOG"); diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index db60d734908..8d3f8c7651f 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -14,52 +14,51 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_vlan.h> -static int debug; #define MODULE_VERS "0.6" -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages"); MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>"); -MODULE_DESCRIPTION("802.1Q match module (ebtables extension), v" - MODULE_VERS); +MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match"); MODULE_LICENSE("GPL"); - -#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) -#define INV_FLAG(_inv_flag_) (info->invflags & _inv_flag_) ? "!" : "" #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ -#define SET_BITMASK(_BIT_MASK_) info->bitmask |= _BIT_MASK_ -#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;} - -static int -ebt_filter_vlan(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } + +static bool +ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; - struct vlan_hdr _frame, *fp; + const struct ebt_vlan_info *info = par->matchinfo; unsigned short TCI; /* Whole TCI, given from parsed frame */ unsigned short id; /* VLAN ID, given from frame TCI */ unsigned char prio; /* user_priority, given from frame TCI */ /* VLAN encapsulated Type/Length field, given from orig frame */ - unsigned short encap; + __be16 encap; + + if (vlan_tx_tag_present(skb)) { + TCI = vlan_tx_tag_get(skb); + encap = skb->protocol; + } else { + const struct vlan_hdr *fp; + struct vlan_hdr _frame; + + fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); + if (fp == NULL) + return false; - fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); - if (fp == NULL) - return EBT_NOMATCH; + TCI = ntohs(fp->h_vlan_TCI); + encap = fp->h_vlan_encapsulated_proto; + } /* Tag Control Information (TCI) consists of the following elements: * - User_priority. The user_priority field is three bits in length, @@ -68,10 +67,8 @@ ebt_filter_vlan(const struct sk_buff *skb, * (CFI) is a single bit flag value. Currently ignored. * - VLAN Identifier (VID). The VID is encoded as * an unsigned binary number. */ - TCI = ntohs(fp->h_vlan_TCI); id = TCI & VLAN_VID_MASK; prio = (TCI >> 13) & 0x7; - encap = fp->h_vlan_encapsulated_proto; /* Checking VLAN Identifier (VID) */ if (GET_BITMASK(EBT_VLAN_ID)) @@ -85,63 +82,51 @@ ebt_filter_vlan(const struct sk_buff *skb, if (GET_BITMASK(EBT_VLAN_ENCAP)) EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP); - return EBT_MATCH; + return true; } -static int -ebt_check_vlan(const char *tablename, - unsigned int hooknr, - const struct ebt_entry *e, void *data, unsigned int datalen) +static int ebt_vlan_mt_check(const struct xt_mtchk_param *par) { - struct ebt_vlan_info *info = (struct ebt_vlan_info *) data; - - /* Parameters buffer overflow check */ - if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { - DEBUG_MSG - ("passed size %d is not eq to ebt_vlan_info (%Zd)\n", - datalen, sizeof(struct ebt_vlan_info)); - return -EINVAL; - } + struct ebt_vlan_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; /* Is it 802.1Q frame checked? */ if (e->ethproto != htons(ETH_P_8021Q)) { - DEBUG_MSG - ("passed entry proto %2.4X is not 802.1Q (8100)\n", - (unsigned short) ntohs(e->ethproto)); + pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n", + ntohs(e->ethproto)); return -EINVAL; } /* Check for bitmask range * True if even one bit is out of mask */ if (info->bitmask & ~EBT_VLAN_MASK) { - DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", - info->bitmask, EBT_VLAN_MASK); + pr_debug("bitmask %2X is out of mask (%2X)\n", + info->bitmask, EBT_VLAN_MASK); return -EINVAL; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { - DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", - info->invflags, EBT_VLAN_MASK); + pr_debug("inversion flags %2X is out of mask (%2X)\n", + info->invflags, EBT_VLAN_MASK); return -EINVAL; } /* Reserved VLAN ID (VID) values * ----------------------------- - * 0 - The null VLAN ID. + * 0 - The null VLAN ID. * 1 - The default Port VID (PVID) - * 0x0FFF - Reserved for implementation use. - * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */ + * 0x0FFF - Reserved for implementation use. + * if_vlan.h: VLAN_N_VID 4096. */ if (GET_BITMASK(EBT_VLAN_ID)) { if (!!info->id) { /* if id!=0 => check vid range */ - if (info->id > VLAN_GROUP_ARRAY_LEN) { - DEBUG_MSG - ("id %d is out of range (1-4096)\n", - info->id); + if (info->id > VLAN_N_VID) { + pr_debug("id %d is out of range (1-4096)\n", + info->id); return -EINVAL; } /* Note: This is valid VLAN-tagged frame point. - * Any value of user_priority are acceptable, + * Any value of user_priority are acceptable, * but should be ignored according to 802.1Q Std. * So we just drop the prio flag. */ info->bitmask &= ~EBT_VLAN_PRIO; @@ -151,8 +136,8 @@ ebt_check_vlan(const char *tablename, if (GET_BITMASK(EBT_VLAN_PRIO)) { if ((unsigned char) info->prio > 7) { - DEBUG_MSG("prio %d is out of range (0-7)\n", - info->prio); + pr_debug("prio %d is out of range (0-7)\n", + info->prio); return -EINVAL; } } @@ -161,9 +146,8 @@ ebt_check_vlan(const char *tablename, * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */ if (GET_BITMASK(EBT_VLAN_ENCAP)) { if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { - DEBUG_MSG - ("encap frame length %d is less than minimal\n", - ntohs(info->encap)); + pr_debug("encap frame length %d is less than " + "minimal\n", ntohs(info->encap)); return -EINVAL; } } @@ -171,25 +155,26 @@ ebt_check_vlan(const char *tablename, return 0; } -static struct ebt_match filter_vlan = { - .name = EBT_VLAN_MATCH, - .match = ebt_filter_vlan, - .check = ebt_check_vlan, +static struct xt_match ebt_vlan_mt_reg __read_mostly = { + .name = "vlan", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_vlan_mt, + .checkentry = ebt_vlan_mt_check, + .matchsize = sizeof(struct ebt_vlan_info), .me = THIS_MODULE, }; -static int __init init(void) +static int __init ebt_vlan_init(void) { - DEBUG_MSG("ebtables 802.1Q extension module v" - MODULE_VERS "\n"); - DEBUG_MSG("module debug=%d\n", !!debug); - return ebt_register_match(&filter_vlan); + pr_debug("ebtables 802.1Q extension module v" MODULE_VERS "\n"); + return xt_register_match(&ebt_vlan_mt_reg); } -static void __exit fini(void) +static void __exit ebt_vlan_fini(void) { - ebt_unregister_match(&filter_vlan); + xt_unregister_match(&ebt_vlan_mt_reg); } -module_init(init); -module_exit(fini); +module_init(ebt_vlan_init); +module_exit(ebt_vlan_fini); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1767c94cd3d..d2cdf5d6e98 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -23,8 +23,7 @@ static struct ebt_entries initial_chain = { .policy = EBT_ACCEPT, }; -static struct ebt_replace initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "broute", .valid_hooks = 1 << NF_BR_BROUTING, .entries_size = sizeof(struct ebt_entries), @@ -41,46 +40,61 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table broute_table = -{ +static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .lock = RW_LOCK_UNLOCKED, .check = check, .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff **pskb) +static int ebt_broute(struct sk_buff *skb) { int ret; - ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL, - &broute_table); + ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, + dev_net(skb->dev)->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ } -static int __init init(void) +static int __net_init broute_net_init(struct net *net) +{ + net->xt.broute_table = ebt_register_table(net, &broute_table); + return PTR_ERR_OR_ZERO(net->xt.broute_table); +} + +static void __net_exit broute_net_exit(struct net *net) +{ + ebt_unregister_table(net, net->xt.broute_table); +} + +static struct pernet_operations broute_net_ops = { + .init = broute_net_init, + .exit = broute_net_exit, +}; + +static int __init ebtable_broute_init(void) { int ret; - ret = ebt_register_table(&broute_table); + ret = register_pernet_subsys(&broute_net_ops); if (ret < 0) return ret; /* see br_input.c */ - br_should_route_hook = ebt_broute; - return ret; + RCU_INIT_POINTER(br_should_route_hook, + (br_should_route_hook_t *)ebt_broute); + return 0; } -static void __exit fini(void) +static void __exit ebtable_broute_fini(void) { - br_should_route_hook = NULL; + RCU_INIT_POINTER(br_should_route_hook, NULL); synchronize_net(); - ebt_unregister_table(&broute_table); + unregister_pernet_subsys(&broute_net_ops); } -module_init(init); -module_exit(fini); +module_init(ebtable_broute_init); +module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index c18666e0392..ce205aabf9c 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -14,8 +14,7 @@ #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \ (1 << NF_BR_LOCAL_OUT)) -static struct ebt_entries initial_chains[] = -{ +static struct ebt_entries initial_chains[] = { { .name = "INPUT", .policy = EBT_ACCEPT, @@ -30,8 +29,7 @@ static struct ebt_entries initial_chains[] = }, }; -static struct ebt_replace initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), @@ -50,74 +48,91 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_filter = -{ +static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, - .valid_hooks = FILTER_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, + .valid_hooks = FILTER_VALID_HOOKS, .check = check, .me = THIS_MODULE, }; static unsigned int -ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, - const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_filter); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_filter); } -static struct nf_hook_ops ebt_ops_filter[] = { +static unsigned int +ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_filter); +} + +static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { { - .hook = ebt_hook, + .hook = ebt_in_hook, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_hook, + .hook = ebt_in_hook, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_hook, + .hook = ebt_out_hook, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, }, }; -static int __init init(void) +static int __net_init frame_filter_net_init(struct net *net) +{ + net->xt.frame_filter = ebt_register_table(net, &frame_filter); + return PTR_ERR_OR_ZERO(net->xt.frame_filter); +} + +static void __net_exit frame_filter_net_exit(struct net *net) +{ + ebt_unregister_table(net, net->xt.frame_filter); +} + +static struct pernet_operations frame_filter_net_ops = { + .init = frame_filter_net_init, + .exit = frame_filter_net_exit, +}; + +static int __init ebtable_filter_init(void) { - int i, j, ret; + int ret; - ret = ebt_register_table(&frame_filter); + ret = register_pernet_subsys(&frame_filter_net_ops); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - if ((ret = nf_register_hook(&ebt_ops_filter[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_filter[j]); - ebt_unregister_table(&frame_filter); + ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); + if (ret < 0) + unregister_pernet_subsys(&frame_filter_net_ops); return ret; } -static void __exit fini(void) +static void __exit ebtable_filter_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_filter); i++) - nf_unregister_hook(&ebt_ops_filter[i]); - ebt_unregister_table(&frame_filter); + nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); + unregister_pernet_subsys(&frame_filter_net_ops); } -module_init(init); -module_exit(fini); +module_init(ebtable_filter_init); +module_exit(ebtable_filter_fini); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 828cac2cc4a..a0ac2984fb6 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -14,8 +14,7 @@ #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \ (1 << NF_BR_POST_ROUTING)) -static struct ebt_entries initial_chains[] = -{ +static struct ebt_entries initial_chains[] = { { .name = "PREROUTING", .policy = EBT_ACCEPT, @@ -30,8 +29,7 @@ static struct ebt_entries initial_chains[] = } }; -static struct ebt_replace initial_table = -{ +static struct ebt_replace_kernel initial_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .entries_size = 3 * sizeof(struct ebt_entries), @@ -50,81 +48,91 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_nat = -{ +static struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, .check = check, .me = THIS_MODULE, }; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in - , const struct net_device *out, int (*okfn)(struct sk_buff *)) +ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, pskb, in, out, &frame_nat); + return ebt_do_table(ops->hooknum, skb, in, out, + dev_net(out)->xt.frame_nat); } -static struct nf_hook_ops ebt_ops_nat[] = { +static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { { - .hook = ebt_nat_dst, + .hook = ebt_nat_out, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { - .hook = ebt_nat_src, + .hook = ebt_nat_out, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { - .hook = ebt_nat_dst, + .hook = ebt_nat_in, .owner = THIS_MODULE, - .pf = PF_BRIDGE, + .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, }, }; -static int __init init(void) +static int __net_init frame_nat_net_init(struct net *net) +{ + net->xt.frame_nat = ebt_register_table(net, &frame_nat); + return PTR_ERR_OR_ZERO(net->xt.frame_nat); +} + +static void __net_exit frame_nat_net_exit(struct net *net) +{ + ebt_unregister_table(net, net->xt.frame_nat); +} + +static struct pernet_operations frame_nat_net_ops = { + .init = frame_nat_net_init, + .exit = frame_nat_net_exit, +}; + +static int __init ebtable_nat_init(void) { - int i, ret, j; + int ret; - ret = ebt_register_table(&frame_nat); + ret = register_pernet_subsys(&frame_nat_net_ops); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - if ((ret = nf_register_hook(&ebt_ops_nat[i])) < 0) - goto cleanup; - return ret; -cleanup: - for (j = 0; j < i; j++) - nf_unregister_hook(&ebt_ops_nat[j]); - ebt_unregister_table(&frame_nat); + ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); + if (ret < 0) + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } -static void __exit fini(void) +static void __exit ebtable_nat_fini(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(ebt_ops_nat); i++) - nf_unregister_hook(&ebt_ops_nat[i]); - ebt_unregister_table(&frame_nat); + nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); + unregister_pernet_subsys(&frame_nat_net_ops); } -module_init(init); -module_exit(fini); +module_init(ebtable_nat_init); +module_exit(ebtable_nat_fini); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 00729b3604f..1059ed3bc25 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -14,16 +14,15 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -/* used for print_string */ -#include <linux/sched.h> -#include <linux/tty.h> - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kmod.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/smp.h> #include <linux/cpumask.h> @@ -31,40 +30,9 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -/* list_named_find */ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) -#include <linux/netfilter_ipv4/listhelp.h> - -#if 0 -/* use this for remote debugging - * Copyright (C) 1998 by Ori Pomerantz - * Print the string to the appropriate tty, the one - * the current task uses - */ -static void print_string(char *str) -{ - struct tty_struct *my_tty; - - /* The tty for the current task */ - my_tty = current->signal->tty; - if (my_tty != NULL) { - my_tty->driver->write(my_tty, 0, str, strlen(str)); - my_tty->driver->write(my_tty, 0, "\015\012", 2); - } -} - -#define BUGPRINT(args) print_string(args); -#else #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ - "report to author: "format, ## args) + "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ -#endif -#define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ - ": out of memory: "format, ## args) -/* #define MEMPRINT(format, args...) */ - - /* * Each cpu has its own set of counters, so there is no need for write_lock in @@ -81,71 +49,111 @@ static void print_string(char *str) -static DECLARE_MUTEX(ebt_mutex); -static LIST_HEAD(ebt_tables); -static LIST_HEAD(ebt_targets); -static LIST_HEAD(ebt_matches); -static LIST_HEAD(ebt_watchers); +static DEFINE_MUTEX(ebt_mutex); + +#ifdef CONFIG_COMPAT +static void ebt_standard_compat_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v >= 0) + v += xt_compat_calc_jump(NFPROTO_BRIDGE, v); + memcpy(dst, &v, sizeof(v)); +} + +static int ebt_standard_compat_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv >= 0) + cv -= xt_compat_calc_jump(NFPROTO_BRIDGE, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} +#endif + -static struct ebt_target ebt_standard_target = -{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL}; +static struct xt_target ebt_standard_target = { + .name = "standard", + .revision = 0, + .family = NFPROTO_BRIDGE, + .targetsize = sizeof(int), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = ebt_standard_compat_from_user, + .compat_to_user = ebt_standard_compat_to_user, +#endif +}; -static inline int ebt_do_watcher (struct ebt_entry_watcher *w, - const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, - const struct net_device *out) +static inline int +ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, + struct xt_action_param *par) { - w->u.watcher->watcher(skb, hooknr, in, out, w->data, - w->watcher_size); + par->target = w->u.watcher; + par->targinfo = w->data; + w->u.watcher->target(skb, par); /* watchers don't give a verdict */ return 0; } -static inline int ebt_do_match (struct ebt_entry_match *m, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out) +static inline int +ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb, + struct xt_action_param *par) { - return m->u.match->match(skb, in, out, m->data, - m->match_size); + par->match = m->u.match; + par->matchinfo = m->data; + return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH; } -static inline int ebt_dev_check(char *entry, const struct net_device *device) +static inline int +ebt_dev_check(const char *entry, const struct net_device *device) { int i = 0; - char *devname = device->name; + const char *devname; if (*entry == '\0') return 0; if (!device) return 1; + devname = device->name; /* 1 is the wildcard token */ while (entry[i] != '\0' && entry[i] != 1 && entry[i] == devname[i]) i++; - return (devname[i] != entry[i] && entry[i] != 1); + return devname[i] != entry[i] && entry[i] != 1; } -#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) +#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ -static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, - const struct net_device *in, const struct net_device *out) +static inline int +ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out) { + const struct ethhdr *h = eth_hdr(skb); + const struct net_bridge_port *p; + __be16 ethproto; int verdict, i; + if (vlan_tx_tag_present(skb)) + ethproto = htons(ETH_P_8021Q); + else + ethproto = h->h_proto; + if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO)) + if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && - FWINV2(e->ethproto != h->h_proto, EBT_IPROTO)) + FWINV2(e->ethproto != ethproto, EBT_IPROTO)) return 1; if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) return 1; if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) return 1; - if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( - e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) + /* rcu_read_lock()ed by nf_hook_slow */ + if (in && (p = br_port_get_rcu(in)) != NULL && + FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN)) return 1; - if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check( - e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) + if (out && (p = br_port_get_rcu(out)) != NULL && + FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT)) return 1; if (e->bitmask & EBT_SOURCEMAC) { @@ -167,20 +175,33 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, return 0; } +static inline __pure +struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, +unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table) { int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; - struct ebt_entry_target *t; + const struct ebt_entry_target *t; int verdict, sp = 0; struct ebt_chainstack *cs; struct ebt_entries *chaininfo; - char *base; - struct ebt_table_info *private; + const char *base; + const struct ebt_table_info *private; + struct xt_action_param acpar; + + acpar.family = NFPROTO_BRIDGE; + acpar.in = in; + acpar.out = out; + acpar.hotdrop = false; + acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -198,29 +219,34 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, eth_hdr(*pskb), in, out)) + if (ebt_basic_match(point, skb, in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) goto letscontinue; + if (acpar.hotdrop) { + read_unlock_bh(&table->lock); + return NF_DROP; + } /* increase counter */ (*(counter_base + i)).pcnt++; - (*(counter_base + i)).bcnt+=(**pskb).len; + (*(counter_base + i)).bcnt += skb->len; /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, - out); + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar); t = (struct ebt_entry_target *) (((char *)point) + point->target_offset); /* standard target */ if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; - else - verdict = t->u.target->target(pskb, hook, - in, out, t->data, t->target_size); + else { + acpar.target = t->u.target; + acpar.targinfo = t->data; + verdict = t->u.target->target(skb, &acpar); + } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); return NF_ACCEPT; @@ -260,8 +286,7 @@ letsreturn: /* jump to a udc */ cs[sp].n = i + 1; cs[sp].chaininfo = chaininfo; - cs[sp].e = (struct ebt_entry *) - (((char *)point) + point->next_offset); + cs[sp].e = ebt_next_entry(point); i = 0; chaininfo = (struct ebt_entries *) (base + verdict); #ifdef CONFIG_NETFILTER_DEBUG @@ -277,8 +302,7 @@ letsreturn: sp++; continue; letscontinue: - point = (struct ebt_entry *) - (((char *)point) + point->next_offset); + point = ebt_next_entry(point); i++; } @@ -296,162 +320,197 @@ letscontinue: /* If it succeeds, returns element and locks mutex */ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, - struct semaphore *mutex) + struct mutex *mutex) { - void *ret; + struct { + struct list_head list; + char name[EBT_FUNCTION_MAXNAMELEN]; + } *e; - *error = down_interruptible(mutex); + *error = mutex_lock_interruptible(mutex); if (*error != 0) return NULL; - ret = list_named_find(head, name); - if (!ret) { - *error = -ENOENT; - up(mutex); + list_for_each_entry(e, head, list) { + if (strcmp(e->name, name) == 0) + return e; } - return ret; + *error = -ENOENT; + mutex_unlock(mutex); + return NULL; } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, - int *error, struct semaphore *mutex) + int *error, struct mutex *mutex) { - void *ret; - - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); - } - return ret; + return try_then_request_module( + find_inlist_lock_noload(head, name, error, mutex), + "%s%s", prefix, name); } -#endif static inline struct ebt_table * -find_table_lock(const char *name, int *error, struct semaphore *mutex) -{ - return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); -} - -static inline struct ebt_match * -find_match_lock(const char *name, int *error, struct semaphore *mutex) +find_table_lock(struct net *net, const char *name, int *error, + struct mutex *mutex) { - return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex); -} - -static inline struct ebt_watcher * -find_watcher_lock(const char *name, int *error, struct semaphore *mutex) -{ - return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex); -} - -static inline struct ebt_target * -find_target_lock(const char *name, int *error, struct semaphore *mutex) -{ - return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex); + return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name, + "ebtable_", error, mutex); } static inline int -ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *cnt) { - struct ebt_match *match; + const struct ebt_entry *e = par->entryinfo; + struct xt_match *match; + size_t left = ((char *)e + e->watchers_offset) - (char *)m; int ret; - if (((char *)m) + m->match_size + sizeof(struct ebt_entry_match) > - ((char *)e) + e->watchers_offset) + if (left < sizeof(struct ebt_entry_match) || + left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = find_match_lock(m->u.name, &ret, &ebt_mutex); - if (!match) - return ret; + + match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0); + if (IS_ERR(match)) + return PTR_ERR(match); m->u.match = match; - if (!try_module_get(match->me)) { - up(&ebt_mutex); - return -ENOENT; - } - up(&ebt_mutex); - if (match->check && - match->check(name, hookmask, e, m->data, m->match_size) != 0) { - BUGPRINT("match->check failed\n"); + + par->match = match; + par->matchinfo = m->data; + ret = xt_check_match(par, m->match_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(match->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } static inline int -ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, + unsigned int *cnt) { - struct ebt_watcher *watcher; + const struct ebt_entry *e = par->entryinfo; + struct xt_target *watcher; + size_t left = ((char *)e + e->target_offset) - (char *)w; int ret; - if (((char *)w) + w->watcher_size + sizeof(struct ebt_entry_watcher) > - ((char *)e) + e->target_offset) + if (left < sizeof(struct ebt_entry_watcher) || + left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; - watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex); - if (!watcher) - return ret; + + watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0); + if (IS_ERR(watcher)) + return PTR_ERR(watcher); w->u.watcher = watcher; - if (!try_module_get(watcher->me)) { - up(&ebt_mutex); - return -ENOENT; - } - up(&ebt_mutex); - if (watcher->check && - watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) { - BUGPRINT("watcher->check failed\n"); + + par->target = watcher; + par->targinfo = w->data; + ret = xt_check_target(par, w->watcher_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(watcher->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } +static int ebt_verify_pointers(const struct ebt_replace *repl, + struct ebt_table_info *newinfo) +{ + unsigned int limit = repl->entries_size; + unsigned int valid_hooks = repl->valid_hooks; + unsigned int offset = 0; + int i; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) + newinfo->hook_entry[i] = NULL; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; + + while (offset < limit) { + size_t left = limit - offset; + struct ebt_entry *e = (void *)newinfo->entries + offset; + + if (left < sizeof(unsigned int)) + break; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((valid_hooks & (1 << i)) == 0) + continue; + if ((char __user *)repl->hook_entry[i] == + repl->entries + offset) + break; + } + + if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { + if (e->bitmask != 0) { + /* we make userspace set this right, + so there is no misunderstanding */ + BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " + "in distinguisher\n"); + return -EINVAL; + } + if (i != NF_BR_NUMHOOKS) + newinfo->hook_entry[i] = (struct ebt_entries *)e; + if (left < sizeof(struct ebt_entries)) + break; + offset += sizeof(struct ebt_entries); + } else { + if (left < sizeof(struct ebt_entry)) + break; + if (left < e->next_offset) + break; + if (e->next_offset < sizeof(struct ebt_entry)) + return -EINVAL; + offset += e->next_offset; + } + } + if (offset != limit) { + BUGPRINT("entries_size too small\n"); + return -EINVAL; + } + + /* check if all valid hooks have a chain */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if (!newinfo->hook_entry[i] && + (valid_hooks & (1 << i))) { + BUGPRINT("Valid hook without chain\n"); + return -EINVAL; + } + } + return 0; +} + /* * this one is very careful, as it is the first function * to parse the userspace data */ static inline int -ebt_check_entry_size_and_hooks(struct ebt_entry *e, - struct ebt_table_info *newinfo, char *base, char *limit, - struct ebt_entries **hook_entries, unsigned int *n, unsigned int *cnt, - unsigned int *totalcnt, unsigned int *udc_cnt, unsigned int valid_hooks) +ebt_check_entry_size_and_hooks(const struct ebt_entry *e, + const struct ebt_table_info *newinfo, + unsigned int *n, unsigned int *cnt, + unsigned int *totalcnt, unsigned int *udc_cnt) { int i; for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if ((valid_hooks & (1 << i)) == 0) - continue; - if ( (char *)hook_entries[i] - base == - (char *)e - newinfo->entries) + if ((void *)e == (void *)newinfo->hook_entry[i]) break; } /* beginning of a new chain if i == NF_BR_NUMHOOKS it must be a user defined chain */ - if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) { - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) != 0) { - /* we make userspace set this right, - so there is no misunderstanding */ - BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " - "in distinguisher\n"); - return -EINVAL; - } + if (i != NF_BR_NUMHOOKS || !e->bitmask) { /* this checks if the previous chain has as many entries as it said it has */ if (*n != *cnt) { BUGPRINT("nentries does not equal the nr of entries " - "in the chain\n"); - return -EINVAL; - } - /* before we look at the struct, be sure it is not too big */ - if ((char *)hook_entries[i] + sizeof(struct ebt_entries) - > limit) { - BUGPRINT("entries_size too small\n"); + "in the chain\n"); return -EINVAL; } if (((struct ebt_entries *)e)->policy != EBT_DROP && @@ -465,8 +524,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e, } if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ (*udc_cnt)++; - else - newinfo->hook_entry[i] = (struct ebt_entries *)e; if (((struct ebt_entries *)e)->counter_offset != *totalcnt) { BUGPRINT("counter_offset != totalcnt"); return -EINVAL; @@ -487,7 +544,6 @@ ebt_check_entry_size_and_hooks(struct ebt_entry *e, BUGPRINT("target size too small\n"); return -EINVAL; } - (*cnt)++; (*totalcnt)++; return 0; @@ -506,17 +562,14 @@ struct ebt_cl_stack */ static inline int ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, - struct ebt_entries **hook_entries, unsigned int *n, unsigned int valid_hooks, - struct ebt_cl_stack *udc) + unsigned int *n, struct ebt_cl_stack *udc) { int i; /* we're only interested in chain starts */ - if (e->bitmask & EBT_ENTRY_OR_ENTRIES) + if (e->bitmask) return 0; for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if ((valid_hooks & (1 << i)) == 0) - continue; if (newinfo->hook_entry[i] == (struct ebt_entries *)e) break; } @@ -534,61 +587,82 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, } static inline int -ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) +ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.match->destroy) - m->u.match->destroy(m->data, m->match_size); - module_put(m->u.match->me); + par.net = net; + par.match = m->u.match; + par.matchinfo = m->data; + par.family = NFPROTO_BRIDGE; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } static inline int -ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) +ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) { + struct xt_tgdtor_param par; + if (i && (*i)-- == 0) return 1; - if (w->u.watcher->destroy) - w->u.watcher->destroy(w->data, w->watcher_size); - module_put(w->u.watcher->me); + par.net = net; + par.target = w->u.watcher; + par.targinfo = w->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } static inline int -ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) +ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) { + struct xt_tgdtor_param par; struct ebt_entry_target *t; - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + if (e->bitmask == 0) return 0; /* we're done */ if (cnt && (*cnt)-- == 0) return 1; - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); - EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - if (t->u.target->destroy) - t->u.target->destroy(t->data, t->target_size); - module_put(t->u.target->me); + par.net = net; + par.target = t->u.target; + par.targinfo = t->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } static inline int -ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, - const char *name, unsigned int *cnt, unsigned int valid_hooks, +ebt_check_entry(struct ebt_entry *e, struct net *net, + const struct ebt_table_info *newinfo, + const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; - struct ebt_target *target; + struct xt_target *target; unsigned int i, j, hook = 0, hookmask = 0; + size_t gap; int ret; + struct xt_mtchk_param mtpar; + struct xt_tgchk_param tgpar; /* don't mess with the struct ebt_entries */ - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + if (e->bitmask == 0) return 0; if (e->bitmask & ~EBT_F_MASK) { @@ -605,7 +679,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, } /* what hook do we belong to? */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if ((valid_hooks & (1 << i)) == 0) + if (!newinfo->hook_entry[i]) continue; if ((char *)newinfo->hook_entry[i] < (char *)e) hook = i; @@ -626,28 +700,31 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, hookmask = cl_s[i - 1].hookmask; } i = 0; - ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i); + + mtpar.net = tgpar.net = net; + mtpar.table = tgpar.table = name; + mtpar.entryinfo = tgpar.entryinfo = e; + mtpar.hook_mask = tgpar.hook_mask = hookmask; + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i); if (ret != 0) goto cleanup_matches; j = 0; - ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j); + ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); if (ret != 0) goto cleanup_watchers; t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - target = find_target_lock(t->u.name, &ret, &ebt_mutex); - if (!target) - goto cleanup_watchers; - if (!try_module_get(target->me)) { - up(&ebt_mutex); - ret = -ENOENT; + gap = e->next_offset - e->target_offset; + + target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0); + if (IS_ERR(target)) { + ret = PTR_ERR(target); goto cleanup_watchers; } - up(&ebt_mutex); t->u.target = target; if (t->u.target == &ebt_standard_target) { - if (e->target_offset + sizeof(struct ebt_standard_target) > - e->next_offset) { + if (gap < sizeof(struct ebt_standard_target)) { BUGPRINT("Standard target size too big\n"); ret = -EFAULT; goto cleanup_watchers; @@ -658,20 +735,26 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, ret = -EFAULT; goto cleanup_watchers; } - } else if ((e->target_offset + t->target_size + - sizeof(struct ebt_entry_target) > e->next_offset) || - (t->u.target->check && - t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){ + } else if (t->target_size > gap - sizeof(struct ebt_entry_target)) { module_put(t->u.target->me); ret = -EFAULT; goto cleanup_watchers; } + + tgpar.target = target; + tgpar.targinfo = t->data; + ret = xt_check_target(&tgpar, t->target_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(target->me); + goto cleanup_watchers; + } (*cnt)++; return 0; cleanup_watchers: - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); cleanup_matches: - EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; } @@ -680,12 +763,12 @@ cleanup_matches: * the hook mask for udc tells us from which base chains the udc can be * accessed. This mask is a parameter to the check() functions of the extensions */ -static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s, +static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s, unsigned int udc_cnt, unsigned int hooknr, char *base) { int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict; - struct ebt_entry *e = (struct ebt_entry *)chain->data; - struct ebt_entry_target *t; + const struct ebt_entry *e = (struct ebt_entry *)chain->data; + const struct ebt_entry_target *t; while (pos < nentries || chain_nr != -1) { /* end of udc, go back one 'recursion' step */ @@ -729,10 +812,12 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s BUGPRINT("loop\n"); return -1; } - /* this can't be 0, so the above test is correct */ + if (cl_s[i].hookmask & (1 << hooknr)) + goto letscontinue; + /* this can't be 0, so the loop test is correct */ cl_s[i].cs.n = pos + 1; pos = 0; - cl_s[i].cs.e = ((void *)e + e->next_offset); + cl_s[i].cs.e = ebt_next_entry(e); e = (struct ebt_entry *)(hlp2->data); nentries = hlp2->nentries; cl_s[i].from = chain_nr; @@ -742,66 +827,59 @@ static int check_chainloops(struct ebt_entries *chain, struct ebt_cl_stack *cl_s continue; } letscontinue: - e = (void *)e + e->next_offset; + e = ebt_next_entry(e); pos++; } return 0; } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(struct ebt_replace *repl, - struct ebt_table_info *newinfo) +static int translate_table(struct net *net, const char *name, + struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; struct ebt_cl_stack *cl_s = NULL; /* used in the checking for chain loops */ i = 0; - while (i < NF_BR_NUMHOOKS && !(repl->valid_hooks & (1 << i))) + while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) i++; if (i == NF_BR_NUMHOOKS) { BUGPRINT("No valid hooks specified\n"); return -EINVAL; } - if (repl->hook_entry[i] != (struct ebt_entries *)repl->entries) { + if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) { BUGPRINT("Chains don't start at beginning\n"); return -EINVAL; } /* make sure chains are ordered after each other in same order as their corresponding hooks */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { - if (!(repl->valid_hooks & (1 << j))) + if (!newinfo->hook_entry[j]) continue; - if ( repl->hook_entry[j] <= repl->hook_entry[i] ) { + if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) { BUGPRINT("Hook order must be followed\n"); return -EINVAL; } i = j; } - for (i = 0; i < NF_BR_NUMHOOKS; i++) - newinfo->hook_entry[i] = NULL; - - newinfo->entries_size = repl->entries_size; - newinfo->nentries = repl->nentries; - /* do some early checkings and initialize some things */ i = 0; /* holds the expected nr. of entries for the chain */ j = 0; /* holds the up to now counted entries for the chain */ k = 0; /* holds the total nr. of entries, should equal - newinfo->nentries afterwards */ + newinfo->nentries afterwards */ udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */ ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry_size_and_hooks, newinfo, repl->entries, - repl->entries + repl->entries_size, repl->hook_entry, &i, &j, &k, - &udc_cnt, repl->valid_hooks); + ebt_check_entry_size_and_hooks, newinfo, + &i, &j, &k, &udc_cnt); if (ret != 0) return ret; if (i != j) { BUGPRINT("nentries does not equal the nr of entries in the " - "(last) chain\n"); + "(last) chain\n"); return -EINVAL; } if (k != newinfo->nentries) { @@ -809,28 +887,18 @@ static int translate_table(struct ebt_replace *repl, return -EINVAL; } - /* check if all valid hooks have a chain */ - for (i = 0; i < NF_BR_NUMHOOKS; i++) { - if (newinfo->hook_entry[i] == NULL && - (repl->valid_hooks & (1 << i))) { - BUGPRINT("Valid hook without chain\n"); - return -EINVAL; - } - } - /* get the location of the udc, put them in an array while we're at it, allocate the chainstack */ if (udc_cnt) { /* this will get free'd in do_replace()/ebt_register_table() if an error occurs */ - newinfo->chainstack = (struct ebt_chainstack **) - vmalloc((highest_possible_processor_id()+1) - * sizeof(struct ebt_chainstack)); + newinfo->chainstack = + vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack))); if (!newinfo->chainstack) return -ENOMEM; - for_each_cpu(i) { + for_each_possible_cpu(i) { newinfo->chainstack[i] = - vmalloc(udc_cnt * sizeof(struct ebt_chainstack)); + vmalloc(udc_cnt * sizeof(*(newinfo->chainstack[0]))); if (!newinfo->chainstack[i]) { while (i) vfree(newinfo->chainstack[--i]); @@ -840,14 +908,12 @@ static int translate_table(struct ebt_replace *repl, } } - cl_s = (struct ebt_cl_stack *) - vmalloc(udc_cnt * sizeof(struct ebt_cl_stack)); + cl_s = vmalloc(udc_cnt * sizeof(*cl_s)); if (!cl_s) return -ENOMEM; i = 0; /* the i'th udc */ EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_get_udc_positions, newinfo, repl->hook_entry, &i, - repl->valid_hooks, cl_s); + ebt_get_udc_positions, newinfo, &i, cl_s); /* sanity check */ if (i != udc_cnt) { BUGPRINT("i != udc_cnt\n"); @@ -858,14 +924,14 @@ static int translate_table(struct ebt_replace *repl, /* Check for loops */ for (i = 0; i < NF_BR_NUMHOOKS; i++) - if (repl->valid_hooks & (1 << i)) + if (newinfo->hook_entry[i]) if (check_chainloops(newinfo->hook_entry[i], cl_s, udc_cnt, i, newinfo->entries)) { vfree(cl_s); return -EINVAL; } - /* we now know the following (along with E=mc²): + /* we now know the following (along with E=mc²): - the nr of entries in each chain is right - the size of the allocated space is right - all valid hooks have a corresponding chain @@ -878,18 +944,17 @@ static int translate_table(struct ebt_replace *repl, /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry, newinfo, repl->name, &i, repl->valid_hooks, - cl_s, udc_cnt); + ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, &i); + ebt_cleanup_entry, net, &i); } vfree(cl_s); return ret; } /* called under write_lock */ -static void get_counters(struct ebt_counter *oldcounters, +static void get_counters(const struct ebt_counter *oldcounters, struct ebt_counter *counters, unsigned int nentries) { int i, cpu; @@ -900,7 +965,7 @@ static void get_counters(struct ebt_counter *oldcounters, sizeof(struct ebt_counter) * nentries); /* add other counters to those of cpu 0 */ - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { if (cpu == 0) continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); @@ -911,82 +976,45 @@ static void get_counters(struct ebt_counter *oldcounters, } } -/* replace the table */ -static int do_replace(void __user *user, unsigned int len) +static int do_replace_finish(struct net *net, struct ebt_replace *repl, + struct ebt_table_info *newinfo) { - int ret, i, countersize; - struct ebt_table_info *newinfo; - struct ebt_replace tmp; - struct ebt_table *t; + int ret, i; struct ebt_counter *counterstmp = NULL; /* used to be able to unlock earlier */ struct ebt_table_info *table; - - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - if (len != sizeof(tmp) + tmp.entries_size) { - BUGPRINT("Wrong len argument\n"); - return -EINVAL; - } - - if (tmp.entries_size == 0) { - BUGPRINT("Entries_size never zero\n"); - return -EINVAL; - } - countersize = COUNTER_OFFSET(tmp.nentries) * - (highest_possible_processor_id()+1); - newinfo = (struct ebt_table_info *) - vmalloc(sizeof(struct ebt_table_info) + countersize); - if (!newinfo) - return -ENOMEM; - - if (countersize) - memset(newinfo->counters, 0, countersize); - - newinfo->entries = vmalloc(tmp.entries_size); - if (!newinfo->entries) { - ret = -ENOMEM; - goto free_newinfo; - } - if (copy_from_user( - newinfo->entries, tmp.entries, tmp.entries_size) != 0) { - BUGPRINT("Couldn't copy entries from userspace\n"); - ret = -EFAULT; - goto free_entries; - } + struct ebt_table *t; /* the user wants counters back the check on the size is done later, when we have the lock */ - if (tmp.num_counters) { - counterstmp = (struct ebt_counter *) - vmalloc(tmp.num_counters * sizeof(struct ebt_counter)); - if (!counterstmp) { - ret = -ENOMEM; - goto free_entries; - } + if (repl->num_counters) { + unsigned long size = repl->num_counters * sizeof(*counterstmp); + counterstmp = vmalloc(size); + if (!counterstmp) + return -ENOMEM; } - else - counterstmp = NULL; - /* this can get initialized by translate_table() */ newinfo->chainstack = NULL; - ret = translate_table(&tmp, newinfo); + ret = ebt_verify_pointers(repl, newinfo); + if (ret != 0) + goto free_counterstmp; + + ret = translate_table(net, repl->name, newinfo); if (ret != 0) goto free_counterstmp; - t = find_table_lock(tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, repl->name, &ret, &ebt_mutex); if (!t) { ret = -ENOENT; goto free_iterate; } /* the table doesn't like it */ - if (t->check && (ret = t->check(newinfo, tmp.valid_hooks))) + if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) goto free_unlock; - if (tmp.num_counters && tmp.num_counters != t->private->nentries) { + if (repl->num_counters && repl->num_counters != t->private->nentries) { BUGPRINT("Wrong nr. of counters requested\n"); ret = -EINVAL; goto free_unlock; @@ -1002,33 +1030,31 @@ static int do_replace(void __user *user, unsigned int len) module_put(t->me); /* we need an atomic snapshot of the counters */ write_lock_bh(&t->lock); - if (tmp.num_counters) + if (repl->num_counters) get_counters(t->private->counters, counterstmp, t->private->nentries); t->private = newinfo; write_unlock_bh(&t->lock); - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); /* so, a user can change the chains while having messed up her counter allocation. Only reason why this is done is because this way the lock is held only once, while this doesn't bring the kernel into a dangerous state. */ - if (tmp.num_counters && - copy_to_user(tmp.counters, counterstmp, - tmp.num_counters * sizeof(struct ebt_counter))) { - BUGPRINT("Couldn't copy counters to userspace\n"); - ret = -EFAULT; + if (repl->num_counters && + copy_to_user(repl->counters, counterstmp, + repl->num_counters * sizeof(struct ebt_counter))) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n"); } - else - ret = 0; /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); vfree(table->entries); if (table->chainstack) { - for_each_cpu(i) + for_each_possible_cpu(i) vfree(table->chainstack[i]); vfree(table->chainstack); } @@ -1038,127 +1064,132 @@ static int do_replace(void __user *user, unsigned int len) return ret; free_unlock: - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { - for_each_cpu(i) + for_each_possible_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } -free_entries: - vfree(newinfo->entries); -free_newinfo: - vfree(newinfo); return ret; } -int ebt_register_target(struct ebt_target *target) +/* replace the table */ +static int do_replace(struct net *net, const void __user *user, + unsigned int len) { - int ret; - - ret = down_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - if (!list_named_insert(&ebt_targets, target)) { - up(&ebt_mutex); - return -EEXIST; - } - up(&ebt_mutex); - - return 0; -} + int ret, countersize; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; -void ebt_unregister_target(struct ebt_target *target) -{ - down(&ebt_mutex); - LIST_DELETE(&ebt_targets, target); - up(&ebt_mutex); -} + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; -int ebt_register_match(struct ebt_match *match) -{ - int ret; + if (len != sizeof(tmp) + tmp.entries_size) { + BUGPRINT("Wrong len argument\n"); + return -EINVAL; + } - ret = down_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - if (!list_named_insert(&ebt_matches, match)) { - up(&ebt_mutex); - return -EEXIST; + if (tmp.entries_size == 0) { + BUGPRINT("Entries_size never zero\n"); + return -EINVAL; } - up(&ebt_mutex); + /* overflow check */ + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; - return 0; -} + tmp.name[sizeof(tmp.name) - 1] = 0; -void ebt_unregister_match(struct ebt_match *match) -{ - down(&ebt_mutex); - LIST_DELETE(&ebt_matches, match); - up(&ebt_mutex); -} + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; -int ebt_register_watcher(struct ebt_watcher *watcher) -{ - int ret; + if (countersize) + memset(newinfo->counters, 0, countersize); - ret = down_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - if (!list_named_insert(&ebt_watchers, watcher)) { - up(&ebt_mutex); - return -EEXIST; + newinfo->entries = vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + BUGPRINT("Couldn't copy entries from userspace\n"); + ret = -EFAULT; + goto free_entries; } - up(&ebt_mutex); - - return 0; -} -void ebt_unregister_watcher(struct ebt_watcher *watcher) -{ - down(&ebt_mutex); - LIST_DELETE(&ebt_watchers, watcher); - up(&ebt_mutex); + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; +free_entries: + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; } -int ebt_register_table(struct ebt_table *table) +struct ebt_table * +ebt_register_table(struct net *net, const struct ebt_table *input_table) { struct ebt_table_info *newinfo; + struct ebt_table *t, *table; + struct ebt_replace_kernel *repl; int ret, i, countersize; + void *p; - if (!table || !table->table ||!table->table->entries || - table->table->entries_size == 0 || - table->table->counters || table->private) { + if (input_table == NULL || (repl = input_table->table) == NULL || + repl->entries == NULL || repl->entries_size == 0 || + repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } - countersize = COUNTER_OFFSET(table->table->nentries) * - (highest_possible_processor_id()+1); - newinfo = (struct ebt_table_info *) - vmalloc(sizeof(struct ebt_table_info) + countersize); + /* Don't add one table to multiple lists. */ + table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); + if (!table) { + ret = -ENOMEM; + goto out; + } + + countersize = COUNTER_OFFSET(repl->nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); ret = -ENOMEM; if (!newinfo) - return -ENOMEM; + goto free_table; - newinfo->entries = vmalloc(table->table->entries_size); - if (!(newinfo->entries)) + p = vmalloc(repl->entries_size); + if (!p) goto free_newinfo; - memcpy(newinfo->entries, table->table->entries, - table->table->entries_size); + memcpy(p, repl->entries, repl->entries_size); + newinfo->entries = p; + + newinfo->entries_size = repl->entries_size; + newinfo->nentries = repl->nentries; if (countersize) memset(newinfo->counters, 0, countersize); /* fill in newinfo and parse the entries */ newinfo->chainstack = NULL; - ret = translate_table(table->table, newinfo); + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + if ((repl->valid_hooks & (1 << i)) == 0) + newinfo->hook_entry[i] = NULL; + else + newinfo->hook_entry[i] = p + + ((char *)repl->hook_entry[i] - repl->entries); + } + ret = translate_table(net, repl->name, newinfo); if (ret != 0) { BUGPRINT("Translate_table failed\n"); goto free_chainstack; @@ -1166,19 +1197,22 @@ int ebt_register_table(struct ebt_table *table) if (table->check && table->check(newinfo, table->valid_hooks)) { BUGPRINT("The table doesn't like its own initial data, lol\n"); - return -EINVAL; + ret = -EINVAL; + goto free_chainstack; } table->private = newinfo; rwlock_init(&table->lock); - ret = down_interruptible(&ebt_mutex); + ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) goto free_chainstack; - if (list_named_find(&ebt_tables, table->name)) { - ret = -EEXIST; - BUGPRINT("Table name already exists\n"); - goto free_unlock; + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + BUGPRINT("Table name already exists\n"); + goto free_unlock; + } } /* Hold a reference count if the chains aren't empty */ @@ -1186,24 +1220,27 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_prepend(&ebt_tables, table); - up(&ebt_mutex); - return 0; + list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); + mutex_unlock(&ebt_mutex); + return table; free_unlock: - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); free_chainstack: if (newinfo->chainstack) { - for_each_cpu(i) + for_each_possible_cpu(i) vfree(newinfo->chainstack[i]); vfree(newinfo->chainstack); } vfree(newinfo->entries); free_newinfo: vfree(newinfo); - return ret; +free_table: + kfree(table); +out: + return ERR_PTR(ret); } -void ebt_unregister_table(struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table) { int i; @@ -1211,53 +1248,51 @@ void ebt_unregister_table(struct ebt_table *table) BUGPRINT("Request to unregister NULL table!!!\n"); return; } - down(&ebt_mutex); - LIST_DELETE(&ebt_tables, table); - up(&ebt_mutex); + mutex_lock(&ebt_mutex); + list_del(&table->list); + mutex_unlock(&ebt_mutex); + EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, + ebt_cleanup_entry, net, NULL); + if (table->private->nentries) + module_put(table->me); vfree(table->private->entries); if (table->private->chainstack) { - for_each_cpu(i) + for_each_possible_cpu(i) vfree(table->private->chainstack[i]); vfree(table->private->chainstack); } vfree(table->private); + kfree(table); } /* userspace just supplied us with counters */ -static int update_counters(void __user *user, unsigned int len) +static int do_update_counters(struct net *net, const char *name, + struct ebt_counter __user *counters, + unsigned int num_counters, + const void __user *user, unsigned int len) { int i, ret; struct ebt_counter *tmp; - struct ebt_replace hlp; struct ebt_table *t; - if (copy_from_user(&hlp, user, sizeof(hlp))) - return -EFAULT; - - if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) - return -EINVAL; - if (hlp.num_counters == 0) + if (num_counters == 0) return -EINVAL; - if ( !(tmp = (struct ebt_counter *) - vmalloc(hlp.num_counters * sizeof(struct ebt_counter))) ){ - MEMPRINT("Update_counters && nomemory\n"); + tmp = vmalloc(num_counters * sizeof(*tmp)); + if (!tmp) return -ENOMEM; - } - t = find_table_lock(hlp.name, &ret, &ebt_mutex); + t = find_table_lock(net, name, &ret, &ebt_mutex); if (!t) goto free_tmp; - if (hlp.num_counters != t->private->nentries) { + if (num_counters != t->private->nentries) { BUGPRINT("Wrong nr of counters\n"); ret = -EINVAL; goto unlock_mutex; } - if ( copy_from_user(tmp, hlp.counters, - hlp.num_counters * sizeof(struct ebt_counter)) ) { - BUGPRINT("Updata_counters && !cfu\n"); + if (copy_from_user(tmp, counters, num_counters * sizeof(*counters))) { ret = -EFAULT; goto unlock_mutex; } @@ -1266,7 +1301,7 @@ static int update_counters(void __user *user, unsigned int len) write_lock_bh(&t->lock); /* we add to the counters of the first cpu */ - for (i = 0; i < hlp.num_counters; i++) { + for (i = 0; i < num_counters; i++) { t->private->counters[i].pcnt += tmp[i].pcnt; t->private->counters[i].bcnt += tmp[i].bcnt; } @@ -1274,60 +1309,119 @@ static int update_counters(void __user *user, unsigned int len) write_unlock_bh(&t->lock); ret = 0; unlock_mutex: - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); free_tmp: vfree(tmp); return ret; } -static inline int ebt_make_matchname(struct ebt_entry_match *m, - char *base, char *ubase) +static int update_counters(struct net *net, const void __user *user, + unsigned int len) +{ + struct ebt_replace hlp; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return -EINVAL; + + return do_update_counters(net, hlp.name, hlp.counters, + hlp.num_counters, user, len); +} + +static inline int ebt_make_matchname(const struct ebt_entry_match *m, + const char *base, char __user *ubase) { - char *hlp = ubase - base + (char *)m; - if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN)) + char __user *hlp = ubase + ((char *)m - base); + char name[EBT_FUNCTION_MAXNAMELEN] = {}; + + /* ebtables expects 32 bytes long names but xt_match names are 29 bytes + long. Copy 29 bytes and fill remaining bytes with zeroes. */ + strlcpy(name, m->u.match->name, sizeof(name)); + if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -static inline int ebt_make_watchername(struct ebt_entry_watcher *w, - char *base, char *ubase) +static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, + const char *base, char __user *ubase) { - char *hlp = ubase - base + (char *)w; - if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN)) + char __user *hlp = ubase + ((char *)w - base); + char name[EBT_FUNCTION_MAXNAMELEN] = {}; + + strlcpy(name, w->u.watcher->name, sizeof(name)); + if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -static inline int ebt_make_names(struct ebt_entry *e, char *base, char *ubase) +static inline int +ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) { int ret; - char *hlp; - struct ebt_entry_target *t; + char __user *hlp; + const struct ebt_entry_target *t; + char name[EBT_FUNCTION_MAXNAMELEN] = {}; - if ((e->bitmask & EBT_ENTRY_OR_ENTRIES) == 0) + if (e->bitmask == 0) return 0; - hlp = ubase - base + (char *)e + e->target_offset; + hlp = ubase + (((char *)e + e->target_offset) - base); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - + ret = EBT_MATCH_ITERATE(e, ebt_make_matchname, base, ubase); if (ret != 0) return ret; ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); if (ret != 0) return ret; - if (copy_to_user(hlp, t->u.target->name, EBT_FUNCTION_MAXNAMELEN)) + strlcpy(name, t->u.target->name, sizeof(name)); + if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; } -/* called with ebt_mutex down */ +static int copy_counters_to_user(struct ebt_table *t, + const struct ebt_counter *oldcounters, + void __user *user, unsigned int num_counters, + unsigned int nentries) +{ + struct ebt_counter *counterstmp; + int ret = 0; + + /* userspace might not need the counters */ + if (num_counters == 0) + return 0; + + if (num_counters != nentries) { + BUGPRINT("Num_counters wrong\n"); + return -EINVAL; + } + + counterstmp = vmalloc(nentries * sizeof(*counterstmp)); + if (!counterstmp) + return -ENOMEM; + + write_lock_bh(&t->lock); + get_counters(oldcounters, counterstmp, nentries); + write_unlock_bh(&t->lock); + + if (copy_to_user(user, counterstmp, + nentries * sizeof(struct ebt_counter))) + ret = -EFAULT; + vfree(counterstmp); + return ret; +} + +/* called with ebt_mutex locked */ static int copy_everything_to_user(struct ebt_table *t, void __user *user, - int *len, int cmd) + const int *len, int cmd) { struct ebt_replace tmp; - struct ebt_counter *counterstmp, *oldcounters; + const struct ebt_counter *oldcounters; unsigned int entries_size, nentries; + int ret; char *entries; if (cmd == EBT_SO_GET_ENTRIES) { @@ -1342,16 +1436,12 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, oldcounters = t->table->counters; } - if (copy_from_user(&tmp, user, sizeof(tmp))) { - BUGPRINT("Cfu didn't work\n"); + if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - } if (*len != sizeof(struct ebt_replace) + entries_size + - (tmp.num_counters? nentries * sizeof(struct ebt_counter): 0)) { - BUGPRINT("Wrong size\n"); + (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; - } if (tmp.nentries != nentries) { BUGPRINT("Nentries wrong\n"); @@ -1363,30 +1453,10 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, return -EINVAL; } - /* userspace might not need the counters */ - if (tmp.num_counters) { - if (tmp.num_counters != nentries) { - BUGPRINT("Num_counters wrong\n"); - return -EINVAL; - } - counterstmp = (struct ebt_counter *) - vmalloc(nentries * sizeof(struct ebt_counter)); - if (!counterstmp) { - MEMPRINT("Couldn't copy counters, out of memory\n"); - return -ENOMEM; - } - write_lock_bh(&t->lock); - get_counters(oldcounters, counterstmp, nentries); - write_unlock_bh(&t->lock); - - if (copy_to_user(tmp.counters, counterstmp, - nentries * sizeof(struct ebt_counter))) { - BUGPRINT("Couldn't copy counters to userspace\n"); - vfree(counterstmp); - return -EFAULT; - } - vfree(counterstmp); - } + ret = copy_counters_to_user(t, oldcounters, tmp.counters, + tmp.num_counters, nentries); + if (ret) + return ret; if (copy_to_user(tmp.entries, entries, entries_size)) { BUGPRINT("Couldn't copy entries to userspace\n"); @@ -1401,17 +1471,21 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; + struct net *net = sock_net(sk); + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; - switch(cmd) { + switch (cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(user, len); + ret = do_replace(net, user, len); break; case EBT_SO_SET_COUNTERS: - ret = update_counters(user, len); + ret = update_counters(net, user, len); break; default: ret = -EINVAL; - } + } return ret; } @@ -1420,20 +1494,24 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret; struct ebt_replace tmp; struct ebt_table *t; + struct net *net = sock_net(sk); + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; - switch(cmd) { + switch (cmd) { case EBT_SO_GET_INFO: case EBT_SO_GET_INIT_INFO: - if (*len != sizeof(struct ebt_replace)){ + if (*len != sizeof(struct ebt_replace)) { ret = -EINVAL; - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); break; } if (cmd == EBT_SO_GET_INFO) { @@ -1445,8 +1523,8 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) tmp.entries_size = t->table->entries_size; tmp.valid_hooks = t->table->valid_hooks; } - up(&ebt_mutex); - if (copy_to_user(user, &tmp, *len) != 0){ + mutex_unlock(&ebt_mutex); + if (copy_to_user(user, &tmp, *len) != 0) { BUGPRINT("c2u Didn't work\n"); ret = -EFAULT; break; @@ -1457,51 +1535,889 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case EBT_SO_GET_ENTRIES: case EBT_SO_GET_INIT_ENTRIES: ret = copy_everything_to_user(t, user, len, cmd); - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); break; default: - up(&ebt_mutex); + mutex_unlock(&ebt_mutex); ret = -EINVAL; } return ret; } -static struct nf_sockopt_ops ebt_sockopts = -{ { NULL, NULL }, PF_INET, EBT_BASE_CTL, EBT_SO_SET_MAX + 1, do_ebt_set_ctl, - EBT_BASE_CTL, EBT_SO_GET_MAX + 1, do_ebt_get_ctl, 0, NULL +#ifdef CONFIG_COMPAT +/* 32 bit-userspace compatibility definitions. */ +struct compat_ebt_replace { + char name[EBT_TABLE_MAXNAMELEN]; + compat_uint_t valid_hooks; + compat_uint_t nentries; + compat_uint_t entries_size; + /* start of the chains */ + compat_uptr_t hook_entry[NF_BR_NUMHOOKS]; + /* nr of counters userspace expects back */ + compat_uint_t num_counters; + /* where the kernel will put the old counters. */ + compat_uptr_t counters; + compat_uptr_t entries; +}; + +/* struct ebt_entry_match, _target and _watcher have same layout */ +struct compat_ebt_entry_mwt { + union { + char name[EBT_FUNCTION_MAXNAMELEN]; + compat_uptr_t ptr; + } u; + compat_uint_t match_size; + compat_uint_t data[0]; +}; + +/* account for possible padding between match_size and ->data */ +static int ebt_compat_entry_padsize(void) +{ + BUILD_BUG_ON(XT_ALIGN(sizeof(struct ebt_entry_match)) < + COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt))); + return (int) XT_ALIGN(sizeof(struct ebt_entry_match)) - + COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt)); +} + +static int ebt_compat_match_offset(const struct xt_match *match, + unsigned int userlen) +{ + /* + * ebt_among needs special handling. The kernel .matchsize is + * set to -1 at registration time; at runtime an EBT_ALIGN()ed + * value is expected. + * Example: userspace sends 4500, ebt_among.c wants 4504. + */ + if (unlikely(match->matchsize == -1)) + return XT_ALIGN(userlen) - COMPAT_XT_ALIGN(userlen); + return xt_compat_match_offset(match); +} + +static int compat_match_to_user(struct ebt_entry_match *m, void __user **dstptr, + unsigned int *size) +{ + const struct xt_match *match = m->u.match; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = ebt_compat_match_offset(match, m->match_size); + compat_uint_t msize = m->match_size - off; + + BUG_ON(off >= m->match_size); + + if (copy_to_user(cm->u.name, match->name, + strlen(match->name) + 1) || put_user(msize, &cm->match_size)) + return -EFAULT; + + if (match->compat_to_user) { + if (match->compat_to_user(cm->data, m->data)) + return -EFAULT; + } else if (copy_to_user(cm->data, m->data, msize)) + return -EFAULT; + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += msize; + return 0; +} + +static int compat_target_to_user(struct ebt_entry_target *t, + void __user **dstptr, + unsigned int *size) +{ + const struct xt_target *target = t->u.target; + struct compat_ebt_entry_mwt __user *cm = *dstptr; + int off = xt_compat_target_offset(target); + compat_uint_t tsize = t->target_size - off; + + BUG_ON(off >= t->target_size); + + if (copy_to_user(cm->u.name, target->name, + strlen(target->name) + 1) || put_user(tsize, &cm->match_size)) + return -EFAULT; + + if (target->compat_to_user) { + if (target->compat_to_user(cm->data, t->data)) + return -EFAULT; + } else if (copy_to_user(cm->data, t->data, tsize)) + return -EFAULT; + + *size -= ebt_compat_entry_padsize() + off; + *dstptr = cm->data; + *dstptr += tsize; + return 0; +} + +static int compat_watcher_to_user(struct ebt_entry_watcher *w, + void __user **dstptr, + unsigned int *size) +{ + return compat_target_to_user((struct ebt_entry_target *)w, + dstptr, size); +} + +static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr, + unsigned int *size) +{ + struct ebt_entry_target *t; + struct ebt_entry __user *ce; + u32 watchers_offset, target_offset, next_offset; + compat_uint_t origsize; + int ret; + + if (e->bitmask == 0) { + if (*size < sizeof(struct ebt_entries)) + return -EINVAL; + if (copy_to_user(*dstptr, e, sizeof(struct ebt_entries))) + return -EFAULT; + + *dstptr += sizeof(struct ebt_entries); + *size -= sizeof(struct ebt_entries); + return 0; + } + + if (*size < sizeof(*ce)) + return -EINVAL; + + ce = (struct ebt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(*ce))) + return -EFAULT; + + origsize = *size; + *dstptr += sizeof(*ce); + + ret = EBT_MATCH_ITERATE(e, compat_match_to_user, dstptr, size); + if (ret) + return ret; + watchers_offset = e->watchers_offset - (origsize - *size); + + ret = EBT_WATCHER_ITERATE(e, compat_watcher_to_user, dstptr, size); + if (ret) + return ret; + target_offset = e->target_offset - (origsize - *size); + + t = (struct ebt_entry_target *) ((char *) e + e->target_offset); + + ret = compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + + if (put_user(watchers_offset, &ce->watchers_offset) || + put_user(target_offset, &ce->target_offset) || + put_user(next_offset, &ce->next_offset)) + return -EFAULT; + + *size -= sizeof(*ce); + return 0; +} + +static int compat_calc_match(struct ebt_entry_match *m, int *off) +{ + *off += ebt_compat_match_offset(m->u.match, m->match_size); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_watcher(struct ebt_entry_watcher *w, int *off) +{ + *off += xt_compat_target_offset(w->u.watcher); + *off += ebt_compat_entry_padsize(); + return 0; +} + +static int compat_calc_entry(const struct ebt_entry *e, + const struct ebt_table_info *info, + const void *base, + struct compat_ebt_replace *newinfo) +{ + const struct ebt_entry_target *t; + unsigned int entry_offset; + int off, ret, i; + + if (e->bitmask == 0) + return 0; + + off = 0; + entry_offset = (void *)e - base; + + EBT_MATCH_ITERATE(e, compat_calc_match, &off); + EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off); + + t = (const struct ebt_entry_target *) ((char *) e + e->target_offset); + + off += xt_compat_target_offset(t->u.target); + off += ebt_compat_entry_padsize(); + + newinfo->entries_size -= off; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + const void *hookptr = info->hook_entry[i]; + if (info->hook_entry[i] && + (e < (struct ebt_entry *)(base - hookptr))) { + newinfo->hook_entry[i] -= off; + pr_debug("0x%08X -> 0x%08X\n", + newinfo->hook_entry[i] + off, + newinfo->hook_entry[i]); + } + } + + return 0; +} + + +static int compat_table_info(const struct ebt_table_info *info, + struct compat_ebt_replace *newinfo) +{ + unsigned int size = info->entries_size; + const void *entries = info->entries; + + newinfo->entries_size = size; + + xt_compat_init_offsets(NFPROTO_BRIDGE, info->nentries); + return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, + entries, newinfo); +} + +static int compat_copy_everything_to_user(struct ebt_table *t, + void __user *user, int *len, int cmd) +{ + struct compat_ebt_replace repl, tmp; + struct ebt_counter *oldcounters; + struct ebt_table_info tinfo; + int ret; + void __user *pos; + + memset(&tinfo, 0, sizeof(tinfo)); + + if (cmd == EBT_SO_GET_ENTRIES) { + tinfo.entries_size = t->private->entries_size; + tinfo.nentries = t->private->nentries; + tinfo.entries = t->private->entries; + oldcounters = t->private->counters; + } else { + tinfo.entries_size = t->table->entries_size; + tinfo.nentries = t->table->nentries; + tinfo.entries = t->table->entries; + oldcounters = t->table->counters; + } + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (tmp.nentries != tinfo.nentries || + (tmp.num_counters && tmp.num_counters != tinfo.nentries)) + return -EINVAL; + + memcpy(&repl, &tmp, sizeof(repl)); + if (cmd == EBT_SO_GET_ENTRIES) + ret = compat_table_info(t->private, &repl); + else + ret = compat_table_info(&tinfo, &repl); + if (ret) + return ret; + + if (*len != sizeof(tmp) + repl.entries_size + + (tmp.num_counters? tinfo.nentries * sizeof(struct ebt_counter): 0)) { + pr_err("wrong size: *len %d, entries_size %u, replsz %d\n", + *len, tinfo.entries_size, repl.entries_size); + return -EINVAL; + } + + /* userspace might not need the counters */ + ret = copy_counters_to_user(t, oldcounters, compat_ptr(tmp.counters), + tmp.num_counters, tinfo.nentries); + if (ret) + return ret; + + pos = compat_ptr(tmp.entries); + return EBT_ENTRY_ITERATE(tinfo.entries, tinfo.entries_size, + compat_copy_entry_to_user, &pos, &tmp.entries_size); +} + +struct ebt_entries_buf_state { + char *buf_kern_start; /* kernel buffer to copy (translated) data to */ + u32 buf_kern_len; /* total size of kernel buffer */ + u32 buf_kern_offset; /* amount of data copied so far */ + u32 buf_user_offset; /* read position in userspace buffer */ +}; + +static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) +{ + state->buf_kern_offset += sz; + return state->buf_kern_offset >= sz ? 0 : -EINVAL; +} + +static int ebt_buf_add(struct ebt_entries_buf_state *state, + void *data, unsigned int sz) +{ + if (state->buf_kern_start == NULL) + goto count_only; + + BUG_ON(state->buf_kern_offset + sz > state->buf_kern_len); + + memcpy(state->buf_kern_start + state->buf_kern_offset, data, sz); + + count_only: + state->buf_user_offset += sz; + return ebt_buf_count(state, sz); +} + +static int ebt_buf_add_pad(struct ebt_entries_buf_state *state, unsigned int sz) +{ + char *b = state->buf_kern_start; + + BUG_ON(b && state->buf_kern_offset > state->buf_kern_len); + + if (b != NULL && sz > 0) + memset(b + state->buf_kern_offset, 0, sz); + /* do not adjust ->buf_user_offset here, we added kernel-side padding */ + return ebt_buf_count(state, sz); +} + +enum compat_mwt { + EBT_COMPAT_MATCH, + EBT_COMPAT_WATCHER, + EBT_COMPAT_TARGET, +}; + +static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, + enum compat_mwt compat_mwt, + struct ebt_entries_buf_state *state, + const unsigned char *base) +{ + char name[EBT_FUNCTION_MAXNAMELEN]; + struct xt_match *match; + struct xt_target *wt; + void *dst = NULL; + int off, pad = 0; + unsigned int size_kern, match_size = mwt->match_size; + + strlcpy(name, mwt->u.name, sizeof(name)); + + if (state->buf_kern_start) + dst = state->buf_kern_start + state->buf_kern_offset; + + switch (compat_mwt) { + case EBT_COMPAT_MATCH: + match = xt_request_find_match(NFPROTO_BRIDGE, name, 0); + if (IS_ERR(match)) + return PTR_ERR(match); + + off = ebt_compat_match_offset(match, match_size); + if (dst) { + if (match->compat_from_user) + match->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = match->matchsize; + if (unlikely(size_kern == -1)) + size_kern = match_size; + module_put(match->me); + break; + case EBT_COMPAT_WATCHER: /* fallthrough */ + case EBT_COMPAT_TARGET: + wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0); + if (IS_ERR(wt)) + return PTR_ERR(wt); + off = xt_compat_target_offset(wt); + + if (dst) { + if (wt->compat_from_user) + wt->compat_from_user(dst, mwt->data); + else + memcpy(dst, mwt->data, match_size); + } + + size_kern = wt->targetsize; + module_put(wt->me); + break; + + default: + return -EINVAL; + } + + state->buf_kern_offset += match_size + off; + state->buf_user_offset += match_size; + pad = XT_ALIGN(size_kern) - size_kern; + + if (pad > 0 && dst) { + BUG_ON(state->buf_kern_len <= pad); + BUG_ON(state->buf_kern_offset - (match_size + off) + size_kern > state->buf_kern_len - pad); + memset(dst + size_kern, 0, pad); + } + return off + match_size; +} + +/* + * return size of all matches, watchers or target, including necessary + * alignment and padding. + */ +static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, + unsigned int size_left, enum compat_mwt type, + struct ebt_entries_buf_state *state, const void *base) +{ + int growth = 0; + char *buf; + + if (size_left == 0) + return 0; + + buf = (char *) match32; + + while (size_left >= sizeof(*match32)) { + struct ebt_entry_match *match_kern; + int ret; + + match_kern = (struct ebt_entry_match *) state->buf_kern_start; + if (match_kern) { + char *tmp; + tmp = state->buf_kern_start + state->buf_kern_offset; + match_kern = (struct ebt_entry_match *) tmp; + } + ret = ebt_buf_add(state, buf, sizeof(*match32)); + if (ret < 0) + return ret; + size_left -= sizeof(*match32); + + /* add padding before match->data (if any) */ + ret = ebt_buf_add_pad(state, ebt_compat_entry_padsize()); + if (ret < 0) + return ret; + + if (match32->match_size > size_left) + return -EINVAL; + + size_left -= match32->match_size; + + ret = compat_mtw_from_user(match32, type, state, base); + if (ret < 0) + return ret; + + BUG_ON(ret < match32->match_size); + growth += ret - match32->match_size; + growth += ebt_compat_entry_padsize(); + + buf += sizeof(*match32); + buf += match32->match_size; + + if (match_kern) + match_kern->match_size = ret; + + WARN_ON(type == EBT_COMPAT_TARGET && size_left); + match32 = (struct compat_ebt_entry_mwt *) buf; + } + + return growth; +} + +/* called for all ebt_entry structures. */ +static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, + unsigned int *total, + struct ebt_entries_buf_state *state) +{ + unsigned int i, j, startoff, new_offset = 0; + /* stores match/watchers/targets & offset of next struct ebt_entry: */ + unsigned int offsets[4]; + unsigned int *offsets_update = NULL; + int ret; + char *buf_start; + + if (*total < sizeof(struct ebt_entries)) + return -EINVAL; + + if (!entry->bitmask) { + *total -= sizeof(struct ebt_entries); + return ebt_buf_add(state, entry, sizeof(struct ebt_entries)); + } + if (*total < sizeof(*entry) || entry->next_offset < sizeof(*entry)) + return -EINVAL; + + startoff = state->buf_user_offset; + /* pull in most part of ebt_entry, it does not need to be changed. */ + ret = ebt_buf_add(state, entry, + offsetof(struct ebt_entry, watchers_offset)); + if (ret < 0) + return ret; + + offsets[0] = sizeof(struct ebt_entry); /* matches come first */ + memcpy(&offsets[1], &entry->watchers_offset, + sizeof(offsets) - sizeof(offsets[0])); + + if (state->buf_kern_start) { + buf_start = state->buf_kern_start + state->buf_kern_offset; + offsets_update = (unsigned int *) buf_start; + } + ret = ebt_buf_add(state, &offsets[1], + sizeof(offsets) - sizeof(offsets[0])); + if (ret < 0) + return ret; + buf_start = (char *) entry; + /* + * 0: matches offset, always follows ebt_entry. + * 1: watchers offset, from ebt_entry structure + * 2: target offset, from ebt_entry structure + * 3: next ebt_entry offset, from ebt_entry structure + * + * offsets are relative to beginning of struct ebt_entry (i.e., 0). + */ + for (i = 0, j = 1 ; j < 4 ; j++, i++) { + struct compat_ebt_entry_mwt *match32; + unsigned int size; + char *buf = buf_start; + + buf = buf_start + offsets[i]; + if (offsets[i] > offsets[j]) + return -EINVAL; + + match32 = (struct compat_ebt_entry_mwt *) buf; + size = offsets[j] - offsets[i]; + ret = ebt_size_mwt(match32, size, i, state, base); + if (ret < 0) + return ret; + new_offset += ret; + if (offsets_update && new_offset) { + pr_debug("change offset %d to %d\n", + offsets_update[i], offsets[j] + new_offset); + offsets_update[i] = offsets[j] + new_offset; + } + } + + if (state->buf_kern_start == NULL) { + unsigned int offset = buf_start - (char *) base; + + ret = xt_compat_add_offset(NFPROTO_BRIDGE, offset, new_offset); + if (ret < 0) + return ret; + } + + startoff = state->buf_user_offset - startoff; + + BUG_ON(*total < startoff); + *total -= startoff; + return 0; +} + +/* + * repl->entries_size is the size of the ebt_entry blob in userspace. + * It might need more memory when copied to a 64 bit kernel in case + * userspace is 32-bit. So, first task: find out how much memory is needed. + * + * Called before validation is performed. + */ +static int compat_copy_entries(unsigned char *data, unsigned int size_user, + struct ebt_entries_buf_state *state) +{ + unsigned int size_remaining = size_user; + int ret; + + ret = EBT_ENTRY_ITERATE(data, size_user, size_entry_mwt, data, + &size_remaining, state); + if (ret < 0) + return ret; + + WARN_ON(size_remaining); + return state->buf_kern_offset; +} + + +static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl, + void __user *user, unsigned int len) +{ + struct compat_ebt_replace tmp; + int i; + + if (len < sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + if (len != sizeof(tmp) + tmp.entries_size) + return -EINVAL; + + if (tmp.entries_size == 0) + return -EINVAL; + + if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / + NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) + return -ENOMEM; + + memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry)); + + /* starting with hook_entry, 32 vs. 64 bit structures are different */ + for (i = 0; i < NF_BR_NUMHOOKS; i++) + repl->hook_entry[i] = compat_ptr(tmp.hook_entry[i]); + + repl->num_counters = tmp.num_counters; + repl->counters = compat_ptr(tmp.counters); + repl->entries = compat_ptr(tmp.entries); + return 0; +} + +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) +{ + int ret, i, countersize, size64; + struct ebt_table_info *newinfo; + struct ebt_replace tmp; + struct ebt_entries_buf_state state; + void *entries_tmp; + + ret = compat_copy_ebt_replace_from_user(&tmp, user, len); + if (ret) { + /* try real handler in case userland supplied needed padding */ + if (ret == -EINVAL && do_replace(net, user, len) == 0) + ret = 0; + return ret; + } + + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; + newinfo = vmalloc(sizeof(*newinfo) + countersize); + if (!newinfo) + return -ENOMEM; + + if (countersize) + memset(newinfo->counters, 0, countersize); + + memset(&state, 0, sizeof(state)); + + newinfo->entries = vmalloc(tmp.entries_size); + if (!newinfo->entries) { + ret = -ENOMEM; + goto free_newinfo; + } + if (copy_from_user( + newinfo->entries, tmp.entries, tmp.entries_size) != 0) { + ret = -EFAULT; + goto free_entries; + } + + entries_tmp = newinfo->entries; + + xt_compat_lock(NFPROTO_BRIDGE); + + xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + if (ret < 0) + goto out_unlock; + + pr_debug("tmp.entries_size %d, kern off %d, user off %d delta %d\n", + tmp.entries_size, state.buf_kern_offset, state.buf_user_offset, + xt_compat_calc_jump(NFPROTO_BRIDGE, tmp.entries_size)); + + size64 = ret; + newinfo->entries = vmalloc(size64); + if (!newinfo->entries) { + vfree(entries_tmp); + ret = -ENOMEM; + goto out_unlock; + } + + memset(&state, 0, sizeof(state)); + state.buf_kern_start = newinfo->entries; + state.buf_kern_len = size64; + + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); + BUG_ON(ret < 0); /* parses same data again */ + + vfree(entries_tmp); + tmp.entries_size = size64; + + for (i = 0; i < NF_BR_NUMHOOKS; i++) { + char __user *usrptr; + if (tmp.hook_entry[i]) { + unsigned int delta; + usrptr = (char __user *) tmp.hook_entry[i]; + delta = usrptr - tmp.entries; + usrptr += xt_compat_calc_jump(NFPROTO_BRIDGE, delta); + tmp.hook_entry[i] = (struct ebt_entries __user *)usrptr; + } + } + + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + + ret = do_replace_finish(net, &tmp, newinfo); + if (ret == 0) + return ret; +free_entries: + vfree(newinfo->entries); +free_newinfo: + vfree(newinfo); + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + goto free_entries; +} + +static int compat_update_counters(struct net *net, void __user *user, + unsigned int len) +{ + struct compat_ebt_replace hlp; + + if (copy_from_user(&hlp, user, sizeof(hlp))) + return -EFAULT; + + /* try real handler in case userland supplied needed padding */ + if (len != sizeof(hlp) + hlp.num_counters * sizeof(struct ebt_counter)) + return update_counters(net, user, len); + + return do_update_counters(net, hlp.name, compat_ptr(hlp.counters), + hlp.num_counters, user, len); +} + +static int compat_do_ebt_set_ctl(struct sock *sk, + int cmd, void __user *user, unsigned int len) +{ + int ret; + struct net *net = sock_net(sk); + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case EBT_SO_SET_ENTRIES: + ret = compat_do_replace(net, user, len); + break; + case EBT_SO_SET_COUNTERS: + ret = compat_update_counters(net, user, len); + break; + default: + ret = -EINVAL; + } + return ret; +} + +static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, + void __user *user, int *len) +{ + int ret; + struct compat_ebt_replace tmp; + struct ebt_table *t; + struct net *net = sock_net(sk); + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + /* try real handler in case userland supplied needed padding */ + if ((cmd == EBT_SO_GET_INFO || + cmd == EBT_SO_GET_INIT_INFO) && *len != sizeof(tmp)) + return do_ebt_get_ctl(sk, cmd, user, len); + + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); + if (!t) + return ret; + + xt_compat_lock(NFPROTO_BRIDGE); + switch (cmd) { + case EBT_SO_GET_INFO: + tmp.nentries = t->private->nentries; + ret = compat_table_info(t->private, &tmp); + if (ret) + goto out; + tmp.valid_hooks = t->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_INIT_INFO: + tmp.nentries = t->table->nentries; + tmp.entries_size = t->table->entries_size; + tmp.valid_hooks = t->table->valid_hooks; + + if (copy_to_user(user, &tmp, *len) != 0) { + ret = -EFAULT; + break; + } + ret = 0; + break; + case EBT_SO_GET_ENTRIES: + case EBT_SO_GET_INIT_ENTRIES: + /* + * try real handler first in case of userland-side padding. + * in case we are dealing with an 'ordinary' 32 bit binary + * without 64bit compatibility padding, this will fail right + * after copy_from_user when the *len argument is validated. + * + * the compat_ variant needs to do one pass over the kernel + * data set to adjust for size differences before it the check. + */ + if (copy_everything_to_user(t, user, len, cmd) == 0) + ret = 0; + else + ret = compat_copy_everything_to_user(t, user, len, cmd); + break; + default: + ret = -EINVAL; + } + out: + xt_compat_flush_offsets(NFPROTO_BRIDGE); + xt_compat_unlock(NFPROTO_BRIDGE); + mutex_unlock(&ebt_mutex); + return ret; +} +#endif + +static struct nf_sockopt_ops ebt_sockopts = { + .pf = PF_INET, + .set_optmin = EBT_BASE_CTL, + .set_optmax = EBT_SO_SET_MAX + 1, + .set = do_ebt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ebt_set_ctl, +#endif + .get_optmin = EBT_BASE_CTL, + .get_optmax = EBT_SO_GET_MAX + 1, + .get = do_ebt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_ebt_get_ctl, +#endif + .owner = THIS_MODULE, }; -static int __init init(void) +static int __init ebtables_init(void) { int ret; - down(&ebt_mutex); - list_named_insert(&ebt_targets, &ebt_standard_target); - up(&ebt_mutex); - if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) + ret = xt_register_target(&ebt_standard_target); + if (ret < 0) return ret; + ret = nf_register_sockopt(&ebt_sockopts); + if (ret < 0) { + xt_unregister_target(&ebt_standard_target); + return ret; + } - printk(KERN_NOTICE "Ebtables v2.0 registered\n"); + printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; } -static void __exit fini(void) +static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); - printk(KERN_NOTICE "Ebtables v2.0 unregistered\n"); + xt_unregister_target(&ebt_standard_target); + printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); EXPORT_SYMBOL(ebt_unregister_table); -EXPORT_SYMBOL(ebt_register_match); -EXPORT_SYMBOL(ebt_unregister_match); -EXPORT_SYMBOL(ebt_register_watcher); -EXPORT_SYMBOL(ebt_unregister_watcher); -EXPORT_SYMBOL(ebt_register_target); -EXPORT_SYMBOL(ebt_unregister_target); EXPORT_SYMBOL(ebt_do_table); -module_init(init); -module_exit(fini); +module_init(ebtables_init); +module_exit(ebtables_fini); MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c new file mode 100644 index 00000000000..5bcc0d8b31f --- /dev/null +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter_bridge.h> +#include <net/netfilter/nf_tables.h> + +static unsigned int +nft_do_chain_bridge(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + +static struct nft_af_info nft_af_bridge __read_mostly = { + .family = NFPROTO_BRIDGE, + .nhooks = NF_BR_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_BR_LOCAL_IN] = nft_do_chain_bridge, + [NF_BR_FORWARD] = nft_do_chain_bridge, + [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + }, +}; + +static int nf_tables_bridge_init_net(struct net *net) +{ + net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.bridge == NULL) + return -ENOMEM; + + memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge)); + + if (nft_register_afinfo(net, net->nft.bridge) < 0) + goto err; + + return 0; +err: + kfree(net->nft.bridge); + return -ENOMEM; +} + +static void nf_tables_bridge_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.bridge); + kfree(net->nft.bridge); +} + +static struct pernet_operations nf_tables_bridge_net_ops = { + .init = nf_tables_bridge_init_net, + .exit = nf_tables_bridge_exit_net, +}; + +static const struct nf_chain_type filter_bridge = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_BRIDGE, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_BR_LOCAL_IN) | + (1 << NF_BR_FORWARD) | + (1 << NF_BR_LOCAL_OUT), +}; + +static int __init nf_tables_bridge_init(void) +{ + int ret; + + nft_register_chain_type(&filter_bridge); + ret = register_pernet_subsys(&nf_tables_bridge_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_bridge); + + return ret; +} + +static void __exit nf_tables_bridge_exit(void) +{ + unregister_pernet_subsys(&nf_tables_bridge_net_ops); + nft_unregister_chain_type(&filter_bridge); +} + +module_init(nf_tables_bridge_init); +module_exit(nf_tables_bridge_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE); diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c new file mode 100644 index 00000000000..4f02109d708 --- /dev/null +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2014 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_meta.h> + +#include "../br_private.h" + +static void nft_meta_bridge_get_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + const struct net_device *in = pkt->in, *out = pkt->out; + struct nft_data *dest = &data[priv->dreg]; + const struct net_bridge_port *p; + + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + if (in == NULL || (p = br_port_get_rcu(in)) == NULL) + goto err; + break; + case NFT_META_BRI_OIFNAME: + if (out == NULL || (p = br_port_get_rcu(out)) == NULL) + goto err; + break; + default: + goto out; + } + + strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data)); + return; +out: + return nft_meta_get_eval(expr, data, pkt); +err: + data[NFT_REG_VERDICT].verdict = NFT_BREAK; +} + +static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_IIFNAME: + case NFT_META_BRI_OIFNAME: + break; + default: + return nft_meta_get_init(ctx, expr, tb); + } + + priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); + err = nft_validate_output_register(priv->dreg); + if (err < 0) + return err; + + err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + if (err < 0) + return err; + + return 0; +} + +static struct nft_expr_type nft_meta_bridge_type; +static const struct nft_expr_ops nft_meta_bridge_get_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_bridge_get_eval, + .init = nft_meta_bridge_get_init, + .dump = nft_meta_get_dump, +}; + +static const struct nft_expr_ops nft_meta_bridge_set_ops = { + .type = &nft_meta_bridge_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), + .eval = nft_meta_set_eval, + .init = nft_meta_set_init, + .dump = nft_meta_set_dump, +}; + +static const struct nft_expr_ops * +nft_meta_bridge_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_META_KEY] == NULL) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG]) + return ERR_PTR(-EINVAL); + + if (tb[NFTA_META_DREG]) + return &nft_meta_bridge_get_ops; + + if (tb[NFTA_META_SREG]) + return &nft_meta_bridge_set_ops; + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_meta_bridge_type __read_mostly = { + .family = NFPROTO_BRIDGE, + .name = "meta", + .select_ops = &nft_meta_bridge_select_ops, + .policy = nft_meta_policy, + .maxattr = NFTA_META_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_meta_bridge_module_init(void) +{ + return nft_register_expr(&nft_meta_bridge_type); +} + +static void __exit nft_meta_bridge_module_exit(void) +{ + nft_unregister_expr(&nft_meta_bridge_type); +} + +module_init(nft_meta_bridge_module_init); +module_exit(nft_meta_bridge_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta"); |
