Diffstat (limited to 'net')
409 files changed, 14220 insertions, 9929 deletions
diff --git a/net/802/tr.c b/net/802/tr.c index e874447ad14..44acce47fcd 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -635,19 +635,18 @@ struct net_device *alloc_trdev(int sizeof_priv) #ifdef CONFIG_SYSCTL static struct ctl_table tr_table[] = { { - .ctl_name = NET_TR_RIF_TIMEOUT, .procname = "rif_timeout", .data = &sysctl_tr_rif_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { 0 }, + { }, }; static __initdata struct ctl_path tr_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "token-ring", .ctl_name = NET_TR, }, + { .procname = "net", }, + { .procname = "token-ring", }, { } }; #endif diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8836575f9d7..33f90e7362c 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -41,7 +41,7 @@ /* Global VLAN variables */ -int vlan_net_id; +int vlan_net_id __read_mostly; /* Our listing of VLAN group(s) */ static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]; @@ -140,7 +140,7 @@ static void vlan_rcu_free(struct rcu_head *rcu) vlan_group_free(container_of(rcu, struct vlan_group, rcu)); } -void unregister_vlan_dev(struct net_device *dev) +void unregister_vlan_dev(struct net_device *dev, struct list_head *head) { struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; @@ -159,12 +159,13 @@ void unregister_vlan_dev(struct net_device *dev) if (real_dev->features & NETIF_F_HW_VLAN_FILTER) ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id); - vlan_group_set_device(grp, vlan_id, NULL); grp->nr_vlans--; - synchronize_net(); + vlan_group_set_device(grp, vlan_id, NULL); + if (!grp->killall) + synchronize_net(); - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); /* If the group is now empty, kill off the group. */ if (grp->nr_vlans == 0) { @@ -183,27 +184,6 @@ void unregister_vlan_dev(struct net_device *dev) dev_put(real_dev); } -static void vlan_transfer_operstate(const struct net_device *dev, - struct net_device *vlandev) -{ - /* Have to respect userspace enforced dormant state - * of real device, also must allow supplicant running - * on VLAN device - */ - if (dev->operstate == IF_OPER_DORMANT) - netif_dormant_on(vlandev); - else - netif_dormant_off(vlandev); - - if (netif_carrier_ok(dev)) { - if (!netif_carrier_ok(vlandev)) - netif_carrier_on(vlandev); - } else { - if (netif_carrier_ok(vlandev)) - netif_carrier_off(vlandev); - } -} - int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id) { const char *name = real_dev->name; @@ -261,7 +241,7 @@ int register_vlan_dev(struct net_device *dev) /* Account for reference in struct vlan_dev_info */ dev_hold(real_dev); - vlan_transfer_operstate(real_dev, dev); + netif_stacked_transfer_operstate(real_dev, dev); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ /* So, got the sucker initialized, now lets place @@ -281,8 +261,11 @@ out_uninit_applicant: if (ngrp) vlan_gvrp_uninit_applicant(real_dev); out_free_group: - if (ngrp) - vlan_group_free(ngrp); + if (ngrp) { + hlist_del_rcu(&ngrp->hlist); + /* Free the group, after all cpu's are done. 
*/ + call_rcu(&ngrp->rcu, vlan_rcu_free); + } return err; } @@ -427,6 +410,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, struct vlan_group *grp; int i, flgs; struct net_device *vlandev; + struct vlan_dev_info *vlan; + LIST_HEAD(list); if (is_vlan_dev(dev)) __vlan_device_event(dev, event); @@ -447,7 +432,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!vlandev) continue; - vlan_transfer_operstate(dev, vlandev); + netif_stacked_transfer_operstate(dev, vlandev); } break; @@ -502,8 +487,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (!(flgs & IFF_UP)) continue; - dev_change_flags(vlandev, flgs & ~IFF_UP); - vlan_transfer_operstate(dev, vlandev); + vlan = vlan_dev_info(vlandev); + if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) + dev_change_flags(vlandev, flgs & ~IFF_UP); + netif_stacked_transfer_operstate(dev, vlandev); } break; @@ -518,13 +505,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (flgs & IFF_UP) continue; - dev_change_flags(vlandev, flgs | IFF_UP); - vlan_transfer_operstate(dev, vlandev); + vlan = vlan_dev_info(vlandev); + if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) + dev_change_flags(vlandev, flgs | IFF_UP); + netif_stacked_transfer_operstate(dev, vlandev); } break; case NETDEV_UNREGISTER: /* Delete all VLANs for this dev. */ + grp->killall = 1; + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { vlandev = vlan_group_get_device(grp, i); if (!vlandev) @@ -535,8 +526,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, if (grp->nr_vlans == 1) i = VLAN_GROUP_ARRAY_LEN; - unregister_vlan_dev(vlandev); + unregister_vlan_dev(vlandev, &list); } + unregister_netdevice_many(&list); break; } @@ -642,7 +634,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - unregister_vlan_dev(dev); + unregister_vlan_dev(dev, NULL); err = 0; break; @@ -673,47 +665,26 @@ out: static int vlan_init_net(struct net *net) { + struct vlan_net *vn = net_generic(net, vlan_net_id); int err; - struct vlan_net *vn; - - err = -ENOMEM; - vn = kzalloc(sizeof(struct vlan_net), GFP_KERNEL); - if (vn == NULL) - goto err_alloc; - - err = net_assign_generic(net, vlan_net_id, vn); - if (err < 0) - goto err_assign; vn->name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD; err = vlan_proc_init(net); - if (err < 0) - goto err_proc; - - return 0; -err_proc: - /* nothing */ -err_assign: - kfree(vn); -err_alloc: return err; } static void vlan_exit_net(struct net *net) { - struct vlan_net *vn; - - vn = net_generic(net, vlan_net_id); - rtnl_kill_links(net, &vlan_link_ops); vlan_proc_cleanup(net); - kfree(vn); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit = vlan_exit_net, + .id = &vlan_net_id, + .size = sizeof(struct vlan_net), }; static int __init vlan_proto_init(void) @@ -723,7 +694,7 @@ static int __init vlan_proto_init(void) pr_info("%s v%s %s\n", vlan_fullname, vlan_version, vlan_copyright); pr_info("All bugs added by %s\n", vlan_buggyright); - err = register_pernet_gen_device(&vlan_net_id, &vlan_net_ops); + err = register_pernet_subsys(&vlan_net_ops); if (err < 0) goto err0; @@ -748,7 +719,7 @@ err4: err3: unregister_netdevice_notifier(&vlan_notifier_block); err2: - unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops); + unregister_pernet_subsys(&vlan_net_ops); err0: return err; } @@ -768,7 +739,7 @@ static void __exit 
vlan_cleanup_module(void) for (i = 0; i < VLAN_GRP_HASH_SIZE; i++) BUG_ON(!hlist_empty(&vlan_group_hash[i])); - unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops); + unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ vlan_gvrp_uninit(); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 82570bc2a18..5685296017e 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -16,6 +16,21 @@ struct vlan_priority_tci_mapping { struct vlan_priority_tci_mapping *next; }; + +/** + * struct vlan_rx_stats - VLAN percpu rx stats + * @rx_packets: number of received packets + * @rx_bytes: number of received bytes + * @multicast: number of received multicast packets + * @rx_errors: number of errors + */ +struct vlan_rx_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long multicast; + unsigned long rx_errors; +}; + /** * struct vlan_dev_info - VLAN private device data * @nr_ingress_mappings: number of ingress priority mappings @@ -29,6 +44,7 @@ struct vlan_priority_tci_mapping { * @dent: proc dir entry * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX + * @vlan_rx_stats: ptr to percpu rx stats */ struct vlan_dev_info { unsigned int nr_ingress_mappings; @@ -45,6 +61,7 @@ struct vlan_dev_info { struct proc_dir_entry *dent; unsigned long cnt_inc_headroom_on_tx; unsigned long cnt_encap_on_xmit; + struct vlan_rx_stats *vlan_rx_stats; }; static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev) @@ -82,14 +99,14 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id); void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); -void unregister_vlan_dev(struct net_device *dev); +void unregister_vlan_dev(struct net_device *dev, struct list_head *head); static inline u32 vlan_get_ingress_priority(struct net_device *dev, u16 vlan_tci) { struct vlan_dev_info *vip = vlan_dev_info(dev); - return vip->ingress_priority_map[(vlan_tci >> 13) & 0x7]; + return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7]; } #ifdef CONFIG_VLAN_8021Q_GVRP diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 7f7de1a04de..e75a2f3b10a 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -14,7 +14,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (skb_bond_should_drop(skb)) goto drop; - skb->vlan_tci = vlan_tci; + __vlan_hwaccel_put_tag(skb, vlan_tci); skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); if (!skb->dev) @@ -31,7 +31,7 @@ EXPORT_SYMBOL(__vlan_hwaccel_rx); int vlan_hwaccel_do_receive(struct sk_buff *skb) { struct net_device *dev = skb->dev; - struct net_device_stats *stats; + struct vlan_rx_stats *rx_stats; skb->dev = vlan_dev_info(dev)->real_dev; netif_nit_deliver(skb); @@ -40,15 +40,17 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb) skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &dev->stats; - stats->rx_packets++; - stats->rx_bytes += skb->len; + rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, + smp_processor_id()); + + rx_stats->rx_packets++; + rx_stats->rx_bytes += skb->len; switch (skb->pkt_type) { case PACKET_BROADCAST: break; case PACKET_MULTICAST: - stats->multicast++; + rx_stats->multicast++; break; case PACKET_OTHERHOST: /* Our lower layer thinks this is not local, let's make sure. 
@@ -74,15 +76,16 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); -static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) +static gro_result_t +vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) { struct sk_buff *p; if (skb_bond_should_drop(skb)) goto drop; - skb->vlan_tci = vlan_tci; + __vlan_hwaccel_put_tag(skb, vlan_tci); skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); if (!skb->dev) @@ -101,11 +104,12 @@ drop: return GRO_DROP; } -int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci, struct sk_buff *skb) +gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci, struct sk_buff *skb) { if (netpoll_rx_on(skb)) - return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + return vlan_hwaccel_receive_skb(skb, grp, vlan_tci) + ? GRO_DROP : GRO_NORMAL; skb_gro_reset_offset(skb); @@ -113,17 +117,18 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, } EXPORT_SYMBOL(vlan_gro_receive); -int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, - unsigned int vlan_tci) +gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, + unsigned int vlan_tci) { struct sk_buff *skb = napi_frags_skb(napi); if (!skb) - return NET_RX_DROP; + return GRO_DROP; if (netpoll_rx_on(skb)) { skb->protocol = eth_type_trans(skb, skb->dev); - return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + return vlan_hwaccel_receive_skb(skb, grp, vlan_tci) + ? GRO_DROP : GRO_NORMAL; } return napi_frags_finish(napi, skb, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4198ec5c8ab..b7889782047 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -140,7 +140,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct vlan_hdr *vhdr; - struct net_device_stats *stats; + struct vlan_rx_stats *rx_stats; u16 vlan_id; u16 vlan_tci; @@ -163,9 +163,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, goto err_unlock; } - stats = &skb->dev->stats; - stats->rx_packets++; - stats->rx_bytes += skb->len; + rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, + smp_processor_id()); + rx_stats->rx_packets++; + rx_stats->rx_bytes += skb->len; skb_pull_rcsum(skb, VLAN_HLEN); @@ -180,7 +181,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, break; case PACKET_MULTICAST: - stats->multicast++; + rx_stats->multicast++; break; case PACKET_OTHERHOST: @@ -200,7 +201,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, skb = vlan_check_reorder_header(skb); if (!skb) { - stats->rx_errors++; + rx_stats->rx_errors++; goto err_unlock; } @@ -332,7 +333,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, } else txq->tx_dropped++; - return NETDEV_TX_OK; + return ret; } static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, @@ -358,7 +359,7 @@ static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, } else txq->tx_dropped++; - return NETDEV_TX_OK; + return ret; } static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu) @@ -393,7 +394,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, struct vlan_dev_info *vlan = vlan_dev_info(dev); struct vlan_priority_tci_mapping *mp = NULL; struct vlan_priority_tci_mapping *np; - u32 
vlan_qos = (vlan_prio << 13) & 0xE000; + u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK; /* See if a priority mapping exists.. */ mp = vlan->egress_priority_map[skb_prio & 0xF]; @@ -430,7 +431,8 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) struct vlan_dev_info *vlan = vlan_dev_info(dev); u32 old_flags = vlan->flags; - if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | + VLAN_FLAG_LOOSE_BINDING)) return -EINVAL; vlan->flags = (old_flags & ~mask) | (flags & mask); @@ -455,7 +457,8 @@ static int vlan_dev_open(struct net_device *dev) struct net_device *real_dev = vlan->real_dev; int err; - if (!(real_dev->flags & IFF_UP)) + if (!(real_dev->flags & IFF_UP) && + !(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) return -ENETDOWN; if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { @@ -626,6 +629,17 @@ static int vlan_dev_fcoe_disable(struct net_device *dev) rc = ops->ndo_fcoe_disable(real_dev); return rc; } + +static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; + int rc = -EINVAL; + + if (ops->ndo_fcoe_get_wwn) + rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); + return rc; +} #endif static void vlan_dev_change_rx_flags(struct net_device *dev, int change) @@ -720,6 +734,11 @@ static int vlan_dev_init(struct net_device *dev) subclass = 1; vlan_dev_set_lockdep_class(dev, subclass); + + vlan_dev_info(dev)->vlan_rx_stats = alloc_percpu(struct vlan_rx_stats); + if (!vlan_dev_info(dev)->vlan_rx_stats) + return -ENOMEM; + return 0; } @@ -729,6 +748,8 @@ static void vlan_dev_uninit(struct net_device *dev) struct vlan_dev_info *vlan = vlan_dev_info(dev); int i; + free_percpu(vlan->vlan_rx_stats); + vlan->vlan_rx_stats = NULL; for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { while ((pm = vlan->egress_priority_map[i]) != NULL) { vlan->egress_priority_map[i] = pm->next; @@ -764,6 +785,31 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev) return dev_ethtool_get_flags(vlan->real_dev); } +static struct net_device_stats *vlan_dev_get_stats(struct net_device *dev) +{ + struct net_device_stats *stats = &dev->stats; + + dev_txq_stats_fold(dev, stats); + + if (vlan_dev_info(dev)->vlan_rx_stats) { + struct vlan_rx_stats *p, rx = {0}; + int i; + + for_each_possible_cpu(i) { + p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i); + rx.rx_packets += p->rx_packets; + rx.rx_bytes += p->rx_bytes; + rx.rx_errors += p->rx_errors; + rx.multicast += p->multicast; + } + stats->rx_packets = rx.rx_packets; + stats->rx_bytes = rx.rx_bytes; + stats->rx_errors = rx.rx_errors; + stats->multicast = rx.multicast; + } + return stats; +} + static const struct ethtool_ops vlan_ethtool_ops = { .get_settings = vlan_ethtool_get_settings, .get_drvinfo = vlan_ethtool_get_drvinfo, @@ -786,11 +832,13 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, + .ndo_get_stats = vlan_dev_get_stats, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, #endif }; @@ -808,11 +856,13 @@ static const struct net_device_ops 
vlan_netdev_accel_ops = { .ndo_change_rx_flags = vlan_dev_change_rx_flags, .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, + .ndo_get_stats = vlan_dev_get_stats, #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, + .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, #endif }; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index a9150485019..ddc105734af 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -60,7 +60,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); if ((flags->flags & flags->mask) & - ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP | + VLAN_FLAG_LOOSE_BINDING)) return -EINVAL; } @@ -119,7 +120,7 @@ static int vlan_get_tx_queues(struct net *net, return 0; } -static int vlan_newlink(struct net_device *dev, +static int vlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct vlan_dev_info *vlan = vlan_dev_info(dev); @@ -131,7 +132,7 @@ static int vlan_newlink(struct net_device *dev, if (!tb[IFLA_LINK]) return -EINVAL; - real_dev = __dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 6262c335f3c..9ec1f057c03 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -201,18 +201,17 @@ int vlan_proc_rem_dev(struct net_device *vlandev) /* start read of /proc/net/vlan/config */ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(rcu) { struct net_device *dev; struct net *net = seq_file_net(seq); loff_t i = 1; - read_lock(&dev_base_lock); - + rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { if (!is_vlan_dev(dev)) continue; @@ -234,7 +233,7 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) dev = net_device_entry(&net->dev_base_head); - for_each_netdev_continue(net, dev) { + for_each_netdev_continue_rcu(net, dev) { if (!is_vlan_dev(dev)) continue; @@ -245,9 +244,9 @@ static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void vlan_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(rcu) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int vlan_seq_show(struct seq_file *seq, void *v) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 8d934dd7fd5..4dd873e3a1b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -633,8 +633,8 @@ static void p9_poll_mux(struct p9_conn *m) if (n & POLLOUT) { set_bit(Wpending, &m->wsched); P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); - if ((m->wsize || !list_empty(&m->unsent_req_list)) - && !test_and_set_bit(Wworksched, &m->wsched)) { + if ((m->wsize || !list_empty(&m->unsent_req_list)) && + !test_and_set_bit(Wworksched, &m->wsched)) { P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); queue_work(p9_mux_wq, &m->wq); } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b1a4290996b..9fc4da56fb1 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -56,6 +56,7 @@ #include <linux/if_arp.h> 
#include <linux/smp_lock.h> #include <linux/termios.h> /* For TIOCOUTQ/INQ */ +#include <linux/compat.h> #include <net/datalink.h> #include <net/psnap.h> #include <net/sock.h> @@ -922,13 +923,8 @@ static unsigned long atalk_sum_partial(const unsigned char *data, { /* This ought to be unwrapped neatly. I'll trust gcc for now */ while (len--) { - sum += *data; - sum <<= 1; - if (sum & 0x10000) { - sum++; - sum &= 0xffff; - } - data++; + sum += *data++; + sum = rol16(sum, 1); } return sum; } @@ -1021,12 +1017,13 @@ static struct proto ddp_proto = { * Create a socket. Initialise the socket, blank the addresses * set the state. */ -static int atalk_create(struct net *net, struct socket *sock, int protocol) +static int atalk_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; /* @@ -1054,11 +1051,13 @@ static int atalk_release(struct socket *sock) { struct sock *sk = sock->sk; + lock_kernel(); if (sk) { sock_orphan(sk); sock->sk = NULL; atalk_destroy_socket(sk); } + unlock_kernel(); return 0; } @@ -1134,6 +1133,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_at *addr = (struct sockaddr_at *)uaddr; struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); + int err; if (!sock_flag(sk, SOCK_ZAPPED) || addr_len != sizeof(struct sockaddr_at)) @@ -1142,37 +1142,44 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr->sat_family != AF_APPLETALK) return -EAFNOSUPPORT; + lock_kernel(); if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) { struct atalk_addr *ap = atalk_find_primary(); + err = -EADDRNOTAVAIL; if (!ap) - return -EADDRNOTAVAIL; + goto out; at->src_net = addr->sat_addr.s_net = ap->s_net; at->src_node = addr->sat_addr.s_node= ap->s_node; } else { + err = -EADDRNOTAVAIL; if (!atalk_find_interface(addr->sat_addr.s_net, addr->sat_addr.s_node)) - return -EADDRNOTAVAIL; + goto out; at->src_net = addr->sat_addr.s_net; at->src_node = addr->sat_addr.s_node; } if (addr->sat_port == ATADDR_ANYPORT) { - int n = atalk_pick_and_bind_port(sk, addr); + err = atalk_pick_and_bind_port(sk, addr); - if (n < 0) - return n; + if (err < 0) + goto out; } else { at->src_port = addr->sat_port; + err = -EADDRINUSE; if (atalk_find_or_insert_socket(sk, addr)) - return -EADDRINUSE; + goto out; } sock_reset_flag(sk, SOCK_ZAPPED); - return 0; + err = 0; +out: + unlock_kernel(); + return err; } /* Set the address we talk to */ @@ -1182,6 +1189,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); struct sockaddr_at *addr; + int err; sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; @@ -1206,12 +1214,15 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, #endif } + lock_kernel(); + err = -EBUSY; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) - return -EBUSY; + goto out; + err = -ENETUNREACH; if (!atrtr_get_dev(&addr->sat_addr)) - return -ENETUNREACH; + goto out; at->dest_port = addr->sat_port; at->dest_net = addr->sat_addr.s_net; @@ -1219,7 +1230,10 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; - return 0; + err = 0; +out: + unlock_kernel(); + return err; } /* @@ -1232,17 +1246,21 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_at sat; struct 
sock *sk = sock->sk; struct atalk_sock *at = at_sk(sk); + int err; + lock_kernel(); + err = -ENOBUFS; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) - return -ENOBUFS; + goto out; *uaddr_len = sizeof(struct sockaddr_at); memset(&sat.sat_zero, 0, sizeof(sat.sat_zero)); if (peer) { + err = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + goto out; sat.sat_addr.s_net = at->dest_net; sat.sat_addr.s_node = at->dest_node; @@ -1253,9 +1271,23 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, sat.sat_port = at->src_port; } + err = 0; sat.sat_family = AF_APPLETALK; memcpy(uaddr, &sat, sizeof(sat)); - return 0; + +out: + unlock_kernel(); + return err; +} + +static unsigned int atalk_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + int err; + lock_kernel(); + err = datagram_poll(file, sock, wait); + unlock_kernel(); + return err; } #if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE) @@ -1563,23 +1595,28 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (len > DDP_MAXSZ) return -EMSGSIZE; + lock_kernel(); if (usat) { + err = -EBUSY; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) - return -EBUSY; + goto out; + err = -EINVAL; if (msg->msg_namelen < sizeof(*usat) || usat->sat_family != AF_APPLETALK) - return -EINVAL; + goto out; + err = -EPERM; /* netatalk didn't implement this check */ if (usat->sat_addr.s_node == ATADDR_BCAST && !sock_flag(sk, SOCK_BROADCAST)) { - return -EPERM; + goto out; } } else { + err = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + goto out; usat = &local_satalk; usat->sat_family = AF_APPLETALK; usat->sat_port = at->dest_port; @@ -1603,8 +1640,9 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr rt = atrtr_find(&at_hint); } + err = ENETUNREACH; if (!rt) - return -ENETUNREACH; + goto out; dev = rt->dev; @@ -1614,7 +1652,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size += dev->hard_header_len; skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); if (!skb) - return err; + goto out; skb->sk = sk; skb_reserve(skb, ddp_dl->header_length); @@ -1637,7 +1675,8 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if (err) { kfree_skb(skb); - return -EFAULT; + err = -EFAULT; + goto out; } if (sk->sk_no_check == 1) @@ -1676,7 +1715,8 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr rt = atrtr_find(&at_lo); if (!rt) { kfree_skb(skb); - return -ENETUNREACH; + err = -ENETUNREACH; + goto out; } dev = rt->dev; skb->dev = dev; @@ -1696,7 +1736,9 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len); - return len; +out: + unlock_kernel(); + return err ? 
: len; } static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, @@ -1708,10 +1750,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr int copied = 0; int offset = 0; int err = 0; - struct sk_buff *skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, + struct sk_buff *skb; + + lock_kernel(); + skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) - return err; + goto out; /* FIXME: use skb->cb to be able to use shared skbs */ ddp = ddp_hdr(skb); @@ -1739,6 +1784,9 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } skb_free_datagram(sk, skb); /* Free the datagram. */ + +out: + unlock_kernel(); return err ? : copied; } @@ -1810,24 +1858,26 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { /* - * All Appletalk ioctls except SIOCATALKDIFADDR are standard. And - * SIOCATALKDIFADDR is handled by upper layer as well, so there is - * nothing to do. Eventually SIOCATALKDIFADDR should be moved - * here so there is no generic SIOCPROTOPRIVATE translation in the - * system. + * SIOCATALKDIFADDR is a SIOCPROTOPRIVATE ioctl number, so we + * cannot handle it in common code. The data we access if ifreq + * here is compatible, so we can simply call the native + * handler. */ + if (cmd == SIOCATALKDIFADDR) + return atalk_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); + return -ENOIOCTLCMD; } #endif -static struct net_proto_family atalk_family_ops = { +static const struct net_proto_family atalk_family_ops = { .family = PF_APPLETALK, .create = atalk_create, .owner = THIS_MODULE, }; -static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { +static const struct proto_ops atalk_dgram_ops = { .family = PF_APPLETALK, .owner = THIS_MODULE, .release = atalk_release, @@ -1836,7 +1886,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = atalk_getname, - .poll = datagram_poll, + .poll = atalk_poll, .ioctl = atalk_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, @@ -1851,8 +1901,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = { .sendpage = sock_no_sendpage, }; -SOCKOPS_WRAP(atalk_dgram, PF_APPLETALK); - static struct notifier_block ddp_notifier = { .notifier_call = ddp_device_event, }; diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index 8d237b15183..04e9c0da7aa 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -12,25 +12,20 @@ static struct ctl_table atalk_table[] = { { - .ctl_name = NET_ATALK_AARP_EXPIRY_TIME, .procname = "aarp-expiry-time", .data = &sysctl_aarp_expiry_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_ATALK_AARP_TICK_TIME, .procname = "aarp-tick-time", .data = &sysctl_aarp_tick_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_ATALK_AARP_RETRANSMIT_LIMIT, .procname = "aarp-retransmit-limit", .data = &sysctl_aarp_retransmit_limit, .maxlen = sizeof(int), @@ -38,20 +33,18 @@ static struct ctl_table atalk_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_ATALK_AARP_RESOLVE_TIME, .procname = "aarp-resolve-time", .data = &sysctl_aarp_resolve_time, .maxlen = 
sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, - { 0 }, + { }, }; static struct ctl_path atalk_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "appletalk", .ctl_name = NET_ATALK, }, + { .procname = "net", }, + { .procname = "appletalk", }, { } }; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 26a646d4eb3..c9230c39869 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -554,6 +554,12 @@ static const struct net_device_ops br2684_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; +static const struct net_device_ops br2684_netdev_ops_routed = { + .ndo_start_xmit = br2684_start_xmit, + .ndo_set_mac_address = br2684_mac_addr, + .ndo_change_mtu = eth_change_mtu +}; + static void br2684_setup(struct net_device *netdev) { struct br2684_dev *brdev = BRPRIV(netdev); @@ -569,11 +575,10 @@ static void br2684_setup(struct net_device *netdev) static void br2684_setup_routed(struct net_device *netdev) { struct br2684_dev *brdev = BRPRIV(netdev); - brdev->net_dev = netdev; + brdev->net_dev = netdev; netdev->hard_header_len = 0; - - netdev->netdev_ops = &br2684_netdev_ops; + netdev->netdev_ops = &br2684_netdev_ops_routed; netdev->addr_len = 0; netdev->mtu = 1500; netdev->type = ARPHRD_PPP; diff --git a/net/atm/common.c b/net/atm/common.c index 950bd16d238..d61e051e0a3 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -496,7 +496,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (error) return error; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); atm_return(vcc, skb->truesize); skb_free_datagram(sk, skb); diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 4da8892ced5..2ea40995dce 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -191,8 +191,181 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } #ifdef CONFIG_COMPAT -int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +/* + * FIXME: + * The compat_ioctl handling is duplicated, using both these conversion + * routines and the compat argument to the actual handlers. Both + * versions are somewhat incomplete and should be merged, e.g. by + * moving the ioctl number translation into the actual handlers and + * killing the conversion code. 
+ * + * -arnd, November 2009 + */ +#define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct compat_atmif_sioc) +#define ATM_GETNAMES32 _IOW('a', ATMIOC_ITF+3, struct compat_atm_iobuf) +#define ATM_GETTYPE32 _IOW('a', ATMIOC_ITF+4, struct compat_atmif_sioc) +#define ATM_GETESI32 _IOW('a', ATMIOC_ITF+5, struct compat_atmif_sioc) +#define ATM_GETADDR32 _IOW('a', ATMIOC_ITF+6, struct compat_atmif_sioc) +#define ATM_RSTADDR32 _IOW('a', ATMIOC_ITF+7, struct compat_atmif_sioc) +#define ATM_ADDADDR32 _IOW('a', ATMIOC_ITF+8, struct compat_atmif_sioc) +#define ATM_DELADDR32 _IOW('a', ATMIOC_ITF+9, struct compat_atmif_sioc) +#define ATM_GETCIRANGE32 _IOW('a', ATMIOC_ITF+10, struct compat_atmif_sioc) +#define ATM_SETCIRANGE32 _IOW('a', ATMIOC_ITF+11, struct compat_atmif_sioc) +#define ATM_SETESI32 _IOW('a', ATMIOC_ITF+12, struct compat_atmif_sioc) +#define ATM_SETESIF32 _IOW('a', ATMIOC_ITF+13, struct compat_atmif_sioc) +#define ATM_GETSTAT32 _IOW('a', ATMIOC_SARCOM+0, struct compat_atmif_sioc) +#define ATM_GETSTATZ32 _IOW('a', ATMIOC_SARCOM+1, struct compat_atmif_sioc) +#define ATM_GETLOOP32 _IOW('a', ATMIOC_SARCOM+2, struct compat_atmif_sioc) +#define ATM_SETLOOP32 _IOW('a', ATMIOC_SARCOM+3, struct compat_atmif_sioc) +#define ATM_QUERYLOOP32 _IOW('a', ATMIOC_SARCOM+4, struct compat_atmif_sioc) + +static struct { + unsigned int cmd32; + unsigned int cmd; +} atm_ioctl_map[] = { + { ATM_GETLINKRATE32, ATM_GETLINKRATE }, + { ATM_GETNAMES32, ATM_GETNAMES }, + { ATM_GETTYPE32, ATM_GETTYPE }, + { ATM_GETESI32, ATM_GETESI }, + { ATM_GETADDR32, ATM_GETADDR }, + { ATM_RSTADDR32, ATM_RSTADDR }, + { ATM_ADDADDR32, ATM_ADDADDR }, + { ATM_DELADDR32, ATM_DELADDR }, + { ATM_GETCIRANGE32, ATM_GETCIRANGE }, + { ATM_SETCIRANGE32, ATM_SETCIRANGE }, + { ATM_SETESI32, ATM_SETESI }, + { ATM_SETESIF32, ATM_SETESIF }, + { ATM_GETSTAT32, ATM_GETSTAT }, + { ATM_GETSTATZ32, ATM_GETSTATZ }, + { ATM_GETLOOP32, ATM_GETLOOP }, + { ATM_SETLOOP32, ATM_SETLOOP }, + { ATM_QUERYLOOP32, ATM_QUERYLOOP }, +}; + +#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map) + +static int do_atm_iobuf(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct atm_iobuf __user *iobuf; + struct compat_atm_iobuf __user *iobuf32; + u32 data; + void __user *datap; + int len, err; + + iobuf = compat_alloc_user_space(sizeof(*iobuf)); + iobuf32 = compat_ptr(arg); + + if (get_user(len, &iobuf32->length) || + get_user(data, &iobuf32->buffer)) + return -EFAULT; + datap = compat_ptr(data); + if (put_user(len, &iobuf->length) || + put_user(datap, &iobuf->buffer)) + return -EFAULT; + + err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0); + + if (!err) { + if (copy_in_user(&iobuf32->length, &iobuf->length, + sizeof(int))) + err = -EFAULT; + } + + return err; +} + +static int do_atmif_sioc(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct atmif_sioc __user *sioc; + struct compat_atmif_sioc __user *sioc32; + u32 data; + void __user *datap; + int err; + + sioc = compat_alloc_user_space(sizeof(*sioc)); + sioc32 = compat_ptr(arg); + + if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) + || get_user(data, &sioc32->arg)) + return -EFAULT; + datap = compat_ptr(data); + if (put_user(datap, &sioc->arg)) + return -EFAULT; + + err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0); + + if (!err) { + if (copy_in_user(&sioc32->length, &sioc->length, + sizeof(int))) + err = -EFAULT; + } + return err; +} + +static int do_atm_ioctl(struct socket *sock, unsigned int cmd32, + unsigned long arg) +{ + int i; + unsigned int cmd = 0; + + 
switch (cmd32) { + case SONET_GETSTAT: + case SONET_GETSTATZ: + case SONET_GETDIAG: + case SONET_SETDIAG: + case SONET_CLRDIAG: + case SONET_SETFRAMING: + case SONET_GETFRAMING: + case SONET_GETFRSENSE: + return do_atmif_sioc(sock, cmd32, arg); + } + + for (i = 0; i < NR_ATM_IOCTL; i++) { + if (cmd32 == atm_ioctl_map[i].cmd32) { + cmd = atm_ioctl_map[i].cmd; + break; + } + } + if (i == NR_ATM_IOCTL) + return -EINVAL; + + switch (cmd) { + case ATM_GETNAMES: + return do_atm_iobuf(sock, cmd, arg); + + case ATM_GETLINKRATE: + case ATM_GETTYPE: + case ATM_GETESI: + case ATM_GETADDR: + case ATM_RSTADDR: + case ATM_ADDADDR: + case ATM_DELADDR: + case ATM_GETCIRANGE: + case ATM_SETCIRANGE: + case ATM_SETESI: + case ATM_SETESIF: + case ATM_GETSTAT: + case ATM_GETSTATZ: + case ATM_GETLOOP: + case ATM_SETLOOP: + case ATM_QUERYLOOP: + return do_atmif_sioc(sock, cmd, arg); + } + + return -EINVAL; +} + +int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) { - return do_vcc_ioctl(sock, cmd, arg, 1); + int ret; + + ret = do_vcc_ioctl(sock, cmd, arg, 1); + if (ret != -ENOIOCTLCMD) + return ret; + + return do_atm_ioctl(sock, cmd, arg); } #endif diff --git a/net/atm/lec.c b/net/atm/lec.c index b2d64456032..42749b7b917 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -62,7 +62,6 @@ static int lec_open(struct net_device *dev); static netdev_tx_t lec_start_xmit(struct sk_buff *skb, struct net_device *dev); static int lec_close(struct net_device *dev); -static void lec_init(struct net_device *dev); static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, const unsigned char *mac_addr); static int lec_arp_remove(struct lec_priv *priv, @@ -670,13 +669,6 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_set_multicast_list = lec_set_multicast_list, }; - -static void lec_init(struct net_device *dev) -{ - dev->netdev_ops = &lec_netdev_ops; - printk("%s: Initialized!\n", dev->name); -} - static const unsigned char lec_ctrl_magic[] = { 0xff, 0x00, @@ -893,6 +885,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) dev_lec[i] = alloc_etherdev(size); if (!dev_lec[i]) return -ENOMEM; + dev_lec[i]->netdev_ops = &lec_netdev_ops; snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); @@ -901,7 +894,6 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) priv = netdev_priv(dev_lec[i]); priv->is_trdev = is_trdev; - lec_init(dev_lec[i]); } else { priv = netdev_priv(dev_lec[i]); if (priv->lecd) diff --git a/net/atm/pvc.c b/net/atm/pvc.c index d4c024504f9..8d74e62b0d7 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -127,7 +127,8 @@ static const struct proto_ops pvc_proto_ops = { }; -static int pvc_create(struct net *net, struct socket *sock,int protocol) +static int pvc_create(struct net *net, struct socket *sock, int protocol, + int kern) { if (net != &init_net) return -EAFNOSUPPORT; @@ -137,7 +138,7 @@ static int pvc_create(struct net *net, struct socket *sock,int protocol) } -static struct net_proto_family pvc_family_ops = { +static const struct net_proto_family pvc_family_ops = { .family = PF_ATMPVC, .create = pvc_create, .owner = THIS_MODULE, diff --git a/net/atm/svc.c b/net/atm/svc.c index f90d143c4b2..66e1d9b3e5d 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -25,7 +25,7 @@ #include "signaling.h" #include "addr.h" -static int svc_create(struct net *net, struct socket *sock,int protocol); +static int svc_create(struct net *net, struct socket *sock, int protocol, int kern); /* * Note: since all this is still 
nicely synchronized with the signaling demon, @@ -330,7 +330,7 @@ static int svc_accept(struct socket *sock,struct socket *newsock,int flags) lock_sock(sk); - error = svc_create(sock_net(sk), newsock,0); + error = svc_create(sock_net(sk), newsock, 0, 0); if (error) goto out; @@ -650,11 +650,12 @@ static const struct proto_ops svc_proto_ops = { }; -static int svc_create(struct net *net, struct socket *sock,int protocol) +static int svc_create(struct net *net, struct socket *sock, int protocol, + int kern) { int error; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; sock->ops = &svc_proto_ops; @@ -666,7 +667,7 @@ static int svc_create(struct net *net, struct socket *sock,int protocol) } -static struct net_proto_family svc_family_ops = { +static const struct net_proto_family svc_family_ops = { .family = PF_ATMSVC, .create = svc_create, .owner = THIS_MODULE, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index f4546073037..5588ba69c46 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -369,6 +369,9 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; + if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) + return -EINVAL; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; @@ -418,14 +421,10 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) break; case AX25_T3: - if (ax25_ctl.arg < 0) - goto einval_put; ax25->t3 = ax25_ctl.arg * HZ; break; case AX25_IDLE: - if (ax25_ctl.arg < 0) - goto einval_put; ax25->idle = ax25_ctl.arg * 60 * HZ; break; @@ -800,12 +799,13 @@ static struct proto ax25_proto = { .obj_size = sizeof(struct sock), }; -static int ax25_create(struct net *net, struct socket *sock, int protocol) +static int ax25_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; ax25_cb *ax25; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; switch (sock->type) { @@ -1961,7 +1961,7 @@ static const struct file_operations ax25_info_fops = { #endif -static struct net_proto_family ax25_family_ops = { +static const struct net_proto_family ax25_family_ops = { .family = PF_AX25, .create = ax25_create, .owner = THIS_MODULE, diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index 62ee3fb3473..5159be6b262 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -34,156 +34,128 @@ static ctl_table *ax25_table; static int ax25_table_size; static struct ctl_path ax25_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ax25", .ctl_name = NET_AX25, }, + { .procname = "net", }, + { .procname = "ax25", }, { } }; static const ctl_table ax25_param_table[] = { { - .ctl_name = NET_AX25_IP_DEFAULT_MODE, .procname = "ip_default_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ipdefmode, .extra2 = &max_ipdefmode }, { - .ctl_name = NET_AX25_DEFAULT_MODE, .procname = "ax25_default_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_axdefmode, .extra2 = &max_axdefmode }, { - .ctl_name = NET_AX25_BACKOFF_TYPE, .procname = "backoff_type", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_backoff, .extra2 = &max_backoff }, { - .ctl_name = NET_AX25_CONNECT_MODE, .procname = 
"connect_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_conmode, .extra2 = &max_conmode }, { - .ctl_name = NET_AX25_STANDARD_WINDOW, .procname = "standard_window_size", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, { - .ctl_name = NET_AX25_EXTENDED_WINDOW, .procname = "extended_window_size", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ewindow, .extra2 = &max_ewindow }, { - .ctl_name = NET_AX25_T1_TIMEOUT, .procname = "t1_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, { - .ctl_name = NET_AX25_T2_TIMEOUT, .procname = "t2_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, { - .ctl_name = NET_AX25_T3_TIMEOUT, .procname = "t3_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, { - .ctl_name = NET_AX25_IDLE_TIMEOUT, .procname = "idle_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, { - .ctl_name = NET_AX25_N2, .procname = "maximum_retry_count", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, { - .ctl_name = NET_AX25_PACLEN, .procname = "maximum_packet_length", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_paclen, .extra2 = &max_paclen }, { - .ctl_name = NET_AX25_PROTOCOL, .procname = "protocol", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_proto, .extra2 = &max_proto }, #ifdef CONFIG_AX25_DAMA_SLAVE { - .ctl_name = NET_AX25_DAMA_SLAVE_TIMEOUT, .procname = "dama_slave_timeout", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ds_timeout, .extra2 = &max_ds_timeout }, #endif - { .ctl_name = 0 } /* that's all, folks! */ + { } /* that's all, folks! */ }; void ax25_register_sysctl(void) @@ -212,11 +184,9 @@ void ax25_register_sysctl(void) return; } ax25_table[n].child = ax25_dev->systable = child; - ax25_table[n].ctl_name = n + 1; ax25_table[n].procname = ax25_dev->dev->name; ax25_table[n].mode = 0555; - child[AX25_MAX_VALUES].ctl_name = 0; /* just in case... 
*/ for (k = 0; k < AX25_MAX_VALUES; k++) child[k].data = &ax25_dev->values[k]; @@ -233,7 +203,7 @@ void ax25_unregister_sysctl(void) ctl_table *p; unregister_sysctl_table(ax25_table_header); - for (p = ax25_table; p->ctl_name; p++) + for (p = ax25_table; p->procname; p++) kfree(p->child); kfree(ax25_table); } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8cfb5a84984..087cc51f592 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -45,7 +45,7 @@ /* Bluetooth sockets */ #define BT_MAX_PROTO 8 -static struct net_proto_family *bt_proto[BT_MAX_PROTO]; +static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; @@ -86,7 +86,7 @@ static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) bt_key_strings[proto], &bt_lock_key[proto]); } -int bt_sock_register(int proto, struct net_proto_family *ops) +int bt_sock_register(int proto, const struct net_proto_family *ops) { int err = 0; @@ -126,7 +126,8 @@ int bt_sock_unregister(int proto) } EXPORT_SYMBOL(bt_sock_unregister); -static int bt_sock_create(struct net *net, struct socket *sock, int proto) +static int bt_sock_create(struct net *net, struct socket *sock, int proto, + int kern) { int err; @@ -144,7 +145,7 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto) read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { - err = bt_proto[proto]->create(net, sock, proto); + err = bt_proto[proto]->create(net, sock, proto, kern); bt_sock_reclassify_lock(sock, proto); module_put(bt_proto[proto]->owner); } @@ -257,7 +258,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (err == 0) - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); skb_free_datagram(sk, skb); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index cafe9f54d84..ef09c7b3a85 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -78,7 +78,7 @@ static struct bnep_session *__bnep_get_session(u8 *dst) static void __bnep_link_session(struct bnep_session *s) { /* It's safe to call __module_get() here because sessions are added - by the socket layer which has to hold the refference to this module. + by the socket layer which has to hold the reference to this module. */ __module_get(THIS_MODULE); list_add(&s->list, &bnep_session_list); @@ -230,7 +230,6 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len) switch (cmd) { case BNEP_CMD_NOT_UNDERSTOOD: - case BNEP_SETUP_CONN_REQ: case BNEP_SETUP_CONN_RSP: case BNEP_FILTER_NET_TYPE_RSP: case BNEP_FILTER_MULTI_ADDR_RSP: @@ -245,6 +244,10 @@ static int bnep_rx_control(struct bnep_session *s, void *data, int len) err = bnep_ctrl_set_mcfilter(s, data, len); break; + case BNEP_SETUP_CONN_REQ: + err = bnep_send_rsp(s, BNEP_SETUP_CONN_RSP, BNEP_CONN_NOT_ALLOWED); + break; + default: { u8 pkt[3]; pkt[0] = BNEP_CONTROL; @@ -629,7 +632,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) s = __bnep_get_session(req->dst); if (s) { /* Wakeup user-space which is polling for socket errors. 
- * This is temporary hack untill we have shutdown in L2CAP */ + * This is temporary hack until we have shutdown in L2CAP */ s->sock->sk->sk_err = EUNATCH; /* Kill session thread */ diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index e857628b0b2..2ff6ac7b2ed 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -195,7 +195,8 @@ static struct proto bnep_proto = { .obj_size = sizeof(struct bt_sock) }; -static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) +static int bnep_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -222,7 +223,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static struct net_proto_family bnep_sock_family_ops = { +static const struct net_proto_family bnep_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = bnep_sock_create diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 16b0fad74f6..978cc3a718a 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -190,7 +190,8 @@ static struct proto cmtp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) +static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -217,7 +218,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static struct net_proto_family cmtp_sock_family_ops = { +static const struct net_proto_family cmtp_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = cmtp_sock_create diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a9750984f77..b7c4224f4e7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -211,6 +211,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->type = type; conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; + conn->auth_type = HCI_AT_GENERAL_BONDING; conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e1da8f68759..94ba3498202 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -193,8 +193,9 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) while ((skb = skb_dequeue(&hdev->driver_init))) { bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; + skb_queue_tail(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } skb_queue_purge(&hdev->driver_init); @@ -987,6 +988,30 @@ int hci_resume_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_resume_dev); +/* Receive frame from HCI drivers */ +int hci_recv_frame(struct sk_buff *skb) +{ + struct hci_dev *hdev = (struct hci_dev *) skb->dev; + if (!hdev || (!test_bit(HCI_UP, &hdev->flags) + && !test_bit(HCI_INIT, &hdev->flags))) { + kfree_skb(skb); + return -ENXIO; + } + + /* Incomming skb */ + bt_cb(skb)->incoming = 1; + + /* Time stamp */ + __net_timestamp(skb); + + /* Queue frame for rx task */ + skb_queue_tail(&hdev->rx_q, skb); + tasklet_schedule(&hdev->rx_task); + + return 0; +} +EXPORT_SYMBOL(hci_recv_frame); + /* Receive packet type fragment */ #define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) @@ -1193,8 +1218,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; + 
skb_queue_tail(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); return 0; } @@ -1271,7 +1297,8 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) spin_unlock_bh(&conn->data_q.lock); } - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); + return 0; } EXPORT_SYMBOL(hci_send_acl); @@ -1298,8 +1325,10 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = HCI_SCODATA_PKT; + skb_queue_tail(&conn->data_q, skb); - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); + return 0; } EXPORT_SYMBOL(hci_send_sco); @@ -1612,7 +1641,7 @@ static void hci_cmd_task(unsigned long arg) hdev->cmd_last_tx = jiffies; } else { skb_queue_head(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e99fe385fba..28517bad796 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1320,7 +1320,7 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } @@ -1386,7 +1386,7 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } @@ -1454,7 +1454,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s } } - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); tasklet_enable(&hdev->tx_task); } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 75302a98606..688cfebfbee 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -414,6 +414,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto done; } + if (!test_bit(HCI_UP, &hdev->flags)) { + err = -ENETDOWN; + goto done; + } + if (!(skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err))) goto done; @@ -440,10 +445,10 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (test_bit(HCI_RAW, &hdev->flags) || (ogf == 0x3f)) { skb_queue_tail(&hdev->raw_q, skb); - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); } else { skb_queue_tail(&hdev->cmd_q, skb); - hci_sched_cmd(hdev); + tasklet_schedule(&hdev->cmd_task); } } else { if (!capable(CAP_NET_RAW)) { @@ -452,7 +457,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } skb_queue_tail(&hdev->raw_q, skb); - hci_sched_tx(hdev); + tasklet_schedule(&hdev->tx_task); } err = len; @@ -621,7 +626,8 @@ static struct proto hci_sk_proto = { .obj_size = sizeof(struct hci_pinfo) }; -static int hci_sock_create(struct net *net, struct socket *sock, int protocol) +static int hci_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -687,7 +693,7 @@ static int hci_sock_dev_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -static struct net_proto_family hci_sock_family_ops = { +static const struct net_proto_family hci_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hci_sock_create, diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 49d8495d69b..569750010fd 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -280,6 +280,13 @@ static int hidp_send_report(struct 
hidp_session *session, struct hid_report *rep return hidp_queue_report(session, buf, rsize); } +static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count) +{ + if (hidp_queue_report(hid->driver_data, data, count)) + return -ENOMEM; + return count; +} + static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; @@ -785,6 +792,8 @@ static int hidp_setup_hid(struct hidp_session *session, hid->dev.parent = hidp_get_device(session); hid->ll_driver = &hidp_hid_driver; + hid->hid_output_raw_report = hidp_output_raw_report; + err = hid_add_device(hid); if (err < 0) goto failed; diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 37c9d7d2e68..9cfef68b9fe 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -241,7 +241,8 @@ static struct proto hidp_proto = { .obj_size = sizeof(struct bt_sock) }; -static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) +static int hidp_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -268,7 +269,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static struct net_proto_family hidp_sock_family_ops = { +static const struct net_proto_family hidp_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = hidp_sock_create diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 77e9fb130ad..5129b88c8e5 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -54,6 +54,7 @@ #define VERSION "2.14" static int enable_ertm = 0; +static int max_transmit = L2CAP_DEFAULT_MAX_TX; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; @@ -373,6 +374,8 @@ static inline int l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) else control |= L2CAP_SUPER_RCV_READY; + control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + return l2cap_send_sframe(pi, control); } @@ -819,7 +822,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p return sk; } -static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -831,7 +835,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - if (sock->type == SOCK_RAW && !capable(CAP_NET_RAW)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) return -EPERM; sock->ops = &l2cap_sock_ops; @@ -1332,7 +1336,7 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) tx_skb = skb_clone(skb, GFP_ATOMIC); bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); @@ -1361,8 +1365,8 @@ static int l2cap_ertm_send(struct sock *sk) if (pi->conn_state & L2CAP_CONN_WAIT_F) return 0; - while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) - && !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) && + !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { tx_skb = skb_clone(skb, GFP_ATOMIC); if (pi->remote_max_tx && @@ -1374,7 +1378,7 @@ static int l2cap_ertm_send(struct 
sock *sk) bt_cb(skb)->retries++; control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); @@ -1603,8 +1607,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms return -EOPNOTSUPP; /* Check outgoing MTU */ - if (sk->sk_type == SOCK_SEQPACKET && pi->mode == L2CAP_MODE_BASIC - && len > pi->omtu) + if (sk->sk_type == SOCK_SEQPACKET && pi->mode == L2CAP_MODE_BASIC && + len > pi->omtu) return -EINVAL; lock_sock(sk); @@ -2172,6 +2176,21 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) *ptr += L2CAP_CONF_OPT_SIZE + len; } +static inline void l2cap_ertm_init(struct sock *sk) +{ + l2cap_pi(sk)->expected_ack_seq = 0; + l2cap_pi(sk)->unacked_frames = 0; + l2cap_pi(sk)->buffer_seq = 0; + l2cap_pi(sk)->num_to_ack = 0; + + setup_timer(&l2cap_pi(sk)->retrans_timer, + l2cap_retrans_timeout, (unsigned long) sk); + setup_timer(&l2cap_pi(sk)->monitor_timer, + l2cap_monitor_timeout, (unsigned long) sk); + + __skb_queue_head_init(SREJ_QUEUE(sk)); +} + static int l2cap_mode_supported(__u8 mode, __u32 feat_mask) { u32 local_feat_mask = l2cap_feat_mask; @@ -2205,7 +2224,7 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct l2cap_conf_req *req = data; - struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_ERTM }; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; void *ptr = req->data; BT_DBG("sk %p", sk); @@ -2235,7 +2254,7 @@ done: case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; - rfc.max_transmit = L2CAP_DEFAULT_MAX_TX; + rfc.max_transmit = max_transmit; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); @@ -2394,6 +2413,10 @@ done: rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; pi->conf_state |= L2CAP_CONF_MODE_DONE; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + break; case L2CAP_MODE_STREAMING: @@ -2401,6 +2424,10 @@ done: pi->max_pdu_size = rfc.max_pdu_size; pi->conf_state |= L2CAP_CONF_MODE_DONE; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + break; default: @@ -2410,9 +2437,6 @@ done: rfc.mode = pi->mode; } - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - if (result == L2CAP_CONF_SUCCESS) pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; } @@ -2750,22 +2774,18 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr goto unlock; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { - if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) - || l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) || + l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) l2cap_pi(sk)->fcs = L2CAP_FCS_CRC16; sk->sk_state = BT_CONNECTED; - l2cap_pi(sk)->next_tx_seq = 0; - l2cap_pi(sk)->expected_ack_seq = 0; - l2cap_pi(sk)->unacked_frames = 0; - - setup_timer(&l2cap_pi(sk)->retrans_timer, - l2cap_retrans_timeout, (unsigned long) sk); - setup_timer(&l2cap_pi(sk)->monitor_timer, - l2cap_monitor_timeout, (unsigned long) sk); + l2cap_pi(sk)->next_tx_seq = 0; + l2cap_pi(sk)->expected_tx_seq = 0; __skb_queue_head_init(TX_QUEUE(sk)); - __skb_queue_head_init(SREJ_QUEUE(sk)); + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + 
l2cap_ertm_init(sk); + l2cap_chan_ready(sk); goto unlock; } @@ -2839,16 +2859,17 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { - if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) - || l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) || + l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) l2cap_pi(sk)->fcs = L2CAP_FCS_CRC16; sk->sk_state = BT_CONNECTED; + l2cap_pi(sk)->next_tx_seq = 0; l2cap_pi(sk)->expected_tx_seq = 0; - l2cap_pi(sk)->buffer_seq = 0; - l2cap_pi(sk)->num_to_ack = 0; __skb_queue_head_init(TX_QUEUE(sk)); - __skb_queue_head_init(SREJ_QUEUE(sk)); + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + l2cap_ertm_init(sk); + l2cap_chan_ready(sk); } @@ -2880,9 +2901,12 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(TX_QUEUE(sk)); - skb_queue_purge(SREJ_QUEUE(sk)); - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + skb_queue_purge(SREJ_QUEUE(sk)); + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + } l2cap_chan_del(sk, ECONNRESET); bh_unlock_sock(sk); @@ -2907,9 +2931,12 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd return 0; skb_queue_purge(TX_QUEUE(sk)); - skb_queue_purge(SREJ_QUEUE(sk)); - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + skb_queue_purge(SREJ_QUEUE(sk)); + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + } l2cap_chan_del(sk, 0); bh_unlock_sock(sk); @@ -3274,12 +3301,16 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str { struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_txseq(rx_control); + u8 req_seq = __get_reqseq(rx_control); u16 tx_control = 0; u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; int err = 0; BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + pi->expected_ack_seq = req_seq; + l2cap_drop_acked_frames(sk); + if (tx_seq == pi->expected_tx_seq) goto expected; @@ -3334,6 +3365,16 @@ expected: return 0; } + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } + } + pi->buffer_seq = (pi->buffer_seq + 1) % 64; err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); @@ -3370,6 +3411,14 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } + if (!(pi->conn_state & L2CAP_CONN_WAIT_F)) break; @@ -3382,8 +3431,8 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) - && (pi->unacked_frames > 0)) + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->unacked_frames > 0)) __mod_retrans_timer(); l2cap_ertm_send(sk); @@ -3397,10 +3446,24 @@ static inline int 
l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str pi->expected_ack_seq = __get_reqseq(rx_control); l2cap_drop_acked_frames(sk); - sk->sk_send_head = TX_QUEUE(sk)->next; - pi->next_tx_seq = pi->expected_ack_seq; + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); + } + } else { + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + l2cap_ertm_send(sk); - l2cap_ertm_send(sk); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_REJ_ACT; + } + } break; @@ -3419,7 +3482,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str } else if (rx_control & L2CAP_CTRL_FINAL) { if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && pi->srej_save_reqseq == tx_seq) - pi->srej_save_reqseq &= ~L2CAP_CONN_SREJ_ACT; + pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; else l2cap_retransmit_frame(sk, tx_seq); } @@ -3919,7 +3982,7 @@ static const struct proto_ops l2cap_sock_ops = { .getsockopt = l2cap_sock_getsockopt }; -static struct net_proto_family l2cap_sock_family_ops = { +static const struct net_proto_family l2cap_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = l2cap_sock_create, @@ -3998,6 +4061,9 @@ module_exit(l2cap_exit); module_param(enable_ertm, bool, 0644); MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode"); +module_param(max_transmit, uint, 0644); +MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)"); + MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); MODULE_VERSION(VERSION); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 25692bc0a34..fc5ee3296e2 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -51,6 +51,7 @@ static int disable_cfc = 0; static int channel_mtu = -1; static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; +static int l2cap_ertm = 0; static struct task_struct *rfcomm_thread; @@ -702,6 +703,8 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst sk = sock->sk; lock_sock(sk); l2cap_pi(sk)->imtu = l2cap_mtu; + if (l2cap_ertm) + l2cap_pi(sk)->mode = L2CAP_MODE_ERTM; release_sock(sk); s = rfcomm_session_add(sock, BT_BOUND); @@ -2185,6 +2188,9 @@ MODULE_PARM_DESC(channel_mtu, "Default MTU for the RFCOMM channel"); module_param(l2cap_mtu, uint, 0644); MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection"); +module_param(l2cap_ertm, bool, 0644); +MODULE_PARM_DESC(l2cap_ertm, "Use L2CAP ERTM mode for connection"); + MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth RFCOMM ver " VERSION); MODULE_VERSION(VERSION); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8a20aaf1f23..4b5968dda67 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -323,7 +323,8 @@ static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int return sk; } -static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) +static int rfcomm_sock_create(struct net *net, struct socket *sock, + int protocol, int kern) { struct sock *sk; @@ -703,7 +704,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied += chunk; size -= chunk; - sock_recv_timestamp(msg, sk, skb); + 
sock_recv_ts_and_drops(msg, sk, skb); if (!(flags & MSG_PEEK)) { atomic_sub(chunk, &sk->sk_rmem_alloc); @@ -1101,7 +1102,7 @@ static const struct proto_ops rfcomm_sock_ops = { .mmap = sock_no_mmap }; -static struct net_proto_family rfcomm_sock_family_ops = { +static const struct net_proto_family rfcomm_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = rfcomm_sock_create diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 77f4153bdb5..dd8f6ec57dc 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -430,7 +430,8 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro return sk; } -static int sco_sock_create(struct net *net, struct socket *sock, int protocol) +static int sco_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -993,7 +994,7 @@ static const struct proto_ops sco_sock_ops = { .getsockopt = sco_sock_getsockopt }; -static struct net_proto_family sco_sock_family_ops = { +static const struct net_proto_family sco_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = sco_sock_create, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 07a07770c8b..1a99c4e04e8 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -157,6 +157,7 @@ static const struct ethtool_ops br_ethtool_ops = { .get_tso = ethtool_op_get_tso, .set_tso = br_set_tso, .get_ufo = ethtool_op_get_ufo, + .set_ufo = ethtool_op_set_ufo, .get_flags = ethtool_op_get_flags, }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 57bf05c353b..3b8e038ab32 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -60,8 +60,8 @@ static inline unsigned long hold_time(const struct net_bridge *br) static inline int has_expired(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { - return !fdb->is_static - && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); + return !fdb->is_static && + time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); } static inline int br_mac_hash(const unsigned char *mac) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b1b3b0fbf41..a2cbe61f6e6 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -154,7 +154,7 @@ static void del_nbp(struct net_bridge_port *p) } /* called with RTNL */ -static void del_br(struct net_bridge *br) +static void del_br(struct net_bridge *br, struct list_head *head) { struct net_bridge_port *p, *n; @@ -165,7 +165,7 @@ static void del_br(struct net_bridge *br) del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); - unregister_netdevice(br->dev); + unregister_netdevice_queue(br->dev, head); } static struct net_device *new_bridge_dev(struct net *net, const char *name) @@ -323,7 +323,7 @@ int br_del_bridge(struct net *net, const char *name) } else - del_br(netdev_priv(dev)); + del_br(netdev_priv(dev), NULL); rtnl_unlock(); return ret; @@ -377,15 +377,23 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) struct net_bridge_port *p; int err = 0; - if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) + /* Don't allow bridging non-ethernet like devices */ + if ((dev->flags & IFF_LOOPBACK) || + dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN) return -EINVAL; + /* No bridging of bridges */ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; + /* Device is already being bridged */ if (dev->br_port != NULL) return -EBUSY; + /* No bridging devices that dislike that (e.g. 
wireless) */ + if (dev->priv_flags & IFF_DONT_BRIDGE) + return -EOPNOTSUPP; + p = new_nbp(br, dev); if (IS_ERR(p)) return PTR_ERR(p); @@ -462,15 +470,14 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) void br_net_exit(struct net *net) { struct net_device *dev; + LIST_HEAD(list); rtnl_lock(); -restart: - for_each_netdev(net, dev) { - if (dev->priv_flags & IFF_EBRIDGE) { - del_br(netdev_priv(dev)); - goto restart; - } - } + for_each_netdev(net, dev) + if (dev->priv_flags & IFF_EBRIDGE) + del_br(netdev_priv(dev), &list); + + unregister_netdevice_many(&list); rtnl_unlock(); } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 6a6433daaf2..2af6e4a9026 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -81,6 +81,7 @@ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, return num; } +/* called with RTNL */ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) { struct net_device *dev; @@ -89,7 +90,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(dev_net(br->dev), ifindex); + dev = __dev_get_by_index(dev_net(br->dev), ifindex); if (dev == NULL) return -EINVAL; @@ -98,7 +99,6 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) else ret = br_del_if(br, dev); - dev_put(dev); return ret; } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a16a2342f6b..268e2e72588 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -1013,12 +1013,12 @@ static ctl_table brnf_table[] = { .mode = 0644, .proc_handler = brnf_sysctl_call_tables, }, - { .ctl_name = 0 } + { } }; static struct ctl_path brnf_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "bridge", .ctl_name = NET_BRIDGE, }, + { .procname = "net", }, + { .procname = "bridge", }, { } }; #endif diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index ee4820aa184..bee4f300d0c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -316,9 +316,9 @@ static ssize_t store_group_addr(struct device *d, if (new_addr[5] & ~0xf) return -EINVAL; - if (new_addr[5] == 1 /* 802.3x Pause address */ - || new_addr[5] == 2 /* 802.3ad Slow protocols */ - || new_addr[5] == 3) /* 802.1X PAE address */ + if (new_addr[5] == 1 || /* 802.3x Pause address */ + new_addr[5] == 2 || /* 802.3ad Slow protocols */ + new_addr[5] == 3) /* 802.1X PAE address */ return -EINVAL; spin_lock_bh(&br->lock); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 48527e62162..75e29a9cebd 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -135,8 +135,8 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (memcmp(sp, header, sizeof(header))) return false; - if (info->bitmask & EBT_STP_TYPE - && FWINV(info->type != sp->type, EBT_STP_TYPE)) + if (info->bitmask & EBT_STP_TYPE && + FWINV(info->type != sp->type, EBT_STP_TYPE)) return false; if (sp->type == BPDU_TYPE_CONFIG && diff --git a/net/can/af_can.c b/net/can/af_can.c index 60683211567..51adc4c2b86 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -114,7 +114,8 @@ static void can_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); } -static int can_create(struct net *net, struct socket *sock, int protocol) +static int can_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct can_proto *cp; @@ -125,7 +126,7 @@ static 
int can_create(struct net *net, struct socket *sock, int protocol) if (protocol < 0 || protocol >= CAN_NPROTO) return -EINVAL; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; #ifdef CONFIG_MODULES @@ -160,11 +161,6 @@ static int can_create(struct net *net, struct socket *sock, int protocol) goto errout; } - if (cp->capability >= 0 && !capable(cp->capability)) { - err = -EPERM; - goto errout; - } - sock->ops = cp->ops; sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot); @@ -379,8 +375,8 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, return &d->rx[RX_ALL]; /* extra filterlists for the subscription of a single non-RTR can_id */ - if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) - && !(*can_id & CAN_RTR_FLAG)) { + if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) && + !(*can_id & CAN_RTR_FLAG)) { if (*can_id & CAN_EFF_FLAG) { if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { @@ -529,8 +525,8 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, */ hlist_for_each_entry_rcu(r, next, rl, list) { - if (r->can_id == can_id && r->mask == mask - && r->func == func && r->data == data) + if (r->can_id == can_id && r->mask == mask && + r->func == func && r->data == data) break; } @@ -842,7 +838,7 @@ static struct packet_type can_packet __read_mostly = { .func = can_rcv, }; -static struct net_proto_family can_family_ops __read_mostly = { +static const struct net_proto_family can_family_ops = { .family = PF_CAN, .create = can_create, .owner = THIS_MODULE, diff --git a/net/can/bcm.c b/net/can/bcm.c index 597da4f8f88..e32af52238a 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -132,23 +132,27 @@ static inline struct bcm_sock *bcm_sk(const struct sock *sk) /* * procfs functions */ -static char *bcm_proc_getifname(int ifindex) +static char *bcm_proc_getifname(char *result, int ifindex) { struct net_device *dev; if (!ifindex) return "any"; - /* no usage counting */ - dev = __dev_get_by_index(&init_net, ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(&init_net, ifindex); if (dev) - return dev->name; + strcpy(result, dev->name); + else + strcpy(result, "???"); + rcu_read_unlock(); - return "???"; + return result; } static int bcm_proc_show(struct seq_file *m, void *v) { + char ifname[IFNAMSIZ]; struct sock *sk = (struct sock *)m->private; struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; @@ -157,7 +161,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) seq_printf(m, " / sk %p", sk); seq_printf(m, " / bo %p", bo); seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs); - seq_printf(m, " / bound %s", bcm_proc_getifname(bo->ifindex)); + seq_printf(m, " / bound %s", bcm_proc_getifname(ifname, bo->ifindex)); seq_printf(m, " <<<\n"); list_for_each_entry(op, &bo->rx_ops, list) { @@ -169,7 +173,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) continue; seq_printf(m, "rx_op: %03X %-5s ", - op->can_id, bcm_proc_getifname(op->ifindex)); + op->can_id, bcm_proc_getifname(ifname, op->ifindex)); seq_printf(m, "[%d]%c ", op->nframes, (op->flags & RX_CHECK_DLC)?'d':' '); if (op->kt_ival1.tv64) @@ -194,7 +198,8 @@ static int bcm_proc_show(struct seq_file *m, void *v) list_for_each_entry(op, &bo->tx_ops, list) { seq_printf(m, "tx_op: %03X %s [%d] ", - op->can_id, bcm_proc_getifname(op->ifindex), + op->can_id, + bcm_proc_getifname(ifname, op->ifindex), op->nframes); if (op->kt_ival1.tv64) @@ -1534,7 +1539,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - 
sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { msg->msg_namelen = sizeof(struct sockaddr_can); @@ -1576,7 +1581,6 @@ static struct proto bcm_proto __read_mostly = { static struct can_proto bcm_can_proto __read_mostly = { .type = SOCK_DGRAM, .protocol = CAN_BCM, - .capability = -1, .ops = &bcm_ops, .prot = &bcm_proto, }; diff --git a/net/can/raw.c b/net/can/raw.c index b5e897922d3..abca920440b 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -424,8 +424,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_RAW) return -EINVAL; - if (optlen < 0) - return -EINVAL; switch (optname) { @@ -702,7 +700,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, return err; } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { msg->msg_namelen = sizeof(struct sockaddr_can); @@ -744,7 +742,6 @@ static struct proto raw_proto __read_mostly = { static struct can_proto raw_can_proto __read_mostly = { .type = SOCK_RAW, .protocol = CAN_RAW, - .capability = -1, .ops = &raw_ops, .prot = &raw_proto, }; diff --git a/net/compat.c b/net/compat.c index a407c3addba..a1fb1b079a8 100644 --- a/net/compat.c +++ b/net/compat.c @@ -390,9 +390,6 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, int err; struct socket *sock; - if (optlen < 0) - return -EINVAL; - if ((sock = sockfd_lookup(fd, &err))!=NULL) { err = security_socket_setsockopt(sock,level,optname); @@ -727,10 +724,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) -static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4)}; + AL(4),AL(5)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -755,13 +752,35 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } +asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, + unsigned vlen, unsigned int flags, + struct compat_timespec __user *timeout) +{ + int datagrams; + struct timespec ktspec; + + if (timeout == NULL) + return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT, NULL); + + if (get_compat_timespec(&ktspec, timeout)) + return -EFAULT; + + datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, + flags | MSG_CMSG_COMPAT, &ktspec); + if (datagrams > 0 && put_compat_timespec(&ktspec, timeout)) + datagrams = -EFAULT; + + return datagrams; +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_ACCEPT4) + if (call < SYS_SOCKET || call > SYS_RECVMMSG) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -823,6 +842,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; + case SYS_RECVMMSG: + ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4])); + break; case SYS_ACCEPT4: ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; diff --git a/net/core/Makefile b/net/core/Makefile index 796f46eece5..08791ac3e05 100644 --- a/net/core/Makefile +++ 
b/net/core/Makefile @@ -6,7 +6,6 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-$(CONFIG_HAS_DMA) += skb_dma_map.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 4ade3011bb3..95c2e0840d0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -271,6 +271,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) } kfree_skb(skb); + atomic_inc(&sk->sk_drops); sk_mem_reclaim_partial(sk); return err; diff --git a/net/core/dev.c b/net/core/dev.c index b8f74cfb1bf..6fe7d739e59 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -79,6 +79,7 @@ #include <linux/cpu.h> #include <linux/types.h> #include <linux/kernel.h> +#include <linux/hash.h> #include <linux/sched.h> #include <linux/mutex.h> #include <linux/string.h> @@ -104,6 +105,7 @@ #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> +#include <net/xfrm.h> #include <linux/highmem.h> #include <linux/init.h> #include <linux/kmod.h> @@ -175,7 +177,7 @@ static struct list_head ptype_all __read_mostly; /* Taps */ * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. * - * Pure readers hold dev_base_lock for reading. + * Pure readers hold dev_base_lock for reading, or rcu_read_lock() * * Writers must hold the rtnl semaphore while they loop through the * dev_base_head list, and hold dev_base_lock for writing when they do the @@ -193,18 +195,15 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); -#define NETDEV_HASHBITS 8 -#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS) - static inline struct hlist_head *dev_name_hash(struct net *net, const char *name) { unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); - return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)]; } static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) { - return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; + return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; } /* Device list insertion */ @@ -215,23 +214,26 @@ static int list_netdevice(struct net_device *dev) ASSERT_RTNL(); write_lock_bh(&dev_base_lock); - list_add_tail(&dev->dev_list, &net->dev_base_head); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); - hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); + list_add_tail_rcu(&dev->dev_list, &net->dev_base_head); + hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); + hlist_add_head_rcu(&dev->index_hlist, + dev_index_hash(net, dev->ifindex)); write_unlock_bh(&dev_base_lock); return 0; } -/* Device list removal */ +/* Device list removal + * caller must respect a RCU grace period before freeing/reusing dev + */ static void unlist_netdevice(struct net_device *dev) { ASSERT_RTNL(); /* Unlink dev from the device chain */ write_lock_bh(&dev_base_lock); - list_del(&dev->dev_list); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); + list_del_rcu(&dev->dev_list); + hlist_del_rcu(&dev->name_hlist); + hlist_del_rcu(&dev->index_hlist); write_unlock_bh(&dev_base_lock); } @@ -587,18 +589,44 @@ __setup("netdev=", netdev_boot_setup); struct net_device *__dev_get_by_name(struct net *net, const char *name) 
{ struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_name_hash(net, name); - hlist_for_each(p, dev_name_hash(net, name)) { - struct net_device *dev - = hlist_entry(p, struct net_device, name_hlist); + hlist_for_each_entry(dev, p, head, name_hlist) if (!strncmp(dev->name, name, IFNAMSIZ)) return dev; - } + return NULL; } EXPORT_SYMBOL(__dev_get_by_name); /** + * dev_get_by_name_rcu - find a device by its name + * @net: the applicable net namespace + * @name: name to find + * + * Find an interface by name. + * If the name is found a pointer to the device is returned. + * If the name is not found then %NULL is returned. + * The reference counters are not incremented so the caller must be + * careful with locks. The caller must hold RCU lock. + */ + +struct net_device *dev_get_by_name_rcu(struct net *net, const char *name) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_name_hash(net, name); + + hlist_for_each_entry_rcu(dev, p, head, name_hlist) + if (!strncmp(dev->name, name, IFNAMSIZ)) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_get_by_name_rcu); + +/** * dev_get_by_name - find a device by its name * @net: the applicable net namespace * @name: name to find @@ -614,11 +642,11 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_name); @@ -638,17 +666,42 @@ EXPORT_SYMBOL(dev_get_by_name); struct net_device *__dev_get_by_index(struct net *net, int ifindex) { struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_index_hash(net, ifindex); - hlist_for_each(p, dev_index_hash(net, ifindex)) { - struct net_device *dev - = hlist_entry(p, struct net_device, index_hlist); + hlist_for_each_entry(dev, p, head, index_hlist) if (dev->ifindex == ifindex) return dev; - } + return NULL; } EXPORT_SYMBOL(__dev_get_by_index); +/** + * dev_get_by_index_rcu - find a device by its ifindex + * @net: the applicable net namespace + * @ifindex: index of device + * + * Search for an interface by index. Returns %NULL if the device + * is not found or a pointer to the device. The device has not + * had its reference counter increased so the caller must be careful + * about locking. The caller must hold RCU lock. 
+ */ + +struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) +{ + struct hlist_node *p; + struct net_device *dev; + struct hlist_head *head = dev_index_hash(net, ifindex); + + hlist_for_each_entry_rcu(dev, p, head, index_hlist) + if (dev->ifindex == ifindex) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_get_by_index_rcu); + /** * dev_get_by_index - find a device by its ifindex @@ -665,11 +718,11 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); if (dev) dev_hold(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(dev_get_by_index); @@ -748,15 +801,15 @@ struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, struct net_device *dev, *ret; ret = NULL; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); ret = dev; break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL(dev_get_by_flags); @@ -841,7 +894,8 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) free_page((unsigned long) inuse); } - snprintf(buf, IFNAMSIZ, name, i); + if (buf != name) + snprintf(buf, IFNAMSIZ, name, i); if (!__dev_get_by_name(net, buf)) return i; @@ -881,6 +935,21 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); +static int dev_get_valid_name(struct net *net, const char *name, char *buf, + bool fmt) +{ + if (!dev_valid_name(name)) + return -EINVAL; + + if (fmt && strchr(name, '%')) + return __dev_alloc_name(net, name, buf); + else if (__dev_get_by_name(net, name)) + return -EEXIST; + else if (buf != name) + strlcpy(buf, name, IFNAMSIZ); + + return 0; +} /** * dev_change_name - change name of a device @@ -904,28 +973,20 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - if (!dev_valid_name(newname)) - return -EINVAL; - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) return 0; memcpy(oldname, dev->name, IFNAMSIZ); - if (strchr(newname, '%')) { - err = dev_alloc_name(dev, newname); - if (err < 0) - return err; - } else if (__dev_get_by_name(net, newname)) - return -EEXIST; - else - strlcpy(dev->name, newname, IFNAMSIZ); + err = dev_get_valid_name(net, newname, dev->name, 1); + if (err < 0) + return err; rollback: /* For now only devices in the initial network namespace * are in sysfs. 
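The RCU lookup helpers documented above replace read_lock(&dev_base_lock) callers; a minimal caller sketch, assuming a made-up function name and copied field (it mirrors the dev_ifname() conversion later in this patch, but is not itself part of the diff):

#include <linux/netdevice.h>
#include <linux/rcupdate.h>

static int example_ifindex_to_name(struct net *net, int ifindex,
				   char name[IFNAMSIZ])
{
	struct net_device *dev;
	int err = -ENODEV;

	/* No reference is taken: the device pointer is only valid inside
	 * the RCU read-side critical section, so copy out what is needed
	 * before unlocking.
	 */
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev) {
		strlcpy(name, dev->name, IFNAMSIZ);
		err = 0;
	}
	rcu_read_unlock();

	return err;
}

dev_ifname() and dev_ifsioc_locked() are converted to exactly this pattern further down in this patch.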
*/ - if (net == &init_net) { + if (net_eq(net, &init_net)) { ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); @@ -935,21 +996,27 @@ rollback: write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); - hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + write_unlock_bh(&dev_base_lock); + + synchronize_rcu(); + + write_lock_bh(&dev_base_lock); + hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name)); write_unlock_bh(&dev_base_lock); ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev); ret = notifier_to_errno(ret); if (ret) { - if (err) { - printk(KERN_ERR - "%s: name change rollback failed: %d.\n", - dev->name, ret); - } else { + /* err >= 0 after dev_alloc_name() or stores the first errno */ + if (err >= 0) { err = ret; memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; + } else { + printk(KERN_ERR + "%s: name change rollback failed: %d.\n", + dev->name, ret); } } @@ -1037,9 +1104,9 @@ void dev_load(struct net *net, const char *name) { struct net_device *dev; - read_lock(&dev_base_lock); - dev = __dev_get_by_name(net, name); - read_unlock(&dev_base_lock); + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, name); + rcu_read_unlock(); if (!dev && capable(CAP_NET_ADMIN)) request_module("%s", name); @@ -1286,6 +1353,7 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } @@ -1352,6 +1420,45 @@ static inline void net_timestamp(struct sk_buff *skb) skb->tstamp.tv64 = 0; } +/** + * dev_forward_skb - loopback an skb to another netif + * + * @dev: destination network device + * @skb: buffer to forward + * + * return values: + * NET_RX_SUCCESS (no congestion) + * NET_RX_DROP (packet was dropped) + * + * dev_forward_skb can be used for injecting an skb from the + * start_xmit function of one device into the receive queue + * of another device. + * + * The receiving device may be in another namespace, so + * we have to clear all information in the skb that could + * impact namespace isolation. + */ +int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) +{ + skb_orphan(skb); + + if (!(dev->flags & IFF_UP)) + return NET_RX_DROP; + + if (skb->len > (dev->mtu + dev->hard_header_len)) + return NET_RX_DROP; + + skb_dst_drop(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, dev); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + return netif_rx(skb); +} +EXPORT_SYMBOL_GPL(dev_forward_skb); + /* * Support routine. Sends outgoing frames to any network * taps currently in use. 
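dev_forward_skb() above is aimed at virtual drivers that loop traffic into another netdev's receive path; a sketch of such an ndo_start_xmit, where example_get_peer() is an assumed helper and not part of this patch:

/* Hand the frame to the peer device's receive queue.  dev_forward_skb()
 * scrubs namespace-sensitive state and ends in netif_rx(), so ownership
 * of the skb passes to it; record the length for stats beforehand.
 */
static netdev_tx_t example_pair_start_xmit(struct sk_buff *skb,
					   struct net_device *dev)
{
	struct net_device *peer = example_get_peer(dev);	/* assumed helper */
	unsigned int len = skb->len;

	if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS) {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	} else {
		dev->stats.tx_dropped++;
	}

	return NETDEV_TX_OK;
}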
@@ -1700,7 +1807,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; - int rc; + int rc = NETDEV_TX_OK; if (likely(!skb->next)) { if (!list_empty(&ptype_all)) @@ -1748,6 +1855,8 @@ gso: nskb->next = NULL; rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc != NETDEV_TX_OK)) { + if (rc & ~NETDEV_TX_MASK) + goto out_kfree_gso_skb; nskb->next = skb->next; skb->next = nskb; return rc; @@ -1757,11 +1866,12 @@ gso: return NETDEV_TX_BUSY; } while (skb->next); - skb->destructor = DEV_GSO_CB(skb)->destructor; - +out_kfree_gso_skb: + if (likely(skb->next == NULL)) + skb->destructor = DEV_GSO_CB(skb)->destructor; out_kfree_skb: kfree_skb(skb); - return NETDEV_TX_OK; + return rc; } static u32 skb_tx_hashrnd; @@ -1788,16 +1898,43 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) } EXPORT_SYMBOL(skb_tx_hash); +static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) +{ + if (unlikely(queue_index >= dev->real_num_tx_queues)) { + if (net_ratelimit()) { + WARN(1, "%s selects TX queue %d, but " + "real number of TX queues is %d\n", + dev->name, queue_index, + dev->real_num_tx_queues); + } + return 0; + } + return queue_index; +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { - const struct net_device_ops *ops = dev->netdev_ops; - u16 queue_index = 0; + u16 queue_index; + struct sock *sk = skb->sk; - if (ops->ndo_select_queue) - queue_index = ops->ndo_select_queue(dev, skb); - else if (dev->real_num_tx_queues > 1) - queue_index = skb_tx_hash(dev, skb); + if (sk_tx_queue_recorded(sk)) { + queue_index = sk_tx_queue_get(sk); + } else { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } else { + queue_index = 0; + if (dev->real_num_tx_queues > 1) + queue_index = skb_tx_hash(dev, skb); + + if (sk && sk->sk_dst_cache) + sk_tx_queue_set(sk, queue_index); + } + } skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); @@ -1934,8 +2071,8 @@ gso: HARD_TX_LOCK(dev, txq, cpu); if (!netif_tx_queue_stopped(txq)) { - rc = NET_XMIT_SUCCESS; - if (!dev_hard_start_xmit(skb, dev, txq)) { + rc = dev_hard_start_xmit(skb, dev, txq); + if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; } @@ -2190,7 +2327,7 @@ static int ing_filter(struct sk_buff *skb) if (MAX_RED_LOOP < ttl++) { printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); + skb->skb_iif, dev->ifindex); return TC_ACT_SHOT; } @@ -2291,15 +2428,15 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->tstamp.tv64) net_timestamp(skb); - if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; - if (!skb->iif) - skb->iif = skb->dev->ifindex; + if (!skb->skb_iif) + skb->skb_iif = skb->dev->ifindex; null_or_orig = NULL; orig_dev = skb->dev; @@ -2439,7 +2576,7 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; @@ -2447,7 +2584,7 @@ int 
dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int same_flow; int mac_len; - int ret; + enum gro_result ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2531,7 +2668,8 @@ normal: } EXPORT_SYMBOL(dev_gro_receive); -static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static gro_result_t +__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; @@ -2539,33 +2677,35 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return GRO_NORMAL; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) - && !compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = + (p->dev == skb->dev) && + !compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } return dev_gro_receive(napi, skb); } -int napi_skb_finish(int ret, struct sk_buff *skb) +gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { - int err = NET_RX_SUCCESS; - switch (ret) { case GRO_NORMAL: - return netif_receive_skb(skb); + if (netif_receive_skb(skb)) + ret = GRO_DROP; + break; case GRO_DROP: - err = NET_RX_DROP; - /* fall through */ - case GRO_MERGED_FREE: kfree_skb(skb); break; + + case GRO_HELD: + case GRO_MERGED: + break; } - return err; + return ret; } EXPORT_SYMBOL(napi_skb_finish); @@ -2585,7 +2725,7 @@ void skb_gro_reset_offset(struct sk_buff *skb) } EXPORT_SYMBOL(skb_gro_reset_offset); -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); @@ -2604,49 +2744,41 @@ EXPORT_SYMBOL(napi_reuse_skb); struct sk_buff *napi_get_frags(struct napi_struct *napi) { - struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; if (!skb) { - skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); - if (!skb) - goto out; - - skb_reserve(skb, NET_IP_ALIGN); - - napi->skb = skb; + skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD); + if (skb) + napi->skb = skb; } - -out: return skb; } EXPORT_SYMBOL(napi_get_frags); -int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) +gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, + gro_result_t ret) { - int err = NET_RX_SUCCESS; - switch (ret) { case GRO_NORMAL: case GRO_HELD: skb->protocol = eth_type_trans(skb, napi->dev); - if (ret == GRO_NORMAL) - return netif_receive_skb(skb); - - skb_gro_pull(skb, -ETH_HLEN); + if (ret == GRO_HELD) + skb_gro_pull(skb, -ETH_HLEN); + else if (netif_receive_skb(skb)) + ret = GRO_DROP; break; case GRO_DROP: - err = NET_RX_DROP; - /* fall through */ - case GRO_MERGED_FREE: napi_reuse_skb(napi, skb); break; + + case GRO_MERGED: + break; } - return err; + return ret; } EXPORT_SYMBOL(napi_frags_finish); @@ -2687,12 +2819,12 @@ out: } EXPORT_SYMBOL(napi_frags_skb); -int napi_gro_frags(struct napi_struct *napi) +gro_result_t napi_gro_frags(struct napi_struct *napi) { struct sk_buff *skb = napi_frags_skb(napi); if (!skb) - return NET_RX_DROP; + return GRO_DROP; return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); } @@ -2937,15 +3069,15 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifr.ifr_ifindex); + rcu_read_lock(); + dev = 
dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); return -ENODEV; } strcpy(ifr.ifr_name, dev->name); - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; @@ -3015,18 +3147,18 @@ static int dev_ifconf(struct net *net, char __user *arg) * in detail. */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); loff_t off; struct net_device *dev; - read_lock(&dev_base_lock); + rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; off = 1; - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (off++ == *pos) return dev; @@ -3035,16 +3167,18 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net *net = seq_file_net(seq); + struct net_device *dev = (v == SEQ_START_TOKEN) ? + first_net_device(seq_file_net(seq)) : + next_net_device((struct net_device *)v); + ++*pos; - return v == SEQ_START_TOKEN ? - first_net_device(net) : next_net_device((struct net_device *)v); + return rcu_dereference(dev); } void dev_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) @@ -4253,12 +4387,12 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) EXPORT_SYMBOL(dev_set_mac_address); /* - * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock) + * Perform the SIOCxIFxxx calls, inside rcu_read_lock() */ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; - struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name); if (!dev) return -ENODEV; @@ -4490,9 +4624,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGIFINDEX: case SIOCGIFTXQLEN: dev_load(net, ifr.ifr_name); - read_lock(&dev_base_lock); + rcu_read_lock(); ret = dev_ifsioc_locked(net, &ifr, cmd); - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (!ret) { if (colon) *colon = ':'; @@ -4635,59 +4769,82 @@ static void net_set_todo(struct net_device *dev) list_add_tail(&dev->todo_list, &net_todo_list); } -static void rollback_registered(struct net_device *dev) +static void rollback_registered_many(struct list_head *head) { + struct net_device *dev, *tmp; + BUG_ON(dev_boot_phase); ASSERT_RTNL(); - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); + list_for_each_entry_safe(dev, tmp, head, unreg_list) { + /* Some devices call without registering + * for initialization unwind. Remove those + * devices and proceed with the remaining. + */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + pr_debug("unregister_netdevice: device %s/%p never " + "was registered\n", dev->name, dev); - WARN_ON(1); - return; - } + WARN_ON(1); + list_del(&dev->unreg_list); + continue; + } - BUG_ON(dev->reg_state != NETREG_REGISTERED); + BUG_ON(dev->reg_state != NETREG_REGISTERED); - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close(dev); - /* And unlink it from device chain. 
*/ - unlist_netdevice(dev); + /* And unlink it from device chain. */ + unlist_netdevice(dev); - dev->reg_state = NETREG_UNREGISTERING; + dev->reg_state = NETREG_UNREGISTERING; + } synchronize_net(); - /* Shutdown queueing discipline. */ - dev_shutdown(dev); + list_for_each_entry(dev, head, unreg_list) { + /* Shutdown queueing discipline. */ + dev_shutdown(dev); - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* - * Flush the unicast and multicast chains - */ - dev_unicast_flush(dev); - dev_addr_discard(dev); + /* + * Flush the unicast and multicast chains + */ + dev_unicast_flush(dev); + dev_addr_discard(dev); - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); - /* Notifier chain MUST detach us from master device. */ - WARN_ON(dev->master); + /* Notifier chain MUST detach us from master device. */ + WARN_ON(dev->master); - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); + } + + /* Process any work delayed until the end of the batch */ + dev = list_entry(head->next, struct net_device, unreg_list); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); synchronize_net(); - dev_put(dev); + list_for_each_entry(dev, head, unreg_list) + dev_put(dev); +} + +static void rollback_registered(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + rollback_registered_many(&single); } static void __netdev_init_queue_locks_one(struct net_device *dev, @@ -4746,6 +4903,33 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) EXPORT_SYMBOL(netdev_fix_features); /** + * netif_stacked_transfer_operstate - transfer operstate + * @rootdev: the root or lower level device to transfer state from + * @dev: the device to transfer operstate to + * + * Transfer operational state from root to device. This is normally + * called when a stacking relationship exists between the root + * device and the device(a leaf device). 
+ */ +void netif_stacked_transfer_operstate(const struct net_device *rootdev, + struct net_device *dev) +{ + if (rootdev->operstate == IF_OPER_DORMANT) + netif_dormant_on(dev); + else + netif_dormant_off(dev); + + if (netif_carrier_ok(rootdev)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(dev)) + netif_carrier_off(dev); + } +} +EXPORT_SYMBOL(netif_stacked_transfer_operstate); + +/** * register_netdevice - register a network device * @dev: device to register * @@ -4764,8 +4948,6 @@ EXPORT_SYMBOL(netdev_fix_features); int register_netdevice(struct net_device *dev) { - struct hlist_head *head; - struct hlist_node *p; int ret; struct net *net = dev_net(dev); @@ -4794,26 +4976,14 @@ int register_netdevice(struct net_device *dev) } } - if (!dev_valid_name(dev->name)) { - ret = -EINVAL; + ret = dev_get_valid_name(net, dev->name, dev->name, 0); + if (ret) goto err_uninit; - } dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Check for existence of name */ - head = dev_name_hash(net, dev->name); - hlist_for_each(p, head) { - struct net_device *d - = hlist_entry(p, struct net_device, name_hlist); - if (!strncmp(d->name, dev->name, IFNAMSIZ)) { - ret = -EEXIST; - goto err_uninit; - } - } - /* Fix illegal checksum combinations */ if ((dev->features & NETIF_F_HW_CSUM) && (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { @@ -4836,6 +5006,12 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_GSO; netdev_initialize_kobject(dev); + + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); + ret = notifier_to_errno(ret); + if (ret) + goto err_uninit; + ret = netdev_register_kobject(dev); if (ret) goto err_uninit; @@ -4960,6 +5136,8 @@ static void netdev_wait_allrefs(struct net_device *dev) { unsigned long rebroadcast_time, warning_time; + linkwatch_forget_dev(dev); + rebroadcast_time = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 0) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { @@ -4967,6 +5145,8 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users + * should have already handle it the first time */ if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -5062,6 +5242,32 @@ void netdev_run_todo(void) } /** + * dev_txq_stats_fold - fold tx_queues stats + * @dev: device to get statistics from + * @stats: struct net_device_stats to hold results + */ +void dev_txq_stats_fold(const struct net_device *dev, + struct net_device_stats *stats) +{ + unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; + unsigned int i; + struct netdev_queue *txq; + + for (i = 0; i < dev->num_tx_queues; i++) { + txq = netdev_get_tx_queue(dev, i); + tx_bytes += txq->tx_bytes; + tx_packets += txq->tx_packets; + tx_dropped += txq->tx_dropped; + } + if (tx_bytes || tx_packets || tx_dropped) { + stats->tx_bytes = tx_bytes; + stats->tx_packets = tx_packets; + stats->tx_dropped = tx_dropped; + } +} +EXPORT_SYMBOL(dev_txq_stats_fold); + +/** * dev_get_stats - get network device statistics * @dev: device to get statistics from * @@ -5075,25 +5281,9 @@ const struct net_device_stats *dev_get_stats(struct net_device *dev) if (ops->ndo_get_stats) return ops->ndo_get_stats(dev); - else { - unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - struct net_device_stats *stats = &dev->stats; - unsigned int i; - struct netdev_queue *txq; - - 
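A sketch of a caller for the helper above: a netdevice notifier for a stacked (upper) device mirroring the lower device's carrier and dormant state on NETDEV_CHANGE; example_find_upper() is an assumed helper:

static int example_stacked_notifier(struct notifier_block *unused,
				    unsigned long event, void *ptr)
{
	struct net_device *lower = ptr;
	struct net_device *upper = example_find_upper(lower);	/* assumed helper */

	if (!upper)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_CHANGE:
		/* Propagate carrier/dormant state from the lower (root)
		 * device to the device stacked on top of it.
		 */
		netif_stacked_transfer_operstate(lower, upper);
		break;
	}

	return NOTIFY_DONE;
}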
for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } - return stats; - } + + dev_txq_stats_fold(dev, &dev->stats); + return &dev->stats; } EXPORT_SYMBOL(dev_get_stats); @@ -5173,6 +5363,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); @@ -5237,25 +5429,47 @@ void synchronize_net(void) EXPORT_SYMBOL(synchronize_net); /** - * unregister_netdevice - remove device from the kernel + * unregister_netdevice_queue - remove device from the kernel * @dev: device + * @head: list * * This function shuts down a device interface and removes it * from the kernel tables. + * If head not NULL, device is queued to be unregistered later. * * Callers must hold the rtnl semaphore. You may want * unregister_netdev() instead of this. */ -void unregister_netdevice(struct net_device *dev) +void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) { ASSERT_RTNL(); - rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); + if (head) { + list_move_tail(&dev->unreg_list, head); + } else { + rollback_registered(dev); + /* Finish processing unregister after unlock */ + net_set_todo(dev); + } +} +EXPORT_SYMBOL(unregister_netdevice_queue); + +/** + * unregister_netdevice_many - unregister many devices + * @head: list of devices + */ +void unregister_netdevice_many(struct list_head *head) +{ + struct net_device *dev; + + if (!list_empty(head)) { + rollback_registered_many(head); + list_for_each_entry(dev, head, unreg_list) + net_set_todo(dev); + } } -EXPORT_SYMBOL(unregister_netdevice); +EXPORT_SYMBOL(unregister_netdevice_many); /** * unregister_netdev - remove device from the kernel @@ -5292,8 +5506,6 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - char buf[IFNAMSIZ]; - const char *destname; int err; ASSERT_RTNL(); @@ -5326,20 +5538,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char * we can use it in the destination network namespace. */ err = -EEXIST; - destname = dev->name; - if (__dev_get_by_name(net, destname)) { + if (__dev_get_by_name(net, dev->name)) { /* We get here if we can't use the current device name */ if (!pat) goto out; - if (!dev_valid_name(pat)) - goto out; - if (strchr(pat, '%')) { - if (__dev_alloc_name(net, pat, buf) < 0) - goto out; - destname = buf; - } else - destname = pat; - if (__dev_get_by_name(net, destname)) + if (dev_get_valid_name(net, pat, dev->name, 1)) goto out; } @@ -5363,6 +5566,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char this device. They should clean all the things. 
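The two unregister helpers above let callers batch teardown so the expensive synchronize_net() barriers in rollback_registered_many() are paid once per batch rather than once per device; a sketch of the intended pattern under RTNL, with example_owns() as an assumed predicate:

static void example_destroy_all(struct net *net)
{
	struct net_device *dev;
	LIST_HEAD(list);

	rtnl_lock();
	/* Queue every matching device; nothing is torn down yet, the
	 * devices are only moved onto the local unregistration list.
	 */
	for_each_netdev(net, dev)
		if (example_owns(dev))			/* assumed predicate */
			unregister_netdevice_queue(dev, &list);

	/* One rollback_registered_many() pass handles the whole batch. */
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

The br_net_exit() and default_device_exit_batch() conversions elsewhere in this series follow the same queue-then-batch shape.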
*/ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); /* * Flush the unicast and multicast chains @@ -5375,10 +5579,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Actually switch the network namespace */ dev_net_set(dev, net); - /* Assign the new device name */ - if (destname != dev->name) - strcpy(dev->name, destname); - /* If there is an ifindex conflict assign a new one */ if (__dev_get_by_index(net, dev->ifindex)) { int iflink = (dev->iflink == dev->ifindex); @@ -5483,7 +5683,7 @@ unsigned long netdev_increment_features(unsigned long all, unsigned long one, one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO; + all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; all |= one & mask & NETIF_F_ONE_FOR_ALL; return all; @@ -5565,14 +5765,13 @@ static struct pernet_operations __net_initdata netdev_net_ops = { static void __net_exit default_device_exit(struct net *net) { - struct net_device *dev; + struct net_device *dev, *aux; /* - * Push all migratable of the network devices back to the + * Push all migratable network devices back to the * initial network namespace */ rtnl_lock(); -restart: - for_each_netdev(net, dev) { + for_each_netdev_safe(net, dev, aux) { int err; char fb_name[IFNAMSIZ]; @@ -5580,11 +5779,9 @@ restart: if (dev->features & NETIF_F_NETNS_LOCAL) continue; - /* Delete virtual devices */ - if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) { - dev->rtnl_link_ops->dellink(dev); - goto restart; - } + /* Leave virtual devices for the generic cleanup */ + if (dev->rtnl_link_ops) + continue; /* Push remaing network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); @@ -5594,13 +5791,37 @@ restart: __func__, dev->name, err); BUG(); } - goto restart; } rtnl_unlock(); } +static void __net_exit default_device_exit_batch(struct list_head *net_list) +{ + /* At exit all network devices most be removed from a network + * namespace. Do this in the reverse order of registeration. + * Do this across as many network namespaces as possible to + * improve batching efficiency. 
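For orientation, a hedged sketch of how a notifier subscriber might treat the new NETDEV_UNREGISTER_BATCH event used above (hypothetical example_* names): do the cheap per-device work on NETDEV_UNREGISTER and defer any expensive global flush to the single _BATCH notification.

#include <linux/netdevice.h>
#include <linux/notifier.h>

static int example_device_event(struct notifier_block *nb,
                                unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        switch (event) {
        case NETDEV_UNREGISTER:
                /* cheap, per-device teardown for dev */
                pr_debug("example: unregister %s\n", dev->name);
                break;
        case NETDEV_UNREGISTER_BATCH:
                /* expensive work (e.g. cache flushes) done once per batch */
                break;
        }
        return NOTIFY_DONE;
}

/* registered with register_netdevice_notifier(&example_notifier) */
static struct notifier_block example_notifier = {
        .notifier_call = example_device_event,
};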
+ */ + struct net_device *dev; + struct net *net; + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + for_each_netdev_reverse(net, dev) { + if (dev->rtnl_link_ops) + dev->rtnl_link_ops->dellink(dev, &dev_kill_list); + else + unregister_netdevice_queue(dev, &dev_kill_list); + } + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); +} + static struct pernet_operations __net_initdata default_device_ops = { .exit = default_device_exit, + .exit_batch = default_device_exit_batch, }; /* diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 0a113f26bc9..b8e9d3a8688 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -41,7 +41,7 @@ static void send_dm_alert(struct work_struct *unused); * netlink alerts */ static int trace_state = TRACE_OFF; -static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(trace_state_lock); struct per_cpu_dm_data { struct work_struct dm_alert_work; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4c12ddb5f5e..d8aee584e8d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -198,13 +198,6 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (rc >= 0) info.n_priv_flags = rc; - } else { - /* code path for obsolete hooks */ - - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); @@ -309,6 +302,26 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return ret; } +static int ethtool_reset(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value reset; + int ret; + + if (!dev->ethtool_ops->reset) + return -EOPNOTSUPP; + + if (copy_from_user(&reset, useraddr, sizeof(reset))) + return -EFAULT; + + ret = dev->ethtool_ops->reset(dev, &reset.data); + if (ret) + return ret; + + if (copy_to_user(useraddr, &reset, sizeof(reset))) + return -EFAULT; + return 0; +} + static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; @@ -684,16 +697,10 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) u64 *data; int ret, test_len; - if (!ops->self_test) - return -EOPNOTSUPP; - if (!ops->get_sset_count && !ops->self_test_count) + if (!ops->self_test || !ops->get_sset_count) return -EOPNOTSUPP; - if (ops->get_sset_count) - test_len = ops->get_sset_count(dev, ETH_SS_TEST); - else - /* code path for obsolete hook */ - test_len = ops->self_test_count(dev); + test_len = ops->get_sset_count(dev, ETH_SS_TEST); if (test_len < 0) return test_len; WARN_ON(test_len == 0); @@ -728,36 +735,17 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) u8 *data; int ret; - if (!ops->get_strings) + if (!ops->get_strings || !ops->get_sset_count) return -EOPNOTSUPP; if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - if (ops->get_sset_count) { - ret = ops->get_sset_count(dev, gstrings.string_set); - if (ret < 0) - return ret; - - gstrings.len = ret; - } else { - /* code path for obsolete hooks */ - - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - 
break; - default: - return -EINVAL; - } - } + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); if (!data) @@ -798,16 +786,10 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) u64 *data; int ret, n_stats; - if (!ops->get_ethtool_stats) - return -EOPNOTSUPP; - if (!ops->get_sset_count && !ops->get_stats_count) + if (!ops->get_ethtool_stats || !ops->get_sset_count) return -EOPNOTSUPP; - if (ops->get_sset_count) - n_stats = ops->get_sset_count(dev, ETH_SS_STATS); - else - /* code path for obsolete hook */ - n_stats = ops->get_stats_count(dev); + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); if (n_stats < 0) return n_stats; WARN_ON(n_stats == 0); @@ -1127,6 +1109,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; + case ETHTOOL_RESET: + rc = ethtool_reset(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bd309384f8b..02a3b2c69c1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -72,7 +72,7 @@ static void flush_route_cache(struct fib_rules_ops *ops) ops->flush_cache(ops); } -int fib_rules_register(struct fib_rules_ops *ops) +static int __fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; @@ -102,6 +102,28 @@ errout: return err; } +struct fib_rules_ops * +fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) +{ + struct fib_rules_ops *ops; + int err; + + ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); + if (ops == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&ops->rules_list); + ops->fro_net = net; + + err = __fib_rules_register(ops); + if (err) { + kfree(ops); + ops = ERR_PTR(err); + } + + return ops; +} + EXPORT_SYMBOL_GPL(fib_rules_register); void fib_rules_cleanup_ops(struct fib_rules_ops *ops) @@ -115,6 +137,15 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); +static void fib_rules_put_rcu(struct rcu_head *head) +{ + struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu); + struct net *net = ops->fro_net; + + release_net(net); + kfree(ops); +} + void fib_rules_unregister(struct fib_rules_ops *ops) { struct net *net = ops->fro_net; @@ -124,8 +155,7 @@ void fib_rules_unregister(struct fib_rules_ops *ops) fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); - synchronize_rcu(); - release_net(net); + call_rcu(&ops->rcu, fib_rules_put_rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -135,7 +165,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->ifindex && (rule->ifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->iif)) + goto out; + + if (rule->oifindex && (rule->oifindex != fl->oif)) goto out; if ((rule->mark ^ fl->mark) & rule->mark_mask) @@ -248,14 +281,24 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); - if (tb[FRA_IFNAME]) { + if (tb[FRA_IIFNAME]) { + struct net_device *dev; + + rule->iifindex = -1; + nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, rule->iifname); + if (dev) + rule->iifindex = dev->ifindex; + } + + if (tb[FRA_OIFNAME]) { struct net_device *dev; - rule->ifindex = -1; - nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev 
= __dev_get_by_name(net, rule->ifname); + rule->oifindex = -1; + nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ); + dev = __dev_get_by_name(net, rule->oifname); if (dev) - rule->ifindex = dev->ifindex; + rule->oifindex = dev->ifindex; } if (tb[FRA_FWMARK]) { @@ -274,7 +317,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->flags = frh->flags; rule->table = frh_get_table(frh, tb); - if (!rule->pref && ops->default_pref) + if (!tb[FRA_PRIORITY] && ops->default_pref) rule->pref = ops->default_pref(ops); err = -EINVAL; @@ -388,8 +431,12 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) continue; - if (tb[FRA_IFNAME] && - nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + if (tb[FRA_IIFNAME] && + nla_strcmp(tb[FRA_IIFNAME], rule->iifname)) + continue; + + if (tb[FRA_OIFNAME] && + nla_strcmp(tb[FRA_OIFNAME], rule->oifname)) continue; if (tb[FRA_FWMARK] && @@ -447,7 +494,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) { size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) - + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ + + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */ + + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */ + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ @@ -481,11 +529,18 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL) frh->flags |= FIB_RULE_UNRESOLVED; - if (rule->ifname[0]) { - NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + if (rule->iifname[0]) { + NLA_PUT_STRING(skb, FRA_IIFNAME, rule->iifname); - if (rule->ifindex == -1) - frh->flags |= FIB_RULE_DEV_DETACHED; + if (rule->iifindex == -1) + frh->flags |= FIB_RULE_IIF_DETACHED; + } + + if (rule->oifname[0]) { + NLA_PUT_STRING(skb, FRA_OIFNAME, rule->oifname); + + if (rule->oifindex == -1) + frh->flags |= FIB_RULE_OIF_DETACHED; } if (rule->pref) @@ -600,9 +655,12 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) { - if (rule->ifindex == -1 && - strcmp(dev->name, rule->ifname) == 0) - rule->ifindex = dev->ifindex; + if (rule->iifindex == -1 && + strcmp(dev->name, rule->iifname) == 0) + rule->iifindex = dev->ifindex; + if (rule->oifindex == -1 && + strcmp(dev->name, rule->oifname) == 0) + rule->oifindex = dev->ifindex; } } @@ -610,9 +668,12 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) { struct fib_rule *rule; - list_for_each_entry(rule, rules, list) - if (rule->ifindex == dev->ifindex) - rule->ifindex = -1; + list_for_each_entry(rule, rules, list) { + if (rule->iifindex == dev->ifindex) + rule->iifindex = -1; + if (rule->oifindex == dev->ifindex) + rule->oifindex = -1; + } } diff --git a/net/core/filter.c b/net/core/filter.c index d1d779ca096..08db7b9143a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -303,6 +303,12 @@ load_b: case SKF_AD_IFINDEX: A = skb->dev->ifindex; continue; + case SKF_AD_MARK: + A = skb->mark; + continue; + case SKF_AD_QUEUE: + A = skb->queue_mapping; + continue; case SKF_AD_NLATTR: { struct nlattr *nla; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 8569310268a..393b1d8618e 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -127,6 +127,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) /** * 
gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV * @d: dumping handle + * @b: basic statistics * @r: rate estimator statistics * * Appends the rate estimator statistics to the top level TLV created by @@ -136,8 +137,13 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r) +gnet_stats_copy_rate_est(struct gnet_dump *d, + const struct gnet_stats_basic_packed *b, + struct gnet_stats_rate_est *r) { + if (b && !gen_estimator_active(b, r)) + return 0; + if (d->compat_tc_stats) { d->tc_stats.bps = r->bps; d->tc_stats.pps = r->pps; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bf8f7af699d..5910b555a54 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -35,7 +35,7 @@ static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); -static struct net_device *lweventlist; +static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned char default_operstate(const struct net_device *dev) @@ -89,8 +89,10 @@ static void linkwatch_add_event(struct net_device *dev) unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); - dev->link_watch_next = lweventlist; - lweventlist = dev; + if (list_empty(&dev->link_watch_list)) { + list_add_tail(&dev->link_watch_list, &lweventlist); + dev_hold(dev); + } spin_unlock_irqrestore(&lweventlist_lock, flags); } @@ -133,9 +135,35 @@ static void linkwatch_schedule_work(int urgent) } +static void linkwatch_do_dev(struct net_device *dev) +{ + /* + * Make sure the above read is complete since it can be + * rewritten as soon as we clear the bit below. + */ + smp_mb__before_clear_bit(); + + /* We are about to handle this device, + * so new events can be accepted + */ + clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); + + rfc2863_policy(dev); + if (dev->flags & IFF_UP) { + if (netif_carrier_ok(dev)) + dev_activate(dev); + else + dev_deactivate(dev); + + netdev_state_change(dev); + } + dev_put(dev); +} + static void __linkwatch_run_queue(int urgent_only) { - struct net_device *next; + struct net_device *dev; + LIST_HEAD(wrk); /* * Limit the number of linkwatch events to one @@ -153,46 +181,40 @@ static void __linkwatch_run_queue(int urgent_only) clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); - next = lweventlist; - lweventlist = NULL; - spin_unlock_irq(&lweventlist_lock); + list_splice_init(&lweventlist, &wrk); - while (next) { - struct net_device *dev = next; + while (!list_empty(&wrk)) { - next = dev->link_watch_next; + dev = list_first_entry(&wrk, struct net_device, link_watch_list); + list_del_init(&dev->link_watch_list); if (urgent_only && !linkwatch_urgent_event(dev)) { - linkwatch_add_event(dev); + list_add_tail(&dev->link_watch_list, &lweventlist); continue; } - - /* - * Make sure the above read is complete since it can be - * rewritten as soon as we clear the bit below. 
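A brief sketch of the new calling convention (hypothetical example_qdisc): dump paths now hand gnet_stats_copy_rate_est() the matching basic-stats block, so the estimator TLV is emitted only when gen_estimator_active() reports that an estimator is really attached.

#include <net/gen_stats.h>

struct example_qdisc {
        struct gnet_stats_basic_packed bstats;
        struct gnet_stats_rate_est     rate_est;
};

static int example_dump_stats(struct example_qdisc *q, struct gnet_dump *d)
{
        if (gnet_stats_copy_basic(d, &q->bstats) < 0 ||
            gnet_stats_copy_rate_est(d, &q->bstats, &q->rate_est) < 0)
                return -1;
        return 0;
}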
- */ - smp_mb__before_clear_bit(); - - /* We are about to handle this device, - * so new events can be accepted - */ - clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - - rfc2863_policy(dev); - if (dev->flags & IFF_UP) { - if (netif_carrier_ok(dev)) - dev_activate(dev); - else - dev_deactivate(dev); - - netdev_state_change(dev); - } - - dev_put(dev); + spin_unlock_irq(&lweventlist_lock); + linkwatch_do_dev(dev); + spin_lock_irq(&lweventlist_lock); } - if (lweventlist) + if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); + spin_unlock_irq(&lweventlist_lock); +} + +void linkwatch_forget_dev(struct net_device *dev) +{ + unsigned long flags; + int clean = 0; + + spin_lock_irqsave(&lweventlist_lock, flags); + if (!list_empty(&dev->link_watch_list)) { + list_del_init(&dev->link_watch_list); + clean = 1; + } + spin_unlock_irqrestore(&lweventlist_lock, flags); + if (clean) + linkwatch_do_dev(dev); } @@ -216,8 +238,6 @@ void linkwatch_fire_event(struct net_device *dev) bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { - dev_hold(dev); - linkwatch_add_event(dev); } else if (!urgent) return; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e587e681969..f35377b643e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2092,7 +2092,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (!net_eq(dev_net(n->dev), net)) continue; if (idx < s_idx) goto next; @@ -2566,21 +2566,18 @@ static struct neigh_sysctl_table { } neigh_sysctl_template __read_mostly = { .neigh_vars = { { - .ctl_name = NET_NEIGH_MCAST_SOLICIT, .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_UCAST_SOLICIT, .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_APP_SOLICIT, .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, @@ -2593,38 +2590,30 @@ static struct neigh_sysctl_table { .proc_handler = proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_REACHABLE_TIME, .procname = "base_reachable_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, .procname = "delay_first_probe_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_GC_STALE_TIME, .procname = "gc_stale_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_UNRES_QLEN, .procname = "unres_qlen", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_PROXY_QLEN, .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, @@ -2649,45 +2638,36 @@ static struct neigh_sysctl_table { .proc_handler = proc_dointvec_userhz_jiffies, }, { - .ctl_name = NET_NEIGH_RETRANS_TIME_MS, .procname = "retrans_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, .procname = "base_reachable_time_ms", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_NEIGH_GC_INTERVAL, .procname 
= "gc_interval", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_NEIGH_GC_THRESH1, .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_GC_THRESH2, .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NEIGH_GC_THRESH3, .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, @@ -2699,7 +2679,7 @@ static struct neigh_sysctl_table { int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, - proc_handler *handler, ctl_handler *strategy) + proc_handler *handler) { struct neigh_sysctl_table *t; const char *dev_name_source = NULL; @@ -2710,10 +2690,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, #define NEIGH_CTL_PATH_DEV 3 struct ctl_path neigh_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "proto", .ctl_name = 0, }, - { .procname = "neigh", .ctl_name = 0, }, - { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, }, + { .procname = "net", }, + { .procname = "proto", }, + { .procname = "neigh", }, + { .procname = "default", }, { }, }; @@ -2738,7 +2718,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (dev) { dev_name_source = dev->name; - neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex; /* Terminate the table early */ memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); } else { @@ -2750,31 +2729,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, } - if (handler || strategy) { + if (handler) { /* RetransTime */ t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].strategy = strategy; t->neigh_vars[3].extra1 = dev; - if (!strategy) - t->neigh_vars[3].ctl_name = CTL_UNNUMBERED; /* ReachableTime */ t->neigh_vars[4].proc_handler = handler; - t->neigh_vars[4].strategy = strategy; t->neigh_vars[4].extra1 = dev; - if (!strategy) - t->neigh_vars[4].ctl_name = CTL_UNNUMBERED; /* RetransTime (in milliseconds)*/ t->neigh_vars[12].proc_handler = handler; - t->neigh_vars[12].strategy = strategy; t->neigh_vars[12].extra1 = dev; - if (!strategy) - t->neigh_vars[12].ctl_name = CTL_UNNUMBERED; /* ReachableTime (in milliseconds) */ t->neigh_vars[13].proc_handler = handler; - t->neigh_vars[13].strategy = strategy; t->neigh_vars[13].extra1 = dev; - if (!strategy) - t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; } t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); @@ -2782,9 +2749,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, goto free; neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; - neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id; neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; - neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; t->sysctl_header = register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 427ded84122..fbc1c7472c5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -130,6 +130,48 @@ static ssize_t show_carrier(struct device *dev, return -EINVAL; } +static ssize_t show_speed(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + int ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (netif_running(netdev) && + netdev->ethtool_ops && + netdev->ethtool_ops->get_settings) { 
+ struct ethtool_cmd cmd = { ETHTOOL_GSET }; + + if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) + ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); + } + rtnl_unlock(); + return ret; +} + +static ssize_t show_duplex(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + int ret = -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (netif_running(netdev) && + netdev->ethtool_ops && + netdev->ethtool_ops->get_settings) { + struct ethtool_cmd cmd = { ETHTOOL_GSET }; + + if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) + ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); + } + rtnl_unlock(); + return ret; +} + static ssize_t show_dormant(struct device *dev, struct device_attribute *attr, char *buf) { @@ -259,6 +301,8 @@ static struct device_attribute net_class_attributes[] = { __ATTR(address, S_IRUGO, show_address, NULL), __ATTR(broadcast, S_IRUGO, show_broadcast, NULL), __ATTR(carrier, S_IRUGO, show_carrier, NULL), + __ATTR(speed, S_IRUGO, show_speed, NULL), + __ATTR(duplex, S_IRUGO, show_duplex, NULL), __ATTR(dormant, S_IRUGO, show_dormant, NULL), __ATTR(operstate, S_IRUGO, show_operstate, NULL), __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), @@ -481,7 +525,7 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); - if (dev_net(net) != &init_net) + if (!net_eq(dev_net(net), &init_net)) return; device_del(dev); @@ -500,15 +544,22 @@ int netdev_register_kobject(struct net_device *net) dev_set_name(dev, "%s", net->name); #ifdef CONFIG_SYSFS - *groups++ = &netstat_group; + /* Allow for a device specific group */ + if (*groups) + groups++; + *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT_SYSFS - if (net->wireless_handlers || net->ieee80211_ptr) + if (net->ieee80211_ptr) + *groups++ = &wireless_group; +#ifdef CONFIG_WIRELESS_EXT + else if (net->wireless_handlers) *groups++ = &wireless_group; #endif +#endif #endif /* CONFIG_SYSFS */ - if (dev_net(net) != &init_net) + if (!net_eq(dev_net(net), &init_net)) return 0; return device_add(dev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1c1af2756f3..bd8c4712ea2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -27,14 +27,64 @@ EXPORT_SYMBOL(init_net); #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ +static int ops_init(const struct pernet_operations *ops, struct net *net) +{ + int err; + if (ops->id && ops->size) { + void *data = kzalloc(ops->size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + err = net_assign_generic(net, *ops->id, data); + if (err) { + kfree(data); + return err; + } + } + if (ops->init) + return ops->init(net); + return 0; +} + +static void ops_free(const struct pernet_operations *ops, struct net *net) +{ + if (ops->id && ops->size) { + int id = *ops->id; + kfree(net_generic(net, id)); + } +} + +static void ops_exit_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->exit) { + list_for_each_entry(net, net_exit_list, exit_list) + ops->exit(net); + } + if (ops->exit_batch) + ops->exit_batch(net_exit_list); +} + +static void ops_free_list(const struct pernet_operations *ops, + struct list_head *net_exit_list) +{ + struct net *net; + if (ops->size && ops->id) { + list_for_each_entry(net, net_exit_list, exit_list) + ops_free(ops, net); + } +} + /* * setup_net runs the initializers for the network namespace object. 
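To illustrate the new ops_init()/ops_free() handling (a sketch with hypothetical example_* names): a pernet user that sets .id and .size gets its per-namespace state allocated, zeroed and registered by the core, so its .init only fills in fields and its .exit frees nothing. Registration would then simply be register_pernet_subsys(&example_net_ops).

#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct example_pernet_state {
        int some_setting;
};

static int example_net_id __read_mostly;

static int example_init_net(struct net *net)
{
        struct example_pernet_state *st = net_generic(net, example_net_id);

        st->some_setting = 1;   /* memory already kzalloc'ed by ops_init() */
        return 0;
}

static void example_exit_net(struct net *net)
{
        /* the core frees the net_generic() area via ops_free_list() */
}

static struct pernet_operations example_net_ops = {
        .init = example_init_net,
        .exit = example_exit_net,
        .id   = &example_net_id,
        .size = sizeof(struct example_pernet_state),
};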
*/ static __net_init int setup_net(struct net *net) { /* Must be called with net_mutex held */ - struct pernet_operations *ops; + const struct pernet_operations *ops, *saved_ops; int error = 0; + LIST_HEAD(net_exit_list); atomic_set(&net->count, 1); @@ -43,11 +93,9 @@ static __net_init int setup_net(struct net *net) #endif list_for_each_entry(ops, &pernet_list, list) { - if (ops->init) { - error = ops->init(net); - if (error < 0) - goto out_undo; - } + error = ops_init(ops, net); + if (error < 0) + goto out_undo; } out: return error; @@ -56,10 +104,14 @@ out_undo: /* Walk through the list backwards calling the exit functions * for the pernet modules whose init functions did not fail. */ - list_for_each_entry_continue_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_add(&net->exit_list, &net_exit_list); + saved_ops = ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + ops = saved_ops; + list_for_each_entry_continue_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); rcu_barrier(); goto out; @@ -147,18 +199,29 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return net_create(); } +static DEFINE_SPINLOCK(cleanup_list_lock); +static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ + static void cleanup_net(struct work_struct *work) { - struct pernet_operations *ops; - struct net *net; + const struct pernet_operations *ops; + struct net *net, *tmp; + LIST_HEAD(net_kill_list); + LIST_HEAD(net_exit_list); - net = container_of(work, struct net, work); + /* Atomically snapshot the list of namespaces to cleanup */ + spin_lock_irq(&cleanup_list_lock); + list_replace_init(&cleanup_list, &net_kill_list); + spin_unlock_irq(&cleanup_list_lock); mutex_lock(&net_mutex); /* Don't let anyone else find us. 
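The deferred-cleanup machinery above follows a common idiom worth spelling out (hypothetical example_* names, not from this patch): producers append to a spinlock-protected list, and one work item snapshots the whole list with list_replace_init() so an arbitrary backlog is handled in a single pass.

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct example_item {
        struct list_head list;
};

static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_pending);

static void example_worker(struct work_struct *work)
{
        struct example_item *item, *tmp;
        LIST_HEAD(todo);

        spin_lock_irq(&example_lock);
        list_replace_init(&example_pending, &todo);     /* snapshot atomically */
        spin_unlock_irq(&example_lock);

        list_for_each_entry_safe(item, tmp, &todo, list) {
                list_del_init(&item->list);
                kfree(item);                            /* real teardown goes here */
        }
}
static DECLARE_WORK(example_work, example_worker);

static void example_queue(struct example_item *item)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        list_add_tail(&item->list, &example_pending);
        spin_unlock_irqrestore(&example_lock, flags);
        schedule_work(&example_work);
}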
*/ rtnl_lock(); - list_del_rcu(&net->list); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_del_rcu(&net->list); + list_add_tail(&net->exit_list, &net_exit_list); + } rtnl_unlock(); /* @@ -169,10 +232,12 @@ static void cleanup_net(struct work_struct *work) synchronize_rcu(); /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_exit_list(ops, &net_exit_list); + + /* Free the net generic variables */ + list_for_each_entry_reverse(ops, &pernet_list, list) + ops_free_list(ops, &net_exit_list); mutex_unlock(&net_mutex); @@ -182,14 +247,23 @@ static void cleanup_net(struct work_struct *work) rcu_barrier(); /* Finally it is safe to free my network namespace structure */ - net_free(net); + list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { + list_del_init(&net->exit_list); + net_free(net); + } } +static DECLARE_WORK(net_cleanup_work, cleanup_net); void __put_net(struct net *net) { /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - queue_work(netns_wq, &net->work); + unsigned long flags; + + spin_lock_irqsave(&cleanup_list_lock, flags); + list_add(&net->cleanup_list, &cleanup_list); + spin_unlock_irqrestore(&cleanup_list_lock, flags); + + queue_work(netns_wq, &net_cleanup_work); } EXPORT_SYMBOL_GPL(__put_net); @@ -259,18 +333,20 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); #ifdef CONFIG_NET_NS -static int register_pernet_operations(struct list_head *list, - struct pernet_operations *ops) +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) { - struct net *net, *undo_net; + struct net *net; int error; + LIST_HEAD(net_exit_list); list_add_tail(&ops->list, list); - if (ops->init) { + if (ops->init || (ops->id && ops->size)) { for_each_net(net) { - error = ops->init(net); + error = ops_init(ops, net); if (error) goto out_undo; + list_add_tail(&net->exit_list, &net_exit_list); } } return 0; @@ -278,45 +354,82 @@ static int register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - if (ops->exit) { - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - ops->exit(undo_net); - } - } -undone: + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); return error; } -static void unregister_pernet_operations(struct pernet_operations *ops) +static void __unregister_pernet_operations(struct pernet_operations *ops) { struct net *net; + LIST_HEAD(net_exit_list); list_del(&ops->list); - if (ops->exit) - for_each_net(net) - ops->exit(net); + for_each_net(net) + list_add_tail(&net->exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); } #else +static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) +{ + int err = 0; + err = ops_init(ops, &init_net); + if (err) + ops_free(ops, &init_net); + return err; + +} + +static void __unregister_pernet_operations(struct pernet_operations *ops) +{ + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); +} + +#endif /* CONFIG_NET_NS */ + +static DEFINE_IDA(net_generic_ids); + static int register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { - if (ops->init == 
NULL) - return 0; - return ops->init(&init_net); + int error; + + if (ops->id) { +again: + error = ida_get_new_above(&net_generic_ids, 1, ops->id); + if (error < 0) { + if (error == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + return error; + } + } + error = __register_pernet_operations(list, ops); + if (error) { + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); + } + + return error; } static void unregister_pernet_operations(struct pernet_operations *ops) { - if (ops->exit) - ops->exit(&init_net); + + __unregister_pernet_operations(ops); + rcu_barrier(); + if (ops->id) + ida_remove(&net_generic_ids, *ops->id); } -#endif - -static DEFINE_IDA(net_generic_ids); /** * register_pernet_subsys - register a network namespace subsystem @@ -364,38 +477,6 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); -int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) -{ - int rv; - - mutex_lock(&net_mutex); -again: - rv = ida_get_new_above(&net_generic_ids, 1, id); - if (rv < 0) { - if (rv == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - rv = register_pernet_operations(first_device, ops); - if (rv < 0) - ida_remove(&net_generic_ids, *id); -out: - mutex_unlock(&net_mutex); - return rv; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); - -void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); - /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem @@ -427,30 +508,6 @@ int register_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(register_pernet_device); -int register_pernet_gen_device(int *id, struct pernet_operations *ops) -{ - int error; - mutex_lock(&net_mutex); -again: - error = ida_get_new_above(&net_generic_ids, 1, id); - if (error) { - if (error == -EAGAIN) { - ida_pre_get(&net_generic_ids, GFP_KERNEL); - goto again; - } - goto out; - } - error = register_pernet_operations(&pernet_list, ops); - if (error) - ida_remove(&net_generic_ids, *id); - else if (first_device == &pernet_list) - first_device = &ops->list; -out: - mutex_unlock(&net_mutex); - return error; -} -EXPORT_SYMBOL_GPL(register_pernet_gen_device); - /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate @@ -470,17 +527,6 @@ void unregister_pernet_device(struct pernet_operations *ops) } EXPORT_SYMBOL_GPL(unregister_pernet_device); -void unregister_pernet_gen_device(int id, struct pernet_operations *ops) -{ - mutex_lock(&net_mutex); - if (&ops->list == first_device) - first_device = first_device->next; - unregister_pernet_operations(ops); - ida_remove(&net_generic_ids, id); - mutex_unlock(&net_mutex); -} -EXPORT_SYMBOL_GPL(unregister_pernet_gen_device); - static void net_generic_release(struct rcu_head *rcu) { struct net_generic *ng; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6eb8d47cbf3..a23b45f08ec 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -340,6 +340,7 @@ struct pktgen_dev { __u16 cur_udp_src; __u16 cur_queue_map; __u32 cur_pkt_size; + __u32 last_pkt_size; __u8 hh[14]; /* = { @@ -363,6 +364,7 @@ struct pktgen_dev { * device name (not when the inject is * started as it used to do.) 
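A compact sketch of the ida retry idiom that register_pernet_operations() now relies on (hypothetical example_ida): ida_get_new_above() may return -EAGAIN until ida_pre_get() has preallocated memory, so allocation loops.

#include <linux/gfp.h>
#include <linux/idr.h>

static DEFINE_IDA(example_ida);

static int example_alloc_id(int *id)
{
        int err;

again:
        err = ida_get_new_above(&example_ida, 1, id);
        if (err == -EAGAIN) {
                if (!ida_pre_get(&example_ida, GFP_KERNEL))
                        return -ENOMEM;
                goto again;
        }
        return err;     /* 0 on success, -ENOSPC etc. otherwise */
}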
*/ + char odevname[32]; struct flow_state *flows; unsigned cflows; /* Concurrent flows (config) */ unsigned lflow; /* Flow length (config) */ @@ -426,7 +428,7 @@ static const char version[] = static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname); + const char *ifname, bool exact); static int pktgen_device_event(struct notifier_block *, unsigned long, void *); static void pktgen_run_all_threads(void); static void pktgen_reset_all_threads(void); @@ -528,7 +530,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_printf(seq, " frags: %d delay: %llu clone_skb: %d ifname: %s\n", pkt_dev->nfrags, (unsigned long long) pkt_dev->delay, - pkt_dev->clone_skb, pkt_dev->odev->name); + pkt_dev->clone_skb, pkt_dev->odevname); seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); @@ -1688,13 +1690,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) if (pkt_dev->running) - seq_printf(seq, "%s ", pkt_dev->odev->name); + seq_printf(seq, "%s ", pkt_dev->odevname); seq_printf(seq, "\nStopped: "); list_for_each_entry(pkt_dev, &t->if_list, list) if (!pkt_dev->running) - seq_printf(seq, "%s ", pkt_dev->odev->name); + seq_printf(seq, "%s ", pkt_dev->odevname); if (t->result[0]) seq_printf(seq, "\nResult: %s\n", t->result); @@ -1817,9 +1819,10 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) { struct pktgen_thread *t; struct pktgen_dev *pkt_dev = NULL; + bool exact = (remove == FIND); list_for_each_entry(t, &pktgen_threads, th_list) { - pkt_dev = pktgen_find_dev(t, ifname); + pkt_dev = pktgen_find_dev(t, ifname, exact); if (pkt_dev) { if (remove) { if_lock(t); @@ -1994,7 +1997,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) "queue_map_min (zero-based) (%d) exceeds valid range " "[0 - %d] for (%d) queues on %s, resetting\n", pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odev->name); + pkt_dev->odevname); pkt_dev->queue_map_min = ntxq - 1; } if (pkt_dev->queue_map_max >= ntxq) { @@ -2002,7 +2005,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) "queue_map_max (zero-based) (%d) exceeds valid range " "[0 - %d] for (%d) queues on %s, resetting\n", pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq, - pkt_dev->odev->name); + pkt_dev->odevname); pkt_dev->queue_map_max = ntxq - 1; } @@ -2049,9 +2052,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) read_lock_bh(&idev->lock); for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { - if (ifp->scope == IFA_LINK - && !(ifp-> - flags & IFA_F_TENTATIVE)) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & IFA_F_TENTATIVE)) { ipv6_addr_copy(&pkt_dev-> cur_in6_saddr, &ifp->addr); @@ -3262,7 +3264,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) if (!pkt_dev->running) { printk(KERN_WARNING "pktgen: interface: %s is already " - "stopped\n", pkt_dev->odev->name); + "stopped\n", pkt_dev->odevname); return -EINVAL; } @@ -3434,7 +3436,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->clone_count--; /* back out increment, OOM */ return; } - + pkt_dev->last_pkt_size = pkt_dev->skb->len; pkt_dev->allocated_skbs++; pkt_dev->clone_count = 0; /* reset counter */ } @@ -3446,12 +3448,14 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); 
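In short, the transmit-path reshuffle here amounts to the following pattern (a sketch with a hypothetical helper, not pktgen's exact code): take the tx queue lock, give up immediately if the queue is stopped or frozen, and only then pin the skb and call the driver.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_xmit_one(struct net_device *dev, struct sk_buff *skb,
                            u16 queue_map)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_map);
        int ret = NETDEV_TX_BUSY;

        __netif_tx_lock_bh(txq);
        if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) {
                atomic_inc(&skb->users);         /* keep our reference across xmit */
                ret = ops->ndo_start_xmit(skb, dev);
                if (ret != NETDEV_TX_OK)
                        atomic_dec(&skb->users); /* driver did not consume it */
        }
        __netif_tx_unlock_bh(txq);
        return ret;
}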
__netif_tx_lock_bh(txq); - atomic_inc(&(pkt_dev->skb->users)); - if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) + if (unlikely(netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq))) { ret = NETDEV_TX_BUSY; - else - ret = (*xmit)(pkt_dev->skb, odev); + pkt_dev->last_ok = 0; + goto unlock; + } + atomic_inc(&(pkt_dev->skb->users)); + ret = (*xmit)(pkt_dev->skb, odev); switch (ret) { case NETDEV_TX_OK: @@ -3459,12 +3463,12 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; - pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; + pkt_dev->tx_bytes += pkt_dev->last_pkt_size; break; default: /* Drivers are not supposed to return other values! */ if (net_ratelimit()) pr_info("pktgen: %s xmit error: %d\n", - odev->name, ret); + pkt_dev->odevname, ret); pkt_dev->errors++; /* fallthru */ case NETDEV_TX_LOCKED: @@ -3473,6 +3477,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_dec(&(pkt_dev->skb->users)); pkt_dev->last_ok = 0; } +unlock: __netif_tx_unlock_bh(txq); /* If pkt_dev->count is zero, then run forever */ @@ -3566,13 +3571,18 @@ static int pktgen_thread_worker(void *arg) } static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname) + const char *ifname, bool exact) { struct pktgen_dev *p, *pkt_dev = NULL; - if_lock(t); + size_t len = strlen(ifname); + if_lock(t); list_for_each_entry(p, &t->if_list, list) - if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) { + if (strncmp(p->odevname, ifname, len) == 0) { + if (p->odevname[len]) { + if (exact || p->odevname[len] != '@') + continue; + } pkt_dev = p; break; } @@ -3615,6 +3625,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) { struct pktgen_dev *pkt_dev; int err; + int node = cpu_to_node(t->cpu); /* We don't allow a device to be on several threads */ @@ -3624,11 +3635,13 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return -EBUSY; } - pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); + pkt_dev = kzalloc_node(sizeof(struct pktgen_dev), GFP_KERNEL, node); if (!pkt_dev) return -ENOMEM; - pkt_dev->flows = vmalloc(MAX_CFLOWS * sizeof(struct flow_state)); + strcpy(pkt_dev->odevname, ifname); + pkt_dev->flows = vmalloc_node(MAX_CFLOWS * sizeof(struct flow_state), + node); if (pkt_dev->flows == NULL) { kfree(pkt_dev); return -ENOMEM; @@ -3690,7 +3703,8 @@ static int __init pktgen_create_thread(int cpu) struct proc_dir_entry *pe; struct task_struct *p; - t = kzalloc(sizeof(struct pktgen_thread), GFP_KERNEL); + t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, + cpu_to_node(cpu)); if (!t) { printk(KERN_ERR "pktgen: ERROR: out of memory, can't " "create new thread.\n"); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eb42873f2a3..33148a56819 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -38,7 +38,6 @@ #include <asm/uaccess.h> #include <asm/system.h> -#include <asm/string.h> #include <linux/inet.h> #include <linux/netdevice.h> @@ -53,8 +52,7 @@ #include <net/rtnetlink.h> #include <net/net_namespace.h> -struct rtnl_link -{ +struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; }; @@ -65,6 +63,7 @@ void rtnl_lock(void) { mutex_lock(&rtnl_mutex); } +EXPORT_SYMBOL(rtnl_lock); void __rtnl_unlock(void) { @@ -76,16 +75,19 @@ void rtnl_unlock(void) /* This fellow will unlock it for us. 
*/ netdev_run_todo(); } +EXPORT_SYMBOL(rtnl_unlock); int rtnl_trylock(void) { return mutex_trylock(&rtnl_mutex); } +EXPORT_SYMBOL(rtnl_trylock); int rtnl_is_locked(void) { return mutex_is_locked(&rtnl_mutex); } +EXPORT_SYMBOL(rtnl_is_locked); static struct rtnl_link *rtnl_msg_handlers[NPROTO]; @@ -168,7 +170,6 @@ int __rtnl_register(int protocol, int msgtype, return 0; } - EXPORT_SYMBOL_GPL(__rtnl_register); /** @@ -188,7 +189,6 @@ void rtnl_register(int protocol, int msgtype, "protocol = %d, message type = %d\n", protocol, msgtype); } - EXPORT_SYMBOL_GPL(rtnl_register); /** @@ -213,7 +213,6 @@ int rtnl_unregister(int protocol, int msgtype) return 0; } - EXPORT_SYMBOL_GPL(rtnl_unregister); /** @@ -230,7 +229,6 @@ void rtnl_unregister_all(int protocol) kfree(rtnl_msg_handlers[protocol]); rtnl_msg_handlers[protocol] = NULL; } - EXPORT_SYMBOL_GPL(rtnl_unregister_all); static LIST_HEAD(link_ops); @@ -248,12 +246,11 @@ static LIST_HEAD(link_ops); int __rtnl_link_register(struct rtnl_link_ops *ops) { if (!ops->dellink) - ops->dellink = unregister_netdevice; + ops->dellink = unregister_netdevice_queue; list_add_tail(&ops->list, &link_ops); return 0; } - EXPORT_SYMBOL_GPL(__rtnl_link_register); /** @@ -271,19 +268,18 @@ int rtnl_link_register(struct rtnl_link_ops *ops) rtnl_unlock(); return err; } - EXPORT_SYMBOL_GPL(rtnl_link_register); static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) { struct net_device *dev; -restart: + LIST_HEAD(list_kill); + for_each_netdev(net, dev) { - if (dev->rtnl_link_ops == ops) { - ops->dellink(dev); - goto restart; - } + if (dev->rtnl_link_ops == ops) + ops->dellink(dev, &list_kill); } + unregister_netdevice_many(&list_kill); } void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) @@ -309,7 +305,6 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } list_del(&ops->list); } - EXPORT_SYMBOL_GPL(__rtnl_link_unregister); /** @@ -322,7 +317,6 @@ void rtnl_link_unregister(struct rtnl_link_ops *ops) __rtnl_link_unregister(ops); rtnl_unlock(); } - EXPORT_SYMBOL_GPL(rtnl_link_unregister); static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) @@ -427,12 +421,13 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data struct rtattr *rta; int size = RTA_LENGTH(attrlen); - rta = (struct rtattr*)skb_put(skb, RTA_ALIGN(size)); + rta = (struct rtattr *)skb_put(skb, RTA_ALIGN(size)); rta->rta_type = attrtype; rta->rta_len = size; memcpy(RTA_DATA(rta), data, attrlen); memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); } +EXPORT_SYMBOL(__rta_fill); int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) { @@ -454,6 +449,7 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } +EXPORT_SYMBOL(rtnl_unicast); void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) @@ -466,6 +462,7 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, nlmsg_notify(rtnl, skb, pid, group, report, flags); } +EXPORT_SYMBOL(rtnl_notify); void rtnl_set_sk_err(struct net *net, u32 group, int error) { @@ -473,6 +470,7 @@ void rtnl_set_sk_err(struct net *net, u32 group, int error) netlink_set_err(rtnl, 0, group, error); } +EXPORT_SYMBOL(rtnl_set_sk_err); int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { @@ -501,6 +499,7 @@ nla_put_failure: nla_nest_cancel(skb, mx); return -EMSGSIZE; } +EXPORT_SYMBOL(rtnetlink_put_metrics); int 
rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, u32 ts, u32 tsage, long expires, u32 error) @@ -520,14 +519,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } - EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); static void set_operstate(struct net_device *dev, unsigned char transition) { unsigned char operstate = dev->operstate; - switch(transition) { + switch (transition) { case IF_OPER_UP: if ((operstate == IF_OPER_DORMANT || operstate == IF_OPER_UNKNOWN) && @@ -682,22 +680,33 @@ nla_put_failure: static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx; - int s_idx = cb->args[0]; + int h, s_h; + int idx = 0, s_idx; struct net_device *dev; - - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) - break; + struct hlist_head *head; + struct hlist_node *node; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI) <= 0) + goto out; cont: - idx++; + idx++; + } } - cb->args[0] = idx; +out: + cb->args[1] = idx; + cb->args[0] = h; return skb->len; } @@ -717,12 +726,27 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; +EXPORT_SYMBOL(ifla_policy); static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_KIND] = { .type = NLA_STRING }, [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; +struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) +{ + struct net *net; + /* Examine the link attributes and figure out which + * network namespace we are talking about. 
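As a hedged illustration of the helper whose kernel-doc begins above (hypothetical example_newlink, loosely modelled on how a two-device link type might use it): a ->newlink() implementation can call rtnl_link_get_net() to resolve IFLA_NET_NS_PID and must drop the returned reference with put_net() when done.

#include <linux/err.h>
#include <net/net_namespace.h>
#include <net/rtnetlink.h>

static int example_newlink(struct net *src_net, struct net_device *dev,
                           struct nlattr *tb[], struct nlattr *data[])
{
        struct net *peer_net = rtnl_link_get_net(src_net, tb);
        int err;

        if (IS_ERR(peer_net))
                return PTR_ERR(peer_net);

        /* ... allocate and register a peer device inside peer_net ... */
        err = register_netdevice(dev);

        put_net(peer_net);      /* rtnl_link_get_net() returned a held reference */
        return err;
}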
+ */ + if (tb[IFLA_NET_NS_PID]) + net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + else + net = get_net(src_net); + return net; +} +EXPORT_SYMBOL(rtnl_link_get_net); + static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) { if (dev) { @@ -746,8 +770,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, int err; if (tb[IFLA_NET_NS_PID]) { - struct net *net; - net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); + struct net *net = rtnl_link_get_net(dev_net(dev), tb); if (IS_ERR(net)) { err = PTR_ERR(net); goto errout; @@ -910,9 +933,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = dev_get_by_name(net, ifname); + dev = __dev_get_by_name(net, ifname); else goto errout; @@ -921,12 +944,11 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto errout; } - if ((err = validate_linkmsg(dev, tb)) < 0) - goto errout_dev; + err = validate_linkmsg(dev, tb); + if (err < 0) + goto errout; err = do_setlink(dev, ifm, tb, ifname, 0); -errout_dev: - dev_put(dev); errout: return err; } @@ -963,12 +985,12 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!ops) return -EOPNOTSUPP; - ops->dellink(dev); + ops->dellink(dev, NULL); return 0; } -struct net_device *rtnl_create_link(struct net *net, char *ifname, - const struct rtnl_link_ops *ops, struct nlattr *tb[]) +struct net_device *rtnl_create_link(struct net *src_net, struct net *net, + char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; struct net_device *dev; @@ -976,7 +998,8 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, unsigned int real_num_queues = 1; if (ops->get_tx_queues) { - err = ops->get_tx_queues(net, tb, &num_queues, &real_num_queues); + err = ops->get_tx_queues(src_net, tb, &num_queues, + &real_num_queues); if (err) goto err; } @@ -985,16 +1008,16 @@ struct net_device *rtnl_create_link(struct net *net, char *ifname, if (!dev) goto err; + dev_net_set(dev, net); + dev->rtnl_link_ops = ops; dev->real_num_tx_queues = real_num_queues; + if (strchr(dev->name, '%')) { err = dev_alloc_name(dev, dev->name); if (err < 0) goto err_free; } - dev_net_set(dev, net); - dev->rtnl_link_ops = ops; - if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); if (tb[IFLA_ADDRESS]) @@ -1017,6 +1040,7 @@ err_free: err: return ERR_PTR(err); } +EXPORT_SYMBOL(rtnl_create_link); static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { @@ -1050,7 +1074,8 @@ replay: else dev = NULL; - if ((err = validate_linkmsg(dev, tb)) < 0) + err = validate_linkmsg(dev, tb); + if (err < 0) return err; if (tb[IFLA_LINKINFO]) { @@ -1071,6 +1096,7 @@ replay: if (1) { struct nlattr *attr[ops ? 
ops->maxtype + 1 : 0], **data = NULL; + struct net *dest_net; if (ops) { if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { @@ -1135,17 +1161,19 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = rtnl_create_link(net, ifname, ops, tb); + dest_net = rtnl_link_get_net(net, tb); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) err = PTR_ERR(dev); else if (ops->newlink) - err = ops->newlink(dev, tb, data); + err = ops->newlink(net, dev, tb, data); else err = register_netdevice(dev); - if (err < 0 && !IS_ERR(dev)) free_netdev(dev); + + put_net(dest_net); return err; } } @@ -1154,6 +1182,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; + char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; @@ -1163,19 +1192,23 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (err < 0) return err; + if (tb[IFLA_IFNAME]) + nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); + ifm = nlmsg_data(nlh); - if (ifm->ifi_index > 0) { - dev = dev_get_by_index(net, ifm->ifi_index); - if (dev == NULL) - return -ENODEV; - } else + if (ifm->ifi_index > 0) + dev = __dev_get_by_index(net, ifm->ifi_index); + else if (tb[IFLA_IFNAME]) + dev = __dev_get_by_name(net, ifname); + else return -EINVAL; + if (dev == NULL) + return -ENODEV; + nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); - if (nskb == NULL) { - err = -ENOBUFS; - goto errout; - } + if (nskb == NULL) + return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); @@ -1183,11 +1216,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); - goto errout; - } - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); -errout: - dev_put(dev); + } else + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); return err; } @@ -1199,7 +1229,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; - for (idx=1; idx<NPROTO; idx++) { + for (idx = 1; idx < NPROTO; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; if (idx < s_idx || idx == PF_PACKET) continue; @@ -1266,7 +1296,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) return 0; - family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family; + family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; if (family >= NPROTO) return -EAFNOSUPPORT; @@ -1299,7 +1329,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_len > min_len) { int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void*)nlh + NLMSG_ALIGN(min_len); + struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); while (RTA_OK(attr, attrlen)) { unsigned flavor = attr->rta_type; @@ -1405,14 +1435,3 @@ void __init rtnetlink_init(void) rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all); } -EXPORT_SYMBOL(__rta_fill); -EXPORT_SYMBOL(rtnetlink_put_metrics); -EXPORT_SYMBOL(rtnl_lock); -EXPORT_SYMBOL(rtnl_trylock); -EXPORT_SYMBOL(rtnl_unlock); -EXPORT_SYMBOL(rtnl_is_locked); -EXPORT_SYMBOL(rtnl_unicast); -EXPORT_SYMBOL(rtnl_notify); -EXPORT_SYMBOL(rtnl_set_sk_err); -EXPORT_SYMBOL(rtnl_create_link); -EXPORT_SYMBOL(ifla_policy); diff --git a/net/core/skb_dma_map.c 
b/net/core/skb_dma_map.c deleted file mode 100644 index 79687dfd695..00000000000 --- a/net/core/skb_dma_map.c +++ /dev/null @@ -1,65 +0,0 @@ -/* skb_dma_map.c: DMA mapping helpers for socket buffers. - * - * Copyright (C) David S. Miller <davem@davemloft.net> - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/dma-mapping.h> -#include <linux/skbuff.h> - -int skb_dma_map(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir) -{ - struct skb_shared_info *sp = skb_shinfo(skb); - dma_addr_t map; - int i; - - map = dma_map_single(dev, skb->data, - skb_headlen(skb), dir); - if (dma_mapping_error(dev, map)) - goto out_err; - - sp->dma_head = map; - for (i = 0; i < sp->nr_frags; i++) { - skb_frag_t *fp = &sp->frags[i]; - - map = dma_map_page(dev, fp->page, fp->page_offset, - fp->size, dir); - if (dma_mapping_error(dev, map)) - goto unwind; - sp->dma_maps[i] = map; - } - - return 0; - -unwind: - while (--i >= 0) { - skb_frag_t *fp = &sp->frags[i]; - - dma_unmap_page(dev, sp->dma_maps[i], - fp->size, dir); - } - dma_unmap_single(dev, sp->dma_head, - skb_headlen(skb), dir); -out_err: - return -ENOMEM; -} -EXPORT_SYMBOL(skb_dma_map); - -void skb_dma_unmap(struct device *dev, struct sk_buff *skb, - enum dma_data_direction dir) -{ - struct skb_shared_info *sp = skb_shinfo(skb); - int i; - - dma_unmap_single(dev, sp->dma_head, - skb_headlen(skb), dir); - for (i = 0; i < sp->nr_frags; i++) { - skb_frag_t *fp = &sp->frags[i]; - - dma_unmap_page(dev, sp->dma_maps[i], - fp->size, dir); - } -} -EXPORT_SYMBOL(skb_dma_unmap); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 80a96166df3..93c4e060c91 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -93,7 +93,7 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, /* Pipe buffer operations for a socket. */ -static struct pipe_buf_operations sock_pipe_buf_ops = { +static const struct pipe_buf_operations sock_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, @@ -493,6 +493,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) { struct skb_shared_info *shinfo; + if (irqs_disabled()) + return 0; + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) return 0; @@ -546,7 +549,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif new->protocol = old->protocol; new->mark = old->mark; - new->iif = old->iif; + new->skb_iif = old->skb_iif; __nf_copy(new, old); #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) @@ -2701,7 +2704,8 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = 1; goto done; - } + } else if (skb_gro_len(p) != pinfo->gso_size) + return -E2BIG; headroom = skb_headroom(p); nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); diff --git a/net/core/sock.c b/net/core/sock.c index 7626b6aacd6..76ff58d43e2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -274,25 +274,27 @@ static void sock_disable_timestamp(struct sock *sk, int flag) int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - int err = 0; + int err; int skb_len; + unsigned long flags; + struct sk_buff_head *list = &sk->sk_receive_queue; /* Cast sk->rcvbuf to unsigned... 
It's pointless, but reduces number of warnings when compiling with -W --ANK */ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) { - err = -ENOMEM; - goto out; + atomic_inc(&sk->sk_drops); + return -ENOMEM; } err = sk_filter(sk, skb); if (err) - goto out; + return err; if (!sk_rmem_schedule(sk, skb->truesize)) { - err = -ENOBUFS; - goto out; + atomic_inc(&sk->sk_drops); + return -ENOBUFS; } skb->dev = NULL; @@ -305,12 +307,14 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ skb_len = skb->len; - skb_queue_tail(&sk->sk_receive_queue, skb); + spin_lock_irqsave(&list->lock, flags); + skb->dropcount = atomic_read(&sk->sk_drops); + __skb_queue_tail(list, skb); + spin_unlock_irqrestore(&list->lock, flags); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb_len); -out: - return err; + return 0; } EXPORT_SYMBOL(sock_queue_rcv_skb); @@ -348,11 +352,18 @@ discard_and_relse: } EXPORT_SYMBOL(sk_receive_skb); +void sk_reset_txq(struct sock *sk) +{ + sk_tx_queue_clear(sk); +} +EXPORT_SYMBOL(sk_reset_txq); + struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = sk->sk_dst_cache; if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + sk_tx_queue_clear(sk); sk->sk_dst_cache = NULL; dst_release(dst); return NULL; @@ -406,17 +417,18 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) if (copy_from_user(devname, optval, optlen)) goto out; - if (devname[0] == '\0') { - index = 0; - } else { - struct net_device *dev = dev_get_by_name(net, devname); + index = 0; + if (devname[0] != '\0') { + struct net_device *dev; + rcu_read_lock(); + dev = dev_get_by_name_rcu(net, devname); + if (dev) + index = dev->ifindex; + rcu_read_unlock(); ret = -ENODEV; if (!dev) goto out; - - index = dev->ifindex; - dev_put(dev); } lock_sock(sk); @@ -702,6 +714,12 @@ set_rcvbuf: /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ + case SO_RXQ_OVFL: + if (valbool) + sock_set_flag(sk, SOCK_RXQ_OVFL); + else + sock_reset_flag(sk, SOCK_RXQ_OVFL); + break; default: ret = -ENOPROTOOPT; break; @@ -901,6 +919,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_mark; break; + case SO_RXQ_OVFL: + v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); + break; + default: return -ENOPROTOOPT; } @@ -939,7 +961,8 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) void *sptr = nsk->sk_security; #endif BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != - sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); + sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + + sizeof(osk->sk_tx_queue_mapping)); memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); #ifdef CONFIG_SECURITY_NETWORK @@ -983,6 +1006,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; + sk_tx_queue_clear(sk); } return sk; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7db1de0497c..06124872af5 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,14 +10,15 @@ #include <linux/module.h> #include <linux/socket.h> #include <linux/netdevice.h> +#include <linux/ratelimit.h> #include <linux/init.h> + #include <net/ip.h> #include <net/sock.h> static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { - .ctl_name = NET_CORE_WMEM_MAX, .procname = "wmem_max", .data = &sysctl_wmem_max, .maxlen = sizeof(int), @@ -25,7 +26,6 @@ static 
struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_RMEM_MAX, .procname = "rmem_max", .data = &sysctl_rmem_max, .maxlen = sizeof(int), @@ -33,7 +33,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_WMEM_DEFAULT, .procname = "wmem_default", .data = &sysctl_wmem_default, .maxlen = sizeof(int), @@ -41,7 +40,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_RMEM_DEFAULT, .procname = "rmem_default", .data = &sysctl_rmem_default, .maxlen = sizeof(int), @@ -49,7 +47,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_DEV_WEIGHT, .procname = "dev_weight", .data = &weight_p, .maxlen = sizeof(int), @@ -57,7 +54,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_MAX_BACKLOG, .procname = "netdev_max_backlog", .data = &netdev_max_backlog, .maxlen = sizeof(int), @@ -65,16 +61,13 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_MSG_COST, .procname = "message_cost", .data = &net_ratelimit_state.interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_CORE_MSG_BURST, .procname = "message_burst", .data = &net_ratelimit_state.burst, .maxlen = sizeof(int), @@ -82,7 +75,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CORE_OPTMEM_MAX, .procname = "optmem_max", .data = &sysctl_optmem_max, .maxlen = sizeof(int), @@ -91,7 +83,6 @@ static struct ctl_table net_core_table[] = { }, #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, .maxlen = sizeof(int), @@ -99,31 +90,29 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_WARNINGS, .procname = "warnings", .data = &net_msg_warn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static struct ctl_table netns_core_table[] = { { - .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; __net_initdata struct ctl_path net_core_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "core", .ctl_name = NET_CORE, }, + { .procname = "net", }, + { .procname = "core", }, { }, }; @@ -134,7 +123,7 @@ static __net_init int sysctl_core_net_init(struct net *net) net->core.sysctl_somaxconn = SOMAXCONN; tbl = netns_core_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; diff --git a/net/core/utils.c b/net/core/utils.c index 83221aee708..838250241d2 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -24,6 +24,8 @@ #include <linux/types.h> #include <linux/percpu.h> #include <linux/init.h> +#include <linux/ratelimit.h> + #include <net/sock.h> #include <asm/byteorder.h> diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index ac1205df6c8..db9f5b39388 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1085,8 +1085,8 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb, u8 value_byte; u32 value_int; - if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg - || 
!netdev->dcbnl_ops->setbcnrp) + if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg || + !netdev->dcbnl_ops->setbcnrp) return ret; ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, @@ -1126,7 +1126,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = -EINVAL; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e8cf99e880b..a47a8c918ee 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -33,20 +33,20 @@ static int ccid2_debug; #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) -static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) +static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc) { int len = 0; int pipe = 0; - struct ccid2_seq *seqp = hctx->ccid2hctx_seqh; + struct ccid2_seq *seqp = hc->tx_seqh; /* there is data in the chain */ - if (seqp != hctx->ccid2hctx_seqt) { + if (seqp != hc->tx_seqt) { seqp = seqp->ccid2s_prev; len++; if (!seqp->ccid2s_acked) pipe++; - while (seqp != hctx->ccid2hctx_seqt) { + while (seqp != hc->tx_seqt) { struct ccid2_seq *prev = seqp->ccid2s_prev; len++; @@ -63,30 +63,30 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) } } - BUG_ON(pipe != hctx->ccid2hctx_pipe); + BUG_ON(pipe != hc->tx_pipe); ccid2_pr_debug("len of chain=%d\n", len); do { seqp = seqp->ccid2s_prev; len++; - } while (seqp != hctx->ccid2hctx_seqh); + } while (seqp != hc->tx_seqh); ccid2_pr_debug("total len=%d\n", len); - BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); + BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN); } #else #define ccid2_pr_debug(format, a...) -#define ccid2_hc_tx_check_sanity(hctx) +#define ccid2_hc_tx_check_sanity(hc) #endif -static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) +static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) { struct ccid2_seq *seqp; int i; /* check if we have space to preserve the pointer to the buffer */ - if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / - sizeof(struct ccid2_seq*))) + if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) / + sizeof(struct ccid2_seq *))) return -ENOMEM; /* allocate buffer and initialize linked list */ @@ -102,29 +102,29 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx) seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; /* This is the first allocation. Initiate the head and tail. 
*/ - if (hctx->ccid2hctx_seqbufc == 0) - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; + if (hc->tx_seqbufc == 0) + hc->tx_seqh = hc->tx_seqt = seqp; else { /* link the existing list with the one we just created */ - hctx->ccid2hctx_seqh->ccid2s_next = seqp; - seqp->ccid2s_prev = hctx->ccid2hctx_seqh; + hc->tx_seqh->ccid2s_next = seqp; + seqp->ccid2s_prev = hc->tx_seqh; - hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; - seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt; + hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1]; + seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt; } /* store the original pointer to the buffer so we can free it */ - hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; - hctx->ccid2hctx_seqbufc++; + hc->tx_seqbuf[hc->tx_seqbufc] = seqp; + hc->tx_seqbufc++; return 0; } static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) + if (hc->tx_pipe < hc->tx_cwnd) return 0; return 1; /* XXX CCID should dequeue when ready instead of polling */ @@ -133,7 +133,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { struct dccp_sock *dp = dccp_sk(sk); - u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2); + u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from @@ -155,10 +155,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) +static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val) { ccid2_pr_debug("change SRTT to %ld\n", val); - hctx->ccid2hctx_srtt = val; + hc->tx_srtt = val; } static void ccid2_start_rto_timer(struct sock *sk); @@ -166,45 +166,44 @@ static void ccid2_start_rto_timer(struct sock *sk); static void ccid2_hc_tx_rto_expire(unsigned long data) { struct sock *sk = (struct sock *)data; - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); long s; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { - sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, - jiffies + HZ / 5); + sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5); goto out; } ccid2_pr_debug("RTO_EXPIRE\n"); - ccid2_hc_tx_check_sanity(hctx); + ccid2_hc_tx_check_sanity(hc); /* back-off timer */ - hctx->ccid2hctx_rto <<= 1; + hc->tx_rto <<= 1; - s = hctx->ccid2hctx_rto / HZ; + s = hc->tx_rto / HZ; if (s > 60) - hctx->ccid2hctx_rto = 60 * HZ; + hc->tx_rto = 60 * HZ; ccid2_start_rto_timer(sk); /* adjust pipe, cwnd etc */ - hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2; - if (hctx->ccid2hctx_ssthresh < 2) - hctx->ccid2hctx_ssthresh = 2; - hctx->ccid2hctx_cwnd = 1; - hctx->ccid2hctx_pipe = 0; + hc->tx_ssthresh = hc->tx_cwnd / 2; + if (hc->tx_ssthresh < 2) + hc->tx_ssthresh = 2; + hc->tx_cwnd = 1; + hc->tx_pipe = 0; /* clear state about stuff we sent */ - hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; - hctx->ccid2hctx_packets_acked = 0; + hc->tx_seqt = hc->tx_seqh; + hc->tx_packets_acked = 0; /* clear ack ratio state. 
*/ - hctx->ccid2hctx_rpseq = 0; - hctx->ccid2hctx_rpdupack = -1; + hc->tx_rpseq = 0; + hc->tx_rpdupack = -1; ccid2_change_l_ack_ratio(sk, 1); - ccid2_hc_tx_check_sanity(hctx); + ccid2_hc_tx_check_sanity(hc); out: bh_unlock_sock(sk); sock_put(sk); @@ -212,42 +211,40 @@ out: static void ccid2_start_rto_timer(struct sock *sk) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto); + ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto); - BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer)); - sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer, - jiffies + hctx->ccid2hctx_rto); + BUG_ON(timer_pending(&hc->tx_rtotimer)); + sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); } static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); struct ccid2_seq *next; - hctx->ccid2hctx_pipe++; + hc->tx_pipe++; - hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss; - hctx->ccid2hctx_seqh->ccid2s_acked = 0; - hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; + hc->tx_seqh->ccid2s_seq = dp->dccps_gss; + hc->tx_seqh->ccid2s_acked = 0; + hc->tx_seqh->ccid2s_sent = jiffies; - next = hctx->ccid2hctx_seqh->ccid2s_next; + next = hc->tx_seqh->ccid2s_next; /* check if we need to alloc more space */ - if (next == hctx->ccid2hctx_seqt) { - if (ccid2_hc_tx_alloc_seq(hctx)) { + if (next == hc->tx_seqt) { + if (ccid2_hc_tx_alloc_seq(hc)) { DCCP_CRIT("packet history - out of memory!"); /* FIXME: find a more graceful way to bail out */ return; } - next = hctx->ccid2hctx_seqh->ccid2s_next; - BUG_ON(next == hctx->ccid2hctx_seqt); + next = hc->tx_seqh->ccid2s_next; + BUG_ON(next == hc->tx_seqt); } - hctx->ccid2hctx_seqh = next; + hc->tx_seqh = next; - ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, - hctx->ccid2hctx_pipe); + ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe); /* * FIXME: The code below is broken and the variables have been removed @@ -270,12 +267,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) */ #if 0 /* Ack Ratio. Need to maintain a concept of how many windows we sent */ - hctx->ccid2hctx_arsent++; + hc->tx_arsent++; /* We had an ack loss in this window... */ - if (hctx->ccid2hctx_ackloss) { - if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) { - hctx->ccid2hctx_arsent = 0; - hctx->ccid2hctx_ackloss = 0; + if (hc->tx_ackloss) { + if (hc->tx_arsent >= hc->tx_cwnd) { + hc->tx_arsent = 0; + hc->tx_ackloss = 0; } } else { /* No acks lost up to now... 
*/ @@ -285,28 +282,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio - dp->dccps_l_ack_ratio; - denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom; + denom = hc->tx_cwnd * hc->tx_cwnd / denom; - if (hctx->ccid2hctx_arsent >= denom) { + if (hc->tx_arsent >= denom) { ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1); - hctx->ccid2hctx_arsent = 0; + hc->tx_arsent = 0; } } else { /* we can't increase ack ratio further [1] */ - hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/ + hc->tx_arsent = 0; /* or maybe set it to cwnd*/ } } #endif /* setup RTO timer */ - if (!timer_pending(&hctx->ccid2hctx_rtotimer)) + if (!timer_pending(&hc->tx_rtotimer)) ccid2_start_rto_timer(sk); #ifdef CONFIG_IP_DCCP_CCID2_DEBUG do { - struct ccid2_seq *seqp = hctx->ccid2hctx_seqt; + struct ccid2_seq *seqp = hc->tx_seqt; - while (seqp != hctx->ccid2hctx_seqh) { + while (seqp != hc->tx_seqh) { ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", (unsigned long long)seqp->ccid2s_seq, seqp->ccid2s_acked, seqp->ccid2s_sent); @@ -314,7 +311,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) } } while (0); ccid2_pr_debug("=========\n"); - ccid2_hc_tx_check_sanity(hctx); + ccid2_hc_tx_check_sanity(hc); #endif } @@ -382,9 +379,9 @@ out_invalid_option: static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer); + sk_stop_timer(sk, &hc->tx_rtotimer); ccid2_pr_debug("deleted RTO timer\n"); } @@ -392,75 +389,75 @@ static inline void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) { - if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) { - hctx->ccid2hctx_cwnd += 1; - *maxincr -= 1; - hctx->ccid2hctx_packets_acked = 0; + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; } - } else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) { - hctx->ccid2hctx_cwnd += 1; - hctx->ccid2hctx_packets_acked = 0; + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { + hc->tx_cwnd += 1; + hc->tx_packets_acked = 0; } /* update RTO */ - if (hctx->ccid2hctx_srtt == -1 || - time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { + if (hc->tx_srtt == -1 || + time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; int s; /* first measurement */ - if (hctx->ccid2hctx_srtt == -1) { + if (hc->tx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", r, jiffies, (unsigned long long)seqp->ccid2s_seq); - ccid2_change_srtt(hctx, r); - hctx->ccid2hctx_rttvar = r >> 1; + ccid2_change_srtt(hc, r); + hc->tx_rttvar = r >> 1; } else { /* RTTVAR */ - long tmp = hctx->ccid2hctx_srtt - r; + long tmp = hc->tx_srtt - r; long srtt; if (tmp < 0) tmp *= -1; tmp >>= 2; - hctx->ccid2hctx_rttvar *= 3; - hctx->ccid2hctx_rttvar >>= 2; - hctx->ccid2hctx_rttvar += tmp; + hc->tx_rttvar *= 3; + hc->tx_rttvar >>= 2; + hc->tx_rttvar += tmp; /* SRTT */ - srtt = hctx->ccid2hctx_srtt; + srtt = hc->tx_srtt; srtt *= 7; srtt >>= 3; tmp = r >> 3; srtt += tmp; - ccid2_change_srtt(hctx, srtt); + 
ccid2_change_srtt(hc, srtt); } - s = hctx->ccid2hctx_rttvar << 2; + s = hc->tx_rttvar << 2; /* clock granularity is 1 when based on jiffies */ if (!s) s = 1; - hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s; + hc->tx_rto = hc->tx_srtt + s; /* must be at least a second */ - s = hctx->ccid2hctx_rto / HZ; + s = hc->tx_rto / HZ; /* DCCP doesn't require this [but I like it cuz my code sux] */ #if 1 if (s < 1) - hctx->ccid2hctx_rto = HZ; + hc->tx_rto = HZ; #endif /* max 60 seconds */ if (s > 60) - hctx->ccid2hctx_rto = HZ * 60; + hc->tx_rto = HZ * 60; - hctx->ccid2hctx_lastrtt = jiffies; + hc->tx_lastrtt = jiffies; ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n", - hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar, - hctx->ccid2hctx_rto, HZ, r); + hc->tx_srtt, hc->tx_rttvar, + hc->tx_rto, HZ, r); } /* we got a new ack, so re-start RTO timer */ @@ -470,40 +467,40 @@ static inline void ccid2_new_ack(struct sock *sk, static void ccid2_hc_tx_dec_pipe(struct sock *sk) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - if (hctx->ccid2hctx_pipe == 0) + if (hc->tx_pipe == 0) DCCP_BUG("pipe == 0"); else - hctx->ccid2hctx_pipe--; + hc->tx_pipe--; - if (hctx->ccid2hctx_pipe == 0) + if (hc->tx_pipe == 0) ccid2_hc_tx_kill_rto_timer(sk); } static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { + if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); return; } - hctx->ccid2hctx_last_cong = jiffies; + hc->tx_last_cong = jiffies; - hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U; - hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U); + hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; + hc->tx_ssthresh = max(hc->tx_cwnd, 2U); /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd) - ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd); + if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); } static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); u64 ackno, seqno; struct ccid2_seq *seqp; unsigned char *vector; @@ -512,7 +509,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) int done = 0; unsigned int maxincr = 0; - ccid2_hc_tx_check_sanity(hctx); + ccid2_hc_tx_check_sanity(hc); /* check reverse path congestion */ seqno = DCCP_SKB_CB(skb)->dccpd_seq; @@ -521,21 +518,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * -sorbo. 
*/ /* need to bootstrap */ - if (hctx->ccid2hctx_rpdupack == -1) { - hctx->ccid2hctx_rpdupack = 0; - hctx->ccid2hctx_rpseq = seqno; + if (hc->tx_rpdupack == -1) { + hc->tx_rpdupack = 0; + hc->tx_rpseq = seqno; } else { /* check if packet is consecutive */ - if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1) - hctx->ccid2hctx_rpseq = seqno; + if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1) + hc->tx_rpseq = seqno; /* it's a later packet */ - else if (after48(seqno, hctx->ccid2hctx_rpseq)) { - hctx->ccid2hctx_rpdupack++; + else if (after48(seqno, hc->tx_rpseq)) { + hc->tx_rpdupack++; /* check if we got enough dupacks */ - if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) { - hctx->ccid2hctx_rpdupack = -1; /* XXX lame */ - hctx->ccid2hctx_rpseq = 0; + if (hc->tx_rpdupack >= NUMDUPACK) { + hc->tx_rpdupack = -1; /* XXX lame */ + hc->tx_rpseq = 0; ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); } @@ -544,7 +541,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* check forward path congestion */ /* still didn't send out new data packets */ - if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) + if (hc->tx_seqh == hc->tx_seqt) return; switch (DCCP_SKB_CB(skb)->dccpd_type) { @@ -556,14 +553,14 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; - if (after48(ackno, hctx->ccid2hctx_high_ack)) - hctx->ccid2hctx_high_ack = ackno; + if (after48(ackno, hc->tx_high_ack)) + hc->tx_high_ack = ackno; - seqp = hctx->ccid2hctx_seqt; + seqp = hc->tx_seqt; while (before48(seqp->ccid2s_seq, ackno)) { seqp = seqp->ccid2s_next; - if (seqp == hctx->ccid2hctx_seqh) { - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + if (seqp == hc->tx_seqh) { + seqp = hc->tx_seqh->ccid2s_prev; break; } } @@ -573,7 +570,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * packets per acknowledgement. Rounding up avoids that cwnd is not * advanced when Ack Ratio is 1 and gives a slight edge otherwise. */ - if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) + if (hc->tx_cwnd < hc->tx_ssthresh) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ @@ -592,7 +589,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * seqnos. 
*/ while (after48(seqp->ccid2s_seq, ackno)) { - if (seqp == hctx->ccid2hctx_seqt) { + if (seqp == hc->tx_seqt) { done = 1; break; } @@ -624,7 +621,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) (unsigned long long)seqp->ccid2s_seq); ccid2_hc_tx_dec_pipe(sk); } - if (seqp == hctx->ccid2hctx_seqt) { + if (seqp == hc->tx_seqt) { done = 1; break; } @@ -643,11 +640,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* The state about what is acked should be correct now * Check for NUMDUPACK */ - seqp = hctx->ccid2hctx_seqt; - while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) { + seqp = hc->tx_seqt; + while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) { seqp = seqp->ccid2s_next; - if (seqp == hctx->ccid2hctx_seqh) { - seqp = hctx->ccid2hctx_seqh->ccid2s_prev; + if (seqp == hc->tx_seqh) { + seqp = hc->tx_seqh->ccid2s_prev; break; } } @@ -658,7 +655,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (done == NUMDUPACK) break; } - if (seqp == hctx->ccid2hctx_seqt) + if (seqp == hc->tx_seqt) break; seqp = seqp->ccid2s_prev; } @@ -681,86 +678,86 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid2_congestion_event(sk, seqp); ccid2_hc_tx_dec_pipe(sk); } - if (seqp == hctx->ccid2hctx_seqt) + if (seqp == hc->tx_seqt) break; seqp = seqp->ccid2s_prev; } - hctx->ccid2hctx_seqt = last_acked; + hc->tx_seqt = last_acked; } /* trim acked packets in tail */ - while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) { - if (!hctx->ccid2hctx_seqt->ccid2s_acked) + while (hc->tx_seqt != hc->tx_seqh) { + if (!hc->tx_seqt->ccid2s_acked) break; - hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; + hc->tx_seqt = hc->tx_seqt->ccid2s_next; } - ccid2_hc_tx_check_sanity(hctx); + ccid2_hc_tx_check_sanity(hc); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { - struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); + struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio; /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ - hctx->ccid2hctx_ssthresh = ~0U; + hc->tx_ssthresh = ~0U; /* * RFC 4341, 5: "The cwnd parameter is initialized to at most four * packets for new connections, following the rules from [RFC3390]". * We need to convert the bytes of RFC3390 into the packets of RFC 4341. */ - hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); /* Make sure that Ack Ratio is enabled and within bounds. */ - max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2); + max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio) dp->dccps_l_ack_ratio = max_ratio; /* XXX init ~ to window size... 
*/ - if (ccid2_hc_tx_alloc_seq(hctx)) + if (ccid2_hc_tx_alloc_seq(hc)) return -ENOMEM; - hctx->ccid2hctx_rto = 3 * HZ; - ccid2_change_srtt(hctx, -1); - hctx->ccid2hctx_rttvar = -1; - hctx->ccid2hctx_rpdupack = -1; - hctx->ccid2hctx_last_cong = jiffies; - setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire, + hc->tx_rto = 3 * HZ; + ccid2_change_srtt(hc, -1); + hc->tx_rttvar = -1; + hc->tx_rpdupack = -1; + hc->tx_last_cong = jiffies; + setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); - ccid2_hc_tx_check_sanity(hctx); + ccid2_hc_tx_check_sanity(hc); return 0; } static void ccid2_hc_tx_exit(struct sock *sk) { - struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); int i; ccid2_hc_tx_kill_rto_timer(sk); - for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) - kfree(hctx->ccid2hctx_seqbuf[i]); - hctx->ccid2hctx_seqbufc = 0; + for (i = 0; i < hc->tx_seqbufc; i++) + kfree(hc->tx_seqbuf[i]); + hc->tx_seqbufc = 0; } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { const struct dccp_sock *dp = dccp_sk(sk); - struct ccid2_hc_rx_sock *hcrx = ccid2_hc_rx_sk(sk); + struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk); switch (DCCP_SKB_CB(skb)->dccpd_type) { case DCCP_PKT_DATA: case DCCP_PKT_DATAACK: - hcrx->ccid2hcrx_data++; - if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) { + hc->rx_data++; + if (hc->rx_data >= dp->dccps_r_ack_ratio) { dccp_send_ack(sk); - hcrx->ccid2hcrx_data = 0; + hc->rx_data = 0; } break; } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 326ac90fb90..1ec6a30103b 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -40,34 +40,34 @@ struct ccid2_seq { /** * struct ccid2_hc_tx_sock - CCID2 TX half connection - * @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 - * @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465) - * @ccid2hctx_lastrtt -time RTT was last measured - * @ccid2hctx_rpseq - last consecutive seqno - * @ccid2hctx_rpdupack - dupacks since rpseq + * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 + * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) + * @tx_lastrtt: time RTT was last measured + * @tx_rpseq: last consecutive seqno + * @tx_rpdupack: dupacks since rpseq */ struct ccid2_hc_tx_sock { - u32 ccid2hctx_cwnd; - u32 ccid2hctx_ssthresh; - u32 ccid2hctx_pipe; - u32 ccid2hctx_packets_acked; - struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; - int ccid2hctx_seqbufc; - struct ccid2_seq *ccid2hctx_seqh; - struct ccid2_seq *ccid2hctx_seqt; - long ccid2hctx_rto; - long ccid2hctx_srtt; - long ccid2hctx_rttvar; - unsigned long ccid2hctx_lastrtt; - struct timer_list ccid2hctx_rtotimer; - u64 ccid2hctx_rpseq; - int ccid2hctx_rpdupack; - unsigned long ccid2hctx_last_cong; - u64 ccid2hctx_high_ack; + u32 tx_cwnd; + u32 tx_ssthresh; + u32 tx_pipe; + u32 tx_packets_acked; + struct ccid2_seq *tx_seqbuf[CCID2_SEQBUF_MAX]; + int tx_seqbufc; + struct ccid2_seq *tx_seqh; + struct ccid2_seq *tx_seqt; + long tx_rto; + long tx_srtt; + long tx_rttvar; + unsigned long tx_lastrtt; + struct timer_list tx_rtotimer; + u64 tx_rpseq; + int tx_rpdupack; + unsigned long tx_last_cong; + u64 tx_high_ack; }; struct ccid2_hc_rx_sock { - int ccid2hcrx_data; + int rx_data; }; static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 34dcc798c45..bcd7632299f 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ 
-64,14 +64,14 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) static void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) { - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); + enum ccid3_hc_tx_states oldstate = hc->tx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); WARN_ON(state == oldstate); - hctx->ccid3hctx_state = state; + hc->tx_state = state; } /* @@ -85,37 +85,32 @@ static void ccid3_hc_tx_set_state(struct sock *sk, */ static inline u64 rfc3390_initial_rate(struct sock *sk) { - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - const __u32 w_init = clamp_t(__u32, 4380U, - 2 * hctx->ccid3hctx_s, 4 * hctx->ccid3hctx_s); + const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); + const __u32 w_init = clamp_t(__u32, 4380U, 2 * hc->tx_s, 4 * hc->tx_s); - return scaled_div(w_init << 6, hctx->ccid3hctx_rtt); + return scaled_div(w_init << 6, hc->tx_rtt); } /* * Recalculate t_ipi and delta (should be called whenever X changes) */ -static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx) +static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) { /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */ - hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6, - hctx->ccid3hctx_x); + hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ - hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, - TFRC_OPSYS_HALF_TIME_GRAN); - - ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", - hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta, - hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6)); + hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi, + hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6)); } -static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) +static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) { - u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); + u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count); - return delta / hctx->ccid3hctx_rtt; + return delta / hc->tx_rtt; } /** @@ -130,9 +125,9 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hctx, ktime_t now) */ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) { - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - __u64 min_rate = 2 * hctx->ccid3hctx_x_recv; - const __u64 old_x = hctx->ccid3hctx_x; + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); + __u64 min_rate = 2 * hc->tx_x_recv; + const __u64 old_x = hc->tx_x; ktime_t now = stamp ? *stamp : ktime_get_real(); /* @@ -141,37 +136,31 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) * a sender is idle if it has not sent anything over a 2-RTT-period. * For consistency with X and X_recv, min_rate is also scaled by 2^6. 
*/ - if (ccid3_hc_tx_idle_rtt(hctx, now) >= 2) { + if (ccid3_hc_tx_idle_rtt(hc, now) >= 2) { min_rate = rfc3390_initial_rate(sk); - min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv); + min_rate = max(min_rate, 2 * hc->tx_x_recv); } - if (hctx->ccid3hctx_p > 0) { + if (hc->tx_p > 0) { - hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6, - min_rate); - hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, - (((__u64)hctx->ccid3hctx_s) << 6) / - TFRC_T_MBI); + hc->tx_x = min(((__u64)hc->tx_x_calc) << 6, min_rate); + hc->tx_x = max(hc->tx_x, (((__u64)hc->tx_s) << 6) / TFRC_T_MBI); - } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) - - (s64)hctx->ccid3hctx_rtt >= 0) { + } else if (ktime_us_delta(now, hc->tx_t_ld) - (s64)hc->tx_rtt >= 0) { - hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate); - hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, - scaled_div(((__u64)hctx->ccid3hctx_s) << 6, - hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = now; + hc->tx_x = min(2 * hc->tx_x, min_rate); + hc->tx_x = max(hc->tx_x, + scaled_div(((__u64)hc->tx_s) << 6, hc->tx_rtt)); + hc->tx_t_ld = now; } - if (hctx->ccid3hctx_x != old_x) { + if (hc->tx_x != old_x) { ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " "X_recv=%u\n", (unsigned)(old_x >> 6), - (unsigned)(hctx->ccid3hctx_x >> 6), - hctx->ccid3hctx_x_calc, - (unsigned)(hctx->ccid3hctx_x_recv >> 6)); + (unsigned)(hc->tx_x >> 6), hc->tx_x_calc, + (unsigned)(hc->tx_x_recv >> 6)); - ccid3_update_send_interval(hctx); + ccid3_update_send_interval(hc); } } @@ -179,37 +168,37 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) * @len: DCCP packet payload size in bytes */ -static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) +static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len) { - const u16 old_s = hctx->ccid3hctx_s; + const u16 old_s = hc->tx_s; - hctx->ccid3hctx_s = tfrc_ewma(hctx->ccid3hctx_s, len, 9); + hc->tx_s = tfrc_ewma(hc->tx_s, len, 9); - if (hctx->ccid3hctx_s != old_s) - ccid3_update_send_interval(hctx); + if (hc->tx_s != old_s) + ccid3_update_send_interval(hc); } /* * Update Window Counter using the algorithm from [RFC 4342, 8.1]. * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt(). 
*/ -static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, +static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc, ktime_t now) { - u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count), - quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt; + u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count), + quarter_rtts = (4 * delta) / hc->tx_rtt; if (quarter_rtts > 0) { - hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U); - hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ + hc->tx_t_last_win_count = now; + hc->tx_last_win_count += min(quarter_rtts, 5U); + hc->tx_last_win_count &= 0xF; /* mod 16 */ } } static void ccid3_hc_tx_no_feedback_timer(unsigned long data) { struct sock *sk = (struct sock *)data; - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -220,24 +209,23 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) } ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, - ccid3_tx_state_name(hctx->ccid3hctx_state)); + ccid3_tx_state_name(hc->tx_state)); - if (hctx->ccid3hctx_state == TFRC_SSTATE_FBACK) + if (hc->tx_state == TFRC_SSTATE_FBACK) ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); - else if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) + else if (hc->tx_state != TFRC_SSTATE_NO_FBACK) goto out; /* * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 */ - if (hctx->ccid3hctx_t_rto == 0 || /* no feedback received yet */ - hctx->ccid3hctx_p == 0) { + if (hc->tx_t_rto == 0 || /* no feedback received yet */ + hc->tx_p == 0) { /* halve send rate directly */ - hctx->ccid3hctx_x = max(hctx->ccid3hctx_x / 2, - (((__u64)hctx->ccid3hctx_s) << 6) / - TFRC_T_MBI); - ccid3_update_send_interval(hctx); + hc->tx_x = max(hc->tx_x / 2, + (((__u64)hc->tx_s) << 6) / TFRC_T_MBI); + ccid3_update_send_interval(hc); } else { /* * Modify the cached value of X_recv @@ -249,33 +237,32 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * * Note that X_recv is scaled by 2^6 while X_calc is not */ - BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc); + BUG_ON(hc->tx_p && !hc->tx_x_calc); - if (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5)) - hctx->ccid3hctx_x_recv = - max(hctx->ccid3hctx_x_recv / 2, - (((__u64)hctx->ccid3hctx_s) << 6) / - (2 * TFRC_T_MBI)); + if (hc->tx_x_calc > (hc->tx_x_recv >> 5)) + hc->tx_x_recv = + max(hc->tx_x_recv / 2, + (((__u64)hc->tx_s) << 6) / (2*TFRC_T_MBI)); else { - hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; - hctx->ccid3hctx_x_recv <<= 4; + hc->tx_x_recv = hc->tx_x_calc; + hc->tx_x_recv <<= 4; } ccid3_hc_tx_update_x(sk, NULL); } ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n", - (unsigned long long)hctx->ccid3hctx_x); + (unsigned long long)hc->tx_x); /* * Set new timeout for the nofeedback timer. * See comments in packet_recv() regarding the value of t_RTO. 
*/ - if (unlikely(hctx->ccid3hctx_t_rto == 0)) /* no feedback yet */ + if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ t_nfb = TFRC_INITIAL_TIMEOUT; else - t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); + t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); restart_timer: - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + sk_reset_timer(sk, &hc->tx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); out: bh_unlock_sock(sk); @@ -291,7 +278,7 @@ out: static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); ktime_t now = ktime_get_real(); s64 delay; @@ -303,18 +290,17 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) if (unlikely(skb->len == 0)) return -EBADMSG; - switch (hctx->ccid3hctx_state) { + switch (hc->tx_state) { case TFRC_SSTATE_NO_SENT: - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, - (jiffies + - usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); - hctx->ccid3hctx_last_win_count = 0; - hctx->ccid3hctx_t_last_win_count = now; + sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); + hc->tx_last_win_count = 0; + hc->tx_t_last_win_count = now; /* Set t_0 for initial packet */ - hctx->ccid3hctx_t_nom = now; + hc->tx_t_nom = now; - hctx->ccid3hctx_s = skb->len; + hc->tx_s = skb->len; /* * Use initial RTT sample when available: recommended by erratum @@ -323,9 +309,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) */ if (dp->dccps_syn_rtt) { ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); - hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; - hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = now; + hc->tx_rtt = dp->dccps_syn_rtt; + hc->tx_x = rfc3390_initial_rate(sk); + hc->tx_t_ld = now; } else { /* * Sender does not have RTT sample: @@ -333,17 +319,17 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * is needed in several parts (e.g. window counter); * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. */ - hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT; - hctx->ccid3hctx_x = hctx->ccid3hctx_s; - hctx->ccid3hctx_x <<= 6; + hc->tx_rtt = DCCP_FALLBACK_RTT; + hc->tx_x = hc->tx_s; + hc->tx_x <<= 6; } - ccid3_update_send_interval(hctx); + ccid3_update_send_interval(hc); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); break; case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: - delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); + delay = ktime_us_delta(hc->tx_t_nom, now); ccid3_pr_debug("delay=%ld\n", (long)delay); /* * Scheduling of packet transmissions [RFC 3448, 4.6] @@ -353,10 +339,10 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) * else * // send the packet in (t_nom - t_now) milliseconds. */ - if (delay - (s64)hctx->ccid3hctx_delta >= 1000) + if (delay - (s64)hc->tx_delta >= 1000) return (u32)delay / 1000L; - ccid3_hc_tx_update_win_count(hctx, now); + ccid3_hc_tx_update_win_count(hc, now); break; case TFRC_SSTATE_TERM: DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); @@ -365,28 +351,27 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) /* prepare to send now (add options etc.) 
*/ dp->dccps_hc_tx_insert_options = 1; - DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + DCCP_SKB_CB(skb)->dccpd_ccval = hc->tx_last_win_count; /* set the nominal send time for the next following packet */ - hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, - hctx->ccid3hctx_t_ipi); + hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi); return 0; } static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len) { - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); - ccid3_hc_tx_update_s(hctx, len); + ccid3_hc_tx_update_s(hc, len); - if (tfrc_tx_hist_add(&hctx->ccid3hctx_hist, dccp_sk(sk)->dccps_gss)) + if (tfrc_tx_hist_add(&hc->tx_hist, dccp_sk(sk)->dccps_gss)) DCCP_CRIT("packet history - out of memory!"); } static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; ktime_t now; unsigned long t_nfb; @@ -397,15 +382,15 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) return; /* ... and only in the established state */ - if (hctx->ccid3hctx_state != TFRC_SSTATE_FBACK && - hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) + if (hc->tx_state != TFRC_SSTATE_FBACK && + hc->tx_state != TFRC_SSTATE_NO_FBACK) return; - opt_recv = &hctx->ccid3hctx_options_received; + opt_recv = &hc->tx_options_received; now = ktime_get_real(); /* Estimate RTT from history if ACK number is valid */ - r_sample = tfrc_tx_hist_rtt(hctx->ccid3hctx_hist, + r_sample = tfrc_tx_hist_rtt(hc->tx_hist, DCCP_SKB_CB(skb)->dccpd_ack_seq, now); if (r_sample == 0) { DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk, @@ -415,37 +400,37 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update receive rate in units of 64 * bytes/second */ - hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate; - hctx->ccid3hctx_x_recv <<= 6; + hc->tx_x_recv = opt_recv->ccid3or_receive_rate; + hc->tx_x_recv <<= 6; /* Update loss event rate (which is scaled by 1e6) */ pinv = opt_recv->ccid3or_loss_event_rate; if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */ - hctx->ccid3hctx_p = 0; + hc->tx_p = 0; else /* can not exceed 100% */ - hctx->ccid3hctx_p = scaled_div(1, pinv); + hc->tx_p = scaled_div(1, pinv); /* * Validate new RTT sample and update moving average */ r_sample = dccp_sample_rtt(sk, r_sample); - hctx->ccid3hctx_rtt = tfrc_ewma(hctx->ccid3hctx_rtt, r_sample, 9); + hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); /* * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 */ - if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + if (hc->tx_state == TFRC_SSTATE_NO_FBACK) { ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); - if (hctx->ccid3hctx_t_rto == 0) { + if (hc->tx_t_rto == 0) { /* * Initial feedback packet: Larger Initial Windows (4.2) */ - hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = now; + hc->tx_x = rfc3390_initial_rate(sk); + hc->tx_t_ld = now; - ccid3_update_send_interval(hctx); + ccid3_update_send_interval(hc); goto done_computing_x; - } else if (hctx->ccid3hctx_p == 0) { + } else if (hc->tx_p == 0) { /* * First feedback after nofeedback timer expiry (4.3) */ @@ -454,25 +439,20 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update sending rate (step 4 of [RFC 3448, 4.3]) */ - if 
(hctx->ccid3hctx_p > 0) - hctx->ccid3hctx_x_calc = - tfrc_calc_x(hctx->ccid3hctx_s, - hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p); + if (hc->tx_p > 0) + hc->tx_x_calc = tfrc_calc_x(hc->tx_s, hc->tx_rtt, hc->tx_p); ccid3_hc_tx_update_x(sk, &now); done_computing_x: ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " "p=%u, X_calc=%u, X_recv=%u, X=%u\n", - dccp_role(sk), - sk, hctx->ccid3hctx_rtt, r_sample, - hctx->ccid3hctx_s, hctx->ccid3hctx_p, - hctx->ccid3hctx_x_calc, - (unsigned)(hctx->ccid3hctx_x_recv >> 6), - (unsigned)(hctx->ccid3hctx_x >> 6)); + dccp_role(sk), sk, hc->tx_rtt, r_sample, + hc->tx_s, hc->tx_p, hc->tx_x_calc, + (unsigned)(hc->tx_x_recv >> 6), + (unsigned)(hc->tx_x >> 6)); /* unschedule no feedback timer */ - sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + sk_stop_timer(sk, &hc->tx_no_feedback_timer); /* * As we have calculated new ipi, delta, t_nom it is possible @@ -486,21 +466,19 @@ done_computing_x: * This can help avoid triggering the nofeedback timer too * often ('spinning') on LANs with small RTTs. */ - hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, - (CONFIG_IP_DCCP_CCID3_RTO * - (USEC_PER_SEC / 1000))); + hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * + (USEC_PER_SEC / 1000))); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) */ - t_nfb = max(hctx->ccid3hctx_t_rto, 2 * hctx->ccid3hctx_t_ipi); + t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); ccid3_pr_debug("%s(%p), Scheduled no feedback timer to " "expire in %lu jiffies (%luus)\n", - dccp_role(sk), - sk, usecs_to_jiffies(t_nfb), t_nfb); + dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb); - sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + sk_reset_timer(sk, &hc->tx_no_feedback_timer, jiffies + usecs_to_jiffies(t_nfb)); } @@ -510,11 +488,11 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, { int rc = 0; const struct dccp_sock *dp = dccp_sk(sk); - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; __be32 opt_val; - opt_recv = &hctx->ccid3hctx_options_received; + opt_recv = &hc->tx_options_received; if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { opt_recv->ccid3or_seqno = dp->dccps_gsr; @@ -568,56 +546,55 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) { - struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid); + struct ccid3_hc_tx_sock *hc = ccid_priv(ccid); - hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; - hctx->ccid3hctx_hist = NULL; - setup_timer(&hctx->ccid3hctx_no_feedback_timer, + hc->tx_state = TFRC_SSTATE_NO_SENT; + hc->tx_hist = NULL; + setup_timer(&hc->tx_no_feedback_timer, ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); - return 0; } static void ccid3_hc_tx_exit(struct sock *sk) { - struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); - sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + sk_stop_timer(sk, &hc->tx_no_feedback_timer); - tfrc_tx_hist_purge(&hctx->ccid3hctx_hist); + tfrc_tx_hist_purge(&hc->tx_hist); } static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) { - struct ccid3_hc_tx_sock *hctx; + struct ccid3_hc_tx_sock *hc; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - hctx = ccid3_hc_tx_sk(sk); - info->tcpi_rto = 
hctx->ccid3hctx_t_rto; - info->tcpi_rtt = hctx->ccid3hctx_rtt; + hc = ccid3_hc_tx_sk(sk); + info->tcpi_rto = hc->tx_t_rto; + info->tcpi_rtt = hc->tx_rtt; } static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_tx_sock *hctx; + const struct ccid3_hc_tx_sock *hc; const void *val; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return -EINVAL; - hctx = ccid3_hc_tx_sk(sk); + hc = ccid3_hc_tx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_TX_INFO: - if (len < sizeof(hctx->ccid3hctx_tfrc)) + if (len < sizeof(hc->tx_tfrc)) return -EINVAL; - len = sizeof(hctx->ccid3hctx_tfrc); - val = &hctx->ccid3hctx_tfrc; + len = sizeof(hc->tx_tfrc); + val = &hc->tx_tfrc; break; default: return -ENOPROTOOPT; @@ -657,34 +634,34 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) static void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); + enum ccid3_hc_rx_states oldstate = hc->rx_state; ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); WARN_ON(state == oldstate); - hcrx->ccid3hcrx_state = state; + hc->rx_state = state; } static void ccid3_hc_rx_send_feedback(struct sock *sk, const struct sk_buff *skb, enum ccid3_fback_type fbtype) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); ktime_t now; s64 delta = 0; - if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM)) + if (unlikely(hc->rx_state == TFRC_RSTATE_TERM)) return; now = ktime_get_real(); switch (fbtype) { case CCID3_FBACK_INITIAL: - hcrx->ccid3hcrx_x_recv = 0; - hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ + hc->rx_x_recv = 0; + hc->rx_pinv = ~0U; /* see RFC 4342, 8.5 */ break; case CCID3_FBACK_PARAM_CHANGE: /* @@ -697,27 +674,26 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, * the number of bytes since last feedback. * This is a safe fallback, since X is bounded above by X_calc. 
*/ - if (hcrx->ccid3hcrx_x_recv > 0) + if (hc->rx_x_recv > 0) break; /* fall through */ case CCID3_FBACK_PERIODIC: - delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback); + delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback); if (delta <= 0) DCCP_BUG("delta (%ld) <= 0", (long)delta); else - hcrx->ccid3hcrx_x_recv = - scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); break; default: return; } ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, - hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv); + hc->rx_x_recv, hc->rx_pinv); - hcrx->ccid3hcrx_tstamp_last_feedback = now; - hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval; - hcrx->ccid3hcrx_bytes_recv = 0; + hc->rx_tstamp_last_feedback = now; + hc->rx_last_counter = dccp_hdr(skb)->dccph_ccval; + hc->rx_bytes_recv = 0; dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); @@ -725,19 +701,19 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) { - const struct ccid3_hc_rx_sock *hcrx; + const struct ccid3_hc_rx_sock *hc; __be32 x_recv, pinv; if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; - hcrx = ccid3_hc_rx_sk(sk); + hc = ccid3_hc_rx_sk(sk); if (dccp_packet_without_ack(skb)) return 0; - x_recv = htonl(hcrx->ccid3hcrx_x_recv); - pinv = htonl(hcrx->ccid3hcrx_pinv); + x_recv = htonl(hc->rx_x_recv); + pinv = htonl(hc->rx_pinv); if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)) || @@ -760,26 +736,26 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) */ static u32 ccid3_first_li(struct sock *sk) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); u32 x_recv, p, delta; u64 fval; - if (hcrx->ccid3hcrx_rtt == 0) { + if (hc->rx_rtt == 0) { DCCP_WARN("No RTT estimate available, using fallback RTT\n"); - hcrx->ccid3hcrx_rtt = DCCP_FALLBACK_RTT; + hc->rx_rtt = DCCP_FALLBACK_RTT; } - delta = ktime_to_us(net_timedelta(hcrx->ccid3hcrx_tstamp_last_feedback)); - x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); + delta = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback)); + x_recv = scaled_div32(hc->rx_bytes_recv, delta); if (x_recv == 0) { /* would also trigger divide-by-zero */ DCCP_WARN("X_recv==0\n"); - if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { + if ((x_recv = hc->rx_x_recv) == 0) { DCCP_BUG("stored value of X_recv is zero"); return ~0U; } } - fval = scaled_div(hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_rtt); + fval = scaled_div(hc->rx_s, hc->rx_rtt); fval = scaled_div32(fval, x_recv); p = tfrc_calc_x_reverse_lookup(fval); @@ -791,19 +767,19 @@ static u32 ccid3_first_li(struct sock *sk) static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; const bool is_data_packet = dccp_data_packet(skb); - if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { + if (unlikely(hc->rx_state == TFRC_RSTATE_NO_DATA)) { if (is_data_packet) { const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; do_feedback = CCID3_FBACK_INITIAL; ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); - hcrx->ccid3hcrx_s = payload; + hc->rx_s = payload; /* - * Not necessary to update ccid3hcrx_bytes_recv here, + * Not 
necessary to update rx_bytes_recv here, * since X_recv = 0 for the first feedback packet (cf. * RFC 3448, 6.3) -- gerrit */ @@ -811,7 +787,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) goto update_records; } - if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb)) + if (tfrc_rx_hist_duplicate(&hc->rx_hist, skb)) return; /* done receiving */ if (is_data_packet) { @@ -819,20 +795,20 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* * Update moving-average of s and the sum of received payload bytes */ - hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9); - hcrx->ccid3hcrx_bytes_recv += payload; + hc->rx_s = tfrc_ewma(hc->rx_s, payload, 9); + hc->rx_bytes_recv += payload; } /* * Perform loss detection and handle pending losses */ - if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist, + if (tfrc_rx_handle_loss(&hc->rx_hist, &hc->rx_li_hist, skb, ndp, ccid3_first_li, sk)) { do_feedback = CCID3_FBACK_PARAM_CHANGE; goto done_receiving; } - if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist)) + if (tfrc_rx_hist_loss_pending(&hc->rx_hist)) return; /* done receiving */ /* @@ -841,17 +817,17 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(!is_data_packet)) goto update_records; - if (!tfrc_lh_is_initialised(&hcrx->ccid3hcrx_li_hist)) { - const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb); + if (!tfrc_lh_is_initialised(&hc->rx_li_hist)) { + const u32 sample = tfrc_rx_hist_sample_rtt(&hc->rx_hist, skb); /* * Empty loss history: no loss so far, hence p stays 0. * Sample RTT values, since an RTT estimate is required for the * computation of p when the first loss occurs; RFC 3448, 6.3.1. */ if (sample != 0) - hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9); + hc->rx_rtt = tfrc_ewma(hc->rx_rtt, sample, 9); - } else if (tfrc_lh_update_i_mean(&hcrx->ccid3hcrx_li_hist, skb)) { + } else if (tfrc_lh_update_i_mean(&hc->rx_li_hist, skb)) { /* * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean * has decreased (resp. p has increased), send feedback now. 
@@ -862,11 +838,11 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 */ - if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3) + if (SUB16(dccp_hdr(skb)->dccph_ccval, hc->rx_last_counter) > 3) do_feedback = CCID3_FBACK_PERIODIC; update_records: - tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp); + tfrc_rx_hist_add_packet(&hc->rx_hist, skb, ndp); done_receiving: if (do_feedback) @@ -875,41 +851,41 @@ done_receiving: static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) { - struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid); + struct ccid3_hc_rx_sock *hc = ccid_priv(ccid); - hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; - tfrc_lh_init(&hcrx->ccid3hcrx_li_hist); - return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist); + hc->rx_state = TFRC_RSTATE_NO_DATA; + tfrc_lh_init(&hc->rx_li_hist); + return tfrc_rx_hist_alloc(&hc->rx_hist); } static void ccid3_hc_rx_exit(struct sock *sk) { - struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); - tfrc_rx_hist_purge(&hcrx->ccid3hcrx_hist); - tfrc_lh_cleanup(&hcrx->ccid3hcrx_li_hist); + tfrc_rx_hist_purge(&hc->rx_hist); + tfrc_lh_cleanup(&hc->rx_li_hist); } static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) { - const struct ccid3_hc_rx_sock *hcrx; + const struct ccid3_hc_rx_sock *hc; /* Listen socks doesn't have a private CCID block */ if (sk->sk_state == DCCP_LISTEN) return; - hcrx = ccid3_hc_rx_sk(sk); - info->tcpi_ca_state = hcrx->ccid3hcrx_state; + hc = ccid3_hc_rx_sk(sk); + info->tcpi_ca_state = hc->rx_state; info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - info->tcpi_rcv_rtt = hcrx->ccid3hcrx_rtt; + info->tcpi_rcv_rtt = hc->rx_rtt; } static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, u32 __user *optval, int __user *optlen) { - const struct ccid3_hc_rx_sock *hcrx; + const struct ccid3_hc_rx_sock *hc; struct tfrc_rx_info rx_info; const void *val; @@ -917,15 +893,15 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, if (sk->sk_state == DCCP_LISTEN) return -EINVAL; - hcrx = ccid3_hc_rx_sk(sk); + hc = ccid3_hc_rx_sk(sk); switch (optname) { case DCCP_SOCKOPT_CCID_RX_INFO: if (len < sizeof(rx_info)) return -EINVAL; - rx_info.tfrcrx_x_recv = hcrx->ccid3hcrx_x_recv; - rx_info.tfrcrx_rtt = hcrx->ccid3hcrx_rtt; - rx_info.tfrcrx_p = hcrx->ccid3hcrx_pinv == 0 ? ~0U : - scaled_div(1, hcrx->ccid3hcrx_pinv); + rx_info.tfrcrx_x_recv = hc->rx_x_recv; + rx_info.tfrcrx_rtt = hc->rx_rtt; + rx_info.tfrcrx_p = hc->rx_pinv == 0 ? 
~0U : + scaled_div(1, hc->rx_pinv); len = sizeof(rx_info); val = &rx_info; break; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index e5a24414384..03263577665 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -75,44 +75,44 @@ enum ccid3_hc_tx_states { /** * struct ccid3_hc_tx_sock - CCID3 sender half-connection socket - * @ccid3hctx_x - Current sending rate in 64 * bytes per second - * @ccid3hctx_x_recv - Receive rate in 64 * bytes per second - * @ccid3hctx_x_calc - Calculated rate in bytes per second - * @ccid3hctx_rtt - Estimate of current round trip time in usecs - * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 - * @ccid3hctx_s - Packet size in bytes - * @ccid3hctx_t_rto - Nofeedback Timer setting in usecs - * @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs - * @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states - * @ccid3hctx_last_win_count - Last window counter sent - * @ccid3hctx_t_last_win_count - Timestamp of earliest packet - * with last_win_count value sent - * @ccid3hctx_no_feedback_timer - Handle to no feedback timer - * @ccid3hctx_t_ld - Time last doubled during slow start - * @ccid3hctx_t_nom - Nominal send time of next packet - * @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs - * @ccid3hctx_hist - Packet history - * @ccid3hctx_options_received - Parsed set of retrieved options + * @tx_x: Current sending rate in 64 * bytes per second + * @tx_x_recv: Receive rate in 64 * bytes per second + * @tx_x_calc: Calculated rate in bytes per second + * @tx_rtt: Estimate of current round trip time in usecs + * @tx_p: Current loss event rate (0-1) scaled by 1000000 + * @tx_s: Packet size in bytes + * @tx_t_rto: Nofeedback Timer setting in usecs + * @tx_t_ipi: Interpacket (send) interval (RFC 3448, 4.6) in usecs + * @tx_state: Sender state, one of %ccid3_hc_tx_states + * @tx_last_win_count: Last window counter sent + * @tx_t_last_win_count: Timestamp of earliest packet + * with last_win_count value sent + * @tx_no_feedback_timer: Handle to no feedback timer + * @tx_t_ld: Time last doubled during slow start + * @tx_t_nom: Nominal send time of next packet + * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs + * @tx_hist: Packet history + * @tx_options_received: Parsed set of retrieved options */ struct ccid3_hc_tx_sock { - struct tfrc_tx_info ccid3hctx_tfrc; -#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x -#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv -#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc -#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt -#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p -#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto -#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi - u16 ccid3hctx_s; - enum ccid3_hc_tx_states ccid3hctx_state:8; - u8 ccid3hctx_last_win_count; - ktime_t ccid3hctx_t_last_win_count; - struct timer_list ccid3hctx_no_feedback_timer; - ktime_t ccid3hctx_t_ld; - ktime_t ccid3hctx_t_nom; - u32 ccid3hctx_delta; - struct tfrc_tx_hist_entry *ccid3hctx_hist; - struct ccid3_options_received ccid3hctx_options_received; + struct tfrc_tx_info tx_tfrc; +#define tx_x tx_tfrc.tfrctx_x +#define tx_x_recv tx_tfrc.tfrctx_x_recv +#define tx_x_calc tx_tfrc.tfrctx_x_calc +#define tx_rtt tx_tfrc.tfrctx_rtt +#define tx_p tx_tfrc.tfrctx_p +#define tx_t_rto tx_tfrc.tfrctx_rto +#define tx_t_ipi tx_tfrc.tfrctx_ipi + u16 tx_s; + enum ccid3_hc_tx_states tx_state:8; + u8 tx_last_win_count; + ktime_t tx_t_last_win_count; + struct timer_list tx_no_feedback_timer; + ktime_t 
tx_t_ld; + ktime_t tx_t_nom; + u32 tx_delta; + struct tfrc_tx_hist_entry *tx_hist; + struct ccid3_options_received tx_options_received; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) @@ -131,32 +131,32 @@ enum ccid3_hc_rx_states { /** * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket - * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) - * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) - * @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4) - * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1) - * @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states - * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes - * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3) - * @ccid3hcrx_rtt - Receiver estimate of RTT - * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent - * @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent - * @ccid3hcrx_hist - Packet history (loss detection + RTT sampling) - * @ccid3hcrx_li_hist - Loss Interval database - * @ccid3hcrx_s - Received packet size in bytes - * @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5) + * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3) + * @rx_rtt: Receiver estimate of rtt (non-standard) + * @rx_p: Current loss event rate (RFC 3448 5.4) + * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) + * @rx_state: Receiver state, one of %ccid3_hc_rx_states + * @rx_bytes_recv: Total sum of DCCP payload bytes + * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) + * @rx_rtt: Receiver estimate of RTT + * @rx_tstamp_last_feedback: Time at which last feedback was sent + * @rx_tstamp_last_ack: Time at which last feedback was sent + * @rx_hist: Packet history (loss detection + RTT sampling) + * @rx_li_hist: Loss Interval database + * @rx_s: Received packet size in bytes + * @rx_pinv: Inverse of Loss Event Rate (RFC 4342, sec. 
8.5) */ struct ccid3_hc_rx_sock { - u8 ccid3hcrx_last_counter:4; - enum ccid3_hc_rx_states ccid3hcrx_state:8; - u32 ccid3hcrx_bytes_recv; - u32 ccid3hcrx_x_recv; - u32 ccid3hcrx_rtt; - ktime_t ccid3hcrx_tstamp_last_feedback; - struct tfrc_rx_hist ccid3hcrx_hist; - struct tfrc_loss_hist ccid3hcrx_li_hist; - u16 ccid3hcrx_s; -#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean + u8 rx_last_counter:4; + enum ccid3_hc_rx_states rx_state:8; + u32 rx_bytes_recv; + u32 rx_x_recv; + u32 rx_rtt; + ktime_t rx_tstamp_last_feedback; + struct tfrc_rx_hist rx_hist; + struct tfrc_loss_hist rx_li_hist; + u16 rx_s; +#define rx_pinv rx_li_hist.i_mean }; static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7302e1498d4..dad7bc4878e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -62,10 +62,10 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->saddr, + tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - inet->sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; @@ -77,12 +77,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; - if (inet->saddr == 0) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; + if (inet->inet_saddr == 0) + inet->inet_saddr = rt->rt_src; + inet->inet_rcv_saddr = inet->inet_saddr; - inet->dport = usin->sin_port; - inet->daddr = daddr; + inet->inet_dport = usin->sin_port; + inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) @@ -98,17 +98,19 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, - sk); + err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, + inet->inet_dport, sk); if (err != 0) goto failure; /* OK, now commit destination to socket. 
*/ sk_setup_caps(sk, &rt->u.dst); - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, - inet->sport, inet->dport); - inet->id = dp->dccps_iss ^ jiffies; + dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + inet->inet_dport); + inet->inet_id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; @@ -123,7 +125,7 @@ failure: dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; - inet->dport = 0; + inet->inet_dport = 0; goto out; } @@ -352,7 +354,9 @@ void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr); + dh->dccph_checksum = dccp_v4_csum_finish(skb, + inet->inet_saddr, + inet->inet_daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); @@ -393,18 +397,18 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->rmt_addr; + newinet->inet_rcv_saddr = ireq->loc_addr; + newinet->inet_saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->id = jiffies; + newinet->inet_id = jiffies; dccp_sync_mss(newsk, dst_mtu(dst)); - __inet_hash_nolisten(newsk); + __inet_hash_nolisten(newsk, NULL); __inet_inherit_port(sk, newsk); return newsk; @@ -473,7 +477,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->u.dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, + struct request_values *rv_unused) { int err = -1; struct sk_buff *skb; @@ -622,7 +627,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_iss = dccp_v4_init_sequence(skb); dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req)) + if (dccp_v4_send_response(sk, req, NULL)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); @@ -987,7 +992,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e48ca5d4565..baf05cf43c2 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -46,7 +46,7 @@ static void dccp_v6_hash(struct sock *sk) return; } local_bh_disable(); - __inet6_hash(sk); + __inet6_hash(sk, NULL); local_bh_enable(); } } @@ -158,8 +158,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); @@ -241,7 +241,8 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, + struct request_values *rv_unused) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -468,7 +469,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct 
sk_buff *skb) dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req)) + if (dccp_v6_send_response(sk, req, NULL)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); @@ -510,11 +511,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), - newinet->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); - ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), - newinet->saddr); + ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); @@ -642,9 +641,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, dccp_sync_mss(newsk, dst_mtu(dst)); - newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; + newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; + newinet->inet_rcv_saddr = LOOPBACK4_IPV6; - __inet6_hash(newsk); + __inet6_hash(newsk, NULL); __inet_inherit_port(sk, newsk); return newsk; @@ -970,12 +970,9 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; - } else { - ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), - inet->saddr); - ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), - inet->rcv_saddr); } + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); return err; } @@ -988,7 +985,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { @@ -1021,7 +1018,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, /* set the source address */ ipv6_addr_copy(&np->saddr, saddr); - inet->rcv_saddr = LOOPBACK4_IPV6; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; __ip6_dst_store(sk, dst, NULL, NULL); @@ -1030,7 +1027,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_ext_hdr_len = (np->opt->opt_flen + np->opt->opt_nflen); - inet->dport = usin->sin6_port; + inet->inet_dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); err = inet6_hash_connect(&dccp_death_row, sk); @@ -1039,7 +1036,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, - inet->sport, inet->dport); + inet->inet_sport, + inet->inet_dport); err = dccp_connect(sk); if (err) goto late_failure; @@ -1050,7 +1048,7 @@ late_failure: dccp_set_state(sk, DCCP_CLOSED); __sk_dst_reset(sk); failure: - inet->dport = 0; + inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } @@ -1188,7 +1186,6 @@ static struct inet_protosw dccp_v6_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, - .capability = -1, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5ca49cec95f..af226a06314 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -184,7 +184,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, * counter (backoff, monitored by dccp_response_timer). 
*/ req->retrans++; - req->rsk_ops->rtx_syn_ack(sk, req); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); } /* Network Duplicate, discard packet */ return NULL; diff --git a/net/dccp/output.c b/net/dccp/output.c index c96119fda68..d6bb753bf6a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -99,8 +99,8 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* Build DCCP header and checksum it. */ dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; - dh->dccph_sport = inet->sport; - dh->dccph_dport = inet->dport; + dh->dccph_sport = inet->inet_sport; + dh->dccph_dport = inet->inet_dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; dh->dccph_cscov = dp->dccps_pcslen; diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 37731da4148..dc328425fa2 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -75,26 +75,25 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { const struct inet_sock *inet = inet_sk(sk); - struct ccid3_hc_tx_sock *hctx = NULL; + struct ccid3_hc_tx_sock *hc = NULL; if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3) - hctx = ccid3_hc_tx_sk(sk); - - if (port == 0 || ntohs(inet->dport) == port || - ntohs(inet->sport) == port) { - if (hctx) - printl("%pI4:%u %pI4:%u %d %d %d %d %u " - "%llu %llu %d\n", - &inet->saddr, ntohs(inet->sport), - &inet->daddr, ntohs(inet->dport), size, - hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, - hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc, - hctx->ccid3hctx_x_recv >> 6, - hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi); + hc = ccid3_hc_tx_sk(sk); + + if (port == 0 || ntohs(inet->inet_dport) == port || + ntohs(inet->inet_sport) == port) { + if (hc) + printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n", + &inet->inet_saddr, ntohs(inet->inet_sport), + &inet->inet_daddr, ntohs(inet->inet_dport), size, + hc->tx_s, hc->tx_rtt, hc->tx_p, + hc->tx_x_calc, hc->tx_x_recv >> 6, + hc->tx_x >> 6, hc->tx_t_ipi); else printl("%pI4:%u %pI4:%u %d\n", - &inet->saddr, ntohs(inet->sport), - &inet->daddr, ntohs(inet->dport), size); + &inet->inet_saddr, ntohs(inet->inet_sport), + &inet->inet_daddr, ntohs(inet->inet_dport), + size); } jprobe_return(); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a156319fd0a..671cd1413d5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -278,7 +278,7 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_send_head = NULL; } - inet->dport = 0; + inet->inet_dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -290,7 +290,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet_csk_delack_init(sk); __sk_dst_reset(sk); - WARN_ON(inet->num && !icsk->icsk_bind_hash); + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -1060,11 +1060,12 @@ static int __init dccp_init(void) for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { - dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / + unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); - while (dccp_hashinfo.ehash_size & - (dccp_hashinfo.ehash_size - 1)) - dccp_hashinfo.ehash_size--; + + while (hash_size & (hash_size - 1)) + hash_size--; + dccp_hashinfo.ehash_mask = hash_size - 1; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); @@ -1074,7 +1075,7 @@ static int __init 
dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i < dccp_hashinfo.ehash_size; i++) { + for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) { INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } @@ -1153,7 +1154,7 @@ static void __exit dccp_fini(void) get_order(dccp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))); free_pages((unsigned long)dccp_hashinfo.ehash, - get_order(dccp_hashinfo.ehash_size * + get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index a5a1856234e..563943822e5 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -93,13 +93,13 @@ static struct ctl_table dccp_default_table[] = { .proc_handler = proc_dointvec_ms_jiffies, }, - { .ctl_name = 0, } + { } }; static struct ctl_path dccp_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "dccp", .ctl_name = NET_DCCP, }, - { .procname = "default", .ctl_name = NET_DCCP_DEFAULT, }, + { .procname = "net", }, + { .procname = "dccp", }, + { .procname = "default", }, { } }; diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 162d1e683c3..bbfeb5eae46 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -38,7 +38,7 @@ static int dccp_write_timeout(struct sock *sk) if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { if (icsk->icsk_retransmits != 0) - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); retry_until = icsk->icsk_syn_retries ? : sysctl_dccp_request_retries; } else { @@ -63,7 +63,7 @@ static int dccp_write_timeout(struct sock *sk) Golden words :-). 
*/ - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); } retry_until = sysctl_dccp_retries2; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7a58c87baf1..2b494fac946 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -675,11 +675,12 @@ char *dn_addr2asc(__u16 addr, char *buf) -static int dn_create(struct net *net, struct socket *sock, int protocol) +static int dn_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; switch(sock->type) { @@ -749,9 +750,9 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(saddr->sdn_flags & SDF_WILD)) { if (le16_to_cpu(saddr->sdn_nodeaddrl)) { - read_lock(&dev_base_lock); + rcu_read_lock(); ldev = NULL; - for_each_netdev(&init_net, dev) { + for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { @@ -759,7 +760,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) break; } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (ldev == NULL) return -EADDRNOTAVAIL; } @@ -1955,7 +1956,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, } if ((flags & MSG_TRYHARD) && sk->sk_dst_cache) - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); mss = scp->segsize_rem; fctype = scp->services_rem & NSP_FC_MASK; @@ -2325,7 +2326,7 @@ static const struct file_operations dn_socket_seq_fops = { }; #endif -static struct net_proto_family dn_family_ops = { +static const struct net_proto_family dn_family_ops = { .family = AF_DECnet, .create = dn_create, .owner = THIS_MODULE, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 6e1f085db06..238af093495 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -68,7 +68,7 @@ extern struct neigh_table dn_neigh_table; */ __le16 decnet_address = 0; -static DEFINE_RWLOCK(dndev_lock); +static DEFINE_SPINLOCK(dndev_lock); static struct net_device *decnet_default_device; static BLOCKING_NOTIFIER_HEAD(dnaddr_chain); @@ -89,7 +89,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "ethernet", - .ctl_name = NET_DECNET_CONF_ETHER, .up = dn_eth_up, .down = dn_eth_down, .timer3 = dn_send_brd_hello, @@ -101,7 +100,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "ipgre", - .ctl_name = NET_DECNET_CONF_GRE, .timer3 = dn_send_brd_hello, }, #if 0 @@ -112,7 +110,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 120, .name = "x25", - .ctl_name = NET_DECNET_CONF_X25, .timer3 = dn_send_ptp_hello, }, #endif @@ -124,7 +121,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "ppp", - .ctl_name = NET_DECNET_CONF_PPP, .timer3 = dn_send_brd_hello, }, #endif @@ -135,7 +131,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 120, .name = "ddcmp", - .ctl_name = NET_DECNET_CONF_DDCMP, .timer3 = dn_send_ptp_hello, }, { @@ -145,7 +140,6 @@ static struct dn_dev_parms dn_dev_list[] = { .t2 = 1, .t3 = 10, .name = "loopback", - .ctl_name = NET_DECNET_CONF_LOOPBACK, .timer3 = dn_send_brd_hello, } }; @@ -166,10 +160,6 @@ static int max_priority[] = { 127 }; /* From DECnet spec */ static int dn_forwarding_proc(ctl_table *, int, void __user *, size_t *, loff_t *); -static int dn_forwarding_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - 
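The DECnet sysctl tables below get the same conversion already applied to tr.c and dccp/sysctl.c: the binary-sysctl .ctl_name and .strategy members go away, strategy handlers such as dn_forwarding_sysctl() that only served the binary interface are deleted, and each table keeps just procname/data/maxlen/mode/proc_handler with an empty { } terminator. A minimal sketch of the resulting shape, using made-up names (example_value, the "example" path component):

#include <linux/sysctl.h>
#include <linux/init.h>

static int example_value;

static struct ctl_table example_table[] = {
	{
		.procname	= "value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

static struct ctl_path example_path[] = {
	{ .procname = "net", },
	{ .procname = "example", },
	{ }
};

static struct ctl_table_header *example_sysctl_header;

static int __init example_sysctl_init(void)
{
	/* The path is resolved by procname alone now that ctl_name
	 * numbers are gone. */
	example_sysctl_header = register_sysctl_paths(example_path, example_table);
	return example_sysctl_header ? 0 : -ENOMEM;
}

For the per-device dn_dev tables, the dev->ifindex ctl_name that used to identify the directory is likewise dropped, leaving only the procname taken from dev->name.
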
static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; ctl_table dn_dev_vars[5]; @@ -177,44 +167,36 @@ static struct dn_dev_sysctl_table { NULL, { { - .ctl_name = NET_DECNET_CONF_DEV_FORWARDING, .procname = "forwarding", .data = (void *)DN_DEV_PARMS_OFFSET(forwarding), .maxlen = sizeof(int), .mode = 0644, .proc_handler = dn_forwarding_proc, - .strategy = dn_forwarding_sysctl, }, { - .ctl_name = NET_DECNET_CONF_DEV_PRIORITY, .procname = "priority", .data = (void *)DN_DEV_PARMS_OFFSET(priority), .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_priority, .extra2 = &max_priority }, { - .ctl_name = NET_DECNET_CONF_DEV_T2, .procname = "t2", .data = (void *)DN_DEV_PARMS_OFFSET(t2), .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, { - .ctl_name = NET_DECNET_CONF_DEV_T3, .procname = "t3", .data = (void *)DN_DEV_PARMS_OFFSET(t3), .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t3, .extra2 = &max_t3 }, @@ -230,9 +212,9 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms * #define DN_CTL_PATH_DEV 3 struct ctl_path dn_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "decnet", .ctl_name = NET_DECNET, }, - { .procname = "conf", .ctl_name = NET_DECNET_CONF, }, + { .procname = "net", }, + { .procname = "decnet", }, + { .procname = "conf", }, { /* to be set */ }, { }, }; @@ -248,10 +230,8 @@ static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms * if (dev) { dn_ctl_path[DN_CTL_PATH_DEV].procname = dev->name; - dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = dev->ifindex; } else { dn_ctl_path[DN_CTL_PATH_DEV].procname = parms->name; - dn_ctl_path[DN_CTL_PATH_DEV].ctl_name = parms->ctl_name; } t->dn_dev_vars[0].extra1 = (void *)dev; @@ -317,44 +297,6 @@ static int dn_forwarding_proc(ctl_table *table, int write, #endif } -static int dn_forwarding_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ -#ifdef CONFIG_DECNET_ROUTER - struct net_device *dev = table->extra1; - struct dn_dev *dn_db; - int value; - - if (table->extra1 == NULL) - return -EINVAL; - - dn_db = dev->dn_ptr; - - if (newval && newlen) { - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(value, (int __user *)newval)) - return -EFAULT; - if (value < 0) - return -EINVAL; - if (value > 2) - return -EINVAL; - - if (dn_db->parms.down) - dn_db->parms.down(dev); - dn_db->parms.forwarding = value; - if (dn_db->parms.up) - dn_db->parms.up(dev); - } - - return 0; -#else - return -EINVAL; -#endif -} - #else /* CONFIG_SYSCTL */ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) { @@ -557,7 +499,8 @@ rarok: struct net_device *dn_dev_get_default(void) { struct net_device *dev; - read_lock(&dndev_lock); + + spin_lock(&dndev_lock); dev = decnet_default_device; if (dev) { if (dev->dn_ptr) @@ -565,7 +508,8 @@ struct net_device *dn_dev_get_default(void) else dev = NULL; } - read_unlock(&dndev_lock); + spin_unlock(&dndev_lock); + return dev; } @@ -575,13 +519,15 @@ int dn_dev_set_default(struct net_device *dev, int force) int rv = -EBUSY; if (!dev->dn_ptr) return -ENODEV; - write_lock(&dndev_lock); + + spin_lock(&dndev_lock); if (force || decnet_default_device == NULL) { old = decnet_default_device; decnet_default_device = dev; rv = 0; } - 
write_unlock(&dndev_lock); + spin_unlock(&dndev_lock); + if (old) dev_put(old); return rv; @@ -589,26 +535,29 @@ int dn_dev_set_default(struct net_device *dev, int force) static void dn_dev_check_default(struct net_device *dev) { - write_lock(&dndev_lock); + spin_lock(&dndev_lock); if (dev == decnet_default_device) { decnet_default_device = NULL; } else { dev = NULL; } - write_unlock(&dndev_lock); + spin_unlock(&dndev_lock); + if (dev) dev_put(dev); } +/* + * Called with RTNL + */ static struct dn_dev *dn_dev_by_index(int ifindex) { struct net_device *dev; struct dn_dev *dn_dev = NULL; - dev = dev_get_by_index(&init_net, ifindex); - if (dev) { + + dev = __dev_get_by_index(&init_net, ifindex); + if (dev) dn_dev = dev->dn_ptr; - dev_put(dev); - } return dn_dev; } @@ -629,7 +578,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct dn_ifaddr *ifa, **ifap; int err = -EINVAL; - if (net != &init_net) + if (!net_eq(net, &init_net)) goto errout; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); @@ -668,7 +617,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct dn_ifaddr *ifa; int err; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); @@ -782,7 +731,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct dn_dev *dn_db; struct dn_ifaddr *ifa; - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; skip_ndevs = cb->args[0]; @@ -826,13 +775,17 @@ static int dn_dev_get_first(struct net_device *dev, __le16 *addr) struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; struct dn_ifaddr *ifa; int rv = -ENODEV; + if (dn_db == NULL) goto out; + + rtnl_lock(); ifa = dn_db->ifa_list; if (ifa != NULL) { *addr = ifa->ifa_local; rv = 0; } + rtnl_unlock(); out: return rv; } @@ -854,9 +807,7 @@ int dn_dev_bind_default(__le16 *addr) dev = dn_dev_get_default(); last_chance: if (dev) { - read_lock(&dev_base_lock); rv = dn_dev_get_first(dev, addr); - read_unlock(&dev_base_lock); dev_put(dev); if (rv == 0 || dev == init_net.loopback_dev) return rv; @@ -1321,18 +1272,18 @@ static inline int is_dn_dev(struct net_device *dev) } static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(&dev_base_lock) + __acquires(rcu) { int i; struct net_device *dev; - read_lock(&dev_base_lock); + rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; i = 1; - for_each_netdev(&init_net, dev) { + for_each_netdev_rcu(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1353,7 +1304,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) dev = net_device_entry(&init_net.dev_base_head); - for_each_netdev_continue(&init_net, dev) { + for_each_netdev_continue_rcu(&init_net, dev) { if (!is_dn_dev(dev)) continue; @@ -1364,9 +1315,9 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void dn_dev_seq_stop(struct seq_file *seq, void *v) - __releases(&dev_base_lock) + __releases(rcu) { - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static char *dn_type2asc(char type) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 27ea2e9b080..e9d48700e83 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -509,7 +509,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); - if (net != &init_net) + if (!net_eq(net, &init_net)) 
return -EINVAL; if (dn_fib_check_attr(r, rta)) @@ -529,7 +529,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct rtattr **rta = arg; struct rtmsg *r = NLMSG_DATA(nlh); - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; if (dn_fib_check_attr(r, rta)) @@ -607,8 +607,8 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) ASSERT_RTNL(); /* Scan device list */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; @@ -619,7 +619,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) } } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (found_it == 0) { fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 57662cabaf9..a03284061a3 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -908,8 +908,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_put(dev_out); goto out; } - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; if (!dn_dev_islocal(dev, oldflp->fld_src)) @@ -922,7 +922,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old dev_out = dev; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); if (dev_out == NULL) goto out; dev_hold(dev_out); @@ -1517,7 +1517,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void struct sk_buff *skb; struct flowi fl; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; memset(&fl, 0, sizeof(fl)); @@ -1602,7 +1602,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) int h, s_h; int idx, s_idx; - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 72495f25269..7466c546f28 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -33,7 +33,7 @@ #include <net/dn_dev.h> #include <net/dn_route.h> -static struct fib_rules_ops dn_fib_rules_ops; +static struct fib_rules_ops *dn_fib_rules_ops; struct dn_fib_rule { @@ -56,7 +56,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) }; int err; - err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg); res->r = arg.rule; return err; @@ -217,9 +217,9 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) struct list_head *pos; struct fib_rule *rule; - if (!list_empty(&dn_fib_rules_ops.rules_list)) { - pos = dn_fib_rules_ops.rules_list.next; - if (pos->next != &dn_fib_rules_ops.rules_list) { + if (!list_empty(&dn_fib_rules_ops->rules_list)) { + pos = dn_fib_rules_ops->rules_list.next; + if (pos->next != &dn_fib_rules_ops->rules_list) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -234,7 +234,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static struct fib_rules_ops dn_fib_rules_ops = { +static struct fib_rules_ops dn_fib_rules_ops_template = { .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), @@ -247,21 +247,23 @@ static struct fib_rules_ops dn_fib_rules_ops = { .flush_cache = dn_fib_rule_flush_cache, .nlgroup = RTNLGRP_DECnet_RULE, .policy = 
dn_fib_rule_policy, - .rules_list = LIST_HEAD_INIT(dn_fib_rules_ops.rules_list), .owner = THIS_MODULE, .fro_net = &init_net, }; void __init dn_fib_rules_init(void) { - BUG_ON(fib_default_rule_add(&dn_fib_rules_ops, 0x7fff, + dn_fib_rules_ops = + fib_rules_register(&dn_fib_rules_ops_template, &init_net); + BUG_ON(IS_ERR(dn_fib_rules_ops)); + BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff, RT_TABLE_MAIN, 0)); - fib_rules_register(&dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - fib_rules_unregister(&dn_fib_rules_ops); + fib_rules_unregister(dn_fib_rules_ops); + rcu_barrier(); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 67054b0d550..b9a33bb5e9c 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -471,7 +471,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_node *node; int dumped = 0; - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && @@ -581,8 +581,9 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct DN_FIB_SCAN_KEY(f, fp, key) { if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority) break; - if (f->fn_type == type && f->fn_scope == r->rtm_scope - && DN_FIB_INFO(f) == fi) + if (f->fn_type == type && + f->fn_scope == r->rtm_scope && + DN_FIB_INFO(f) == fi) goto out; } diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 26b0ab1e9f5..be3eb8e2328 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -131,39 +131,6 @@ static int parse_addr(__le16 *addr, char *str) return 0; } - -static int dn_node_address_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - size_t len; - __le16 addr; - - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len != sizeof(unsigned short)) - return -EINVAL; - if (put_user(decnet_address, (__le16 __user *)oldval)) - return -EFAULT; - } - } - if (newval && newlen) { - if (newlen != sizeof(unsigned short)) - return -EINVAL; - if (get_user(addr, (__le16 __user *)newval)) - return -EFAULT; - - dn_dev_devices_off(); - - decnet_address = addr; - - dn_dev_devices_on(); - } - return 0; -} - static int dn_node_address_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -215,65 +182,6 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } - -static int dn_def_dev_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - size_t len; - struct net_device *dev; - char devname[17]; - size_t namel; - int rv = 0; - - devname[0] = 0; - - if (oldval && oldlenp) { - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - dev = dn_dev_get_default(); - if (dev) { - strcpy(devname, dev->name); - dev_put(dev); - } - - namel = strlen(devname) + 1; - if (len > namel) len = namel; - - if (copy_to_user(oldval, devname, len)) - return -EFAULT; - - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - if (newval && newlen) { - if (newlen > 16) - return -E2BIG; - - if (copy_from_user(devname, newval, newlen)) - return -EFAULT; - - devname[newlen] = 0; - - dev = dev_get_by_name(&init_net, devname); - if (dev == NULL) - return -ENODEV; - - rv = -ENODEV; - if (dev->dn_ptr != NULL) { - rv = dn_dev_set_default(dev, 1); - if (rv) - dev_put(dev); - } - } - - return rv; -} - - static int dn_def_dev_handler(ctl_table *table, int 
write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -339,138 +247,112 @@ static int dn_def_dev_handler(ctl_table *table, int write, static ctl_table dn_table[] = { { - .ctl_name = NET_DECNET_NODE_ADDRESS, .procname = "node_address", .maxlen = 7, .mode = 0644, .proc_handler = dn_node_address_handler, - .strategy = dn_node_address_strategy, }, { - .ctl_name = NET_DECNET_NODE_NAME, .procname = "node_name", .data = node_name, .maxlen = 7, .mode = 0644, .proc_handler = proc_dostring, - .strategy = sysctl_string, }, { - .ctl_name = NET_DECNET_DEFAULT_DEVICE, .procname = "default_device", .maxlen = 16, .mode = 0644, .proc_handler = dn_def_dev_handler, - .strategy = dn_def_dev_strategy, }, { - .ctl_name = NET_DECNET_TIME_WAIT, .procname = "time_wait", .data = &decnet_time_wait, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_decnet_time_wait, .extra2 = &max_decnet_time_wait }, { - .ctl_name = NET_DECNET_DN_COUNT, .procname = "dn_count", .data = &decnet_dn_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, { - .ctl_name = NET_DECNET_DI_COUNT, .procname = "di_count", .data = &decnet_di_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, { - .ctl_name = NET_DECNET_DR_COUNT, .procname = "dr_count", .data = &decnet_dr_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_state_count, .extra2 = &max_state_count }, { - .ctl_name = NET_DECNET_DST_GC_INTERVAL, .procname = "dst_gc_interval", .data = &decnet_dst_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_decnet_dst_gc_interval, .extra2 = &max_decnet_dst_gc_interval }, { - .ctl_name = NET_DECNET_NO_FC_MAX_CWND, .procname = "no_fc_max_cwnd", .data = &decnet_no_fc_max_cwnd, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_decnet_no_fc_max_cwnd, .extra2 = &max_decnet_no_fc_max_cwnd }, { - .ctl_name = NET_DECNET_MEM, .procname = "decnet_mem", .data = &sysctl_decnet_mem, .maxlen = sizeof(sysctl_decnet_mem), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, { - .ctl_name = NET_DECNET_RMEM, .procname = "decnet_rmem", .data = &sysctl_decnet_rmem, .maxlen = sizeof(sysctl_decnet_rmem), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, { - .ctl_name = NET_DECNET_WMEM, .procname = "decnet_wmem", .data = &sysctl_decnet_wmem, .maxlen = sizeof(sysctl_decnet_wmem), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, { - .ctl_name = NET_DECNET_DEBUG_LEVEL, .procname = "debug", .data = &decnet_debug_level, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec, }, - {0} + { } }; static struct ctl_path dn_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "decnet", .ctl_name = NET_DECNET, }, + { .procname = "net", }, + { .procname = "decnet", }, { } }; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 0e0254fd767..29b4931aae5 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -457,15 +457,15 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, 
iov[0].iov_len = size; for (i = 0; i < msg->msg_iovlen; i++) { void __user *base = msg->msg_iov[i].iov_base; - size_t len = msg->msg_iov[i].iov_len; + size_t iov_len = msg->msg_iov[i].iov_len; /* Check it now since we switch to KERNEL_DS later. */ - if (!access_ok(VERIFY_READ, base, len)) { + if (!access_ok(VERIFY_READ, base, iov_len)) { mutex_unlock(&econet_mutex); return -EFAULT; } iov[i+1].iov_base = base; - iov[i+1].iov_len = len; - size += len; + iov[i+1].iov_len = iov_len; + size += iov_len; } /* Get a skbuff (no data, just holds our cb information) */ @@ -605,13 +605,14 @@ static struct proto econet_proto = { * Create an Econet socket */ -static int econet_create(struct net *net, struct socket *sock, int protocol) +static int econet_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct econet_sock *eo; int err; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; /* Econet only provides datagram services. */ @@ -742,7 +743,7 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg return 0; } -static struct net_proto_family econet_family_ops = { +static const struct net_proto_family econet_family_ops = { .family = PF_ECONET, .create = econet_create, .owner = THIS_MODULE, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5a883affecd..dd3db88f8f0 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -393,10 +393,3 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return ((ssize_t) l); } EXPORT_SYMBOL(sysfs_format_mac); - -char *print_mac(char *buf, const unsigned char *addr) -{ - _format_mac_addr(buf, MAC_BUF_SIZE, addr, ETH_ALEN); - return buf; -} -EXPORT_SYMBOL(print_mac); diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 4068a9f5113..ce2d3358285 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_IEEE802154) += nl802154.o af_802154.o wpan-class.o -nl802154-y := netlink.o nl_policy.o +obj-$(CONFIG_IEEE802154) += ieee802154.o af_802154.o +ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o wpan-class.o af_802154-y := af_ieee802154.o raw.o dgram.o ccflags-y += -Wall -DDEBUG diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index cd949d5e451..bad1c49fd96 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -234,14 +234,14 @@ static const struct proto_ops ieee802154_dgram_ops = { * set the state. 
*/ static int ieee802154_create(struct net *net, struct socket *sock, - int protocol) + int protocol, int kern) { struct sock *sk; int rc; struct proto *proto; const struct proto_ops *ops; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; switch (sock->type) { @@ -285,7 +285,7 @@ out: return rc; } -static struct net_proto_family ieee802154_family_ops = { +static const struct net_proto_family ieee802154_family_ops = { .family = PF_IEEE802154, .create = ieee802154_create, .owner = THIS_MODULE, diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index a413b1bf446..9aac5aee157 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -303,7 +303,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; @@ -318,7 +318,6 @@ out: static int dgram_rcv_skb(struct sock *sk, struct sk_buff *skb) { if (sock_queue_rcv_skb(sk, skb) < 0) { - atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h new file mode 100644 index 00000000000..aadec428e6e --- /dev/null +++ b/net/ieee802154/ieee802154.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + */ +#ifndef IEEE_802154_LOCAL_H +#define IEEE_802154_LOCAL_H + +int __init ieee802154_nl_init(void); +void __exit ieee802154_nl_exit(void); + +#define IEEE802154_OP(_cmd, _func) \ + { \ + .cmd = _cmd, \ + .policy = ieee802154_policy, \ + .doit = _func, \ + .dumpit = NULL, \ + .flags = GENL_ADMIN_PERM, \ + } + +#define IEEE802154_DUMP(_cmd, _func, _dump) \ + { \ + .cmd = _cmd, \ + .policy = ieee802154_policy, \ + .doit = _func, \ + .dumpit = _dump, \ + } + +struct genl_info; + +struct sk_buff *ieee802154_nl_create(int flags, u8 req); +int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group); +struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, + int flags, u8 req); +int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info); + +extern struct genl_family nl802154_family; +int nl802154_mac_register(void); +int nl802154_phy_register(void); + +#endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index ca767bde17a..33137b99e47 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -23,21 +23,15 @@ */ #include <linux/kernel.h> -#include <linux/if_arp.h> -#include <linux/netdevice.h> -#include <net/netlink.h> #include <net/genetlink.h> -#include <net/sock.h> #include <linux/nl802154.h> -#include <net/af_ieee802154.h> -#include <net/nl802154.h> -#include <net/ieee802154.h> -#include <net/ieee802154_netdev.h> + +#include "ieee802154.h" static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); -static struct genl_family ieee802154_coordinator_family = { +struct genl_family nl802154_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = IEEE802154_NL_NAME, @@ -45,16 +39,8 @@ static struct genl_family ieee802154_coordinator_family = { .maxattr = IEEE802154_ATTR_MAX, }; -static struct genl_multicast_group ieee802154_coord_mcgrp = { - .name = IEEE802154_MCAST_COORD_NAME, -}; - -static struct genl_multicast_group ieee802154_beacon_mcgrp = { - .name = IEEE802154_MCAST_BEACON_NAME, -}; - /* Requests to userspace */ -static struct sk_buff *ieee802154_nl_create(int flags, u8 req) +struct sk_buff *ieee802154_nl_create(int flags, u8 req) { void *hdr; struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); @@ -65,7 +51,7 @@ static struct sk_buff *ieee802154_nl_create(int flags, u8 req) spin_lock_irqsave(&ieee802154_seq_lock, f); hdr = genlmsg_put(msg, 0, ieee802154_seq_num++, - &ieee802154_coordinator_family, flags, req); + &nl802154_family, flags, req); spin_unlock_irqrestore(&ieee802154_seq_lock, f); if (!hdr) { nlmsg_free(msg); @@ -75,7 +61,7 @@ static struct sk_buff *ieee802154_nl_create(int flags, u8 req) return msg; } -static int ieee802154_nl_finish(struct sk_buff *msg) +int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) { /* XXX: nlh is right at the start of msg */ void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); @@ -83,607 +69,70 @@ static int ieee802154_nl_finish(struct sk_buff *msg) if (genlmsg_end(msg, hdr) < 0) goto out; - return genlmsg_multicast(msg, 0, ieee802154_coord_mcgrp.id, - GFP_ATOMIC); + return genlmsg_multicast(msg, 0, group, GFP_ATOMIC); out: nlmsg_free(msg); return -ENOBUFS; } -int ieee802154_nl_assoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 cap) -{ - struct sk_buff *msg; - - pr_debug("%s\n", __func__); - - if (addr->addr_type != IEEE802154_ADDR_LONG) { - pr_err("%s: received non-long source address!\n", __func__); - return -EINVAL; - } - - msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_INDIC); - if (!msg) - return -ENOBUFS; - - 
NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - - NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, - addr->hwaddr); - - NLA_PUT_U8(msg, IEEE802154_ATTR_CAPABILITY, cap); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_assoc_indic); - -int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr, - u8 status) -{ - struct sk_buff *msg; - - pr_debug("%s\n", __func__); - - msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_CONF); - if (!msg) - return -ENOBUFS; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - - NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr); - NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_assoc_confirm); - -int ieee802154_nl_disassoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 reason) -{ - struct sk_buff *msg; - - pr_debug("%s\n", __func__); - - msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_INDIC); - if (!msg) - return -ENOBUFS; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - - if (addr->addr_type == IEEE802154_ADDR_LONG) - NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, - addr->hwaddr); - else - NLA_PUT_U16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR, - addr->short_addr); - - NLA_PUT_U8(msg, IEEE802154_ATTR_REASON, reason); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_disassoc_indic); - -int ieee802154_nl_disassoc_confirm(struct net_device *dev, u8 status) -{ - struct sk_buff *msg; - - pr_debug("%s\n", __func__); - - msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_CONF); - if (!msg) - return -ENOBUFS; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - - NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm); - -int ieee802154_nl_beacon_indic(struct net_device *dev, - u16 panid, u16 coord_addr) -{ - struct sk_buff *msg; - - pr_debug("%s\n", __func__); - - msg = ieee802154_nl_create(0, IEEE802154_BEACON_NOTIFY_INDIC); - if (!msg) - return -ENOBUFS; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr); - NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_beacon_indic); - -int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, u8 page, - u8 *edl/* , struct list_head *pan_desc_list */) -{ - struct 
sk_buff *msg; - - pr_debug("%s\n", __func__); - - msg = ieee802154_nl_create(0, IEEE802154_SCAN_CONF); - if (!msg) - return -ENOBUFS; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - - NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); - NLA_PUT_U8(msg, IEEE802154_ATTR_SCAN_TYPE, scan_type); - NLA_PUT_U32(msg, IEEE802154_ATTR_CHANNELS, unscanned); - NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, page); - - if (edl) - NLA_PUT(msg, IEEE802154_ATTR_ED_LIST, 27, edl); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_scan_confirm); - -int ieee802154_nl_start_confirm(struct net_device *dev, u8 status) -{ - struct sk_buff *msg; - - pr_debug("%s\n", __func__); - - msg = ieee802154_nl_create(0, IEEE802154_START_CONF); - if (!msg) - return -ENOBUFS; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - - NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); - - return ieee802154_nl_finish(msg); - -nla_put_failure: - nlmsg_free(msg); - return -ENOBUFS; -} -EXPORT_SYMBOL(ieee802154_nl_start_confirm); - -static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, - u32 seq, int flags, struct net_device *dev) +struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, + int flags, u8 req) { void *hdr; + struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); - pr_debug("%s\n", __func__); - - hdr = genlmsg_put(msg, 0, seq, &ieee802154_coordinator_family, flags, - IEEE802154_LIST_IFACE); - if (!hdr) - goto out; - - NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); - NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); - - NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, - dev->dev_addr); - NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, - ieee802154_mlme_ops(dev)->get_short_addr(dev)); - NLA_PUT_U16(msg, IEEE802154_ATTR_PAN_ID, - ieee802154_mlme_ops(dev)->get_pan_id(dev)); - return genlmsg_end(msg, hdr); - -nla_put_failure: - genlmsg_cancel(msg, hdr); -out: - return -EMSGSIZE; -} - -/* Requests from userspace */ -static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) -{ - struct net_device *dev; - - if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { - char name[IFNAMSIZ + 1]; - nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], - sizeof(name)); - dev = dev_get_by_name(&init_net, name); - } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) - dev = dev_get_by_index(&init_net, - nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); - else - return NULL; - - if (!dev) + if (!msg) return NULL; - if (dev->type != ARPHRD_IEEE802154) { - dev_put(dev); + hdr = genlmsg_put_reply(msg, info, + &nl802154_family, flags, req); + if (!hdr) { + nlmsg_free(msg); return NULL; } - return dev; -} - -static int ieee802154_associate_req(struct sk_buff *skb, - struct genl_info *info) -{ - struct net_device *dev; - struct ieee802154_addr addr; - u8 page; - int ret = -EINVAL; - - if (!info->attrs[IEEE802154_ATTR_CHANNEL] || - !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || - (!info->attrs[IEEE802154_ATTR_COORD_HW_ADDR] && - !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]) || - !info->attrs[IEEE802154_ATTR_CAPABILITY]) - return -EINVAL; - - dev = ieee802154_nl_get_dev(info); - if (!dev) - return -ENODEV; - - if 
(info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { - addr.addr_type = IEEE802154_ADDR_LONG; - nla_memcpy(addr.hwaddr, - info->attrs[IEEE802154_ATTR_COORD_HW_ADDR], - IEEE802154_ADDR_LEN); - } else { - addr.addr_type = IEEE802154_ADDR_SHORT; - addr.short_addr = nla_get_u16( - info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); - } - addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); - - if (info->attrs[IEEE802154_ATTR_PAGE]) - page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); - else - page = 0; - - ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, - nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), - page, - nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); - - dev_put(dev); - return ret; -} - -static int ieee802154_associate_resp(struct sk_buff *skb, - struct genl_info *info) -{ - struct net_device *dev; - struct ieee802154_addr addr; - int ret = -EINVAL; - - if (!info->attrs[IEEE802154_ATTR_STATUS] || - !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || - !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) - return -EINVAL; - - dev = ieee802154_nl_get_dev(info); - if (!dev) - return -ENODEV; - - addr.addr_type = IEEE802154_ADDR_LONG; - nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], - IEEE802154_ADDR_LEN); - addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - - - ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr, - nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), - nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); - - dev_put(dev); - return ret; -} - -static int ieee802154_disassociate_req(struct sk_buff *skb, - struct genl_info *info) -{ - struct net_device *dev; - struct ieee802154_addr addr; - int ret = -EINVAL; - - if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && - !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || - !info->attrs[IEEE802154_ATTR_REASON]) - return -EINVAL; - - dev = ieee802154_nl_get_dev(info); - if (!dev) - return -ENODEV; - - if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { - addr.addr_type = IEEE802154_ADDR_LONG; - nla_memcpy(addr.hwaddr, - info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], - IEEE802154_ADDR_LEN); - } else { - addr.addr_type = IEEE802154_ADDR_SHORT; - addr.short_addr = nla_get_u16( - info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]); - } - addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - - ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, - nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); - - dev_put(dev); - return ret; -} - -/* - * PANid, channel, beacon_order = 15, superframe_order = 15, - * PAN_coordinator, battery_life_extension = 0, - * coord_realignment = 0, security_enable = 0 -*/ -static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) -{ - struct net_device *dev; - struct ieee802154_addr addr; - - u8 channel, bcn_ord, sf_ord; - u8 page; - int pan_coord, blx, coord_realign; - int ret; - - if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || - !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || - !info->attrs[IEEE802154_ATTR_CHANNEL] || - !info->attrs[IEEE802154_ATTR_BCN_ORD] || - !info->attrs[IEEE802154_ATTR_SF_ORD] || - !info->attrs[IEEE802154_ATTR_PAN_COORD] || - !info->attrs[IEEE802154_ATTR_BAT_EXT] || - !info->attrs[IEEE802154_ATTR_COORD_REALIGN] - ) - return -EINVAL; - - dev = ieee802154_nl_get_dev(info); - if (!dev) - return -ENODEV; - - addr.addr_type = IEEE802154_ADDR_SHORT; - addr.short_addr = nla_get_u16( - info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); - addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); - - channel = 
nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]); - bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]); - sf_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_SF_ORD]); - pan_coord = nla_get_u8(info->attrs[IEEE802154_ATTR_PAN_COORD]); - blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); - coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); - - if (info->attrs[IEEE802154_ATTR_PAGE]) - page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); - else - page = 0; - - - if (addr.short_addr == IEEE802154_ADDR_BROADCAST) { - ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); - dev_put(dev); - return -EINVAL; - } - - ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, - bcn_ord, sf_ord, pan_coord, blx, coord_realign); - - dev_put(dev); - return ret; -} - -static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) -{ - struct net_device *dev; - int ret; - u8 type; - u32 channels; - u8 duration; - u8 page; - - if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] || - !info->attrs[IEEE802154_ATTR_CHANNELS] || - !info->attrs[IEEE802154_ATTR_DURATION]) - return -EINVAL; - - dev = ieee802154_nl_get_dev(info); - if (!dev) - return -ENODEV; - - type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); - channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); - duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); - - if (info->attrs[IEEE802154_ATTR_PAGE]) - page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); - else - page = 0; - - - ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, - duration); - - dev_put(dev); - return ret; + return msg; } -static int ieee802154_list_iface(struct sk_buff *skb, - struct genl_info *info) +int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) { - /* Request for interface name, index, type, IEEE address, - PAN Id, short address */ - struct sk_buff *msg; - struct net_device *dev = NULL; - int rc = -ENOBUFS; - - pr_debug("%s\n", __func__); - - dev = ieee802154_nl_get_dev(info); - if (!dev) - return -ENODEV; - - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (!msg) - goto out_dev; - - rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, - 0, dev); - if (rc < 0) - goto out_free; + /* XXX: nlh is right at the start of msg */ + void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); - dev_put(dev); + if (genlmsg_end(msg, hdr) < 0) + goto out; - return genlmsg_unicast(&init_net, msg, info->snd_pid); -out_free: + return genlmsg_reply(msg, info); +out: nlmsg_free(msg); -out_dev: - dev_put(dev); - return rc; - -} - -static int ieee802154_dump_iface(struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - struct net_device *dev; - int idx; - int s_idx = cb->args[0]; - - pr_debug("%s\n", __func__); - - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) - goto cont; - - if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) - break; -cont: - idx++; - } - cb->args[0] = idx; - - return skb->len; + return -ENOBUFS; } -#define IEEE802154_OP(_cmd, _func) \ - { \ - .cmd = _cmd, \ - .policy = ieee802154_policy, \ - .doit = _func, \ - .dumpit = NULL, \ - .flags = GENL_ADMIN_PERM, \ - } - -#define IEEE802154_DUMP(_cmd, _func, _dump) \ - { \ - .cmd = _cmd, \ - .policy = ieee802154_policy, \ - .doit = _func, \ - .dumpit = _dump, \ - } - -static struct genl_ops ieee802154_coordinator_ops[] = { - IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, 
ieee802154_associate_req), - IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), - IEEE802154_OP(IEEE802154_DISASSOCIATE_REQ, ieee802154_disassociate_req), - IEEE802154_OP(IEEE802154_SCAN_REQ, ieee802154_scan_req), - IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), - IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, - ieee802154_dump_iface), -}; - -static int __init ieee802154_nl_init(void) +int __init ieee802154_nl_init(void) { int rc; - int i; - rc = genl_register_family(&ieee802154_coordinator_family); + rc = genl_register_family(&nl802154_family); if (rc) goto fail; - rc = genl_register_mc_group(&ieee802154_coordinator_family, - &ieee802154_coord_mcgrp); + rc = nl802154_mac_register(); if (rc) goto fail; - rc = genl_register_mc_group(&ieee802154_coordinator_family, - &ieee802154_beacon_mcgrp); + rc = nl802154_phy_register(); if (rc) goto fail; - - for (i = 0; i < ARRAY_SIZE(ieee802154_coordinator_ops); i++) { - rc = genl_register_ops(&ieee802154_coordinator_family, - &ieee802154_coordinator_ops[i]); - if (rc) - goto fail; - } - return 0; fail: - genl_unregister_family(&ieee802154_coordinator_family); + genl_unregister_family(&nl802154_family); return rc; } -module_init(ieee802154_nl_init); -static void __exit ieee802154_nl_exit(void) +void __exit ieee802154_nl_exit(void) { - genl_unregister_family(&ieee802154_coordinator_family); + genl_unregister_family(&nl802154_family); } -module_exit(ieee802154_nl_exit); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("ieee 802.15.4 configuration interface"); diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c new file mode 100644 index 00000000000..135c1678fb1 --- /dev/null +++ b/net/ieee802154/nl-mac.c @@ -0,0 +1,617 @@ +/* + * Netlink inteface for IEEE 802.15.4 stack + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Written by: + * Sergey Lapin <slapin@ossfans.org> + * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> + * Maxim Osipov <maxim.osipov@siemens.com> + */ + +#include <linux/kernel.h> +#include <linux/if_arp.h> +#include <linux/netdevice.h> +#include <net/netlink.h> +#include <net/genetlink.h> +#include <net/sock.h> +#include <linux/nl802154.h> +#include <net/af_ieee802154.h> +#include <net/nl802154.h> +#include <net/ieee802154.h> +#include <net/ieee802154_netdev.h> +#include <net/wpan-phy.h> + +#include "ieee802154.h" + +static struct genl_multicast_group ieee802154_coord_mcgrp = { + .name = IEEE802154_MCAST_COORD_NAME, +}; + +static struct genl_multicast_group ieee802154_beacon_mcgrp = { + .name = IEEE802154_MCAST_BEACON_NAME, +}; + +int ieee802154_nl_assoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 cap) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + if (addr->addr_type != IEEE802154_ADDR_LONG) { + pr_err("%s: received non-long source address!\n", __func__); + return -EINVAL; + } + + msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_INDIC); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, + addr->hwaddr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_CAPABILITY, cap); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_assoc_indic); + +int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr, + u8 status) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_ASSOCIATE_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, short_addr); + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_assoc_confirm); + +int ieee802154_nl_disassoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 reason) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_INDIC); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + if (addr->addr_type == IEEE802154_ADDR_LONG) + NLA_PUT(msg, IEEE802154_ATTR_SRC_HW_ADDR, IEEE802154_ADDR_LEN, + addr->hwaddr); + else + NLA_PUT_U16(msg, IEEE802154_ATTR_SRC_SHORT_ADDR, + addr->short_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_REASON, reason); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_disassoc_indic); + +int ieee802154_nl_disassoc_confirm(struct net_device *dev, u8 status) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_DISASSOCIATE_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + 
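/*
 * Editorial note: every NLA_PUT_* call in these notification helpers
 * leans on the macros' built-in error handling -- when the message runs
 * out of tailroom they jump to the nla_put_failure label at the bottom
 * of the function, which frees the skb and returns -ENOBUFS.  A
 * simplified sketch of what a call such as NLA_PUT_U8(msg, attr, v)
 * expands to (illustrative only, not the literal kernel definition):
 *
 *	if (nla_put_u8(msg, attr, v) < 0)
 *		goto nla_put_failure;
 */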
NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_disassoc_confirm); + +int ieee802154_nl_beacon_indic(struct net_device *dev, + u16 panid, u16 coord_addr) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_BEACON_NOTIFY_INDIC); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_SHORT_ADDR, coord_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_COORD_PAN_ID, panid); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_beacon_indic); + +int ieee802154_nl_scan_confirm(struct net_device *dev, + u8 status, u8 scan_type, u32 unscanned, u8 page, + u8 *edl/* , struct list_head *pan_desc_list */) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_SCAN_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + NLA_PUT_U8(msg, IEEE802154_ATTR_SCAN_TYPE, scan_type); + NLA_PUT_U32(msg, IEEE802154_ATTR_CHANNELS, unscanned); + NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, page); + + if (edl) + NLA_PUT(msg, IEEE802154_ATTR_ED_LIST, 27, edl); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_scan_confirm); + +int ieee802154_nl_start_confirm(struct net_device *dev, u8 status) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_START_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + + return ieee802154_nl_mcast(msg, ieee802154_coord_mcgrp.id); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_start_confirm); + +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, + u32 seq, int flags, struct net_device *dev) +{ + void *hdr; + struct wpan_phy *phy; + + pr_debug("%s\n", __func__); + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, + IEEE802154_LIST_IFACE); + if (!hdr) + goto out; + + phy = ieee802154_mlme_ops(dev)->get_phy(dev); + BUG_ON(!phy); + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, + ieee802154_mlme_ops(dev)->get_short_addr(dev)); + NLA_PUT_U16(msg, IEEE802154_ATTR_PAN_ID, + ieee802154_mlme_ops(dev)->get_pan_id(dev)); + wpan_phy_put(phy); + return 
genlmsg_end(msg, hdr); + +nla_put_failure: + wpan_phy_put(phy); + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + +/* Requests from userspace */ +static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) +{ + struct net_device *dev; + + if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { + char name[IFNAMSIZ + 1]; + nla_strlcpy(name, info->attrs[IEEE802154_ATTR_DEV_NAME], + sizeof(name)); + dev = dev_get_by_name(&init_net, name); + } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) + dev = dev_get_by_index(&init_net, + nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); + else + return NULL; + + if (!dev) + return NULL; + + if (dev->type != ARPHRD_IEEE802154) { + dev_put(dev); + return NULL; + } + + return dev; +} + +static int ieee802154_associate_req(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + u8 page; + int ret = -EINVAL; + + if (!info->attrs[IEEE802154_ATTR_CHANNEL] || + !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || + (!info->attrs[IEEE802154_ATTR_COORD_HW_ADDR] && + !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]) || + !info->attrs[IEEE802154_ATTR_CAPABILITY]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { + addr.addr_type = IEEE802154_ADDR_LONG; + nla_memcpy(addr.hwaddr, + info->attrs[IEEE802154_ATTR_COORD_HW_ADDR], + IEEE802154_ADDR_LEN); + } else { + addr.addr_type = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_u16( + info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); + } + addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, + nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), + page, + nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); + + dev_put(dev); + return ret; +} + +static int ieee802154_associate_resp(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + int ret = -EINVAL; + + if (!info->attrs[IEEE802154_ATTR_STATUS] || + !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || + !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + addr.addr_type = IEEE802154_ADDR_LONG; + nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], + IEEE802154_ADDR_LEN); + addr.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); + + + ret = ieee802154_mlme_ops(dev)->assoc_resp(dev, &addr, + nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), + nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); + + dev_put(dev); + return ret; +} + +static int ieee802154_disassociate_req(struct sk_buff *skb, + struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + int ret = -EINVAL; + + if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && + !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || + !info->attrs[IEEE802154_ATTR_REASON]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { + addr.addr_type = IEEE802154_ADDR_LONG; + nla_memcpy(addr.hwaddr, + info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], + IEEE802154_ADDR_LEN); + } else { + addr.addr_type = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_u16( + info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]); + } + addr.pan_id = 
ieee802154_mlme_ops(dev)->get_pan_id(dev); + + ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, + nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); + + dev_put(dev); + return ret; +} + +/* + * PANid, channel, beacon_order = 15, superframe_order = 15, + * PAN_coordinator, battery_life_extension = 0, + * coord_realignment = 0, security_enable = 0 +*/ +static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev; + struct ieee802154_addr addr; + + u8 channel, bcn_ord, sf_ord; + u8 page; + int pan_coord, blx, coord_realign; + int ret; + + if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || + !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || + !info->attrs[IEEE802154_ATTR_CHANNEL] || + !info->attrs[IEEE802154_ATTR_BCN_ORD] || + !info->attrs[IEEE802154_ATTR_SF_ORD] || + !info->attrs[IEEE802154_ATTR_PAN_COORD] || + !info->attrs[IEEE802154_ATTR_BAT_EXT] || + !info->attrs[IEEE802154_ATTR_COORD_REALIGN] + ) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + addr.addr_type = IEEE802154_ADDR_SHORT; + addr.short_addr = nla_get_u16( + info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR]); + addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + + channel = nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]); + bcn_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_BCN_ORD]); + sf_ord = nla_get_u8(info->attrs[IEEE802154_ATTR_SF_ORD]); + pan_coord = nla_get_u8(info->attrs[IEEE802154_ATTR_PAN_COORD]); + blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); + coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); + + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + + if (addr.short_addr == IEEE802154_ADDR_BROADCAST) { + ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); + dev_put(dev); + return -EINVAL; + } + + ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, + bcn_ord, sf_ord, pan_coord, blx, coord_realign); + + dev_put(dev); + return ret; +} + +static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) +{ + struct net_device *dev; + int ret; + u8 type; + u32 channels; + u8 duration; + u8 page; + + if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] || + !info->attrs[IEEE802154_ATTR_CHANNELS] || + !info->attrs[IEEE802154_ATTR_DURATION]) + return -EINVAL; + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); + channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); + duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); + + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + + ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, + duration); + + dev_put(dev); + return ret; +} + +static int ieee802154_list_iface(struct sk_buff *skb, + struct genl_info *info) +{ + /* Request for interface name, index, type, IEEE address, + PAN Id, short address */ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + 0, dev); + if (rc < 0) + goto out_free; + + dev_put(dev); + + return genlmsg_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + 
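/*
 * Editorial note: ieee802154_nl_get_dev() hands back a device with an
 * elevated reference count (taken via dev_get_by_name() or
 * dev_get_by_index()), so every exit path -- success or error -- must
 * drop it with dev_put(), as the out_dev label above does.  A minimal
 * caller sketch, assuming only what is visible in this file:
 *
 *	struct net_device *dev = ieee802154_nl_get_dev(info);
 *	if (!dev)
 *		return -ENODEV;
 *	// ... issue the MLME request ...
 *	dev_put(dev);
 */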
return rc; + +} + +static int ieee802154_dump_iface(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx; + int s_idx = cb->args[0]; + + pr_debug("%s\n", __func__); + + idx = 0; + for_each_netdev(net, dev) { + if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) + goto cont; + + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) + break; +cont: + idx++; + } + cb->args[0] = idx; + + return skb->len; +} + +static struct genl_ops ieee802154_coordinator_ops[] = { + IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req), + IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), + IEEE802154_OP(IEEE802154_DISASSOCIATE_REQ, ieee802154_disassociate_req), + IEEE802154_OP(IEEE802154_SCAN_REQ, ieee802154_scan_req), + IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), + IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, + ieee802154_dump_iface), +}; + +/* + * No need to unregister as family unregistration will do it. + */ +int nl802154_mac_register(void) +{ + int i; + int rc; + + rc = genl_register_mc_group(&nl802154_family, + &ieee802154_coord_mcgrp); + if (rc) + return rc; + + rc = genl_register_mc_group(&nl802154_family, + &ieee802154_beacon_mcgrp); + if (rc) + return rc; + + for (i = 0; i < ARRAY_SIZE(ieee802154_coordinator_ops); i++) { + rc = genl_register_ops(&nl802154_family, + &ieee802154_coordinator_ops[i]); + if (rc) + return rc; + } + + return 0; +} diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c new file mode 100644 index 00000000000..199a2d9d12f --- /dev/null +++ b/net/ieee802154/nl-phy.c @@ -0,0 +1,344 @@ +/* + * Netlink inteface for IEEE 802.15.4 stack + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Written by: + * Sergey Lapin <slapin@ossfans.org> + * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> + * Maxim Osipov <maxim.osipov@siemens.com> + */ + +#include <linux/kernel.h> +#include <net/netlink.h> +#include <net/genetlink.h> +#include <net/wpan-phy.h> +#include <net/af_ieee802154.h> +#include <net/ieee802154_netdev.h> +#include <net/rtnetlink.h> /* for rtnl_{un,}lock */ +#include <linux/nl802154.h> + +#include "ieee802154.h" + +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, + u32 seq, int flags, struct wpan_phy *phy) +{ + void *hdr; + int i, pages = 0; + uint32_t *buf = kzalloc(32 * sizeof(uint32_t), GFP_KERNEL); + + pr_debug("%s\n", __func__); + + if (!buf) + goto out; + + hdr = genlmsg_put(msg, 0, seq, &nl802154_family, flags, + IEEE802154_LIST_PHY); + if (!hdr) + goto out; + + mutex_lock(&phy->pib_lock); + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + + NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, phy->current_page); + NLA_PUT_U8(msg, IEEE802154_ATTR_CHANNEL, phy->current_channel); + for (i = 0; i < 32; i++) { + if (phy->channels_supported[i]) + buf[pages++] = phy->channels_supported[i] | (i << 27); + } + if (pages) + NLA_PUT(msg, IEEE802154_ATTR_CHANNEL_PAGE_LIST, + pages * sizeof(uint32_t), buf); + + mutex_unlock(&phy->pib_lock); + return genlmsg_end(msg, hdr); + +nla_put_failure: + mutex_unlock(&phy->pib_lock); + genlmsg_cancel(msg, hdr); +out: + kfree(buf); + return -EMSGSIZE; +} + +static int ieee802154_list_phy(struct sk_buff *skb, + struct genl_info *info) +{ + /* Request for interface name, index, type, IEEE address, + PAN Id, short address */ + struct sk_buff *msg; + struct wpan_phy *phy; + const char *name; + int rc = -ENOBUFS; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') + return -EINVAL; /* phy name should be null-terminated */ + + + phy = wpan_phy_find(name); + if (!phy) + return -ENODEV; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + 0, phy); + if (rc < 0) + goto out_free; + + wpan_phy_put(phy); + + return genlmsg_reply(msg, info); +out_free: + nlmsg_free(msg); +out_dev: + wpan_phy_put(phy); + return rc; + +} + +struct dump_phy_data { + struct sk_buff *skb; + struct netlink_callback *cb; + int idx, s_idx; +}; + +static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) +{ + int rc; + struct dump_phy_data *data = _data; + + pr_debug("%s\n", __func__); + + if (data->idx++ < data->s_idx) + return 0; + + rc = ieee802154_nl_fill_phy(data->skb, + NETLINK_CB(data->cb->skb).pid, + data->cb->nlh->nlmsg_seq, + NLM_F_MULTI, + phy); + + if (rc < 0) { + data->idx--; + return rc; + } + + return 0; +} + +static int ieee802154_dump_phy(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct dump_phy_data data = { + .cb = cb, + .skb = skb, + .s_idx = cb->args[0], + .idx = 0, + }; + + pr_debug("%s\n", __func__); + + wpan_phy_for_each(ieee802154_dump_phy_iter, &data); + + cb->args[0] = data.idx; + + return skb->len; +} + +static int ieee802154_add_iface(struct sk_buff *skb, + struct genl_info *info) +{ + struct sk_buff *msg; + struct wpan_phy *phy; + const char *name; + const char *devname; + int rc = -ENOBUFS; + struct net_device *dev; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_PHY_NAME]) + return -EINVAL; + + name = 
nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] != '\0') + return -EINVAL; /* phy name should be null-terminated */ + + if (info->attrs[IEEE802154_ATTR_DEV_NAME]) { + devname = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]); + if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] + != '\0') + return -EINVAL; /* phy name should be null-terminated */ + } else { + devname = "wpan%d"; + } + + if (strlen(devname) >= IFNAMSIZ) + return -ENAMETOOLONG; + + phy = wpan_phy_find(name); + if (!phy) + return -ENODEV; + + msg = ieee802154_nl_new_reply(info, 0, IEEE802154_ADD_IFACE); + if (!msg) + goto out_dev; + + if (!phy->add_iface) { + rc = -EINVAL; + goto nla_put_failure; + } + + dev = phy->add_iface(phy, devname); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto nla_put_failure; + } + + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + + dev_put(dev); + + wpan_phy_put(phy); + + return ieee802154_nl_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); +out_dev: + wpan_phy_put(phy); + return rc; +} + +static int ieee802154_del_iface(struct sk_buff *skb, + struct genl_info *info) +{ + struct sk_buff *msg; + struct wpan_phy *phy; + const char *name; + int rc; + struct net_device *dev; + + pr_debug("%s\n", __func__); + + if (!info->attrs[IEEE802154_ATTR_DEV_NAME]) + return -EINVAL; + + name = nla_data(info->attrs[IEEE802154_ATTR_DEV_NAME]); + if (name[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1] != '\0') + return -EINVAL; /* name should be null-terminated */ + + dev = dev_get_by_name(genl_info_net(info), name); + if (!dev) + return -ENODEV; + + phy = ieee802154_mlme_ops(dev)->get_phy(dev); + BUG_ON(!phy); + + rc = -EINVAL; + /* phy name is optional, but should be checked if it's given */ + if (info->attrs[IEEE802154_ATTR_PHY_NAME]) { + struct wpan_phy *phy2; + + const char *pname = + nla_data(info->attrs[IEEE802154_ATTR_PHY_NAME]); + if (pname[nla_len(info->attrs[IEEE802154_ATTR_PHY_NAME]) - 1] + != '\0') + /* name should be null-terminated */ + goto out_dev; + + phy2 = wpan_phy_find(pname); + if (!phy2) + goto out_dev; + + if (phy != phy2) { + wpan_phy_put(phy2); + goto out_dev; + } + } + + rc = -ENOBUFS; + + msg = ieee802154_nl_new_reply(info, 0, IEEE802154_DEL_IFACE); + if (!msg) + goto out_dev; + + if (!phy->del_iface) { + rc = -EINVAL; + goto nla_put_failure; + } + + rtnl_lock(); + phy->del_iface(phy, dev); + + /* We don't have device anymore */ + dev_put(dev); + dev = NULL; + + rtnl_unlock(); + + + NLA_PUT_STRING(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)); + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, name); + + wpan_phy_put(phy); + + return ieee802154_nl_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); +out_dev: + wpan_phy_put(phy); + if (dev) + dev_put(dev); + + return rc; +} + +static struct genl_ops ieee802154_phy_ops[] = { + IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, + ieee802154_dump_phy), + IEEE802154_OP(IEEE802154_ADD_IFACE, ieee802154_add_iface), + IEEE802154_OP(IEEE802154_DEL_IFACE, ieee802154_del_iface), +}; + +/* + * No need to unregister as family unregistration will do it. 
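 * (Editorial note: the ops added below are attached to nl802154_family,
 *  so the single genl_unregister_family() call in ieee802154_nl_exit()
 *  removes them along with the multicast groups and the family itself;
 *  the same applies to the MAC-layer ops registered in
 *  nl802154_mac_register().)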
+ */ +int nl802154_phy_register(void) +{ + int i; + int rc; + + for (i = 0; i < ARRAY_SIZE(ieee802154_phy_ops); i++) { + rc = genl_register_ops(&nl802154_family, + &ieee802154_phy_ops[i]); + if (rc) + return rc; + } + + return 0; +} diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 2363ebee02e..6adda4d46f9 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -27,6 +27,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, }, [IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, }, + [IEEE802154_ATTR_PHY_NAME] = { .type = NLA_STRING, }, [IEEE802154_ATTR_STATUS] = { .type = NLA_U8, }, [IEEE802154_ATTR_SHORT_ADDR] = { .type = NLA_U16, }, @@ -50,5 +51,6 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_CHANNELS] = { .type = NLA_U32, }, [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, + [IEEE802154_ATTR_CHANNEL_PAGE_LIST] = { .len = 32 * 4, }, }; diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 30e74eee07d..9c9b85c0003 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -191,7 +191,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (flags & MSG_TRUNC) copied = skb->len; @@ -206,7 +206,6 @@ out: static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { if (sock_queue_rcv_skb(sk, skb) < 0) { - atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c index f306604da67..268691256a6 100644 --- a/net/ieee802154/wpan-class.c +++ b/net/ieee802154/wpan-class.c @@ -22,6 +22,8 @@ #include <net/wpan-phy.h> +#include "ieee802154.h" + #define MASTER_SHOW_COMPLEX(name, format_string, args...) \ static ssize_t name ## _show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -30,7 +32,7 @@ static ssize_t name ## _show(struct device *dev, \ int ret; \ \ mutex_lock(&phy->pib_lock); \ - ret = sprintf(buf, format_string "\n", args); \ + ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \ mutex_unlock(&phy->pib_lock); \ return ret; \ } @@ -40,12 +42,30 @@ static ssize_t name ## _show(struct device *dev, \ MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); -MASTER_SHOW(channels_supported, "%#x"); MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", ((signed char) (phy->transmit_power << 2)) >> 2, (phy->transmit_power >> 6) ? 
(phy->transmit_power >> 6) * 3 : 1 ); MASTER_SHOW(cca_mode, "%d"); +static ssize_t channels_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); + int ret; + int i, len = 0; + + mutex_lock(&phy->pib_lock); + for (i = 0; i < 32; i++) { + ret = snprintf(buf + len, PAGE_SIZE - len, + "%#09x\n", phy->channels_supported[i]); + if (ret < 0) + break; + len += ret; + } + mutex_unlock(&phy->pib_lock); + return len; +} + static struct device_attribute pmib_attrs[] = { __ATTR_RO(current_channel), __ATTR_RO(current_page), @@ -91,6 +111,31 @@ struct wpan_phy *wpan_phy_find(const char *str) } EXPORT_SYMBOL(wpan_phy_find); +struct wpan_phy_iter_data { + int (*fn)(struct wpan_phy *phy, void *data); + void *data; +}; + +static int wpan_phy_iter(struct device *dev, void *_data) +{ + struct wpan_phy_iter_data *wpid = _data; + struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); + return wpid->fn(phy, wpid->data); +} + +int wpan_phy_for_each(int (*fn)(struct wpan_phy *phy, void *data), + void *data) +{ + struct wpan_phy_iter_data wpid = { + .fn = fn, + .data = data, + }; + + return class_for_each_device(&wpan_phy_class, NULL, + &wpid, wpan_phy_iter); +} +EXPORT_SYMBOL(wpan_phy_for_each); + static int wpan_phy_idx_valid(int idx) { return idx >= 0; @@ -118,14 +163,15 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size) phy->dev.class = &wpan_phy_class; + phy->current_channel = -1; /* not initialised */ + phy->current_page = 0; /* for compatibility */ + return phy; } EXPORT_SYMBOL(wpan_phy_alloc); -int wpan_phy_register(struct device *parent, struct wpan_phy *phy) +int wpan_phy_register(struct wpan_phy *phy) { - phy->dev.parent = parent; - return device_add(&phy->dev); } EXPORT_SYMBOL(wpan_phy_register); @@ -144,16 +190,31 @@ EXPORT_SYMBOL(wpan_phy_free); static int __init wpan_phy_class_init(void) { - return class_register(&wpan_phy_class); + int rc; + rc = class_register(&wpan_phy_class); + if (rc) + goto err; + + rc = ieee802154_nl_init(); + if (rc) + goto err_nl; + + return 0; +err_nl: + class_unregister(&wpan_phy_class); +err: + return rc; } subsys_initcall(wpan_phy_class_init); static void __exit wpan_phy_class_exit(void) { + ieee802154_nl_exit(); class_unregister(&wpan_phy_class); } module_exit(wpan_phy_class_exit); -MODULE_DESCRIPTION("IEEE 802.15.4 device class"); MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("IEEE 802.15.4 configuration interface"); +MODULE_AUTHOR("Dmitry Eremin-Solenikov"); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 57737b8d171..7d12c6a9b19 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -174,12 +174,12 @@ static int inet_autobind(struct sock *sk) /* We may need to bind the socket. */ lock_sock(sk); inet = inet_sk(sk); - if (!inet->num) { + if (!inet->inet_num) { if (sk->sk_prot->get_port(sk, 0)) { release_sock(sk); return -EAGAIN; } - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); } release_sock(sk); return 0; @@ -262,7 +262,8 @@ static inline int inet_netns_ok(struct net *net, int protocol) * Create an inet socket. 
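 * (Editorial note: the new "kern" argument below lets in-kernel callers
 *  create SOCK_RAW sockets without CAP_NET_RAW, while userspace requests
 *  are still checked; this replaces the per-protosw "capability" field
 *  that the later hunks drop from inetsw_array.)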
*/ -static int inet_create(struct net *net, struct socket *sock, int protocol) +static int inet_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct inet_protosw *answer; @@ -325,7 +326,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) goto out_rcu_unlock; err = -EAFNOSUPPORT; @@ -354,7 +355,7 @@ lookup_protocol: inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { - inet->num = protocol; + inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -364,7 +365,7 @@ lookup_protocol: else inet->pmtudisc = IP_PMTUDISC_WANT; - inet->id = 0; + inet->inet_id = 0; sock_init_data(sock, sk); @@ -381,13 +382,13 @@ lookup_protocol: sk_refcnt_debug_inc(sk); - if (inet->num) { + if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ sk->sk_prot->hash(sk); } @@ -494,27 +495,27 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check these errors (active socket, double bind). */ err = -EINVAL; - if (sk->sk_state != TCP_CLOSE || inet->num) + if (sk->sk_state != TCP_CLOSE || inet->inet_num) goto out_release_sock; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - inet->saddr = 0; /* Use device */ + inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ if (sk->sk_prot->get_port(sk, snum)) { - inet->saddr = inet->rcv_saddr = 0; + inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; goto out_release_sock; } - if (inet->rcv_saddr) + if (inet->inet_rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = htons(inet->num); - inet->daddr = 0; - inet->dport = 0; + inet->inet_sport = htons(inet->inet_num); + inet->inet_daddr = 0; + inet->inet_dport = 0; sk_dst_reset(sk); err = 0; out_release_sock: @@ -532,7 +533,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); - if (!inet_sk(sk)->num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); } @@ -685,21 +686,21 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr); sin->sin_family = AF_INET; if (peer) { - if (!inet->dport || + if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) return -ENOTCONN; - sin->sin_port = inet->dport; - sin->sin_addr.s_addr = inet->daddr; + sin->sin_port = inet->inet_dport; + sin->sin_addr.s_addr = inet->inet_daddr; } else { - __be32 addr = inet->rcv_saddr; + __be32 addr = inet->inet_rcv_saddr; if (!addr) - addr = inet->saddr; - sin->sin_port = inet->sport; + addr = inet->inet_saddr; + sin->sin_port = inet->inet_sport; sin->sin_addr.s_addr = addr; } memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); @@ -714,7 +715,7 @@ int 
inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; /* We may need to bind the socket. */ - if (!inet_sk(sk)->num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->sendmsg(iocb, sk, msg, size); @@ -728,7 +729,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, struct sock *sk = sock->sk; /* We may need to bind the socket. */ - if (!inet_sk(sk)->num && inet_autobind(sk)) + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; if (sk->sk_prot->sendpage) @@ -931,7 +932,7 @@ static const struct proto_ops inet_sockraw_ops = { #endif }; -static struct net_proto_family inet_family_ops = { +static const struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE, @@ -947,7 +948,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, @@ -958,7 +958,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, - .capability = -1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }, @@ -969,7 +968,6 @@ static struct inet_protosw inetsw_array[] = .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } @@ -1059,9 +1057,9 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct inet_sock *inet = inet_sk(sk); int err; struct rtable *rt; - __be32 old_saddr = inet->saddr; + __be32 old_saddr = inet->inet_saddr; __be32 new_saddr; - __be32 daddr = inet->daddr; + __be32 daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; @@ -1071,7 +1069,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->sport, inet->dport, sk, 0); + inet->inet_sport, inet->inet_dport, sk, 0); if (err) return err; @@ -1087,7 +1085,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) __func__, &old_saddr, &new_saddr); } - inet->saddr = inet->rcv_saddr = new_saddr; + inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; /* * XXX The only one ugly spot where we need to @@ -1113,7 +1111,7 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. 
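 * (Editorial note: the destination defaults to inet->inet_daddr but is
 *  overridden by opt->faddr when a source route is present, mirroring
 *  the address selection done in inet_sk_reselect_saddr() above.)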
*/ - daddr = inet->daddr; + daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; { @@ -1123,7 +1121,7 @@ int inet_sk_rebuild_header(struct sock *sk) .nl_u = { .ip4_u = { .daddr = daddr, - .saddr = inet->saddr, + .saddr = inet->inet_saddr, .tos = RT_CONN_FLAGS(sk), }, }, @@ -1131,8 +1129,8 @@ int inet_sk_rebuild_header(struct sock *sk) .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { - .sport = inet->sport, - .dport = inet->dport, + .sport = inet->inet_sport, + .dport = inet->inet_dport, }, }, }; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 5c662703eb1..7ed3e4ae93a 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,3 +1,4 @@ +#include <crypto/hash.h> #include <linux/err.h> #include <linux/module.h> #include <net/ip.h> @@ -5,10 +6,67 @@ #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> -#include <linux/spinlock.h> +#include <linux/scatterlist.h> #include <net/icmp.h> #include <net/protocol.h> +struct ah_skb_cb { + struct xfrm_skb_cb xfrm; + void *tmp; +}; + +#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0])) + +static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, + unsigned int size) +{ + unsigned int len; + + len = size + crypto_ahash_digestsize(ahash) + + (crypto_ahash_alignmask(ahash) & + ~(crypto_tfm_ctx_alignment() - 1)); + + len = ALIGN(len, crypto_tfm_ctx_alignment()); + + len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash); + len = ALIGN(len, __alignof__(struct scatterlist)); + + len += sizeof(struct scatterlist) * nfrags; + + return kmalloc(len, GFP_ATOMIC); +} + +static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset) +{ + return tmp + offset; +} + +static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp, + unsigned int offset) +{ + return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1); +} + +static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, + u8 *icv) +{ + struct ahash_request *req; + + req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash), + crypto_tfm_ctx_alignment()); + + ahash_request_set_tfm(req, ahash); + + return req; +} + +static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, + struct ahash_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_ahash_reqsize(ahash), + __alignof__(struct scatterlist)); +} /* Clear mutable options and find final destination to substitute * into IP header for icv calculation. 
Options are already checked @@ -54,20 +112,72 @@ static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr) return 0; } +static void ah_output_done(struct crypto_async_request *base, int err) +{ + u8 *icv; + struct iphdr *iph; + struct sk_buff *skb = base->data; + struct xfrm_state *x = skb_dst(skb)->xfrm; + struct ah_data *ahp = x->data; + struct iphdr *top_iph = ip_hdr(skb); + struct ip_auth_hdr *ah = ip_auth_hdr(skb); + int ihl = ip_hdrlen(skb); + + iph = AH_SKB_CB(skb)->tmp; + icv = ah_tmp_icv(ahp->ahash, iph, ihl); + memcpy(ah->auth_data, icv, ahp->icv_trunc_len); + + top_iph->tos = iph->tos; + top_iph->ttl = iph->ttl; + top_iph->frag_off = iph->frag_off; + if (top_iph->ihl != 5) { + top_iph->daddr = iph->daddr; + memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); + } + + err = ah->nexthdr; + + kfree(AH_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); +} + static int ah_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + int nfrags; + int ihl; + u8 *icv; + struct sk_buff *trailer; + struct crypto_ahash *ahash; + struct ahash_request *req; + struct scatterlist *sg; struct iphdr *iph, *top_iph; struct ip_auth_hdr *ah; struct ah_data *ahp; - union { - struct iphdr iph; - char buf[60]; - } tmp_iph; + + ahp = x->data; + ahash = ahp->ahash; + + if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + goto out; + nfrags = err; skb_push(skb, -skb_network_offset(skb)); + ah = ip_auth_hdr(skb); + ihl = ip_hdrlen(skb); + + err = -ENOMEM; + iph = ah_alloc_tmp(ahash, nfrags, ihl); + if (!iph) + goto out; + + icv = ah_tmp_icv(ahash, iph, ihl); + req = ah_tmp_req(ahash, icv); + sg = ah_req_sg(ahash, req); + + memset(ah->auth_data, 0, ahp->icv_trunc_len); + top_iph = ip_hdr(skb); - iph = &tmp_iph.iph; iph->tos = top_iph->tos; iph->ttl = top_iph->ttl; @@ -78,10 +188,9 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); err = ip_clear_mutable_options(top_iph, &top_iph->daddr); if (err) - goto error; + goto out_free; } - ah = ip_auth_hdr(skb); ah->nexthdr = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_AH; @@ -91,20 +200,31 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ttl = 0; top_iph->check = 0; - ahp = x->data; ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); - spin_lock_bh(&x->lock); - err = ah_mac_digest(ahp, skb, ah->auth_data); - memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); - spin_unlock_bh(&x->lock); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); - if (err) - goto error; + ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_callback(req, 0, ah_output_done, skb); + + AH_SKB_CB(skb)->tmp = iph; + + err = crypto_ahash_digest(req); + if (err) { + if (err == -EINPROGRESS) + goto out; + + if (err == -EBUSY) + err = NET_XMIT_DROP; + goto out_free; + } + + memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -114,28 +234,67 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } - err = 0; - -error: +out_free: + kfree(iph); +out: return err; } +static void ah_input_done(struct crypto_async_request *base, int err) +{ + u8 *auth_data; + u8 *icv; + struct iphdr *work_iph; + struct sk_buff *skb = base->data; + struct xfrm_state *x = xfrm_input_state(skb); + struct ah_data *ahp = 
x->data; + struct ip_auth_hdr *ah = ip_auth_hdr(skb); + int ihl = ip_hdrlen(skb); + int ah_hlen = (ah->hdrlen + 2) << 2; + + work_iph = AH_SKB_CB(skb)->tmp; + auth_data = ah_tmp_auth(work_iph, ihl); + icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); + + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + if (err) + goto out; + + skb->network_header += ah_hlen; + memcpy(skb_network_header(skb), work_iph, ihl); + __skb_pull(skb, ah_hlen + ihl); + skb_set_transport_header(skb, -ihl); + + err = ah->nexthdr; +out: + kfree(AH_SKB_CB(skb)->tmp); + xfrm_input_resume(skb, err); +} + static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; int nexthdr; - int err = -EINVAL; - struct iphdr *iph; + int nfrags; + u8 *auth_data; + u8 *icv; + struct sk_buff *trailer; + struct crypto_ahash *ahash; + struct ahash_request *req; + struct scatterlist *sg; + struct iphdr *iph, *work_iph; struct ip_auth_hdr *ah; struct ah_data *ahp; - char work_buf[60]; + int err = -ENOMEM; if (!pskb_may_pull(skb, sizeof(*ah))) goto out; ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; + ahash = ahp->ahash; + nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; @@ -156,9 +315,24 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) ah = (struct ip_auth_hdr *)skb->data; iph = ip_hdr(skb); + ihl = ip_hdrlen(skb); + + if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + goto out; + nfrags = err; + + work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len); + if (!work_iph) + goto out; + + auth_data = ah_tmp_auth(work_iph, ihl); + icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); + req = ah_tmp_req(ahash, icv); + sg = ah_req_sg(ahash, req); - ihl = skb->data - skb_network_header(skb); - memcpy(work_buf, iph, ihl); + memcpy(work_iph, iph, ihl); + memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); + memset(ah->auth_data, 0, ahp->icv_trunc_len); iph->ttl = 0; iph->tos = 0; @@ -166,35 +340,44 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) iph->check = 0; if (ihl > sizeof(*iph)) { __be32 dummy; - if (ip_clear_mutable_options(iph, &dummy)) - goto out; + err = ip_clear_mutable_options(iph, &dummy); + if (err) + goto out_free; } - spin_lock(&x->lock); - { - u8 auth_data[MAX_AH_AUTH_LEN]; + skb_push(skb, ihl); - memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); - skb_push(skb, ihl); - err = ah_mac_digest(ahp, skb, ah->auth_data); - if (err) - goto unlock; - if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) - err = -EBADMSG; + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); + + ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_callback(req, 0, ah_input_done, skb); + + AH_SKB_CB(skb)->tmp = work_iph; + + err = crypto_ahash_digest(req); + if (err) { + if (err == -EINPROGRESS) + goto out; + + if (err == -EBUSY) + err = NET_XMIT_DROP; + goto out_free; } -unlock: - spin_unlock(&x->lock); + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG: 0; if (err) - goto out; + goto out_free; skb->network_header += ah_hlen; - memcpy(skb_network_header(skb), work_buf, ihl); - skb->transport_header = skb->network_header; + memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); + skb_set_transport_header(skb, -ihl); - return nexthdr; + err = nexthdr; +out_free: + kfree (work_iph); out: return err; } @@ -222,7 +405,7 @@ static int ah_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; - struct crypto_hash *tfm; + struct crypto_ahash *ahash; if (!x->aalg) goto error; @@ -231,44 +414,40 @@ static int ah_init_state(struct xfrm_state *x) goto error; ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); - if (ahp == NULL) + if (!ahp) return -ENOMEM; - tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) + ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); + if (IS_ERR(ahash)) goto error; - ahp->tfm = tfm; - if (crypto_hash_setkey(tfm, x->aalg->alg_key, - (x->aalg->alg_key_len + 7) / 8)) + ahp->ahash = ahash; + if (crypto_ahash_setkey(ahash, x->aalg->alg_key, + (x->aalg->alg_key_len + 7) / 8)) goto error; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_hash(). + * after a successful crypto_alloc_ahash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_hash_digestsize(tfm)) { + crypto_ahash_digestsize(ahash)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_hash_digestsize(tfm), + x->aalg->alg_name, crypto_ahash_digestsize(ahash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; + ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); - ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL); - if (!ahp->work_icv) - goto error; - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) @@ -279,8 +458,7 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { - kfree(ahp->work_icv); - crypto_free_hash(ahp->tfm); + crypto_free_ahash(ahp->ahash); kfree(ahp); } return -EINVAL; @@ -293,8 +471,7 @@ static void ah_destroy(struct xfrm_state *x) if (!ahp) return; - kfree(ahp->work_icv); - crypto_free_hash(ahp->tfm); + crypto_free_ahash(ahp->ahash); kfree(ahp); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4e80f336c0c..c95cd93acf2 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1240,7 +1240,7 @@ void __init arp_init(void) arp_proc_init(); #ifdef CONFIG_SYSCTL neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL, NULL); + NET_IPV4_NEIGH, "ipv4", NULL); #endif register_netdevice_notifier(&arp_netdev_notifier); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 039cc1ffe97..1e029dc7545 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -2017,7 +2017,7 @@ req_setattr_failure: * values on failure. 
* */ -int cipso_v4_delopt(struct ip_options **opt_ptr) +static int cipso_v4_delopt(struct ip_options **opt_ptr) { int hdr_delta = 0; struct ip_options *opt = *opt_ptr; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 5e6c5a0f3fd..fb2465811b4 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -39,7 +39,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); oif = sk->sk_bound_dev_if; - saddr = inet->saddr; + saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { if (!oif) oif = inet->mc_index; @@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, 1); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -60,14 +60,14 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ip_rt_put(rt); return -EACCES; } - if (!inet->saddr) - inet->saddr = rt->rt_src; /* Update source address */ - if (!inet->rcv_saddr) - inet->rcv_saddr = rt->rt_src; - inet->daddr = rt->rt_dst; - inet->dport = usin->sin_port; + if (!inet->inet_saddr) + inet->inet_saddr = rt->rt_src; /* Update source address */ + if (!inet->inet_rcv_saddr) + inet->inet_rcv_saddr = rt->rt_src; + inet->inet_daddr = rt->rt_dst; + inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; - inet->id = jiffies; + inet->inet_id = jiffies; sk_dst_set(sk, &rt->u.dst); return(0); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5df2f6a0b0f..5cdbc102a41 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -140,11 +140,11 @@ void in_dev_finish_destroy(struct in_device *idev) #endif dev_put(dev); if (!idev->dead) - printk("Freeing alive in_device %p\n", idev); - else { + pr_err("Freeing alive in_device %p\n", idev); + else kfree(idev); - } } +EXPORT_SYMBOL(in_dev_finish_destroy); static struct in_device *inetdev_init(struct net_device *dev) { @@ -159,7 +159,8 @@ static struct in_device *inetdev_init(struct net_device *dev) sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; - if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) + in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl); + if (!in_dev->arp_parms) goto out_kfree; if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) dev_disable_lro(dev); @@ -405,13 +406,15 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; - read_lock(&dev_base_lock); - dev = __dev_get_by_index(net, ifindex); + + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); if (dev) in_dev = in_dev_get(dev); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return in_dev; } +EXPORT_SYMBOL(inetdev_by_index); /* Called only from RTNL semaphored context. No locks. */ @@ -557,7 +560,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg * Determine a default network mask, based on the IP address. */ -static __inline__ int inet_abc_len(__be32 addr) +static inline int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. 
*/ @@ -646,13 +649,15 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) + dev = __dev_get_by_name(net, ifr.ifr_name); + if (!dev) goto done; if (colon) *colon = ':'; - if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { + in_dev = __in_dev_get_rtnl(dev); + if (in_dev) { if (tryaddrmatch) { /* Matthias Andree */ /* compare label and address (4.4BSD style) */ @@ -720,7 +725,8 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { ret = -ENOBUFS; - if ((ifa = inet_alloc_ifa()) == NULL) + ifa = inet_alloc_ifa(); + if (!ifa) break; if (colon) memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); @@ -822,10 +828,10 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) struct ifreq ifr; int done = 0; - if (!in_dev || (ifa = in_dev->ifa_list) == NULL) + if (!in_dev) goto out; - for (; ifa; ifa = ifa->ifa_next) { + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { if (!buf) { done += sizeof(ifr); continue; @@ -875,36 +881,33 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) if (!addr) addr = ifa->ifa_local; } endfor_ifa(in_dev); -no_in_dev: - rcu_read_unlock(); if (addr) - goto out; + goto out_unlock; +no_in_dev: /* Not loopback addresses on loopback should be preferred in this case. It is importnat that lo is the first interface in dev_base list. */ - read_lock(&dev_base_lock); - rcu_read_lock(); - for_each_netdev(net, dev) { - if ((in_dev = __in_dev_get_rcu(dev)) == NULL) + for_each_netdev_rcu(net, dev) { + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) continue; for_primary_ifa(in_dev) { if (ifa->ifa_scope != RT_SCOPE_LINK && ifa->ifa_scope <= scope) { addr = ifa->ifa_local; - goto out_unlock_both; + goto out_unlock; } } endfor_ifa(in_dev); } -out_unlock_both: - read_unlock(&dev_base_lock); +out_unlock: rcu_read_unlock(); -out: return addr; } +EXPORT_SYMBOL(inet_select_addr); static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, __be32 local, int scope) @@ -940,7 +943,7 @@ static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst, } } endfor_ifa(in_dev); - return same? addr : 0; + return same ? addr : 0; } /* @@ -961,17 +964,16 @@ __be32 inet_confirm_addr(struct in_device *in_dev, return confirm_addr_indev(in_dev, dst, local, scope); net = dev_net(in_dev->dev); - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { - if ((in_dev = __in_dev_get_rcu(dev))) { + for_each_netdev_rcu(net, dev) { + in_dev = __in_dev_get_rcu(dev); + if (in_dev) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) break; } } rcu_read_unlock(); - read_unlock(&dev_base_lock); return addr; } @@ -984,14 +986,16 @@ int register_inetaddr_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&inetaddr_chain, nb); } +EXPORT_SYMBOL(register_inetaddr_notifier); int unregister_inetaddr_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&inetaddr_chain, nb); } +EXPORT_SYMBOL(unregister_inetaddr_notifier); -/* Rename ifa_labels for a device name change. Make some effort to preserve existing - * alias numbering and to create unique labels if possible. +/* Rename ifa_labels for a device name change. Make some effort to preserve + * existing alias numbering and to create unique labels if possible. 
*/ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) { @@ -1010,11 +1014,10 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev) sprintf(old, ":%d", named); dot = old; } - if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) { + if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) strcat(ifa->ifa_label, dot); - } else { + else strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot); - } skip: rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0); } @@ -1061,8 +1064,9 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (!inetdev_valid_mtu(dev->mtu)) break; if (dev->flags & IFF_LOOPBACK) { - struct in_ifaddr *ifa; - if ((ifa = inet_alloc_ifa()) != NULL) { + struct in_ifaddr *ifa = inet_alloc_ifa(); + + if (ifa) { ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; @@ -1170,38 +1174,54 @@ nla_put_failure: static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx, ip_idx; + int h, s_h; + int idx, s_idx; + int ip_idx, s_ip_idx; struct net_device *dev; struct in_device *in_dev; struct in_ifaddr *ifa; - int s_ip_idx, s_idx = cb->args[0]; + struct hlist_head *head; + struct hlist_node *node; - s_ip_idx = ip_idx = cb->args[1]; - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - s_ip_idx = 0; - if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) - goto cont; - - for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; - ifa = ifa->ifa_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + s_ip_idx = ip_idx = cb->args[2]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + s_ip_idx = 0; + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + goto cont; + + for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; + ifa = ifa->ifa_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if (inet_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, NLM_F_MULTI) <= 0) - goto done; - } + RTM_NEWADDR, NLM_F_MULTI) <= 0) { + rcu_read_unlock(); + goto done; + } + } cont: - idx++; + idx++; + } + rcu_read_unlock(); } done: - cb->args[0] = idx; - cb->args[1] = ip_idx; + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = ip_idx; return skb->len; } @@ -1239,18 +1259,18 @@ static void devinet_copy_dflt_conf(struct net *net, int i) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { struct in_device *in_dev; - rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); if (in_dev && !test_bit(i, in_dev->cnf.state)) in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i]; - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } +/* called with RTNL locked */ static void inet_forward_change(struct net *net) { struct net_device *dev; @@ -1259,7 +1279,6 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; - read_lock(&dev_base_lock); for_each_netdev(net, dev) { struct in_device *in_dev; if (on) @@ -1270,7 +1289,6 @@ static void inet_forward_change(struct net *net) IN_DEV_CONF_SET(in_dev, FORWARDING, on); rcu_read_unlock(); } - 
read_unlock(&dev_base_lock); } static int devinet_conf_proc(ctl_table *ctl, int write, @@ -1293,58 +1311,6 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_conf_sysctl(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - struct ipv4_devconf *cnf; - struct net *net; - int *valp = table->data; - int new; - int i; - - if (!newval || !newlen) - return 0; - - if (newlen != sizeof(int)) - return -EINVAL; - - if (get_user(new, (int __user *)newval)) - return -EFAULT; - - if (new == *valp) - return 0; - - if (oldval && oldlenp) { - size_t len; - - if (get_user(len, oldlenp)) - return -EFAULT; - - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - *valp = new; - - cnf = table->extra1; - net = table->extra2; - i = (int *)table->data - cnf->data; - - set_bit(i, cnf->state); - - if (cnf == net->ipv4.devconf_dflt) - devinet_copy_dflt_conf(net, i); - - return 1; -} - static int devinet_sysctl_forward(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1390,47 +1356,28 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); - struct net *net = table->extra2; - - if (ret == 1) - rt_cache_flush(net, 0); - - return ret; -} - - -#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \ +#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \ { \ - .ctl_name = NET_IPV4_CONF_ ## attr, \ .procname = name, \ .data = ipv4_devconf.data + \ NET_IPV4_CONF_ ## attr - 1, \ .maxlen = sizeof(int), \ .mode = mval, \ .proc_handler = proc, \ - .strategy = sysctl, \ .extra1 = &ipv4_devconf, \ } #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \ - devinet_conf_sysctl) + DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc) #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \ - devinet_conf_sysctl) + DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc) -#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \ - DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl) +#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \ + DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc) #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \ - DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \ - ipv4_doint_and_flush_strategy) + DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush) static struct devinet_sysctl_table { struct ctl_table_header *sysctl_header; @@ -1439,8 +1386,7 @@ static struct devinet_sysctl_table { } devinet_sysctl = { .devinet_vars = { DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding", - devinet_sysctl_forward, - devinet_conf_sysctl), + devinet_sysctl_forward), DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"), @@ -1450,6 +1396,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), + DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, 
"medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), @@ -1471,7 +1418,7 @@ static struct devinet_sysctl_table { }; static int __devinet_sysctl_register(struct net *net, char *dev_name, - int ctl_name, struct ipv4_devconf *p) + struct ipv4_devconf *p) { int i; struct devinet_sysctl_table *t; @@ -1479,9 +1426,9 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, #define DEVINET_CTL_PATH_DEV 3 struct ctl_path devinet_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "conf", .ctl_name = NET_IPV4_CONF, }, + { .procname = "net", }, + { .procname = "ipv4", }, + { .procname = "conf", }, { /* to be set */ }, { }, }; @@ -1506,7 +1453,6 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, goto free; devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name; - devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name; t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path, t->devinet_vars); @@ -1540,9 +1486,9 @@ static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf) static void devinet_sysctl_register(struct in_device *idev) { neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4, - NET_IPV4_NEIGH, "ipv4", NULL, NULL); + NET_IPV4_NEIGH, "ipv4", NULL); __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name, - idev->dev->ifindex, &idev->cnf); + &idev->cnf); } static void devinet_sysctl_unregister(struct in_device *idev) @@ -1553,14 +1499,12 @@ static void devinet_sysctl_unregister(struct in_device *idev) static struct ctl_table ctl_forward_entry[] = { { - .ctl_name = NET_IPV4_FORWARD, .procname = "ip_forward", .data = &ipv4_devconf.data[ NET_IPV4_CONF_FORWARDING - 1], .maxlen = sizeof(int), .mode = 0644, .proc_handler = devinet_sysctl_forward, - .strategy = devinet_conf_sysctl, .extra1 = &ipv4_devconf, .extra2 = &init_net, }, @@ -1568,8 +1512,8 @@ static struct ctl_table ctl_forward_entry[] = { }; static __net_initdata struct ctl_path net_ipv4_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { }, }; #endif @@ -1587,7 +1531,7 @@ static __net_init int devinet_init_net(struct net *net) all = &ipv4_devconf; dflt = &ipv4_devconf_dflt; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL); if (all == NULL) goto err_alloc_all; @@ -1608,13 +1552,11 @@ static __net_init int devinet_init_net(struct net *net) } #ifdef CONFIG_SYSCTL - err = __devinet_sysctl_register(net, "all", - NET_PROTO_CONF_ALL, all); + err = __devinet_sysctl_register(net, "all", all); if (err < 0) goto err_reg_all; - err = __devinet_sysctl_register(net, "default", - NET_PROTO_CONF_DEFAULT, dflt); + err = __devinet_sysctl_register(net, "default", dflt); if (err < 0) goto err_reg_dflt; @@ -1680,8 +1622,3 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); } -EXPORT_SYMBOL(in_dev_finish_destroy); -EXPORT_SYMBOL(inet_select_addr); -EXPORT_SYMBOL(inetdev_by_index); -EXPORT_SYMBOL(register_inetaddr_notifier); -EXPORT_SYMBOL(unregister_inetaddr_notifier); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 12f7287e902..1948895beb6 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -530,7 +530,7 @@ static int esp_init_authenc(struct xfrm_state *x) } err = crypto_aead_setauthsize( - aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + aead, x->aalg->alg_trunc_len / 8); if 
(err) goto free_key; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index aa00398be80..3323168ee52 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -125,7 +125,7 @@ void fib_select_default(struct net *net, #endif tb = fib_get_table(net, table); if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - tb->tb_select_default(tb, flp, res); + fib_table_select_default(tb, flp, res); } static void fib_flush(struct net *net) @@ -139,7 +139,7 @@ static void fib_flush(struct net *net) for (h = 0; h < FIB_TABLE_HASHSZ; h++) { head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry(tb, node, head, tb_hlist) - flushed += tb->tb_flush(tb); + flushed += fib_table_flush(tb); } if (flushed) @@ -162,7 +162,7 @@ struct net_device * ip_dev_find(struct net *net, __be32 addr) #endif local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) + if (!local_table || fib_table_lookup(local_table, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; @@ -200,7 +200,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - if (!local_table->tb_lookup(local_table, &fl, &res)) { + if (!fib_table_lookup(local_table, &fl, &res)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; fib_res_put(&res); @@ -241,16 +241,17 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, .iif = oif }; struct fib_result res; - int no_addr, rpf; + int no_addr, rpf, accept_local; int ret; struct net *net; - no_addr = rpf = 0; + no_addr = rpf = accept_local = 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); + accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); } rcu_read_unlock(); @@ -260,8 +261,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, net = dev_net(dev); if (fib_lookup(net, &fl, &res)) goto last_resort; - if (res.type != RTN_UNICAST) - goto e_inval_res; + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !accept_local) + goto e_inval_res; + } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -476,13 +479,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = tb->tb_delete(tb, &cfg); + err = fib_table_delete(tb, &cfg); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = tb->tb_insert(tb, &cfg); + err = fib_table_insert(tb, &cfg); else err = -ENOBUFS; } @@ -597,7 +600,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar goto errout; } - err = tb->tb_delete(tb, &cfg); + err = fib_table_delete(tb, &cfg); errout: return err; } @@ -619,7 +622,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *ar goto errout; } - err = tb->tb_insert(tb, &cfg); + err = fib_table_insert(tb, &cfg); errout: return err; } @@ -650,7 +653,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - if (tb->tb_dump(tb, skb, cb) < 0) + if (fib_table_dump(tb, skb, cb) < 0) goto out; dumped = 1; next: @@ -704,9 +707,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &cfg); + 
fib_table_insert(tb, &cfg); else - tb->tb_delete(tb, &cfg); + fib_table_delete(tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -835,7 +838,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) local_bh_disable(); frn->tb_id = tb->tb_id; - frn->err = tb->tb_lookup(tb, &fl, &res); + frn->err = fib_table_lookup(tb, &fl, &res); if (!frn->err) { frn->prefixlen = res.prefixlen; @@ -895,11 +898,11 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force) +static void fib_disable_ip(struct net_device *dev, int force, int delay) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(dev_net(dev), delay); arp_ifdown(dev); } @@ -922,7 +925,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. Disable IP. */ - fib_disable_ip(dev, 1); + fib_disable_ip(dev, 1, 0); } else { rt_cache_flush(dev_net(dev), -1); } @@ -937,7 +940,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct in_device *in_dev = __in_dev_get_rtnl(dev); if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2); + fib_disable_ip(dev, 2, -1); return NOTIFY_DONE; } @@ -955,12 +958,15 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0); + fib_disable_ip(dev, 0, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: rt_cache_flush(dev_net(dev), 0); break; + case NETDEV_UNREGISTER_BATCH: + rt_cache_flush_batch(); + break; } return NOTIFY_DONE; } @@ -1012,7 +1018,7 @@ static void __net_exit ip_fib_net_exit(struct net *net) head = &net->ipv4.fib_table_hash[i]; hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { hlist_del(node); - tb->tb_flush(tb); + fib_table_flush(tb); kfree(tb); } } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index ecd39454235..14972017b9c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -242,8 +242,8 @@ fn_new_zone(struct fn_hash *table, int z) return fz; } -static int -fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) +int fib_table_lookup(struct fib_table *tb, + const struct flowi *flp, struct fib_result *res) { int err; struct fn_zone *fz; @@ -274,8 +274,8 @@ out: return err; } -static void -fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) +void fib_table_select_default(struct fib_table *tb, + const struct flowi *flp, struct fib_result *res) { int order, last_idx; struct hlist_node *node; @@ -366,7 +366,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) return NULL; } -static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f = NULL; @@ -544,8 +544,7 @@ out: return err; } - -static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *)tb->tb_data; struct fib_node *f; @@ -662,7 +661,7 @@ static int fn_flush_list(struct fn_zone *fz, int idx) return found; } -static int fn_hash_flush(struct fib_table *tb) +int fib_table_flush(struct fib_table *tb) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; 
struct fn_zone *fz; @@ -743,7 +742,8 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, return skb->len; } -static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) +int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, + struct netlink_callback *cb) { int m, s_m; struct fn_zone *fz; @@ -787,12 +787,7 @@ struct fib_table *fib_hash_table(u32 id) tb->tb_id = id; tb->tb_default = -1; - tb->tb_lookup = fn_hash_lookup; - tb->tb_insert = fn_hash_insert; - tb->tb_delete = fn_hash_delete; - tb->tb_flush = fn_hash_flush; - tb->tb_select_default = fn_hash_select_default; - tb->tb_dump = fn_hash_dump; + memset(tb->tb_data, 0, sizeof(struct fn_hash)); return tb; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 92d9d97ec5e..ca2d07b1c70 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -94,7 +94,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) goto errout; - err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); + err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); if (err > 0) err = -EAGAIN; errout: @@ -284,7 +284,7 @@ static int fib_default_rules_init(struct fib_rules_ops *ops) { int err; - err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, FIB_RULE_PERMANENT); + err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0); if (err < 0) return err; err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0); @@ -301,13 +301,9 @@ int __net_init fib4_rules_init(struct net *net) int err; struct fib_rules_ops *ops; - ops = kmemdup(&fib4_rules_ops_template, sizeof(*ops), GFP_KERNEL); - if (ops == NULL) - return -ENOMEM; - INIT_LIST_HEAD(&ops->rules_list); - ops->fro_net = net; - - fib_rules_register(ops); + ops = fib_rules_register(&fib4_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); err = fib_default_rules_init(ops); if (err < 0) @@ -318,12 +314,10 @@ int __net_init fib4_rules_init(struct net *net) fail: /* also cleans all rules already added */ fib_rules_unregister(ops); - kfree(ops); return err; } void __net_exit fib4_rules_exit(struct net *net) { fib_rules_unregister(net->ipv4.rules_ops); - kfree(net->ipv4.rules_ops); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9b096d6ff3f..ed19aa6919c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -228,7 +228,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) head = &fib_info_hash[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { - if (fi->fib_net != nfi->fib_net) + if (!net_eq(fi->fib_net, nfi->fib_net)) continue; if (fi->fib_nhs != nfi->fib_nhs) continue; @@ -1047,7 +1047,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) return 0; hlist_for_each_entry(fi, node, head, fib_lhash) { - if (fi->fib_net != net) + if (!net_eq(fi->fib_net, net)) continue; if (fi->fib_prefsrc == local) { fi->fib_flags |= RTNH_F_DEAD; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 291bdf50a21..af5d8979286 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1174,7 +1174,7 @@ done: /* * Caller must hold RTNL. 
*/ -static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *new_fa; @@ -1373,8 +1373,8 @@ static int check_leaf(struct trie *t, struct leaf *l, return 1; } -static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res) +int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, + struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; int ret; @@ -1595,7 +1595,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) /* * Caller must hold RTNL. */ -static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; @@ -1786,7 +1786,7 @@ static struct leaf *trie_leafindex(struct trie *t, int index) /* * Caller must hold RTNL. */ -static int fn_trie_flush(struct fib_table *tb) +int fib_table_flush(struct fib_table *tb) { struct trie *t = (struct trie *) tb->tb_data; struct leaf *l, *ll = NULL; @@ -1807,9 +1807,9 @@ static int fn_trie_flush(struct fib_table *tb) return found; } -static void fn_trie_select_default(struct fib_table *tb, - const struct flowi *flp, - struct fib_result *res) +void fib_table_select_default(struct fib_table *tb, + const struct flowi *flp, + struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; int order, last_idx; @@ -1952,8 +1952,8 @@ static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb, return skb->len; } -static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) +int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, + struct netlink_callback *cb) { struct leaf *l; struct trie *t = (struct trie *) tb->tb_data; @@ -2020,12 +2020,6 @@ struct fib_table *fib_hash_table(u32 id) tb->tb_id = id; tb->tb_default = -1; - tb->tb_lookup = fn_trie_lookup; - tb->tb_insert = fn_trie_insert; - tb->tb_delete = fn_trie_delete; - tb->tb_flush = fn_trie_flush; - tb->tb_select_default = fn_trie_select_default; - tb->tb_dump = fn_trie_dump; t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5bc13fe816d..fe11f60ce41 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -501,15 +501,16 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (!(rt->rt_flags & RTCF_LOCAL)) { struct net_device *dev = NULL; + rcu_read_lock(); if (rt->fl.iif && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index(net, rt->fl.iif); + dev = dev_get_by_index_rcu(net, rt->fl.iif); - if (dev) { + if (dev) saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); - dev_put(dev); - } else + else saddr = 0; + rcu_read_unlock(); } tos = icmp_pointers[type].error ? 
((iph->tos & IPTOS_TOS_MASK) | @@ -1165,6 +1166,10 @@ static int __net_init icmp_sk_init(struct net *net) sk->sk_sndbuf = (2 * ((64 * 1024) + sizeof(struct sk_buff))); + /* + * Speedup sock_wfree() + */ + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); inet_sk(sk)->pmtudisc = IP_PMTUDISC_DONT; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d41e5de79a8..76c08402c93 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1899,8 +1899,9 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct err = -EADDRNOTAVAIL; for (pmc=inet->mc_list; pmc; pmc=pmc->next) { - if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr - && pmc->multi.imr_ifindex == imr.imr_ifindex) + if ((pmc->multi.imr_multiaddr.s_addr == + imr.imr_multiaddr.s_addr) && + (pmc->multi.imr_ifindex == imr.imr_ifindex)) break; } if (!pmc) { /* must have a prior join */ @@ -2311,9 +2312,10 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); state->in_dev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct in_device *in_dev; - in_dev = in_dev_get(state->dev); + + in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; read_lock(&in_dev->mc_list_lock); @@ -2323,7 +2325,6 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) break; } read_unlock(&in_dev->mc_list_lock); - in_dev_put(in_dev); } return im; } @@ -2333,16 +2334,15 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); im = im->next; while (!im) { - if (likely(state->in_dev != NULL)) { + if (likely(state->in_dev != NULL)) read_unlock(&state->in_dev->mc_list_lock); - in_dev_put(state->in_dev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; break; } - state->in_dev = in_dev_get(state->dev); + state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; read_lock(&state->in_dev->mc_list_lock); @@ -2361,9 +2361,9 @@ static struct ip_mc_list *igmp_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp_mc_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(rcu) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? 
igmp_mc_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2379,16 +2379,15 @@ static void *igmp_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp_mc_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); if (likely(state->in_dev != NULL)) { read_unlock(&state->in_dev->mc_list_lock); - in_dev_put(state->in_dev); state->in_dev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp_mc_seq_show(struct seq_file *seq, void *v) @@ -2462,9 +2461,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct in_device *idev; - idev = in_dev_get(state->dev); + idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; read_lock(&idev->mc_list_lock); @@ -2480,7 +2479,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) spin_unlock_bh(&im->lock); } read_unlock(&idev->mc_list_lock); - in_dev_put(idev); } return psf; } @@ -2494,16 +2492,15 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock(&state->idev->mc_list_lock); - in_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } - state->idev = in_dev_get(state->dev); + state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; read_lock(&state->idev->mc_list_lock); @@ -2528,8 +2525,9 @@ static struct ip_sf_list *igmp_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp_mcf_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? 
igmp_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2545,6 +2543,7 @@ static void *igmp_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) { struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); if (likely(state->im != NULL)) { @@ -2553,11 +2552,10 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) } if (likely(state->idev != NULL)) { read_unlock(&state->idev->mc_list_lock); - in_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp_mcf_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 537731b3bcb..ee16475f8fc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -112,7 +112,7 @@ again: hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (ib_net(tb) == net && tb->port == rover) { + if (net_eq(ib_net(tb), net) && tb->port == rover) { if (tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN && @@ -158,7 +158,7 @@ have_snum: hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (ib_net(tb) == net && tb->port == snum) + if (net_eq(ib_net(tb), net) && tb->port == snum) goto tb_found; } tb = NULL; @@ -358,6 +358,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; struct flowi fl = { .oif = sk->sk_bound_dev_if, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = ((opt && opt->srr) ? opt->faddr : @@ -367,7 +368,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = - { .sport = inet_sk(sk)->sport, + { .sport = inet_sk(sk)->inet_sport, .dport = ireq->rmt_port } } }; struct net *net = sock_net(sk); @@ -530,7 +531,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, &expire, &resend); if (!expire && (!resend || - !req->rsk_ops->rtx_syn_ack(parent, req) || + !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || inet_rsk(req)->acked)) { unsigned long timeo; @@ -574,9 +575,9 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_state = TCP_SYN_RECV; newicsk->icsk_bind_hash = NULL; - inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; - inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); - inet_sk(newsk)->sport = inet_rsk(req)->loc_port; + inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; @@ -607,8 +608,8 @@ void inet_csk_destroy_sock(struct sock *sk) /* It cannot be in hash table! */ WARN_ON(!sk_unhashed(sk)); - /* If it has not 0 inet_sk(sk)->num, it must be bound */ - WARN_ON(inet_sk(sk)->num && !inet_csk(sk)->icsk_bind_hash); + /* If it has not 0 inet_sk(sk)->inet_num, it must be bound */ + WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -643,8 +644,8 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) * after validation is complete. 
*/ sk->sk_state = TCP_LISTEN; - if (!sk->sk_prot->get_port(sk, inet->num)) { - inet->sport = htons(inet->num); + if (!sk->sk_prot->get_port(sk, inet->inet_num)) { + inet->inet_sport = htons(inet->inet_num); sk_dst_reset(sk); sk->sk_prot->hash(sk); @@ -720,8 +721,8 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) const struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; - sin->sin_addr.s_addr = inet->daddr; - sin->sin_port = inet->dport; + sin->sin_addr.s_addr = inet->inet_daddr; + sin->sin_port = inet->inet_dport; } EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index a706a47f4db..bdb78dd180c 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -116,10 +116,10 @@ static int inet_csk_diag_fill(struct sock *sk, r->id.idiag_cookie[0] = (u32)(unsigned long)sk; r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - r->id.idiag_sport = inet->sport; - r->id.idiag_dport = inet->dport; - r->id.idiag_src[0] = inet->rcv_saddr; - r->id.idiag_dst[0] = inet->daddr; + r->id.idiag_sport = inet->inet_sport; + r->id.idiag_dport = inet->inet_dport; + r->id.idiag_src[0] = inet->inet_rcv_saddr; + r->id.idiag_dst[0] = inet->inet_daddr; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { @@ -504,11 +504,11 @@ static int inet_csk_diag_dump(struct sock *sk, } else #endif { - entry.saddr = &inet->rcv_saddr; - entry.daddr = &inet->daddr; + entry.saddr = &inet->inet_rcv_saddr; + entry.daddr = &inet->inet_daddr; } - entry.sport = inet->num; - entry.dport = ntohs(inet->dport); + entry.sport = inet->inet_num; + entry.dport = ntohs(inet->inet_dport); entry.userlocks = sk->sk_userlocks; if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) @@ -584,7 +584,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, if (tmo < 0) tmo = 0; - r->id.idiag_sport = inet->sport; + r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->rmt_port; r->id.idiag_src[0] = ireq->loc_addr; r->id.idiag_dst[0] = ireq->rmt_addr; @@ -639,7 +639,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { bc = (struct rtattr *)(r + 1); - entry.sport = inet->num; + entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -732,7 +732,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - if (r->id.idiag_sport != inet->sport && + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_listen; @@ -774,7 +774,7 @@ skip_listen_ht: if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) goto unlock; - for (i = s_i; i < hashinfo->ehash_size; i++) { + for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; @@ -797,10 +797,10 @@ skip_listen_ht: goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; - if (r->id.idiag_sport != inet->sport && + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_normal; - if (r->id.idiag_dport != inet->dport && + if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 625cc5f64c9..2b79377b468 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -64,7 +64,7 @@ void inet_bind_hash(struct 
sock *sk, struct inet_bind_bucket *tb, atomic_inc(&hashinfo->bsockets); - inet_sk(sk)->num = snum; + inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &tb->owners); tb->num_owners++; inet_csk(sk)->icsk_bind_hash = tb; @@ -76,7 +76,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, static void __inet_put_port(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num, + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; @@ -88,7 +88,7 @@ static void __inet_put_port(struct sock *sk) __sk_del_bind_node(sk); tb->num_owners--; inet_csk(sk)->icsk_bind_hash = NULL; - inet_sk(sk)->num = 0; + inet_sk(sk)->inet_num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -105,7 +105,7 @@ EXPORT_SYMBOL(inet_put_port); void __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num, + const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; @@ -126,9 +126,9 @@ static inline int compute_score(struct sock *sk, struct net *net, int score = -1; struct inet_sock *inet = inet_sk(sk); - if (net_eq(sock_net(sk), net) && inet->num == hnum && + if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { - __be32 rcv_saddr = inet->rcv_saddr; + __be32 rcv_saddr = inet->inet_rcv_saddr; score = sk->sk_family == PF_INET ? 1 : 0; if (rcv_saddr) { if (rcv_saddr != daddr) @@ -209,7 +209,7 @@ struct sock * __inet_lookup_established(struct net *net, * have wildcards anyways. */ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - unsigned int slot = hash & (hashinfo->ehash_size - 1); + unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; rcu_read_lock(); @@ -273,18 +273,20 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - __be32 daddr = inet->rcv_saddr; - __be32 saddr = inet->daddr; + __be32 daddr = inet->inet_rcv_saddr; + __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) - const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); - unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); + unsigned int hash = inet_ehashfn(net, daddr, lport, + saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; + int twrefcnt = 0; spin_lock(lock); @@ -312,25 +314,28 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, unique: /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. 
*/ - inet->num = lport; - inet->sport = htons(lport); + inet->inet_num = lport; + inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); + if (tw) { + twrefcnt = inet_twsk_unhash(tw); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + } spin_unlock(lock); + if (twrefcnt) + inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } - return 0; not_unique: @@ -341,16 +346,18 @@ not_unique: static inline u32 inet_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, - inet->dport); + return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr, + inet->inet_daddr, + inet->inet_dport); } -void __inet_hash_nolisten(struct sock *sk) +int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; spinlock_t *lock; struct inet_ehash_bucket *head; + int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); @@ -361,8 +368,13 @@ void __inet_hash_nolisten(struct sock *sk) spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); + if (tw) { + WARN_ON(sk->sk_hash != tw->tw_hash); + twrefcnt = inet_twsk_unhash(tw); + } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + return twrefcnt; } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); @@ -372,7 +384,7 @@ static void __inet_hash(struct sock *sk) struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { - __inet_hash_nolisten(sk); + __inet_hash_nolisten(sk, NULL); return; } @@ -421,14 +433,15 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **), - void (*hash)(struct sock *sk)) + int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)) { struct inet_hashinfo *hinfo = death_row->hashinfo; - const unsigned short snum = inet_sk(sk)->num; + const unsigned short snum = inet_sk(sk)->inet_num; struct inet_bind_hashbucket *head; struct inet_bind_bucket *tb; int ret; struct net *net = sock_net(sk); + int twrefcnt = 1; if (!snum) { int i, remaining, low, high, port; @@ -452,7 +465,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. 
*/ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (ib_net(tb) == net && tb->port == port) { + if (net_eq(ib_net(tb), net) && + tb->port == port) { if (tb->fastreuse >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); @@ -485,14 +499,19 @@ ok: /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { - inet_sk(sk)->sport = htons(port); - hash(sk); + inet_sk(sk)->inet_sport = htons(port); + twrefcnt += hash(sk, tw); } + if (tw) + twrefcnt += inet_twsk_bind_unhash(tw, hinfo); spin_unlock(&head->lock); if (tw) { inet_twsk_deschedule(tw, death_row); - inet_twsk_put(tw); + while (twrefcnt) { + twrefcnt--; + inet_twsk_put(tw); + } } ret = 0; @@ -503,7 +522,7 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - hash(sk); + hash(sk, NULL); spin_unlock_bh(&head->lock); return 0; } else { diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 6a667dae315..47038cb6c13 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -64,15 +64,15 @@ static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, if (iph->ihl != IPH_LEN_WO_OPTIONS) return -1; - if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack - || tcph->rst || tcph->syn || tcph->fin) + if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || + tcph->rst || tcph->syn || tcph->fin) return -1; if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) return -1; - if (tcph->doff != TCPH_LEN_WO_OPTIONS - && tcph->doff != TCPH_LEN_W_TIMESTAMP) + if (tcph->doff != TCPH_LEN_WO_OPTIONS && + tcph->doff != TCPH_LEN_W_TIMESTAMP) return -1; /* check tcp options (only timestamp allowed) */ @@ -262,10 +262,10 @@ static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, struct iphdr *iph, struct tcphdr *tcph) { - if ((lro_desc->iph->saddr != iph->saddr) - || (lro_desc->iph->daddr != iph->daddr) - || (lro_desc->tcph->source != tcph->source) - || (lro_desc->tcph->dest != tcph->dest)) + if ((lro_desc->iph->saddr != iph->saddr) || + (lro_desc->iph->daddr != iph->daddr) || + (lro_desc->tcph->source != tcph->source) || + (lro_desc->tcph->dest != tcph->dest)) return -1; return 0; } @@ -339,9 +339,9 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, u64 flags; int vlan_hdr_len = 0; - if (!lro_mgr->get_skb_header - || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, - &flags, priv)) + if (!lro_mgr->get_skb_header || + lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, + &flags, priv)) goto out; if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) @@ -351,8 +351,8 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, if (!lro_desc) goto out; - if ((skb->protocol == htons(ETH_P_8021Q)) - && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) + if ((skb->protocol == htons(ETH_P_8021Q)) && + !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) vlan_hdr_len = VLAN_HLEN; if (!lro_desc->active) { /* start new lro session */ @@ -446,9 +446,9 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, int hdr_len = LRO_MAX_PG_HLEN; int vlan_hdr_len = 0; - if (!lro_mgr->get_frag_header - || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, - (void *)&tcph, &flags, priv)) { + if (!lro_mgr->get_frag_header || + lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, + (void *)&tcph, &flags, priv)) { mac_hdr = page_address(frags->page) + frags->page_offset; goto out1; } @@ -472,8 +472,8 @@ static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, 
if (!skb) goto out; - if ((skb->protocol == htons(ETH_P_8021Q)) - && !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) + if ((skb->protocol == htons(ETH_P_8021Q)) && + !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) vlan_hdr_len = VLAN_HLEN; iph = (void *)(skb->data + vlan_hdr_len); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 13f0781f35c..cc94cc2d8b2 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,40 +14,87 @@ #include <net/inet_timewait_sock.h> #include <net/ip.h> + +/** + * inet_twsk_unhash - unhash a timewait socket from established hash + * @tw: timewait socket + * + * unhash a timewait socket from established hash, if hashed. + * ehash lock must be held by caller. + * Returns 1 if caller should call inet_twsk_put() after lock release. + */ +int inet_twsk_unhash(struct inet_timewait_sock *tw) +{ + if (hlist_nulls_unhashed(&tw->tw_node)) + return 0; + + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + /* + * We cannot call inet_twsk_put() ourself under lock, + * caller must call it for us. + */ + return 1; +} + +/** + * inet_twsk_bind_unhash - unhash a timewait socket from bind hash + * @tw: timewait socket + * @hashinfo: hashinfo pointer + * + * unhash a timewait socket from bind hash, if hashed. + * bind hash lock must be held by caller. + * Returns 1 if caller should call inet_twsk_put() after lock release. + */ +int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) +{ + struct inet_bind_bucket *tb = tw->tw_tb; + + if (!tb) + return 0; + + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + /* + * We cannot call inet_twsk_put() ourself under lock, + * caller must call it for us. + */ + return 1; +} + /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; - struct inet_bind_bucket *tb; + int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spin_lock(lock); - if (hlist_nulls_unhashed(&tw->tw_node)) { - spin_unlock(lock); - return; - } - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); + refcnt = inet_twsk_unhash(tw); spin_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); - tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + refcnt += inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); + #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - inet_twsk_put(tw); + while (refcnt) { + inet_twsk_put(tw); + refcnt--; + } } static noinline void inet_twsk_free(struct inet_timewait_sock *tw) @@ -86,7 +133,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; @@ -101,16 +148,24 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, * Should be done before removing sk from established chain * because readers are lockless and search established first. */ - atomic_inc(&tw->tw_refcnt); inet_twsk_add_node_rcu(tw, &ehead->twchain); /* Step 3: Remove SK from established hash. */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + /* + * Notes : + * - We initially set tw_refcnt to 0 in inet_twsk_alloc() + * - We add one reference for the bhash link + * - We add one reference for the ehash link + * - We want this refcnt update done before allowing other + * threads to find this tw in ehash chain. + */ + atomic_add(1 + 1 + 1, &tw->tw_refcnt); + spin_unlock(lock); } - EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) @@ -124,14 +179,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat kmemcheck_annotate_bitfield(tw, flags); /* Give us an identity. */ - tw->tw_daddr = inet->daddr; - tw->tw_rcv_saddr = inet->rcv_saddr; + tw->tw_daddr = inet->inet_daddr; + tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; - tw->tw_num = inet->num; + tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; - tw->tw_sport = inet->sport; - tw->tw_dport = inet->dport; + tw->tw_sport = inet->inet_sport; + tw->tw_dport = inet->inet_dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; @@ -139,14 +194,18 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); - atomic_set(&tw->tw_refcnt, 1); + /* + * Because we use RCU lookups, we should not set tw_refcnt + * to a non null value before everything is setup for this + * timewait socket. + */ + atomic_set(&tw->tw_refcnt, 0); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); } return tw; } - EXPORT_SYMBOL_GPL(inet_twsk_alloc); /* Returns non-zero if quota exceeded. */ @@ -225,7 +284,6 @@ void inet_twdr_hangman(unsigned long data) out: spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twdr_hangman); void inet_twdr_twkill_work(struct work_struct *work) @@ -256,7 +314,6 @@ void inet_twdr_twkill_work(struct work_struct *work) spin_unlock_bh(&twdr->death_lock); } } - EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); /* These are always called from BH context. 
See callers in @@ -276,7 +333,6 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw, spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); } - EXPORT_SYMBOL(inet_twsk_deschedule); void inet_twsk_schedule(struct inet_timewait_sock *tw, @@ -357,7 +413,6 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, mod_timer(&twdr->tw_timer, jiffies + twdr->period); spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twsk_schedule); void inet_twdr_twcal_tick(unsigned long data) @@ -418,40 +473,48 @@ out: #endif spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { struct inet_timewait_sock *tw; struct sock *sk; struct hlist_nulls_node *node; - int h; + unsigned int slot; - local_bh_disable(); - for (h = 0; h < (hashinfo->ehash_size); h++) { - struct inet_ehash_bucket *head = - inet_ehash_bucket(hashinfo, h); - spinlock_t *lock = inet_ehash_lockp(hashinfo, h); + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + rcu_read_lock(); restart: - spin_lock(lock); - sk_nulls_for_each(sk, node, &head->twchain) { - + sk_nulls_for_each_rcu(sk, node, &head->twchain) { tw = inet_twsk(sk); - if (!net_eq(twsk_net(tw), net) || - tw->tw_family != family) + if ((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count)) continue; - atomic_inc(&tw->tw_refcnt); - spin_unlock(lock); + if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) + continue; + + if (unlikely((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); - - goto restart; + goto restart_rcu; } - spin_unlock(lock); + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); } - local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b1fbe18feb5..6bcfe52a9c8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -67,9 +67,6 @@ * ip_id_count: idlock */ -/* Exported for inet_getid inline function. */ -DEFINE_SPINLOCK(inet_peer_idlock); - static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height @@ -390,7 +387,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) n->v4daddr = daddr; atomic_set(&n->refcnt, 1); atomic_set(&n->rid, 0); - n->ip_id_count = secure_ip_id(daddr); + atomic_set(&n->ip_id_count, secure_ip_id(daddr)); n->tcp_ts_stamp = 0; write_lock_bh(&peer_pool_lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 575f9bd51cc..86964b353c3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -206,10 +206,11 @@ static void ip_expire(unsigned long arg) struct sk_buff *head = qp->q.fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. 
*/ - if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) { + rcu_read_lock(); + head->dev = dev_get_by_index_rcu(net, qp->iif); + if (head->dev) icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); - dev_put(head->dev); - } + rcu_read_unlock(); } out: spin_unlock(&qp->q.lock); @@ -563,7 +564,7 @@ out_oversize: printk(KERN_INFO "Oversized IP packet from %pI4.\n", &qp->saddr); out_fail: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); return err; } @@ -603,7 +604,6 @@ static int zero; static struct ctl_table ip4_frags_ns_ctl_table[] = { { - .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(int), @@ -611,7 +611,6 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(int), @@ -619,26 +618,22 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", .data = &init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { } }; static struct ctl_table ip4_frags_ctl_table[] = { { - .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", .data = &ip4_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { .procname = "ipfrag_max_dist", @@ -657,7 +652,7 @@ static int ip4_frags_ns_ctl_register(struct net *net) struct ctl_table_header *hdr; table = ip4_frags_ns_ctl_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; @@ -675,7 +670,7 @@ static int ip4_frags_ns_ctl_register(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 14333385262..f36ce156cac 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -125,7 +125,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); #define HASH_SIZE 16 -static int ipgre_net_id; +static int ipgre_net_id __read_mostly; struct ipgre_net { struct ip_tunnel *tunnels[4][HASH_SIZE]; @@ -156,8 +156,13 @@ struct ipgre_net { #define tunnels_r tunnels[2] #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipgre_lock); -static DEFINE_RWLOCK(ipgre_lock); +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) /* Given src, dst and key, find appropriate for input tunnel. 
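Here, and in ipip further down, the tunnel hash tables move from a reader/writer lock to RCU on the receive path plus a plain spinlock for updates: lookups run entirely under rcu_read_lock() via the for_each_ip_tunnel_rcu() helper just defined, while link/unlink serialise on ipgre_lock and publish with rcu_assign_pointer(). Compressed to its two halves (condensed from ipgre_tunnel_link() and ipgre_tunnel_lookup() below, not additional code):

	/* update side: serialised by the spinlock */
	spin_lock_bh(&ipgre_lock);
	t->next = *tp;			/* fully initialise the entry first ... */
	rcu_assign_pointer(*tp, t);	/* ... then make it visible to readers */
	spin_unlock_bh(&ipgre_lock);

	/* lookup side: no lock, only an RCU read-side section */
	rcu_read_lock();
	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		/* t stays valid for the duration of the read-side section */
	}
	rcu_read_unlock();
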
*/ @@ -175,7 +180,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, ARPHRD_ETHER : ARPHRD_IPGRE; int score, cand_score = 4; - for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { if (local != t->parms.iph.saddr || remote != t->parms.iph.daddr || key != t->parms.i_key || @@ -200,7 +205,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, } } - for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { if (remote != t->parms.iph.daddr || key != t->parms.i_key || !(t->dev->flags & IFF_UP)) @@ -224,7 +229,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, } } - for (t = ign->tunnels_l[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { if ((local != t->parms.iph.saddr && (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) || @@ -250,7 +255,7 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, } } - for (t = ign->tunnels_wc[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { if (t->parms.i_key != key || !(t->dev->flags & IFF_UP)) continue; @@ -276,8 +281,9 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, if (cand != NULL) return cand; - if (ign->fb_tunnel_dev->flags & IFF_UP) - return netdev_priv(ign->fb_tunnel_dev); + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); return NULL; } @@ -311,10 +317,10 @@ static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) { struct ip_tunnel **tp = ipgre_bucket(ign, t); + spin_lock_bh(&ipgre_lock); t->next = *tp; - write_lock_bh(&ipgre_lock); - *tp = t; - write_unlock_bh(&ipgre_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipgre_lock); } static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) @@ -323,9 +329,9 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipgre_lock); + spin_lock_bh(&ipgre_lock); *tp = t->next; - write_unlock_bh(&ipgre_lock); + spin_unlock_bh(&ipgre_lock); break; } } @@ -476,7 +482,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - read_lock(&ipgre_lock); + rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, flags & GRE_KEY ? 
*(((__be32 *)p) + (grehlen / 4) - 1) : 0, @@ -494,7 +500,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipgre_lock); + rcu_read_unlock(); return; } @@ -573,7 +579,7 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - read_lock(&ipgre_lock); + rcu_read_lock(); if ((tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, key, gre_proto))) { @@ -647,13 +653,13 @@ static int ipgre_rcv(struct sk_buff *skb) ipgre_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipgre_lock); + rcu_read_unlock(); return(0); } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: - read_unlock(&ipgre_lock); + rcu_read_unlock(); drop_nolock: kfree_skb(skb); return(0); @@ -662,7 +668,8 @@ drop_nolock: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -810,7 +817,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -1283,33 +1290,27 @@ static const struct net_protocol ipgre_protocol = { .netns_ok = 1, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign) +static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) { int prio; for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = ign->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = ign->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } static int ipgre_init_net(struct net *net) { + struct ipgre_net *ign = net_generic(net, ipgre_net_id); int err; - struct ipgre_net *ign; - - err = -ENOMEM; - ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL); - if (ign == NULL) - goto err_alloc; - - err = net_assign_generic(net, ipgre_net_id, ign); - if (err < 0) - goto err_assign; ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", ipgre_tunnel_setup); @@ -1330,27 +1331,26 @@ static int ipgre_init_net(struct net *net) err_reg_dev: free_netdev(ign->fb_tunnel_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(ign); -err_alloc: return err; } static void ipgre_exit_net(struct net *net) { struct ipgre_net *ign; + LIST_HEAD(list); ign = net_generic(net, ipgre_net_id); rtnl_lock(); - ipgre_destroy_tunnels(ign); + ipgre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); rtnl_unlock(); - kfree(ign); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, + .id = &ipgre_net_id, + .size = sizeof(struct ipgre_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1471,7 +1471,7 @@ static void ipgre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], +static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct ip_tunnel *nt; @@ -1670,7 +1670,7 @@ static int __init ipgre_init(void) return -EAGAIN; } - err = 
register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); + err = register_pernet_device(&ipgre_net_ops); if (err < 0) goto gen_device_failed; @@ -1688,7 +1688,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); + unregister_pernet_device(&ipgre_net_ops); gen_device_failed: inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); goto out; @@ -1698,7 +1698,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); + unregister_pernet_device(&ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6c98b43badf..c29de9879fd 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -161,10 +161,10 @@ int ip_call_ra_chain(struct sk_buff *skb) /* If socket is bound to an interface, only report * the packet if it came from that interface. */ - if (sk && inet_sk(sk)->num == protocol && + if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && - sock_net(sk) == dev_net(dev)) { + net_eq(sock_net(sk), dev_net(dev))) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f9895180f48..e34013a78ef 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -264,9 +264,11 @@ int ip_mc_output(struct sk_buff *skb) This check is duplicated in ip_mr_input at the moment. */ - && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED)) + && + ((rt->rt_flags & RTCF_LOCAL) || + !(IPCB(skb)->flags & IPSKB_FORWARDED)) #endif - ) { + ) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, @@ -329,7 +331,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) __be32 daddr; /* Use correct destination address if we have options. */ - daddr = inet->daddr; + daddr = inet->inet_daddr; if(opt && opt->srr) daddr = opt->faddr; @@ -338,13 +340,13 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = daddr, - .saddr = inet->saddr, + .saddr = inet->inet_saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = - { .sport = inet->sport, - .dport = inet->dport } } }; + { .sport = inet->inet_sport, + .dport = inet->inet_dport } } }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times @@ -379,7 +381,7 @@ packet_routed: if (opt && opt->optlen) { iph->ihl += opt->optlen >> 2; - ip_options_build(skb, opt, inet->daddr, rt, 0); + ip_options_build(skb, opt, inet->inet_daddr, rt, 0); } ip_select_ident_more(iph, &rt->u.dst, sk, @@ -501,8 +503,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + truesizes += frag->truesize; } /* Everything is OK. Generate! 
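A pattern repeated for the tunnel drivers in this patch (ip_gre here, ipip below) is handing ownership of the per-namespace private area to the pernet core: with .id and .size filled in, register_pernet_device() allocates the structure and assigns the id itself, so the init hook shrinks to plain field setup via net_generic() and the exit hook no longer frees anything. The resulting shape, shown with the gre names from the hunks above purely as a reminder:

	static struct pernet_operations ipgre_net_ops = {
		.init = ipgre_init_net,		/* starts from net_generic(net, ipgre_net_id) */
		.exit = ipgre_exit_net,
		.id   = &ipgre_net_id,		/* assigned by the core at register time */
		.size = sizeof(struct ipgre_net), /* allocated by the core per netns */
	};

	err = register_pernet_device(&ipgre_net_ops);
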
*/ @@ -846,7 +848,8 @@ int ip_append_data(struct sock *sk, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + length > 0xFFFF - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, + mtu-exthdrlen); return -EMSGSIZE; } @@ -1100,7 +1103,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; if (inet->cork.length + size > 0xFFFF - fragheaderlen) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu); + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu); return -EMSGSIZE; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e982b5c1ee1..cafad9baff0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -245,7 +245,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, { struct ip_ra_chain *ra, *new_ra, **rap; - if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num == IPPROTO_RAW) + if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -480,7 +480,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_OPTIONS: { struct ip_options *opt = NULL; - if (optlen > 40 || optlen < 0) + if (optlen > 40) goto e_inval; err = ip_options_get_from_user(sock_net(sk), &opt, optval, optlen); @@ -492,7 +492,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, if (sk->sk_family == PF_INET || (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - inet->daddr != LOOPBACK4_IPV6)) { + inet->inet_daddr != LOOPBACK4_IPV6)) { #endif if (inet->opt) icsk->icsk_ext_hdr_len -= inet->opt->optlen; @@ -575,7 +575,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, inet->hdrincl = val ? 1 : 0; break; case IP_MTU_DISCOVER: - if (val < 0 || val > 3) + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) goto e_inval; inet->pmtudisc = val; break; @@ -1180,8 +1180,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; - info.ipi_addr.s_addr = inet->rcv_saddr; - info.ipi_spec_dst.s_addr = inet->rcv_saddr; + info.ipi_addr.s_addr = inet->inet_rcv_saddr; + info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; info.ipi_ifindex = inet->mc_index; put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f8d04c25645..4e08b7f2331 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1172,10 +1172,9 @@ static int __init ic_dynamic(void) schedule_timeout_uninterruptible(1); #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. 
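The IP_MTU_DISCOVER bounds check above now spells out the accepted range with the named path-MTU modes instead of the bare 0..3. For reference, a minimal hypothetical userspace caller looks like the following; this is the standard sockets API rather than anything added by the patch, and it assumes libc headers new enough to define IP_PMTUDISC_PROBE:

	#include <netinet/in.h>
	#include <sys/socket.h>
	#include <stdio.h>

	/* fd is an already-created IPv4 socket */
	int val = IP_PMTUDISC_PROBE;	/* or IP_PMTUDISC_DONT / _WANT / _DO */

	if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)) < 0)
		perror("IP_MTU_DISCOVER");
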
*/ - if ((ic_got_reply & IC_BOOTP) - && (ic_proto_enabled & IC_USE_DHCP) - && ic_dhcp_msgtype != DHCPACK) - { + if ((ic_got_reply & IC_BOOTP) && + (ic_proto_enabled & IC_USE_DHCP) && + ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; printk(","); continue; @@ -1344,9 +1343,9 @@ static int __init ip_auto_config(void) */ if (ic_myaddr == NONE || #ifdef CONFIG_ROOT_NFS - (root_server_addr == NONE - && ic_servaddr == NONE - && ROOT_DEV == Root_NFS) || + (root_server_addr == NONE && + ic_servaddr == NONE && + ROOT_DEV == Root_NFS) || #endif ic_first_dev->next) { #ifdef IPCONFIG_DYNAMIC diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 08ccd344de7..eda04fed337 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -119,7 +119,7 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -static int ipip_net_id; +static int ipip_net_id __read_mostly; struct ipip_net { struct ip_tunnel *tunnels_r_l[HASH_SIZE]; struct ip_tunnel *tunnels_r[HASH_SIZE]; @@ -134,7 +134,13 @@ static void ipip_fb_tunnel_init(struct net_device *dev); static void ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); -static DEFINE_RWLOCK(ipip_lock); +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipip_lock); + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, __be32 remote, __be32 local) @@ -144,20 +150,21 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, struct ip_tunnel *t; struct ipip_net *ipn = net_generic(net, ipip_net_id); - for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) return t; - } - for (t = ipn->tunnels_r[h0]; t; t = t->next) { + + for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) return t; - } - for (t = ipn->tunnels_l[h1]; t; t = t->next) { + + for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) return t; - } - if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP)) + + t = rcu_dereference(ipn->tunnels_wc[0]); + if (t && (t->dev->flags&IFF_UP)) return t; return NULL; } @@ -193,9 +200,9 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipip_lock); + spin_lock_bh(&ipip_lock); *tp = t->next; - write_unlock_bh(&ipip_lock); + spin_unlock_bh(&ipip_lock); break; } } @@ -205,10 +212,10 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) { struct ip_tunnel **tp = ipip_bucket(ipn, t); + spin_lock_bh(&ipip_lock); t->next = *tp; - write_lock_bh(&ipip_lock); - *tp = t; - write_unlock_bh(&ipip_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipip_lock); } static struct ip_tunnel * ipip_tunnel_locate(struct net *net, @@ -267,9 +274,9 @@ static void ipip_tunnel_uninit(struct net_device *dev) struct ipip_net *ipn = net_generic(net, ipip_net_id); if (dev == ipn->fb_tunnel_dev) { - write_lock_bh(&ipip_lock); + spin_lock_bh(&ipip_lock); ipn->tunnels_wc[0] = NULL; - write_unlock_bh(&ipip_lock); + spin_unlock_bh(&ipip_lock); } else ipip_tunnel_unlink(ipn, netdev_priv(dev)); dev_put(dev); @@ -318,7 +325,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) err = -ENOENT; - 
read_lock(&ipip_lock); + rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL || t->parms.iph.daddr == 0) goto out; @@ -333,7 +340,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipip_lock); + rcu_read_unlock(); return err; } @@ -351,11 +358,11 @@ static int ipip_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - read_lock(&ipip_lock); + rcu_read_lock(); if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr)) != NULL) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - read_unlock(&ipip_lock); + rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -374,10 +381,10 @@ static int ipip_rcv(struct sk_buff *skb) nf_reset(skb); ipip_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipip_lock); + rcu_read_unlock(); return 0; } - read_unlock(&ipip_lock); + rcu_read_unlock(); return -1; } @@ -390,7 +397,8 @@ static int ipip_rcv(struct sk_buff *skb) static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; @@ -438,25 +446,27 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } - if (tiph->frag_off) + df |= old_iph->frag_off & htons(IP_DF); + + if (df) { mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - if (mtu < 68) { - stats->collisions++; - ip_rt_put(rt); - goto tx_error; - } - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + if (mtu < 68) { + stats->collisions++; + ip_rt_put(rt); + goto tx_error; + } - df |= (old_iph->frag_off&htons(IP_DF)); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); - if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; + if ((old_iph->frag_off & htons(IP_DF)) && + mtu < ntohs(old_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } } if (tunnel->err_count > 0) { @@ -478,7 +488,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -746,33 +756,27 @@ static struct xfrm_tunnel ipip_handler = { static const char banner[] __initconst = KERN_INFO "IPv4 over IPv4 tunneling driver\n"; -static void ipip_destroy_tunnels(struct ipip_net *ipn) +static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) { int prio; for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = ipn->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = ipn->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } static int ipip_init_net(struct net *net) { + struct ipip_net *ipn = net_generic(net, ipip_net_id); int err; - struct ipip_net *ipn; - - err = -ENOMEM; - ipn = 
kzalloc(sizeof(struct ipip_net), GFP_KERNEL); - if (ipn == NULL) - goto err_alloc; - - err = net_assign_generic(net, ipip_net_id, ipn); - if (err < 0) - goto err_assign; ipn->tunnels[0] = ipn->tunnels_wc; ipn->tunnels[1] = ipn->tunnels_l; @@ -799,27 +803,26 @@ err_reg_dev: free_netdev(ipn->fb_tunnel_dev); err_alloc_dev: /* nothing */ -err_assign: - kfree(ipn); -err_alloc: return err; } static void ipip_exit_net(struct net *net) { - struct ipip_net *ipn; + struct ipip_net *ipn = net_generic(net, ipip_net_id); + LIST_HEAD(list); - ipn = net_generic(net, ipip_net_id); rtnl_lock(); - ipip_destroy_tunnels(ipn); - unregister_netdevice(ipn->fb_tunnel_dev); + ipip_destroy_tunnels(ipn, &list); + unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); + unregister_netdevice_many(&list); rtnl_unlock(); - kfree(ipn); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, .exit = ipip_exit_net, + .id = &ipip_net_id, + .size = sizeof(struct ipip_net), }; static int __init ipip_init(void) @@ -833,7 +836,7 @@ static int __init ipip_init(void) return -EAGAIN; } - err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops); + err = register_pernet_device(&ipip_net_ops); if (err) xfrm4_tunnel_deregister(&ipip_handler, AF_INET); @@ -845,7 +848,7 @@ static void __exit ipip_fini(void) if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) printk(KERN_INFO "ipip close: can't deregister tunnel\n"); - unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops); + unregister_pernet_device(&ipip_net_ops); } module_init(ipip_init); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 630a56df7b4..54596f73eff 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -275,7 +275,8 @@ failure: * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(struct net *net, int vifi, int notify) +static int vif_delete(struct net *net, int vifi, int notify, + struct list_head *head) { struct vif_device *v; struct net_device *dev; @@ -319,7 +320,7 @@ static int vif_delete(struct net *net, int vifi, int notify) } if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); dev_put(dev); return 0; @@ -469,8 +470,18 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) return err; } break; + + case VIFF_USE_IFINDEX: case 0: - dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); + if (vifc->vifc_flags == VIFF_USE_IFINDEX) { + dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); + if (dev && dev->ip_ptr == NULL) { + dev_put(dev); + return -EADDRNOTAVAIL; + } + } else + dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); + if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); @@ -483,8 +494,10 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) return -EINVAL; } - if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { + dev_put(dev); return -EADDRNOTAVAIL; + } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; ip_rt_multicast_event(in_dev); @@ -860,14 +873,16 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) static void mroute_clean_tables(struct net *net) { int i; + LIST_HEAD(list); /* * Shut down all active vif entries */ for (i = 0; i < net->ipv4.maxvif; i++) { if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) - vif_delete(net, i, 0); + vif_delete(net, i, 0, &list); } + unregister_netdevice_many(&list); /* * Wipe the cache @@ -946,7 +961,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi switch 
(optname) { case MRT_INIT: if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->num != IPPROTO_IGMP) + inet_sk(sk)->inet_num != IPPROTO_IGMP) return -EOPNOTSUPP; if (optlen != sizeof(int)) return -ENOPROTOOPT; @@ -983,7 +998,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (optname == MRT_ADD_VIF) { ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); } else { - ret = vif_delete(net, vif.vifc_vifi, 0); + ret = vif_delete(net, vif.vifc_vifi, 0, NULL); } rtnl_unlock(); return ret; @@ -1146,6 +1161,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct net *net = dev_net(dev); struct vif_device *v; int ct; + LIST_HEAD(list); if (!net_eq(dev_net(dev), net)) return NOTIFY_DONE; @@ -1155,8 +1171,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v v = &net->ipv4.vif_table[0]; for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { if (v->dev == dev) - vif_delete(net, ct, 1); + vif_delete(net, ct, 1, &list); } + unregister_netdevice_many(&list); return NOTIFY_DONE; } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 1725dc0ef68..c14623fc4d5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -155,10 +155,10 @@ static int nf_ip_reroute(struct sk_buff *skb, if (entry->hook == NF_INET_LOCAL_OUT) { const struct iphdr *iph = ip_hdr(skb); - if (!(iph->tos == rt_info->tos - && skb->mark == rt_info->mark - && iph->daddr == rt_info->daddr - && iph->saddr == rt_info->saddr)) + if (!(iph->tos == rt_info->tos && + skb->mark == rt_info->mark && + iph->daddr == rt_info->daddr && + iph->saddr == rt_info->saddr)) return ip_route_me_harder(skb, RTN_UNSPEC); } return 0; @@ -248,9 +248,9 @@ module_exit(ipv4_netfilter_fini); #ifdef CONFIG_SYSCTL struct ctl_path nf_net_ipv4_netfilter_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "netfilter", .ctl_name = NET_IPV4_NETFILTER, }, + { .procname = "net", }, + { .procname = "ipv4", }, + { .procname = "netfilter", }, { } }; EXPORT_SYMBOL_GPL(nf_net_ipv4_netfilter_sysctl_path); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 27774c99d88..06632762ba5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -384,11 +384,11 @@ static int mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. 
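The extra list_head argument to vif_delete() follows the same batching idea applied to the tunnel teardown paths earlier in this patch: callers collect doomed devices on a local list with unregister_netdevice_queue() and tear them all down in one unregister_netdevice_many() call, so the costly synchronisation in device teardown can be shared by the whole batch instead of being paid once per device. Boiled down from mroute_clean_tables() above (no new identifiers, just the shape of the pattern):

	LIST_HEAD(list);

	for (i = 0; i < net->ipv4.maxvif; i++)
		if (!(net->ipv4.vif_table[i].flags & VIFF_STATIC))
			vif_delete(net, i, 0, &list);	/* queue only, nothing unregistered yet */

	unregister_netdevice_many(&list);		/* single batched unregister */
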
*/ - if ((e->target_offset == sizeof(struct arpt_entry) - && (strcmp(t->target.u.user.name, - ARPT_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->arp)) || visited) { + if ((e->target_offset == sizeof(struct arpt_entry) && + (strcmp(t->target.u.user.name, + ARPT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->arp)) || + visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -427,8 +427,8 @@ static int mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - ARPT_STANDARD_TARGET) == 0 - && newpos >= 0) { + ARPT_STANDARD_TARGET) == 0 && + newpos >= 0) { if (newpos > newinfo->size - sizeof(struct arpt_entry)) { duprintf("mark_source_chains: " @@ -559,8 +559,8 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, { unsigned int h; - if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 - || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1251,8 +1251,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); - if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 - || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c156db21598..2855f1f38cb 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -497,10 +497,9 @@ ipq_rcv_nl_event(struct notifier_block *this, { struct netlink_notify *n = ptr; - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_FIREWALL && n->pid) { + if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { write_lock_bh(&queue_lock); - if ((n->net == &init_net) && (n->pid == peer_pid)) + if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -516,14 +515,13 @@ static struct ctl_table_header *ipq_sysctl_header; static ctl_table ipq_table[] = { { - .ctl_name = NET_IPQ_QMAX, .procname = NET_IPQ_QMAX_NAME, .data = &queue_maxlen, .maxlen = sizeof(queue_maxlen), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; #endif @@ -622,7 +620,7 @@ cleanup_netlink_notifier: static void __exit ip_queue_fini(void) { nf_unregister_queue_handlers(&nfqh); - synchronize_net(); + ipq_flush(NULL, 0); #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cde755d5eea..572330a552e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -89,9 +89,9 @@ ip_packet_match(const struct iphdr *ip, #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, - IPT_INV_SRCIP) - || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) { + IPT_INV_SRCIP) || + FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); dprintf("SRC: %pI4. Mask: %pI4. 
Target: %pI4.%s\n", @@ -122,8 +122,8 @@ ip_packet_match(const struct iphdr *ip, } /* Check specific protocol */ - if (ipinfo->proto - && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { + if (ipinfo->proto && + FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { dprintf("Packet protocol %hi does not match %hi.%s\n", ip->protocol, ipinfo->proto, ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); @@ -246,11 +246,11 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) - && strcmp(t->target.u.kernel.target->name, - IPT_STANDARD_TARGET) == 0 - && t->verdict < 0 - && unconditional(&s->ip)) { + if (s->target_offset == sizeof(struct ipt_entry) && + strcmp(t->target.u.kernel.target->name, + IPT_STANDARD_TARGET) == 0 && + t->verdict < 0 && + unconditional(&s->ip)) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -388,8 +388,8 @@ ipt_do_table(struct sk_buff *skb, back = get_entry(table_base, back->comefrom); continue; } - if (table_base + v != ipt_next_entry(e) - && !(e->ip.flags & IPT_F_GOTO)) { + if (table_base + v != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO)) { /* Save old back ptr in next entry */ struct ipt_entry *next = ipt_next_entry(e); next->comefrom = (void *)back - table_base; @@ -473,11 +473,11 @@ mark_source_chains(struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) - && (strcmp(t->target.u.user.name, - IPT_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->ip)) || visited) { + if ((e->target_offset == sizeof(struct ipt_entry) && + (strcmp(t->target.u.user.name, + IPT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->ip)) || + visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -524,8 +524,8 @@ mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - IPT_STANDARD_TARGET) == 0 - && newpos >= 0) { + IPT_STANDARD_TARGET) == 0 && + newpos >= 0) { if (newpos > newinfo->size - sizeof(struct ipt_entry)) { duprintf("mark_source_chains: " @@ -735,8 +735,8 @@ check_entry_size_and_hooks(struct ipt_entry *e, { unsigned int h; - if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 - || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1548,8 +1548,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); - if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 - || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2e4f98b8552..40ca2d240ab 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -303,9 +303,9 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) /* special case: ICMP error handling. 
conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(skb)->protocol == IPPROTO_ICMP - && (ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && + (ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) return XT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, @@ -362,8 +362,8 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par) return false; } - if (e->ip.dmsk.s_addr != htonl(0xffffffff) - || e->ip.dst.s_addr == 0) { + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || + e->ip.dst.s_addr == 0) { printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); return false; } @@ -495,14 +495,14 @@ arp_mangle(unsigned int hook, struct clusterip_config *c; /* we don't care about non-ethernet and non-ipv4 ARP */ - if (arp->ar_hrd != htons(ARPHRD_ETHER) - || arp->ar_pro != htons(ETH_P_IP) - || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) return NF_ACCEPT; /* we only want to mangle arp requests and replies */ - if (arp->ar_op != htons(ARPOP_REPLY) - && arp->ar_op != htons(ARPOP_REQUEST)) + if (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST)) return NF_ACCEPT; payload = (void *)(arp+1); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index f7e2fa0974d..ea5cea2415c 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) struct tcphdr _tcph, *tcph; __be16 oldval; - /* Not enought header? */ + /* Not enough header? 
*/ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) return false; @@ -85,8 +85,8 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) if (!set_ect_ip(skb, einfo)) return NF_DROP; - if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && ip_hdr(skb)->protocol == IPPROTO_TCP) + if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && + ip_hdr(skb)->protocol == IPPROTO_TCP) if (!set_ect_tcp(skb, einfo)) return NF_DROP; @@ -108,8 +108,8 @@ static bool ecn_tg_check(const struct xt_tgchk_param *par) einfo->ip_ect); return false; } - if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) - && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && + (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { printk(KERN_WARNING "ECN: cannot use TCP operations on a " "non-tcp rule\n"); return false; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index acc44c69eb6..ee128efa1c8 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -74,8 +74,8 @@ static void dump_packet(const struct nf_loginfo *info, if (ntohs(ih->frag_off) & IP_OFFSET) printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((logflags & IPT_LOG_IPOPT) - && ih->ihl * 4 > sizeof(struct iphdr)) { + if ((logflags & IPT_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { const unsigned char *op; unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; unsigned int i, optsize; @@ -146,8 +146,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((logflags & IPT_LOG_TCPOPT) - && th->doff * 4 > sizeof(struct tcphdr)) { + if ((logflags & IPT_LOG_TCPOPT) && + th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; const unsigned char *op; unsigned int i, optsize; @@ -238,9 +238,9 @@ static void dump_packet(const struct nf_loginfo *info, printk("TYPE=%u CODE=%u ", ich->type, ich->code); /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES - && required_len[ich->type] - && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { printk("INCOMPLETE [%u bytes] ", skb->len - iphoff - ih->ihl*4); break; @@ -276,8 +276,8 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH - && ich->code == ICMP_FRAG_NEEDED) + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) printk("MTU=%u ", ntohs(ich->un.frag.mtu)); } break; @@ -407,8 +407,8 @@ ipt_log_packet(u_int8_t pf, if (in && !out) { /* MAC logging for input chain only. 
*/ printk("MAC="); - if (skb->dev && skb->dev->hard_header_len - && skb->mac_header != skb->network_header) { + if (skb->dev && skb->dev->hard_header_len && + skb->mac_header != skb->network_header) { int i; const unsigned char *p = skb_mac_header(skb); for (i = 0; i < skb->dev->hard_header_len; i++,p++) diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index dada0863946..650b54042b0 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -59,8 +59,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index c93ae44bff2..5113b8f1a37 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -184,8 +184,8 @@ static bool reject_tg_check(const struct xt_tgchk_param *par) return false; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ip.proto != IPPROTO_TCP - || (e->ip.invflags & XT_INV_PROTO)) { + if (e->ip.proto != IPPROTO_TCP || + (e->ip.invflags & XT_INV_PROTO)) { printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n"); return false; } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index d32cc4bb328..399061c3fd7 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -226,9 +226,9 @@ static void ipt_ulog_packet(unsigned int hooknum, else *(pm->prefix) = '\0'; - if (in && in->hard_header_len > 0 - && skb->mac_header != skb->network_header - && in->hard_header_len <= ULOG_MAC_LEN) { + if (in && in->hard_header_len > 0 && + skb->mac_header != skb->network_header && + in->hard_header_len <= ULOG_MAC_LEN) { memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); pm->mac_len = in->hard_header_len; } else diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 6289b64144c..2a1e56b7190 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -96,8 +96,8 @@ static bool ecn_mt_check(const struct xt_mtchk_param *par) if (info->invert & IPT_ECN_OP_MATCH_MASK) return false; - if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) - && ip->proto != IPPROTO_TCP) { + if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) && + ip->proto != IPPROTO_TCP) { printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" " non-tcp packets\n"); return false; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 036047f9b0f..fae78c3076c 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -130,8 +130,8 @@ ipt_local_hook(unsigned int hook, u_int32_t mark; /* root is playing with raw sockets. 
*/ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; /* Save things which could affect route */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 99eb76c65d2..3bd3d6388da 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -94,8 +94,8 @@ ipt_local_out_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { /* Somebody is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_security); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index aa95bb82ee6..d171b123a65 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -195,7 +195,6 @@ static int log_invalid_proto_max = 255; static ctl_table ip_ct_sysctl_table[] = { { - .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, .procname = "ip_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), @@ -203,7 +202,6 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", .data = &init_net.ct.count, .maxlen = sizeof(int), @@ -211,7 +209,6 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, .procname = "ip_conntrack_buckets", .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), @@ -219,7 +216,6 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), @@ -227,19 +223,15 @@ static ctl_table ip_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ @@ -255,10 +247,10 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) struct nf_conntrack_tuple tuple; memset(&tuple, 0, sizeof(tuple)); - tuple.src.u3.ip = inet->rcv_saddr; - tuple.src.u.tcp.port = inet->sport; - tuple.dst.u3.ip = inet->daddr; - tuple.dst.u.tcp.port = inet->dport; + tuple.src.u3.ip = inet->inet_rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.ip = inet->inet_daddr; + tuple.dst.u.tcp.port = inet->inet_dport; tuple.src.l3num = PF_INET; tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index d71ba767734..7afd39b5b78 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -54,8 +54,8 @@ static const u_int8_t invmap[] = { static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig) { - if (orig->dst.u.icmp.type >= sizeof(invmap) - || 
!invmap[orig->dst.u.icmp.type]) + if (orig->dst.u.icmp.type >= sizeof(invmap) || + !invmap[orig->dst.u.icmp.type]) return false; tuple->src.u.icmp.id = orig->src.u.icmp.id; @@ -101,8 +101,8 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, [ICMP_ADDRESS] = 1 }; - if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { + if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || + !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ pr_debug("icmp: can't create new conn with type %u\n", ct->tuplehash[0].tuple.dst.u.icmp.type); @@ -201,11 +201,11 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, } /* Need to track icmp error message? */ - if (icmph->type != ICMP_DEST_UNREACH - && icmph->type != ICMP_SOURCE_QUENCH - && icmph->type != ICMP_TIME_EXCEEDED - && icmph->type != ICMP_PARAMETERPROB - && icmph->type != ICMP_REDIRECT) + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_SOURCE_QUENCH && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB && + icmph->type != ICMP_REDIRECT) return NF_ACCEPT; return icmp_error_message(net, skb, ctinfo, hooknum); @@ -238,17 +238,17 @@ static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { static int icmp_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMP_TYPE] - || !tb[CTA_PROTO_ICMP_CODE] - || !tb[CTA_PROTO_ICMP_ID]) + if (!tb[CTA_PROTO_ICMP_TYPE] || + !tb[CTA_PROTO_ICMP_CODE] || + !tb[CTA_PROTO_ICMP_ID]) return -EINVAL; tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); - if (tuple->dst.u.icmp.type >= sizeof(invmap) - || !invmap[tuple->dst.u.icmp.type]) + if (tuple->dst.u.icmp.type >= sizeof(invmap) || + !invmap[tuple->dst.u.icmp.type]) return -EINVAL; return 0; @@ -270,9 +270,7 @@ static struct ctl_table icmp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table icmp_compat_sysctl_table[] = { @@ -283,9 +281,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 68afc6ecd34..fe1a64479dd 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -750,6 +750,8 @@ static int __init nf_nat_init(void) BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, nfnetlink_parse_nat_setup); + BUG_ON(nf_ct_nat_offset != NULL); + rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset); return 0; cleanup_extend: @@ -764,6 +766,7 @@ static void __exit nf_nat_cleanup(void) nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); + rcu_assign_pointer(nf_ct_nat_offset, NULL); synchronize_net(); } diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 09172a65d9b..7f10a6be019 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -41,18 +41,14 @@ adjust_tcp_sequence(u32 seq, struct nf_conn 
*ct, enum ip_conntrack_info ctinfo) { - int dir; - struct nf_nat_seq *this_way, *other_way; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way = &nat->seq[dir]; - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq); - - dir = CTINFO2DIR(ctinfo); - - this_way = &nat->seq[dir]; - other_way = &nat->seq[!dir]; + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", + seq, sizediff); - pr_debug("nf_nat_resize_packet: Seq_offset before: "); + pr_debug("adjust_tcp_sequence: Seq_offset before: "); DUMP_OFFSET(this_way); spin_lock_bh(&nf_nat_seqofs_lock); @@ -63,16 +59,38 @@ adjust_tcp_sequence(u32 seq, * retransmit */ if (this_way->offset_before == this_way->offset_after || before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += sizediff; } spin_unlock_bh(&nf_nat_seqofs_lock); - pr_debug("nf_nat_resize_packet: Seq_offset after: "); + pr_debug("adjust_tcp_sequence: Seq_offset after: "); DUMP_OFFSET(this_way); } +/* Get the offset value, for conntrack */ +s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way; + s16 offset; + + if (!nat) + return 0; + + this_way = &nat->seq[dir]; + spin_lock_bh(&nf_nat_seqofs_lock); + offset = after(seq, this_way->correction_pos) + ? this_way->offset_after : this_way->offset_before; + spin_unlock_bh(&nf_nat_seqofs_lock); + + return offset; +} +EXPORT_SYMBOL_GPL(nf_nat_get_offset); + /* Frobs data inside this packet, which is linear. */ static void mangle_contents(struct sk_buff *skb, unsigned int dataoff, @@ -189,11 +207,6 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, adjust_tcp_sequence(ntohl(tcph->seq), (int)rep_len - (int)match_len, ct, ctinfo); - /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo), - (int)rep_len - (int)match_len); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; @@ -415,12 +428,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) - return 0; - - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); - - return 1; + return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); } /* Setup NAT on this expected conntrack so it follows master. */ diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5f41d017ddd..5678e9562c1 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -197,11 +197,11 @@ nf_nat_out(unsigned int hooknum, (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - if (ct->tuplehash[dir].tuple.src.u3.ip != - ct->tuplehash[!dir].tuple.dst.u3.ip - || ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all - ) + if ((ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + ) return ip_xfrm_me_harder(skb) == 0 ? 
ret : NF_DROP; } #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ab996f9c0fe..ce154b47f1d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -87,7 +87,7 @@ void raw_hash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *head; - head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)]; + head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; write_lock_bh(&h->lock); sk_add_node(sk, head); @@ -115,9 +115,9 @@ static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, sk_for_each_from(sk, node) { struct inet_sock *inet = inet_sk(sk); - if (net_eq(sock_net(sk), net) && inet->num == num && - !(inet->daddr && inet->daddr != raddr) && - !(inet->rcv_saddr && inet->rcv_saddr != laddr) && + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && + !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) goto found; /* gotcha */ } @@ -292,7 +292,6 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) /* Charge it to the socket. */ if (sock_queue_rcv_skb(sk, skb) < 0) { - atomic_inc(&sk->sk_drops); kfree_skb(skb); return NET_RX_DROP; } @@ -327,7 +326,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, int err; if (length > rt->u.dst.dev->mtu) { - ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, + ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, rt->u.dst.dev->mtu); return -EMSGSIZE; } @@ -500,10 +499,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; - daddr = inet->daddr; + daddr = inet->inet_daddr; } - ipc.addr = inet->saddr; + ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.shtx.flags = 0; ipc.oif = sk->sk_bound_dev_if; @@ -645,9 +644,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) goto out; - inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; + inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) - inet->saddr = 0; /* Use device */ + inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; out: return ret; @@ -692,7 +691,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (err) goto done; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. 
*/ if (sin) { @@ -717,7 +716,7 @@ static int raw_init(struct sock *sk) { struct raw_sock *rp = raw_sk(sk); - if (inet_sk(sk)->num == IPPROTO_ICMP) + if (inet_sk(sk)->inet_num == IPPROTO_ICMP) memset(&rp->filter, 0, sizeof(rp->filter)); return 0; } @@ -754,7 +753,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { if (optname == ICMP_FILTER) { - if (inet_sk(sk)->num != IPPROTO_ICMP) + if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_seticmpfilter(sk, optval, optlen); @@ -784,7 +783,7 @@ static int do_raw_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { if (optname == ICMP_FILTER) { - if (inet_sk(sk)->num != IPPROTO_ICMP) + if (inet_sk(sk)->inet_num != IPPROTO_ICMP) return -EOPNOTSUPP; else return raw_geticmpfilter(sk, optval, optlen); @@ -943,10 +942,10 @@ EXPORT_SYMBOL_GPL(raw_seq_stop); static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) { struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr, - src = inet->rcv_saddr; + __be32 dest = inet->inet_daddr, + src = inet->inet_rcv_saddr; __u16 destp = 0, - srcp = inet->num; + srcp = inet->inet_num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b1050a5d87..e446496f564 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -513,43 +513,42 @@ static const struct file_operations rt_cpu_seq_fops = { }; #ifdef CONFIG_NET_CLS_ROUTE -static int ip_rt_acct_read(char *buffer, char **start, off_t offset, - int length, int *eof, void *data) -{ - unsigned int i; - - if ((offset & 3) || (length & 3)) - return -EIO; - - if (offset >= sizeof(struct ip_rt_acct) * 256) { - *eof = 1; - return 0; - } - - if (offset + length >= sizeof(struct ip_rt_acct) * 256) { - length = sizeof(struct ip_rt_acct) * 256 - offset; - *eof = 1; +static int rt_acct_proc_show(struct seq_file *m, void *v) +{ + struct ip_rt_acct *dst, *src; + unsigned int i, j; + + dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); + if (!dst) + return -ENOMEM; + + for_each_possible_cpu(i) { + src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); + for (j = 0; j < 256; j++) { + dst[j].o_bytes += src[j].o_bytes; + dst[j].o_packets += src[j].o_packets; + dst[j].i_bytes += src[j].i_bytes; + dst[j].i_packets += src[j].i_packets; + } } - offset /= sizeof(u32); - - if (length > 0) { - u32 *dst = (u32 *) buffer; - - *start = buffer; - memset(dst, 0, length); - - for_each_possible_cpu(i) { - unsigned int j; - u32 *src; + seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); + kfree(dst); + return 0; +} - src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset; - for (j = 0; j < length/4; j++) - dst[j] += src[j]; - } - } - return length; +static int rt_acct_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, rt_acct_proc_show, NULL); } + +static const struct file_operations rt_acct_proc_fops = { + .owner = THIS_MODULE, + .open = rt_acct_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif static int __net_init ip_rt_do_proc_init(struct net *net) @@ -567,8 +566,7 @@ static int __net_init ip_rt_do_proc_init(struct net *net) goto err2; #ifdef CONFIG_NET_CLS_ROUTE - pde = create_proc_read_entry("rt_acct", 0, net->proc_net, - ip_rt_acct_read, NULL); + pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); if (!pde) goto err3; #endif @@ -703,7 +701,7 
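rt_acct_proc_show() now sums the per-CPU counters and emits all 256 traffic-class records as one binary blob through seq_write(). A small reader sketch is shown below; it assumes the four-u32 record layout implied by the summation loop above and a kernel built with CONFIG_NET_CLS_ROUTE.

#include <stdio.h>
#include <stdint.h>

/* Assumed record layout, mirroring the fields summed in rt_acct_proc_show(). */
struct rt_acct_rec {
        uint32_t o_bytes;
        uint32_t o_packets;
        uint32_t i_bytes;
        uint32_t i_packets;
};

int main(void)
{
        struct rt_acct_rec rec[256];
        FILE *f = fopen("/proc/net/rt_acct", "rb");

        if (!f) {
                perror("open rt_acct");
                return 1;
        }
        if (fread(rec, sizeof(rec[0]), 256, f) != 256) {
                perror("read rt_acct");
                fclose(f);
                return 1;
        }
        fclose(f);

        /* Print only the route classes that actually saw traffic. */
        for (int i = 0; i < 256; i++)
                if (rec[i].i_packets || rec[i].o_packets)
                        printf("class %3d: in %u pkts, out %u pkts\n",
                               i, rec[i].i_packets, rec[i].o_packets);
        return 0;
}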
@@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) { - return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); + return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); } static inline int rt_is_expired(struct rtable *rth) @@ -902,6 +900,12 @@ void rt_cache_flush(struct net *net, int delay) rt_do_flush(!in_softirq()); } +/* Flush previous cache invalidated entries from the cache */ +void rt_cache_flush_batch(void) +{ + rt_do_flush(!in_softirq()); +} + /* * We change rt_genid and let gc do the cleanup */ @@ -1346,9 +1350,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, return; net = dev_net(dev); - if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) - || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) - || ipv4_is_zeronet(new_gw)) + if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || + ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || + ipv4_is_zeronet(new_gw)) goto reject_redirect; if (!rt_caching(net)) @@ -1628,9 +1632,6 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, __be32 daddr = iph->daddr; unsigned short est_mtu = 0; - if (ipv4_config.no_pmtu_disc) - return 0; - for (k = 0; k < 2; k++) { for (i = 0; i < 2; i++) { unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], @@ -2314,10 +2315,11 @@ skip_cache: ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE - || (!ipv4_is_local_multicast(daddr) && - IN_DEV_MFORWARD(in_dev)) + || + (!ipv4_is_local_multicast(daddr) && + IN_DEV_MFORWARD(in_dev)) #endif - ) { + ) { rcu_read_unlock(); return ip_route_input_mc(skb, daddr, saddr, tos, dev, our); @@ -2514,9 +2516,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, of another iface. --ANK */ - if (oldflp->oif == 0 - && (ipv4_is_multicast(oldflp->fl4_dst) || - oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + if (oldflp->oif == 0 && + (ipv4_is_multicast(oldflp->fl4_dst) || + oldflp->fl4_dst == htonl(0xFFFFFFFF))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = ip_dev_find(net, oldflp->fl4_src); if (dev_out == NULL) @@ -2855,7 +2857,7 @@ static int rt_fill_info(struct net *net, error = rt->u.dst.error; expires = rt->u.dst.expires ? 
rt->u.dst.expires - jiffies : 0; if (rt->peer) { - id = rt->peer->ip_id_count; + id = atomic_read(&rt->peer->ip_id_count) & 0xffff; if (rt->peer->tcp_ts_stamp) { ts = rt->peer->tcp_ts; tsage = get_seconds() - rt->peer->tcp_ts_stamp; @@ -3056,23 +3058,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, - size_t newlen) -{ - int delay; - struct net *net; - if (newlen != sizeof(int)) - return -EINVAL; - if (get_user(delay, (int __user *)newval)) - return -EFAULT; - net = (struct net *)table->extra1; - rt_cache_flush(net, delay); - return 0; -} - static void rt_secret_reschedule(int old) { struct net *net; @@ -3117,23 +3102,8 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, return ret; } -static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, - size_t newlen) -{ - int old = ip_rt_secret_interval; - int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); - - rt_secret_reschedule(old); - - return ret; -} - static ctl_table ipv4_route_table[] = { { - .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), @@ -3141,7 +3111,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_MAX_SIZE, .procname = "max_size", .data = &ip_rt_max_size, .maxlen = sizeof(int), @@ -3151,43 +3120,34 @@ static ctl_table ipv4_route_table[] = { { /* Deprecated. Use gc_min_interval_ms */ - .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL, .procname = "gc_interval", .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD, .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), @@ -3195,7 +3155,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER, .procname = "redirect_number", .data = &ip_rt_redirect_number, .maxlen = sizeof(int), @@ -3203,7 +3162,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE, .procname = "redirect_silence", .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), @@ -3211,7 +3169,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_ERROR_COST, .procname = "error_cost", .data = &ip_rt_error_cost, .maxlen = sizeof(int), @@ -3219,7 +3176,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_ERROR_BURST, .procname = "error_burst", .data = &ip_rt_error_burst, 
.maxlen = sizeof(int), @@ -3227,7 +3183,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), @@ -3235,16 +3190,13 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", .data = &ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV4_ROUTE_MIN_PMTU, .procname = "min_pmtu", .data = &ip_rt_min_pmtu, .maxlen = sizeof(int), @@ -3252,7 +3204,6 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", .data = &ip_rt_min_advmss, .maxlen = sizeof(int), @@ -3260,50 +3211,46 @@ static ctl_table ipv4_route_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL, .procname = "secret_interval", .data = &ip_rt_secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = ipv4_sysctl_rt_secret_interval, - .strategy = ipv4_sysctl_rt_secret_interval_strategy, }, - { .ctl_name = 0 } + { } }; static struct ctl_table empty[1]; static struct ctl_table ipv4_skeleton[] = { - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, + { .procname = "route", .mode = 0555, .child = ipv4_route_table}, - { .procname = "neigh", .ctl_name = NET_IPV4_NEIGH, + { .procname = "neigh", .mode = 0555, .child = empty}, { } }; static __net_initdata struct ctl_path ipv4_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { }, }; static struct ctl_table ipv4_route_flush_table[] = { { - .ctl_name = NET_IPV4_ROUTE_FLUSH, .procname = "flush", .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv4_sysctl_rtcache_flush, - .strategy = ipv4_sysctl_rtcache_flush_strategy, }, - { .ctl_name = 0 }, + { }, }; static __net_initdata struct ctl_path ipv4_route_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, - { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { .procname = "net", }, + { .procname = "ipv4", }, + { .procname = "route", }, { }, }; @@ -3312,7 +3259,7 @@ static __net_init int sysctl_route_net_init(struct net *net) struct ctl_table *tbl; tbl = ipv4_route_flush_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a6e0e077ac3..26399ad2a28 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -253,6 +253,8 @@ EXPORT_SYMBOL(cookie_check_timestamp); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -263,7 +265,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, int mss; struct rtable *rt; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -276,13 +277,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, 
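With the binary-sysctl strategy gone, the flush knob registered under ipv4_route_path above is reachable only through procfs, and its handler still interprets the written integer as a delay. A hedged sketch of triggering a flush from userspace (the entry is mode 0200, so this needs root):

#include <stdio.h>

/* Write an integer to the route-cache flush knob; the value is handed to
 * rt_cache_flush() as a delay, and 0 asks for an immediate flush. */
int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/route/flush", "w");

        if (!f) {
                perror("open flush");
                return 1;
        }
        if (fputs("0\n", f) == EOF) {
                perror("write flush");
                fclose(f);
                return 1;
        }
        fclose(f);
        return 0;
}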
sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) @@ -298,12 +292,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -333,7 +321,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi fl = { .nl_u = { .ip4_u = + struct flowi fl = { .mark = sk->sk_mark, + .nl_u = { .ip4_u = { .daddr = ((opt && opt->srr) ? opt->faddr : ireq->rmt_addr), @@ -351,6 +340,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, &rt->u.dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + /* Try to redo what tcp_v4_send_synack did. */ req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 2dcf04d9b00..7e3712ce399 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -63,34 +63,6 @@ static int ipv4_local_port_range(ctl_table *table, int write, return ret; } -/* Validate changes from sysctl interface. 
*/ -static int ipv4_sysctl_local_port_range(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int ret; - int range[2]; - ctl_table tmp = { - .data = &range, - .maxlen = sizeof(range), - .mode = table->mode, - .extra1 = &ip_local_port_range_min, - .extra2 = &ip_local_port_range_max, - }; - - inet_get_local_port_range(range, range + 1); - ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); - if (ret == 0 && newval && newlen) { - if (range[1] < range[0]) - ret = -EINVAL; - else - set_local_port_range(range); - } - return ret; -} - - static int proc_tcp_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -109,25 +81,6 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, return ret; } -static int sysctl_tcp_congestion_control(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - char val[TCP_CA_NAME_MAX]; - ctl_table tbl = { - .data = val, - .maxlen = TCP_CA_NAME_MAX, - }; - int ret; - - tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); - if (ret == 1 && newval && newlen) - ret = tcp_set_default_congestion_control(val); - return ret; -} - static int proc_tcp_available_congestion_control(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, @@ -165,32 +118,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int strategy_allowed_congestion_control(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, - size_t newlen) -{ - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; - int ret; - - tbl.data = kmalloc(tbl.maxlen, GFP_USER); - if (!tbl.data) - return -ENOMEM; - - tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); - if (ret == 1 && newval && newlen) - ret = tcp_set_allowed_congestion_control(tbl.data); - kfree(tbl.data); - - return ret; - -} - static struct ctl_table ipv4_table[] = { { - .ctl_name = NET_IPV4_TCP_TIMESTAMPS, .procname = "tcp_timestamps", .data = &sysctl_tcp_timestamps, .maxlen = sizeof(int), @@ -198,7 +127,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, .procname = "tcp_window_scaling", .data = &sysctl_tcp_window_scaling, .maxlen = sizeof(int), @@ -206,7 +134,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_SACK, .procname = "tcp_sack", .data = &sysctl_tcp_sack, .maxlen = sizeof(int), @@ -214,7 +141,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_RETRANS_COLLAPSE, .procname = "tcp_retrans_collapse", .data = &sysctl_tcp_retrans_collapse, .maxlen = sizeof(int), @@ -222,17 +148,14 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_DEFAULT_TTL, .procname = "ip_default_ttl", .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = ipv4_doint_and_flush, - .strategy = ipv4_doint_and_flush_strategy, .extra2 = &init_net, }, { - .ctl_name = NET_IPV4_NO_PMTU_DISC, .procname = "ip_no_pmtu_disc", .data = &ipv4_config.no_pmtu_disc, .maxlen = sizeof(int), @@ -240,7 +163,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_NONLOCAL_BIND, .procname = "ip_nonlocal_bind", .data = 
&sysctl_ip_nonlocal_bind, .maxlen = sizeof(int), @@ -248,7 +170,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_SYN_RETRIES, .procname = "tcp_syn_retries", .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), @@ -256,7 +177,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_SYNACK_RETRIES, .procname = "tcp_synack_retries", .data = &sysctl_tcp_synack_retries, .maxlen = sizeof(int), @@ -264,7 +184,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MAX_ORPHANS, .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), @@ -272,7 +191,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MAX_TW_BUCKETS, .procname = "tcp_max_tw_buckets", .data = &tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), @@ -280,7 +198,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_DYNADDR, .procname = "ip_dynaddr", .data = &sysctl_ip_dynaddr, .maxlen = sizeof(int), @@ -288,16 +205,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME, .procname = "tcp_keepalive_time", .data = &sysctl_tcp_keepalive_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_TCP_KEEPALIVE_PROBES, .procname = "tcp_keepalive_probes", .data = &sysctl_tcp_keepalive_probes, .maxlen = sizeof(int), @@ -305,26 +219,21 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_KEEPALIVE_INTVL, .procname = "tcp_keepalive_intvl", .data = &sysctl_tcp_keepalive_intvl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_TCP_RETRIES1, .procname = "tcp_retries1", .data = &sysctl_tcp_retries1, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra2 = &tcp_retr1_max }, { - .ctl_name = NET_IPV4_TCP_RETRIES2, .procname = "tcp_retries2", .data = &sysctl_tcp_retries2, .maxlen = sizeof(int), @@ -332,17 +241,14 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_TCP_FIN_TIMEOUT, .procname = "tcp_fin_timeout", .data = &sysctl_tcp_fin_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, #ifdef CONFIG_SYN_COOKIES { - .ctl_name = NET_TCP_SYNCOOKIES, .procname = "tcp_syncookies", .data = &sysctl_tcp_syncookies, .maxlen = sizeof(int), @@ -351,7 +257,6 @@ static struct ctl_table ipv4_table[] = { }, #endif { - .ctl_name = NET_TCP_TW_RECYCLE, .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, .maxlen = sizeof(int), @@ -359,7 +264,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_ABORT_ON_OVERFLOW, .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, .maxlen = sizeof(int), @@ -367,7 +271,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_STDURG, .procname = "tcp_stdurg", .data = &sysctl_tcp_stdurg, .maxlen = sizeof(int), @@ -375,7 +278,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_RFC1337, .procname = "tcp_rfc1337", .data = 
&sysctl_tcp_rfc1337, .maxlen = sizeof(int), @@ -383,7 +285,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MAX_SYN_BACKLOG, .procname = "tcp_max_syn_backlog", .data = &sysctl_max_syn_backlog, .maxlen = sizeof(int), @@ -391,17 +292,14 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", .data = &sysctl_local_ports.range, .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = ipv4_local_port_range, - .strategy = ipv4_sysctl_local_port_range, }, #ifdef CONFIG_IP_MULTICAST { - .ctl_name = NET_IPV4_IGMP_MAX_MEMBERSHIPS, .procname = "igmp_max_memberships", .data = &sysctl_igmp_max_memberships, .maxlen = sizeof(int), @@ -411,7 +309,6 @@ static struct ctl_table ipv4_table[] = { #endif { - .ctl_name = NET_IPV4_IGMP_MAX_MSF, .procname = "igmp_max_msf", .data = &sysctl_igmp_max_msf, .maxlen = sizeof(int), @@ -419,7 +316,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_INET_PEER_THRESHOLD, .procname = "inet_peer_threshold", .data = &inet_peer_threshold, .maxlen = sizeof(int), @@ -427,43 +323,34 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_INET_PEER_MINTTL, .procname = "inet_peer_minttl", .data = &inet_peer_minttl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_INET_PEER_MAXTTL, .procname = "inet_peer_maxttl", .data = &inet_peer_maxttl, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, .procname = "inet_peer_gc_mintime", .data = &inet_peer_gc_mintime, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, .procname = "inet_peer_gc_maxtime", .data = &inet_peer_gc_maxtime, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { - .ctl_name = NET_TCP_ORPHAN_RETRIES, .procname = "tcp_orphan_retries", .data = &sysctl_tcp_orphan_retries, .maxlen = sizeof(int), @@ -471,7 +358,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_FACK, .procname = "tcp_fack", .data = &sysctl_tcp_fack, .maxlen = sizeof(int), @@ -479,7 +365,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_REORDERING, .procname = "tcp_reordering", .data = &sysctl_tcp_reordering, .maxlen = sizeof(int), @@ -487,7 +372,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_ECN, .procname = "tcp_ecn", .data = &sysctl_tcp_ecn, .maxlen = sizeof(int), @@ -495,7 +379,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_DSACK, .procname = "tcp_dsack", .data = &sysctl_tcp_dsack, .maxlen = sizeof(int), @@ -503,7 +386,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_MEM, .procname = "tcp_mem", .data = &sysctl_tcp_mem, .maxlen = sizeof(sysctl_tcp_mem), @@ -511,7 +393,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_WMEM, .procname = "tcp_wmem", .data = &sysctl_tcp_wmem, .maxlen = sizeof(sysctl_tcp_wmem), @@ -519,7 +400,6 @@ static 
struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_RMEM, .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, .maxlen = sizeof(sysctl_tcp_rmem), @@ -527,7 +407,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_APP_WIN, .procname = "tcp_app_win", .data = &sysctl_tcp_app_win, .maxlen = sizeof(int), @@ -535,7 +414,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_ADV_WIN_SCALE, .procname = "tcp_adv_win_scale", .data = &sysctl_tcp_adv_win_scale, .maxlen = sizeof(int), @@ -543,7 +421,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_TW_REUSE, .procname = "tcp_tw_reuse", .data = &sysctl_tcp_tw_reuse, .maxlen = sizeof(int), @@ -551,7 +428,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_FRTO, .procname = "tcp_frto", .data = &sysctl_tcp_frto, .maxlen = sizeof(int), @@ -559,7 +435,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_FRTO_RESPONSE, .procname = "tcp_frto_response", .data = &sysctl_tcp_frto_response, .maxlen = sizeof(int), @@ -567,7 +442,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_LOW_LATENCY, .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -575,7 +449,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", .data = &sysctl_tcp_nometrics_save, .maxlen = sizeof(int), @@ -583,7 +456,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_MODERATE_RCVBUF, .procname = "tcp_moderate_rcvbuf", .data = &sysctl_tcp_moderate_rcvbuf, .maxlen = sizeof(int), @@ -591,7 +463,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_TSO_WIN_DIVISOR, .procname = "tcp_tso_win_divisor", .data = &sysctl_tcp_tso_win_divisor, .maxlen = sizeof(int), @@ -599,15 +470,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_CONG_CONTROL, .procname = "tcp_congestion_control", .mode = 0644, .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, - .strategy = sysctl_tcp_congestion_control, }, { - .ctl_name = NET_TCP_ABC, .procname = "tcp_abc", .data = &sysctl_tcp_abc, .maxlen = sizeof(int), @@ -615,7 +483,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_MTU_PROBING, .procname = "tcp_mtu_probing", .data = &sysctl_tcp_mtu_probing, .maxlen = sizeof(int), @@ -623,7 +490,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_TCP_BASE_MSS, .procname = "tcp_base_mss", .data = &sysctl_tcp_base_mss, .maxlen = sizeof(int), @@ -631,7 +497,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS, .procname = "tcp_workaround_signed_windows", .data = &sysctl_tcp_workaround_signed_windows, .maxlen = sizeof(int), @@ -640,7 +505,6 @@ static struct ctl_table ipv4_table[] = { }, #ifdef CONFIG_NET_DMA { - .ctl_name = NET_TCP_DMA_COPYBREAK, .procname = "tcp_dma_copybreak", .data = &sysctl_tcp_dma_copybreak, .maxlen = sizeof(int), @@ -649,7 +513,6 @@ static struct ctl_table ipv4_table[] = { }, #endif { - .ctl_name 
= NET_TCP_SLOW_START_AFTER_IDLE, .procname = "tcp_slow_start_after_idle", .data = &sysctl_tcp_slow_start_after_idle, .maxlen = sizeof(int), @@ -658,7 +521,6 @@ static struct ctl_table ipv4_table[] = { }, #ifdef CONFIG_NETLABEL { - .ctl_name = NET_CIPSOV4_CACHE_ENABLE, .procname = "cipso_cache_enable", .data = &cipso_v4_cache_enabled, .maxlen = sizeof(int), @@ -666,7 +528,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, .procname = "cipso_cache_bucket_size", .data = &cipso_v4_cache_bucketsize, .maxlen = sizeof(int), @@ -674,7 +535,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CIPSOV4_RBM_OPTFMT, .procname = "cipso_rbm_optfmt", .data = &cipso_v4_rbm_optfmt, .maxlen = sizeof(int), @@ -682,7 +542,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, .procname = "cipso_rbm_strictvalid", .data = &cipso_v4_rbm_strictvalid, .maxlen = sizeof(int), @@ -697,15 +556,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_tcp_available_congestion_control, }, { - .ctl_name = NET_TCP_ALLOWED_CONG_CONTROL, .procname = "tcp_allowed_congestion_control", .maxlen = TCP_CA_BUF_MAX, .mode = 0644, .proc_handler = proc_allowed_congestion_control, - .strategy = strategy_allowed_congestion_control, }, { - .ctl_name = NET_TCP_MAX_SSTHRESH, .procname = "tcp_max_ssthresh", .data = &sysctl_tcp_max_ssthresh, .maxlen = sizeof(int), @@ -713,41 +569,41 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_cookie_size", + .data = &sysctl_tcp_cookie_size, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero }, { - .ctl_name = CTL_UNNUMBERED, .procname = "udp_rmem_min", .data = &sysctl_udp_rmem_min, .maxlen = sizeof(sysctl_udp_rmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero }, { - .ctl_name = CTL_UNNUMBERED, .procname = "udp_wmem_min", .data = &sysctl_udp_wmem_min, .maxlen = sizeof(sysctl_udp_wmem_min), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero }, - { .ctl_name = 0 } + { } }; static struct ctl_table ipv4_net_table[] = { { - .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL, .procname = "icmp_echo_ignore_all", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all, .maxlen = sizeof(int), @@ -755,7 +611,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_BROADCASTS, .procname = "icmp_echo_ignore_broadcasts", .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts, .maxlen = sizeof(int), @@ -763,7 +618,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_IGNORE_BOGUS_ERROR_RESPONSES, .procname = "icmp_ignore_bogus_error_responses", .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), @@ -771,7 +625,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, .procname = "icmp_errors_use_inbound_ifaddr", .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr, .maxlen = sizeof(int), @@ 
-779,16 +632,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV4_ICMP_RATELIMIT, .procname = "icmp_ratelimit", .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies }, { - .ctl_name = NET_IPV4_ICMP_RATEMASK, .procname = "icmp_ratemask", .data = &init_net.ipv4.sysctl_icmp_ratemask, .maxlen = sizeof(int), @@ -796,7 +646,6 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rt_cache_rebuild_count", .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, .maxlen = sizeof(int), @@ -807,8 +656,8 @@ static struct ctl_table ipv4_net_table[] = { }; struct ctl_path net_ipv4_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { }, }; EXPORT_SYMBOL_GPL(net_ipv4_ctl_path); @@ -818,7 +667,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) struct ctl_table *table; table = ipv4_net_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL); if (table == NULL) goto err_alloc; @@ -849,7 +698,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 98440ad8255..b0a26bb25e2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -264,6 +264,7 @@ #include <linux/cache.h> #include <linux/err.h> #include <linux/crypto.h> +#include <linux/time.h> #include <net/icmp.h> #include <net/tcp.h> @@ -1183,7 +1184,9 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) #if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); - WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); + WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), + KERN_INFO "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", + tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); #endif if (inet_csk_ack_scheduled(sk)) { @@ -1430,11 +1433,13 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* Now that we have two receive queues this * shouldn't happen. */ - if (before(*seq, TCP_SKB_CB(skb)->seq)) { - printk(KERN_INFO "recvmsg bug: copied %X " - "seq %X\n", *seq, TCP_SKB_CB(skb)->seq); + if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), + KERN_INFO "recvmsg bug: copied %X " + "seq %X rcvnxt %X fl %X\n", *seq, + TCP_SKB_CB(skb)->seq, tp->rcv_nxt, + flags)) break; - } + offset = *seq - TCP_SKB_CB(skb)->seq; if (tcp_hdr(skb)->syn) offset--; @@ -1443,8 +1448,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (tcp_hdr(skb)->fin) goto found_fin_ok; WARN(!(flags & MSG_PEEK), KERN_INFO "recvmsg bug 2: " - "copied %X seq %X\n", *seq, - TCP_SKB_CB(skb)->seq); + "copied %X seq %X rcvnxt %X fl %X\n", + *seq, TCP_SKB_CB(skb)->seq, + tp->rcv_nxt, flags); } /* Well, if we have backlog, try to process it now yet. 
*/ @@ -2037,7 +2043,7 @@ int tcp_disconnect(struct sock *sk, int flags) __skb_queue_purge(&sk->sk_async_wait_queue); #endif - inet->dport = 0; + inet->inet_dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -2054,6 +2060,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->bytes_acked = 0; + tp->window_clamp = 0; tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); @@ -2061,7 +2068,7 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - WARN_ON(inet->num && !icsk->icsk_bind_hash); + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -2078,8 +2085,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int val; int err = 0; - /* This is a string value all the others are int's */ - if (optname == TCP_CONGESTION) { + /* These are data/string values, all the others are ints */ + switch (optname) { + case TCP_CONGESTION: { char name[TCP_CA_NAME_MAX]; if (optlen < 1) @@ -2096,6 +2104,93 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } + case TCP_COOKIE_TRANSACTIONS: { + struct tcp_cookie_transactions ctd; + struct tcp_cookie_values *cvp = NULL; + + if (sizeof(ctd) > optlen) + return -EINVAL; + if (copy_from_user(&ctd, optval, sizeof(ctd))) + return -EFAULT; + + if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || + ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) + return -EINVAL; + + if (ctd.tcpct_cookie_desired == 0) { + /* default to global value */ + } else if ((0x1 & ctd.tcpct_cookie_desired) || + ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || + ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { + return -EINVAL; + } + + if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { + /* Supercedes all other values */ + lock_sock(sk); + if (tp->cookie_values != NULL) { + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + tp->cookie_values = NULL; + } + tp->rx_opt.cookie_in_always = 0; /* false */ + tp->rx_opt.cookie_out_never = 1; /* true */ + release_sock(sk); + return err; + } + + /* Allocate ancillary memory before locking. + */ + if (ctd.tcpct_used > 0 || + (tp->cookie_values == NULL && + (sysctl_tcp_cookie_size > 0 || + ctd.tcpct_cookie_desired > 0 || + ctd.tcpct_s_data_desired > 0))) { + cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, + GFP_KERNEL); + if (cvp == NULL) + return -ENOMEM; + } + lock_sock(sk); + tp->rx_opt.cookie_in_always = + (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); + tp->rx_opt.cookie_out_never = 0; /* false */ + + if (tp->cookie_values != NULL) { + if (cvp != NULL) { + /* Changed values are recorded by a changed + * pointer, ensuring the cookie will differ, + * without separately hashing each value later. + */ + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + kref_init(&cvp->kref); + tp->cookie_values = cvp; + } else { + cvp = tp->cookie_values; + } + } + if (cvp != NULL) { + cvp->cookie_desired = ctd.tcpct_cookie_desired; + + if (ctd.tcpct_used > 0) { + memcpy(cvp->s_data_payload, ctd.tcpct_value, + ctd.tcpct_used); + cvp->s_data_desired = ctd.tcpct_used; + cvp->s_data_constant = 1; /* true */ + } else { + /* No constant payload data. 
*/ + cvp->s_data_desired = ctd.tcpct_s_data_desired; + cvp->s_data_constant = 0; /* false */ + } + } + release_sock(sk); + return err; + } + default: + /* fallthru */ + break; + }; if (optlen < sizeof(int)) return -EINVAL; @@ -2420,6 +2515,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; + + case TCP_COOKIE_TRANSACTIONS: { + struct tcp_cookie_transactions ctd; + struct tcp_cookie_values *cvp = tp->cookie_values; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < sizeof(ctd)) + return -EINVAL; + + memset(&ctd, 0, sizeof(ctd)); + ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? + TCP_COOKIE_IN_ALWAYS : 0) + | (tp->rx_opt.cookie_out_never ? + TCP_COOKIE_OUT_NEVER : 0); + + if (cvp != NULL) { + ctd.tcpct_flags |= (cvp->s_data_in ? + TCP_S_DATA_IN : 0) + | (cvp->s_data_out ? + TCP_S_DATA_OUT : 0); + + ctd.tcpct_cookie_desired = cvp->cookie_desired; + ctd.tcpct_s_data_desired = cvp->s_data_desired; + + memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], + cvp->cookie_pair_size); + ctd.tcpct_used = cvp->cookie_pair_size; + } + + if (put_user(sizeof(ctd), optlen)) + return -EFAULT; + if (copy_to_user(optval, &ctd, sizeof(ctd))) + return -EFAULT; + return 0; + } default: return -ENOPROTOOPT; } @@ -2842,6 +2973,135 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif +/** + * Each Responder maintains up to two secret values concurrently for + * efficient secret rollover. Each secret value has 4 states: + * + * Generating. (tcp_secret_generating != tcp_secret_primary) + * Generates new Responder-Cookies, but not yet used for primary + * verification. This is a short-term state, typically lasting only + * one round trip time (RTT). + * + * Primary. (tcp_secret_generating == tcp_secret_primary) + * Used both for generation and primary verification. + * + * Retiring. (tcp_secret_retiring != tcp_secret_secondary) + * Used for verification, until the first failure that can be + * verified by the newer Generating secret. At that time, this + * cookie's state is changed to Secondary, and the Generating + * cookie's state is changed to Primary. This is a short-term state, + * typically lasting only one round trip time (RTT). + * + * Secondary. (tcp_secret_retiring == tcp_secret_secondary) + * Used for secondary verification, after primary verification + * failures. This state lasts no more than twice the Maximum Segment + * Lifetime (2MSL). Then, the secret is discarded. + */ +struct tcp_cookie_secret { + /* The secret is divided into two parts. The digest part is the + * equivalent of previously hashing a secret and saving the state, + * and serves as an initialization vector (IV). The message part + * serves as the trailing secret. + */ + u32 secrets[COOKIE_WORKSPACE_WORDS]; + unsigned long expires; +}; + +#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) +#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) +#define TCP_SECRET_LIFE (HZ * 600) + +static struct tcp_cookie_secret tcp_secret_one; +static struct tcp_cookie_secret tcp_secret_two; + +/* Essentially a circular list, without dynamic allocation. */ +static struct tcp_cookie_secret *tcp_secret_generating; +static struct tcp_cookie_secret *tcp_secret_primary; +static struct tcp_cookie_secret *tcp_secret_retiring; +static struct tcp_cookie_secret *tcp_secret_secondary; + +static DEFINE_SPINLOCK(tcp_secret_locker); + +/* Select a pseudo-random word in the cookie workspace. 
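The TCP_COOKIE_TRANSACTIONS handlers above are driven from userspace through struct tcp_cookie_transactions. The sketch below assumes uapi headers from a tree with this patch applied; only the option name, flag names, and fields visible in the handlers are used, everything else is illustrative.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>  /* assumes headers from a tree carrying this patch */

int main(void)
{
        struct tcp_cookie_transactions ctd;
        socklen_t len = sizeof(ctd);
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        /* Always request a cookie pair on this socket; per the handler
         * above, tcpct_cookie_desired == 0 means "use the global default"
         * (the new tcp_cookie_size sysctl). */
        memset(&ctd, 0, sizeof(ctd));
        ctd.tcpct_flags = TCP_COOKIE_IN_ALWAYS;

        if (setsockopt(fd, IPPROTO_TCP, TCP_COOKIE_TRANSACTIONS,
                       &ctd, sizeof(ctd)) < 0)
                perror("setsockopt TCP_COOKIE_TRANSACTIONS");

        /* After connect()/handshake, the negotiated pair could be read back: */
        if (getsockopt(fd, IPPROTO_TCP, TCP_COOKIE_TRANSACTIONS,
                       &ctd, &len) == 0)
                printf("cookie pair: %u bytes\n", ctd.tcpct_used);
        return 0;
}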
+ */ +static inline u32 tcp_cookie_work(const u32 *ws, const int n) +{ + return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; +} + +/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. + * Called in softirq context. + * Returns: 0 for success. + */ +int tcp_cookie_generator(u32 *bakery) +{ + unsigned long jiffy = jiffies; + + if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { + spin_lock_bh(&tcp_secret_locker); + if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { + /* refreshed by another */ + memcpy(bakery, + &tcp_secret_generating->secrets[0], + COOKIE_WORKSPACE_WORDS); + } else { + /* still needs refreshing */ + get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); + + /* The first time, paranoia assumes that the + * randomization function isn't as strong. But, + * this secret initialization is delayed until + * the last possible moment (packet arrival). + * Although that time is observable, it is + * unpredictably variable. Mash in the most + * volatile clock bits available, and expire the + * secret extra quickly. + */ + if (unlikely(tcp_secret_primary->expires == + tcp_secret_secondary->expires)) { + struct timespec tv; + + getnstimeofday(&tv); + bakery[COOKIE_DIGEST_WORDS+0] ^= + (u32)tv.tv_nsec; + + tcp_secret_secondary->expires = jiffy + + TCP_SECRET_1MSL + + (0x0f & tcp_cookie_work(bakery, 0)); + } else { + tcp_secret_secondary->expires = jiffy + + TCP_SECRET_LIFE + + (0xff & tcp_cookie_work(bakery, 1)); + tcp_secret_primary->expires = jiffy + + TCP_SECRET_2MSL + + (0x1f & tcp_cookie_work(bakery, 2)); + } + memcpy(&tcp_secret_secondary->secrets[0], + bakery, COOKIE_WORKSPACE_WORDS); + + rcu_assign_pointer(tcp_secret_generating, + tcp_secret_secondary); + rcu_assign_pointer(tcp_secret_retiring, + tcp_secret_primary); + /* + * Neither call_rcu() nor synchronize_rcu() needed. + * Retiring data is not freed. It is replaced after + * further (locked) pointer updates, and a quiet time + * (minimum 1MSL, maximum LIFE - 2MSL). + */ + } + spin_unlock_bh(&tcp_secret_locker); + } else { + rcu_read_lock_bh(); + memcpy(bakery, + &rcu_dereference(tcp_secret_generating)->secrets[0], + COOKIE_WORKSPACE_WORDS); + rcu_read_unlock_bh(); + } + return 0; +} +EXPORT_SYMBOL(tcp_cookie_generator); + void tcp_done(struct sock *sk) { if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) @@ -2876,6 +3136,7 @@ void __init tcp_init(void) struct sk_buff *skb = NULL; unsigned long nr_pages, limit; int order, i, max_share; + unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2898,11 +3159,10 @@ void __init tcp_init(void) (totalram_pages >= 128 * 1024) ? 13 : 15, 0, - &tcp_hashinfo.ehash_size, NULL, + &tcp_hashinfo.ehash_mask, thash_entries ? 0 : 512 * 1024); - tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; - for (i = 0; i < tcp_hashinfo.ehash_size; i++) { + for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } @@ -2911,7 +3171,7 @@ void __init tcp_init(void) tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), - tcp_hashinfo.ehash_size, + tcp_hashinfo.ehash_mask + 1, (totalram_pages >= 128 * 1024) ? 
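The state description above (Generating, Primary, Retiring, Secondary) boils down to two slots that trade roles as secrets expire. A deliberately simplified userspace model follows; it rolls over on expiry only, ignores the verification-failure trigger and the locking, and its names and lifetimes are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Two slots trade roles: one is "generating/primary", the other
 * "retiring/secondary".  rand() is obviously not a real secret source;
 * the point is only the rollover bookkeeping. */
struct secret {
        unsigned int value;
        time_t expires;
};

static struct secret slot_a, slot_b;
static struct secret *generating = &slot_a;     /* also primary */
static struct secret *retiring   = &slot_b;     /* also secondary */

#define SECRET_LIFE 600                         /* illustrative lifetime */

static unsigned int current_secret(void)
{
        time_t now = time(NULL);

        if (now >= generating->expires) {
                /* The old generating secret keeps verifying for a while
                 * (it becomes "retiring"); a fresh secret takes over. */
                struct secret *old = generating;

                retiring = old;
                generating = (old == &slot_a) ? &slot_b : &slot_a;
                generating->value = (unsigned int)rand();
                generating->expires = now + SECRET_LIFE;
        }
        return generating->value;
}

int main(void)
{
        srand((unsigned int)time(NULL));
        printf("secret now: %u\n", current_secret());
        printf("still same: %u\n", current_secret());
        return 0;
}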
13 : 15, 0, @@ -2966,10 +3226,19 @@ void __init tcp_init(void) sysctl_tcp_rmem[2] = max(87380, max_share); printk(KERN_INFO "TCP: Hash tables configured " - "(established %d bind %d)\n", - tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); + "(established %u bind %u)\n", + tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); + + memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); + memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); + tcp_secret_one.expires = jiffy; /* past due */ + tcp_secret_two.expires = jiffy; /* past due */ + tcp_secret_generating = &tcp_secret_one; + tcp_secret_primary = &tcp_secret_one; + tcp_secret_retiring = &tcp_secret_two; + tcp_secret_secondary = &tcp_secret_two; } EXPORT_SYMBOL(tcp_close); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index fcbcd4ff6c5..939edb3b8e4 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -27,7 +27,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; } else { - r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } if (info != NULL) diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 26d5c7fc7de..7c94a495541 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -92,8 +92,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt) if (icsk->icsk_ca_state == TCP_CA_Open) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; - if (ca->maxRTT < srtt - && srtt <= ca->maxRTT + msecs_to_jiffies(20)) + if (ca->maxRTT < srtt && + srtt <= ca->maxRTT + msecs_to_jiffies(20)) ca->maxRTT = srtt; } } @@ -123,9 +123,9 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt ca->packetcount += pkts_acked; - if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) - && now - ca->lasttime >= ca->minRTT - && ca->minRTT > 0) { + if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) && + now - ca->lasttime >= ca->minRTT && + ca->minRTT > 0) { __u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime); if (htcp_ccount(ca) <= 3) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d86784be7ab..12cab7d74db 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) * "len" is invariant segment length, including TCP header. */ len += skb->data - skb_transport_header(skb); - if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) || + if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) || /* If PSH is not set, packet should be * full sized, provided peer TCP is not badly broken. * This observation (if it is correct 8)) allows @@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); - hint = min(hint, TCP_MIN_RCVMSS); + hint = min(hint, TCP_MSS_DEFAULT); hint = max(hint, TCP_MIN_MSS); inet_csk(sk)->icsk_ack.rcv_mss = hint; @@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) * they differ. Since neither occurs due to loss, TCP should really * ignore them. */ -static inline int tcp_dupack_heurestics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(struct tcp_sock *tp) { return tcp_is_fack(tp) ? 
tp->fackets_out : tp->sacked_out + 1; } @@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk) return 1; /* Not-A-Trick#2 : Classic rule... */ - if (tcp_dupack_heurestics(tp) > tp->reordering) + if (tcp_dupack_heuristics(tp) > tp->reordering) return 1; /* Trick#3 : when we use RFC2988 timer restart, fast @@ -2717,6 +2717,35 @@ static void tcp_try_undo_dsack(struct sock *sk) } } +/* We can clear retrans_stamp when there are no retransmissions in the + * window. It would seem that it is trivially available for us in + * tp->retrans_out, however, that kind of assumptions doesn't consider + * what will happen if errors occur when sending retransmission for the + * second time. ...It could the that such segment has only + * TCPCB_EVER_RETRANS set at the present time. It seems that checking + * the head skb is enough except for some reneging corner cases that + * are not worth the effort. + * + * Main reason for all this complexity is the fact that connection dying + * time now depends on the validity of the retrans_stamp, in particular, + * that successive retransmissions of a segment must not advance + * retrans_stamp under any conditions. + */ +static int tcp_any_retrans_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (tp->retrans_out) + return 1; + + skb = tcp_write_queue_head(sk); + if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) + return 1; + + return 0; +} + /* Undo during fast recovery after partial ACK. */ static int tcp_try_undo_partial(struct sock *sk, int acked) @@ -2729,7 +2758,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ - if (tp->retrans_out == 0) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); @@ -2788,7 +2817,7 @@ static void tcp_try_keep_open(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int state = TCP_CA_Open; - if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) + if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2803,7 +2832,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) tcp_verify_left_out(tp); - if (!tp->frto_counter && tp->retrans_out == 0) + if (!tp->frto_counter && !tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -3698,7 +3727,7 @@ old_ack: * the fast version below fails. 
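The comment above motivates tcp_any_retrans_done(): tp->retrans_out alone can read zero while the head of the write queue still carries TCPCB_EVER_RETRANS from a failed second retransmission, so retrans_stamp must not be cleared on the counter alone. A tiny model of that check, with illustrative names:

#include <stdio.h>

#define EVER_RETRANS 0x1        /* stand-in for TCPCB_EVER_RETRANS */

struct conn_model {
        int retrans_out;        /* segments currently counted as retransmitted */
        int head_sacked;        /* flags on the first unacked segment */
};

/* Mirrors tcp_any_retrans_done(): either the counter or the head flag
 * is enough to say a retransmission happened in this window. */
static int any_retrans_done(const struct conn_model *c)
{
        return c->retrans_out || (c->head_sacked & EVER_RETRANS);
}

int main(void)
{
        /* Failed second retransmission: the counter dropped back to zero,
         * but the head segment was still retransmitted at some point. */
        struct conn_model c = { .retrans_out = 0,
                                .head_sacked = EVER_RETRANS };

        printf("safe to clear retrans_stamp? %s\n",
               any_retrans_done(&c) ? "no" : "yes");
        return 0;
}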
*/ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - int estab) + u8 **hvpp, int estab, struct dst_entry *dst) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); @@ -3737,7 +3766,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW && th->syn && - !estab && sysctl_tcp_window_scaling) { + !estab && sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) { __u8 snd_wscale = *(__u8 *)ptr; opt_rx->wscale_ok = 1; if (snd_wscale > 14) { @@ -3753,7 +3783,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, case TCPOPT_TIMESTAMP: if ((opsize == TCPOLEN_TIMESTAMP) && ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps))) { + (!estab && sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) { opt_rx->saw_tstamp = 1; opt_rx->rcv_tsval = get_unaligned_be32(ptr); opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); @@ -3761,7 +3792,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM && th->syn && - !estab && sysctl_tcp_sack) { + !estab && sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK)) { opt_rx->sack_ok = 1; tcp_sack_reset(opt_rx); } @@ -3782,7 +3814,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, */ break; #endif - } + case TCPOPT_COOKIE: + /* This option is variable length. + */ + switch (opsize) { + case TCPOLEN_COOKIE_BASE: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_PAIR: + /* not yet implemented */ + break; + case TCPOLEN_COOKIE_MIN+0: + case TCPOLEN_COOKIE_MIN+2: + case TCPOLEN_COOKIE_MIN+4: + case TCPOLEN_COOKIE_MIN+6: + case TCPOLEN_COOKIE_MAX: + /* 16-bit multiple */ + opt_rx->cookie_plus = opsize; + *hvpp = ptr; + default: + /* ignore option */ + break; + }; + break; + }; ptr += opsize-2; length -= opsize; @@ -3810,17 +3865,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) * If it is wrong it falls back on tcp_parse_options(). */ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) + struct tcp_sock *tp, u8 **hvpp) { - if (th->doff == sizeof(struct tcphdr) >> 2) { + /* In the spirit of fast parsing, compare doff directly to constant + * values. Because equality is used, short doff can be ignored here. 
+ */ + if (th->doff == (sizeof(*th) / 4)) { tp->rx_opt.saw_tstamp = 0; return 0; } else if (tp->rx_opt.tstamp_ok && - th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { if (tcp_parse_aligned_timestamp(tp, th)) return 1; } - tcp_parse_options(skb, &tp->rx_opt, 1); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); return 1; } @@ -4075,8 +4133,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4105,13 +4165,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack && + !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -4845,11 +4907,11 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) struct tcp_sock *tp = tcp_sk(sk); /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss + if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). Or... */ - && __tcp_select_window(sk) >= tp->rcv_wnd) || + __tcp_select_window(sk) >= tp->rcv_wnd) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ @@ -5070,10 +5132,12 @@ out: static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, int syn_inerr) { + u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp, &hash_location) && + tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5361,11 +5425,14 @@ discard: static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { - struct tcp_sock *tp = tcp_sk(sk); + u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_cookie_values *cvp = tp->cookie_values; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, 0); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst); if (th->ack) { /* rfc793: @@ -5462,6 +5529,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * Change state from SYN-SENT only after copied_seq * is initialized. 
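The tcp_parse_options() changes above make an incoming SYN's window scaling, timestamp, and SACK options subject to both the global sysctls and per-route RTAX_FEATURE_NO_* bits. A small model of that gating rule; the feature-bit values here are stand-ins, and only the allowed-and-not-forbidden logic mirrors the patch.

#include <stdio.h>

/* Illustrative stand-ins for the per-route feature bits; the real values
 * are the RTAX_FEATURE_NO_* constants from the patched headers. */
#define FEAT_NO_WSCALE  0x1
#define FEAT_NO_TSTAMP  0x2
#define FEAT_NO_SACK    0x4

struct syn_opts {
        int wscale_ok;
        int tstamp_ok;
        int sack_ok;
};

/* Mirror of the gating rule in tcp_parse_options(): an option on an
 * incoming SYN is honoured only if the sysctl allows it AND the route
 * does not forbid it. */
static void decide_syn_options(struct syn_opts *o, unsigned int dst_features,
                               int sysctl_wscale, int sysctl_tstamp,
                               int sysctl_sack)
{
        o->wscale_ok = sysctl_wscale && !(dst_features & FEAT_NO_WSCALE);
        o->tstamp_ok = sysctl_tstamp && !(dst_features & FEAT_NO_TSTAMP);
        o->sack_ok   = sysctl_sack   && !(dst_features & FEAT_NO_SACK);
}

int main(void)
{
        struct syn_opts o;

        /* Route that forbids timestamps, all three sysctls enabled. */
        decide_syn_options(&o, FEAT_NO_TSTAMP, 1, 1, 1);
        printf("wscale=%d tstamp=%d sack=%d\n",
               o.wscale_ok, o.tstamp_ok, o.sack_ok);    /* 1 0 1 */
        return 0;
}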
*/ tp->copied_seq = tp->rcv_nxt; + + if (cvp != NULL && + cvp->cookie_pair_size > 0 && + tp->rx_opt.cookie_plus > 0) { + int cookie_size = tp->rx_opt.cookie_plus + - TCPOLEN_COOKIE_BASE; + int cookie_pair_size = cookie_size + + cvp->cookie_desired; + + /* A cookie extension option was sent and returned. + * Note that each incoming SYNACK replaces the + * Responder cookie. The initial exchange is most + * fragile, as protection against spoofing relies + * entirely upon the sequence and timestamp (above). + * This replacement strategy allows the correct pair to + * pass through, while any others will be filtered via + * Responder verification later. + */ + if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { + memcpy(&cvp->cookie_pair[cvp->cookie_desired], + hash_location, cookie_size); + cvp->cookie_pair_size = cookie_pair_size; + } + } + smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7cda24b53f6..15e96030ce4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -165,10 +165,10 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->saddr, + tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - inet->sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, 1); if (tmp < 0) { if (tmp == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -183,11 +183,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet->opt || !inet->opt->srr) daddr = rt->rt_dst; - if (!inet->saddr) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; + if (!inet->inet_saddr) + inet->inet_saddr = rt->rt_src; + inet->inet_rcv_saddr = inet->inet_saddr; - if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { + if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { /* Reset inherited state */ tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; @@ -204,20 +204,20 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * when trying new connection. */ if (peer != NULL && - peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { + (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; tp->rx_opt.ts_recent = peer->tcp_ts; } } - inet->dport = usin->sin_port; - inet->daddr = daddr; + inet->inet_dport = usin->sin_port; + inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; - tp->rx_opt.mss_clamp = 536; + tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT; /* Socket identity is still unknown (sport may be zero). 
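The timestamp checks in tcp_v4_connect() (and again in the conn_request and remember_stamp hunks further down) are rewritten from "stamp + TCP_PAWS_MSL >= now" to "(u32)now - stamp <= TCP_PAWS_MSL". Computing the elapsed time as an unsigned difference stays correct even if the 32-bit seconds counter wraps between the two samples, whereas the additive form can overflow. A compilable illustration of the idiom (60 seconds is assumed for TCP_PAWS_MSL):

#include <stdint.h>
#include <stdio.h>

#define TCP_PAWS_MSL 60u		/* seconds; assumed value */

/* Wrap-safe "is the stamp recent enough?" check, as used by the rewritten
 * PAWS/timewait-recycling tests.
 */
static int recent_enough(uint32_t now, uint32_t stamp)
{
	return now - stamp <= TCP_PAWS_MSL;
}

int main(void)
{
	uint32_t stamp = 0xFFFFFFF0u;	/* taken just before the counter wraps */
	uint32_t now   = 0x00000010u;	/* 32 seconds later, after the wrap */

	printf("recent: %d\n", recent_enough(now, stamp));	/* prints 1 */
	return 0;
}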
* However we set state to SYN-SENT and not releasing socket @@ -230,7 +230,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; err = ip_route_newports(&rt, IPPROTO_TCP, - inet->sport, inet->dport, sk); + inet->inet_sport, inet->inet_dport, sk); if (err) goto failure; @@ -239,12 +239,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sk_setup_caps(sk, &rt->u.dst); if (!tp->write_seq) - tp->write_seq = secure_tcp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, + tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, usin->sin_port); - inet->id = tp->write_seq ^ jiffies; + inet->inet_id = tp->write_seq ^ jiffies; err = tcp_connect(sk); rt = NULL; @@ -261,7 +261,7 @@ failure: tcp_set_state(sk, TCP_CLOSE); ip_rt_put(rt); sk->sk_route_caps = 0; - inet->dport = 0; + inet->inet_dport = 0; return err; } @@ -520,12 +520,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v4_check(len, inet->saddr, - inet->daddr, 0); + th->check = ~tcp_v4_check(len, inet->inet_saddr, + inet->inet_daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } else { - th->check = tcp_v4_check(len, inet->saddr, inet->daddr, + th->check = tcp_v4_check(len, inet->inet_saddr, + inet->inet_daddr, csum_partial(th, th->doff << 2, skb->csum)); @@ -741,8 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -752,7 +754,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req); + skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -773,9 +775,10 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) { - return __tcp_v4_send_synack(sk, req, NULL); + return __tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -848,7 +851,7 @@ static struct tcp_md5sig_key * struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); + return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); } EXPORT_SYMBOL(tcp_v4_md5_lookup); @@ -923,7 +926,7 @@ EXPORT_SYMBOL(tcp_v4_md5_do_add); static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, u8 *newkey, u8 newkeylen) { - return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, + return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr, newkey, newkeylen); } @@ -1089,8 +1092,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, __be32 saddr, daddr; if (sk) { - saddr = inet_sk(sk)->saddr; - daddr = inet_sk(sk)->daddr; + saddr = inet_sk(sk)->inet_saddr; + daddr = inet_sk(sk)->inet_daddr; } else if (req) { saddr = 
inet_rsk(req)->loc_addr; daddr = inet_rsk(req)->rmt_addr; @@ -1210,13 +1213,16 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct inet_request_sock *ireq; + struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; + u8 *hash_location; struct request_sock *req; + struct inet_request_sock *ireq; + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = NULL; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; __u32 isn = TCP_SKB_CB(skb)->when; - struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; #else @@ -1256,27 +1262,65 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; #endif + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; + ireq->opt = tcp_v4_save_options(sk, skb); + + dst = inet_csk_route_req(sk, req); + if(!dst) + goto drop_and_free; + tcp_clear_options(&tmp_opt); - tmp_opt.mss_clamp = 536; - tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; + tmp_opt.mss_clamp = TCP_MSS_DEFAULT; + tmp_opt.user_mss = tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_release; + + /* Secret recipe starts with IP addresses */ + *mess++ ^= daddr; + *mess++ ^= saddr; - tcp_parse_options(skb, &tmp_opt, 0); + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; + +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. 
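The "secret recipe" in tcp_v4_conn_request() folds the connection identifiers into a scratch message before the SHA pass in tcp_make_synack(): the two IP addresses are XORed into the words that follow the digest area, then the variable-length Initiator Cookie is XORed in byte by byte. A sketch of just that folding step; the array sizes stand in for COOKIE_DIGEST_WORDS/COOKIE_MESSAGE_WORDS and the hashing itself (tcp_cookie_generator(), sha_transform()) is deliberately left out:

#include <stdint.h>
#include <stdio.h>

#define DIGEST_WORDS	5	/* assumed stand-in for COOKIE_DIGEST_WORDS */
#define MESSAGE_WORDS	8	/* assumed stand-in for COOKIE_MESSAGE_WORDS */

static void fold_recipe(uint32_t bakery[DIGEST_WORDS + MESSAGE_WORDS],
			uint32_t saddr, uint32_t daddr,
			const uint8_t *initiator_cookie, int cookie_len)
{
	uint32_t *mess = &bakery[DIGEST_WORDS];
	uint8_t *c;

	/* Secret recipe starts with the IP addresses... */
	*mess++ ^= daddr;
	*mess++ ^= saddr;

	/* ...plus the variable-length Initiator Cookie, XORed bytewise. */
	c = (uint8_t *)mess;
	while (cookie_len-- > 0)
		*c++ ^= *initiator_cookie++;
}

int main(void)
{
	uint32_t bakery[DIGEST_WORDS + MESSAGE_WORDS] = { 0 };
	uint8_t cookie[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	fold_recipe(bakery, 0x0a000001, 0x0a000002, cookie, (int)sizeof(cookie));
	printf("first message word after folding: %08x\n",
	       (unsigned)bakery[DIGEST_WORDS]);
	return 0;
}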
*/ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_release; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; - tcp_openreq_init(req, &tmp_opt, skb); - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - ireq->no_srccheck = inet_sk(sk)->transparent; - ireq->opt = tcp_v4_save_options(sk, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; + goto drop_and_release; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1301,10 +1345,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { - if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && + if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); @@ -1333,7 +1376,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, req, dst) || want_cookie) + if (__tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || + want_cookie) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); @@ -1380,9 +1425,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; + newinet->inet_daddr = ireq->rmt_addr; + newinet->inet_rcv_saddr = ireq->loc_addr; + newinet->inet_saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); @@ -1390,7 +1435,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newinet->opt) inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; - newinet->id = newtp->write_seq ^ jiffies; + newinet->inet_id = newtp->write_seq ^ jiffies; tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); @@ -1403,7 +1448,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { + key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); + if (key != NULL) { /* * We're using one, so create a matching key * on the newsk structure. 
If we fail to get @@ -1412,13 +1458,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); if (newkey != NULL) - tcp_v4_md5_do_add(newsk, newinet->daddr, + tcp_v4_md5_do_add(newsk, newinet->inet_daddr, newkey, key->keylen); newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; } #endif - __inet_hash_nolisten(newsk); + __inet_hash_nolisten(newsk, NULL); __inet_inherit_port(sk, newsk); return newsk; @@ -1711,8 +1757,8 @@ int tcp_v4_remember_stamp(struct sock *sk) struct inet_peer *peer = NULL; int release_it = 0; - if (!rt || rt->rt_dst != inet->daddr) { - peer = inet_getpeer(inet->daddr, 1); + if (!rt || rt->rt_dst != inet->inet_daddr) { + peer = inet_getpeer(inet->inet_daddr, 1); release_it = 1; } else { if (!rt->peer) @@ -1722,9 +1768,9 @@ int tcp_v4_remember_stamp(struct sock *sk) if (peer) { if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && - peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; peer->tcp_ts = tp->rx_opt.ts_recent; } if (release_it) @@ -1743,9 +1789,9 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && - peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; peer->tcp_ts = tcptw->tw_ts_recent; } inet_putpeer(peer); @@ -1810,7 +1856,7 @@ static int tcp_v4_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; icsk->icsk_ca_ops = &tcp_init_congestion_ops; @@ -1826,6 +1872,19 @@ static int tcp_v4_init_sock(struct sock *sk) tp->af_specific = &tcp_sock_ipv4_specific; #endif + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. 
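tcp_v4_init_sock() below allocates the per-socket cookie_values block lazily (only when sysctl_tcp_cookie_size asks for it) and manages its lifetime with a kref: kref_init() on allocation, kref_put() with tcp_cookie_values_release in tcp_v4_destroy_sock(), so the memory goes away only when the last holder drops its reference. A minimal user-space model of that get/put pattern, with a C11 atomic counter standing in for struct kref:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy model of the refcounted cookie_values block; field sizes are made up. */
struct cookie_values_sketch {
	atomic_int refcount;
	unsigned char cookie_pair[32];
};

static struct cookie_values_sketch *cookie_values_alloc(void)
{
	struct cookie_values_sketch *cvp = calloc(1, sizeof(*cvp));

	if (cvp)
		atomic_init(&cvp->refcount, 1);		/* kref_init() */
	return cvp;
}

static void cookie_values_get(struct cookie_values_sketch *cvp)
{
	atomic_fetch_add(&cvp->refcount, 1);		/* kref_get() */
}

static void cookie_values_put(struct cookie_values_sketch *cvp)
{
	/* kref_put(): run the release callback when the count hits zero. */
	if (atomic_fetch_sub(&cvp->refcount, 1) == 1) {
		printf("releasing cookie values\n");
		free(cvp);
	}
}

int main(void)
{
	struct cookie_values_sketch *cvp = cookie_values_alloc();

	if (!cvp)
		return 1;
	cookie_values_get(cvp);		/* a second holder appears */
	cookie_values_put(cvp);		/* one reference still left */
	cookie_values_put(cvp);		/* last put frees the block */
	return 0;
}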
*/ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1879,6 +1938,13 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } + /* TCP Cookie Transactions */ + if (tp->cookie_values != NULL) { + kref_put(&tp->cookie_values->kref, + tcp_cookie_values_release); + tp->cookie_values = NULL; + } + percpu_counter_dec(&tcp_sockets_allocated); } @@ -2000,7 +2066,7 @@ static void *established_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); void *rc = NULL; - for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { + for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; @@ -2061,10 +2127,10 @@ get_tw: st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ - while (++st->bucket < tcp_hashinfo.ehash_size && + while (++st->bucket <= tcp_hashinfo.ehash_mask && empty_bucket(st)) ; - if (st->bucket >= tcp_hashinfo.ehash_size) + if (st->bucket > tcp_hashinfo.ehash_mask) return NULL; spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); @@ -2225,7 +2291,7 @@ static void get_openreq4(struct sock *sk, struct request_sock *req, " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n", i, ireq->loc_addr, - ntohs(inet_sk(sk)->sport), + ntohs(inet_sk(sk)->inet_sport), ireq->rmt_addr, ntohs(ireq->rmt_port), TCP_SYN_RECV, @@ -2248,10 +2314,11 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); + __be32 dest = inet->inet_daddr; + __be32 src = inet->inet_rcv_saddr; + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); + int rx_queue; if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; @@ -2267,12 +2334,19 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) timer_expires = jiffies; } + if (sk->sk_state == TCP_LISTEN) + rx_queue = sk->sk_ack_backlog; + else + /* + * because we dont lock socket, we might find a transient negative value + */ + rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); + seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, - sk->sk_state == TCP_LISTEN ? 
sk->sk_ack_backlog : - (tp->rcv_nxt - tp->copied_seq), + rx_queue, timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, @@ -2463,12 +2537,17 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); +} + +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { - .init = tcp_sk_init, - .exit = tcp_sk_exit, + .init = tcp_sk_init, + .exit = tcp_sk_exit, + .exit_batch = tcp_sk_exit_batch, }; void __init tcp_v4_init(void) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index ce3c41ff50b..de870377fbb 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -143,8 +143,8 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk) goto out; /* we can't calc remote HZ with no different!! */ - if (tp->rx_opt.rcv_tsval == lp->remote_ref_time - || tp->rx_opt.rcv_tsecr == lp->local_ref_time) + if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || + tp->rx_opt.rcv_tsecr == lp->local_ref_time) goto out; m = HZ * (tp->rx_opt.rcv_tsval - diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c03598ed92..87accec8d09 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -26,13 +26,7 @@ #include <net/inet_common.h> #include <net/xfrm.h> -#ifdef CONFIG_SYSCTL -#define SYNC_INIT 0 /* let the user enable it */ -#else -#define SYNC_INIT 1 -#endif - -int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; +int sysctl_tcp_syncookies __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_syncookies); int sysctl_tcp_abort_on_overflow __read_mostly; @@ -96,13 +90,14 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { - struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; + u8 *hash_location; + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; - tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, 0); + tmp_opt.tstamp_ok = 1; + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -389,14 +384,43 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, const struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); - struct tcp_sock *newtp; + struct tcp_sock *newtp = tcp_sk(newsk); + struct tcp_sock *oldtp = tcp_sk(sk); + struct tcp_cookie_values *oldcvp = oldtp->cookie_values; + + /* TCP Cookie Transactions require space for the cookie pair, + * as it differs for each connection. There is no need to + * copy any s_data_payload stored at the original socket. + * Failure will prevent resuming the connection. 
+ * + * Presumed copied, in order of appearance: + * cookie_in_always, cookie_out_never + */ + if (oldcvp != NULL) { + struct tcp_cookie_values *newcvp = + kzalloc(sizeof(*newtp->cookie_values), + GFP_ATOMIC); + + if (newcvp != NULL) { + kref_init(&newcvp->kref); + newcvp->cookie_desired = + oldcvp->cookie_desired; + newtp->cookie_values = newcvp; + } else { + /* Not Yet Implemented */ + newtp->cookie_values = NULL; + } + } /* Now setup tcp_sock */ - newtp = tcp_sk(newsk); newtp->pred_flags = 0; - newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; - newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; - newtp->snd_up = treq->snt_isn + 1; + + newtp->rcv_wup = newtp->copied_seq = + newtp->rcv_nxt = treq->rcv_isn + 1; + + newtp->snd_sml = newtp->snd_una = + newtp->snd_nxt = newtp->snd_up = + treq->snt_isn + 1 + tcp_s_data_size(oldtp); tcp_prequeue_init(newtp); @@ -429,8 +453,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = treq->snt_isn + 1; - newtp->pushed_seq = newtp->write_seq; + newtp->write_seq = newtp->pushed_seq = + treq->snt_isn + 1 + tcp_s_data_size(oldtp); newtp->rx_opt.saw_tstamp = 0; @@ -476,7 +500,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if (newtp->af_specific->md5_lookup(sk, newsk)) newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED; #endif - if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) + if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); @@ -495,15 +519,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { + struct tcp_options_received tmp_opt; + u8 *hash_location; + struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; - struct tcp_options_received tmp_opt; - struct sock *child; - tmp_opt.saw_tstamp = 0; - if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, 0); + if ((th->doff > (sizeof(*th) >> 2)) && (req->ts_recent)) { + tmp_opt.tstamp_ok = 1; + tcp_parse_options(skb, &tmp_opt, &hash_location, 1, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -537,7 +562,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. */ - req->rsk_ops->rtx_syn_ack(sk, req); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); return NULL; } @@ -596,7 +621,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Invalid ACK: reset will be sent by listening socket */ if ((flg & TCP_FLAG_ACK) && - (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1)) + (TCP_SKB_CB(skb)->ack_seq != + tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fcd278a7080..93316a96d82 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,6 +59,10 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. 
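When the SYN-ACK carries s_data (the cookie-transaction payload copied into the segment by tcp_make_synack() further down), that payload occupies sequence space just like ordinary data. Hence the child's snd_una/snd_nxt/write_seq all start at snt_isn + 1 + tcp_s_data_size(), and tcp_check_req() only treats an ACK of exactly that value as valid. A trivial sketch of the arithmetic, with tcp_s_data_size() reduced to a plain length:

#include <stdio.h>

int main(void)
{
	unsigned int snt_isn     = 1000;	/* ISN used on the SYN-ACK */
	unsigned int s_data_size = 16;		/* bytes of s_data carried on it */

	/* SYN consumes one sequence number, the s_data payload its length. */
	unsigned int child_snd_nxt = snt_isn + 1 + s_data_size;
	unsigned int valid_ack_seq = snt_isn + 1 + s_data_size;

	printf("child snd_una/snd_nxt/write_seq start at %u\n", child_snd_nxt);
	printf("only ack_seq == %u completes the handshake\n", valid_ack_seq);
	return 0;
}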
*/ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ +EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); + + /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { @@ -362,15 +366,45 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) +#define OPTION_COOKIE_EXTENSION (1 << 4) struct tcp_out_options { u8 options; /* bit field of OPTION_* */ u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ + u8 hash_size; /* bytes in hash_location */ u16 mss; /* 0 to disable */ __u32 tsval, tsecr; /* need to include OPTION_TS */ + __u8 *hash_location; /* temporary pointer, overloaded */ }; +/* The sysctl int routines are generic, so check consistency here. + */ +static u8 tcp_cookie_size_check(u8 desired) +{ + if (desired > 0) { + /* previously specified */ + return desired; + } + if (sysctl_tcp_cookie_size <= 0) { + /* no default specified */ + return 0; + } + if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) { + /* value too small, specify minimum */ + return TCP_COOKIE_MIN; + } + if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) { + /* value too large, specify maximum */ + return TCP_COOKIE_MAX; + } + if (0x1 & sysctl_tcp_cookie_size) { + /* 8-bit multiple, illegal, fix it */ + return (u8)(sysctl_tcp_cookie_size + 0x1); + } + return (u8)sysctl_tcp_cookie_size; +} + /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -385,17 +419,34 @@ struct tcp_out_options { * (but it may well be that other scenarios fail similarly). */ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, - const struct tcp_out_options *opts, - __u8 **md5_hash) { - if (unlikely(OPTION_MD5 & opts->options)) { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - *md5_hash = (__u8 *)ptr; + struct tcp_out_options *opts) +{ + u8 options = opts->options; /* mungable copy */ + + /* Having both authentication and cookies for security is redundant, + * and there's certainly not enough room. Instead, the cookie-less + * extension variant is proposed. + * + * Consider the pessimal case with authentication. 
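tcp_cookie_size_check() is a pure clamp-and-round helper: an explicit previous choice wins, otherwise the sysctl is bounded to the legal range and an odd value is rounded up to the next even size (the option format works in 16-bit multiples). A user-space rendering of the same decision ladder; TCP_COOKIE_MIN/TCP_COOKIE_MAX of 8 and 16 are assumptions that match the even 8..16-byte cases accepted by the option parser earlier in this patch:

#include <stdio.h>

#define TCP_COOKIE_MIN	8	/* assumed */
#define TCP_COOKIE_MAX	16	/* assumed */

static int sysctl_tcp_cookie_size;

static unsigned char cookie_size_check(unsigned char desired)
{
	if (desired > 0)
		return desired;				/* previously specified */
	if (sysctl_tcp_cookie_size <= 0)
		return 0;				/* no default specified */
	if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN)
		return TCP_COOKIE_MIN;			/* too small */
	if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX)
		return TCP_COOKIE_MAX;			/* too large */
	if (sysctl_tcp_cookie_size & 0x1)		/* odd: round up */
		return (unsigned char)(sysctl_tcp_cookie_size + 1);
	return (unsigned char)sysctl_tcp_cookie_size;
}

int main(void)
{
	sysctl_tcp_cookie_size = 11;
	printf("sysctl 11 -> %u bytes, explicit 10 -> %u bytes\n",
	       (unsigned)cookie_size_check(0), (unsigned)cookie_size_check(10));
	return 0;
}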
The options + * could look like: + * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 + */ + if (unlikely(OPTION_MD5 & options)) { + if (unlikely(OPTION_COOKIE_EXTENSION & options)) { + *ptr++ = htonl((TCPOPT_COOKIE << 24) | + (TCPOLEN_COOKIE_BASE << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + } else { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); + } + options &= ~OPTION_COOKIE_EXTENSION; + /* overload cookie hash location */ + opts->hash_location = (__u8 *)ptr; ptr += 4; - } else { - *md5_hash = NULL; } if (unlikely(opts->mss)) { @@ -404,12 +455,13 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, opts->mss); } - if (likely(OPTION_TS & opts->options)) { - if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { + if (likely(OPTION_TS & options)) { + if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + options &= ~OPTION_SACK_ADVERTISE; } else { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -420,15 +472,52 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - if (unlikely(OPTION_SACK_ADVERTISE & opts->options && - !(OPTION_TS & opts->options))) { + /* Specification requires after timestamp, so do it now. + * + * Consider the pessimal case without authentication. The options + * could look like: + * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 + */ + if (unlikely(OPTION_COOKIE_EXTENSION & options)) { + __u8 *cookie_copy = opts->hash_location; + u8 cookie_size = opts->hash_size; + + /* 8-bit multiple handled in tcp_cookie_size_check() above, + * and elsewhere. + */ + if (0x2 & cookie_size) { + __u8 *p = (__u8 *)ptr; + + /* 16-bit multiple */ + *p++ = TCPOPT_COOKIE; + *p++ = TCPOLEN_COOKIE_BASE + cookie_size; + *p++ = *cookie_copy++; + *p++ = *cookie_copy++; + ptr++; + cookie_size -= 2; + } else { + /* 32-bit multiple */ + *ptr++ = htonl(((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_COOKIE << 8) | + TCPOLEN_COOKIE_BASE) + + cookie_size); + } + + if (cookie_size > 0) { + memcpy(ptr, cookie_copy, cookie_size); + ptr += (cookie_size / 4); + } + } + + if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); } - if (unlikely(OPTION_WSCALE & opts->options)) { + if (unlikely(OPTION_WSCALE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | @@ -463,13 +552,18 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - unsigned size = 0; + struct tcp_cookie_values *cvp = tp->cookie_values; + struct dst_entry *dst = __sk_dst_get(sk); + unsigned remaining = MAX_TCP_OPTION_SPACE; + u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? + tcp_cookie_size_check(cvp->cookie_desired) : + 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + remaining -= TCPOLEN_MD5SIG_ALIGNED; } #else *md5 = NULL; @@ -485,26 +579,76 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, * SACKs don't matter, we never delay an ACK when we have any of those * going out. 
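tcp_options_write() emits the option block one 32-bit word at a time, which is why the kind and length bytes (plus NOP padding) are squeezed into a single host-order value and converted with htonl(). A compilable illustration of that packing; the numeric kinds and lengths are the standard TCP values (NOP = 1, MD5 signature option kind 19, length 18) rather than anything defined by this patch:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define TCPOPT_NOP	1
#define TCPOPT_MD5SIG	19
#define TCPOLEN_MD5SIG	18

int main(void)
{
	/* NOP, NOP, MD5SIG kind, MD5SIG length in one word, network order. */
	uint32_t word = htonl((TCPOPT_NOP << 24) |
			      (TCPOPT_NOP << 16) |
			      (TCPOPT_MD5SIG << 8) |
			      TCPOLEN_MD5SIG);
	uint8_t *bytes = (uint8_t *)&word;

	printf("on the wire: %d %d %d %d\n",
	       bytes[0], bytes[1], bytes[2], bytes[3]);	/* 1 1 19 18 */
	return 0;
}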
*/ opts->mss = tcp_advertise_mss(sk); - size += TCPOLEN_MSS_ALIGNED; + remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { + if (likely(sysctl_tcp_timestamps && + !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) && + *md5 == NULL)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = tp->rx_opt.ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; + remaining -= TCPOLEN_TSTAMP_ALIGNED; } - if (likely(sysctl_tcp_window_scaling)) { + if (likely(sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE))) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; - size += TCPOLEN_WSCALE_ALIGNED; + remaining -= TCPOLEN_WSCALE_ALIGNED; } - if (likely(sysctl_tcp_sack)) { + if (likely(sysctl_tcp_sack && + !dst_feature(dst, RTAX_FEATURE_NO_SACK))) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) - size += TCPOLEN_SACKPERM_ALIGNED; + remaining -= TCPOLEN_SACKPERM_ALIGNED; } - return size; + /* Note that timestamps are required by the specification. + * + * Odd numbers of bytes are prohibited by the specification, ensuring + * that the cookie is 16-bit aligned, and the resulting cookie pair is + * 32-bit aligned. + */ + if (*md5 == NULL && + (OPTION_TS & opts->options) && + cookie_size > 0) { + int need = TCPOLEN_COOKIE_BASE + cookie_size; + + if (0x2 & need) { + /* 32-bit multiple */ + need += 2; /* NOPs */ + + if (need > remaining) { + /* try shrinking cookie to fit */ + cookie_size -= 2; + need -= 4; + } + } + while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { + cookie_size -= 4; + need -= 4; + } + if (TCP_COOKIE_MIN <= cookie_size) { + opts->options |= OPTION_COOKIE_EXTENSION; + opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; + opts->hash_size = cookie_size; + + /* Remember for future incarnations. */ + cvp->cookie_desired = cookie_size; + + if (cvp->cookie_desired != cvp->cookie_pair_size) { + /* Currently use random bytes as a nonce, + * assuming these are completely unpredictable + * by hostile users of the same system. + */ + get_random_bytes(&cvp->cookie_pair[0], + cookie_size); + cvp->cookie_pair_size = cookie_size; + } + + remaining -= need; + } + } + return MAX_TCP_OPTION_SPACE - remaining; } /* Set up TCP options for SYN-ACKs. */ @@ -512,48 +656,77 @@ static unsigned tcp_synack_options(struct sock *sk, struct request_sock *req, unsigned mss, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { - unsigned size = 0; + struct tcp_md5sig_key **md5, + struct tcp_extend_values *xvp) +{ struct inet_request_sock *ireq = inet_rsk(req); - char doing_ts; + unsigned remaining = MAX_TCP_OPTION_SPACE; + u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? + xvp->cookie_plus : + 0; + bool doing_ts = ireq->tstamp_ok; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); if (*md5) { opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + + /* We can't fit any SACK blocks in a packet with MD5 + TS + * options. There was discussion about disabling SACK + * rather than TS in order to fit in better with old, + * buggy kernels, but that was deemed to be unnecessary. + */ + doing_ts &= !ireq->sack_ok; } #else *md5 = NULL; #endif - /* we can't fit any SACK blocks in a packet with MD5 + TS - options. There was discussion about disabling SACK rather than TS in - order to fit in better with old, buggy kernels, but that was deemed - to be unnecessary. 
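tcp_syn_options() now works from a fixed budget rather than summing sizes: start with MAX_TCP_OPTION_SPACE (40 bytes of option room in a maximal header), subtract each aligned option as it is selected, then shrink the cookie in steps until the remainder can hold it. A simplified sketch of that accounting; the aligned lengths and the TCPOLEN_COOKIE_BASE/TCP_COOKIE_MIN values are assumptions for illustration, and the 2-byte trimming special case of the real code is folded into the loop:

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE	40	/* 60-byte max header minus 20-byte base */
#define TCPOLEN_MSS_ALIGNED	 4
#define TCPOLEN_TSTAMP_ALIGNED	12	/* SACK-permitted folds into this word */
#define TCPOLEN_WSCALE_ALIGNED	 4
#define TCPOLEN_COOKIE_BASE	 2	/* assumed: kind + length bytes */
#define TCP_COOKIE_MIN		 8	/* assumed */

int main(void)
{
	int remaining = MAX_TCP_OPTION_SPACE;
	int cookie_size = 16;			/* desired cookie bytes */
	int need;

	remaining -= TCPOLEN_MSS_ALIGNED;	/* MSS is always sent on a SYN */
	remaining -= TCPOLEN_TSTAMP_ALIGNED;	/* timestamps (cookie needs them) */
	remaining -= TCPOLEN_WSCALE_ALIGNED;	/* window scaling */

	need = TCPOLEN_COOKIE_BASE + cookie_size;
	if (need & 0x2)
		need += 2;			/* pad out to a 32-bit multiple */

	while (need > remaining && cookie_size > TCP_COOKIE_MIN) {
		cookie_size -= 4;		/* try a shorter cookie */
		need -= 4;
	}

	if (need <= remaining && cookie_size >= TCP_COOKIE_MIN) {
		remaining -= need;
		printf("cookie sent: %d bytes\n", cookie_size);
	} else {
		printf("no room for a cookie on this SYN\n");
	}
	printf("options used: %d of %d bytes\n",
	       MAX_TCP_OPTION_SPACE - remaining, MAX_TCP_OPTION_SPACE);
	return 0;
}

The function still returns "bytes of options used" (MAX_TCP_OPTION_SPACE - remaining), so its callers are unchanged, but the budget style makes it much harder to overrun the 40-byte limit by accident.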
*/ - doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok); - + /* We always send an MSS option. */ opts->mss = mss; - size += TCPOLEN_MSS_ALIGNED; + remaining -= TCPOLEN_MSS_ALIGNED; if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; opts->options |= OPTION_WSCALE; - size += TCPOLEN_WSCALE_ALIGNED; + remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; opts->tsval = TCP_SKB_CB(skb)->when; opts->tsecr = req->ts_recent; - size += TCPOLEN_TSTAMP_ALIGNED; + remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!doing_ts)) - size += TCPOLEN_SACKPERM_ALIGNED; + remaining -= TCPOLEN_SACKPERM_ALIGNED; } - return size; + /* Similar rationale to tcp_syn_options() applies here, too. + * If the <SYN> options fit, the same options should fit now! + */ + if (*md5 == NULL && + doing_ts && + cookie_plus > TCPOLEN_COOKIE_BASE) { + int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ + + if (0x2 & need) { + /* 32-bit multiple */ + need += 2; /* NOPs */ + } + if (need <= remaining) { + opts->options |= OPTION_COOKIE_EXTENSION; + opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; + remaining -= need; + } else { + /* There's no error return, so flag it. */ + xvp->cookie_out_never = 1; /* true */ + opts->hash_size = 0; + } + } + return MAX_TCP_OPTION_SPACE - remaining; } /* Compute TCP options for ESTABLISHED sockets. This is not the @@ -619,7 +792,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_out_options opts; unsigned tcp_options_size, tcp_header_size; struct tcp_md5sig_key *md5; - __u8 *md5_hash_location; struct tcphdr *th; int err; @@ -661,8 +833,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, /* Build TCP header and checksum it. */ th = tcp_hdr(skb); - th->source = inet->sport; - th->dest = inet->dport; + th->source = inet->inet_sport; + th->dest = inet->inet_dport; th->seq = htonl(tcb->seq); th->ack_seq = htonl(tp->rcv_nxt); *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | @@ -690,7 +862,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, } } - tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); + tcp_options_write((__be32 *)(th + 1), tp, &opts); if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) TCP_ECN_send(sk, skb, tcp_header_size); @@ -698,7 +870,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, /* Calculate the MD5 hash, as we have all we need now */ if (md5) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; - tp->af_specific->calc_md5_hash(md5_hash_location, + tp->af_specific->calc_md5_hash(opts.hash_location, md5, sk, NULL, skb); } #endif @@ -1918,8 +2090,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * case, when window is shrunk to zero. In this case * our retransmit serves as a zero window probe. */ - if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) - && TCP_SKB_CB(skb)->seq != tp->snd_una) + if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && + TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; if (skb->len > cur_mss) { @@ -2219,16 +2391,17 @@ int tcp_send_synack(struct sock *sk) /* Prepare a SYN-ACK. 
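A related cleanup visible in this hunk: instead of handing back the digest position through a separate __u8 **md5_hash_location out-parameter, tcp_options_write() now records it in opts->hash_location (shared between the MD5 and cookie cases), and the MD5 code fills the digest in afterwards. The shape of the idea is simply "reserve the bytes, remember where they are, compute into them later"; a small stand-alone sketch with an invented buffer layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct out_options_sketch {
	uint8_t *hash_location;		/* where the digest will eventually go */
	uint8_t  hash_size;
};

/* Phase 1: lay out the options and remember where the digest belongs. */
static uint8_t *write_options(uint8_t *ptr, struct out_options_sketch *opts)
{
	static const uint8_t kind_len[4] = { 1, 1, 19, 18 }; /* NOP NOP MD5 len */

	memcpy(ptr, kind_len, sizeof(kind_len));
	ptr += sizeof(kind_len);

	opts->hash_location = ptr;	/* digest bytes start here */
	opts->hash_size = 16;
	memset(ptr, 0, opts->hash_size);
	return ptr + opts->hash_size;
}

int main(void)
{
	uint8_t header[40] = { 0 };
	struct out_options_sketch opts = { 0 };
	uint8_t *end = write_options(header, &opts);

	/* Phase 2: later code (calc_md5_hash() in the kernel) writes into
	 * the remembered spot once everything it hashes is in place.
	 */
	memset(opts.hash_location, 0xAA, opts.hash_size);

	printf("options occupy %ld bytes\n", (long)(end - header));
	return 0;
}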
*/ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req) + struct request_sock *req, + struct request_values *rvp) { + struct tcp_out_options opts; + struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; - int tcp_header_size; - struct tcp_out_options opts; struct sk_buff *skb; struct tcp_md5sig_key *md5; - __u8 *md5_hash_location; + int tcp_header_size; int mss; skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); @@ -2266,8 +2439,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5) + - sizeof(struct tcphdr); + skb, &opts, &md5, xvp) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2284,19 +2457,58 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, */ tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); + + if (OPTION_COOKIE_EXTENSION & opts.options) { + const struct tcp_cookie_values *cvp = tp->cookie_values; + + if (cvp != NULL && + cvp->s_data_constant && + cvp->s_data_desired > 0) { + u8 *buf = skb_put(skb, cvp->s_data_desired); + + /* copy data directly from the listening socket. */ + memcpy(buf, cvp->s_data_payload, cvp->s_data_desired); + TCP_SKB_CB(skb)->end_seq += cvp->s_data_desired; + } + + if (opts.hash_size > 0) { + __u32 workspace[SHA_WORKSPACE_WORDS]; + u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; + u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; + + /* Secret recipe depends on the Timestamp, (future) + * Sequence and Acknowledgment Numbers, Initiator + * Cookie, and others handled by IP variant caller. + */ + *tail-- ^= opts.tsval; + *tail-- ^= tcp_rsk(req)->rcv_isn + 1; + *tail-- ^= TCP_SKB_CB(skb)->seq + 1; + + /* recommended */ + *tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ + + sha_transform((__u32 *)&xvp->cookie_bakery[0], + (char *)mess, + &workspace[0]); + opts.hash_location = + (__u8 *)&xvp->cookie_bakery[0]; + } + } + th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); - tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); + tcp_options_write((__be32 *)(th + 1), tp, &opts); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { - tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, + tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, md5, NULL, req, skb); } #endif @@ -2315,7 +2527,9 @@ static void tcp_connect_init(struct sock *sk) * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. */ tp->tcp_header_len = sizeof(struct tcphdr) + - (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + (sysctl_tcp_timestamps && + (!dst_feature(dst, RTAX_FEATURE_NO_TSTAMP) ? + TCPOLEN_TSTAMP_ALIGNED : 0)); #ifdef CONFIG_TCP_MD5SIG if (tp->af_specific->md5_lookup(sk, sk) != NULL) @@ -2341,7 +2555,8 @@ static void tcp_connect_init(struct sock *sk) tp->advmss - (tp->rx_opt.ts_recent_stamp ? 
tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, - sysctl_tcp_window_scaling, + (sysctl_tcp_window_scaling && + !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)), &rcv_wscale); tp->rx_opt.rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 59f5b5e7c56..bb110c5ce1d 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -94,8 +94,9 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct inet_sock *inet = inet_sk(sk); /* Only update if port matches */ - if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port) - && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { + if ((port == 0 || ntohs(inet->inet_dport) == port || + ntohs(inet->inet_sport) == port) && + (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { spin_lock(&tcp_probe.lock); /* If log fills, just silently drop */ @@ -103,10 +104,10 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_log *p = tcp_probe.log + tcp_probe.head; p->tstamp = ktime_get(); - p->saddr = inet->saddr; - p->sport = inet->sport; - p->daddr = inet->daddr; - p->dport = inet->dport; + p->saddr = inet->inet_saddr; + p->sport = inet->inet_sport; + p->daddr = inet->inet_daddr; + p->dport = inet->inet_dport; p->length = skb->len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cdb2ca7684d..8816a20c259 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -132,6 +132,35 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } } +/* This function calculates a "timeout" which is equivalent to the timeout of a + * TCP connection after "boundary" unsucessful, exponentially backed-off + * retransmissions with an initial RTO of TCP_RTO_MIN. + */ +static bool retransmits_timed_out(struct sock *sk, + unsigned int boundary) +{ + unsigned int timeout, linear_backoff_thresh; + unsigned int start_ts; + + if (!inet_csk(sk)->icsk_retransmits) + return false; + + if (unlikely(!tcp_sk(sk)->retrans_stamp)) + start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; + else + start_ts = tcp_sk(sk)->retrans_stamp; + + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + + return (tcp_time_stamp - start_ts) >= timeout; +} + /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { @@ -141,14 +170,14 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); retry_until = icsk->icsk_syn_retries ? 
: sysctl_tcp_syn_retries; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); - dst_negative_advice(&sk->sk_dst_cache); + dst_negative_advice(&sk->sk_dst_cache, sk); } retry_until = sysctl_tcp_retries2; @@ -303,15 +332,15 @@ void tcp_retransmit_timer(struct sock *sk) struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", - &inet->daddr, ntohs(inet->dport), - inet->num, tp->snd_una, tp->snd_nxt); + &inet->inet_daddr, ntohs(inet->inet_dport), + inet->inet_num, tp->snd_una, tp->snd_nxt); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", - &np->daddr, ntohs(inet->dport), - inet->num, tp->snd_una, tp->snd_nxt); + &np->daddr, ntohs(inet->inet_dport), + inet->inet_num, tp->snd_una, tp->snd_nxt); } #endif #endif diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index e9bbff74648..b612acf7618 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -165,9 +165,8 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * every other rtt. */ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (veno->inc - && tp->snd_cwnd < - tp->snd_cwnd_clamp) { + if (veno->inc && + tp->snd_cwnd < tp->snd_cwnd_clamp) { tp->snd_cwnd++; veno->inc = 0; } else diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 66b6821b984..a0f24035889 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -157,8 +157,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (queue > TCP_YEAH_ALPHA || rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) { - if (queue > TCP_YEAH_ALPHA - && tp->snd_cwnd > yeah->reno_count) { + if (queue > TCP_YEAH_ALPHA && + tp->snd_cwnd > yeah->reno_count) { u32 reduction = min(queue / TCP_YEAH_GAMMA , tp->snd_cwnd >> TCP_YEAH_EPSILON); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0fa9f70e4b1..1f9534846ca 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -106,7 +106,7 @@ #include <net/xfrm.h> #include "udp_impl.h" -struct udp_table udp_table; +struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); int sysctl_udp_mem[3] __read_mostly; @@ -121,28 +121,30 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); -#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) +#define MAX_UDP_PORTS 65536 +#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2), + unsigned int log) { struct sock *sk2; struct hlist_nulls_node *node; sk_nulls_for_each(sk2, node, &hslot->head) - if (net_eq(sock_net(sk2), net) && - sk2 != sk && - (bitmap || sk2->sk_hash == num) && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + (bitmap || udp_sk(sk2)->udp_port_hash == num) && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (*saddr_comp)(sk, sk2)) { if (bitmap) - __set_bit(sk2->sk_hash / 
UDP_HTABLE_SIZE, + __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap); else return 1; @@ -150,18 +152,51 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, return 0; } +/* + * Note: we still hold spinlock of primary hash chain, so no other writer + * can insert/delete a socket with local_port == num + */ +static int udp_lib_lport_inuse2(struct net *net, __u16 num, + struct udp_hslot *hslot2, + struct sock *sk, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2)) +{ + struct sock *sk2; + struct hlist_nulls_node *node; + int res = 0; + + spin_lock(&hslot2->lock); + udp_portaddr_for_each_entry(sk2, node, &hslot2->head) + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + (udp_sk(sk2)->udp_port_hash == num) && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2)) { + res = 1; + break; + } + spin_unlock(&hslot2->lock); + return res; +} + /** * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up * @saddr_comp: AF-dependent comparison of bound local IP addresses + * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, + * with NULL address */ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2), + unsigned int hash2_nulladdr) { - struct udp_hslot *hslot; + struct udp_hslot *hslot, *hslot2; struct udp_table *udptable = sk->sk_prot->h.udp_table; int error = 1; struct net *net = sock_net(sk); @@ -180,13 +215,15 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, /* * force rand to be an odd multiple of UDP_HTABLE_SIZE */ - rand = (rand | 1) * UDP_HTABLE_SIZE; - for (last = first + UDP_HTABLE_SIZE; first != last; first++) { - hslot = &udptable->hash[udp_hashfn(net, first)]; + rand = (rand | 1) * (udptable->mask + 1); + for (last = first + udptable->mask + 1; + first != last; + first++) { + hslot = udp_hashslot(udptable, net, first); bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, - saddr_comp); + saddr_comp, udptable->log); snum = first; /* @@ -196,7 +233,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, */ do { if (low <= snum && snum <= high && - !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + !test_bit(snum >> udptable->log, bitmap)) goto found; snum += rand; } while (snum != first); @@ -204,17 +241,51 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, } goto fail; } else { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + hslot = udp_hashslot(udptable, net, snum); spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) + if (hslot->count > 10) { + int exist; + unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; + + slot2 &= udptable->mask; + hash2_nulladdr &= udptable->mask; + + hslot2 = udp_hashslot2(udptable, slot2); + if (hslot->count < hslot2->count) + goto scan_primary_hash; + + exist = udp_lib_lport_inuse2(net, snum, hslot2, + sk, saddr_comp); + if (!exist && (hash2_nulladdr != slot2)) { + hslot2 = udp_hashslot2(udptable, hash2_nulladdr); + exist = udp_lib_lport_inuse2(net, snum, hslot2, + sk, saddr_comp); + } + if (exist) + goto fail_unlock; + else + goto found; + } +scan_primary_hash: + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, + saddr_comp, 0)) goto fail_unlock; } found: - inet_sk(sk)->num = snum; - 
sk->sk_hash = snum; + inet_sk(sk)->inet_num = snum; + udp_sk(sk)->udp_port_hash = snum; + udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { sk_nulls_add_node_rcu(sk, &hslot->head); + hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + spin_lock(&hslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &hslot2->head); + hslot2->count++; + spin_unlock(&hslot2->lock); } error = 0; fail_unlock: @@ -229,13 +300,26 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); return (!ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr)); + (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || + inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); +} + +static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, + unsigned int port) +{ + return jhash_1word(saddr, net_hash_mix(net)) ^ port; } int udp_v4_get_port(struct sock *sk, unsigned short snum) { - return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); + unsigned int hash2_nulladdr = + udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); + unsigned int hash2_partial = + udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); + + /* precompute partial secondary hash */ + udp_sk(sk)->udp_portaddr_hash = hash2_partial; + return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, @@ -244,23 +328,61 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && !ipv6_only_sock(sk)) { struct inet_sock *inet = inet_sk(sk); score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) + if (inet->inet_rcv_saddr) { + if (inet->inet_rcv_saddr != daddr) + return -1; + score += 2; + } + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) + return -1; + score += 2; + } + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) return -1; score += 2; } - if (inet->daddr) { - if (inet->daddr != saddr) + } + return score; +} + +/* + * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) + */ +#define SCORE2_MAX (1 + 2 + 2 + 2) +static inline int compute_score2(struct sock *sk, struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) { + struct inet_sock *inet = inet_sk(sk); + + if (inet->inet_rcv_saddr != daddr) + return -1; + if (inet->inet_num != hnum) + return -1; + + score = (sk->sk_family == PF_INET ? 
1 : 0); + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) return -1; score += 2; } - if (inet->dport) { - if (inet->dport != sport) + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; score += 2; } @@ -273,6 +395,51 @@ static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, return score; } + +/* called with read_rcu_lock() */ +static struct sock *udp4_lib_lookup2(struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, int dif, + struct udp_hslot *hslot2, unsigned int slot2) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + int score, badness; + +begin: + result = NULL; + badness = -1; + udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + score = compute_score2(sk, net, saddr, sport, + daddr, hnum, dif); + if (score > badness) { + result = sk; + badness = score; + if (score == SCORE2_MAX) + goto exact_match; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot2) + goto begin; + + if (result) { +exact_match: + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score2(result, net, saddr, sport, + daddr, hnum, dif) < badness)) { + sock_put(result); + goto begin; + } + } + return result; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ @@ -283,11 +450,35 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - unsigned int hash = udp_hashfn(net, hnum); - struct udp_hslot *hslot = &udptable->hash[hash]; + unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); + struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness; rcu_read_lock(); + if (hslot->count > 10) { + hash2 = udp4_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + if (hslot->count < hslot2->count) + goto begin; + + result = udp4_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, + hslot2, slot2); + if (!result) { + hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + if (hslot->count < hslot2->count) + goto begin; + + result = udp4_lib_lookup2(net, INADDR_ANY, sport, + daddr, hnum, dif, + hslot2, slot2); + } + rcu_read_unlock(); + return result; + } begin: result = NULL; badness = -1; @@ -304,7 +495,7 @@ begin: * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. 
*/ - if (get_nulls_value(node) != hash) + if (get_nulls_value(node) != slot) goto begin; if (result) { @@ -354,12 +545,13 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (!net_eq(sock_net(s), net) || - s->sk_hash != hnum || - (inet->daddr && inet->daddr != rmt_addr) || - (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || - ipv6_only_sock(s) || + if (!net_eq(sock_net(s), net) || + udp_sk(s)->udp_port_hash != hnum || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || + (inet->inet_rcv_saddr && + inet->inet_rcv_saddr != loc_addr) || + ipv6_only_sock(s) || (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) continue; if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) @@ -642,14 +834,14 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = inet->daddr; - dport = inet->dport; + daddr = inet->inet_daddr; + dport = inet->inet_dport; /* Open fast path for connected socket. Route will not be used, if at least one option is set. */ connected = 1; } - ipc.addr = inet->saddr; + ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; err = sock_tx_timestamp(msg, sk, &ipc.shtx); @@ -704,7 +896,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = - { .sport = inet->sport, + { .sport = inet->inet_sport, .dport = dport } } }; struct net *net = sock_net(sk); @@ -748,7 +940,7 @@ back_from_confirm: inet->cork.fl.fl4_dst = daddr; inet->cork.fl.fl_ip_dport = dport; inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->sport; + inet->cork.fl.fl_ip_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: @@ -862,6 +1054,7 @@ static unsigned int first_packet_length(struct sock *sk) udp_lib_checksum_complete(skb)) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, IS_UDPLITE(sk)); + atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); __skb_queue_tail(&list_kill, skb); } @@ -982,7 +1175,7 @@ try_again: UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. 
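The UDP changes in this file add a second hash table keyed on (local address, port) next to the old port-only table: udp4_portaddr_hash() mixes the bound address with net_hash_mix() via jhash_1word() and XORs in the port, and once the port chain grows past a small threshold __udp4_lib_lookup() walks whichever of the two candidate chains is shorter, retrying with the INADDR_ANY variant to catch wildcard binds. A toy model of the slot selection only; a trivial integer mixer stands in for jhash_1word()/net_hash_mix(), and the table size is arbitrary:

#include <stdint.h>
#include <stdio.h>

#define UDP_HASH_SLOTS 256u		/* power of two; mask = slots - 1 */

/* Stand-in for jhash_1word(saddr, net_hash_mix(net)). */
static uint32_t mix32(uint32_t x)
{
	x ^= x >> 16;
	x *= 0x45d9f3bu;
	x ^= x >> 16;
	return x;
}

static unsigned int portaddr_slot(uint32_t saddr, unsigned int port)
{
	return (mix32(saddr) ^ port) & (UDP_HASH_SLOTS - 1);
}

int main(void)
{
	uint32_t daddr = 0xc0a80001u;	/* 192.168.0.1 */
	unsigned int port = 53;

	/* Exact-address slot first, wildcard slot as the fallback, mirroring
	 * udp4_portaddr_hash(net, daddr, hnum) and then
	 * udp4_portaddr_hash(net, INADDR_ANY, hnum) in __udp4_lib_lookup().
	 */
	printf("slot for (192.168.0.1, 53): %u\n", portaddr_slot(daddr, port));
	printf("slot for (0.0.0.0, 53):     %u\n", portaddr_slot(0, port));
	return 0;
}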
*/ if (sin) { @@ -1023,15 +1216,15 @@ int udp_disconnect(struct sock *sk, int flags) */ sk->sk_state = TCP_CLOSE; - inet->daddr = 0; - inet->dport = 0; + inet->inet_daddr = 0; + inet->inet_dport = 0; sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); - inet->sport = 0; + inet->inet_sport = 0; } sk_dst_reset(sk); return 0; @@ -1042,13 +1235,22 @@ void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { struct udp_table *udptable = sk->sk_prot->h.udp_table; - unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); - struct udp_hslot *hslot = &udptable->hash[hash]; + struct udp_hslot *hslot, *hslot2; + + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); if (sk_nulls_del_node_init_rcu(sk)) { - inet_sk(sk)->num = 0; + hslot->count--; + inet_sk(sk)->inet_num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); } spin_unlock_bh(&hslot->lock); } @@ -1057,25 +1259,22 @@ EXPORT_SYMBOL(udp_lib_unhash); static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { - int is_udplite = IS_UDPLITE(sk); - int rc; + int rc = sock_queue_rcv_skb(sk, skb); + + if (rc < 0) { + int is_udplite = IS_UDPLITE(sk); - if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { + if (rc == -ENOMEM) UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; } return 0; -drop: - UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); - return -1; } /* returns: @@ -1182,53 +1381,88 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) drop: UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; } + +static void flush_stack(struct sock **stack, unsigned int count, + struct sk_buff *skb, unsigned int final) +{ + unsigned int i; + struct sk_buff *skb1 = NULL; + struct sock *sk; + + for (i = 0; i < count; i++) { + sk = stack[i]; + if (likely(skb1 == NULL)) + skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); + + if (!skb1) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + } + + if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) + skb1 = NULL; + } + if (unlikely(skb1)) + kfree_skb(skb1); +} + /* * Multicasts and broadcasts go to each listener. * - * Note: called only from the BH handler context, - * so we don't need to lock the hashes. + * Note: called only from the BH handler context. 
*/ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, struct udp_table *udptable) { - struct sock *sk; - struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; + struct sock *sk, *stack[256 / sizeof(struct sock *)]; + struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); int dif; + unsigned int i, count = 0; spin_lock(&hslot->lock); sk = sk_nulls_head(&hslot->head); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); - if (sk) { - struct sock *sknext = NULL; - - do { - struct sk_buff *skb1 = skb; - - sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, - daddr, uh->source, saddr, - dif); - if (sknext) - skb1 = skb_clone(skb, GFP_ATOMIC); - - if (skb1) { - int ret = udp_queue_rcv_skb(sk, skb1); - if (ret > 0) - /* we should probably re-process instead - * of dropping packets here. */ - kfree_skb(skb1); - } - sk = sknext; - } while (sknext); - } else - consume_skb(skb); + while (sk) { + stack[count++] = sk; + sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, + daddr, uh->source, saddr, dif); + if (unlikely(count == ARRAY_SIZE(stack))) { + if (!sk) + break; + flush_stack(stack, count, skb, ~0); + count = 0; + } + } + /* + * before releasing chain lock, we must take a reference on sockets + */ + for (i = 0; i < count; i++) + sock_hold(stack[i]); + spin_unlock(&hslot->lock); + + /* + * do the slow work with no lock held + */ + if (count) { + flush_stack(stack, count, skb, count - 1); + + for (i = 0; i < count; i++) + sock_put(stack[i]); + } else { + kfree_skb(skb); + } return 0; } @@ -1620,9 +1854,14 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + for (state->bucket = start; state->bucket <= state->udp_table->mask; + ++state->bucket) { struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; + + if (hlist_nulls_empty(&hslot->head)) + continue; + spin_lock_bh(&hslot->lock); sk_nulls_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) @@ -1647,7 +1886,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { - if (state->bucket < UDP_HTABLE_SIZE) + if (state->bucket <= state->udp_table->mask) spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } @@ -1667,7 +1906,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) static void *udp_seq_start(struct seq_file *seq, loff_t *pos) { struct udp_iter_state *state = seq->private; - state->bucket = UDP_HTABLE_SIZE; + state->bucket = MAX_UDP_PORTS; return *pos ? 
udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1689,7 +1928,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; - if (state->bucket < UDP_HTABLE_SIZE) + if (state->bucket <= state->udp_table->mask) spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); } @@ -1744,12 +1983,12 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); + __be32 dest = inet->inet_daddr; + __be32 src = inet->inet_rcv_saddr; + __u16 destp = ntohs(inet->inet_dport); + __u16 srcp = ntohs(inet->inet_sport); - seq_printf(f, "%4d: %08X:%04X %08X:%04X" + seq_printf(f, "%5d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), @@ -1815,21 +2054,60 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -void __init udp_table_init(struct udp_table *table) +static __initdata unsigned long uhash_entries; +static int __init set_uhash_entries(char *str) { - int i; + if (!str) + return 0; + uhash_entries = simple_strtoul(str, &str, 0); + if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) + uhash_entries = UDP_HTABLE_SIZE_MIN; + return 1; +} +__setup("uhash_entries=", set_uhash_entries); - for (i = 0; i < UDP_HTABLE_SIZE; i++) { +void __init udp_table_init(struct udp_table *table, const char *name) +{ + unsigned int i; + + if (!CONFIG_BASE_SMALL) + table->hash = alloc_large_system_hash(name, + 2 * sizeof(struct udp_hslot), + uhash_entries, + 21, /* one slot per 2 MB */ + 0, + &table->log, + &table->mask, + 64 * 1024); + /* + * Make sure hash table has the minimum size + */ + if (CONFIG_BASE_SMALL || table->mask < UDP_HTABLE_SIZE_MIN - 1) { + table->hash = kmalloc(UDP_HTABLE_SIZE_MIN * + 2 * sizeof(struct udp_hslot), GFP_KERNEL); + if (!table->hash) + panic(name); + table->log = ilog2(UDP_HTABLE_SIZE_MIN); + table->mask = UDP_HTABLE_SIZE_MIN - 1; + } + table->hash2 = table->hash + (table->mask + 1); + for (i = 0; i <= table->mask; i++) { INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); + table->hash[i].count = 0; spin_lock_init(&table->hash[i].lock); } + for (i = 0; i <= table->mask; i++) { + INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); + table->hash2[i].count = 0; + spin_lock_init(&table->hash2[i].lock); + } } void __init udp_init(void) { unsigned long nr_pages, limit; - udp_table_init(&udp_table); + udp_table_init(&udp_table, "UDP"); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing * toward zero with the amount of memory, with a floor of 128 pages. 
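Illustration (not part of the patch): the udp.c hunks above split the old single port-hashed table into a primary table keyed by port and a secondary "hash2" table keyed by (local address, port), and __udp4_lib_lookup() only walks the secondary table when the primary chain is long and no longer than the secondary one. The user-space sketch below shows that slot-selection idea under stated assumptions: toy_mix() is a stand-in for the kernel's real mixing function (udp4_portaddr_hash is not reproduced here), the 128-slot size mirrors UDP_HTABLE_SIZE_MIN, and the ">10" threshold and chain-length comparison come from the hunk above.

/* illustration only -- simplified two-level UDP slot selection */
#include <stdint.h>
#include <stdio.h>

#define LOG  7                         /* 128 slots, like UDP_HTABLE_SIZE_MIN */
#define MASK ((1u << LOG) - 1)

static unsigned int toy_mix(uint32_t v)     /* stand-in for the kernel hash */
{
        v ^= v >> 16;
        v *= 0x45d9f3bu;
        v ^= v >> 16;
        return v;
}

static unsigned int portaddr_slot(uint32_t daddr, uint16_t hnum)
{
        return (toy_mix(daddr) ^ hnum) & MASK;   /* hash2-style: addr + port */
}

static unsigned int port_slot(uint16_t hnum)
{
        return hnum & MASK;                      /* hash-style: port only */
}

int main(void)
{
        /* pretend per-slot chain lengths, normally hslot->count */
        unsigned int count[MASK + 1] = { 0 }, count2[MASK + 1] = { 0 };
        uint32_t daddr = 0xc0a80001;             /* 192.168.0.1 */
        uint16_t port  = 5353;

        unsigned int s1 = port_slot(port);
        unsigned int s2 = portaddr_slot(daddr, port);

        count[s1]  = 50;                         /* long port-only chain */
        count2[s2] = 3;                          /* short addr+port chain */

        /* mirrors the "hslot->count > 10" fast path and the fallback when
         * the secondary chain is longer than the primary one */
        if (count[s1] > 10 && count2[s2] <= count[s1])
                printf("scan hash2 slot %u (%u sockets)\n", s2, count2[s2]);
        else
                printf("scan hash slot %u (%u sockets)\n", s1, count[s1]);
        return 0;
}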
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 95248d7f75e..66f79513f4a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -12,7 +12,7 @@ */ #include "udp_impl.h" -struct udp_table udplite_table; +struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); static int udplite_rcv(struct sk_buff *skb) @@ -64,7 +64,6 @@ static struct inet_protosw udplite4_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplite_prot, .ops = &inet_dgram_ops, - .capability = -1, .no_check = 0, /* must checksum (RFC 3828) */ .flags = INET_PROTOSW_PERMANENT, }; @@ -110,7 +109,7 @@ static inline int udplite4_proc_init(void) void __init udplite4_register(void) { - udp_table_init(&udplite_table); + udp_table_init(&udplite_table, "UDP-Lite"); if (proto_register(&udplite_prot, 1)) goto out_register_err; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 74fb2eb833e..8c08a28d8f8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -267,7 +267,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { #ifdef CONFIG_SYSCTL static struct ctl_table xfrm4_policy_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm4_gc_thresh", .data = &xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ead6c7a42f4..a578096152a 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -170,6 +170,25 @@ config IPV6_SIT Saying M here will produce a module called sit. If unsure, say Y. +config IPV6_SIT_6RD + bool "IPv6: IPv6 Rapid Deployment (6RD) (EXPERIMENTAL)" + depends on IPV6_SIT && EXPERIMENTAL + default n + ---help--- + IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon + mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly + deploy IPv6 unicast service to IPv4 sites to which it provides + customer premises equipment. Like 6to4, it utilizes stateless IPv6 in + IPv4 encapsulation in order to transit IPv4-only network + infrastructure. Unlike 6to4, a 6rd service provider uses an IPv6 + prefix of its own in place of the fixed 6to4 prefix. + + With this option enabled, the SIT driver offers 6rd functionality by + providing an additional ioctl API to configure the IPv6 prefix that is + used instead of the static 2002::/16 prefix of 6to4. + + If unsure, say N. 
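Illustration (not part of the patch): the help text above says a 6rd provider substitutes its own prefix for the fixed 2002::/16 of 6to4; a CE's delegated prefix is then the provider prefix with (all or part of) the CE's IPv4 address appended. The sketch below shows that derivation under stated assumptions: a /32 provider prefix, all 32 IPv4 bits embedded (real deployments may embed fewer), and the example addresses are documentation addresses; the SIT ioctls this option adds are not used here.

/* illustration only -- deriving a 6rd delegated prefix */
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        struct in6_addr sp_prefix;            /* operator's 6rd prefix, assumed /32 */
        struct in_addr  ce_v4;                /* CE's public IPv4 address */
        struct in6_addr delegated;
        char buf[INET6_ADDRSTRLEN];

        inet_pton(AF_INET6, "2001:db8::", &sp_prefix);
        inet_pton(AF_INET, "192.0.2.33", &ce_v4);

        /* delegated /64 = 6rd prefix (32 bits) || IPv4 address (32 bits) */
        memset(&delegated, 0, sizeof(delegated));
        memcpy(&delegated.s6_addr[0], &sp_prefix.s6_addr[0], 4);
        memcpy(&delegated.s6_addr[4], &ce_v4.s_addr, 4);

        inet_ntop(AF_INET6, &delegated, buf, sizeof(buf));
        printf("delegated prefix: %s/64\n", buf);   /* 2001:db8:c000:221::/64 */
        return 0;
}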
+ config IPV6_NDISC_NODETYPE bool diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1fd0a3d775d..de7a194a64a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -481,9 +481,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -491,9 +490,8 @@ static void addrconf_forward_change(struct net *net, __s32 newf) if (changed) dev_forward_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) @@ -1137,10 +1135,9 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, hiscore->rule = -1; hiscore->ifa = NULL; - read_lock(&dev_base_lock); rcu_read_lock(); - for_each_netdev(net, dev) { + for_each_netdev_rcu(net, dev) { struct inet6_dev *idev; /* Candidate Source Address (section 4) @@ -1235,7 +1232,6 @@ try_nextdev: read_unlock_bh(&idev->lock); } rcu_read_unlock(); - read_unlock(&dev_base_lock); if (!hiscore->ifa) return -EADDRNOTAVAIL; @@ -3485,85 +3481,114 @@ enum addr_type_t ANYCAST_ADDR, }; +/* called with rcu_read_lock() */ +static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, + struct netlink_callback *cb, enum addr_type_t type, + int s_ip_idx, int *p_ip_idx) +{ + struct inet6_ifaddr *ifa; + struct ifmcaddr6 *ifmca; + struct ifacaddr6 *ifaca; + int err = 1; + int ip_idx = *p_ip_idx; + + read_lock_bh(&idev->lock); + switch (type) { + case UNICAST_ADDR: + /* unicast address incl. temp addr */ + for (ifa = idev->addr_list; ifa; + ifa = ifa->if_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWADDR, + NLM_F_MULTI); + if (err <= 0) + break; + } + break; + case MULTICAST_ADDR: + /* multicast address */ + for (ifmca = idev->mc_list; ifmca; + ifmca = ifmca->next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETMULTICAST, + NLM_F_MULTI); + if (err <= 0) + break; + } + break; + case ANYCAST_ADDR: + /* anycast address */ + for (ifaca = idev->ac_list; ifaca; + ifaca = ifaca->aca_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_GETANYCAST, + NLM_F_MULTI); + if (err <= 0) + break; + } + break; + default: + break; + } + read_unlock_bh(&idev->lock); + *p_ip_idx = ip_idx; + return err; +} + static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, enum addr_type_t type) { + struct net *net = sock_net(skb->sk); + int h, s_h; int idx, ip_idx; int s_idx, s_ip_idx; - int err = 1; struct net_device *dev; - struct inet6_dev *idev = NULL; - struct inet6_ifaddr *ifa; - struct ifmcaddr6 *ifmca; - struct ifacaddr6 *ifaca; - struct net *net = sock_net(skb->sk); + struct inet6_dev *idev; + struct hlist_head *head; + struct hlist_node *node; - s_idx = cb->args[0]; - s_ip_idx = ip_idx = cb->args[1]; + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + s_ip_idx = ip_idx = cb->args[2]; - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if (idx > s_idx) - s_ip_idx = 0; - ip_idx = 0; - if ((idev = in6_dev_get(dev)) == NULL) - goto cont; - 
read_lock_bh(&idev->lock); - switch (type) { - case UNICAST_ADDR: - /* unicast address incl. temp addr */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWADDR, - NLM_F_MULTI); - } - break; - case MULTICAST_ADDR: - /* multicast address */ - for (ifmca = idev->mc_list; ifmca; - ifmca = ifmca->next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_GETMULTICAST, - NLM_F_MULTI); - } - break; - case ANYCAST_ADDR: - /* anycast address */ - for (ifaca = idev->ac_list; ifaca; - ifaca = ifaca->aca_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_GETANYCAST, - NLM_F_MULTI); - } - break; - default: - break; - } - read_unlock_bh(&idev->lock); - in6_dev_put(idev); - - if (err <= 0) - break; + rcu_read_lock(); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) + s_ip_idx = 0; + ip_idx = 0; + if ((idev = __in6_dev_get(dev)) == NULL) + goto cont; + + if (in6_dump_addrs(idev, skb, cb, type, + s_ip_idx, &ip_idx) <= 0) + goto done; cont: - idx++; + idx++; + } } - cb->args[0] = idx; - cb->args[1] = ip_idx; +done: + rcu_read_unlock(); + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = ip_idx; + return skb->len; } @@ -3708,6 +3733,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #endif array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; + array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; } static inline size_t inet6_if_nlmsg_size(void) @@ -3826,28 +3852,39 @@ nla_put_failure: static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - int idx, err; - int s_idx = cb->args[0]; + int h, s_h; + int idx = 0, s_idx; struct net_device *dev; struct inet6_dev *idev; + struct hlist_head *head; + struct hlist_node *node; - read_lock(&dev_base_lock); - idx = 0; - for_each_netdev(net, dev) { - if (idx < s_idx) - goto cont; - if ((idev = in6_dev_get(dev)) == NULL) - goto cont; - err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); - in6_dev_put(idev); - if (err <= 0) - break; + s_h = cb->args[0]; + s_idx = cb->args[1]; + + rcu_read_lock(); + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { + if (idx < s_idx) + goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; + if (inet6_fill_ifinfo(skb, idev, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWLINK, NLM_F_MULTI) <= 0) + goto out; cont: - idx++; + idx++; + } } - read_unlock(&dev_base_lock); - cb->args[0] = idx; +out: + rcu_read_unlock(); + cb->args[1] = idx; + cb->args[0] = h; return skb->len; } @@ -4000,41 +4037,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int addrconf_sysctl_forward_strategy(ctl_table *table, - void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int *valp = table->data; - int val = *valp; - int new; - - if (!newval || !newlen) - return 0; - if (newlen != sizeof(int)) - return 
-EINVAL; - if (get_user(new, (int __user *)newval)) - return -EFAULT; - if (new == *valp) - return 0; - if (oldval && oldlenp) { - size_t len; - if (get_user(len, oldlenp)) - return -EFAULT; - if (len) { - if (len > table->maxlen) - len = table->maxlen; - if (copy_to_user(oldval, valp, len)) - return -EFAULT; - if (put_user(len, oldlenp)) - return -EFAULT; - } - } - - *valp = new; - return addrconf_fixup_forwarding(table, valp, val); -} - static void dev_disable_change(struct inet6_dev *idev) { if (!idev || !idev->dev) @@ -4051,9 +4053,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - read_lock(&dev_base_lock); - for_each_netdev(net, dev) { - rcu_read_lock(); + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.disable_ipv6) ^ (!newf); @@ -4061,9 +4062,8 @@ static void addrconf_disable_change(struct net *net, __s32 newf) if (changed) dev_disable_change(idev); } - rcu_read_unlock(); } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) @@ -4113,16 +4113,13 @@ static struct addrconf_sysctl_table .sysctl_header = NULL, .addrconf_vars = { { - .ctl_name = NET_IPV6_FORWARDING, .procname = "forwarding", .data = &ipv6_devconf.forwarding, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_forward, - .strategy = addrconf_sysctl_forward_strategy, }, { - .ctl_name = NET_IPV6_HOP_LIMIT, .procname = "hop_limit", .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), @@ -4130,7 +4127,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_MTU, .procname = "mtu", .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), @@ -4138,7 +4134,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_RA, .procname = "accept_ra", .data = &ipv6_devconf.accept_ra, .maxlen = sizeof(int), @@ -4146,7 +4141,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, .procname = "accept_redirects", .data = &ipv6_devconf.accept_redirects, .maxlen = sizeof(int), @@ -4154,7 +4148,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_AUTOCONF, .procname = "autoconf", .data = &ipv6_devconf.autoconf, .maxlen = sizeof(int), @@ -4162,7 +4155,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_DAD_TRANSMITS, .procname = "dad_transmits", .data = &ipv6_devconf.dad_transmits, .maxlen = sizeof(int), @@ -4170,7 +4162,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_RTR_SOLICITS, .procname = "router_solicitations", .data = &ipv6_devconf.rtr_solicits, .maxlen = sizeof(int), @@ -4178,25 +4169,20 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, .procname = "router_solicitation_interval", .data = &ipv6_devconf.rtr_solicit_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_FORCE_MLD_VERSION, .procname = "force_mld_version", .data = 
&ipv6_devconf.force_mld_version, .maxlen = sizeof(int), @@ -4205,7 +4191,6 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_PRIVACY { - .ctl_name = NET_IPV6_USE_TEMPADDR, .procname = "use_tempaddr", .data = &ipv6_devconf.use_tempaddr, .maxlen = sizeof(int), @@ -4213,7 +4198,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_TEMP_VALID_LFT, .procname = "temp_valid_lft", .data = &ipv6_devconf.temp_valid_lft, .maxlen = sizeof(int), @@ -4221,7 +4205,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, .procname = "temp_prefered_lft", .data = &ipv6_devconf.temp_prefered_lft, .maxlen = sizeof(int), @@ -4229,7 +4212,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_REGEN_MAX_RETRY, .procname = "regen_max_retry", .data = &ipv6_devconf.regen_max_retry, .maxlen = sizeof(int), @@ -4237,7 +4219,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, .procname = "max_desync_factor", .data = &ipv6_devconf.max_desync_factor, .maxlen = sizeof(int), @@ -4246,7 +4227,6 @@ static struct addrconf_sysctl_table }, #endif { - .ctl_name = NET_IPV6_MAX_ADDRESSES, .procname = "max_addresses", .data = &ipv6_devconf.max_addresses, .maxlen = sizeof(int), @@ -4254,7 +4234,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, .procname = "accept_ra_defrtr", .data = &ipv6_devconf.accept_ra_defrtr, .maxlen = sizeof(int), @@ -4262,7 +4241,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, .maxlen = sizeof(int), @@ -4271,7 +4249,6 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_ROUTER_PREF { - .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF, .procname = "accept_ra_rtr_pref", .data = &ipv6_devconf.accept_ra_rtr_pref, .maxlen = sizeof(int), @@ -4279,17 +4256,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, .procname = "router_probe_interval", .data = &ipv6_devconf.rtr_probe_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, #ifdef CONFIG_IPV6_ROUTE_INFO { - .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, .procname = "accept_ra_rt_info_max_plen", .data = &ipv6_devconf.accept_ra_rt_info_max_plen, .maxlen = sizeof(int), @@ -4299,7 +4273,6 @@ static struct addrconf_sysctl_table #endif #endif { - .ctl_name = NET_IPV6_PROXY_NDP, .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), @@ -4307,7 +4280,6 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, .procname = "accept_source_route", .data = &ipv6_devconf.accept_source_route, .maxlen = sizeof(int), @@ -4316,7 +4288,6 @@ static struct addrconf_sysctl_table }, #ifdef CONFIG_IPV6_OPTIMISTIC_DAD { - .ctl_name = CTL_UNNUMBERED, .procname = "optimistic_dad", .data = &ipv6_devconf.optimistic_dad, .maxlen = sizeof(int), @@ -4327,7 +4298,6 @@ static struct addrconf_sysctl_table #endif #ifdef CONFIG_IPV6_MROUTE { - .ctl_name = CTL_UNNUMBERED, .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), @@ -4336,16 +4306,13 @@ static struct addrconf_sysctl_table }, #endif { - .ctl_name = 
CTL_UNNUMBERED, .procname = "disable_ipv6", .data = &ipv6_devconf.disable_ipv6, .maxlen = sizeof(int), .mode = 0644, .proc_handler = addrconf_sysctl_disable, - .strategy = sysctl_intvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "accept_dad", .data = &ipv6_devconf.accept_dad, .maxlen = sizeof(int), @@ -4353,13 +4320,20 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { - .ctl_name = 0, /* sentinel */ + .procname = "force_tllao", + .data = &ipv6_devconf.force_tllao, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + /* sentinel */ } }, }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, - int ctl_name, struct inet6_dev *idev, struct ipv6_devconf *p) + struct inet6_dev *idev, struct ipv6_devconf *p) { int i; struct addrconf_sysctl_table *t; @@ -4367,9 +4341,9 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, #define ADDRCONF_CTL_PATH_DEV 3 struct ctl_path addrconf_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv6", .ctl_name = NET_IPV6, }, - { .procname = "conf", .ctl_name = NET_IPV6_CONF, }, + { .procname = "net", }, + { .procname = "ipv6", }, + { .procname = "conf", }, { /* to be set */ }, { }, }; @@ -4395,7 +4369,6 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, goto free; addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].procname = t->dev_name; - addrconf_ctl_path[ADDRCONF_CTL_PATH_DEV].ctl_name = ctl_name; t->sysctl_header = register_net_sysctl_table(net, addrconf_ctl_path, t->addrconf_vars); @@ -4431,10 +4404,9 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) { neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - ndisc_ifinfo_sysctl_strategy); + &ndisc_ifinfo_sysctl_change); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, - idev->dev->ifindex, idev, &idev->cnf); + idev, &idev->cnf); } static void addrconf_sysctl_unregister(struct inet6_dev *idev) @@ -4455,7 +4427,7 @@ static int addrconf_init_net(struct net *net) all = &ipv6_devconf; dflt = &ipv6_devconf_dflt; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); if (all == NULL) goto err_alloc_all; @@ -4473,13 +4445,11 @@ static int addrconf_init_net(struct net *net) net->ipv6.devconf_dflt = dflt; #ifdef CONFIG_SYSCTL - err = __addrconf_sysctl_register(net, "all", NET_PROTO_CONF_ALL, - NULL, all); + err = __addrconf_sysctl_register(net, "all", NULL, all); if (err < 0) goto err_reg_all; - err = __addrconf_sysctl_register(net, "default", NET_PROTO_CONF_DEFAULT, - NULL, dflt); + err = __addrconf_sysctl_register(net, "default", NULL, dflt); if (err < 0) goto err_reg_dflt; #endif @@ -4503,7 +4473,7 @@ static void addrconf_exit_net(struct net *net) __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); __addrconf_sysctl_unregister(net->ipv6.devconf_all); #endif - if (net != &init_net) { + if (!net_eq(net, &init_net)) { kfree(net->ipv6.devconf_dflt); kfree(net->ipv6.devconf_all); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e127a32f954..12e69d364dd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -95,7 +95,8 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } -static int inet6_create(struct net *net, struct socket *sock, int protocol) +static int inet6_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct 
inet_sock *inet; struct ipv6_pinfo *np; @@ -158,7 +159,7 @@ lookup_protocol: } err = -EPERM; - if (answer->capability > 0 && !capable(answer->capability)) + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; @@ -185,7 +186,7 @@ lookup_protocol: inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; if (SOCK_RAW == sock->type) { - inet->num = protocol; + inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } @@ -228,12 +229,12 @@ lookup_protocol: */ sk_refcnt_debug_inc(sk); - if (inet->num) { + if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. */ - inet->sport = htons(inet->num); + inet->inet_sport = htons(inet->inet_num); sk->sk_prot->hash(sk); } if (sk->sk_prot->init) { @@ -281,7 +282,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) lock_sock(sk); /* Check these errors (active socket, double bind). */ - if (sk->sk_state != TCP_CLOSE || inet->num) { + if (sk->sk_state != TCP_CLOSE || inet->inet_num) { err = -EINVAL; goto out; } @@ -314,6 +315,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; + rcu_read_lock(); if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { @@ -326,12 +328,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; - goto out; + goto out_unlock; } - dev = dev_get_by_index(net, sk->sk_bound_dev_if); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; - goto out; + goto out_unlock; } } @@ -342,19 +344,16 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(addr_type & IPV6_ADDR_MULTICAST)) { if (!ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { - if (dev) - dev_put(dev); err = -EADDRNOTAVAIL; - goto out; + goto out_unlock; } } - if (dev) - dev_put(dev); + rcu_read_unlock(); } } - inet->rcv_saddr = v4addr; - inet->saddr = v4addr; + inet->inet_rcv_saddr = v4addr; + inet->inet_saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); @@ -375,12 +374,15 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - inet->sport = htons(inet->num); - inet->dport = 0; - inet->daddr = 0; + inet->inet_sport = htons(inet->inet_num); + inet->inet_dport = 0; + inet->inet_daddr = 0; out: release_sock(sk); return err; +out_unlock: + rcu_read_unlock(); + goto out; } EXPORT_SYMBOL(inet6_bind); @@ -441,12 +443,12 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; if (peer) { - if (!inet->dport) + if (!inet->inet_dport) return -ENOTCONN; if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1) return -ENOTCONN; - sin->sin6_port = inet->dport; + sin->sin6_port = inet->inet_dport; ipv6_addr_copy(&sin->sin6_addr, &np->daddr); if (np->sndflow) sin->sin6_flowinfo = np->flow_label; @@ -456,7 +458,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, else ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr); - sin->sin6_port = inet->sport; + sin->sin6_port = inet->inet_sport; } if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = sk->sk_bound_dev_if; @@ -552,7 +554,7 @@ const struct 
proto_ops inet6_dgram_ops = { #endif }; -static struct net_proto_family inet6_family_ops = { +static const struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, .owner = THIS_MODULE, @@ -654,8 +656,9 @@ int inet6_sk_rebuild_header(struct sock *sk) ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index c1589e2f1dc..c2f300c314b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -24,18 +24,92 @@ * This file is derived from net/ipv4/ah.c. */ +#include <crypto/hash.h> #include <linux/module.h> #include <net/ip.h> #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> -#include <linux/spinlock.h> #include <linux/string.h> +#include <linux/scatterlist.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/xfrm.h> +#define IPV6HDR_BASELEN 8 + +struct tmp_ext { +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + struct in6_addr saddr; +#endif + struct in6_addr daddr; + char hdrs[0]; +}; + +struct ah_skb_cb { + struct xfrm_skb_cb xfrm; + void *tmp; +}; + +#define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0])) + +static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, + unsigned int size) +{ + unsigned int len; + + len = size + crypto_ahash_digestsize(ahash) + + (crypto_ahash_alignmask(ahash) & + ~(crypto_tfm_ctx_alignment() - 1)); + + len = ALIGN(len, crypto_tfm_ctx_alignment()); + + len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash); + len = ALIGN(len, __alignof__(struct scatterlist)); + + len += sizeof(struct scatterlist) * nfrags; + + return kmalloc(len, GFP_ATOMIC); +} + +static inline struct tmp_ext *ah_tmp_ext(void *base) +{ + return base + IPV6HDR_BASELEN; +} + +static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset) +{ + return tmp + offset; +} + +static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp, + unsigned int offset) +{ + return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1); +} + +static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, + u8 *icv) +{ + struct ahash_request *req; + + req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash), + crypto_tfm_ctx_alignment()); + + ahash_request_set_tfm(req, ahash); + + return req; +} + +static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, + struct ahash_request *req) +{ + return (void *)ALIGN((unsigned long)(req + 1) + + crypto_ahash_reqsize(ahash), + __alignof__(struct scatterlist)); +} + static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) { u8 *opt = (u8 *)opthdr; @@ -218,24 +292,85 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) return 0; } +static void ah6_output_done(struct crypto_async_request *base, int err) +{ + int extlen; + u8 *iph_base; + u8 *icv; + struct sk_buff *skb = base->data; + struct xfrm_state *x = skb_dst(skb)->xfrm; + struct ah_data *ahp = x->data; + struct ipv6hdr *top_iph = ipv6_hdr(skb); + struct ip_auth_hdr *ah = ip_auth_hdr(skb); + struct tmp_ext *iph_ext; + + extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); + if (extlen) + extlen += sizeof(*iph_ext); + + iph_base = AH_SKB_CB(skb)->tmp; + iph_ext = ah_tmp_ext(iph_base); + 
icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen); + + memcpy(ah->auth_data, icv, ahp->icv_trunc_len); + memcpy(top_iph, iph_base, IPV6HDR_BASELEN); + + if (extlen) { +#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + memcpy(&top_iph->saddr, iph_ext, extlen); +#else + memcpy(&top_iph->daddr, iph_ext, extlen); +#endif + } + + err = ah->nexthdr; + + kfree(AH_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); +} + static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + int nfrags; int extlen; + u8 *iph_base; + u8 *icv; + u8 nexthdr; + struct sk_buff *trailer; + struct crypto_ahash *ahash; + struct ahash_request *req; + struct scatterlist *sg; struct ipv6hdr *top_iph; struct ip_auth_hdr *ah; struct ah_data *ahp; - u8 nexthdr; - char tmp_base[8]; - struct { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - struct in6_addr saddr; -#endif - struct in6_addr daddr; - char hdrs[0]; - } *tmp_ext; + struct tmp_ext *iph_ext; + + ahp = x->data; + ahash = ahp->ahash; + + if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + goto out; + nfrags = err; skb_push(skb, -skb_network_offset(skb)); + extlen = skb_network_header_len(skb) - sizeof(struct ipv6hdr); + if (extlen) + extlen += sizeof(*iph_ext); + + err = -ENOMEM; + iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen); + if (!iph_base) + goto out; + + iph_ext = ah_tmp_ext(iph_base); + icv = ah_tmp_icv(ahash, iph_ext, extlen); + req = ah_tmp_req(ahash, icv); + sg = ah_req_sg(ahash, req); + + ah = ip_auth_hdr(skb); + memset(ah->auth_data, 0, ahp->icv_trunc_len); + top_iph = ipv6_hdr(skb); top_iph->payload_len = htons(skb->len - sizeof(*top_iph)); @@ -245,31 +380,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) /* When there are no extension headers, we only need to save the first * 8 bytes of the base IP header. 
*/ - memcpy(tmp_base, top_iph, sizeof(tmp_base)); + memcpy(iph_base, top_iph, IPV6HDR_BASELEN); - tmp_ext = NULL; - extlen = skb_transport_offset(skb) - sizeof(struct ipv6hdr); if (extlen) { - extlen += sizeof(*tmp_ext); - tmp_ext = kmalloc(extlen, GFP_ATOMIC); - if (!tmp_ext) { - err = -ENOMEM; - goto error; - } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - memcpy(tmp_ext, &top_iph->saddr, extlen); + memcpy(iph_ext, &top_iph->saddr, extlen); #else - memcpy(tmp_ext, &top_iph->daddr, extlen); + memcpy(iph_ext, &top_iph->daddr, extlen); #endif err = ipv6_clear_mutable_options(top_iph, - extlen - sizeof(*tmp_ext) + + extlen - sizeof(*iph_ext) + sizeof(*top_iph), XFRM_POLICY_OUT); if (err) - goto error_free_iph; + goto out_free; } - ah = ip_auth_hdr(skb); ah->nexthdr = nexthdr; top_iph->priority = 0; @@ -278,36 +404,80 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->flow_lbl[2] = 0; top_iph->hop_limit = 0; - ahp = x->data; ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); - spin_lock_bh(&x->lock); - err = ah_mac_digest(ahp, skb, ah->auth_data); - memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); - spin_unlock_bh(&x->lock); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); - if (err) - goto error_free_iph; + ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_callback(req, 0, ah6_output_done, skb); + + AH_SKB_CB(skb)->tmp = iph_base; - memcpy(top_iph, tmp_base, sizeof(tmp_base)); - if (tmp_ext) { + err = crypto_ahash_digest(req); + if (err) { + if (err == -EINPROGRESS) + goto out; + + if (err == -EBUSY) + err = NET_XMIT_DROP; + goto out_free; + } + + memcpy(ah->auth_data, icv, ahp->icv_trunc_len); + memcpy(top_iph, iph_base, IPV6HDR_BASELEN); + + if (extlen) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - memcpy(&top_iph->saddr, tmp_ext, extlen); + memcpy(&top_iph->saddr, iph_ext, extlen); #else - memcpy(&top_iph->daddr, tmp_ext, extlen); + memcpy(&top_iph->daddr, iph_ext, extlen); #endif -error_free_iph: - kfree(tmp_ext); } -error: +out_free: + kfree(iph_base); +out: return err; } +static void ah6_input_done(struct crypto_async_request *base, int err) +{ + u8 *auth_data; + u8 *icv; + u8 *work_iph; + struct sk_buff *skb = base->data; + struct xfrm_state *x = xfrm_input_state(skb); + struct ah_data *ahp = x->data; + struct ip_auth_hdr *ah = ip_auth_hdr(skb); + int hdr_len = skb_network_header_len(skb); + int ah_hlen = (ah->hdrlen + 2) << 2; + + work_iph = AH_SKB_CB(skb)->tmp; + auth_data = ah_tmp_auth(work_iph, hdr_len); + icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); + + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG: 0; + if (err) + goto out; + + skb->network_header += ah_hlen; + memcpy(skb_network_header(skb), work_iph, hdr_len); + __skb_pull(skb, ah_hlen + hdr_len); + skb_set_transport_header(skb, -hdr_len); + + err = ah->nexthdr; +out: + kfree(AH_SKB_CB(skb)->tmp); + xfrm_input_resume(skb, err); +} + + + static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) { /* @@ -325,14 +495,21 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) * There is offset of AH before IPv6 header after the process. 
*/ + u8 *auth_data; + u8 *icv; + u8 *work_iph; + struct sk_buff *trailer; + struct crypto_ahash *ahash; + struct ahash_request *req; + struct scatterlist *sg; struct ip_auth_hdr *ah; struct ipv6hdr *ip6h; struct ah_data *ahp; - unsigned char *tmp_hdr = NULL; u16 hdr_len; u16 ah_hlen; int nexthdr; - int err = -EINVAL; + int nfrags; + int err = -ENOMEM; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -345,9 +522,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb->ip_summed = CHECKSUM_NONE; - hdr_len = skb->data - skb_network_header(skb); + hdr_len = skb_network_header_len(skb); ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; + ahash = ahp->ahash; + nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; @@ -358,48 +537,67 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, ah_hlen)) goto out; - tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC); - if (!tmp_hdr) - goto out; ip6h = ipv6_hdr(skb); + + skb_push(skb, hdr_len); + + if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + goto out; + nfrags = err; + + work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len); + if (!work_iph) + goto out; + + auth_data = ah_tmp_auth(work_iph, hdr_len); + icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); + req = ah_tmp_req(ahash, icv); + sg = ah_req_sg(ahash, req); + + memcpy(work_iph, ip6h, hdr_len); + memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); + memset(ah->auth_data, 0, ahp->icv_trunc_len); + if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN)) - goto free_out; + goto out_free; + ip6h->priority = 0; ip6h->flow_lbl[0] = 0; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; ip6h->hop_limit = 0; - spin_lock(&x->lock); - { - u8 auth_data[MAX_AH_AUTH_LEN]; + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); - memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); - memset(ah->auth_data, 0, ahp->icv_trunc_len); - skb_push(skb, hdr_len); - err = ah_mac_digest(ahp, skb, ah->auth_data); - if (err) - goto unlock; - if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) - err = -EBADMSG; + ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_callback(req, 0, ah6_input_done, skb); + + AH_SKB_CB(skb)->tmp = work_iph; + + err = crypto_ahash_digest(req); + if (err) { + if (err == -EINPROGRESS) + goto out; + + if (err == -EBUSY) + err = NET_XMIT_DROP; + goto out_free; } -unlock: - spin_unlock(&x->lock); + err = memcmp(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG: 0; if (err) - goto free_out; + goto out_free; skb->network_header += ah_hlen; - memcpy(skb_network_header(skb), tmp_hdr, hdr_len); + memcpy(skb_network_header(skb), work_iph, hdr_len); skb->transport_header = skb->network_header; __skb_pull(skb, ah_hlen + hdr_len); - kfree(tmp_hdr); + err = nexthdr; - return nexthdr; - -free_out: - kfree(tmp_hdr); +out_free: + kfree(work_iph); out: return err; } @@ -430,7 +628,7 @@ static int ah6_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; - struct crypto_hash *tfm; + struct crypto_ahash *ahash; if (!x->aalg) goto error; @@ -442,12 +640,12 @@ static int ah6_init_state(struct xfrm_state *x) if (ahp == NULL) return -ENOMEM; - tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) + ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); + if (IS_ERR(ahash)) goto error; - ahp->tfm = tfm; - if (crypto_hash_setkey(tfm, x->aalg->alg_key, + ahp->ahash = ahash; + if (crypto_ahash_setkey(ahash, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8)) goto error; @@ -461,22 +659,18 @@ static int ah6_init_state(struct xfrm_state *x) BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_hash_digestsize(tfm)) { + crypto_ahash_digestsize(ahash)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_hash_digestsize(tfm), + x->aalg->alg_name, crypto_ahash_digestsize(ahash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; - ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8; + ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); - ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL); - if (!ahp->work_icv) - goto error; - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); switch (x->props.mode) { @@ -495,8 +689,7 @@ static int ah6_init_state(struct xfrm_state *x) error: if (ahp) { - kfree(ahp->work_icv); - crypto_free_hash(ahp->tfm); + crypto_free_ahash(ahp->ahash); kfree(ahp); } return -EINVAL; @@ -509,8 +702,7 @@ static void ah6_destroy(struct xfrm_state *x) if (!ahp) return; - kfree(ahp->work_icv); - crypto_free_hash(ahp->tfm); + crypto_free_ahash(ahp->ahash); kfree(ahp); } diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 1ae58bec1de..f1c74c8ef9d 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -404,13 +404,13 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, if (dev) return ipv6_chk_acast_dev(dev, addr); - read_lock(&dev_base_lock); - for_each_netdev(net, dev) + rcu_read_lock(); + for_each_netdev_rcu(net, dev) if (ipv6_chk_acast_dev(dev, addr)) { found = 1; break; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return found; } @@ -431,9 +431,9 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (!idev) continue; read_lock_bh(&idev->lock); @@ -443,7 +443,6 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return im; } @@ -454,16 +453,15 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im im = im->aca_next; while (!im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) 
read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -482,29 +480,30 @@ static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return ac6_get_idx(seq, *pos); } static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ifacaddr6 *im; - im = ac6_get_next(seq, v); + struct ifacaddr6 *im = ac6_get_next(seq, v); + ++*pos; return im; } static void ac6_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct ac6_iter_state *state = ac6_seq_private(seq); + if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); + state->idev = NULL; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int ac6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e2bdc6d83a4..e6f9cdf780f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -98,17 +98,15 @@ ipv4_connected: if (err) goto out; - ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr); + ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); - if (ipv6_addr_any(&np->saddr)) { - ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff), - inet->saddr); - } + if (ipv6_addr_any(&np->saddr)) + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); + + if (ipv6_addr_any(&np->rcv_saddr)) + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, + &np->rcv_saddr); - if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff), - inet->rcv_saddr); - } goto out; } @@ -136,7 +134,7 @@ ipv4_connected: ipv6_addr_copy(&np->daddr, daddr); np->flow_label = fl.fl6_flowlabel; - inet->dport = usin->sin6_port; + inet->inet_dport = usin->sin6_port; /* * Check for a route to destination an obtain the @@ -147,8 +145,9 @@ ipv4_connected: ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; @@ -190,7 +189,7 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); - inet->rcv_saddr = LOOPBACK4_IPV6; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; } ip6_dst_store(sk, dst, @@ -329,9 +328,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = IP6CB(skb)->iif; } else { - ipv6_addr_set(&sin->sin6_addr, 0, 0, - htonl(0xffff), - *(__be32 *)(nh + serr->addr_offset)); + ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), + &sin->sin6_addr); } } @@ -351,8 +349,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) } else { struct inet_sock *inet = inet_sk(sk); - ipv6_addr_set(&sin->sin6_addr, 0, 0, - htonl(0xffff), ip_hdr(skb)->saddr); + ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, + &sin->sin6_addr); if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } @@ -539,12 +537,17 @@ int 
datagram_send_ctl(struct net *net, addr_type = __ipv6_addr_type(&src_info->ipi6_addr); + rcu_read_lock(); if (fl->oif) { - dev = dev_get_by_index(net, fl->oif); - if (!dev) + dev = dev_get_by_index_rcu(net, fl->oif); + if (!dev) { + rcu_read_unlock(); return -ENODEV; - } else if (addr_type & IPV6_ADDR_LINKLOCAL) + } + } else if (addr_type & IPV6_ADDR_LINKLOCAL) { + rcu_read_unlock(); return -EINVAL; + } if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; @@ -555,8 +558,7 @@ int datagram_send_ctl(struct net *net, ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } - if (dev) - dev_put(dev); + rcu_read_unlock(); if (err) goto exit_f; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index af597c73ebe..668a46b655e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -473,7 +473,7 @@ static int esp_init_authenc(struct xfrm_state *x) } err = crypto_aead_setauthsize( - aead, aalg_desc->uinfo.auth.icv_truncbits / 8); + aead, x->aalg->alg_trunc_len / 8); if (err) goto free_key; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 00a7a5e4ac9..b7aa7c64cc4 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -264,44 +264,36 @@ static struct fib_rules_ops fib6_rules_ops_template = { static int fib6_rules_net_init(struct net *net) { + struct fib_rules_ops *ops; int err = -ENOMEM; - net->ipv6.fib6_rules_ops = kmemdup(&fib6_rules_ops_template, - sizeof(*net->ipv6.fib6_rules_ops), - GFP_KERNEL); - if (!net->ipv6.fib6_rules_ops) - goto out; + ops = fib_rules_register(&fib6_rules_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv6.fib6_rules_ops = ops; - net->ipv6.fib6_rules_ops->fro_net = net; - INIT_LIST_HEAD(&net->ipv6.fib6_rules_ops->rules_list); err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, FIB_RULE_PERMANENT); + RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) - goto out_fib6_default_rule_add; + goto out_fib6_rules_ops; - err = fib_rules_register(net->ipv6.fib6_rules_ops); - if (err) - goto out_fib6_default_rule_add; out: return err; -out_fib6_default_rule_add: - fib_rules_cleanup_ops(net->ipv6.fib6_rules_ops); out_fib6_rules_ops: - kfree(net->ipv6.fib6_rules_ops); + fib_rules_unregister(ops); goto out; } static void fib6_rules_net_exit(struct net *net) { fib_rules_unregister(net->ipv6.fib6_rules_ops); - kfree(net->ipv6.fib6_rules_ops); } static struct pernet_operations fib6_rules_net_ops = { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f23ebbec063..4ae661bc367 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -942,15 +942,13 @@ EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL ctl_table ipv6_icmp_table_template[] = { { - .ctl_name = NET_IPV6_ICMP_RATELIMIT, .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies }, - { .ctl_name = 0 }, + { }, }; struct ctl_table *ipv6_icmp_sysctl_init(struct net *net) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index cc4797dd832..3516e6fe2e5 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -132,7 +132,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_family = AF_INET6; ipv6_addr_copy(&sin6->sin6_addr, &np->daddr); - sin6->sin6_port = inet_sk(sk)->dport; + sin6->sin6_port = 
inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; @@ -168,8 +168,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) if (dst) { struct rt6_info *rt = (struct rt6_info *)dst; if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { - sk->sk_dst_cache = NULL; - dst_release(dst); + __sk_dst_reset(sk); dst = NULL; } } @@ -194,8 +193,9 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_sport = inet->sport; - fl.fl_ip_dport = inet->dport; + fl.mark = sk->sk_mark; + fl.fl_ip_sport = inet->inet_sport; + fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1bcc3431859..633a6c26613 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -22,9 +22,10 @@ #include <net/inet6_hashtables.h> #include <net/ip.h> -void __inet6_hash(struct sock *sk) +int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); @@ -45,10 +46,15 @@ void __inet6_hash(struct sock *sk) lock = inet_ehash_lockp(hashinfo, hash); spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); + if (tw) { + WARN_ON(sk->sk_hash != tw->tw_hash); + twrefcnt = inet_twsk_unhash(tw); + } spin_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + return twrefcnt; } EXPORT_SYMBOL(__inet6_hash); @@ -73,7 +79,7 @@ struct sock *__inet6_lookup_established(struct net *net, * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - unsigned int slot = hash & (hashinfo->ehash_size - 1); + unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; @@ -125,7 +131,7 @@ static int inline compute_score(struct sock *sk, struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -214,15 +220,16 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &np->rcv_saddr; const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; - const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, - inet->dport); + inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; + int twrefcnt = 0; spin_lock(lock); @@ -248,21 +255,25 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, unique: /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. 
*/ - inet->num = lport; - inet->sport = htons(lport); + inet->inet_num = lport; + inet->inet_sport = htons(lport); + sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); - sk->sk_hash = hash; + if (tw) { + twrefcnt = inet_twsk_unhash(tw); + NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); + } spin_unlock(lock); + if (twrefcnt) + inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - if (twp != NULL) { + if (twp) { *twp = tw; - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); - } else if (tw != NULL) { + } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule(tw, death_row); - NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); } @@ -279,7 +290,7 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk) const struct ipv6_pinfo *np = inet6_sk(sk); return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, np->daddr.s6_addr32, - inet->dport); + inet->inet_dport); } int inet6_hash_connect(struct inet_timewait_death_row *death_row, diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7712578bdc6..6e7bffa2205 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -67,7 +67,7 @@ static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) struct ip6_flowlabel *fl; for (fl=fl_ht[FL_HASH(label)]; fl; fl = fl->next) { - if (fl->label == label && fl->fl_net == net) + if (fl->label == label && net_eq(fl->fl_net, net)) return fl; } return NULL; @@ -163,7 +163,8 @@ static void ip6_fl_purge(struct net *net) struct ip6_flowlabel *fl, **flp; flp = &fl_ht[i]; while ((fl = *flp) != NULL) { - if (fl->fl_net == net && atomic_read(&fl->users) == 0) { + if (net_eq(fl->fl_net, net) && + atomic_read(&fl->users) == 0) { *flp = fl->next; fl_free(fl); atomic_dec(&fl_size); @@ -377,8 +378,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, goto done; fl->share = freq->flr_share; addr_type = ipv6_addr_type(&freq->flr_dst); - if ((addr_type&IPV6_ADDR_MAPPED) - || addr_type == IPV6_ADDR_ANY) { + if ((addr_type & IPV6_ADDR_MAPPED) || + addr_type == IPV6_ADDR_ANY) { err = -EINVAL; goto done; } @@ -421,8 +422,8 @@ static int mem_check(struct sock *sk) if (room <= 0 || ((count >= FL_MAX_PER_SOCK || - (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) - && !capable(CAP_NET_ADMIN))) + (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && + !capable(CAP_NET_ADMIN))) return -ENOBUFS; return 0; @@ -630,7 +631,7 @@ static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { fl = fl_ht[state->bucket]; - while (fl && fl->fl_net != net) + while (fl && !net_eq(fl->fl_net, net)) fl = fl->next; if (fl) break; @@ -645,7 +646,7 @@ static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flo fl = fl->next; try_again: - while (fl && fl->fl_net != net) + while (fl && !net_eq(fl->fl_net, net)) fl = fl->next; while (!fl) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c595bbe1ed9..d453d07b0df 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -78,7 +78,7 @@ static void ip6_fb_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); -static int ip6_tnl_net_id; +static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; @@ -88,8 +88,10 @@ struct 
ip6_tnl_net { struct ip6_tnl **tnls[2]; }; -/* lock for the tunnel lists */ -static DEFINE_RWLOCK(ip6_tnl_lock); +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ip6_tnl_lock); static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { @@ -130,6 +132,9 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) * else %NULL **/ +#define for_each_ip6_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + static struct ip6_tnl * ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) { @@ -138,13 +143,14 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) { + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } - if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP)) + t = rcu_dereference(ip6n->tnls_wc[0]); + if (t && (t->dev->flags & IFF_UP)) return t; return NULL; @@ -186,10 +192,10 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); + spin_lock_bh(&ip6_tnl_lock); t->next = *tp; - write_lock_bh(&ip6_tnl_lock); - *tp = t; - write_unlock_bh(&ip6_tnl_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ip6_tnl_lock); } /** @@ -204,9 +210,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ip6_tnl_lock); + spin_lock_bh(&ip6_tnl_lock); *tp = t->next; - write_unlock_bh(&ip6_tnl_lock); + spin_unlock_bh(&ip6_tnl_lock); break; } } @@ -313,9 +319,9 @@ ip6_tnl_dev_uninit(struct net_device *dev) struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) { - write_lock_bh(&ip6_tnl_lock); + spin_lock_bh(&ip6_tnl_lock); ip6n->tnls_wc[0] = NULL; - write_unlock_bh(&ip6_tnl_lock); + spin_unlock_bh(&ip6_tnl_lock); } else { ip6_tnl_unlink(ip6n, t); } @@ -409,7 +415,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, in trouble since we might need the source address for further processing of the error. 
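The ip6_tunnel hunks above move the tunnel hash chains from a reader/writer lock to RCU: writers take the ip6_tnl_lock spinlock and publish entries with rcu_assign_pointer(), while readers walk the chain through for_each_ip6_tunnel_rcu() under rcu_read_lock(). A rough userspace analogy with C11 atomics, not kernel code: a release store stands in for rcu_assign_pointer() and an acquire load for rcu_dereference(); the grace-period side (synchronize_net()/call_rcu()) is left out entirely. Build with: cc -pthread demo.c

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct tnl {
		int local, remote;      /* stand-ins for the tunnel endpoints */
		struct tnl *next;       /* next pointers never change after publish here */
	};

	static _Atomic(struct tnl *) head;   /* one hash bucket */
	static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;

	static void tnl_link(int local, int remote)          /* writer side */
	{
		struct tnl *t = malloc(sizeof(*t));

		if (!t)
			abort();
		t->local = local;
		t->remote = remote;
		pthread_mutex_lock(&write_lock);              /* ~ spin_lock_bh(&ip6_tnl_lock) */
		t->next = atomic_load_explicit(&head, memory_order_relaxed);
		/* publish the fully initialised node, ~ rcu_assign_pointer() */
		atomic_store_explicit(&head, t, memory_order_release);
		pthread_mutex_unlock(&write_lock);
	}

	static struct tnl *tnl_lookup(int local, int remote) /* lock-free reader */
	{
		/* ~ rcu_dereference() under rcu_read_lock(); acquire is a
		 * conservative stand-in for the kernel's dependency ordering */
		struct tnl *t = atomic_load_explicit(&head, memory_order_acquire);

		for (; t; t = t->next)
			if (t->local == local && t->remote == remote)
				return t;
		return NULL;
	}

	int main(void)
	{
		tnl_link(1, 2);
		tnl_link(3, 4);
		printf("found: %d\n", tnl_lookup(3, 4) != NULL);
		return 0;
	}

Deleting entries would additionally need the omitted grace-period machinery before freeing, which is exactly what the patch keeps in the kernel code.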
*/ - read_lock(&ip6_tnl_lock); + rcu_read_lock(); if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr)) == NULL) goto out; @@ -482,7 +488,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, *msg = rel_msg; out: - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); return err; } @@ -652,6 +658,7 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } +/* called with rcu_read_lock() */ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) { struct ip6_tnl_parm *p = &t->parms; @@ -662,15 +669,13 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) struct net_device *ldev = NULL; if (p->link) - ldev = dev_get_by_index(net, p->link); + ldev = dev_get_by_index_rcu(net, p->link); if ((ipv6_addr_is_multicast(&p->laddr) || likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) ret = 1; - if (ldev) - dev_put(ldev); } return ret; } @@ -693,23 +698,23 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, struct ip6_tnl *t; struct ipv6hdr *ipv6h = ipv6_hdr(skb); - read_lock(&ip6_tnl_lock); + rcu_read_lock(); if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { if (t->parms.proto != ipproto && t->parms.proto != 0) { - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); goto discard; } if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); goto discard; } if (!ip6_tnl_rcv_ctl(t)) { t->dev->stats.rx_dropped++; - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); goto discard; } secpath_reset(skb); @@ -727,10 +732,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, t->dev->stats.rx_packets++; t->dev->stats.rx_bytes += skb->len; netif_rx(skb); - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); return 0; } - read_unlock(&ip6_tnl_lock); + rcu_read_unlock(); return 1; discard: @@ -798,8 +803,9 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) if (p->flags & IP6_TNL_F_CAP_XMIT) { struct net_device *ldev = NULL; + rcu_read_lock(); if (p->link) - ldev = dev_get_by_index(net, p->link); + ldev = dev_get_by_index_rcu(net, p->link); if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) printk(KERN_WARNING @@ -813,8 +819,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) p->name); else ret = 1; - if (ldev) - dev_put(ldev); + rcu_read_unlock(); } return ret; } @@ -1387,29 +1392,25 @@ static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) { int h; struct ip6_tnl *t; + LIST_HEAD(list); for (h = 0; h < HASH_SIZE; h++) { - while ((t = ip6n->tnls_r_l[h]) != NULL) - unregister_netdevice(t->dev); + t = ip6n->tnls_r_l[h]; + while (t != NULL) { + unregister_netdevice_queue(t->dev, &list); + t = t->next; + } } t = ip6n->tnls_wc[0]; - unregister_netdevice(t->dev); + unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_many(&list); } static int ip6_tnl_init_net(struct net *net) { + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; - struct ip6_tnl_net *ip6n; - - err = -ENOMEM; - ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL); - if (ip6n == NULL) - goto err_alloc; - - err = net_assign_generic(net, ip6_tnl_net_id, ip6n); - if (err < 0) - goto err_assign; ip6n->tnls[0] = ip6n->tnls_wc; ip6n->tnls[1] = ip6n->tnls_r_l; @@ -1432,27 +1433,23 @@ static int ip6_tnl_init_net(struct net *net) err_register: free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(ip6n); -err_alloc: return err; } static void 
ip6_tnl_exit_net(struct net *net) { - struct ip6_tnl_net *ip6n; + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - ip6n = net_generic(net, ip6_tnl_net_id); rtnl_lock(); ip6_tnl_destroy_tunnels(ip6n); rtnl_unlock(); - kfree(ip6n); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, .exit = ip6_tnl_exit_net, + .id = &ip6_tnl_net_id, + .size = sizeof(struct ip6_tnl_net), }; /** @@ -1477,7 +1474,7 @@ static int __init ip6_tunnel_init(void) goto unreg_ip4ip6; } - err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops); + err = register_pernet_device(&ip6_tnl_net_ops); if (err < 0) goto err_pernet; return 0; @@ -1501,7 +1498,7 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n"); - unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops); + unregister_pernet_device(&ip6_tnl_net_ops); } module_init(ip6_tunnel_init); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 716153941fc..52e0f74fdfe 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -477,7 +477,7 @@ failure: * Delete a VIF entry */ -static int mif6_delete(struct net *net, int vifi) +static int mif6_delete(struct net *net, int vifi, struct list_head *head) { struct mif_device *v; struct net_device *dev; @@ -519,7 +519,7 @@ static int mif6_delete(struct net *net, int vifi) in6_dev->cnf.mc_forwarding--; if (v->flags & MIFF_REGISTER) - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); dev_put(dev); return 0; @@ -976,6 +976,7 @@ static int ip6mr_device_event(struct notifier_block *this, struct net *net = dev_net(dev); struct mif_device *v; int ct; + LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; @@ -983,8 +984,10 @@ static int ip6mr_device_event(struct notifier_block *this, v = &net->ipv6.vif6_table[0]; for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(net, ct); + mif6_delete(net, ct, &list); } + unregister_netdevice_many(&list); + return NOTIFY_DONE; } @@ -1188,14 +1191,16 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) static void mroute_clean_tables(struct net *net) { int i; + LIST_HEAD(list); /* * Shut down all active vif entries */ for (i = 0; i < net->ipv6.maxvif; i++) { if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(net, i); + mif6_delete(net, i, &list); } + unregister_netdevice_many(&list); /* * Wipe the cache @@ -1297,7 +1302,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns switch (optname) { case MRT6_INIT: if (sk->sk_type != SOCK_RAW || - inet_sk(sk)->num != IPPROTO_ICMPV6) + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; if (optlen < sizeof(int)) return -EINVAL; @@ -1325,7 +1330,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(net, mifi); + ret = mif6_delete(net, mifi, NULL); rtnl_unlock(); return ret; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4f7aaf6996a..430454ee5ea 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -64,7 +64,7 @@ int ip6_ra_control(struct sock *sk, int sel) struct ip6_ra_chain *ra, *new_ra, **rap; /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ - if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) + if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != 
IPPROTO_RAW) return -ENOPROTOOPT; new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -106,7 +106,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, if (inet_sk(sk)->is_icsk) { if (opt && !((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) && - inet_sk(sk)->daddr != LOOPBACK4_IPV6) { + inet_sk(sk)->inet_daddr != LOOPBACK4_IPV6) { struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -234,7 +234,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_V6ONLY: if (optlen < sizeof(int) || - inet_sk(sk)->num) + inet_sk(sk)->inet_num) goto e_inval; np->ipv6only = valbool; retv = 0; @@ -424,6 +424,7 @@ sticky_done: fl.fl6_flowlabel = 0; fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; if (optlen == 0) goto update; @@ -665,7 +666,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val<0 || val>3) + if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) goto e_inval; np->pmtudisc = val; retv = 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f9fcf690bd5..1f9c44442e6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2375,9 +2375,9 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); state->idev = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + idev = __in6_dev_get(state->dev); if (!idev) continue; read_lock_bh(&idev->lock); @@ -2387,7 +2387,6 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) break; } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return im; } @@ -2398,16 +2397,15 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr im = im->next; while (!im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; break; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -2426,31 +2424,31 @@ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return igmp6_mc_get_idx(seq, *pos); } static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ifmcaddr6 *im; - im = igmp6_mc_get_next(seq, v); + struct ifmcaddr6 *im = igmp6_mc_get_next(seq, v); + ++*pos; return im; } static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); + if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp6_mc_seq_show(struct seq_file *seq, void *v) @@ -2507,9 +2505,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) state->idev = NULL; state->im = NULL; - for_each_netdev(net, state->dev) { + for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; - idev = in6_dev_get(state->dev); + 
idev = __in6_dev_get(state->dev); if (unlikely(idev == NULL)) continue; read_lock_bh(&idev->lock); @@ -2525,7 +2523,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) spin_unlock_bh(&im->mca_lock); } read_unlock_bh(&idev->lock); - in6_dev_put(idev); } return psf; } @@ -2539,16 +2536,15 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s spin_unlock_bh(&state->im->mca_lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) { + if (likely(state->idev != NULL)) read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); - } - state->dev = next_net_device(state->dev); + + state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; goto out; } - state->idev = in6_dev_get(state->dev); + state->idev = __in6_dev_get(state->dev); if (!state->idev) continue; read_lock_bh(&state->idev->lock); @@ -2573,9 +2569,9 @@ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos) } static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(dev_base_lock) + __acquires(RCU) { - read_lock(&dev_base_lock); + rcu_read_lock(); return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } @@ -2591,7 +2587,7 @@ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) - __releases(dev_base_lock) + __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (likely(state->im != NULL)) { @@ -2600,11 +2596,10 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) } if (likely(state->idev != NULL)) { read_unlock_bh(&state->idev->lock); - in6_dev_put(state->idev); state->idev = NULL; } state->dev = NULL; - read_unlock(&dev_base_lock); + rcu_read_unlock(); } static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f74e4e2cdd0..c4585279809 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -598,6 +598,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, icmp6h.icmp6_solicited = solicited; icmp6h.icmp6_override = override; + inc_opt |= ifp->idev->cnf.force_tllao; __ndisc_send(dev, neigh, daddr, src_addr, &icmp6h, solicited_addr, inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); @@ -1768,42 +1769,6 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu return ret; } -int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - struct net_device *dev = ctl->extra1; - struct inet6_dev *idev; - int ret; - - if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME) - ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? 
dev->name : "default"); - - switch (ctl->ctl_name) { - case NET_NEIGH_REACHABLE_TIME: - ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen); - break; - case NET_NEIGH_RETRANS_TIME_MS: - case NET_NEIGH_REACHABLE_TIME_MS: - ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen); - break; - default: - ret = 0; - } - - if (newval && newlen && ret > 0 && - dev && (idev = in6_dev_get(dev)) != NULL) { - if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME || - ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS) - idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); - idev->tstamp = jiffies; - inet6_ifinfo_notify(RTM_NEWLINK, idev); - in6_dev_put(idev); - } - - return ret; -} #endif @@ -1857,8 +1822,7 @@ int __init ndisc_init(void) #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", - &ndisc_ifinfo_sysctl_change, - &ndisc_ifinfo_sysctl_strategy); + &ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; #endif diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 1cf3f0c6a95..7854052be60 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -36,7 +36,6 @@ #define IPQ_QMAX_DEFAULT 1024 #define IPQ_PROC_FS_NAME "ip6_queue" -#define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); @@ -499,10 +498,9 @@ ipq_rcv_nl_event(struct notifier_block *this, { struct netlink_notify *n = ptr; - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_IP6_FW && n->pid) { + if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { write_lock_bh(&queue_lock); - if ((n->net == &init_net) && (n->pid == peer_pid)) + if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) __ipq_reset(); write_unlock_bh(&queue_lock); } @@ -518,14 +516,13 @@ static struct ctl_table_header *ipq_sysctl_header; static ctl_table ipq_table[] = { { - .ctl_name = NET_IPQ_QMAX, .procname = NET_IPQ_QMAX_NAME, .data = &queue_maxlen, .maxlen = sizeof(queue_maxlen), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; #endif @@ -625,7 +622,7 @@ cleanup_netlink_notifier: static void __exit ip6_queue_fini(void) { nf_unregister_queue_handlers(&nfqh); - synchronize_net(); + ipq_flush(NULL, 0); #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index cc9f8ef303f..480d7f8c980 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -105,9 +105,9 @@ ip6_packet_match(const struct sk_buff *skb, #define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, - &ip6info->src), IP6T_INV_SRCIP) - || FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) { + &ip6info->src), IP6T_INV_SRCIP) || + FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst), IP6T_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); /* dprintf("SRC: %u. Mask: %u. 
Target: %u.%s\n", ip->saddr, @@ -277,11 +277,11 @@ get_chainname_rulenum(struct ip6t_entry *s, struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) - && strcmp(t->target.u.kernel.target->name, - IP6T_STANDARD_TARGET) == 0 - && t->verdict < 0 - && unconditional(&s->ipv6)) { + if (s->target_offset == sizeof(struct ip6t_entry) && + strcmp(t->target.u.kernel.target->name, + IP6T_STANDARD_TARGET) == 0 && + t->verdict < 0 && + unconditional(&s->ipv6)) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -418,8 +418,8 @@ ip6t_do_table(struct sk_buff *skb, back = get_entry(table_base, back->comefrom); continue; } - if (table_base + v != ip6t_next_entry(e) - && !(e->ipv6.flags & IP6T_F_GOTO)) { + if (table_base + v != ip6t_next_entry(e) && + !(e->ipv6.flags & IP6T_F_GOTO)) { /* Save old back ptr in next entry */ struct ip6t_entry *next = ip6t_next_entry(e); next->comefrom = (void *)back - table_base; @@ -505,11 +505,11 @@ mark_source_chains(struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) - && (strcmp(t->target.u.user.name, - IP6T_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->ipv6)) || visited) { + if ((e->target_offset == sizeof(struct ip6t_entry) && + (strcmp(t->target.u.user.name, + IP6T_STANDARD_TARGET) == 0) && + t->verdict < 0 && + unconditional(&e->ipv6)) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -556,8 +556,8 @@ mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - IP6T_STANDARD_TARGET) == 0 - && newpos >= 0) { + IP6T_STANDARD_TARGET) == 0 && + newpos >= 0) { if (newpos > newinfo->size - sizeof(struct ip6t_entry)) { duprintf("mark_source_chains: " @@ -767,8 +767,8 @@ check_entry_size_and_hooks(struct ip6t_entry *e, { unsigned int h; - if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 - || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -1584,8 +1584,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, int ret, off, h; duprintf("check_compat_entry_size_and_hooks %p\n", e); - if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 - || (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 7018cac4fdd..b285fdf1905 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -249,8 +249,8 @@ static void dump_packet(const struct nf_loginfo *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((logflags & IP6T_LOG_TCPOPT) - && th->doff * 4 > sizeof(struct tcphdr)) { + if ((logflags & IP6T_LOG_TCPOPT) && + th->doff * 4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)]; const u_int8_t *op; unsigned int i; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 5a7f00cd15c..8311ca31816 100644 --- 
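The ip6_tables conditions restyled above are built on the FWINV() idiom, (bool) ^ !!(invflags & flag): XOR-ing a 0/1 match result with a normalized invert bit flips the sense of a rule. A small standalone illustration, using a made-up flag value rather than the real IP6T_* constants:

	#include <stdbool.h>
	#include <stdio.h>

	#define INV_SRCIP 0x01                  /* hypothetical invert-flag bit */

	/* Mirrors the FWINV() idiom: XOR with the normalized (0/1) invert
	 * bit flips the outcome of the underlying test. */
	static bool match(bool hit, unsigned int invflags, unsigned int flag)
	{
		return hit ^ !!(invflags & flag);
	}

	int main(void)
	{
		printf("%d %d %d %d\n",
		       match(true,  0, INV_SRCIP),           /* 1: plain match   */
		       match(false, 0, INV_SRCIP),           /* 0: plain miss    */
		       match(true,  INV_SRCIP, INV_SRCIP),   /* 0: match, inverted */
		       match(false, INV_SRCIP, INV_SRCIP));  /* 1: miss, inverted  */
		return 0;
	}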
a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -223,8 +223,8 @@ static bool reject_tg6_check(const struct xt_tgchk_param *par) return false; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ipv6.proto != IPPROTO_TCP - || (e->ipv6.invflags & XT_INV_PROTO)) { + if (e->ipv6.proto != IPPROTO_TCP || + (e->ipv6.invflags & XT_INV_PROTO)) { printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); return false; } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 3a82f24746b..ac0b7c629d7 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -77,17 +77,14 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) ahinfo->hdrres, ah->reserved, !(ahinfo->hdrres && ah->reserved)); - return (ah != NULL) - && - spi_match(ahinfo->spis[0], ahinfo->spis[1], - ntohl(ah->spi), - !!(ahinfo->invflags & IP6T_AH_INV_SPI)) - && - (!ahinfo->hdrlen || - (ahinfo->hdrlen == hdrlen) ^ - !!(ahinfo->invflags & IP6T_AH_INV_LEN)) - && - !(ahinfo->hdrres && ah->reserved); + return (ah != NULL) && + spi_match(ahinfo->spis[0], ahinfo->spis[1], + ntohl(ah->spi), + !!(ahinfo->invflags & IP6T_AH_INV_SPI)) && + (!ahinfo->hdrlen || + (ahinfo->hdrlen == hdrlen) ^ + !!(ahinfo->invflags & IP6T_AH_INV_LEN)) && + !(ahinfo->hdrres && ah->reserved); } static bool ah_mt6_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 673aa0a5084..7b91c2598ed 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -70,41 +70,36 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) pr_debug("res %02X %02X%04X %02X ", fraginfo->flags & IP6T_FRAG_RES, fh->reserved, ntohs(fh->frag_off) & 0x6, - !((fraginfo->flags & IP6T_FRAG_RES) - && (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); + !((fraginfo->flags & IP6T_FRAG_RES) && + (fh->reserved || (ntohs(fh->frag_off) & 0x06)))); pr_debug("first %02X %02X %02X ", fraginfo->flags & IP6T_FRAG_FST, ntohs(fh->frag_off) & ~0x7, - !((fraginfo->flags & IP6T_FRAG_FST) - && (ntohs(fh->frag_off) & ~0x7))); + !((fraginfo->flags & IP6T_FRAG_FST) && + (ntohs(fh->frag_off) & ~0x7))); pr_debug("mf %02X %02X %02X ", fraginfo->flags & IP6T_FRAG_MF, ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_MF) - && !((ntohs(fh->frag_off) & IP6_MF)))); + !((fraginfo->flags & IP6T_FRAG_MF) && + !((ntohs(fh->frag_off) & IP6_MF)))); pr_debug("last %02X %02X %02X\n", fraginfo->flags & IP6T_FRAG_NMF, ntohs(fh->frag_off) & IP6_MF, - !((fraginfo->flags & IP6T_FRAG_NMF) - && (ntohs(fh->frag_off) & IP6_MF))); - - return (fh != NULL) - && - id_match(fraginfo->ids[0], fraginfo->ids[1], - ntohl(fh->identification), - !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) - && - !((fraginfo->flags & IP6T_FRAG_RES) - && (fh->reserved || (ntohs(fh->frag_off) & 0x6))) - && - !((fraginfo->flags & IP6T_FRAG_FST) - && (ntohs(fh->frag_off) & ~0x7)) - && - !((fraginfo->flags & IP6T_FRAG_MF) - && !(ntohs(fh->frag_off) & IP6_MF)) - && - !((fraginfo->flags & IP6T_FRAG_NMF) - && (ntohs(fh->frag_off) & IP6_MF)); + !((fraginfo->flags & IP6T_FRAG_NMF) && + (ntohs(fh->frag_off) & IP6_MF))); + + return (fh != NULL) && + id_match(fraginfo->ids[0], fraginfo->ids[1], + ntohl(fh->identification), + !!(fraginfo->invflags & IP6T_FRAG_INV_IDS)) && + !((fraginfo->flags & IP6T_FRAG_RES) && + (fh->reserved || (ntohs(fh->frag_off) & 0x6))) && + !((fraginfo->flags & IP6T_FRAG_FST) && + (ntohs(fh->frag_off) & 
~0x7)) && + !((fraginfo->flags & IP6T_FRAG_MF) && + !(ntohs(fh->frag_off) & IP6_MF)) && + !((fraginfo->flags & IP6T_FRAG_NMF) && + (ntohs(fh->frag_off) & IP6_MF)); } static bool frag_mt6_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 356b8d6f6ba..b77307fc874 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -92,16 +92,13 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) !((rtinfo->flags & IP6T_RT_RES) && (((const struct rt0_hdr *)rh)->reserved))); - ret = (rh != NULL) - && + ret = (rh != NULL) && (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))) - && + !!(rtinfo->invflags & IP6T_RT_INV_SGS))) && (!(rtinfo->flags & IP6T_RT_LEN) || ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))) - && + !!(rtinfo->invflags & IP6T_RT_INV_LEN))) && (!(rtinfo->flags & IP6T_RT_TYP) || ((rtinfo->rt_type == rh->type) ^ !!(rtinfo->invflags & IP6T_RT_INV_TYP))); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 6f4383ad86f..ad378efd0eb 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -79,8 +79,8 @@ ip6t_local_out_hook(unsigned int hook, { #if 0 /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 0ad91433ed6..a929c19d30e 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -102,8 +102,8 @@ ip6t_local_out_hook(unsigned int hook, #if 0 /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; @@ -122,11 +122,11 @@ ip6t_local_out_hook(unsigned int hook, ret = ip6t_do_table(skb, hook, in, out, dev_net(out)->ipv6.ip6table_mangle); - if (ret != NF_DROP && ret != NF_STOLEN - && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) - || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) - || skb->mark != mark - || ipv6_hdr(skb)->hop_limit != hop_limit)) + if (ret != NF_DROP && ret != NF_STOLEN && + (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || + memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || + skb->mark != mark || + ipv6_hdr(skb)->hop_limit != hop_limit)) return ip6_route_me_harder(skb) == 0 ? 
ret : NF_DROP; return ret; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 642dcb127ba..c7b8bd1d798 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -244,18 +244,18 @@ static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = { static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMPV6_TYPE] - || !tb[CTA_PROTO_ICMPV6_CODE] - || !tb[CTA_PROTO_ICMPV6_ID]) + if (!tb[CTA_PROTO_ICMPV6_TYPE] || + !tb[CTA_PROTO_ICMPV6_CODE] || + !tb[CTA_PROTO_ICMPV6_ID]) return -EINVAL; tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]); tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]); tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]); - if (tuple->dst.u.icmp.type < 128 - || tuple->dst.u.icmp.type - 128 >= sizeof(invmap) - || !invmap[tuple->dst.u.icmp.type - 128]) + if (tuple->dst.u.icmp.type < 128 || + tuple->dst.u.icmp.type - 128 >= sizeof(invmap) || + !invmap[tuple->dst.u.icmp.type - 128]) return -EINVAL; return 0; @@ -277,9 +277,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_SYSCTL */ diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f3aba255ad9..e0b9424fa1b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -83,7 +83,6 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", .data = &nf_init_frags.low_thresh, .maxlen = sizeof(unsigned int), @@ -91,14 +90,13 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", .data = &nf_init_frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4f24570b086..926ce8eeffa 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -72,7 +72,7 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, int is_multicast = ipv6_addr_is_multicast(loc_addr); sk_for_each_from(sk, node) - if (inet_sk(sk)->num == num) { + if (inet_sk(sk)->inet_num == num) { struct ipv6_pinfo *np = inet6_sk(sk); if (!net_eq(sock_net(sk), net)) @@ -249,7 +249,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Raw sockets are IPv6 only */ if (addr_type == IPV6_ADDR_MAPPED) - return(-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; lock_sock(sk); @@ -257,6 +257,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (sk->sk_state != TCP_CLOSE) goto out; + rcu_read_lock(); /* Check if the address belongs to the host. 
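A recurring change in the sysctl tables above (icmpv6, ip6_queue and the conntrack tables) is dropping the binary ctl_name fields and terminating each array with an empty { } entry. The pattern is an all-zero sentinel that the registration code walks up to. A trimmed-down standalone version, with a struct shape loosely modelled on ctl_table rather than the real one:

	#include <stdio.h>

	struct ctl_entry {                 /* simplified stand-in for ctl_table */
		const char *procname;
		int *data;
		int maxlen;
	};

	static int queue_maxlen = 1024;

	static struct ctl_entry table[] = {
		{
			.procname = "ip6_queue_maxlen",
			.data     = &queue_maxlen,
			.maxlen   = sizeof(queue_maxlen),
		},
		{ },   /* all-zero sentinel; empty braces are a GNU C / C23 form */
	};

	int main(void)
	{
		for (struct ctl_entry *e = table; e->procname; e++)
			printf("%s = %d\n", e->procname, *e->data);
		return 0;
	}

Because the sentinel is simply "everything zero", the explicit .ctl_name = 0 initializers the patch removes never carried extra information.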
*/ if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; @@ -272,13 +273,13 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) - goto out; + goto out_unlock; - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - if (!dev) { - err = -ENODEV; - goto out; - } + err = -ENODEV; + dev = dev_get_by_index_rcu(sock_net(sk), + sk->sk_bound_dev_if); + if (!dev) + goto out_unlock; } /* ipv4 addr of the socket is invalid. Only the @@ -289,20 +290,18 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { - if (dev) - dev_put(dev); - goto out; + goto out_unlock; } } - if (dev) - dev_put(dev); } - inet->rcv_saddr = inet->saddr = v4addr; + inet->inet_rcv_saddr = inet->inet_saddr = v4addr; ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr); if (!(addr_type & IPV6_ADDR_MULTICAST)) ipv6_addr_copy(&np->saddr, &addr->sin6_addr); err = 0; +out_unlock: + rcu_read_unlock(); out: release_sock(sk); return err; @@ -381,8 +380,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) } /* Charge it to the socket. */ - if (sock_queue_rcv_skb(sk,skb)<0) { - atomic_inc(&sk->sk_drops); + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } @@ -416,14 +414,14 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) skb_network_header_len(skb)); if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, inet->num, skb->csum)) + skb->len, inet->inet_num, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (!skb_csum_unnecessary(skb)) skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, - inet->num, 0)); + inet->inet_num, 0)); if (inet->hdrincl) { if (skb_checksum_complete(skb)) { @@ -497,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_scope_id = IP6CB(skb)->iif; } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (np->rxopt.all) datagram_recv_ctl(sk, msg, skb); @@ -518,7 +516,6 @@ csum_copy_err: as some normal condition. */ err = (flags&MSG_DONTWAIT) ? 
-EAGAIN : -EHOSTUNREACH; - atomic_inc(&sk->sk_drops); goto out; } @@ -766,8 +763,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, proto = ntohs(sin6->sin6_port); if (!proto) - proto = inet->num; - else if (proto != inet->num) + proto = inet->inet_num; + else if (proto != inet->inet_num) return(-EINVAL); if (proto > 255) @@ -800,7 +797,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - proto = inet->num; + proto = inet->inet_num; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; } @@ -967,7 +964,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_CHECKSUM: - if (inet_sk(sk)->num == IPPROTO_ICMPV6 && + if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { /* * RFC3542 tells that IPV6_CHECKSUM socket @@ -1007,7 +1004,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); @@ -1030,7 +1027,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, case SOL_RAW: break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: @@ -1087,7 +1084,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); @@ -1110,7 +1107,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, case SOL_RAW: break; case SOL_ICMPV6: - if (inet_sk(sk)->num != IPPROTO_ICMPV6) + if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, level, optname, optval, optlen); case SOL_IPV6: @@ -1157,7 +1154,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) static void rawv6_close(struct sock *sk, long timeout) { - if (inet_sk(sk)->num == IPPROTO_RAW) + if (inet_sk(sk)->inet_num == IPPROTO_RAW) ip6_ra_control(sk, -1); ip6mr_sk_done(sk); sk_common_release(sk); @@ -1176,7 +1173,7 @@ static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); - switch (inet_sk(sk)->num) { + switch (inet_sk(sk)->inet_num) { case IPPROTO_ICMPV6: rp->checksum = 1; rp->offset = 2; @@ -1226,7 +1223,7 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) dest = &np->daddr; src = &np->rcv_saddr; destp = 0; - srcp = inet_sk(sp)->num; + srcp = inet_sk(sp)->inet_num; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", @@ -1338,7 +1335,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .capability = CAP_NET_RAW, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index da5bd0ed83d..4d98549a686 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -208,18 +208,17 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); net = container_of(fq->q.net, struct net, ipv6.frags); - dev = dev_get_by_index(net, fq->iif); + rcu_read_lock(); + 
dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) - goto out; + goto out_rcu_unlock; - rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) - goto out; + goto out_rcu_unlock; /* But use as source device on which LAST ARRIVED @@ -228,9 +227,9 @@ static void ip6_frag_expire(unsigned long data) */ fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); +out_rcu_unlock: + rcu_read_unlock(); out: - if (dev) - dev_put(dev); spin_unlock(&fq->q.lock); fq_put(fq); } @@ -636,7 +635,6 @@ static const struct inet6_protocol frag_protocol = #ifdef CONFIG_SYSCTL static struct ctl_table ip6_frags_ns_ctl_table[] = { { - .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(int), @@ -644,7 +642,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(int), @@ -652,26 +649,22 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { } }; static struct ctl_table ip6_frags_ctl_table[] = { { - .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", .data = &ip6_frags.secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies }, { } }; @@ -682,7 +675,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net) struct ctl_table_header *hdr; table = ip6_frags_ns_ctl_table; - if (net != &init_net) { + if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; @@ -700,7 +693,7 @@ static int ip6_frags_ns_sysctl_register(struct net *net) return 0; err_reg: - if (net != &init_net) + if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d6fe7646a8f..db3b2730389 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1471,9 +1471,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, }, }, }, - .gateway = *gateway, }; + ipv6_addr_copy(&rdfl.gateway, gateway); + if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; @@ -2546,7 +2547,6 @@ ctl_table ipv6_route_table_template[] = { .proc_handler = ipv6_sysctl_rtcache_flush }, { - .ctl_name = NET_IPV6_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ip6_dst_ops_template.gc_thresh, .maxlen = sizeof(int), @@ -2554,7 +2554,6 @@ ctl_table ipv6_route_table_template[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, .procname = "max_size", .data = &init_net.ipv6.sysctl.ip6_rt_max_size, .maxlen = sizeof(int), @@ -2562,69 +2561,55 @@ ctl_table ipv6_route_table_template[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, .procname = "gc_min_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - 
.strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, .procname = "gc_timeout", .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, .procname = "gc_interval", .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, .procname = "gc_elasticity", .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, .procname = "mtu_expires", .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, .procname = "min_adv_mss", .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, .procname = "gc_min_interval_ms", .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, - .strategy = sysctl_ms_jiffies, }, - { .ctl_name = 0 } + { } }; struct ctl_table *ipv6_route_sysctl_init(struct net *net) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dbd19a78ca7..976e68244b9 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -66,7 +66,7 @@ static void ipip6_fb_tunnel_init(struct net_device *dev); static void ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); -static int sit_net_id; +static int sit_net_id __read_mostly; struct sit_net { struct ip_tunnel *tunnels_r_l[HASH_SIZE]; struct ip_tunnel *tunnels_r[HASH_SIZE]; @@ -77,8 +77,17 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -static DEFINE_RWLOCK(ipip6_lock); +/* + * Locking : hash tables are protected by RCU and a spinlock + */ +static DEFINE_SPINLOCK(ipip6_lock); + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) +/* + * Must be invoked with rcu_read_lock + */ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, struct net_device *dev, __be32 remote, __be32 local) { @@ -87,26 +96,26 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); - for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - for (t = sitn->tunnels_r[h0]; t; t = t->next) { + for_each_ip_tunnel_rcu(sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - for (t = sitn->tunnels_l[h1]; t; t = t->next) { + for_each_ip_tunnel_rcu(sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && (!dev || !t->parms.link || dev->iflink == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } - t = sitn->tunnels_wc[0]; + t = rcu_dereference(sitn->tunnels_wc[0]); if ((t != NULL) && (t->dev->flags & IFF_UP)) return t; return NULL; @@ -143,9 +152,9 @@ static void 
ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { if (t == *tp) { - write_lock_bh(&ipip6_lock); + spin_lock_bh(&ipip6_lock); *tp = t->next; - write_unlock_bh(&ipip6_lock); + spin_unlock_bh(&ipip6_lock); break; } } @@ -155,10 +164,27 @@ static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) { struct ip_tunnel **tp = ipip6_bucket(sitn, t); + spin_lock_bh(&ipip6_lock); t->next = *tp; - write_lock_bh(&ipip6_lock); - *tp = t; - write_unlock_bh(&ipip6_lock); + rcu_assign_pointer(*tp, t); + spin_unlock_bh(&ipip6_lock); +} + +static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) +{ +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel *t = netdev_priv(dev); + + if (t->dev == sitn->fb_tunnel_dev) { + ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); + t->ip6rd.relay_prefix = 0; + t->ip6rd.prefixlen = 16; + t->ip6rd.relay_prefixlen = 0; + } else { + struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev); + memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd)); + } +#endif } static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, @@ -204,6 +230,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, nt->parms = *parms; ipip6_tunnel_init(dev); + ipip6_tunnel_clone_6rd(dev, sitn); if (parms->i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; @@ -222,15 +249,22 @@ failed: return NULL; } +static DEFINE_SPINLOCK(ipip6_prl_lock); + +#define for_each_prl_rcu(start) \ + for (prl = rcu_dereference(start); \ + prl; \ + prl = rcu_dereference(prl->next)) + static struct ip_tunnel_prl_entry * __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) { - struct ip_tunnel_prl_entry *p = (struct ip_tunnel_prl_entry *)NULL; + struct ip_tunnel_prl_entry *prl; - for (p = t->prl; p; p = p->next) - if (p->addr == addr) + for_each_prl_rcu(t->prl) + if (prl->addr == addr) break; - return p; + return prl; } @@ -255,7 +289,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, kcalloc(cmax, sizeof(*kp), GFP_KERNEL) : NULL; - read_lock(&ipip6_lock); + rcu_read_lock(); ca = t->prl_count < cmax ? 
t->prl_count : cmax; @@ -273,7 +307,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, } c = 0; - for (prl = t->prl; prl; prl = prl->next) { + for_each_prl_rcu(t->prl) { if (c >= cmax) break; if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) @@ -285,7 +319,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, break; } out: - read_unlock(&ipip6_lock); + rcu_read_unlock(); len = sizeof(*kp) * c; ret = 0; @@ -306,12 +340,14 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) if (a->addr == htonl(INADDR_ANY)) return -EINVAL; - write_lock(&ipip6_lock); + spin_lock(&ipip6_prl_lock); for (p = t->prl; p; p = p->next) { if (p->addr == a->addr) { - if (chg) - goto update; + if (chg) { + p->flags = a->flags; + goto out; + } err = -EEXIST; goto out; } @@ -328,46 +364,63 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg) goto out; } + INIT_RCU_HEAD(&p->rcu_head); p->next = t->prl; - t->prl = p; - t->prl_count++; -update: p->addr = a->addr; p->flags = a->flags; + t->prl_count++; + rcu_assign_pointer(t->prl, p); out: - write_unlock(&ipip6_lock); + spin_unlock(&ipip6_prl_lock); return err; } +static void prl_entry_destroy_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_tunnel_prl_entry, rcu_head)); +} + +static void prl_list_destroy_rcu(struct rcu_head *head) +{ + struct ip_tunnel_prl_entry *p, *n; + + p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); + do { + n = p->next; + kfree(p); + p = n; + } while (p); +} + static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { struct ip_tunnel_prl_entry *x, **p; int err = 0; - write_lock(&ipip6_lock); + spin_lock(&ipip6_prl_lock); if (a && a->addr != htonl(INADDR_ANY)) { for (p = &t->prl; *p; p = &(*p)->next) { if ((*p)->addr == a->addr) { x = *p; *p = x->next; - kfree(x); + call_rcu(&x->rcu_head, prl_entry_destroy_rcu); t->prl_count--; goto out; } } err = -ENXIO; } else { - while (t->prl) { + if (t->prl) { + t->prl_count = 0; x = t->prl; - t->prl = t->prl->next; - kfree(x); - t->prl_count--; + call_rcu(&x->rcu_head, prl_list_destroy_rcu); + t->prl = NULL; } } out: - write_unlock(&ipip6_lock); + spin_unlock(&ipip6_prl_lock); return err; } @@ -377,7 +430,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) struct ip_tunnel_prl_entry *p; int ok = 1; - read_lock(&ipip6_lock); + rcu_read_lock(); p = __ipip6_tunnel_locate_prl(t, iph->saddr); if (p) { if (p->flags & PRL_DEFAULT) @@ -393,7 +446,7 @@ isatap_chksrc(struct sk_buff *skb, struct iphdr *iph, struct ip_tunnel *t) else ok = 0; } - read_unlock(&ipip6_lock); + rcu_read_unlock(); return ok; } @@ -403,9 +456,9 @@ static void ipip6_tunnel_uninit(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { - write_lock_bh(&ipip6_lock); + spin_lock_bh(&ipip6_lock); sitn->tunnels_wc[0] = NULL; - write_unlock_bh(&ipip6_lock); + spin_unlock_bh(&ipip6_lock); dev_put(dev); } else { ipip6_tunnel_unlink(sitn, netdev_priv(dev)); @@ -458,7 +511,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - read_lock(&ipip6_lock); + rcu_read_lock(); t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, @@ -476,7 +529,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - read_unlock(&ipip6_lock); + rcu_read_unlock(); return err; } @@ -496,7 +549,7 @@ static int ipip6_rcv(struct sk_buff *skb) iph = ip_hdr(skb); - read_lock(&ipip6_lock); + 
rcu_read_lock(); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -510,7 +563,7 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - read_unlock(&ipip6_lock); + rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -521,28 +574,52 @@ static int ipip6_rcv(struct sk_buff *skb) nf_reset(skb); ipip6_ecn_decapsulate(iph, skb); netif_rx(skb); - read_unlock(&ipip6_lock); + rcu_read_unlock(); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - read_unlock(&ipip6_lock); + rcu_read_unlock(); out: kfree_skb(skb); return 0; } -/* Returns the embedded IPv4 address if the IPv6 address - comes from 6to4 (RFC 3056) addr space */ - -static inline __be32 try_6to4(struct in6_addr *v6dst) +/* + * Returns the embedded IPv4 address if the IPv6 address + * comes from 6rd / 6to4 (RFC 3056) addr space. + */ +static inline +__be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel) { __be32 dst = 0; +#ifdef CONFIG_IPV6_SIT_6RD + if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, + tunnel->ip6rd.prefixlen)) { + unsigned pbw0, pbi0; + int pbi1; + u32 d; + + pbw0 = tunnel->ip6rd.prefixlen >> 5; + pbi0 = tunnel->ip6rd.prefixlen & 0x1f; + + d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen; + + pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; + if (pbi1 > 0) + d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >> + (32 - pbi1); + + dst = tunnel->ip6rd.relay_prefix | htonl(d); + } +#else if (v6dst->s6_addr16[0] == htons(0x2002)) { /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */ memcpy(&dst, &v6dst->s6_addr16[1], 4); } +#endif return dst; } @@ -555,10 +632,12 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->dev->stats; + struct net_device_stats *stats = &dev->stats; + struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; + __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ struct iphdr *iph; /* Our new IP header */ @@ -595,7 +674,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (!dst) - dst = try_6to4(&iph6->daddr); + dst = try_6rd(&iph6->daddr, tunnel); if (!dst) { struct neighbour *neigh = NULL; @@ -648,25 +727,28 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - if (tiph->frag_off) + if (df) { mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? 
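try_6rd() above derives the tunnel endpoint's IPv4 address from the bits of the IPv6 destination that follow the configured 6rd prefix. A standalone walk-through of the same bit arithmetic, separate from the patch, using the defaults that ipip6_tunnel_clone_6rd() installs on the fallback device (2002::/16 with an empty relay prefix, i.e. plain 6to4) and a sample destination address:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Host-order 32-bit word i of an IPv6 address (avoids s6_addr32). */
	static uint32_t v6_word(const struct in6_addr *a, int i)
	{
		const uint8_t *p = a->s6_addr + 4 * i;
		return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
		       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
	}

	int main(void)
	{
		/* Fallback-device defaults from ipip6_tunnel_clone_6rd(). */
		unsigned int prefixlen = 16, relay_prefixlen = 0;
		uint32_t relay_prefix = 0;           /* host order in this sketch */

		struct in6_addr v6dst;
		inet_pton(AF_INET6, "2002:c000:204::1", &v6dst);  /* 6to4 for 192.0.2.4 */

		/* Same bit arithmetic as try_6rd(), done in host byte order. */
		unsigned int pbw0 = prefixlen >> 5;   /* 32-bit word the prefix ends in */
		unsigned int pbi0 = prefixlen & 0x1f; /* bit offset inside that word    */
		uint32_t d = (v6_word(&v6dst, pbw0) << pbi0) >> relay_prefixlen;
		int pbi1 = pbi0 - relay_prefixlen;

		if (pbi1 > 0)
			d |= v6_word(&v6dst, pbw0 + 1) >> (32 - pbi1);

		struct in_addr dst = { .s_addr = htonl(relay_prefix | d) };
		char buf[INET_ADDRSTRLEN];
		printf("embedded IPv4: %s\n", inet_ntop(AF_INET, &dst, buf, sizeof(buf)));
		return 0;                            /* prints 192.0.2.4 */
	}

With a non-empty relay prefix, the relay prefix supplies the top relay_prefixlen bits of the IPv4 address and only the remaining bits are embedded after the 6rd prefix; that is what the pbw0/pbi0/pbi1 juggling extracts.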
dst_mtu(skb_dst(skb)) : dev->mtu; - if (mtu < 68) { - stats->collisions++; - ip_rt_put(rt); - goto tx_error; - } - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + if (mtu < 68) { + stats->collisions++; + ip_rt_put(rt); + goto tx_error; + } - if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); - ip_rt_put(rt); - goto tx_error; + if (mtu < IPV6_MIN_MTU) { + mtu = IPV6_MIN_MTU; + df = 0; + } + + if (tunnel->parms.iph.daddr && skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + + if (skb->len > mtu) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + ip_rt_put(rt); + goto tx_error; + } } if (tunnel->err_count > 0) { @@ -688,7 +770,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) { ip_rt_put(rt); - stats->tx_dropped++; + txq->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -714,11 +796,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr)>>2; - if (mtu > IPV6_MIN_MTU) - iph->frag_off = tiph->frag_off; - else - iph->frag_off = 0; - + iph->frag_off = df; iph->protocol = IPPROTO_IPV6; iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); iph->daddr = rt->rt_dst; @@ -785,9 +863,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) struct ip_tunnel *t; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif switch (cmd) { case SIOCGETTUNNEL: +#ifdef CONFIG_IPV6_SIT_6RD + case SIOCGET6RD: +#endif t = NULL; if (dev == sitn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { @@ -798,9 +882,25 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) } if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; + + err = -EFAULT; + if (cmd == SIOCGETTUNNEL) { + memcpy(&p, &t->parms, sizeof(p)); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, + sizeof(p))) + goto done; +#ifdef CONFIG_IPV6_SIT_6RD + } else { + ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix); + ip6rd.relay_prefix = t->ip6rd.relay_prefix; + ip6rd.prefixlen = t->ip6rd.prefixlen; + ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; + if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, + sizeof(ip6rd))) + goto done; +#endif + } + err = 0; break; case SIOCADDTUNNEL: @@ -921,6 +1021,54 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) netdev_state_change(dev); break; +#ifdef CONFIG_IPV6_SIT_6RD + case SIOCADD6RD: + case SIOCCHG6RD: + case SIOCDEL6RD: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, + sizeof(ip6rd))) + goto done; + + t = netdev_priv(dev); + + if (cmd != SIOCDEL6RD) { + struct in6_addr prefix; + __be32 relay_prefix; + + err = -EINVAL; + if (ip6rd.relay_prefixlen > 32 || + ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) + goto done; + + ipv6_addr_prefix(&prefix, &ip6rd.prefix, + ip6rd.prefixlen); + if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) + goto done; + if (ip6rd.relay_prefixlen) + relay_prefix = ip6rd.relay_prefix & + htonl(0xffffffffUL << + (32 - ip6rd.relay_prefixlen)); + else + relay_prefix = 0; + if (relay_prefix != 
ip6rd.relay_prefix) + goto done; + + ipv6_addr_copy(&t->ip6rd.prefix, &prefix); + t->ip6rd.relay_prefix = relay_prefix; + t->ip6rd.prefixlen = ip6rd.prefixlen; + t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; + } else + ipip6_tunnel_clone_6rd(dev, sitn); + + err = 0; + break; +#endif + default: err = -EINVAL; } @@ -997,33 +1145,27 @@ static struct xfrm_tunnel sit_handler = { .priority = 1, }; -static void sit_destroy_tunnels(struct sit_net *sitn) +static void sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { int prio; for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - while ((t = sitn->tunnels[prio][h]) != NULL) - unregister_netdevice(t->dev); + struct ip_tunnel *t = sitn->tunnels[prio][h]; + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = t->next; + } } } } static int sit_init_net(struct net *net) { + struct sit_net *sitn = net_generic(net, sit_net_id); int err; - struct sit_net *sitn; - - err = -ENOMEM; - sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL); - if (sitn == NULL) - goto err_alloc; - - err = net_assign_generic(net, sit_net_id, sitn); - if (err < 0) - goto err_assign; sitn->tunnels[0] = sitn->tunnels_wc; sitn->tunnels[1] = sitn->tunnels_l; @@ -1039,6 +1181,7 @@ static int sit_init_net(struct net *net) dev_net_set(sitn->fb_tunnel_dev, net); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); if ((err = register_netdev(sitn->fb_tunnel_dev))) goto err_reg_dev; @@ -1049,35 +1192,34 @@ err_reg_dev: dev_put(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: - /* nothing */ -err_assign: - kfree(sitn); -err_alloc: return err; } static void sit_exit_net(struct net *net) { - struct sit_net *sitn; + struct sit_net *sitn = net_generic(net, sit_net_id); + LIST_HEAD(list); - sitn = net_generic(net, sit_net_id); rtnl_lock(); - sit_destroy_tunnels(sitn); - unregister_netdevice(sitn->fb_tunnel_dev); + sit_destroy_tunnels(sitn, &list); + unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); + unregister_netdevice_many(&list); rtnl_unlock(); - kfree(sitn); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, .exit = sit_exit_net, + .id = &sit_net_id, + .size = sizeof(struct sit_net), }; static void __exit sit_cleanup(void) { xfrm4_tunnel_deregister(&sit_handler, AF_INET6); - unregister_pernet_gen_device(sit_net_id, &sit_net_ops); + unregister_pernet_device(&sit_net_ops); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } static int __init sit_init(void) @@ -1091,7 +1233,7 @@ static int __init sit_init(void) return -EAGAIN; } - err = register_pernet_gen_device(&sit_net_id, &sit_net_ops); + err = register_pernet_device(&sit_net_ops); if (err < 0) xfrm4_tunnel_deregister(&sit_handler, AF_INET6); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 6b6ae913b5d..5b9af508b8f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -159,6 +159,8 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { + struct tcp_options_received tcp_opt; + u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -171,7 +173,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - struct tcp_options_received tcp_opt; if (!sysctl_tcp_syncookies || !th->ack) goto out; @@ -184,13 +185,6 @@ struct sock 
*cookie_v6_check(struct sock *sk, struct sk_buff *skb) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); - /* check for timestamp cookie support */ - memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, 0); - - if (tcp_opt.saw_tstamp) - cookie_check_timestamp(&tcp_opt); - ret = NULL; req = inet6_reqsk_alloc(&tcp6_request_sock_ops); if (!req) @@ -224,12 +218,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->rcv_wscale = tcp_opt.rcv_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -252,8 +240,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out_free; @@ -264,6 +253,21 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } + /* check for timestamp cookie support */ + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, dst); + + if (tcp_opt.saw_tstamp) + cookie_check_timestamp(&tcp_opt); + + req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; + + ireq->snd_wscale = tcp_opt.snd_wscale; + ireq->rcv_wscale = tcp_opt.rcv_wscale; + ireq->sack_ok = tcp_opt.sack_ok; + ireq->wscale_ok = tcp_opt.wscale_ok; + ireq->tstamp_ok = tcp_opt.saw_tstamp; + req->window_clamp = tp->window_clamp ? 
:dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 0dc6a4e5ed4..c690736885b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,45 +16,41 @@ static ctl_table ipv6_table_template[] = { { - .ctl_name = NET_IPV6_ROUTE, .procname = "route", .maxlen = 0, .mode = 0555, .child = ipv6_route_table_template }, { - .ctl_name = NET_IPV6_ICMP, .procname = "icmp", .maxlen = 0, .mode = 0555, .child = ipv6_icmp_table_template }, { - .ctl_name = NET_IPV6_BINDV6ONLY, .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static ctl_table ipv6_rotable[] = { { - .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; struct ctl_path net_ipv6_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv6", .ctl_name = NET_IPV6, }, + { .procname = "net", }, + { .procname = "ipv6", }, { }, }; EXPORT_SYMBOL_GPL(net_ipv6_ctl_path); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 21d100b68b1..ee9cf62458d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -96,7 +96,7 @@ static void tcp_v6_hash(struct sock *sk) return; } local_bh_disable(); - __inet6_hash(sk); + __inet6_hash(sk, NULL); local_bh_enable(); } } @@ -226,10 +226,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, #endif goto failure; } else { - ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF), - inet->saddr); - ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF), - inet->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, + &np->rcv_saddr); } return err; @@ -243,8 +242,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&fl.fl6_src, (saddr ? 
saddr : &np->saddr)); fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->sport; + fl.fl_ip_sport = inet->inet_sport; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, /* set the source address */ ipv6_addr_copy(&np->saddr, saddr); - inet->rcv_saddr = LOOPBACK4_IPV6; + inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); @@ -288,7 +288,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - inet->dport = usin->sin6_port; + inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(&tcp_death_row, sk); @@ -298,8 +298,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!tp->write_seq) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, - inet->sport, - inet->dport); + inet->inet_sport, + inet->inet_dport); err = tcp_connect(sk); if (err) @@ -311,7 +311,7 @@ late_failure: tcp_set_state(sk, TCP_CLOSE); __sk_dst_reset(sk); failure: - inet->dport = 0; + inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } @@ -383,8 +383,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->dport; - fl.fl_ip_sport = inet->sport; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet->inet_dport; + fl.fl_ip_sport = inet->inet_sport; security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { @@ -460,7 +461,8 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) +static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -477,6 +479,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.fl6_flowlabel = 0; fl.oif = treq->iif; + fl.mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -497,7 +500,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; - skb = tcp_make_synack(sk, dst, req); + skb = tcp_make_synack(sk, dst, req, rvp); if (skb) { struct tcphdr *th = tcp_hdr(skb); @@ -1159,11 +1162,14 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { + struct tcp_extend_values tmp_ext; + struct tcp_options_received tmp_opt; + u8 *hash_location; + struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); - struct request_sock *req = NULL; + struct dst_entry *dst = __sk_dst_get(sk); __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES int want_cookie = 0; @@ -1202,8 +1208,52 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = 
tp->rx_opt.user_mss; + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, dst); + + if (tmp_opt.cookie_plus > 0 && + tmp_opt.saw_tstamp && + !tp->rx_opt.cookie_out_never && + (sysctl_tcp_cookie_size > 0 || + (tp->cookie_values != NULL && + tp->cookie_values->cookie_desired > 0))) { + u8 *c; + u32 *d; + u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; + int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; + + if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) + goto drop_and_free; + + /* Secret recipe starts with IP addresses */ + d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + *mess++ ^= *d++; + + /* plus variable length Initiator Cookie */ + c = (u8 *)mess; + while (l-- > 0) + *c++ ^= *hash_location++; - tcp_parse_options(skb, &tmp_opt, 0); +#ifdef CONFIG_SYN_COOKIES + want_cookie = 0; /* not our kind of cookie */ +#endif + tmp_ext.cookie_out_never = 0; /* false */ + tmp_ext.cookie_plus = tmp_opt.cookie_plus; + } else if (!tp->rx_opt.cookie_in_always) { + /* redundant indications, but ensure initialization. */ + tmp_ext.cookie_out_never = 1; /* true */ + tmp_ext.cookie_plus = 0; + } else { + goto drop_and_free; + } + tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1236,23 +1286,21 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v6_init_sequence(skb); } - tcp_rsk(req)->snt_isn = isn; security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req)) - goto drop; + if (tcp_v6_send_synack(sk, req, + (struct request_values *)&tmp_ext) || + want_cookie) + goto drop_and_free; - if (!want_cookie) { - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); - return 0; - } + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + return 0; +drop_and_free: + reqsk_free(req); drop: - if (req) - reqsk_free(req); - return 0; /* don't send reset */ } @@ -1290,11 +1338,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF), - newinet->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); - ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF), - newinet->saddr); + ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); @@ -1345,6 +1391,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -1431,7 +1478,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(newsk); - newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; + newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; + newinet->inet_rcv_saddr = LOOPBACK4_IPV6; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1448,7 +1496,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } #endif - __inet6_hash(newsk); + __inet6_hash(newsk, NULL); __inet_inherit_port(sk, newsk); return newsk; @@ -1848,7 +1896,7 @@ 
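/*
 * Editor's sketch, not part of the patch: the per-connection mixing step
 * the new TCP cookie transaction code adds to tcp_v6_conn_request().
 * The workspace sizes below are assumptions for the example; only the
 * order of operations — XOR the destination address words, then the
 * source address words, then the variable-length initiator cookie bytes,
 * into the area following the generator output — follows the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define DIGEST_WORDS 4			/* stands in for COOKIE_DIGEST_WORDS */
#define ADDR_WORDS   8			/* 4 words of daddr + 4 words of saddr */
#define WORKSPACE    (DIGEST_WORDS + ADDR_WORDS + 2)

static void mix_cookie(uint32_t bakery[WORKSPACE],
		       const uint32_t daddr[4], const uint32_t saddr[4],
		       const uint8_t *init_cookie, int len)
{
	uint32_t *mess = &bakery[DIGEST_WORDS];
	uint8_t *c;
	int i;

	/* secret recipe starts with the IP addresses */
	for (i = 0; i < 4; i++)
		*mess++ ^= daddr[i];
	for (i = 0; i < 4; i++)
		*mess++ ^= saddr[i];

	/* plus the variable-length initiator cookie from the SYN option */
	c = (uint8_t *)mess;
	while (len-- > 0)
		*c++ ^= *init_cookie++;
}

int main(void)
{
	uint32_t bakery[WORKSPACE] = { 0 };	/* pretend tcp_cookie_generator() output */
	uint32_t daddr[4] = { 0x20010db8, 0, 0, 1 };
	uint32_t saddr[4] = { 0x20010db8, 0, 0, 2 };
	uint8_t init_cookie[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	mix_cookie(bakery, daddr, saddr, init_cookie, sizeof(init_cookie));
	printf("first address word: %08x, first cookie word: %08x\n",
	       (unsigned)bakery[DIGEST_WORDS],
	       (unsigned)bakery[DIGEST_WORDS + ADDR_WORDS]);
	return 0;
}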
static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sysctl_tcp_reordering; @@ -1864,6 +1912,19 @@ static int tcp_v6_init_sock(struct sock *sk) tp->af_specific = &tcp_sock_ipv6_specific; #endif + /* TCP Cookie Transactions */ + if (sysctl_tcp_cookie_size > 0) { + /* Default, cookies without s_data_payload. */ + tp->cookie_values = + kzalloc(sizeof(*tp->cookie_values), + sk->sk_allocation); + if (tp->cookie_values != NULL) + kref_init(&tp->cookie_values->kref); + } + /* Presumed zeroed, in order of appearance: + * cookie_in_always, cookie_out_never, + * s_data_constant, s_data_in, s_data_out + */ sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; @@ -1931,8 +1992,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) dest = &np->daddr; src = &np->rcv_saddr; - destp = ntohs(inet->dport); - srcp = ntohs(inet->sport); + destp = ntohs(inet->inet_dport); + srcp = ntohs(inet->inet_sport); if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; @@ -2109,7 +2170,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, @@ -2124,12 +2184,17 @@ static int tcpv6_net_init(struct net *net) static void tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); - inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6); +} + +static void tcpv6_net_exit_batch(struct list_head *net_exit_list) +{ + inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { - .init = tcpv6_net_init, - .exit = tcpv6_net_exit, + .init = tcpv6_net_init, + .exit = tcpv6_net_exit, + .exit_batch = tcpv6_net_exit_batch, }; int __init tcpv6_init(void) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index cf538ed5ef6..69ebdbe78c4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,7 +53,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); @@ -63,8 +63,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk_rcv_saddr || !sk2_rcv_saddr || - sk_rcv_saddr == sk2_rcv_saddr)); + (!sk1_rcv_saddr || !sk2_rcv_saddr || + sk1_rcv_saddr == sk2_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -81,9 +81,33 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } +static unsigned int udp6_portaddr_hash(struct net *net, + const struct in6_addr *addr6, + unsigned int port) +{ + unsigned int hash, mix = net_hash_mix(net); + + if (ipv6_addr_any(addr6)) + hash = jhash_1word(0, mix); + else if (ipv6_addr_v4mapped(addr6)) + hash = jhash_1word(addr6->s6_addr32[3], mix); + else + hash = jhash2(addr6->s6_addr32, 4, mix); + + return hash ^ port; +} + + int udp_v6_get_port(struct sock *sk, unsigned short snum) { - return udp_lib_get_port(sk, 
snum, ipv6_rcv_saddr_equal); + unsigned int hash2_nulladdr = + udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); + unsigned int hash2_partial = + udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + + /* precompute partial secondary hash */ + udp_sk(sk)->udp_portaddr_hash = hash2_partial; + return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } static inline int compute_score(struct sock *sk, struct net *net, @@ -94,14 +118,14 @@ static inline int compute_score(struct sock *sk, struct net *net, { int score = -1; - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); score = 0; - if (inet->dport) { - if (inet->dport != sport) + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; score++; } @@ -124,6 +148,86 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } +#define SCORE2_MAX (1 + 1 + 1) +static inline int compute_score2(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score = 0; + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + + +/* called with read_rcu_lock() */ +static struct sock *udp6_lib_lookup2(struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned int hnum, int dif, + struct udp_hslot *hslot2, unsigned int slot2) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + int score, badness; + +begin: + result = NULL; + badness = -1; + udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + score = compute_score2(sk, net, saddr, sport, + daddr, hnum, dif); + if (score > badness) { + result = sk; + badness = score; + if (score == SCORE2_MAX) + goto exact_match; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. 
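/*
 * Editor's sketch, not part of the patch: the idea behind the new
 * secondary "port + local address" UDP hash used above.  jhash() and
 * net_hash_mix() are replaced by a trivial stand-in mixer, the slot mask
 * is invented, and address words are kept in host byte order for
 * simplicity; only the structure — wildcard vs v4-mapped vs full IPv6
 * address, then XOR with the port — follows the patch.
 */
#include <stdint.h>
#include <stdio.h>

#define UDP_HASH_MASK 0xff		/* pretend the table has 256 slots */

struct v6addr { uint32_t w[4]; };	/* host byte order in this sketch */

static uint32_t mix32(uint32_t h, uint32_t v)	/* stand-in for jhash */
{
	h ^= v;
	h *= 2654435761u;
	return h ^ (h >> 16);
}

static unsigned portaddr_hash(uint32_t net_mix, const struct v6addr *a, unsigned port)
{
	int any = !a->w[0] && !a->w[1] && !a->w[2] && !a->w[3];
	int v4mapped = !a->w[0] && !a->w[1] && a->w[2] == 0xffff;
	uint32_t h;

	if (any)
		h = mix32(net_mix, 0);		/* wildcard bind */
	else if (v4mapped)
		h = mix32(net_mix, a->w[3]);	/* only the embedded IPv4 part matters */
	else
		h = mix32(mix32(mix32(mix32(net_mix, a->w[0]), a->w[1]), a->w[2]), a->w[3]);

	return h ^ port;			/* the port is folded in last */
}

int main(void)
{
	struct v6addr any = { { 0, 0, 0, 0 } };
	struct v6addr bound = { { 0x20010db8, 0, 0, 1 } };
	unsigned port = 53;

	/* a busy port is probed twice: first in the slot of the exact local
	 * address, then in the slot of the wildcard address */
	printf("slot(addr)=%u slot(any)=%u\n",
	       portaddr_hash(1, &bound, port) & UDP_HASH_MASK,
	       portaddr_hash(1, &any, port) & UDP_HASH_MASK);
	return 0;
}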
+ */ + if (get_nulls_value(node) != slot2) + goto begin; + + if (result) { +exact_match: + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score2(result, net, saddr, sport, + daddr, hnum, dif) < badness)) { + sock_put(result); + goto begin; + } + } + return result; +} + static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *saddr, __be16 sport, struct in6_addr *daddr, __be16 dport, @@ -132,11 +236,35 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); - unsigned int hash = udp_hashfn(net, hnum); - struct udp_hslot *hslot = &udptable->hash[hash]; + unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); + struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; int score, badness; rcu_read_lock(); + if (hslot->count > 10) { + hash2 = udp6_portaddr_hash(net, daddr, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + if (hslot->count < hslot2->count) + goto begin; + + result = udp6_lib_lookup2(net, saddr, sport, + daddr, hnum, dif, + hslot2, slot2); + if (!result) { + hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); + slot2 = hash2 & udptable->mask; + hslot2 = &udptable->hash2[slot2]; + if (hslot->count < hslot2->count) + goto begin; + + result = udp6_lib_lookup2(net, &in6addr_any, sport, + daddr, hnum, dif, + hslot2, slot2); + } + rcu_read_unlock(); + return result; + } begin: result = NULL; badness = -1; @@ -152,7 +280,7 @@ begin: * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ - if (get_nulls_value(node) != hash) + if (get_nulls_value(node) != slot) goto begin; if (result) { @@ -252,7 +380,7 @@ try_again: UDP_MIB_INDATAGRAMS, is_udplite); } - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); /* Copy the address. 
*/ if (msg->msg_name) { @@ -265,8 +393,8 @@ try_again: sin6->sin6_scope_id = 0; if (is_udp4) - ipv6_addr_set(&sin6->sin6_addr, 0, 0, - htonl(0xffff), ip_hdr(skb)->saddr); + ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, + &sin6->sin6_addr); else { ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr); @@ -383,18 +511,18 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) { + if (rc == -ENOMEM) UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - atomic_inc(&sk->sk_drops); - } - goto drop; + goto drop_no_sk_drops_inc; } return 0; drop: + atomic_inc(&sk->sk_drops); +drop_no_sk_drops_inc: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; @@ -415,10 +543,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, if (!net_eq(sock_net(s), net)) continue; - if (s->sk_hash == num && s->sk_family == PF_INET6) { + if (udp_sk(s)->udp_port_hash == num && + s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); - if (inet->dport) { - if (inet->dport != rmt_port) + if (inet->inet_dport) { + if (inet->inet_dport != rmt_port) continue; } if (!ipv6_addr_any(&np->daddr) && @@ -440,6 +569,33 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, return NULL; } +static void flush_stack(struct sock **stack, unsigned int count, + struct sk_buff *skb, unsigned int final) +{ + unsigned int i; + struct sock *sk; + struct sk_buff *skb1; + + for (i = 0; i < count; i++) { + skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); + + sk = stack[i]; + if (skb1) { + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + } else { + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, IS_UDPLITE(sk)); + } + } +} /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. 
@@ -448,41 +604,43 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, struct udp_table *udptable) { - struct sock *sk, *sk2; + struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); - struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; + struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); int dif; + unsigned int i, count = 0; spin_lock(&hslot->lock); sk = sk_nulls_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); - if (!sk) { - kfree_skb(skb); - goto out; - } - - sk2 = sk; - while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, - uh->source, saddr, dif))) { - struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); - if (buff) { - bh_lock_sock(sk2); - if (!sock_owned_by_user(sk2)) - udpv6_queue_rcv_skb(sk2, buff); - else - sk_add_backlog(sk2, buff); - bh_unlock_sock(sk2); + while (sk) { + stack[count++] = sk; + sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, + uh->source, saddr, dif); + if (unlikely(count == ARRAY_SIZE(stack))) { + if (!sk) + break; + flush_stack(stack, count, skb, ~0); + count = 0; } } - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); -out: + /* + * before releasing the lock, we must take reference on sockets + */ + for (i = 0; i < count; i++) + sock_hold(stack[i]); + spin_unlock(&hslot->lock); + + if (count) { + flush_stack(stack, count, skb, count - 1); + + for (i = 0; i < count; i++) + sock_put(stack[i]); + } else { + kfree_skb(skb); + } return 0; } @@ -792,7 +950,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; - sin.sin_port = sin6 ? sin6->sin6_port : inet->dport; + sin.sin_port = sin6 ? 
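/*
 * Editor's sketch, not part of the patch: the batching pattern the new
 * __udp6_lib_mcast_deliver() above uses.  Sockets, skbs and refcounts
 * are replaced by toy types; the shape — fill a fixed on-stack array
 * under the lock, take references, drop the lock, then hand out clones
 * and give the original buffer to the last receiver — follows the patch.
 */
#include <stdio.h>

#define STACK_SLOTS 4	/* kernel uses 256 / sizeof(struct sock *) */

struct receiver { int id; int refs; };

static void deliver(struct receiver *r, int is_original)
{
	printf("receiver %d gets %s\n", r->id, is_original ? "original" : "clone");
}

static void flush_stack(struct receiver **stack, unsigned count, unsigned final)
{
	for (unsigned i = 0; i < count; i++)
		deliver(stack[i], i == final);	/* only one gets the original */
}

int main(void)
{
	struct receiver socks[6] = { {0}, {1}, {2}, {3}, {4}, {5} };
	struct receiver *stack[STACK_SLOTS];
	unsigned count = 0;

	/* --- spin_lock(&hslot->lock) would be taken here --- */
	for (int i = 0; i < 6; i++) {
		stack[count++] = &socks[i];
		if (count == STACK_SLOTS && i + 1 < 6) {
			/* array full but more matches follow: flush now,
			 * every entry gets a clone (final index ~0) */
			flush_stack(stack, count, (unsigned)-1);
			count = 0;
		}
	}
	for (unsigned i = 0; i < count; i++)
		stack[i]->refs++;		/* sock_hold() before dropping the lock */
	/* --- spin_unlock(&hslot->lock) here --- */

	if (count)
		flush_stack(stack, count, count - 1); /* last one gets the original skb */
	return 0;
}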
sin6->sin6_port : inet->inet_dport; sin.sin_addr.s_addr = daddr->s6_addr32[3]; msg->msg_name = &sin; msg->msg_namelen = sizeof(sin); @@ -865,7 +1023,7 @@ do_udp_sendmsg: if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl_ip_dport = inet->dport; + fl.fl_ip_dport = inet->inet_dport; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; connected = 1; @@ -877,6 +1035,8 @@ do_udp_sendmsg: if (!fl.oif) fl.oif = np->sticky_pktinfo.ipi6_ifindex; + fl.mark = sk->sk_mark; + if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); @@ -909,7 +1069,7 @@ do_udp_sendmsg: fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->sport; + fl.fl_ip_sport = inet->inet_sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { @@ -1190,10 +1350,10 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket dest = &np->daddr; src = &np->rcv_saddr; - destp = ntohs(inet->dport); - srcp = ntohs(inet->sport); + destp = ntohs(inet->inet_dport); + srcp = ntohs(inet->inet_sport); seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", bucket, src->s6_addr32[0], src->s6_addr32[1], @@ -1282,7 +1442,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .capability =-1, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index d737a27ee01..6ea6938919e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -62,7 +62,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .capability = -1, .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8ec3d45cd1d..7254e3f899a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -309,7 +309,6 @@ static void xfrm6_policy_fini(void) #ifdef CONFIG_SYSCTL static struct ctl_table xfrm6_policy_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm6_gc_thresh", .data = &xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 81a95c00e50..438831d3359 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -23,7 +23,7 @@ */ #include <linux/module.h> #include <linux/xfrm.h> -#include <linux/list.h> +#include <linux/rculist.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipv6.h> @@ -36,14 +36,15 @@ * per xfrm_address_t. 
*/ struct xfrm6_tunnel_spi { - struct hlist_node list_byaddr; - struct hlist_node list_byspi; - xfrm_address_t addr; - u32 spi; - atomic_t refcnt; + struct hlist_node list_byaddr; + struct hlist_node list_byspi; + xfrm_address_t addr; + u32 spi; + atomic_t refcnt; + struct rcu_head rcu_head; }; -static DEFINE_RWLOCK(xfrm6_tunnel_spi_lock); +static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); static u32 xfrm6_tunnel_spi; @@ -107,6 +108,7 @@ static void xfrm6_tunnel_spi_fini(void) if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i])) return; } + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); xfrm6_tunnel_spi_kmem = NULL; } @@ -116,7 +118,7 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos; - hlist_for_each_entry(x6spi, pos, + hlist_for_each_entry_rcu(x6spi, pos, &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) @@ -131,10 +133,10 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; u32 spi; - read_lock_bh(&xfrm6_tunnel_spi_lock); + rcu_read_lock_bh(); x6spi = __xfrm6_tunnel_spi_lookup(saddr); spi = x6spi ? x6spi->spi : 0; - read_unlock_bh(&xfrm6_tunnel_spi_lock); + rcu_read_unlock_bh(); return htonl(spi); } @@ -185,14 +187,15 @@ alloc_spi: if (!x6spi) goto out; + INIT_RCU_HEAD(&x6spi->rcu_head); memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr)); x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); - hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); + hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); index = xfrm6_tunnel_spi_hash_byaddr(saddr); - hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); + hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); out: return spi; } @@ -202,26 +205,32 @@ __be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) struct xfrm6_tunnel_spi *x6spi; u32 spi; - write_lock_bh(&xfrm6_tunnel_spi_lock); + spin_lock_bh(&xfrm6_tunnel_spi_lock); x6spi = __xfrm6_tunnel_spi_lookup(saddr); if (x6spi) { atomic_inc(&x6spi->refcnt); spi = x6spi->spi; } else spi = __xfrm6_tunnel_alloc_spi(saddr); - write_unlock_bh(&xfrm6_tunnel_spi_lock); + spin_unlock_bh(&xfrm6_tunnel_spi_lock); return htonl(spi); } EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi); +static void x6spi_destroy_rcu(struct rcu_head *head) +{ + kmem_cache_free(xfrm6_tunnel_spi_kmem, + container_of(head, struct xfrm6_tunnel_spi, rcu_head)); +} + void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos, *n; - write_lock_bh(&xfrm6_tunnel_spi_lock); + spin_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, pos, n, &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], @@ -229,14 +238,14 @@ void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { if (atomic_dec_and_test(&x6spi->refcnt)) { - hlist_del(&x6spi->list_byaddr); - hlist_del(&x6spi->list_byspi); - kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi); + hlist_del_rcu(&x6spi->list_byaddr); + hlist_del_rcu(&x6spi->list_byspi); + call_rcu(&x6spi->rcu_head, x6spi_destroy_rcu); break; } } } - write_unlock_bh(&xfrm6_tunnel_spi_lock); + spin_unlock_bh(&xfrm6_tunnel_spi_lock); } EXPORT_SYMBOL(xfrm6_tunnel_free_spi); diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 66c7a20011f..f9759b54a6d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1298,6 +1298,7 @@ static int 
ipx_setsockopt(struct socket *sock, int level, int optname, int opt; int rc = -EINVAL; + lock_kernel(); if (optlen != sizeof(int)) goto out; @@ -1312,6 +1313,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname, ipx_sk(sk)->type = opt; rc = 0; out: + unlock_kernel(); return rc; } @@ -1323,6 +1325,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, int len; int rc = -ENOPROTOOPT; + lock_kernel(); if (!(level == SOL_IPX && optname == IPX_TYPE)) goto out; @@ -1343,6 +1346,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, rc = 0; out: + unlock_kernel(); return rc; } @@ -1352,12 +1356,13 @@ static struct proto ipx_proto = { .obj_size = sizeof(struct ipx_sock), }; -static int ipx_create(struct net *net, struct socket *sock, int protocol) +static int ipx_create(struct net *net, struct socket *sock, int protocol, + int kern) { int rc = -ESOCKTNOSUPPORT; struct sock *sk; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; /* @@ -1390,6 +1395,7 @@ static int ipx_release(struct socket *sock) if (!sk) goto out; + lock_kernel(); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); @@ -1397,6 +1403,7 @@ static int ipx_release(struct socket *sock) sock->sk = NULL; sk_refcnt_debug_release(sk); ipx_destroy_socket(sk); + unlock_kernel(); out: return 0; } @@ -1424,7 +1431,8 @@ static __be16 ipx_first_free_socketnum(struct ipx_interface *intrfc) return htons(socketNum); } -static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +static int __ipx_bind(struct socket *sock, + struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct ipx_sock *ipxs = ipx_sk(sk); @@ -1519,6 +1527,17 @@ out: return rc; } +static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + int rc; + + lock_kernel(); + rc = __ipx_bind(sock, uaddr, addr_len); + unlock_kernel(); + + return rc; +} + static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { @@ -1531,6 +1550,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; + lock_kernel(); if (addr_len != sizeof(*addr)) goto out; addr = (struct sockaddr_ipx *)uaddr; @@ -1550,7 +1570,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, IPX_NODE_LEN); #endif /* CONFIG_IPX_INTERN */ - rc = ipx_bind(sock, (struct sockaddr *)&uaddr, + rc = __ipx_bind(sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); if (rc) goto out; @@ -1577,6 +1597,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, ipxrtr_put(rt); rc = 0; out: + unlock_kernel(); return rc; } @@ -1592,6 +1613,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, *uaddr_len = sizeof(struct sockaddr_ipx); + lock_kernel(); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1626,6 +1648,19 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, rc = 0; out: + unlock_kernel(); + return rc; +} + +static unsigned int ipx_datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + int rc; + + lock_kernel(); + rc = datagram_poll(file, sock, wait); + unlock_kernel(); + return rc; } @@ -1700,6 +1735,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, int rc = -EINVAL; int flags = msg->msg_flags; + lock_kernel(); /* Socket gets bound below anyway */ /* if (sk->sk_zapped) return -EIO; */ /* Socket not bound */ @@ -1723,7 +1759,7 @@ static int 
ipx_sendmsg(struct kiocb *iocb, struct socket *sock, memcpy(uaddr.sipx_node, ipxs->intrfc->if_node, IPX_NODE_LEN); #endif - rc = ipx_bind(sock, (struct sockaddr *)&uaddr, + rc = __ipx_bind(sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); if (rc) goto out; @@ -1751,6 +1787,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, if (rc >= 0) rc = len; out: + unlock_kernel(); return rc; } @@ -1765,6 +1802,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, rc; + lock_kernel(); /* put the autobinding in */ if (!ipxs->port) { struct sockaddr_ipx uaddr; @@ -1779,7 +1817,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, memcpy(uaddr.sipx_node, ipxs->intrfc->if_node, IPX_NODE_LEN); #endif /* CONFIG_IPX_INTERN */ - rc = ipx_bind(sock, (struct sockaddr *)&uaddr, + rc = __ipx_bind(sock, (struct sockaddr *)&uaddr, sizeof(struct sockaddr_ipx)); if (rc) goto out; @@ -1823,6 +1861,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: + unlock_kernel(); return rc; } @@ -1834,6 +1873,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; + lock_kernel(); switch (cmd) { case TIOCOUTQ: amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); @@ -1896,6 +1936,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -ENOIOCTLCMD; break; } + unlock_kernel(); return rc; } @@ -1927,13 +1968,13 @@ static int ipx_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long * Socket family declarations */ -static struct net_proto_family ipx_family_ops = { +static const struct net_proto_family ipx_family_ops = { .family = PF_IPX, .create = ipx_create, .owner = THIS_MODULE, }; -static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { +static const struct proto_ops ipx_dgram_ops = { .family = PF_IPX, .owner = THIS_MODULE, .release = ipx_release, @@ -1942,7 +1983,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = ipx_getname, - .poll = datagram_poll, + .poll = ipx_datagram_poll, .ioctl = ipx_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ipx_compat_ioctl, @@ -1957,8 +1998,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { .sendpage = sock_no_sendpage, }; -SOCKOPS_WRAP(ipx_dgram, PF_IPX); - static struct packet_type ipx_8023_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_3), .func = ipx_rcv, diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 633fcab3558..bd6dca00fb8 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -18,19 +18,18 @@ extern int sysctl_ipx_pprop_broadcasting; static struct ctl_table ipx_table[] = { { - .ctl_name = NET_IPX_PPROP_BROADCASTING, .procname = "ipx_pprop_broadcasting", .data = &sysctl_ipx_pprop_broadcasting, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { 0 }, + { }, }; static struct ctl_path ipx_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipx", .ctl_name = NET_IPX, }, + { .procname = "net", }, + { .procname = "ipx", }, { } }; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index dd35641835f..10093aab617 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -61,7 +61,7 @@ #include <net/irda/af_irda.h> -static int irda_create(struct net *net, struct socket *sock, int protocol); +static int 
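/*
 * Editor's sketch, not part of the patch: the "push the BKL into the
 * handlers" pattern used throughout the ipx (and, below, irda) changes.
 * A pthread mutex stands in for lock_kernel()/unlock_kernel() and the
 * function names are invented; the shape — a lockless __handler for
 * internal callers plus a locked wrapper that the proto_ops table points
 * at — follows the patch.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int __proto_bind(int sock, int addr)	/* callers already hold the lock */
{
	printf("binding socket %d to %#x\n", sock, addr);
	return 0;
}

static int proto_bind(int sock, int addr)	/* the exported entry point */
{
	int rc;

	pthread_mutex_lock(&big_lock);		/* was: lock_kernel() */
	rc = __proto_bind(sock, addr);
	pthread_mutex_unlock(&big_lock);	/* was: unlock_kernel() */
	return rc;
}

static int proto_connect(int sock, int addr)
{
	int rc;

	pthread_mutex_lock(&big_lock);
	/* connect may need to autobind; it must call the __ variant, since
	 * calling the locked wrapper here would deadlock (the BKL is
	 * recursive, this plain mutex is not) */
	rc = __proto_bind(sock, addr);
	pthread_mutex_unlock(&big_lock);
	return rc;
}

int main(void)
{
	proto_bind(3, 0x4001);
	proto_connect(4, 0x4002);
	return 0;
}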
irda_create(struct net *net, struct socket *sock, int protocol, int kern); static const struct proto_ops irda_stream_ops; static const struct proto_ops irda_seqpacket_ops; @@ -714,11 +714,14 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_irda saddr; struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); + int err; + lock_kernel(); memset(&saddr, 0, sizeof(saddr)); if (peer) { + err = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + goto out; saddr.sir_family = AF_IRDA; saddr.sir_lsap_sel = self->dtsap_sel; @@ -735,8 +738,10 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, /* uaddr_len come to us uninitialised */ *uaddr_len = sizeof (struct sockaddr_irda); memcpy(uaddr, &saddr, *uaddr_len); - - return 0; + err = 0; +out: + unlock_kernel(); + return err; } /* @@ -748,21 +753,25 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr, static int irda_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; + int err = -EOPNOTSUPP; IRDA_DEBUG(2, "%s()\n", __func__); + lock_kernel(); if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && (sk->sk_type != SOCK_DGRAM)) - return -EOPNOTSUPP; + goto out; if (sk->sk_state != TCP_LISTEN) { sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; - return 0; + err = 0; } +out: + unlock_kernel(); - return -EOPNOTSUPP; + return err; } /* @@ -783,36 +792,40 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len != sizeof(struct sockaddr_irda)) return -EINVAL; + lock_kernel(); #ifdef CONFIG_IRDA_ULTRA /* Special care for Ultra sockets */ if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA)) { self->pid = addr->sir_lsap_sel; + err = -EOPNOTSUPP; if (self->pid & 0x80) { IRDA_DEBUG(0, "%s(), extension in PID not supp!\n", __func__); - return -EOPNOTSUPP; + goto out; } err = irda_open_lsap(self, self->pid); if (err < 0) - return err; + goto out; /* Pretend we are connected */ sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; + err = 0; - return 0; + goto out; } #endif /* CONFIG_IRDA_ULTRA */ self->ias_obj = irias_new_object(addr->sir_name, jiffies); + err = -ENOMEM; if (self->ias_obj == NULL) - return -ENOMEM; + goto out; err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); if (err < 0) { kfree(self->ias_obj->name); kfree(self->ias_obj); - return err; + goto out; } /* Register with LM-IAS */ @@ -820,7 +833,10 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) self->stsap_sel, IAS_KERNEL_ATTR); irias_insert_object(self->ias_obj); - return 0; + err = 0; +out: + unlock_kernel(); + return err; } /* @@ -839,22 +855,26 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) IRDA_DEBUG(2, "%s()\n", __func__); - err = irda_create(sock_net(sk), newsock, sk->sk_protocol); + lock_kernel(); + err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); if (err) - return err; + goto out; + err = -EINVAL; if (sock->state != SS_UNCONNECTED) - return -EINVAL; + goto out; if ((sk = sock->sk) == NULL) - return -EINVAL; + goto out; + err = -EOPNOTSUPP; if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && (sk->sk_type != SOCK_DGRAM)) - return -EOPNOTSUPP; + goto out; + err = -EINVAL; if (sk->sk_state != TCP_LISTEN) - return -EINVAL; + goto out; /* * The read queue this time is holding sockets ready to use @@ -875,18 +895,20 @@ static int irda_accept(struct socket *sock, struct 
socket *newsock, int flags) break; /* Non blocking operation */ + err = -EWOULDBLOCK; if (flags & O_NONBLOCK) - return -EWOULDBLOCK; + goto out; err = wait_event_interruptible(*(sk->sk_sleep), skb_peek(&sk->sk_receive_queue)); if (err) - return err; + goto out; } newsk = newsock->sk; + err = -EIO; if (newsk == NULL) - return -EIO; + goto out; newsk->sk_state = TCP_ESTABLISHED; @@ -894,10 +916,11 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Now attach up the new socket */ new->tsap = irttp_dup(self->tsap, new); + err = -EPERM; /* value does not seem to make sense. -arnd */ if (!new->tsap) { IRDA_DEBUG(0, "%s(), dup failed!\n", __func__); kfree_skb(skb); - return -1; + goto out; } new->stsap_sel = new->tsap->stsap_sel; @@ -921,8 +944,10 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) newsock->state = SS_CONNECTED; irda_connect_response(new); - - return 0; + err = 0; +out: + unlock_kernel(); + return err; } /* @@ -955,28 +980,34 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, IRDA_DEBUG(2, "%s(%p)\n", __func__, self); + lock_kernel(); /* Don't allow connect for Ultra sockets */ + err = -ESOCKTNOSUPPORT; if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA)) - return -ESOCKTNOSUPPORT; + goto out; if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { sock->state = SS_CONNECTED; - return 0; /* Connect completed during a ERESTARTSYS event */ + err = 0; + goto out; /* Connect completed during a ERESTARTSYS event */ } if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { sock->state = SS_UNCONNECTED; - return -ECONNREFUSED; + err = -ECONNREFUSED; + goto out; } + err = -EISCONN; /* No reconnect on a seqpacket socket */ if (sk->sk_state == TCP_ESTABLISHED) - return -EISCONN; /* No reconnect on a seqpacket socket */ + goto out; sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; + err = -EINVAL; if (addr_len != sizeof(struct sockaddr_irda)) - return -EINVAL; + goto out; /* Check if user supplied any destination device address */ if ((!addr->sir_addr) || (addr->sir_addr == DEV_ADDR_ANY)) { @@ -984,7 +1015,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, err = irda_discover_daddr_and_lsap_sel(self, addr->sir_name); if (err) { IRDA_DEBUG(0, "%s(), auto-connect failed!\n", __func__); - return err; + goto out; } } else { /* Use the one provided by the user */ @@ -1000,7 +1031,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, err = irda_find_lsap_sel(self, addr->sir_name); if (err) { IRDA_DEBUG(0, "%s(), connect failed!\n", __func__); - return err; + goto out; } } else { /* Directly connect to the remote LSAP @@ -1025,29 +1056,35 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, self->max_sdu_size_rx, NULL); if (err) { IRDA_DEBUG(0, "%s(), connect failed!\n", __func__); - return err; + goto out; } /* Now the loop */ + err = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - return -EINPROGRESS; + goto out; + err = -ERESTARTSYS; if (wait_event_interruptible(*(sk->sk_sleep), (sk->sk_state != TCP_SYN_SENT))) - return -ERESTARTSYS; + goto out; if (sk->sk_state != TCP_ESTABLISHED) { sock->state = SS_UNCONNECTED; err = sock_error(sk); - return err? 
err : -ECONNRESET; + if (!err) + err = -ECONNRESET; + goto out; } sock->state = SS_CONNECTED; /* At this point, IrLMP has assigned our source address */ self->saddr = irttp_get_saddr(self->tsap); - - return 0; + err = 0; +out: + unlock_kernel(); + return err; } static struct proto irda_proto = { @@ -1062,7 +1099,8 @@ static struct proto irda_proto = { * Create IrDA socket * */ -static int irda_create(struct net *net, struct socket *sock, int protocol) +static int irda_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct irda_sock *self; @@ -1192,6 +1230,7 @@ static int irda_release(struct socket *sock) if (sk == NULL) return 0; + lock_kernel(); lock_sock(sk); sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; @@ -1210,6 +1249,7 @@ static int irda_release(struct socket *sock) /* Destroy networking socket if we are the last reference on it, * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */ sock_put(sk); + unlock_kernel(); /* Notes on socket locking and deallocation... - Jean II * In theory we should put pairs of sock_hold() / sock_put() to @@ -1257,28 +1297,37 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len); + lock_kernel(); /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | - MSG_NOSIGNAL)) - return -EINVAL; + MSG_NOSIGNAL)) { + err = -EINVAL; + goto out; + } if (sk->sk_shutdown & SEND_SHUTDOWN) goto out_err; - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + if (sk->sk_state != TCP_ESTABLISHED) { + err = -ENOTCONN; + goto out; + } self = irda_sk(sk); /* Check if IrTTP is wants us to slow down */ if (wait_event_interruptible(*(sk->sk_sleep), - (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) - return -ERESTARTSYS; + (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { + err = -ERESTARTSYS; + goto out; + } /* Check if we are still connected */ - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + if (sk->sk_state != TCP_ESTABLISHED) { + err = -ENOTCONN; + goto out; + } /* Check that we don't send out too big frames */ if (len > self->max_data_size) { @@ -1310,11 +1359,16 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err); goto out_err; } + + unlock_kernel(); /* Tell client how much data we actually sent */ return len; - out_err: - return sk_stream_error(sk, msg->msg_flags, err); +out_err: + err = sk_stream_error(sk, msg->msg_flags, err); +out: + unlock_kernel(); + return err; } @@ -1335,13 +1389,14 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + lock_kernel(); if ((err = sock_error(sk)) < 0) - return err; + goto out; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) - return err; + goto out; skb_reset_transport_header(skb); copied = skb->len; @@ -1369,8 +1424,12 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, irttp_flow_request(self->tsap, FLOW_START); } } - + unlock_kernel(); return copied; + +out: + unlock_kernel(); + return err; } /* @@ -1388,15 +1447,19 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(3, "%s()\n", __func__); + lock_kernel(); if ((err = sock_error(sk)) < 0) - return err; + goto out; + err = -EINVAL; if (sock->flags & __SO_ACCEPTCON) - return(-EINVAL); + goto out; + err =-EOPNOTSUPP; if (flags & MSG_OOB) - return 
-EOPNOTSUPP; + goto out; + err = 0; target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); @@ -1408,7 +1471,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, if (skb == NULL) { DEFINE_WAIT(wait); - int ret = 0; + err = 0; if (copied >= target) break; @@ -1418,25 +1481,25 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* * POSIX 1003.1g mandates this order. */ - ret = sock_error(sk); - if (ret) + err = sock_error(sk); + if (err) ; else if (sk->sk_shutdown & RCV_SHUTDOWN) ; else if (noblock) - ret = -EAGAIN; + err = -EAGAIN; else if (signal_pending(current)) - ret = sock_intr_errno(timeo); + err = sock_intr_errno(timeo); else if (sk->sk_state != TCP_ESTABLISHED) - ret = -ENOTCONN; + err = -ENOTCONN; else if (skb_peek(&sk->sk_receive_queue) == NULL) /* Wait process until data arrives */ schedule(); finish_wait(sk->sk_sleep, &wait); - if (ret) - return ret; + if (err) + goto out; if (sk->sk_shutdown & RCV_SHUTDOWN) break; @@ -1489,7 +1552,9 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, } } - return copied; +out: + unlock_kernel(); + return err ? : copied; } /* @@ -1507,18 +1572,23 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int err; + lock_kernel(); + IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len); + err = -EINVAL; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) - return -EINVAL; + goto out; if (sk->sk_shutdown & SEND_SHUTDOWN) { send_sig(SIGPIPE, current, 0); - return -EPIPE; + err = -EPIPE; + goto out; } + err = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + goto out; self = irda_sk(sk); @@ -1535,8 +1605,9 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, skb = sock_alloc_send_skb(sk, len + self->max_header_size, msg->msg_flags & MSG_DONTWAIT, &err); + err = -ENOBUFS; if (!skb) - return -ENOBUFS; + goto out; skb_reserve(skb, self->max_header_size); skb_reset_transport_header(skb); @@ -1546,7 +1617,7 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len); if (err) { kfree_skb(skb); - return err; + goto out; } /* @@ -1556,9 +1627,13 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock, err = irttp_udata_request(self->tsap, skb); if (err) { IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err); - return err; + goto out; } + unlock_kernel(); return len; +out: + unlock_kernel(); + return err; } /* @@ -1580,12 +1655,15 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len); + lock_kernel(); + err = -EINVAL; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) - return -EINVAL; + goto out; + err = -EPIPE; if (sk->sk_shutdown & SEND_SHUTDOWN) { send_sig(SIGPIPE, current, 0); - return -EPIPE; + goto out; } self = irda_sk(sk); @@ -1593,16 +1671,18 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, /* Check if an address was specified with sendto. Jean II */ if (msg->msg_name) { struct sockaddr_irda *addr = (struct sockaddr_irda *) msg->msg_name; + err = -EINVAL; /* Check address, extract pid. 
Jean II */ if (msg->msg_namelen < sizeof(*addr)) - return -EINVAL; + goto out; if (addr->sir_family != AF_IRDA) - return -EINVAL; + goto out; pid = addr->sir_lsap_sel; if (pid & 0x80) { IRDA_DEBUG(0, "%s(), extension in PID not supp!\n", __func__); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } } else { /* Check that the socket is properly bound to an Ultra @@ -1611,7 +1691,8 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, (sk->sk_state != TCP_ESTABLISHED)) { IRDA_DEBUG(0, "%s(), socket not bound to Ultra PID.\n", __func__); - return -ENOTCONN; + err = -ENOTCONN; + goto out; } /* Use PID from socket */ bound = 1; @@ -1630,8 +1711,9 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, skb = sock_alloc_send_skb(sk, len + self->max_header_size, msg->msg_flags & MSG_DONTWAIT, &err); + err = -ENOBUFS; if (!skb) - return -ENOBUFS; + goto out; skb_reserve(skb, self->max_header_size); skb_reset_transport_header(skb); @@ -1641,16 +1723,16 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len); if (err) { kfree_skb(skb); - return err; + goto out; } err = irlmp_connless_data_request((bound ? self->lsap : NULL), skb, pid); - if (err) { + if (err) IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err); - return err; - } - return len; +out: + unlock_kernel(); + return err ? : len; } #endif /* CONFIG_IRDA_ULTRA */ @@ -1664,6 +1746,8 @@ static int irda_shutdown(struct socket *sock, int how) IRDA_DEBUG(1, "%s(%p)\n", __func__, self); + lock_kernel(); + sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); @@ -1684,6 +1768,8 @@ static int irda_shutdown(struct socket *sock, int how) self->daddr = DEV_ADDR_ANY; /* Until we get re-connected */ self->saddr = 0x0; /* so IrLMP assign us any link */ + unlock_kernel(); + return 0; } @@ -1699,6 +1785,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); + lock_kernel(); poll_wait(file, sk->sk_sleep, wait); mask = 0; @@ -1746,18 +1833,34 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, default: break; } + unlock_kernel(); return mask; } +static unsigned int irda_datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + int err; + + lock_kernel(); + err = datagram_poll(file, sock, wait); + unlock_kernel(); + + return err; +} + /* * Function irda_ioctl (sock, cmd, arg) */ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; + int err; IRDA_DEBUG(4, "%s(), cmd=%#x\n", __func__, cmd); + lock_kernel(); + err = -EINVAL; switch (cmd) { case TIOCOUTQ: { long amount; @@ -1765,9 +1868,8 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; - if (put_user(amount, (unsigned int __user *)arg)) - return -EFAULT; - return 0; + err = put_user(amount, (unsigned int __user *)arg); + break; } case TIOCINQ: { @@ -1776,15 +1878,14 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) /* These two are safe on a single CPU system as only user tasks fiddle here */ if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; - if (put_user(amount, (unsigned int __user *)arg)) - return -EFAULT; - return 0; + err = put_user(amount, (unsigned int __user *)arg); + break; } case SIOCGSTAMP: if (sk != NULL) - return 
sock_get_timestamp(sk, (struct timeval __user *)arg); - return -EINVAL; + err = sock_get_timestamp(sk, (struct timeval __user *)arg); + break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -1796,14 +1897,14 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFNETMASK: case SIOCGIFMETRIC: case SIOCSIFMETRIC: - return -EINVAL; + break; default: IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __func__); - return -ENOIOCTLCMD; + err = -ENOIOCTLCMD; } + unlock_kernel(); - /*NOTREACHED*/ - return 0; + return err; } #ifdef CONFIG_COMPAT @@ -1825,7 +1926,7 @@ static int irda_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon * Set some options for the socket * */ -static int irda_setsockopt(struct socket *sock, int level, int optname, +static int __irda_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; @@ -2083,6 +2184,18 @@ static int irda_setsockopt(struct socket *sock, int level, int optname, return 0; } +static int irda_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + int err; + + lock_kernel(); + err = __irda_setsockopt(sock, level, optname, optval, optlen); + unlock_kernel(); + + return err; +} + /* * Function irda_extract_ias_value(ias_opt, ias_value) * @@ -2135,7 +2248,7 @@ static int irda_extract_ias_value(struct irda_ias_set *ias_opt, /* * Function irda_getsockopt (sock, level, optname, optval, optlen) */ -static int irda_getsockopt(struct socket *sock, int level, int optname, +static int __irda_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; @@ -2463,13 +2576,25 @@ bed: return 0; } -static struct net_proto_family irda_family_ops = { +static int irda_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + int err; + + lock_kernel(); + err = __irda_getsockopt(sock, level, optname, optval, optlen); + unlock_kernel(); + + return err; +} + +static const struct net_proto_family irda_family_ops = { .family = PF_IRDA, .create = irda_create, .owner = THIS_MODULE, }; -static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { +static const struct proto_ops irda_stream_ops = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2493,7 +2618,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = { .sendpage = sock_no_sendpage, }; -static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { +static const struct proto_ops irda_seqpacket_ops = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2502,7 +2627,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .socketpair = sock_no_socketpair, .accept = irda_accept, .getname = irda_getname, - .poll = datagram_poll, + .poll = irda_datagram_poll, .ioctl = irda_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = irda_compat_ioctl, @@ -2517,7 +2642,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = { .sendpage = sock_no_sendpage, }; -static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { +static const struct proto_ops irda_dgram_ops = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2526,7 +2651,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { .socketpair = sock_no_socketpair, .accept = irda_accept, .getname = irda_getname, - .poll = datagram_poll, + .poll = irda_datagram_poll, .ioctl = 
irda_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = irda_compat_ioctl, @@ -2542,7 +2667,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = { }; #ifdef CONFIG_IRDA_ULTRA -static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { +static const struct proto_ops irda_ultra_ops = { .family = PF_IRDA, .owner = THIS_MODULE, .release = irda_release, @@ -2551,7 +2676,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = irda_getname, - .poll = datagram_poll, + .poll = irda_datagram_poll, .ioctl = irda_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = irda_compat_ioctl, @@ -2567,13 +2692,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = { }; #endif /* CONFIG_IRDA_ULTRA */ -SOCKOPS_WRAP(irda_stream, PF_IRDA); -SOCKOPS_WRAP(irda_seqpacket, PF_IRDA); -SOCKOPS_WRAP(irda_dgram, PF_IRDA); -#ifdef CONFIG_IRDA_ULTRA -SOCKOPS_WRAP(irda_ultra, PF_IRDA); -#endif /* CONFIG_IRDA_ULTRA */ - /* * Function irsock_init (pro) * diff --git a/net/irda/irlap.c b/net/irda/irlap.c index 356e65b1dc4..783c5f367d2 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -450,10 +450,10 @@ void irlap_disconnect_request(struct irlap_cb *self) /* Check if we are in the right state for disconnecting */ switch (self->state) { - case LAP_XMIT_P: /* FALLTROUGH */ - case LAP_XMIT_S: /* FALLTROUGH */ - case LAP_CONN: /* FALLTROUGH */ - case LAP_RESET_WAIT: /* FALLTROUGH */ + case LAP_XMIT_P: /* FALLTHROUGH */ + case LAP_XMIT_S: /* FALLTHROUGH */ + case LAP_CONN: /* FALLTHROUGH */ + case LAP_RESET_WAIT: /* FALLTHROUGH */ case LAP_RESET_CHECK: irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL); break; @@ -485,9 +485,9 @@ void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason) IRDA_DEBUG(1, "%s(), Sending reset request!\n", __func__); irlap_do_event(self, RESET_REQUEST, NULL, NULL); break; - case LAP_NO_RESPONSE: /* FALLTROUGH */ - case LAP_DISC_INDICATION: /* FALLTROUGH */ - case LAP_FOUND_NONE: /* FALLTROUGH */ + case LAP_NO_RESPONSE: /* FALLTHROUGH */ + case LAP_DISC_INDICATION: /* FALLTHROUGH */ + case LAP_FOUND_NONE: /* FALLTHROUGH */ case LAP_MEDIA_BUSY: irlmp_link_disconnect_indication(self->notify.instance, self, reason, NULL); diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index c5c51959e3c..94a9884d714 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -1741,7 +1741,7 @@ static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event, * Function irlap_state_xmit_s (event, skb, info) * * XMIT_S, The secondary station has been given the right to transmit, - * and we therefor do not expect to receive any transmissions from other + * and we therefore do not expect to receive any transmissions from other * stations. 
*/ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event, diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 7bf5b913828..0e7d8bde145 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -105,7 +105,7 @@ int __init irlmp_init(void) init_timer(&irlmp->discovery_timer); - /* Do discovery every 3 seconds, conditionaly */ + /* Do discovery every 3 seconds, conditionally */ if (sysctl_discovery) irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ); @@ -1842,7 +1842,7 @@ LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason) reason = LM_CONNECT_FAILURE; break; default: - IRDA_DEBUG(1, "%s(), Unknow IrLAP disconnect reason %d!\n", + IRDA_DEBUG(1, "%s(), Unknown IrLAP disconnect reason %d!\n", __func__, lap_reason); reason = LM_LAP_DISCONNECT; break; diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index b001c361ad3..4300df35d37 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -249,6 +249,7 @@ #include <linux/poll.h> #include <linux/capability.h> #include <linux/ctype.h> /* isspace() */ +#include <linux/string.h> /* skip_spaces() */ #include <asm/uaccess.h> #include <linux/init.h> diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index cccc2e93234..b26dee784ab 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -1403,8 +1403,8 @@ irnet_connect_indication(void * instance, /* Socket already connecting ? On primary ? */ if(0 #ifdef ALLOW_SIMULT_CONNECT - || ((irttp_is_primary(server->tsap) == 1) /* primary */ - && (test_and_clear_bit(0, &new->ttp_connect))) + || ((irttp_is_primary(server->tsap) == 1) && /* primary */ + (test_and_clear_bit(0, &new->ttp_connect))) #endif /* ALLOW_SIMULT_CONNECT */ ) { diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 7dea882dbb7..156020d138b 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -76,9 +76,8 @@ irnet_ctrl_write(irnet_socket * ap, /* Look at the next command */ start = next; - /* Scrap whitespaces before the command */ - while(isspace(*start)) - start++; + /* Scrap whitespaces before the command */ + start = skip_spaces(start); /* ',' is our command separator */ next = strchr(start, ','); @@ -133,8 +132,7 @@ irnet_ctrl_write(irnet_socket * ap, char * endp; /* Scrap whitespaces before the command */ - while(isspace(*begp)) - begp++; + begp = skip_spaces(begp); /* Convert argument to a number (last arg is the base) */ addr = simple_strtoul(begp, &endp, 16); diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index 5c86567e5a7..d0b70dadf73 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -113,26 +113,21 @@ static int do_discovery(ctl_table *table, int write, /* One file */ static ctl_table irda_table[] = { { - .ctl_name = NET_IRDA_DISCOVERY, .procname = "discovery", .data = &sysctl_discovery, .maxlen = sizeof(int), .mode = 0644, .proc_handler = do_discovery, - .strategy = sysctl_intvec }, { - .ctl_name = NET_IRDA_DEVNAME, .procname = "devname", .data = sysctl_devname, .maxlen = 65, .mode = 0644, .proc_handler = do_devname, - .strategy = sysctl_string }, #ifdef CONFIG_IRDA_DEBUG { - .ctl_name = NET_IRDA_DEBUG, .procname = "debug", .data = &irda_debug, .maxlen = sizeof(int), @@ -142,7 +137,6 @@ static ctl_table irda_table[] = { #endif #ifdef CONFIG_IRDA_FAST_RR { - .ctl_name = NET_IRDA_FAST_POLL, .procname = "fast_poll_increase", .data = &sysctl_fast_poll_increase, .maxlen = sizeof(int), @@ -151,18 +145,15 @@ static ctl_table irda_table[] = { }, #endif { - .ctl_name = 
NET_IRDA_DISCOVERY_SLOTS, .procname = "discovery_slots", .data = &sysctl_discovery_slots, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_discovery_slots, .extra2 = &max_discovery_slots }, { - .ctl_name = NET_IRDA_DISCOVERY_TIMEOUT, .procname = "discovery_timeout", .data = &sysctl_discovery_timeout, .maxlen = sizeof(int), @@ -170,99 +161,83 @@ static ctl_table irda_table[] = { .proc_handler = proc_dointvec }, { - .ctl_name = NET_IRDA_SLOT_TIMEOUT, .procname = "slot_timeout", .data = &sysctl_slot_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_slot_timeout, .extra2 = &max_slot_timeout }, { - .ctl_name = NET_IRDA_MAX_BAUD_RATE, .procname = "max_baud_rate", .data = &sysctl_max_baud_rate, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_baud_rate, .extra2 = &max_max_baud_rate }, { - .ctl_name = NET_IRDA_MIN_TX_TURN_TIME, .procname = "min_tx_turn_time", .data = &sysctl_min_tx_turn_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_min_tx_turn_time, .extra2 = &max_min_tx_turn_time }, { - .ctl_name = NET_IRDA_MAX_TX_DATA_SIZE, .procname = "max_tx_data_size", .data = &sysctl_max_tx_data_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_tx_data_size, .extra2 = &max_max_tx_data_size }, { - .ctl_name = NET_IRDA_MAX_TX_WINDOW, .procname = "max_tx_window", .data = &sysctl_max_tx_window, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_tx_window, .extra2 = &max_max_tx_window }, { - .ctl_name = NET_IRDA_MAX_NOREPLY_TIME, .procname = "max_noreply_time", .data = &sysctl_max_noreply_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_max_noreply_time, .extra2 = &max_max_noreply_time }, { - .ctl_name = NET_IRDA_WARN_NOREPLY_TIME, .procname = "warn_noreply_time", .data = &sysctl_warn_noreply_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_warn_noreply_time, .extra2 = &max_warn_noreply_time }, { - .ctl_name = NET_IRDA_LAP_KEEPALIVE_TIME, .procname = "lap_keepalive_time", .data = &sysctl_lap_keepalive_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_lap_keepalive_time, .extra2 = &max_lap_keepalive_time }, - { .ctl_name = 0 } + { } }; static struct ctl_path irda_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "irda", .ctl_name = NET_IRDA, }, + { .procname = "net", }, + { .procname = "irda", }, { } }; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index bada1b9c670..c18286a2167 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -221,7 +221,7 @@ static int afiucv_pm_restore_thaw(struct device *dev) return 0; } -static struct dev_pm_ops afiucv_pm_ops = { +static const struct dev_pm_ops afiucv_pm_ops = { .prepare = afiucv_pm_prepare, .complete = afiucv_pm_complete, .freeze = afiucv_pm_freeze, @@ -428,7 +428,6 @@ static void iucv_sock_close(struct sock *sk) break; default: - sock_set_flag(sk, SOCK_ZAPPED); /* nothing to do here */ break; } @@ -482,7 +481,8 @@ static struct sock *iucv_sock_alloc(struct socket 
*sock, int proto, gfp_t prio) } /* Create an IUCV socket */ -static int iucv_sock_create(struct net *net, struct socket *sock, int protocol) +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -536,7 +536,7 @@ void iucv_accept_enqueue(struct sock *parent, struct sock *sk) list_add_tail(&iucv_sk(sk)->accept_q, &par->accept_q); spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent = parent; - parent->sk_ack_backlog++; + sk_acceptq_added(parent); } void iucv_accept_unlink(struct sock *sk) @@ -547,7 +547,7 @@ void iucv_accept_unlink(struct sock *sk) spin_lock_irqsave(&par->accept_q_lock, flags); list_del_init(&iucv_sk(sk)->accept_q); spin_unlock_irqrestore(&par->accept_q_lock, flags); - iucv_sk(sk)->parent->sk_ack_backlog--; + sk_acceptq_removed(iucv_sk(sk)->parent); iucv_sk(sk)->parent = NULL; sock_put(sk); } @@ -1715,7 +1715,7 @@ static const struct proto_ops iucv_sock_ops = { .getsockopt = iucv_sock_getsockopt, }; -static struct net_proto_family iucv_sock_family_ops = { +static const struct net_proto_family iucv_sock_family_ops = { .family = AF_IUCV, .owner = THIS_MODULE, .create = iucv_sock_create, diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 3973d0e61e5..fd8b28361a6 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -93,7 +93,7 @@ static int iucv_pm_freeze(struct device *); static int iucv_pm_thaw(struct device *); static int iucv_pm_restore(struct device *); -static struct dev_pm_ops iucv_pm_ops = { +static const struct dev_pm_ops iucv_pm_ops = { .prepare = iucv_pm_prepare, .complete = iucv_pm_complete, .freeze = iucv_pm_freeze, @@ -1768,7 +1768,6 @@ static void iucv_tasklet_fn(unsigned long ignored) */ static void iucv_work_fn(struct work_struct *work) { - typedef void iucv_irq_fn(struct iucv_irq_data *); LIST_HEAD(work_queue); struct iucv_irq_list *p, *n; @@ -1878,14 +1877,25 @@ int iucv_path_table_empty(void) static int iucv_pm_freeze(struct device *dev) { int cpu; + struct iucv_irq_list *p, *n; int rc = 0; #ifdef CONFIG_PM_DEBUG printk(KERN_WARNING "iucv_pm_freeze\n"); #endif + if (iucv_pm_state != IUCV_PM_FREEZING) { + for_each_cpu_mask_nr(cpu, iucv_irq_cpumask) + smp_call_function_single(cpu, iucv_block_cpu_almost, + NULL, 1); + cancel_work_sync(&iucv_work); + list_for_each_entry_safe(p, n, &iucv_work_queue, list) { + list_del_init(&p->list); + iucv_sever_pathid(p->data.ippathid, + iucv_error_no_listener); + kfree(p); + } + } iucv_pm_state = IUCV_PM_FREEZING; - for_each_cpu_mask_nr(cpu, iucv_irq_cpumask) - smp_call_function_single(cpu, iucv_block_cpu_almost, NULL, 1); if (dev->driver && dev->driver->pm && dev->driver->pm->freeze) rc = dev->driver->pm->freeze(dev); if (iucv_path_table_empty()) diff --git a/net/key/af_key.c b/net/key/af_key.c index 4e98193dfa0..76fa6fef647 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -35,7 +35,7 @@ #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) -static int pfkey_net_id; +static int pfkey_net_id __read_mostly; struct netns_pfkey { /* List of all pfkey sockets. 
*/ struct hlist_head table; @@ -177,7 +177,8 @@ static struct proto key_proto = { .obj_size = sizeof(struct pfkey_sock), }; -static int pfkey_create(struct net *net, struct socket *sock, int protocol) +static int pfkey_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; @@ -1192,6 +1193,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->aalg->alg_key_len = key->sadb_key_bits; memcpy(x->aalg->alg_key, key+1, keysize); } + x->aalg->alg_trunc_len = a->uinfo.auth.icv_truncbits; x->props.aalgo = sa->sadb_sa_auth; /* x->algo.flags = sa->sadb_sa_flags; */ } @@ -3606,7 +3608,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied; @@ -3644,7 +3646,7 @@ static const struct proto_ops pfkey_ops = { .recvmsg = pfkey_recvmsg, }; -static struct net_proto_family pfkey_family_ops = { +static const struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE, @@ -3764,28 +3766,14 @@ static struct xfrm_mgr pfkeyv2_mgr = static int __net_init pfkey_net_init(struct net *net) { - struct netns_pfkey *net_pfkey; + struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); int rv; - net_pfkey = kmalloc(sizeof(struct netns_pfkey), GFP_KERNEL); - if (!net_pfkey) { - rv = -ENOMEM; - goto out_kmalloc; - } INIT_HLIST_HEAD(&net_pfkey->table); atomic_set(&net_pfkey->socks_nr, 0); - rv = net_assign_generic(net, pfkey_net_id, net_pfkey); - if (rv < 0) - goto out_assign; + rv = pfkey_init_proc(net); - if (rv < 0) - goto out_proc; - return 0; -out_proc: -out_assign: - kfree(net_pfkey); -out_kmalloc: return rv; } @@ -3795,17 +3783,18 @@ static void __net_exit pfkey_net_exit(struct net *net) pfkey_exit_proc(net); BUG_ON(!hlist_empty(&net_pfkey->table)); - kfree(net_pfkey); } static struct pernet_operations pfkey_net_ops = { .init = pfkey_net_init, .exit = pfkey_net_exit, + .id = &pfkey_net_id, + .size = sizeof(struct netns_pfkey), }; static void __exit ipsec_pfkey_exit(void) { - unregister_pernet_gen_subsys(pfkey_net_id, &pfkey_net_ops); + unregister_pernet_subsys(&pfkey_net_ops); xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); proto_unregister(&key_proto); @@ -3824,7 +3813,7 @@ static int __init ipsec_pfkey_init(void) err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0) goto out_sock_unregister; - err = register_pernet_gen_subsys(&pfkey_net_id, &pfkey_net_ops); + err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_xfrm_unregister_km; out: diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7aa4fd17010..3a66546cad0 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -140,14 +140,17 @@ static struct proto llc_proto = { /** * llc_ui_create - alloc and init a new llc_ui socket + * @net: network namespace (must be default network) * @sock: Socket to initialize and attach allocated sk to. * @protocol: Unused. + * @kern: on behalf of kernel or userspace * * Allocate and initialize a new llc_ui socket, validate the user wants a * socket type we have available. * Returns 0 upon success, negative upon failure. 
*/ -static int llc_ui_create(struct net *net, struct socket *sock, int protocol) +static int llc_ui_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; int rc = -ESOCKTNOSUPPORT; @@ -155,7 +158,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol) if (!capable(CAP_NET_RAW)) return -EPERM; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { @@ -1092,7 +1095,7 @@ out: return rc; } -static struct net_proto_family llc_ui_family_ops = { +static const struct net_proto_family llc_ui_family_ops = { .family = PF_LLC, .create = llc_ui_create, .owner = THIS_MODULE, diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 57b9304d444..e2ebe358626 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -15,86 +15,73 @@ static struct ctl_table llc2_timeout_table[] = { { - .ctl_name = NET_LLC2_ACK_TIMEOUT, .procname = "ack", .data = &sysctl_llc2_ack_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_LLC2_BUSY_TIMEOUT, .procname = "busy", .data = &sysctl_llc2_busy_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_LLC2_P_TIMEOUT, .procname = "p", .data = &sysctl_llc2_p_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, { - .ctl_name = NET_LLC2_REJ_TIMEOUT, .procname = "rej", .data = &sysctl_llc2_rej_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, - { 0 }, + { }, }; static struct ctl_table llc_station_table[] = { { - .ctl_name = NET_LLC_STATION_ACK_TIMEOUT, .procname = "ack_timeout", .data = &sysctl_llc_station_ack_timeout, .maxlen = sizeof(long), .mode = 0644, .proc_handler = proc_dointvec_jiffies, - .strategy = sysctl_jiffies, }, - { 0 }, + { }, }; static struct ctl_table llc2_dir_timeout_table[] = { { - .ctl_name = NET_LLC2, .procname = "timeout", .mode = 0555, .child = llc2_timeout_table, }, - { 0 }, + { }, }; static struct ctl_table llc_table[] = { { - .ctl_name = NET_LLC2, .procname = "llc2", .mode = 0555, .child = llc2_dir_timeout_table, }, { - .ctl_name = NET_LLC_STATION, .procname = "station", .mode = 0555, .child = llc_station_table, }, - { 0 }, + { }, }; static struct ctl_path llc_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "llc", .ctl_name = NET_LLC, }, + { .procname = "net", }, + { .procname = "llc", }, { } }; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4d5543af312..a10d508b07e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -194,6 +194,19 @@ config MAC80211_VERBOSE_MPL_DEBUG Do not select this option. +config MAC80211_VERBOSE_MHWMP_DEBUG + bool "Verbose mesh HWMP routing debugging" + depends on MAC80211_DEBUG_MENU + depends on MAC80211_MESH + ---help--- + Selecting this option causes mac80211 to print out very + verbose mesh routing (HWMP) debugging messages (when mac80211 + is taking part in a mesh network). + It should not be selected on production systems as those + messages are remotely triggerable. + + Do not select this option. 
+ config MAC80211_DEBUG_COUNTERS bool "Extra statistics for TX/RX debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 9f3cf712932..298cfcc1bf8 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MAC80211) += mac80211.o # mac80211 objects mac80211-y := \ - main.o \ + main.o status.o \ sta_info.o \ wep.o \ wpa.o \ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index bc064d7933f..51c7dc3c4c3 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -41,7 +41,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - if (drv_ampdu_action(local, IEEE80211_AMPDU_RX_STOP, + if (drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -85,10 +86,6 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r struct ieee80211_local *local = sdata->local; struct sta_info *sta; - /* stop HW Rx aggregation. ampdu_action existence - * already verified in session init so we add the BUG_ON */ - BUG_ON(!local->ops->ampdu_action); - rcu_read_lock(); sta = sta_info_get(local, ra); @@ -170,7 +167,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); } void ieee80211_process_addba_request(struct ieee80211_local *local, @@ -210,9 +207,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ /* XXX: check own ht delayed BA capability?? 
*/ - if (((ba_policy != 1) - && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) - || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { + if (((ba_policy != 1) && + (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || + (buf_size > IEEE80211_MAX_AMPDU_BUF)) { status = WLAN_STATUS_INVALID_QOS_PARAM; #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) @@ -284,7 +281,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, goto end; } - ret = drv_ampdu_action(local, IEEE80211_AMPDU_RX_START, + ret = drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b09948ceec4..5e3a7eccef5 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -91,7 +91,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); } void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) @@ -120,16 +120,22 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 bar->control = cpu_to_le16(bar_control); bar->start_seq_num = cpu_to_le16(ssn); - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } -static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator) +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) { struct ieee80211_local *local = sta->local; int ret; u8 *state; +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; if (*state == HT_AGG_STATE_OPERATIONAL) @@ -138,12 +144,12 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - ret = drv_ampdu_action(local, IEEE80211_AMPDU_TX_STOP, + ret = drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_TX_STOP, &sta->sta, tid, NULL); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { - *state = HT_AGG_STATE_OPERATIONAL; /* * We may have pending packets get stuck in this case... * Not bothering with a workaround for now. 
@@ -173,12 +179,14 @@ static void sta_addba_resp_timer_expired(unsigned long data) /* check if the TID waits for addBA response */ spin_lock_bh(&sta->lock); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK)) != + HT_ADDBA_REQUESTED_MSK) { spin_unlock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "timer expired on tid %d but we are not " - "expecting addBA response there", tid); + "(or no longer) expecting addBA response there", + tid); #endif return; } @@ -196,11 +204,11 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } -int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) +int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; u8 *state; int ret = 0; u16 start_seq_num; @@ -208,52 +216,37 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) if (WARN_ON(!local->ops->ampdu_action)) return -EINVAL; - if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) + if ((tid >= STA_TID_NUM) || + !(local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n", - ra, tid); + pubsta->addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find the station\n"); -#endif - ret = -ENOENT; - goto unlock; - } - /* * The aggregation code is not prepared to handle * anything but STA/AP due to the BSSID handling. * IBSS could work in the code but isn't supported * by drivers or the standard. */ - if (sta->sdata->vif.type != NL80211_IFTYPE_STATION && - sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN && - sta->sdata->vif.type != NL80211_IFTYPE_AP) { - ret = -EINVAL; - goto unlock; - } + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + return -EINVAL; if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Suspend in progress. 
" "Denying BA session request\n"); #endif - ret = -EINVAL; - goto unlock; + return -EINVAL; } spin_lock_bh(&sta->lock); spin_lock(&local->ampdu_lock); - sdata = sta->sdata; - /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { ret = -EBUSY; @@ -310,8 +303,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) start_seq_num = sta->tid_seq[tid]; - ret = drv_ampdu_action(local, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); + ret = drv_ampdu_action(local, &sdata->vif, + IEEE80211_AMPDU_TX_START, + pubsta, tid, &start_seq_num); if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -336,7 +330,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; - ieee80211_send_addba_request(sta->sdata, ra, tid, + ieee80211_send_addba_request(sdata, pubsta->addr, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, 0x40, 5000); @@ -348,7 +342,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); #endif - goto unlock; + return 0; err_free: kfree(sta->ampdu_mlme.tid_tx[tid]); @@ -360,8 +354,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) err_unlock_sta: spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); - unlock: - rcu_read_unlock(); return ret; } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); @@ -428,13 +420,15 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, ieee80211_agg_splice_finish(local, sta, tid); spin_unlock(&local->ampdu_lock); - drv_ampdu_action(local, IEEE80211_AMPDU_TX_OPERATIONAL, + drv_ampdu_action(local, &sta->sdata->vif, + IEEE80211_AMPDU_TX_OPERATIONAL, &sta->sta, tid, NULL); } -void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) +void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; u8 *state; @@ -483,10 +477,11 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); @@ -501,6 +496,7 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; + ra_tid->vif = vif; skb->pkt_type = IEEE80211_ADDBA_MSG; skb_queue_tail(&local->skb_queue, skb); @@ -523,11 +519,6 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, goto unlock; } -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); unlock: @@ -535,36 +526,27 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return ret; } -int 
ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, - u8 *ra, u16 tid, +int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, enum ieee80211_back_parties initiator) { - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - int ret = 0; + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; - if (WARN_ON(!local->ops->ampdu_action)) + if (!local->ops->ampdu_action) return -EINVAL; if (tid >= STA_TID_NUM) return -EINVAL; - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - ret = __ieee80211_stop_tx_ba_session(sta, tid, initiator); - rcu_read_unlock(); - return ret; + return __ieee80211_stop_tx_ba_session(sta, tid, initiator); } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); -void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) +void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; u8 *state; @@ -627,10 +609,11 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); -void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, +void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, const u8 *ra, u16 tid) { - struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; struct ieee80211_ra_tid *ra_tid; struct sk_buff *skb = dev_alloc_skb(0); @@ -645,6 +628,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, ra_tid = (struct ieee80211_ra_tid *) &skb->cb; memcpy(&ra_tid->ra, ra, ETH_ALEN); ra_tid->tid = tid; + ra_tid->vif = vif; skb->pkt_type = IEEE80211_DELBA_MSG; skb_queue_tail(&local->skb_queue, skb); @@ -666,21 +650,21 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, state = &sta->ampdu_mlme.tid_state_tx[tid]; - del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - spin_lock_bh(&sta->lock); if (!(*state & HT_ADDBA_REQUESTED_MSK)) - goto timer_still_needed; + goto out; if (mgmt->u.action.u.addba_resp.dialog_token != sta->ampdu_mlme.tid_tx[tid]->dialog_token) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - goto timer_still_needed; + goto out; } + del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ @@ -699,10 +683,6 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); } - goto out; - - timer_still_needed: - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); out: spin_unlock_bh(&sta->lock); } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7b5131bd6fa..6dc3579c0ac 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -36,6 +36,15 @@ static bool nl80211_type_check(enum nl80211_iftype type) } } +static bool nl80211_params_check(enum nl80211_iftype type, + struct vif_params *params) +{ + if (!nl80211_type_check(type)) + return false; + + return true; +} + static int ieee80211_add_iface(struct wiphy *wiphy, char *name, enum nl80211_iftype 
type, u32 *flags, struct vif_params *params) @@ -45,7 +54,7 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, struct ieee80211_sub_if_data *sdata; int err; - if (!nl80211_type_check(type)) + if (!nl80211_params_check(type, params)) return -EINVAL; err = ieee80211_if_add(local, name, &dev, type, params); @@ -75,7 +84,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (netif_running(dev)) return -EBUSY; - if (!nl80211_type_check(type)) + if (!nl80211_params_check(type, params)) return -EINVAL; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -92,6 +101,13 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) return 0; + if (type == NL80211_IFTYPE_AP_VLAN && + params && params->use_4addr == 0) + rcu_assign_pointer(sdata->u.vlan.sta, NULL); + else if (type == NL80211_IFTYPE_STATION && + params && params->use_4addr >= 0) + sdata->u.mgd.use_4addr = params->use_4addr; + sdata->u.mntr_flags = *flags; return 0; } @@ -338,7 +354,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->rx_packets = sta->rx_packets; sinfo->tx_packets = sta->tx_packets; - if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || + (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { sinfo->filled |= STATION_INFO_SIGNAL; sinfo->signal = (s8)sta->last_signal; } @@ -377,13 +394,13 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; int ret = -ENOENT; rcu_read_lock(); - sta = sta_info_get_by_idx(local, idx, dev); + sta = sta_info_get_by_idx(sdata, idx); if (sta) { ret = 0; memcpy(mac, sta->sta.addr, ETH_ALEN); @@ -738,13 +755,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, err = sta_info_insert(sta); if (err) { - /* STA has been freed */ - if (err == -EEXIST && layer2_update) { - /* Need to update layer 2 devices on reassociation */ - sta = sta_info_get(local, mac); - if (sta) - ieee80211_send_layer2_update(sta); - } rcu_read_unlock(); return err; } @@ -813,6 +823,15 @@ static int ieee80211_change_station(struct wiphy *wiphy, return -EINVAL; } + if (params->vlan->ieee80211_ptr->use_4addr) { + if (vlansdata->u.vlan.sta) { + rcu_read_unlock(); + return -EBUSY; + } + + rcu_assign_pointer(vlansdata->u.vlan.sta, sta); + } + sta->sdata = vlansdata; ieee80211_send_layer2_update(sta); } @@ -914,7 +933,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->generation = mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | - MPATH_INFO_DSN | + MPATH_INFO_SN | MPATH_INFO_METRIC | MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | @@ -922,7 +941,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, MPATH_INFO_FLAGS; pinfo->frame_qlen = mpath->frame_queue.qlen; - pinfo->dsn = mpath->dsn; + pinfo->sn = mpath->sn; pinfo->metric = mpath->metric; if (time_before(jiffies, mpath->exp_time)) pinfo->exptime = jiffies_to_msecs(mpath->exp_time - jiffies); @@ -934,8 +953,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; - if (mpath->flags & 
MESH_PATH_DSN_VALID) - pinfo->flags |= NL80211_MPATH_FLAG_DSN_VALID; + if (mpath->flags & MESH_PATH_SN_VALID) + pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVING) @@ -1008,7 +1027,10 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_mesh *ifmsh; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + ifmsh = &sdata->u.mesh; /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); @@ -1043,6 +1065,10 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, mask)) conf->dot11MeshHWMPnetDiameterTraversalTime = nconf->dot11MeshHWMPnetDiameterTraversalTime; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOTMODE, mask)) { + conf->dot11MeshHWMPRootMode = nconf->dot11MeshHWMPRootMode; + ieee80211_mesh_root_setup(ifmsh); + } return 0; } diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 96991b68f04..e4b54093d41 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -1,3 +1,4 @@ + /* * mac80211 debugfs for wireless PHYs * @@ -38,16 +39,10 @@ static const struct file_operations name## _ops = { \ }; #define DEBUGFS_ADD(name) \ - local->debugfs.name = debugfs_create_file(#name, 0400, phyd, \ - local, &name## _ops); + debugfs_create_file(#name, 0400, phyd, local, &name## _ops); #define DEBUGFS_ADD_MODE(name, mode) \ - local->debugfs.name = debugfs_create_file(#name, mode, phyd, \ - local, &name## _ops); - -#define DEBUGFS_DEL(name) \ - debugfs_remove(local->debugfs.name); \ - local->debugfs.name = NULL; + debugfs_create_file(#name, mode, phyd, local, &name## _ops); DEBUGFS_READONLY_FILE(frequency, 20, "%d", @@ -57,7 +52,7 @@ DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", - local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>"); + local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); static ssize_t tsf_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -233,12 +228,7 @@ static const struct file_operations stats_ ##name## _ops = { \ }; #define DEBUGFS_STATS_ADD(name) \ - local->debugfs.stats.name = debugfs_create_file(#name, 0400, statsd,\ - local, &stats_ ##name## _ops); - -#define DEBUGFS_STATS_DEL(name) \ - debugfs_remove(local->debugfs.stats.name); \ - local->debugfs.stats.name = NULL; + debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops); DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u", local->dot11TransmittedFragmentCount); @@ -326,7 +316,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(noack); statsd = debugfs_create_dir("statistics", phyd); - local->debugfs.statistics = statsd; /* if the dir failed, don't put all the other things into the root! 
*/ if (!statsd) @@ -367,57 +356,3 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_STATS_ADD(dot11FCSErrorCount); DEBUGFS_STATS_ADD(dot11RTSSuccessCount); } - -void debugfs_hw_del(struct ieee80211_local *local) -{ - DEBUGFS_DEL(frequency); - DEBUGFS_DEL(total_ps_buffered); - DEBUGFS_DEL(wep_iv); - DEBUGFS_DEL(tsf); - DEBUGFS_DEL(queues); - DEBUGFS_DEL(reset); - DEBUGFS_DEL(noack); - - DEBUGFS_STATS_DEL(transmitted_fragment_count); - DEBUGFS_STATS_DEL(multicast_transmitted_frame_count); - DEBUGFS_STATS_DEL(failed_count); - DEBUGFS_STATS_DEL(retry_count); - DEBUGFS_STATS_DEL(multiple_retry_count); - DEBUGFS_STATS_DEL(frame_duplicate_count); - DEBUGFS_STATS_DEL(received_fragment_count); - DEBUGFS_STATS_DEL(multicast_received_frame_count); - DEBUGFS_STATS_DEL(transmitted_frame_count); - DEBUGFS_STATS_DEL(num_scans); -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - DEBUGFS_STATS_DEL(tx_handlers_drop); - DEBUGFS_STATS_DEL(tx_handlers_queued); - DEBUGFS_STATS_DEL(tx_handlers_drop_unencrypted); - DEBUGFS_STATS_DEL(tx_handlers_drop_fragment); - DEBUGFS_STATS_DEL(tx_handlers_drop_wep); - DEBUGFS_STATS_DEL(tx_handlers_drop_not_assoc); - DEBUGFS_STATS_DEL(tx_handlers_drop_unauth_port); - DEBUGFS_STATS_DEL(rx_handlers_drop); - DEBUGFS_STATS_DEL(rx_handlers_queued); - DEBUGFS_STATS_DEL(rx_handlers_drop_nullfunc); - DEBUGFS_STATS_DEL(rx_handlers_drop_defrag); - DEBUGFS_STATS_DEL(rx_handlers_drop_short); - DEBUGFS_STATS_DEL(rx_handlers_drop_passive_scan); - DEBUGFS_STATS_DEL(tx_expand_skb_head); - DEBUGFS_STATS_DEL(tx_expand_skb_head_cloned); - DEBUGFS_STATS_DEL(rx_expand_skb_head); - DEBUGFS_STATS_DEL(rx_expand_skb_head2); - DEBUGFS_STATS_DEL(rx_handlers_fragments); - DEBUGFS_STATS_DEL(tx_status_drop); -#endif - DEBUGFS_STATS_DEL(dot11ACKFailureCount); - DEBUGFS_STATS_DEL(dot11RTSFailureCount); - DEBUGFS_STATS_DEL(dot11FCSErrorCount); - DEBUGFS_STATS_DEL(dot11RTSSuccessCount); - - debugfs_remove(local->debugfs.statistics); - local->debugfs.statistics = NULL; - debugfs_remove(local->debugfs.stations); - local->debugfs.stations = NULL; - debugfs_remove(local->debugfs.keys); - local->debugfs.keys = NULL; -} diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index dd2541935c2..68e6a2050f9 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -3,14 +3,12 @@ #ifdef CONFIG_MAC80211_DEBUGFS extern void debugfs_hw_add(struct ieee80211_local *local); -extern void debugfs_hw_del(struct ieee80211_local *local); extern int mac80211_open_file_generic(struct inode *inode, struct file *file); #else static inline void debugfs_hw_add(struct ieee80211_local *local) { return; } -static inline void debugfs_hw_del(struct ieee80211_local *local) {} #endif #endif /* __MAC80211_DEBUGFS_H */ diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 99c752588b3..e0f5224630d 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -225,8 +225,8 @@ static ssize_t key_key_read(struct file *file, char __user *userbuf, KEY_OPS(key); #define DEBUGFS_ADD(name) \ - key->debugfs.name = debugfs_create_file(#name, 0400,\ - key->debugfs.dir, key, &key_##name##_ops); + debugfs_create_file(#name, 0400, key->debugfs.dir, \ + key, &key_##name##_ops); void ieee80211_debugfs_key_add(struct ieee80211_key *key) { @@ -271,30 +271,12 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) DEBUGFS_ADD(ifindex); }; -#define DEBUGFS_DEL(name) \ - debugfs_remove(key->debugfs.name); key->debugfs.name = NULL; - void ieee80211_debugfs_key_remove(struct ieee80211_key *key) { if (!key) 
return; - DEBUGFS_DEL(keylen); - DEBUGFS_DEL(flags); - DEBUGFS_DEL(keyidx); - DEBUGFS_DEL(hw_key_idx); - DEBUGFS_DEL(tx_rx_count); - DEBUGFS_DEL(algorithm); - DEBUGFS_DEL(tx_spec); - DEBUGFS_DEL(rx_spec); - DEBUGFS_DEL(replays); - DEBUGFS_DEL(icverrors); - DEBUGFS_DEL(key); - DEBUGFS_DEL(ifindex); - - debugfs_remove(key->debugfs.stalink); - key->debugfs.stalink = NULL; - debugfs_remove(key->debugfs.dir); + debugfs_remove_recursive(key->debugfs.dir); key->debugfs.dir = NULL; } void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) @@ -302,7 +284,7 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfsdir) + if (!sdata->debugfs.dir) return; /* this is running under the key lock */ @@ -310,9 +292,9 @@ void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) key = sdata->default_key; if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); - sdata->common_debugfs.default_key = + sdata->debugfs.default_key = debugfs_create_symlink("default_key", - sdata->debugfsdir, buf); + sdata->debugfs.dir, buf); } else ieee80211_debugfs_key_remove_default(sdata); } @@ -322,8 +304,8 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata) if (!sdata) return; - debugfs_remove(sdata->common_debugfs.default_key); - sdata->common_debugfs.default_key = NULL; + debugfs_remove(sdata->debugfs.default_key); + sdata->debugfs.default_key = NULL; } void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) @@ -331,7 +313,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) char buf[50]; struct ieee80211_key *key; - if (!sdata->debugfsdir) + if (!sdata->debugfs.dir) return; /* this is running under the key lock */ @@ -339,9 +321,9 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) key = sdata->default_mgmt_key; if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); - sdata->common_debugfs.default_mgmt_key = + sdata->debugfs.default_mgmt_key = debugfs_create_symlink("default_mgmt_key", - sdata->debugfsdir, buf); + sdata->debugfs.dir, buf); } else ieee80211_debugfs_key_remove_mgmt_default(sdata); } @@ -351,8 +333,8 @@ void ieee80211_debugfs_key_remove_mgmt_default(struct ieee80211_sub_if_data *sda if (!sdata) return; - debugfs_remove(sdata->common_debugfs.default_mgmt_key); - sdata->common_debugfs.default_mgmt_key = NULL; + debugfs_remove(sdata->debugfs.default_mgmt_key); + sdata->debugfs.default_mgmt_key = NULL; } void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 61234e79022..472b2039906 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -149,12 +149,14 @@ IEEE80211_IF_FILE(path_refresh_time, u.mesh.mshcfg.path_refresh_time, DEC); IEEE80211_IF_FILE(min_discovery_timeout, u.mesh.mshcfg.min_discovery_timeout, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPRootMode, + u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); #endif -#define DEBUGFS_ADD(name, type)\ - sdata->debugfs.type.name = debugfs_create_file(#name, 0400,\ - sdata->debugfsdir, sdata, &name##_ops); +#define DEBUGFS_ADD(name, type) \ + debugfs_create_file(#name, 0400, sdata->debugfs.dir, \ + sdata, &name##_ops); static void add_sta_files(struct ieee80211_sub_if_data *sdata) { @@ -199,30 +201,32 @@ static void add_monitor_files(struct ieee80211_sub_if_data *sdata) } #ifdef CONFIG_MAC80211_MESH -#define 
MESHSTATS_ADD(name)\ - sdata->mesh_stats.name = debugfs_create_file(#name, 0400,\ - sdata->mesh_stats_dir, sdata, &name##_ops); static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) { - sdata->mesh_stats_dir = debugfs_create_dir("mesh_stats", - sdata->debugfsdir); + struct dentry *dir = debugfs_create_dir("mesh_stats", + sdata->debugfs.dir); + +#define MESHSTATS_ADD(name)\ + debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); + MESHSTATS_ADD(fwded_mcast); MESHSTATS_ADD(fwded_unicast); MESHSTATS_ADD(fwded_frames); MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); MESHSTATS_ADD(estab_plinks); +#undef MESHSTATS_ADD } -#define MESHPARAMS_ADD(name)\ - sdata->mesh_config.name = debugfs_create_file(#name, 0600,\ - sdata->mesh_config_dir, sdata, &name##_ops); - static void add_mesh_config(struct ieee80211_sub_if_data *sdata) { - sdata->mesh_config_dir = debugfs_create_dir("mesh_config", - sdata->debugfsdir); + struct dentry *dir = debugfs_create_dir("mesh_config", + sdata->debugfs.dir); + +#define MESHPARAMS_ADD(name) \ + debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); + MESHPARAMS_ADD(dot11MeshMaxRetries); MESHPARAMS_ADD(dot11MeshRetryTimeout); MESHPARAMS_ADD(dot11MeshConfirmTimeout); @@ -236,12 +240,14 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshHWMPmaxPREQretries); MESHPARAMS_ADD(path_refresh_time); MESHPARAMS_ADD(min_discovery_timeout); + +#undef MESHPARAMS_ADD } #endif static void add_files(struct ieee80211_sub_if_data *sdata) { - if (!sdata->debugfsdir) + if (!sdata->debugfs.dir) return; switch (sdata->vif.type) { @@ -274,134 +280,6 @@ static void add_files(struct ieee80211_sub_if_data *sdata) } } -#define DEBUGFS_DEL(name, type) \ - do { \ - debugfs_remove(sdata->debugfs.type.name); \ - sdata->debugfs.type.name = NULL; \ - } while (0) - -static void del_sta_files(struct ieee80211_sub_if_data *sdata) -{ - DEBUGFS_DEL(drop_unencrypted, sta); - DEBUGFS_DEL(force_unicast_rateidx, sta); - DEBUGFS_DEL(max_ratectrl_rateidx, sta); - - DEBUGFS_DEL(bssid, sta); - DEBUGFS_DEL(aid, sta); - DEBUGFS_DEL(capab, sta); -} - -static void del_ap_files(struct ieee80211_sub_if_data *sdata) -{ - DEBUGFS_DEL(drop_unencrypted, ap); - DEBUGFS_DEL(force_unicast_rateidx, ap); - DEBUGFS_DEL(max_ratectrl_rateidx, ap); - - DEBUGFS_DEL(num_sta_ps, ap); - DEBUGFS_DEL(dtim_count, ap); - DEBUGFS_DEL(num_buffered_multicast, ap); -} - -static void del_wds_files(struct ieee80211_sub_if_data *sdata) -{ - DEBUGFS_DEL(drop_unencrypted, wds); - DEBUGFS_DEL(force_unicast_rateidx, wds); - DEBUGFS_DEL(max_ratectrl_rateidx, wds); - - DEBUGFS_DEL(peer, wds); -} - -static void del_vlan_files(struct ieee80211_sub_if_data *sdata) -{ - DEBUGFS_DEL(drop_unencrypted, vlan); - DEBUGFS_DEL(force_unicast_rateidx, vlan); - DEBUGFS_DEL(max_ratectrl_rateidx, vlan); -} - -static void del_monitor_files(struct ieee80211_sub_if_data *sdata) -{ -} - -#ifdef CONFIG_MAC80211_MESH -#define MESHSTATS_DEL(name) \ - do { \ - debugfs_remove(sdata->mesh_stats.name); \ - sdata->mesh_stats.name = NULL; \ - } while (0) - -static void del_mesh_stats(struct ieee80211_sub_if_data *sdata) -{ - MESHSTATS_DEL(fwded_mcast); - MESHSTATS_DEL(fwded_unicast); - MESHSTATS_DEL(fwded_frames); - MESHSTATS_DEL(dropped_frames_ttl); - MESHSTATS_DEL(dropped_frames_no_route); - MESHSTATS_DEL(estab_plinks); - debugfs_remove(sdata->mesh_stats_dir); - sdata->mesh_stats_dir = NULL; -} - -#define MESHPARAMS_DEL(name) \ - do { \ - debugfs_remove(sdata->mesh_config.name); \ - 
sdata->mesh_config.name = NULL; \ - } while (0) - -static void del_mesh_config(struct ieee80211_sub_if_data *sdata) -{ - MESHPARAMS_DEL(dot11MeshMaxRetries); - MESHPARAMS_DEL(dot11MeshRetryTimeout); - MESHPARAMS_DEL(dot11MeshConfirmTimeout); - MESHPARAMS_DEL(dot11MeshHoldingTimeout); - MESHPARAMS_DEL(dot11MeshTTL); - MESHPARAMS_DEL(auto_open_plinks); - MESHPARAMS_DEL(dot11MeshMaxPeerLinks); - MESHPARAMS_DEL(dot11MeshHWMPactivePathTimeout); - MESHPARAMS_DEL(dot11MeshHWMPpreqMinInterval); - MESHPARAMS_DEL(dot11MeshHWMPnetDiameterTraversalTime); - MESHPARAMS_DEL(dot11MeshHWMPmaxPREQretries); - MESHPARAMS_DEL(path_refresh_time); - MESHPARAMS_DEL(min_discovery_timeout); - debugfs_remove(sdata->mesh_config_dir); - sdata->mesh_config_dir = NULL; -} -#endif - -static void del_files(struct ieee80211_sub_if_data *sdata) -{ - if (!sdata->debugfsdir) - return; - - switch (sdata->vif.type) { - case NL80211_IFTYPE_MESH_POINT: -#ifdef CONFIG_MAC80211_MESH - del_mesh_stats(sdata); - del_mesh_config(sdata); -#endif - break; - case NL80211_IFTYPE_STATION: - del_sta_files(sdata); - break; - case NL80211_IFTYPE_ADHOC: - /* XXX */ - break; - case NL80211_IFTYPE_AP: - del_ap_files(sdata); - break; - case NL80211_IFTYPE_WDS: - del_wds_files(sdata); - break; - case NL80211_IFTYPE_MONITOR: - del_monitor_files(sdata); - break; - case NL80211_IFTYPE_AP_VLAN: - del_vlan_files(sdata); - break; - default: - break; - } -} - static int notif_registered; void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) @@ -412,16 +290,18 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) return; sprintf(buf, "netdev:%s", sdata->dev->name); - sdata->debugfsdir = debugfs_create_dir(buf, + sdata->debugfs.dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); add_files(sdata); } void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata) { - del_files(sdata); - debugfs_remove(sdata->debugfsdir); - sdata->debugfsdir = NULL; + if (!sdata->debugfs.dir) + return; + + debugfs_remove_recursive(sdata->debugfs.dir); + sdata->debugfs.dir = NULL; } static int netdev_notify(struct notifier_block *nb, @@ -444,7 +324,7 @@ static int netdev_notify(struct notifier_block *nb, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - dir = sdata->debugfsdir; + dir = sdata->debugfs.dir; if (!dir) return 0; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 33a2e892115..3f41608c808 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -57,7 +57,6 @@ STA_FILE(tx_filtered, tx_filtered_count, LU); STA_FILE(tx_retry_failed, tx_retry_failed, LU); STA_FILE(tx_retry_count, tx_retry_count, LU); STA_FILE(last_signal, last_signal, D); -STA_FILE(last_qual, last_qual, D); STA_FILE(last_noise, last_noise, D); STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU); @@ -67,10 +66,11 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, char buf[100]; struct sta_info *sta = file->private_data; u32 staflags = get_sta_flags(sta); - int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s", + int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s", staflags & WLAN_STA_AUTH ? "AUTH\n" : "", staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", - staflags & WLAN_STA_PS ? "PS\n" : "", + staflags & WLAN_STA_PS_STA ? "PS (sta)\n" : "", + staflags & WLAN_STA_PS_DRIVER ? "PS (driver)\n" : "", staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", staflags & WLAN_STA_WME ? 
"WME\n" : "", @@ -157,14 +157,38 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, } STA_OPS(agg_status); +static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + char buf[200], *p = buf; + int i; + struct sta_info *sta = file->private_data; + struct ieee80211_sta_ht_cap *htc = &sta->sta.ht_cap; + + p += scnprintf(p, sizeof(buf) + buf - p, "ht %ssupported\n", + htc->ht_supported ? "" : "not "); + if (htc->ht_supported) { + p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.2x\n", htc->cap); + p += scnprintf(p, sizeof(buf)+buf-p, "ampdu factor/density: %d/%d\n", + htc->ampdu_factor, htc->ampdu_density); + p += scnprintf(p, sizeof(buf)+buf-p, "MCS mask:"); + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) + p += scnprintf(p, sizeof(buf)+buf-p, " %.2x", + htc->mcs.rx_mask[i]); + p += scnprintf(p, sizeof(buf)+buf-p, "\nMCS rx highest: %d\n", + le16_to_cpu(htc->mcs.rx_highest)); + p += scnprintf(p, sizeof(buf)+buf-p, "MCS tx params: %x\n", + htc->mcs.tx_params); + } + + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); +} +STA_OPS(ht_capa); + #define DEBUGFS_ADD(name) \ - sta->debugfs.name = debugfs_create_file(#name, 0400, \ + debugfs_create_file(#name, 0400, \ sta->debugfs.dir, sta, &sta_ ##name## _ops); -#define DEBUGFS_DEL(name) \ - debugfs_remove(sta->debugfs.name);\ - sta->debugfs.name = NULL; - void ieee80211_sta_debugfs_add(struct sta_info *sta) { @@ -209,36 +233,13 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(tx_retry_failed); DEBUGFS_ADD(tx_retry_count); DEBUGFS_ADD(last_signal); - DEBUGFS_ADD(last_qual); DEBUGFS_ADD(last_noise); DEBUGFS_ADD(wep_weak_iv_count); + DEBUGFS_ADD(ht_capa); } void ieee80211_sta_debugfs_remove(struct sta_info *sta) { - DEBUGFS_DEL(flags); - DEBUGFS_DEL(num_ps_buf_frames); - DEBUGFS_DEL(inactive_ms); - DEBUGFS_DEL(last_seq_ctrl); - DEBUGFS_DEL(agg_status); - DEBUGFS_DEL(aid); - DEBUGFS_DEL(dev); - DEBUGFS_DEL(rx_packets); - DEBUGFS_DEL(tx_packets); - DEBUGFS_DEL(rx_bytes); - DEBUGFS_DEL(tx_bytes); - DEBUGFS_DEL(rx_duplicates); - DEBUGFS_DEL(rx_fragments); - DEBUGFS_DEL(rx_dropped); - DEBUGFS_DEL(tx_fragments); - DEBUGFS_DEL(tx_filtered); - DEBUGFS_DEL(tx_retry_failed); - DEBUGFS_DEL(tx_retry_count); - DEBUGFS_DEL(last_signal); - DEBUGFS_DEL(last_qual); - DEBUGFS_DEL(last_noise); - DEBUGFS_DEL(wep_weak_iv_count); - - debugfs_remove(sta->debugfs.dir); + debugfs_remove_recursive(sta->debugfs.dir); sta->debugfs.dir = NULL; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 020a94a3110..921dd9c9ff6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -239,15 +239,16 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) } static inline int drv_ampdu_action(struct ieee80211_local *local, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn) { int ret = -EOPNOTSUPP; if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(&local->hw, action, + ret = local->ops->ampdu_action(&local->hw, vif, action, sta, tid, ssn); - trace_drv_ampdu_action(local, action, sta, tid, ssn, ret); + trace_drv_ampdu_action(local, vif, action, sta, tid, ssn, ret); return ret; } diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 37b9051afcf..ee94ea0c67e 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -131,17 +131,35 @@ TRACE_EVENT(drv_config, LOCAL_ENTRY __field(u32, changed) __field(int, ret) + 
__field(u32, flags) + __field(int, power_level) + __field(int, dynamic_ps_timeout) + __field(int, max_sleep_period) + __field(u16, listen_interval) + __field(u8, long_frame_max_tx_count) + __field(u8, short_frame_max_tx_count) + __field(int, center_freq) + __field(int, channel_type) ), TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed; __entry->ret = ret; + __entry->flags = local->hw.conf.flags; + __entry->power_level = local->hw.conf.power_level; + __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; + __entry->max_sleep_period = local->hw.conf.max_sleep_period; + __entry->listen_interval = local->hw.conf.listen_interval; + __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count; + __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count; + __entry->center_freq = local->hw.conf.channel->center_freq; + __entry->channel_type = local->hw.conf.channel_type; ), TP_printk( - LOCAL_PR_FMT " ch:%#x ret:%d", - LOCAL_PR_ARG, __entry->changed, __entry->ret + LOCAL_PR_FMT " ch:%#x freq:%d ret:%d", + LOCAL_PR_ARG, __entry->changed, __entry->center_freq, __entry->ret ) ); @@ -167,6 +185,8 @@ TRACE_EVENT(drv_bss_info_changed, __field(u64, timestamp) __field(u32, basic_rates) __field(u32, changed) + __field(bool, enable_beacon) + __field(u16, ht_operation_mode) ), TP_fast_assign( @@ -183,6 +203,8 @@ TRACE_EVENT(drv_bss_info_changed, __entry->assoc_cap = info->assoc_capability; __entry->timestamp = info->timestamp; __entry->basic_rates = info->basic_rates; + __entry->enable_beacon = info->enable_beacon; + __entry->ht_operation_mode = info->ht_operation_mode; ), TP_printk( @@ -634,11 +656,12 @@ TRACE_EVENT(drv_tx_last_beacon, TRACE_EVENT(drv_ampdu_action, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, u16 *ssn, int ret), - TP_ARGS(local, action, sta, tid, ssn, ret), + TP_ARGS(local, vif, action, sta, tid, ssn, ret), TP_STRUCT__entry( LOCAL_ENTRY @@ -647,10 +670,12 @@ TRACE_EVENT(drv_ampdu_action, __field(u16, tid) __field(u16, ssn) __field(int, ret) + VIF_ENTRY ), TP_fast_assign( LOCAL_ASSIGN; + VIF_ASSIGN; STA_ASSIGN; __entry->ret = ret; __entry->action = action; @@ -659,8 +684,8 @@ TRACE_EVENT(drv_ampdu_action, ), TP_printk( - LOCAL_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", - LOCAL_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret ) ); #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 48ef1a282b9..3787455fb69 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -134,14 +134,13 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.delba.params = cpu_to_le16(params); mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); } void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_local *local = sdata->local; u16 tid, params; u16 initiator; @@ -161,10 +160,9 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, WLAN_BACK_INITIATOR, 0); else { /* WLAN_BACK_RECIPIENT */ spin_lock_bh(&sta->lock); - sta->ampdu_mlme.tid_state_tx[tid] = - HT_AGG_STATE_OPERATIONAL; + if (sta->ampdu_mlme.tid_state_tx[tid] & 
HT_ADDBA_REQUESTED_MSK) + ___ieee80211_stop_tx_ba_session(sta, tid, + WLAN_BACK_RECIPIENT); spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, - WLAN_BACK_RECIPIENT); } } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f1362f32c17..10d13856f86 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -455,6 +455,10 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); + if (time_before(jiffies, ifibss->last_scan_completed + + IEEE80211_IBSS_MERGE_INTERVAL)) + return; + if (ieee80211_sta_active_ibss(sdata)) return; @@ -655,7 +659,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", sdata->dev->name, resp->da); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 588005c84a6..91dc8636d64 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -23,6 +23,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <linux/etherdevice.h> +#include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> #include "key.h" @@ -162,21 +163,17 @@ typedef unsigned __bitwise__ ieee80211_rx_result; /* frame is destined to interface currently processed (incl. multicast frames) */ #define IEEE80211_RX_RA_MATCH BIT(1) #define IEEE80211_RX_AMSDU BIT(2) -#define IEEE80211_RX_CMNTR_REPORTED BIT(3) -#define IEEE80211_RX_FRAGMENTED BIT(4) +#define IEEE80211_RX_FRAGMENTED BIT(3) +/* only add flags here that do not change with subframes of an aMPDU */ struct ieee80211_rx_data { struct sk_buff *skb; - struct net_device *dev; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_rx_status *status; - struct ieee80211_rate *rate; unsigned int flags; - int sent_ps_buffered; int queue; u32 tkip_iv32; u16 tkip_iv16; @@ -209,6 +206,9 @@ struct ieee80211_if_wds { struct ieee80211_if_vlan { struct list_head list; + + /* used for all tx if the VLAN is configured to 4-addr mode */ + struct sta_info *sta; }; struct mesh_stats { @@ -312,6 +312,8 @@ struct ieee80211_if_managed { } mfp; /* management frame protection */ int wmm_last_param_set; + + u8 use_4addr; }; enum ieee80211_ibss_request { @@ -353,6 +355,7 @@ struct ieee80211_if_mesh { struct work_struct work; struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; + struct timer_list mesh_path_root_timer; struct sk_buff_head skb_queue; unsigned long timers_running; @@ -362,23 +365,23 @@ struct ieee80211_if_mesh { u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; /* Active Path Selection Protocol Identifier */ - u8 mesh_pp_id[4]; + u8 mesh_pp_id; /* Active Path Selection Metric Identifier */ - u8 mesh_pm_id[4]; + u8 mesh_pm_id; /* Congestion Control Mode Identifier */ - u8 mesh_cc_id[4]; + u8 mesh_cc_id; /* Synchronization Protocol Identifier */ - u8 mesh_sp_id[4]; + u8 mesh_sp_id; /* Authentication Protocol Identifier */ - u8 mesh_auth_id[4]; - /* Local mesh Destination Sequence Number */ - u32 dsn; + u8 mesh_auth_id; + /* Local mesh Sequence Number */ + u32 sn; /* Last used PREQ ID */ u32 preq_id; atomic_t mpaths; - /* Timestamp 
of last DSN update */ - unsigned long last_dsn_update; - /* Timestamp of last DSN sent */ + /* Timestamp of last SN update */ + unsigned long last_sn_update; + /* Timestamp of last SN sent */ unsigned long last_preq; struct mesh_rmc *rmc; spinlock_t mesh_preq_queue_lock; @@ -471,74 +474,11 @@ struct ieee80211_sub_if_data { } u; #ifdef CONFIG_MAC80211_DEBUGFS - struct dentry *debugfsdir; - union { - struct { - struct dentry *drop_unencrypted; - struct dentry *bssid; - struct dentry *aid; - struct dentry *capab; - struct dentry *force_unicast_rateidx; - struct dentry *max_ratectrl_rateidx; - } sta; - struct { - struct dentry *drop_unencrypted; - struct dentry *num_sta_ps; - struct dentry *dtim_count; - struct dentry *force_unicast_rateidx; - struct dentry *max_ratectrl_rateidx; - struct dentry *num_buffered_multicast; - } ap; - struct { - struct dentry *drop_unencrypted; - struct dentry *peer; - struct dentry *force_unicast_rateidx; - struct dentry *max_ratectrl_rateidx; - } wds; - struct { - struct dentry *drop_unencrypted; - struct dentry *force_unicast_rateidx; - struct dentry *max_ratectrl_rateidx; - } vlan; - struct { - struct dentry *mode; - } monitor; - } debugfs; struct { + struct dentry *dir; struct dentry *default_key; struct dentry *default_mgmt_key; - } common_debugfs; - -#ifdef CONFIG_MAC80211_MESH - struct dentry *mesh_stats_dir; - struct { - struct dentry *fwded_mcast; - struct dentry *fwded_unicast; - struct dentry *fwded_frames; - struct dentry *dropped_frames_ttl; - struct dentry *dropped_frames_no_route; - struct dentry *estab_plinks; - struct timer_list mesh_path_timer; - } mesh_stats; - - struct dentry *mesh_config_dir; - struct { - struct dentry *dot11MeshRetryTimeout; - struct dentry *dot11MeshConfirmTimeout; - struct dentry *dot11MeshHoldingTimeout; - struct dentry *dot11MeshMaxRetries; - struct dentry *dot11MeshTTL; - struct dentry *auto_open_plinks; - struct dentry *dot11MeshMaxPeerLinks; - struct dentry *dot11MeshHWMPactivePathTimeout; - struct dentry *dot11MeshHWMPpreqMinInterval; - struct dentry *dot11MeshHWMPnetDiameterTraversalTime; - struct dentry *dot11MeshHWMPmaxPREQretries; - struct dentry *path_refresh_time; - struct dentry *min_discovery_timeout; - } mesh_config; -#endif - + } debugfs; #endif /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; @@ -639,7 +579,6 @@ struct ieee80211_local { /* number of interfaces with corresponding FIF_ flags */ int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; unsigned int filter_flags; /* FIF_* */ - struct iw_statistics wstats; /* protects the aggregated multicast list and filter calls */ spinlock_t filter_lock; @@ -662,6 +601,14 @@ struct ieee80211_local { bool suspended; /* + * Resuming is true while suspended, but when we're reprogramming the + * hardware -- at that time it's allowed to use ieee80211_queue_work() + * again even though some other parts of the stack are still suspended + * and we still drop received frames to avoid waking the stack. + */ + bool resuming; + + /* * quiescing is true during the suspend process _only_ to * ease timer cancelling etc. 
*/ @@ -730,10 +677,9 @@ struct ieee80211_local { unsigned long scanning; struct cfg80211_ssid scan_ssid; struct cfg80211_scan_request *int_scan_req; - struct cfg80211_scan_request *scan_req; + struct cfg80211_scan_request *scan_req, *hw_scan_req; struct ieee80211_channel *scan_channel; - const u8 *orig_ies; - int orig_ies_len; + enum ieee80211_band hw_scan_band; int scan_channel_idx; int scan_ies_len; @@ -800,6 +746,7 @@ struct ieee80211_local { unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ bool pspolling; + bool scan_ps_enabled; /* * PS can only be enabled when we have exactly one managed * interface (and monitors) in PS, this then points there. @@ -818,53 +765,6 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *rcdir; - struct dentry *rcname; - struct dentry *frequency; - struct dentry *total_ps_buffered; - struct dentry *wep_iv; - struct dentry *tsf; - struct dentry *queues; - struct dentry *reset; - struct dentry *noack; - struct dentry *statistics; - struct local_debugfsdentries_statsdentries { - struct dentry *transmitted_fragment_count; - struct dentry *multicast_transmitted_frame_count; - struct dentry *failed_count; - struct dentry *retry_count; - struct dentry *multiple_retry_count; - struct dentry *frame_duplicate_count; - struct dentry *received_fragment_count; - struct dentry *multicast_received_frame_count; - struct dentry *transmitted_frame_count; - struct dentry *wep_undecryptable_count; - struct dentry *num_scans; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - struct dentry *tx_handlers_drop; - struct dentry *tx_handlers_queued; - struct dentry *tx_handlers_drop_unencrypted; - struct dentry *tx_handlers_drop_fragment; - struct dentry *tx_handlers_drop_wep; - struct dentry *tx_handlers_drop_not_assoc; - struct dentry *tx_handlers_drop_unauth_port; - struct dentry *rx_handlers_drop; - struct dentry *rx_handlers_queued; - struct dentry *rx_handlers_drop_nullfunc; - struct dentry *rx_handlers_drop_defrag; - struct dentry *rx_handlers_drop_short; - struct dentry *rx_handlers_drop_passive_scan; - struct dentry *tx_expand_skb_head; - struct dentry *tx_expand_skb_head_cloned; - struct dentry *rx_expand_skb_head; - struct dentry *rx_expand_skb_head2; - struct dentry *rx_handlers_fragments; - struct dentry *tx_status_drop; -#endif - struct dentry *dot11ACKFailureCount; - struct dentry *dot11RTSFailureCount; - struct dentry *dot11FCSErrorCount; - struct dentry *dot11RTSSuccessCount; - } stats; struct dentry *stations; struct dentry *keys; } debugfs; @@ -877,8 +777,9 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } -/* this struct represents 802.11n's RA/TID combination */ +/* this struct represents 802.11n's RA/TID combination along with our vif */ struct ieee80211_ra_tid { + struct ieee80211_vif *vif; u8 ra[ETH_ALEN]; u16 tid; }; @@ -905,12 +806,13 @@ struct ieee802_11_elems { u8 *wmm_param; struct ieee80211_ht_cap *ht_cap_elem; struct ieee80211_ht_info *ht_info_elem; - u8 *mesh_config; + struct ieee80211_meshconf_ie *mesh_config; u8 *mesh_id; u8 *peer_link; u8 *preq; u8 *prep; u8 *perr; + struct ieee80211_rann_ie *rann; u8 *ch_switch_elem; u8 *country_elem; u8 *pwr_constr_elem; @@ -932,7 +834,6 @@ struct ieee802_11_elems { u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; - u8 mesh_config_len; u8 mesh_id_len; u8 peer_link_len; u8 preq_len; @@ -1055,6 +956,18 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff 
*skb, struct net_device *dev); +/* + * radiotap header for status frames + */ +struct ieee80211_tx_status_rtap_hdr { + struct ieee80211_radiotap_header hdr; + u8 rate; + u8 padding_for_rate; + __le16 tx_flags; + u8 data_retries; +} __attribute__ ((packed)); + + /* HT */ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, struct ieee80211_ht_cap *ht_cap_ie, @@ -1083,6 +996,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_back_parties initiator); +int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -1122,8 +1037,7 @@ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int ke struct ieee80211_hdr *hdr, const u8 *tsc, gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); -void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int encrypt); +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee802_11_parse_elems(u8 *start, size_t len, struct ieee802_11_elems *elems); u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, @@ -1160,7 +1074,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u8 *extra, size_t extra_len, const u8 *bssid, const u8 *key, u8 key_len, u8 key_idx); int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, - const u8 *ie, size_t ie_len); + const u8 *ie, size_t ie_len, + enum ieee80211_band band); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b8295cbd7e8..80c16f6e2af 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -184,10 +184,12 @@ static int ieee80211_open(struct net_device *dev) * No need to check netif_running since we do not allow * it to start up with this invalid address. */ - if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) + if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) { memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN); + } } /* @@ -212,8 +214,8 @@ static int ieee80211_open(struct net_device *dev) /* must be before the call to ieee80211_configure_filter */ local->monitors++; if (local->monitors == 1) { - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + local->hw.conf.flags |= IEEE80211_CONF_MONITOR; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) @@ -312,7 +314,7 @@ static int ieee80211_open(struct net_device *dev) if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); - netif_tx_start_all_queues(dev); + netif_start_queue(dev); return 0; err_del_interface: @@ -341,7 +343,7 @@ static int ieee80211_stop(struct net_device *dev) /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(dev); + netif_stop_queue(dev); /* * Now delete all active aggregation sessions. 
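
The ieee80211_tx_skb() change visible in the hunks above drops the old encrypt argument: callers that must leave a frame unencrypted now set a flag in the frame's control buffer instead, as the IBSS probe-response and delba hunks do. A minimal sketch of the new calling convention (the wrapper name is hypothetical; the flag and the call come straight from the hunks above):

/*
 * Sketch only: mark a management frame "do not encrypt" via the skb
 * control block rather than a boolean argument, then hand it to the
 * unified ieee80211_tx_skb().
 */
static void send_unencrypted_mgmt(struct ieee80211_sub_if_data *sdata,
				  struct sk_buff *skb)
{
	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);

	/* was: ieee80211_tx_skb(sdata, skb, 0); */
	info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
	ieee80211_tx_skb(sdata, skb);
}
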
@@ -433,8 +435,8 @@ static int ieee80211_stop(struct net_device *dev) local->monitors--; if (local->monitors == 0) { - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) @@ -750,14 +752,12 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ieee80211_mandatory_rates(sdata->local, sdata->local->hw.conf.channel->band); sdata->drop_unencrypted = 0; + if (type == NL80211_IFTYPE_STATION) + sdata->u.mgd.use_4addr = false; return 0; } -static struct device_type wiphy_type = { - .name = "wlan", -}; - int ieee80211_if_add(struct ieee80211_local *local, const char *name, struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params) @@ -788,8 +788,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, goto fail; memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); + memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); - SET_NETDEV_DEVTYPE(ndev, &wiphy_type); /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ sdata = netdev_priv(ndev); @@ -811,6 +811,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); + if (params) { + ndev->ieee80211_ptr->use_4addr = params->use_4addr; + if (type == NL80211_IFTYPE_STATION) + sdata->u.mgd.use_4addr = params->use_4addr; + } + ret = register_netdevice(ndev); if (ret) goto fail; @@ -854,22 +860,18 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) void ieee80211_remove_interfaces(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *tmp; + LIST_HEAD(unreg_list); ASSERT_RTNL(); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { - /* - * we cannot hold the iflist_mtx across unregister_netdevice, - * but we only need to hold it for list modifications to lock - * out readers since we're under the RTNL here as all other - * writers. 
- */ - mutex_lock(&local->iflist_mtx); list_del(&sdata->list); - mutex_unlock(&local->iflist_mtx); - unregister_netdevice(sdata->dev); + unregister_netdevice_queue(sdata->dev, &unreg_list); } + mutex_unlock(&local->iflist_mtx); + unregister_netdevice_many(&unreg_list); } static u32 ieee80211_idle_off(struct ieee80211_local *local, diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 9572e00f532..a49f93b79e9 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -118,18 +118,6 @@ struct ieee80211_key { struct { struct dentry *stalink; struct dentry *dir; - struct dentry *keylen; - struct dentry *flags; - struct dentry *keyidx; - struct dentry *hw_key_idx; - struct dentry *tx_rx_count; - struct dentry *algorithm; - struct dentry *tx_spec; - struct dentry *rx_spec; - struct dentry *replays; - struct dentry *icverrors; - struct dentry *key; - struct dentry *ifindex; int cnt; } debugfs; #endif diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 797f53942e5..8116d1a96a4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -9,7 +9,6 @@ */ #include <net/mac80211.h> -#include <net/ieee80211_radiotap.h> #include <linux/module.h> #include <linux/init.h> #include <linux/netdevice.h> @@ -30,26 +29,11 @@ #include "rate.h" #include "mesh.h" #include "wep.h" -#include "wme.h" -#include "aes_ccm.h" #include "led.h" #include "cfg.h" #include "debugfs.h" #include "debugfs_netdev.h" -/* - * For seeing transmitted packets on monitor interfaces - * we have a radiotap header too. - */ -struct ieee80211_tx_status_rtap_hdr { - struct ieee80211_radiotap_header hdr; - u8 rate; - u8 padding_for_rate; - __le16 tx_flags; - u8 data_retries; -} __attribute__ ((packed)); - - void ieee80211_configure_filter(struct ieee80211_local *local) { u64 mc; @@ -253,28 +237,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_ERP_SLOT; } -void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, - struct sk_buff *skb) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int tmp; - - skb->pkt_type = IEEE80211_TX_STATUS_MSG; - skb_queue_tail(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS ? 
- &local->skb_queue : &local->skb_queue_unreliable, skb); - tmp = skb_queue_len(&local->skb_queue) + - skb_queue_len(&local->skb_queue_unreliable); - while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT && - (skb = skb_dequeue(&local->skb_queue_unreliable))) { - dev_kfree_skb_irq(skb); - tmp--; - I802_DEBUG_INC(local->tx_status_drop); - } - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_tx_status_irqsafe); - static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; @@ -296,14 +258,14 @@ static void ieee80211_tasklet_handler(unsigned long data) break; case IEEE80211_DELBA_MSG: ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_stop_tx_ba_cb(local_to_hw(local), - ra_tid->ra, ra_tid->tid); + ieee80211_stop_tx_ba_cb(ra_tid->vif, ra_tid->ra, + ra_tid->tid); dev_kfree_skb(skb); break; case IEEE80211_ADDBA_MSG: ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - ieee80211_start_tx_ba_cb(local_to_hw(local), - ra_tid->ra, ra_tid->tid); + ieee80211_start_tx_ba_cb(ra_tid->vif, ra_tid->ra, + ra_tid->tid); dev_kfree_skb(skb); break ; default: @@ -315,299 +277,6 @@ static void ieee80211_tasklet_handler(unsigned long data) } } -static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, - struct sta_info *sta, - struct sk_buff *skb) -{ - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - /* - * XXX: This is temporary! - * - * The problem here is that when we get here, the driver will - * quite likely have pretty much overwritten info->control by - * using info->driver_data or info->rate_driver_data. Thus, - * when passing out the frame to the driver again, we would be - * passing completely bogus data since the driver would then - * expect a properly filled info->control. In mac80211 itself - * the same problem occurs, since we need info->control.vif - * internally. - * - * To fix this, we should send the frame through TX processing - * again. However, it's not that simple, since the frame will - * have been software-encrypted (if applicable) already, and - * encrypting it again doesn't do much good. So to properly do - * that, we not only have to skip the actual 'raw' encryption - * (key selection etc. still has to be done!) but also the - * sequence number assignment since that impacts the crypto - * encapsulation, of course. - * - * Hence, for now, fix the bug by just dropping the frame. - */ - goto drop; - - sta->tx_filtered_count++; - - /* - * Clear the TX filter mask for this STA when sending the next - * packet. If the STA went to power save mode, this will happen - * when it wakes up for the next time. - */ - set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT); - - /* - * This code races in the following way: - * - * (1) STA sends frame indicating it will go to sleep and does so - * (2) hardware/firmware adds STA to filter list, passes frame up - * (3) hardware/firmware processes TX fifo and suppresses a frame - * (4) we get TX status before having processed the frame and - * knowing that the STA has gone to sleep. - * - * This is actually quite unlikely even when both those events are - * processed from interrupts coming in quickly after one another or - * even at the same time because we queue both TX status events and - * RX frames to be processed by a tasklet and process them in the - * same order that they were received or TX status last. Hence, there - * is no race as long as the frame RX is processed before the next TX - * status, which drivers can ensure, see below. 
- * - * Note that this can only happen if the hardware or firmware can - * actually add STAs to the filter list, if this is done by the - * driver in response to set_tim() (which will only reduce the race - * this whole filtering tries to solve, not completely solve it) - * this situation cannot happen. - * - * To completely solve this race drivers need to make sure that they - * (a) don't mix the irq-safe/not irq-safe TX status/RX processing - * functions and - * (b) always process RX events before TX status events if ordering - * can be unknown, for example with different interrupt status - * bits. - */ - if (test_sta_flags(sta, WLAN_STA_PS) && - skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { - skb_queue_tail(&sta->tx_filtered, skb); - return; - } - - if (!test_sta_flags(sta, WLAN_STA_PS) && - !(info->flags & IEEE80211_TX_INTFL_RETRIED)) { - /* Software retry the packet once */ - info->flags |= IEEE80211_TX_INTFL_RETRIED; - ieee80211_add_pending_skb(local, skb); - return; - } - - drop: -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: dropped TX filtered frame, " - "queue_len=%d PS=%d @%lu\n", - wiphy_name(local->hw.wiphy), - skb_queue_len(&sta->tx_filtered), - !!test_sta_flags(sta, WLAN_STA_PS), jiffies); -#endif - dev_kfree_skb(skb); -} - -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) -{ - struct sk_buff *skb2; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - u16 frag, type; - __le16 fc; - struct ieee80211_supported_band *sband; - struct ieee80211_tx_status_rtap_hdr *rthdr; - struct ieee80211_sub_if_data *sdata; - struct net_device *prev_dev = NULL; - struct sta_info *sta; - int retry_count = -1, i; - - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - /* the HW cannot have attempted that rate */ - if (i >= hw->max_rates) { - info->status.rates[i].idx = -1; - info->status.rates[i].count = 0; - } - - retry_count += info->status.rates[i].count; - } - if (retry_count < 0) - retry_count = 0; - - rcu_read_lock(); - - sband = local->hw.wiphy->bands[info->band]; - - sta = sta_info_get(local, hdr->addr1); - - if (sta) { - if (!(info->flags & IEEE80211_TX_STAT_ACK) && - test_sta_flags(sta, WLAN_STA_PS)) { - /* - * The STA is in power save mode, so assume - * that this TX packet failed because of that. - */ - ieee80211_handle_filtered_frame(local, sta, skb); - rcu_read_unlock(); - return; - } - - fc = hdr->frame_control; - - if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && - (ieee80211_is_data_qos(fc))) { - u16 tid, ssn; - u8 *qc; - - qc = ieee80211_get_qos_ctl(hdr); - tid = qc[0] & 0xf; - ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) - & IEEE80211_SCTL_SEQ); - ieee80211_send_bar(sta->sdata, hdr->addr1, - tid, ssn); - } - - if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { - ieee80211_handle_filtered_frame(local, sta, skb); - rcu_read_unlock(); - return; - } else { - if (!(info->flags & IEEE80211_TX_STAT_ACK)) - sta->tx_retry_failed++; - sta->tx_retry_count += retry_count; - } - - rate_control_tx_status(local, sband, sta, skb); - if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); - } - - rcu_read_unlock(); - - ieee80211_led_tx(local, 0); - - /* SNMP counters - * Fragments are passed to low-level drivers as separate skbs, so these - * are actually fragments, not frames. Update frame counters only for - * the first fragment of the frame. 
*/ - - frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; - type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE; - - if (info->flags & IEEE80211_TX_STAT_ACK) { - if (frag == 0) { - local->dot11TransmittedFrameCount++; - if (is_multicast_ether_addr(hdr->addr1)) - local->dot11MulticastTransmittedFrameCount++; - if (retry_count > 0) - local->dot11RetryCount++; - if (retry_count > 1) - local->dot11MultipleRetryCount++; - } - - /* This counter shall be incremented for an acknowledged MPDU - * with an individual address in the address 1 field or an MPDU - * with a multicast address in the address 1 field of type Data - * or Management. */ - if (!is_multicast_ether_addr(hdr->addr1) || - type == IEEE80211_FTYPE_DATA || - type == IEEE80211_FTYPE_MGMT) - local->dot11TransmittedFragmentCount++; - } else { - if (frag == 0) - local->dot11FailedCount++; - } - - /* this was a transmitted frame, but now we want to reuse it */ - skb_orphan(skb); - - /* - * This is a bit racy but we can avoid a lot of work - * with this test... - */ - if (!local->monitors && !local->cooked_mntrs) { - dev_kfree_skb(skb); - return; - } - - /* send frame to monitor interfaces now */ - - if (skb_headroom(skb) < sizeof(*rthdr)) { - printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); - dev_kfree_skb(skb); - return; - } - - rthdr = (struct ieee80211_tx_status_rtap_hdr *) - skb_push(skb, sizeof(*rthdr)); - - memset(rthdr, 0, sizeof(*rthdr)); - rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); - rthdr->hdr.it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | - (1 << IEEE80211_RADIOTAP_DATA_RETRIES) | - (1 << IEEE80211_RADIOTAP_RATE)); - - if (!(info->flags & IEEE80211_TX_STAT_ACK) && - !is_multicast_ether_addr(hdr->addr1)) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - - /* - * XXX: Once radiotap gets the bitmap reset thing the vendor - * extensions proposal contains, we can actually report - * the whole set of tries we did. - */ - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) - rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) - rthdr->rate = sband->bitrates[ - info->status.rates[0].idx].bitrate / 5; - - /* for now report the total retry_count */ - rthdr->data_retries = retry_count; - - /* XXX: is this sufficient for BPF? 
*/ - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { - if (!netif_running(sdata->dev)) - continue; - - if (prev_dev) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) { - skb2->dev = prev_dev; - netif_rx(skb2); - } - } - - prev_dev = sdata->dev; - } - } - if (prev_dev) { - skb->dev = prev_dev; - netif_rx(skb); - skb = NULL; - } - rcu_read_unlock(); - dev_kfree_skb(skb); -} -EXPORT_SYMBOL(ieee80211_tx_status); - static void ieee80211_restart_work(struct work_struct *work) { struct ieee80211_local *local = @@ -659,7 +328,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; - wiphy->netnsok = true; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | + WIPHY_FLAG_4ADDR_AP | + WIPHY_FLAG_4ADDR_STATION; wiphy->privid = mac80211_wiphy_privid; /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ @@ -901,6 +572,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) i++; } } + local->int_scan_req->n_channels = i; local->network_latency_notifier.notifier_call = ieee80211_max_network_latency; @@ -923,7 +595,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_wep: sta_info_stop(local); fail_sta_info: - debugfs_hw_del(local); destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); @@ -959,10 +630,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) ieee80211_clear_tx_pending(local); sta_info_stop(local); rate_control_deinitialize(local); - debugfs_hw_del(local); - if (skb_queue_len(&local->skb_queue) - || skb_queue_len(&local->skb_queue_unreliable)) + if (skb_queue_len(&local->skb_queue) || + skb_queue_len(&local->skb_queue_unreliable)) printk(KERN_WARNING "%s: skb_queue not empty\n", wiphy_name(local->hw.wiphy)); skb_queue_purge(&local->skb_queue); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index f7364e56f1e..6a433142959 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 open80211s Ltd. + * Copyright (c) 2008, 2009 open80211s Ltd. 
* Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> * @@ -14,18 +14,14 @@ #define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) +#define IEEE80211_MESH_RANN_INTERVAL (1 * HZ) -#define PP_OFFSET 1 /* Path Selection Protocol */ -#define PM_OFFSET 5 /* Path Selection Metric */ -#define CC_OFFSET 9 /* Congestion Control Mode */ -#define SP_OFFSET 13 /* Synchronization Protocol */ -#define AUTH_OFFSET 17 /* Authentication Protocol */ -#define CAPAB_OFFSET 22 -#define CAPAB_ACCEPT_PLINKS 0x80 -#define CAPAB_FORWARDING 0x10 +#define MESHCONF_CAPAB_ACCEPT_PLINKS 0x01 +#define MESHCONF_CAPAB_FORWARDING 0x08 #define TMR_RUNNING_HK 0 #define TMR_RUNNING_MP 1 +#define TMR_RUNNING_MPR 2 int mesh_allocated; static struct kmem_cache *rm_cache; @@ -50,7 +46,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - ifmsh->wrkq_flags |= MESH_WORK_HOUSEKEEPING; + set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); if (local->quiescing) { set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); @@ -85,11 +81,11 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ if (ifmsh->mesh_id_len == ie->mesh_id_len && memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && - memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_sp_id, ie->mesh_config + SP_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_auth_id, ie->mesh_config + AUTH_OFFSET, 4) == 0) + (ifmsh->mesh_pp_id == ie->mesh_config->meshconf_psel) && + (ifmsh->mesh_pm_id == ie->mesh_config->meshconf_pmetric) && + (ifmsh->mesh_cc_id == ie->mesh_config->meshconf_congest) && + (ifmsh->mesh_sp_id == ie->mesh_config->meshconf_synch) && + (ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)) return true; return false; @@ -102,7 +98,8 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { - return (*(ie->mesh_config + CAPAB_OFFSET) & CAPAB_ACCEPT_PLINKS) != 0; + return (ie->mesh_config->meshconf_cap & + MESHCONF_CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -128,18 +125,11 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_ids_set_default(struct ieee80211_if_mesh *sta) { - u8 oui[3] = {0x00, 0x0F, 0xAC}; - - memcpy(sta->mesh_pp_id, oui, sizeof(oui)); - memcpy(sta->mesh_pm_id, oui, sizeof(oui)); - memcpy(sta->mesh_cc_id, oui, sizeof(oui)); - memcpy(sta->mesh_sp_id, oui, sizeof(oui)); - memcpy(sta->mesh_auth_id, oui, sizeof(oui)); - sta->mesh_pp_id[sizeof(oui)] = 0; - sta->mesh_pm_id[sizeof(oui)] = 0; - sta->mesh_cc_id[sizeof(oui)] = 0xff; - sta->mesh_sp_id[sizeof(oui)] = 0xff; - sta->mesh_auth_id[sizeof(oui)] = 0x0; + sta->mesh_pp_id = 0; /* HWMP */ + sta->mesh_pm_id = 0; /* Airtime */ + sta->mesh_cc_id = 0; /* Disabled */ + sta->mesh_sp_id = 0; /* Neighbor Offset */ + sta->mesh_auth_id = 0; /* Disabled */ } int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -205,8 +195,8 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, list_del(&p->list); kmem_cache_free(rm_cache, p); --entries; - } else if ((seqnum == p->seqnum) - && (memcmp(sa, p->sa, ETH_ALEN) == 0)) + } else if ((seqnum == p->seqnum) && + (memcmp(sa, p->sa, ETH_ALEN) == 0)) return -1; } @@ 
-228,6 +218,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) struct ieee80211_supported_band *sband; u8 *pos; int len, i, rate; + u8 neighbors; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; len = sband->n_bitrates; @@ -251,46 +242,49 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) } } + if (sband->band == IEEE80211_BAND_2GHZ) { + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq); + } + pos = skb_put(skb, 2 + sdata->u.mesh.mesh_id_len); *pos++ = WLAN_EID_MESH_ID; *pos++ = sdata->u.mesh.mesh_id_len; if (sdata->u.mesh.mesh_id_len) memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); - pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN); + pos = skb_put(skb, 2 + sizeof(struct ieee80211_meshconf_ie)); *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = IEEE80211_MESH_CONFIG_LEN; - /* Version */ - *pos++ = 1; + *pos++ = sizeof(struct ieee80211_meshconf_ie); /* Active path selection protocol ID */ - memcpy(pos, sdata->u.mesh.mesh_pp_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_pp_id; /* Active path selection metric ID */ - memcpy(pos, sdata->u.mesh.mesh_pm_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_pm_id; /* Congestion control mode identifier */ - memcpy(pos, sdata->u.mesh.mesh_cc_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_cc_id; /* Synchronization protocol identifier */ - memcpy(pos, sdata->u.mesh.mesh_sp_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_sp_id; /* Authentication Protocol identifier */ - memcpy(pos, sdata->u.mesh.mesh_auth_id, 4); - pos += 4; + *pos++ = sdata->u.mesh.mesh_auth_id; - /* Mesh Formation Info */ - memset(pos, 0x00, 1); - pos += 1; + /* Mesh Formation Info - number of neighbors */ + neighbors = atomic_read(&sdata->u.mesh.mshstats.estab_plinks); + /* Number of neighbor mesh STAs or 15 whichever is smaller */ + neighbors = (neighbors > 15) ? 15 : neighbors; + *pos++ = neighbors << 1; /* Mesh capability */ sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); - *pos = CAPAB_FORWARDING; - *pos++ |= sdata->u.mesh.accepting_plinks ? CAPAB_ACCEPT_PLINKS : 0x00; + *pos = MESHCONF_CAPAB_FORWARDING; + *pos++ |= sdata->u.mesh.accepting_plinks ? 
+ MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; return; @@ -355,6 +349,34 @@ static void ieee80211_mesh_path_timer(unsigned long data) ieee80211_queue_work(&local->hw, &ifmsh->work); } +static void ieee80211_mesh_path_root_timer(unsigned long data) +{ + struct ieee80211_sub_if_data *sdata = + (struct ieee80211_sub_if_data *) data; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; + + set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); + + if (local->quiescing) { + set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); + return; + } + + ieee80211_queue_work(&local->hw, &ifmsh->work); +} + +void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) +{ + if (ifmsh->mshcfg.dot11MeshHWMPRootMode) + set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); + else { + clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); + /* stop running timer */ + del_timer_sync(&ifmsh->mesh_path_root_timer); + } +} + /** * ieee80211_fill_mesh_addresses - fill addresses of a locally originated mesh frame * @hdr: 802.11 frame header @@ -365,8 +387,9 @@ static void ieee80211_mesh_path_timer(unsigned long data) * * Return the length of the 802.11 (does not include a mesh control header) */ -int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, char - *meshda, char *meshsa) { +int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, + const u8 *meshda, const u8 *meshsa) +{ if (is_multicast_ether_addr(meshda)) { *fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA TA SA */ @@ -404,7 +427,7 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, char *addr5, char *addr6) { int aelen = 0; - memset(meshhdr, 0, sizeof(meshhdr)); + memset(meshhdr, 0, sizeof(*meshhdr)); meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); sdata->u.mesh.mesh_seqnum++; @@ -448,6 +471,15 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); } +static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + mesh_path_tx_root_frame(sdata); + mod_timer(&ifmsh->mesh_path_root_timer, + round_jiffies(jiffies + IEEE80211_MESH_RANN_INTERVAL)); +} + #ifdef CONFIG_PM void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) { @@ -462,6 +494,8 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); if (del_timer_sync(&ifmsh->mesh_path_timer)) set_bit(TMR_RUNNING_MP, &ifmsh->timers_running); + if (del_timer_sync(&ifmsh->mesh_path_root_timer)) + set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); } void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) @@ -472,6 +506,9 @@ void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) add_timer(&ifmsh->housekeeping_timer); if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running)) add_timer(&ifmsh->mesh_path_timer); + if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running)) + add_timer(&ifmsh->mesh_path_root_timer); + ieee80211_mesh_root_setup(ifmsh); } #endif @@ -480,7 +517,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_local *local = sdata->local; - ifmsh->wrkq_flags |= MESH_WORK_HOUSEKEEPING; + set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); + ieee80211_mesh_root_setup(ifmsh); ieee80211_queue_work(&local->hw, &ifmsh->work); 
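
For the mesh configuration IE built in the hunk above, the last two meaningful octets can be checked by hand: with, say, three established peer links and peer links still being accepted, the Mesh Formation Info octet is 3 << 1 = 0x06 and the capability octet is 0x08 | 0x01 = 0x09. A small sketch under those assumptions (helper names are hypothetical; the constants are the MESHCONF_* values defined above):

/*
 * Sketch, not part of the patch: recomputes the Mesh Formation Info
 * and Mesh Capability octets exactly as the beacon-building code
 * above does.
 */
static u8 mesh_formation_info(u8 estab_plinks)
{
	u8 neighbors = (estab_plinks > 15) ? 15 : estab_plinks;

	return neighbors << 1;			/* 3 links -> 0x06 */
}

static u8 mesh_capability(bool accepting_plinks)
{
	return MESHCONF_CAPAB_FORWARDING |
	       (accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0);
						/* -> 0x08 | 0x01 = 0x09 */
}
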
sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | @@ -491,6 +529,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { del_timer_sync(&sdata->u.mesh.housekeeping_timer); + del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); /* * If the timer fired while we waited for it, it will have * requeued the work. Now the work will be running again @@ -561,7 +600,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status) { switch (mgmt->u.action.category) { - case PLINK_CATEGORY: + case MESH_PLINK_CATEGORY: mesh_rx_plink_frame(sdata, mgmt, len, rx_status); break; case MESH_PATH_SEL_CATEGORY: @@ -628,6 +667,9 @@ static void ieee80211_mesh_work(struct work_struct *work) if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) ieee80211_mesh_housekeeping(sdata, ifmsh); + + if (test_and_clear_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags)) + ieee80211_mesh_rootpath(sdata); } void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) @@ -673,7 +715,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) MESH_MIN_DISCOVERY_TIMEOUT; ifmsh->accepting_plinks = true; ifmsh->preq_id = 0; - ifmsh->dsn = 0; + ifmsh->sn = 0; atomic_set(&ifmsh->mpaths, 0); mesh_rmc_init(sdata); ifmsh->last_preq = jiffies; @@ -684,6 +726,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, (unsigned long) sdata); + setup_timer(&ifmsh->mesh_path_root_timer, + ieee80211_mesh_path_root_timer, + (unsigned long) sdata); INIT_LIST_HEAD(&ifmsh->preq_queue.list); spin_lock_init(&ifmsh->mesh_preq_queue_lock); } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index dd1c19319f0..85562c59d7d 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 open80211s Ltd. + * Copyright (c) 2008, 2009 open80211s Ltd. * Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> * @@ -26,7 +26,7 @@ * * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path - * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence + * @MESH_PATH_SN_VALID: the mesh path contains a valid destination sequence * number * @MESH_PATH_FIXED: the mesh path has been manually set and should not be * modified @@ -38,7 +38,7 @@ enum mesh_path_flags { MESH_PATH_ACTIVE = BIT(0), MESH_PATH_RESOLVING = BIT(1), - MESH_PATH_DSN_VALID = BIT(2), + MESH_PATH_SN_VALID = BIT(2), MESH_PATH_FIXED = BIT(3), MESH_PATH_RESOLVED = BIT(4), }; @@ -53,11 +53,13 @@ enum mesh_path_flags { * to grow. 
* @MESH_WORK_GROW_MPP_TABLE: the mesh portals table is full and needs to * grow + * @MESH_WORK_ROOT: the mesh root station needs to send a frame */ enum mesh_deferred_task_flags { MESH_WORK_HOUSEKEEPING, MESH_WORK_GROW_MPATH_TABLE, MESH_WORK_GROW_MPP_TABLE, + MESH_WORK_ROOT, }; /** @@ -70,7 +72,7 @@ enum mesh_deferred_task_flags { * @timer: mesh path discovery timer * @frame_queue: pending queue for frames sent to this destination while the * path is unresolved - * @dsn: destination sequence number of the destination + * @sn: target sequence number * @metric: current metric to this destination * @hop_count: hops to destination * @exp_time: in jiffies, when the path will expire or when it expired @@ -94,7 +96,7 @@ struct mesh_path { struct timer_list timer; struct sk_buff_head frame_queue; struct rcu_head rcu; - u32 dsn; + u32 sn; u32 metric; u8 hop_count; unsigned long exp_time; @@ -174,7 +176,7 @@ struct mesh_rmc { #define MESH_CFG_CMP_LEN (IEEE80211_MESH_CONFIG_LEN - 2) /* Default values, timeouts in ms */ -#define MESH_TTL 5 +#define MESH_TTL 31 #define MESH_MAX_RETR 3 #define MESH_RET_T 100 #define MESH_CONF_T 100 @@ -186,8 +188,9 @@ struct mesh_rmc { */ #define MESH_PREQ_MIN_INT 10 #define MESH_DIAM_TRAVERSAL_TIME 50 -/* Paths will be refreshed if they are closer than PATH_REFRESH_TIME to their - * expiration +/* A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds before + * timing out. This way it will remain ACTIVE and no data frames will be + * unnecesarily held in the pending queue. */ #define MESH_PATH_REFRESH_TIME 1000 #define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME) @@ -206,13 +209,19 @@ struct mesh_rmc { #define MESH_MAX_MPATHS 1024 /* Pending ANA approval */ -#define PLINK_CATEGORY 30 +#define MESH_PLINK_CATEGORY 30 #define MESH_PATH_SEL_CATEGORY 32 +#define MESH_PATH_SEL_ACTION 0 + +/* PERR reason codes */ +#define PEER_RCODE_UNSPECIFIED 11 +#define PERR_RCODE_NO_ROUTE 12 +#define PERR_RCODE_DEST_UNREACH 13 /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, - char *da, char *sa); + const u8 *da, const u8 *sa); int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata, char *addr4, char *addr5, char *addr6); @@ -234,6 +243,7 @@ ieee80211_rx_result ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); +void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh); /* Mesh paths */ int mesh_nexthop_lookup(struct sk_buff *skb, @@ -274,8 +284,8 @@ void mesh_mpp_table_grow(void); u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl); /* Mesh paths */ -int mesh_path_error_tx(u8 *dest, __le32 dest_dsn, u8 *ra, - struct ieee80211_sub_if_data *sdata); +int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, __le16 target_rcode, + const u8 *ra, struct ieee80211_sub_if_data *sdata); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); @@ -288,6 +298,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata); void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); +void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); 
extern int mesh_paths_generation; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 29b82e98eff..d28acb6b1f8 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 open80211s Ltd. + * Copyright (c) 2008, 2009 open80211s Ltd. * Author: Luis Carlos Cobo <luisca@cozybit.com> * * This program is free software; you can redistribute it and/or modify @@ -9,6 +9,12 @@ #include "mesh.h" +#ifdef CONFIG_MAC80211_VERBOSE_MHWMP_DEBUG +#define mhwmp_dbg(fmt, args...) printk(KERN_DEBUG "Mesh HWMP: " fmt, ##args) +#else +#define mhwmp_dbg(fmt, args...) do { (void)(0); } while (0) +#endif + #define TEST_FRAME_LEN 8192 #define MAX_METRIC 0xffffffff #define ARITH_SHIFT 8 @@ -21,6 +27,12 @@ #define MP_F_DO 0x1 /* Reply and forward */ #define MP_F_RF 0x2 +/* Unknown Sequence Number */ +#define MP_F_USN 0x01 +/* Reason code Present */ +#define MP_F_RCODE 0x02 + +static void mesh_queue_preq(struct mesh_path *, u8); static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) { @@ -29,6 +41,13 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) return get_unaligned_le32(preq_elem + offset); } +static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) +{ + if (ae) + offset += 6; + return get_unaligned_le16(preq_elem + offset); +} + /* HWMP IE processing macros */ #define AE_F (1<<6) #define AE_F_SET(x) (*x & AE_F) @@ -37,30 +56,33 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) #define PREQ_IE_TTL(x) (*(x + 2)) #define PREQ_IE_PREQ_ID(x) u32_field_get(x, 3, 0) #define PREQ_IE_ORIG_ADDR(x) (x + 7) -#define PREQ_IE_ORIG_DSN(x) u32_field_get(x, 13, 0); +#define PREQ_IE_ORIG_SN(x) u32_field_get(x, 13, 0); #define PREQ_IE_LIFETIME(x) u32_field_get(x, 17, AE_F_SET(x)); #define PREQ_IE_METRIC(x) u32_field_get(x, 21, AE_F_SET(x)); -#define PREQ_IE_DST_F(x) (*(AE_F_SET(x) ? x + 32 : x + 26)) -#define PREQ_IE_DST_ADDR(x) (AE_F_SET(x) ? x + 33 : x + 27) -#define PREQ_IE_DST_DSN(x) u32_field_get(x, 33, AE_F_SET(x)); +#define PREQ_IE_TARGET_F(x) (*(AE_F_SET(x) ? x + 32 : x + 26)) +#define PREQ_IE_TARGET_ADDR(x) (AE_F_SET(x) ? x + 33 : x + 27) +#define PREQ_IE_TARGET_SN(x) u32_field_get(x, 33, AE_F_SET(x)); #define PREP_IE_FLAGS(x) PREQ_IE_FLAGS(x) #define PREP_IE_HOPCOUNT(x) PREQ_IE_HOPCOUNT(x) #define PREP_IE_TTL(x) PREQ_IE_TTL(x) #define PREP_IE_ORIG_ADDR(x) (x + 3) -#define PREP_IE_ORIG_DSN(x) u32_field_get(x, 9, 0); +#define PREP_IE_ORIG_SN(x) u32_field_get(x, 9, 0); #define PREP_IE_LIFETIME(x) u32_field_get(x, 13, AE_F_SET(x)); #define PREP_IE_METRIC(x) u32_field_get(x, 17, AE_F_SET(x)); -#define PREP_IE_DST_ADDR(x) (AE_F_SET(x) ? x + 27 : x + 21) -#define PREP_IE_DST_DSN(x) u32_field_get(x, 27, AE_F_SET(x)); +#define PREP_IE_TARGET_ADDR(x) (AE_F_SET(x) ? 
x + 27 : x + 21) +#define PREP_IE_TARGET_SN(x) u32_field_get(x, 27, AE_F_SET(x)); -#define PERR_IE_DST_ADDR(x) (x + 2) -#define PERR_IE_DST_DSN(x) u32_field_get(x, 8, 0); +#define PERR_IE_TTL(x) (*(x)) +#define PERR_IE_TARGET_FLAGS(x) (*(x + 2)) +#define PERR_IE_TARGET_ADDR(x) (x + 3) +#define PERR_IE_TARGET_SN(x) u32_field_get(x, 9, 0); +#define PERR_IE_TARGET_RCODE(x) u16_field_get(x, 13, 0); #define MSEC_TO_TU(x) (x*1000/1024) -#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0) -#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) +#define SN_GT(x, y) ((long) (y) - (long) (x) < 0) +#define SN_LT(x, y) ((long) (x) - (long) (y) < 0) #define net_traversal_jiffies(s) \ msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime) @@ -75,13 +97,17 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) enum mpath_frame_type { MPATH_PREQ = 0, MPATH_PREP, - MPATH_PERR + MPATH_PERR, + MPATH_RANN }; +static const u8 broadcast_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, - u8 *orig_addr, __le32 orig_dsn, u8 dst_flags, u8 *dst, - __le32 dst_dsn, u8 *da, u8 hop_count, u8 ttl, __le32 lifetime, - __le32 metric, __le32 preq_id, struct ieee80211_sub_if_data *sdata) + u8 *orig_addr, __le32 orig_sn, u8 target_flags, u8 *target, + __le32 target_sn, const u8 *da, u8 hop_count, u8 ttl, + __le32 lifetime, __le32 metric, __le32 preq_id, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); @@ -103,21 +129,30 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - /* BSSID is left zeroed, wildcard value */ + /* BSSID == SA */ + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; - mgmt->u.action.u.mesh_action.action_code = action; + mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; switch (action) { case MPATH_PREQ: + mhwmp_dbg("sending PREQ to %pM\n", target); ie_len = 37; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: + mhwmp_dbg("sending PREP to %pM\n", target); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; break; + case MPATH_RANN: + mhwmp_dbg("sending RANN from %pM\n", orig_addr); + ie_len = sizeof(struct ieee80211_rann_ie); + pos = skb_put(skb, 2 + ie_len); + *pos++ = WLAN_EID_RANN; + break; default: kfree_skb(skb); return -ENOTSUPP; @@ -133,34 +168,40 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, } memcpy(pos, orig_addr, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &orig_dsn, 4); - pos += 4; - memcpy(pos, &lifetime, 4); + memcpy(pos, &orig_sn, 4); pos += 4; + if (action != MPATH_RANN) { + memcpy(pos, &lifetime, 4); + pos += 4; + } memcpy(pos, &metric, 4); pos += 4; if (action == MPATH_PREQ) { /* destination count */ *pos++ = 1; - *pos++ = dst_flags; + *pos++ = target_flags; + } + if (action != MPATH_RANN) { + memcpy(pos, target, ETH_ALEN); + pos += ETH_ALEN; + memcpy(pos, &target_sn, 4); } - memcpy(pos, dst, ETH_ALEN); - pos += ETH_ALEN; - memcpy(pos, &dst_dsn, 4); - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); return 0; } /** * mesh_send_path error - Sends a PERR mesh management frame * - * @dst: broken destination - * @dst_dsn: dsn of the broken destination + * @target: broken destination + * @target_sn: SN of the broken 
destination + * @target_rcode: reason code for this PERR * @ra: node this frame is addressed to */ -int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, - struct ieee80211_sub_if_data *sdata) +int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, + __le16 target_rcode, const u8 *ra, + struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); @@ -184,20 +225,32 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; - mgmt->u.action.u.mesh_action.action_code = MPATH_PERR; - ie_len = 12; + mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION; + ie_len = 15; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PERR; *pos++ = ie_len; - /* mode flags, reserved */ - *pos++ = 0; + /* ttl */ + *pos++ = MESH_TTL; /* number of destinations */ *pos++ = 1; - memcpy(pos, dst, ETH_ALEN); + /* + * flags bit, bit 1 is unset if we know the sequence number and + * bit 2 is set if we have a reason code + */ + *pos = 0; + if (!target_sn) + *pos |= MP_F_USN; + if (target_rcode) + *pos |= MP_F_RCODE; + pos++; + memcpy(pos, target, ETH_ALEN); pos += ETH_ALEN; - memcpy(pos, &dst_dsn, 4); + memcpy(pos, &target_sn, 4); + pos += 4; + memcpy(pos, &target_rcode, 2); - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); return 0; } @@ -269,18 +322,17 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, */ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, - u8 *hwmp_ie) + u8 *hwmp_ie, enum mpath_frame_type action) { struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; u8 *orig_addr, *ta; - u32 orig_dsn, orig_metric; + u32 orig_sn, orig_metric; unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; bool process = true; - u8 action = mgmt->u.action.u.mesh_action.action_code; rcu_read_lock(); sta = sta_info_get(local, mgmt->sa); @@ -296,7 +348,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, switch (action) { case MPATH_PREQ: orig_addr = PREQ_IE_ORIG_ADDR(hwmp_ie); - orig_dsn = PREQ_IE_ORIG_DSN(hwmp_ie); + orig_sn = PREQ_IE_ORIG_SN(hwmp_ie); orig_lifetime = PREQ_IE_LIFETIME(hwmp_ie); orig_metric = PREQ_IE_METRIC(hwmp_ie); break; @@ -309,7 +361,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, * information from both PREQ and PREP frames. 
*/ orig_addr = PREP_IE_ORIG_ADDR(hwmp_ie); - orig_dsn = PREP_IE_ORIG_DSN(hwmp_ie); + orig_sn = PREP_IE_ORIG_SN(hwmp_ie); orig_lifetime = PREP_IE_LIFETIME(hwmp_ie); orig_metric = PREP_IE_METRIC(hwmp_ie); break; @@ -335,9 +387,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (mpath->flags & MESH_PATH_FIXED) fresh_info = false; else if ((mpath->flags & MESH_PATH_ACTIVE) && - (mpath->flags & MESH_PATH_DSN_VALID)) { - if (DSN_GT(mpath->dsn, orig_dsn) || - (mpath->dsn == orig_dsn && + (mpath->flags & MESH_PATH_SN_VALID)) { + if (SN_GT(mpath->sn, orig_sn) || + (mpath->sn == orig_sn && action == MPATH_PREQ && new_metric > mpath->metric)) { process = false; @@ -356,9 +408,9 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (fresh_info) { mesh_path_assign_nexthop(mpath, sta); - mpath->flags |= MESH_PATH_DSN_VALID; + mpath->flags |= MESH_PATH_SN_VALID; mpath->metric = new_metric; - mpath->dsn = orig_dsn; + mpath->sn = orig_sn; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; mesh_path_activate(mpath); @@ -397,7 +449,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (fresh_info) { mesh_path_assign_nexthop(mpath, sta); - mpath->flags &= ~MESH_PATH_DSN_VALID; + mpath->flags &= ~MESH_PATH_SN_VALID; mpath->metric = last_hop_metric; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; @@ -419,44 +471,47 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *dst_addr, *orig_addr; - u8 dst_flags, ttl; - u32 orig_dsn, dst_dsn, lifetime; + u8 *target_addr, *orig_addr; + u8 target_flags, ttl; + u32 orig_sn, target_sn, lifetime; bool reply = false; bool forward = true; - /* Update destination DSN, if present */ - dst_addr = PREQ_IE_DST_ADDR(preq_elem); + /* Update target SN, if present */ + target_addr = PREQ_IE_TARGET_ADDR(preq_elem); orig_addr = PREQ_IE_ORIG_ADDR(preq_elem); - dst_dsn = PREQ_IE_DST_DSN(preq_elem); - orig_dsn = PREQ_IE_ORIG_DSN(preq_elem); - dst_flags = PREQ_IE_DST_F(preq_elem); + target_sn = PREQ_IE_TARGET_SN(preq_elem); + orig_sn = PREQ_IE_ORIG_SN(preq_elem); + target_flags = PREQ_IE_TARGET_F(preq_elem); - if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { + mhwmp_dbg("received PREQ from %pM\n", orig_addr); + + if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { + mhwmp_dbg("PREQ is for us\n"); forward = false; reply = true; metric = 0; - if (time_after(jiffies, ifmsh->last_dsn_update + + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifmsh->last_dsn_update)) { - dst_dsn = ++ifmsh->dsn; - ifmsh->last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_sn_update)) { + target_sn = ++ifmsh->sn; + ifmsh->last_sn_update = jiffies; } } else { rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (mpath) { - if ((!(mpath->flags & MESH_PATH_DSN_VALID)) || - DSN_LT(mpath->dsn, dst_dsn)) { - mpath->dsn = dst_dsn; - mpath->flags |= MESH_PATH_DSN_VALID; - } else if ((!(dst_flags & MP_F_DO)) && + if ((!(mpath->flags & MESH_PATH_SN_VALID)) || + SN_LT(mpath->sn, target_sn)) { + mpath->sn = target_sn; + mpath->flags |= MESH_PATH_SN_VALID; + } else if ((!(target_flags & MP_F_DO)) && (mpath->flags & MESH_PATH_ACTIVE)) { reply = true; metric = mpath->metric; - dst_dsn = mpath->dsn; - if (dst_flags & MP_F_RF) - dst_flags |= 
MP_F_DO; + target_sn = mpath->sn; + if (target_flags & MP_F_RF) + target_flags |= MP_F_DO; else forward = false; } @@ -467,13 +522,14 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (reply) { lifetime = PREQ_IE_LIFETIME(preq_elem); ttl = ifmsh->mshcfg.dot11MeshTTL; - if (ttl != 0) - mesh_path_sel_frame_tx(MPATH_PREP, 0, dst_addr, - cpu_to_le32(dst_dsn), 0, orig_addr, - cpu_to_le32(orig_dsn), mgmt->sa, 0, ttl, + if (ttl != 0) { + mhwmp_dbg("replying to the PREQ\n"); + mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr, + cpu_to_le32(target_sn), 0, orig_addr, + cpu_to_le32(orig_sn), mgmt->sa, 0, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); - else + } else ifmsh->mshstats.dropped_frames_ttl++; } @@ -487,13 +543,14 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, ifmsh->mshstats.dropped_frames_ttl++; return; } + mhwmp_dbg("forwarding the PREQ from %pM\n", orig_addr); --ttl; flags = PREQ_IE_FLAGS(preq_elem); preq_id = PREQ_IE_PREQ_ID(preq_elem); hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1; mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, - cpu_to_le32(orig_dsn), dst_flags, dst_addr, - cpu_to_le32(dst_dsn), sdata->dev->broadcast, + cpu_to_le32(orig_sn), target_flags, target_addr, + cpu_to_le32(target_sn), broadcast_addr, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), cpu_to_le32(preq_id), sdata); @@ -508,10 +565,12 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, u8 *prep_elem, u32 metric) { struct mesh_path *mpath; - u8 *dst_addr, *orig_addr; + u8 *target_addr, *orig_addr; u8 ttl, hopcount, flags; u8 next_hop[ETH_ALEN]; - u32 dst_dsn, orig_dsn, lifetime; + u32 target_sn, orig_sn, lifetime; + + mhwmp_dbg("received PREP from %pM\n", PREP_IE_ORIG_ADDR(prep_elem)); /* Note that we divert from the draft nomenclature and denominate * destination to what the draft refers to as origininator. 
So in this @@ -519,8 +578,8 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, * which corresponds with the originator of the PREQ which this PREP * replies */ - dst_addr = PREP_IE_DST_ADDR(prep_elem); - if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) + target_addr = PREP_IE_TARGET_ADDR(prep_elem); + if (memcmp(target_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) /* destination, no forwarding required */ return; @@ -531,7 +590,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -547,13 +606,13 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, lifetime = PREP_IE_LIFETIME(prep_elem); hopcount = PREP_IE_HOPCOUNT(prep_elem) + 1; orig_addr = PREP_IE_ORIG_ADDR(prep_elem); - dst_dsn = PREP_IE_DST_DSN(prep_elem); - orig_dsn = PREP_IE_ORIG_DSN(prep_elem); + target_sn = PREP_IE_TARGET_SN(prep_elem); + orig_sn = PREP_IE_ORIG_SN(prep_elem); mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, - cpu_to_le32(orig_dsn), 0, dst_addr, - cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl, - cpu_to_le32(lifetime), cpu_to_le32(metric), + cpu_to_le32(orig_sn), 0, target_addr, + cpu_to_le32(target_sn), mpath->next_hop->sta.addr, hopcount, + ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), 0, sdata); rcu_read_unlock(); @@ -570,32 +629,96 @@ fail: static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *perr_elem) { + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; - u8 *ta, *dst_addr; - u32 dst_dsn; + u8 ttl; + u8 *ta, *target_addr; + u8 target_flags; + u32 target_sn; + u16 target_rcode; ta = mgmt->sa; - dst_addr = PERR_IE_DST_ADDR(perr_elem); - dst_dsn = PERR_IE_DST_DSN(perr_elem); + ttl = PERR_IE_TTL(perr_elem); + if (ttl <= 1) { + ifmsh->mshstats.dropped_frames_ttl++; + return; + } + ttl--; + target_flags = PERR_IE_TARGET_FLAGS(perr_elem); + target_addr = PERR_IE_TARGET_ADDR(perr_elem); + target_sn = PERR_IE_TARGET_SN(perr_elem); + target_rcode = PERR_IE_TARGET_RCODE(perr_elem); + rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_ACTIVE && memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 && - (!(mpath->flags & MESH_PATH_DSN_VALID) || - DSN_GT(dst_dsn, mpath->dsn))) { + (!(mpath->flags & MESH_PATH_SN_VALID) || + SN_GT(target_sn, mpath->sn))) { mpath->flags &= ~MESH_PATH_ACTIVE; - mpath->dsn = dst_dsn; + mpath->sn = target_sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(dst_addr, cpu_to_le32(dst_dsn), - sdata->dev->broadcast, sdata); + mesh_path_error_tx(ttl, target_addr, cpu_to_le32(target_sn), + cpu_to_le16(target_rcode), + broadcast_addr, sdata); } else spin_unlock_bh(&mpath->state_lock); } rcu_read_unlock(); } +static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + struct ieee80211_rann_ie *rann) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct mesh_path *mpath; + u8 *ta; + u8 ttl, flags, hopcount; + u8 *orig_addr; + u32 orig_sn, metric; + + ta = mgmt->sa; + ttl = rann->rann_ttl; + if (ttl <= 1) { + ifmsh->mshstats.dropped_frames_ttl++; + return; + } + ttl--; + flags = rann->rann_flags; + orig_addr = rann->rann_addr; + orig_sn = rann->rann_seq; + hopcount = 
rann->rann_hopcount; + hopcount++; + metric = rann->rann_metric; + mhwmp_dbg("received RANN from %pM\n", orig_addr); + + rcu_read_lock(); + mpath = mesh_path_lookup(orig_addr, sdata); + if (!mpath) { + mesh_path_add(orig_addr, sdata); + mpath = mesh_path_lookup(orig_addr, sdata); + if (!mpath) { + rcu_read_unlock(); + sdata->u.mesh.mshstats.dropped_frames_no_route++; + return; + } + mesh_queue_preq(mpath, + PREQ_Q_F_START | PREQ_Q_F_REFRESH); + } + if (mpath->sn < orig_sn) { + mesh_path_sel_frame_tx(MPATH_RANN, flags, orig_addr, + cpu_to_le32(orig_sn), + 0, NULL, 0, broadcast_addr, + hopcount, ttl, 0, + cpu_to_le32(metric + mpath->metric), + 0, sdata); + mpath->sn = orig_sn; + } + rcu_read_unlock(); +} void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, @@ -614,34 +737,34 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, len - baselen, &elems); - switch (mgmt->u.action.u.mesh_action.action_code) { - case MPATH_PREQ: - if (!elems.preq || elems.preq_len != 37) + if (elems.preq) { + if (elems.preq_len != 37) /* Right now we support just 1 destination and no AE */ return; - last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq); - if (!last_hop_metric) - return; - hwmp_preq_frame_process(sdata, mgmt, elems.preq, last_hop_metric); - break; - case MPATH_PREP: - if (!elems.prep || elems.prep_len != 31) + last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq, + MPATH_PREQ); + if (last_hop_metric) + hwmp_preq_frame_process(sdata, mgmt, elems.preq, + last_hop_metric); + } + if (elems.prep) { + if (elems.prep_len != 31) /* Right now we support no AE */ return; - last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep); - if (!last_hop_metric) - return; - hwmp_prep_frame_process(sdata, mgmt, elems.prep, last_hop_metric); - break; - case MPATH_PERR: - if (!elems.perr || elems.perr_len != 12) + last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep, + MPATH_PREP); + if (last_hop_metric) + hwmp_prep_frame_process(sdata, mgmt, elems.prep, + last_hop_metric); + } + if (elems.perr) { + if (elems.perr_len != 15) /* Right now we support only one destination per PERR */ return; hwmp_perr_frame_process(sdata, mgmt, elems.perr); - default: - return; } - + if (elems.rann) + hwmp_rann_frame_process(sdata, mgmt, elems.rann); } /** @@ -661,7 +784,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { - printk(KERN_DEBUG "Mesh HWMP: could not allocate PREQ node\n"); + mhwmp_dbg("could not allocate PREQ node\n"); return; } @@ -670,7 +793,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) - printk(KERN_DEBUG "Mesh HWMP: PREQ node queue full\n"); + mhwmp_dbg("PREQ node queue full\n"); return; } @@ -705,7 +828,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; struct mesh_path *mpath; - u8 ttl, dst_flags; + u8 ttl, target_flags; u32 lifetime; spin_lock_bh(&ifmsh->mesh_preq_queue_lock); @@ -747,11 +870,11 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) ifmsh->last_preq = jiffies; - if (time_after(jiffies, ifmsh->last_dsn_update + + if (time_after(jiffies, ifmsh->last_sn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifmsh->last_dsn_update)) { - ++ifmsh->dsn; - 
sdata->u.mesh.last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_sn_update)) { + ++ifmsh->sn; + sdata->u.mesh.last_sn_update = jiffies; } lifetime = default_lifetime(sdata); ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; @@ -762,14 +885,14 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) } if (preq_node->flags & PREQ_Q_F_REFRESH) - dst_flags = MP_F_DO; + target_flags = MP_F_DO; else - dst_flags = MP_F_RF; + target_flags = MP_F_RF; spin_unlock_bh(&mpath->state_lock); mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr, - cpu_to_le32(ifmsh->dsn), dst_flags, mpath->dst, - cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0, + cpu_to_le32(ifmsh->sn), target_flags, mpath->dst, + cpu_to_le32(mpath->sn), broadcast_addr, 0, ttl, cpu_to_le32(lifetime), 0, cpu_to_le32(ifmsh->preq_id++), sdata); mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); @@ -796,15 +919,15 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct sk_buff *skb_to_free = NULL; struct mesh_path *mpath; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u8 *dst_addr = hdr->addr3; + u8 *target_addr = hdr->addr3; int err = 0; rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (!mpath) { - mesh_path_add(dst_addr, sdata); - mpath = mesh_path_lookup(dst_addr, sdata); + mesh_path_add(target_addr, sdata); + mpath = mesh_path_lookup(target_addr, sdata); if (!mpath) { sdata->u.mesh.mshstats.dropped_frames_no_route++; err = -ENOSPC; @@ -813,17 +936,16 @@ int mesh_nexthop_lookup(struct sk_buff *skb, } if (mpath->flags & MESH_PATH_ACTIVE) { - if (time_after(jiffies, mpath->exp_time + - msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) - && !memcmp(sdata->dev->dev_addr, hdr->addr4, - ETH_ALEN) - && !(mpath->flags & MESH_PATH_RESOLVING) - && !(mpath->flags & MESH_PATH_FIXED)) { + if (time_after(jiffies, + mpath->exp_time - + msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) && + !memcmp(sdata->dev->dev_addr, hdr->addr4, ETH_ALEN) && + !(mpath->flags & MESH_PATH_RESOLVING) && + !(mpath->flags & MESH_PATH_FIXED)) { mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); } - memcpy(hdr->addr1, mpath->next_hop->sta.addr, - ETH_ALEN); + memcpy(hdr->addr1, mpath->next_hop->sta.addr, ETH_ALEN); } else { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (!(mpath->flags & MESH_PATH_RESOLVING)) { @@ -882,3 +1004,14 @@ void mesh_path_timer(unsigned long data) endmpathtimer: rcu_read_unlock(); } + +void +mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->dev->dev_addr, + cpu_to_le32(++ifmsh->sn), + 0, NULL, 0, broadcast_addr, + 0, MESH_TTL, 0, 0, 0, sdata); +} diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 751c4d0e2b3..0192cfdacae 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 open80211s Ltd. + * Copyright (c) 2008, 2009 open80211s Ltd. 
* Author: Luis Carlos Cobo <luisca@cozybit.com> * * This program is free software; you can redistribute it and/or modify @@ -244,7 +244,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data * @addr: destination address of the path (ETH_ALEN length) * @sdata: local subif * - * Returns: 0 on sucess + * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ @@ -449,6 +449,7 @@ err_path_alloc: */ void mesh_plink_broken(struct sta_info *sta) { + static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; struct mpath_node *node; struct hlist_node *p; @@ -463,11 +464,12 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { mpath->flags &= ~MESH_PATH_ACTIVE; - ++mpath->dsn; + ++mpath->sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(mpath->dst, - cpu_to_le32(mpath->dsn), - sdata->dev->broadcast, sdata); + mesh_path_error_tx(MESH_TTL, mpath->dst, + cpu_to_le32(mpath->sn), + cpu_to_le16(PERR_RCODE_DEST_UNREACH), + bcast, sdata); } else spin_unlock_bh(&mpath->state_lock); } @@ -530,7 +532,7 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) * @addr: dst address (ETH_ALEN length) * @sdata: local subif * - * Returns: 0 if succesful + * Returns: 0 if successful */ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) { @@ -601,7 +603,7 @@ void mesh_path_discard_frame(struct sk_buff *skb, { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct mesh_path *mpath; - u32 dsn = 0; + u32 sn = 0; if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) { u8 *ra, *da; @@ -610,8 +612,9 @@ void mesh_path_discard_frame(struct sk_buff *skb, ra = hdr->addr1; mpath = mesh_path_lookup(da, sdata); if (mpath) - dsn = ++mpath->dsn; - mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, sdata); + sn = ++mpath->sn; + mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn), + cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata); } kfree_skb(skb); @@ -646,7 +649,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) { spin_lock_bh(&mpath->state_lock); mesh_path_assign_nexthop(mpath, next_hop); - mpath->dsn = 0xffff; + mpath->sn = 0xffff; mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index ffcbad75e09..0f7c6e6a424 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 open80211s Ltd. + * Copyright (c) 2008, 2009 open80211s Ltd. * Author: Luis Carlos Cobo <luisca@cozybit.com> * * This program is free software; you can redistribute it and/or modify @@ -18,9 +18,8 @@ #define mpl_dbg(fmt, args...) 
do { (void)(0); } while (0) #endif -#define PLINK_GET_FRAME_SUBTYPE(p) (p) -#define PLINK_GET_LLID(p) (p + 1) -#define PLINK_GET_PLID(p) (p + 3) +#define PLINK_GET_LLID(p) (p + 4) +#define PLINK_GET_PLID(p) (p + 6) #define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \ jiffies + HZ * t / 1000)) @@ -65,6 +64,7 @@ void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } static inline @@ -72,12 +72,13 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON); } /** * mesh_plink_fsm_restart - restart a mesh peer link finite state machine * - * @sta: mes peer link to restart + * @sta: mesh peer link to restart * * Locking: this function must be called holding sta->lock */ @@ -152,6 +153,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; bool include_plid = false; + static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; u8 *pos; int ie_len; @@ -169,7 +171,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ - mgmt->u.action.category = PLINK_CATEGORY; + mgmt->u.action.category = MESH_PLINK_CATEGORY; mgmt->u.action.u.plink_action.action_code = action; if (action == PLINK_CLOSE) @@ -179,7 +181,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action == PLINK_CONFIRM) { pos = skb_put(skb, 4); /* two-byte status code followed by two-byte AID */ - memset(pos, 0, 4); + memset(pos, 0, 2); + memcpy(pos + 2, &plid, 2); } mesh_mgmt_ies_add(skb, sdata); } @@ -187,18 +190,18 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, /* Add Peer Link Management element */ switch (action) { case PLINK_OPEN: - ie_len = 3; + ie_len = 6; break; case PLINK_CONFIRM: - ie_len = 5; + ie_len = 8; include_plid = true; break; case PLINK_CLOSE: default: if (!plid) - ie_len = 5; + ie_len = 8; else { - ie_len = 7; + ie_len = 10; include_plid = true; } break; @@ -207,7 +210,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PEER_LINK; *pos++ = ie_len; - *pos++ = action; + memcpy(pos, meshpeeringproto, sizeof(meshpeeringproto)); + pos += 4; memcpy(pos, &llid, 2); if (include_plid) { pos += 2; @@ -218,7 +222,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(pos, &reason, 2); } - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); return 0; } @@ -395,6 +399,17 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 ie_len; u8 *baseaddr; __le16 plid, llid, reason; +#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG + static const char *mplstates[] = { + [PLINK_LISTEN] = "LISTEN", + [PLINK_OPN_SNT] = "OPN-SNT", + [PLINK_OPN_RCVD] = "OPN-RCVD", + [PLINK_CNF_RCVD] = "CNF_RCVD", + [PLINK_ESTAB] = "ESTAB", + [PLINK_HOLDING] = "HOLDING", + [PLINK_BLOCKED] = "BLOCKED" + }; +#endif /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) @@ -417,12 +432,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m return; } - ftype 
= *((u8 *)PLINK_GET_FRAME_SUBTYPE(elems.peer_link)); + ftype = mgmt->u.action.u.plink_action.action_code; ie_len = elems.peer_link_len; - if ((ftype == PLINK_OPEN && ie_len != 3) || - (ftype == PLINK_CONFIRM && ie_len != 5) || - (ftype == PLINK_CLOSE && ie_len != 5 && ie_len != 7)) { - mpl_dbg("Mesh plink: incorrect plink ie length\n"); + if ((ftype == PLINK_OPEN && ie_len != 6) || + (ftype == PLINK_CONFIRM && ie_len != 8) || + (ftype == PLINK_CLOSE && ie_len != 8 && ie_len != 10)) { + mpl_dbg("Mesh plink: incorrect plink ie length %d %d\n", + ftype, ie_len); return; } @@ -434,7 +450,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m * from the point of view of this host. */ memcpy(&plid, PLINK_GET_LLID(elems.peer_link), 2); - if (ftype == PLINK_CONFIRM || (ftype == PLINK_CLOSE && ie_len == 7)) + if (ftype == PLINK_CONFIRM || (ftype == PLINK_CLOSE && ie_len == 10)) memcpy(&llid, PLINK_GET_PLID(elems.peer_link), 2); rcu_read_lock(); @@ -532,8 +548,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } } - mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %d %d %d %d\n", - mgmt->sa, sta->plink_state, + mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", + mgmt->sa, mplstates[sta->plink_state], le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), event); reason = 0; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dc5049d58c5..d8d50fb5e82 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -426,7 +426,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); } - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } @@ -458,10 +459,18 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->u.deauth.reason_code = cpu_to_le16(reason); if (stype == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, cookie); + if (cookie) + __cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + else + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); else - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len, cookie); - ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); + if (cookie) + __cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); + else + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); + if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } void ieee80211_send_pspoll(struct ieee80211_local *local, @@ -492,7 +501,8 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN); - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } void ieee80211_send_nullfunc(struct ieee80211_local *local, @@ -525,7 +535,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN); - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } /* spectrum management related things */ @@ -923,7 +934,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, 
ieee80211_recalc_ps(local, -1); mutex_unlock(&local->iflist_mtx); - netif_tx_start_all_queues(sdata->dev); + netif_start_queue(sdata->dev); netif_carrier_on(sdata->dev); } @@ -1055,7 +1066,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, * time -- we don't want the scan code to enable queues. */ - netif_tx_stop_all_queues(sdata->dev); + netif_stop_queue(sdata->dev); netif_carrier_off(sdata->dev); rcu_read_lock(); @@ -1072,8 +1083,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata); - ieee80211_recalc_idle(local); - /* channel(_type) changes are handled by ieee80211_hw_config */ local->oper_channel_type = NL80211_CHAN_NO_HT; @@ -1359,6 +1368,7 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, if (!wk) { ieee80211_set_disassoc(sdata, true); + ieee80211_recalc_idle(sdata->local); } else { list_del(&wk->list); kfree(wk); @@ -1392,6 +1402,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, sdata->dev->name, mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, false); + ieee80211_recalc_idle(sdata->local); return RX_MGMT_CFG80211_DISASSOC; } @@ -1892,7 +1903,6 @@ ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, fc = le16_to_cpu(mgmt->frame_control); switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: case IEEE80211_STYPE_AUTH: @@ -1958,12 +1968,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* no action */ break; case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, - NULL); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); break; case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len, - NULL); + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); break; default: WARN(1, "unexpected: %d", rma); @@ -2018,7 +2026,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, skb->len); break; case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, NULL); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); break; default: WARN(1, "unexpected: %d", rma); @@ -2109,6 +2117,7 @@ static void ieee80211_sta_work(struct work_struct *work) " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); ieee80211_set_disassoc(sdata, true); + ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); /* * must be outside lock due to cfg80211, @@ -2500,6 +2509,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_work *wk; const u8 *bssid = NULL; + bool not_auth_yet = false; mutex_lock(&ifmgd->mtx); @@ -2509,6 +2519,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, } else list_for_each_entry(wk, &ifmgd->work_list, list) { if (&wk->bss->cbss == req->bss) { bssid = req->bss->bssid; + if (wk->state == IEEE80211_MGD_STATE_PROBE) + not_auth_yet = true; list_del(&wk->list); kfree(wk); break; @@ -2516,6 +2528,20 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, } /* + * If somebody requests authentication and we haven't + * sent out an auth frame yet there's no need to send + * out a deauth frame either. If the state was PROBE, + * then this is the case. 
If it's AUTH we have sent a + * frame, and if it's IDLE we have completed the auth + * process already. + */ + if (not_auth_yet) { + mutex_unlock(&ifmgd->mtx); + __cfg80211_auth_canceled(sdata->dev, bssid); + return 0; + } + + /* * cfg80211 should catch this ... but it's racy since * we can receive a deauth frame, process it, hand it * to cfg80211 while that's in a locked section already @@ -2535,6 +2561,8 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DEAUTH, req->reason_code, cookie); + ieee80211_recalc_idle(sdata->local); + return 0; } @@ -2567,5 +2595,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, IEEE80211_STYPE_DISASSOC, req->reason_code, cookie); + + ieee80211_recalc_idle(sdata->local); + return 0; } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b33efc4fc26..b9007f80cb9 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -163,8 +163,7 @@ struct rate_control_ref *rate_control_alloc(const char *name, #ifdef CONFIG_MAC80211_DEBUGFS debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); local->debugfs.rcdir = debugfsdir; - local->debugfs.rcname = debugfs_create_file("name", 0400, debugfsdir, - ref, &rcname_ops); + debugfs_create_file("name", 0400, debugfsdir, ref, &rcname_ops); #endif ref->priv = ref->ops->alloc(&local->hw, debugfsdir); @@ -188,9 +187,7 @@ static void rate_control_release(struct kref *kref) ctrl_ref->ops->free(ctrl_ref->priv); #ifdef CONFIG_MAC80211_DEBUGFS - debugfs_remove(ctrl_ref->local->debugfs.rcname); - ctrl_ref->local->debugfs.rcname = NULL; - debugfs_remove(ctrl_ref->local->debugfs.rcdir); + debugfs_remove_recursive(ctrl_ref->local->debugfs.rcdir); ctrl_ref->local->debugfs.rcdir = NULL; #endif @@ -287,9 +284,16 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, struct rate_control_ref *ref, *old; ASSERT_RTNL(); + if (local->open_count) return -EBUSY; + if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) { + if (WARN_ON(!local->ops->set_rts_threshold)) + return -EINVAL; + return 0; + } + ref = rate_control_alloc(name, local); if (!ref) { printk(KERN_WARNING "%s: Failed to select rate control " @@ -308,7 +312,6 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, "algorithm '%s'\n", wiphy_name(local->hw.wiphy), ref->ops->name); - return 0; } @@ -317,6 +320,10 @@ void rate_control_deinitialize(struct ieee80211_local *local) struct rate_control_ref *ref; ref = local->rate_ctrl; + + if (!ref) + return; + local->rate_ctrl = NULL; rate_control_put(ref); } diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 2ab5ad9e71c..cb9bd1f65e2 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -59,6 +59,9 @@ static inline void rate_control_rate_init(struct sta_info *sta) void *priv_sta = sta->rate_ctrl_priv; struct ieee80211_supported_band *sband; + if (!ref) + return; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; ref->ops->rate_init(ref->priv, sband, ista, priv_sta); @@ -72,7 +75,7 @@ static inline void rate_control_rate_update(struct ieee80211_local *local, struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; - if (ref->ops->rate_update) + if (ref && ref->ops->rate_update) ref->ops->rate_update(ref->priv, sband, ista, priv_sta, changed); } @@ -97,7 +100,7 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) { #ifdef CONFIG_MAC80211_DEBUGFS struct rate_control_ref *ref = sta->rate_ctrl; - if (sta->debugfs.dir && 
ref->ops->add_sta_debugfs) + if (ref && sta->debugfs.dir && ref->ops->add_sta_debugfs) ref->ops->add_sta_debugfs(ref->priv, sta->rate_ctrl_priv, sta->debugfs.dir); #endif @@ -107,7 +110,7 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) { #ifdef CONFIG_MAC80211_DEBUGFS struct rate_control_ref *ref = sta->rate_ctrl; - if (ref->ops->remove_sta_debugfs) + if (ref && ref->ops->remove_sta_debugfs) ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv); #endif } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7170bf4565a..9f2807aeaf5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -27,11 +27,6 @@ #include "tkip.h" #include "wme.h" -static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, - struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb, - u16 mpdu_seq_num, - int bar_req); /* * monitor mode reception * @@ -39,11 +34,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, * only useful for monitoring. */ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, - struct sk_buff *skb, - int rtap_len) + struct sk_buff *skb) { - skb_pull(skb, rtap_len); - if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { if (likely(skb->len > FCS_LEN)) skb_trim(skb, skb->len - FCS_LEN); @@ -59,15 +51,14 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, } static inline int should_drop_frame(struct sk_buff *skb, - int present_fcs_len, - int radiotap_len) + int present_fcs_len) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) return 1; - if (unlikely(skb->len < 16 + present_fcs_len + radiotap_len)) + if (unlikely(skb->len < 16 + present_fcs_len)) return 1; if (ieee80211_is_ctl(hdr->frame_control) && !ieee80211_is_pspoll(hdr->frame_control) && @@ -95,10 +86,6 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (len & 1) /* padding for RX_FLAGS if necessary */ len++; - /* make sure radiotap starts at a naturally aligned address */ - if (len % 8) - len = roundup(len, 8); - return len; } @@ -116,6 +103,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_header *rthdr; unsigned char *pos; + u16 rx_flags = 0; rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len); memset(rthdr, 0, rtap_len); @@ -134,7 +122,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_TSFT */ if (status->flag & RX_FLAG_TSFT) { - *(__le64 *)pos = cpu_to_le64(status->mactime); + put_unaligned_le64(status->mactime, pos); rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); pos += 8; @@ -166,17 +154,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_CHANNEL */ - *(__le16 *)pos = cpu_to_le16(status->freq); + put_unaligned_le16(status->freq, pos); pos += 2; if (status->band == IEEE80211_BAND_5GHZ) - *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_5GHZ); + put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ, + pos); + else if (status->flag & RX_FLAG_HT) + put_unaligned_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ, + pos); else if (rate->flags & IEEE80211_RATE_ERP_G) - *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_2GHZ); + put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, + pos); else - *(__le16 
*)pos = cpu_to_le16(IEEE80211_CHAN_CCK | - IEEE80211_CHAN_2GHZ); + put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, + pos); pos += 2; /* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */ @@ -205,10 +196,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_RX_FLAGS */ /* ensure 2 byte alignment for the 2 byte field as required */ - if ((pos - (unsigned char *)rthdr) & 1) + if ((pos - (u8 *)rthdr) & 1) pos++; if (status->flag & RX_FLAG_FAILED_PLCP_CRC) - *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADPLCP); + rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; + put_unaligned_le16(rx_flags, pos); pos += 2; } @@ -227,7 +219,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct sk_buff *skb, *skb2; struct net_device *prev_dev = NULL; int present_fcs_len = 0; - int rtap_len = 0; /* * First, we may need to make a copy of the skb because @@ -237,25 +228,23 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, * We don't need to, of course, if we aren't going to return * the SKB because it has a bad FCS/PLCP checksum. */ - if (status->flag & RX_FLAG_RADIOTAP) - rtap_len = ieee80211_get_radiotap_len(origskb->data); - else - /* room for the radiotap header based on driver features */ - needed_headroom = ieee80211_rx_radiotap_len(local, status); + + /* room for the radiotap header based on driver features */ + needed_headroom = ieee80211_rx_radiotap_len(local, status); if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) present_fcs_len = FCS_LEN; if (!local->monitors) { - if (should_drop_frame(origskb, present_fcs_len, rtap_len)) { + if (should_drop_frame(origskb, present_fcs_len)) { dev_kfree_skb(origskb); return NULL; } - return remove_monitor_info(local, origskb, rtap_len); + return remove_monitor_info(local, origskb); } - if (should_drop_frame(origskb, present_fcs_len, rtap_len)) { + if (should_drop_frame(origskb, present_fcs_len)) { /* only need to expand headroom if necessary */ skb = origskb; origskb = NULL; @@ -279,16 +268,14 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, */ skb = skb_copy_expand(origskb, needed_headroom, 0, GFP_ATOMIC); - origskb = remove_monitor_info(local, origskb, rtap_len); + origskb = remove_monitor_info(local, origskb); if (!skb) return origskb; } - /* if necessary, prepend radiotap information */ - if (!(status->flag & RX_FLAG_RADIOTAP)) - ieee80211_add_rx_radiotap_header(local, skb, rate, - needed_headroom); + /* prepend radiotap information */ + ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom); skb_reset_mac_header(skb); skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -489,7 +476,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); - char *dev_addr = rx->dev->dev_addr; + char *dev_addr = rx->sdata->dev->dev_addr; if (ieee80211_is_data(hdr->frame_control)) { if (is_multicast_ether_addr(hdr->addr1)) { @@ -518,7 +505,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (ieee80211_is_action(hdr->frame_control)) { mgmt = (struct ieee80211_mgmt *)hdr; - if (mgmt->u.action.category != PLINK_CATEGORY) + if (mgmt->u.action.category != MESH_PLINK_CATEGORY) return RX_DROP_MONITOR; return RX_CONTINUE; } @@ -543,6 +530,242 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +#define SEQ_MODULO 0x1000 +#define SEQ_MASK 0xfff + +static inline int seq_less(u16 sq1, u16 
sq2) +{ + return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); +} + +static inline u16 seq_inc(u16 sq) +{ + return (sq + 1) & SEQ_MASK; +} + +static inline u16 seq_sub(u16 sq1, u16 sq2) +{ + return (sq1 - sq2) & SEQ_MASK; +} + + +static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw, + struct tid_ampdu_rx *tid_agg_rx, + int index, + struct sk_buff_head *frames) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_rate *rate = NULL; + struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; + struct ieee80211_rx_status *status; + + if (!skb) + goto no_frame; + + status = IEEE80211_SKB_RXCB(skb); + + /* release the reordered frames to stack */ + sband = hw->wiphy->bands[status->band]; + if (!(status->flag & RX_FLAG_HT)) + rate = &sband->bitrates[status->rate_idx]; + tid_agg_rx->stored_mpdu_num--; + tid_agg_rx->reorder_buf[index] = NULL; + __skb_queue_tail(frames, skb); + +no_frame: + tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); +} + +static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw, + struct tid_ampdu_rx *tid_agg_rx, + u16 head_seq_num, + struct sk_buff_head *frames) +{ + int index; + + while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { + index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + tid_agg_rx->buf_size; + ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames); + } +} + +/* + * Timeout (in jiffies) for skb's that are waiting in the RX reorder buffer. If + * the skb was added to the buffer longer than this time ago, the earlier + * frames that have not yet been received are assumed to be lost and the skb + * can be released for processing. This may also release other skb's from the + * reorder buffer if there are no additional gaps between the frames. + */ +#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) + +/* + * As this function belongs to the RX path it must be under + * rcu_read_lock protection. It returns false if the frame + * can be processed immediately, true if it was consumed. + */ +static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, + struct tid_ampdu_rx *tid_agg_rx, + struct sk_buff *skb, + struct sk_buff_head *frames) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u16 sc = le16_to_cpu(hdr->seq_ctrl); + u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; + u16 head_seq_num, buf_size; + int index; + + buf_size = tid_agg_rx->buf_size; + head_seq_num = tid_agg_rx->head_seq_num; + + /* frame with out of date sequence number */ + if (seq_less(mpdu_seq_num, head_seq_num)) { + dev_kfree_skb(skb); + return true; + } + + /* + * If frame the sequence number exceeds our buffering window + * size release some previous frames to make room for this one. + */ + if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { + head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); + /* release stored frames up to new head to stack */ + ieee80211_release_reorder_frames(hw, tid_agg_rx, head_seq_num, + frames); + } + + /* Now the new frame is always in the range of the reordering buffer */ + + index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; + + /* check if we already stored this frame */ + if (tid_agg_rx->reorder_buf[index]) { + dev_kfree_skb(skb); + return true; + } + + /* + * If the current MPDU is in the right order and nothing else + * is stored we can process it directly, no need to buffer it. 
+ */ + if (mpdu_seq_num == tid_agg_rx->head_seq_num && + tid_agg_rx->stored_mpdu_num == 0) { + tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + return false; + } + + /* put the frame in the reordering buffer */ + tid_agg_rx->reorder_buf[index] = skb; + tid_agg_rx->reorder_time[index] = jiffies; + tid_agg_rx->stored_mpdu_num++; + /* release the buffer until next missing frame */ + index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + tid_agg_rx->buf_size; + if (!tid_agg_rx->reorder_buf[index] && + tid_agg_rx->stored_mpdu_num > 1) { + /* + * No buffers ready to be released, but check whether any + * frames in the reorder buffer have timed out. + */ + int j; + int skipped = 1; + for (j = (index + 1) % tid_agg_rx->buf_size; j != index; + j = (j + 1) % tid_agg_rx->buf_size) { + if (!tid_agg_rx->reorder_buf[j]) { + skipped++; + continue; + } + if (!time_after(jiffies, tid_agg_rx->reorder_time[j] + + HT_RX_REORDER_BUF_TIMEOUT)) + break; + +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "%s: release an RX reorder " + "frame due to timeout on earlier " + "frames\n", + wiphy_name(hw->wiphy)); +#endif + ieee80211_release_reorder_frame(hw, tid_agg_rx, + j, frames); + + /* + * Increment the head seq# also for the skipped slots. + */ + tid_agg_rx->head_seq_num = + (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; + skipped = 0; + } + } else while (tid_agg_rx->reorder_buf[index]) { + ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames); + index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + tid_agg_rx->buf_size; + } + + return true; +} + +/* + * Reorder MPDUs from A-MPDUs, keeping them on a buffer. Returns + * true if the MPDU was buffered, false if it should be processed. + */ +static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, + struct sk_buff_head *frames) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_local *local = rx->local; + struct ieee80211_hw *hw = &local->hw; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct sta_info *sta = rx->sta; + struct tid_ampdu_rx *tid_agg_rx; + u16 sc; + int tid; + + if (!ieee80211_is_data_qos(hdr->frame_control)) + goto dont_reorder; + + /* + * filter the QoS data rx stream according to + * STA/TID and check if this STA/TID is on aggregation + */ + + if (!sta) + goto dont_reorder; + + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + + if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) + goto dont_reorder; + + tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + + /* qos null data frames are excluded */ + if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) + goto dont_reorder; + + /* new, potentially un-ordered, ampdu frame - process it */ + + /* reset session timer */ + if (tid_agg_rx->timeout) + mod_timer(&tid_agg_rx->session_timer, + TU_TO_EXP_TIME(tid_agg_rx->timeout)); + + /* if this mpdu is fragmented - terminate rx aggregation session */ + sc = le16_to_cpu(hdr->seq_ctrl); + if (sc & IEEE80211_SCTL_FRAG) { + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, + tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); + dev_kfree_skb(skb); + return; + } + + if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) + return; + + dont_reorder: + __skb_queue_tail(frames, skb); +} static ieee80211_rx_result debug_noinline ieee80211_rx_h_check(struct ieee80211_rx_data *rx) @@ -603,7 +826,9 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline 
ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int keyidx; int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; @@ -644,6 +869,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; + /* start without a key */ + rx->key = NULL; + if (rx->sta) stakey = rcu_dereference(rx->sta->key); @@ -657,8 +885,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } else if (mmie_keyidx >= 0) { /* Broadcast/multicast robust management frame / BIP */ - if ((rx->status->flag & RX_FLAG_DECRYPTED) && - (rx->status->flag & RX_FLAG_IV_STRIPPED)) + if ((status->flag & RX_FLAG_DECRYPTED) && + (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; if (mmie_keyidx < NUM_DEFAULT_KEYS || @@ -690,8 +918,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * we somehow allow the driver to tell us which key * the hardware used if this flag is set? */ - if ((rx->status->flag & RX_FLAG_DECRYPTED) && - (rx->status->flag & RX_FLAG_IV_STRIPPED)) + if ((status->flag & RX_FLAG_DECRYPTED) && + (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -727,8 +955,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* Check for weak IVs if possible */ if (rx->sta && rx->key->conf.alg == ALG_WEP && ieee80211_is_data(hdr->frame_control) && - (!(rx->status->flag & RX_FLAG_IV_STRIPPED) || - !(rx->status->flag & RX_FLAG_DECRYPTED)) && + (!(status->flag & RX_FLAG_IV_STRIPPED) || + !(status->flag & RX_FLAG_DECRYPTED)) && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) rx->sta->wep_weak_iv_count++; @@ -748,7 +976,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) } /* either the frame has been decrypted or will be dropped */ - rx->status->flag |= RX_FLAG_DECRYPTED; + status->flag |= RX_FLAG_DECRYPTED; return result; } @@ -792,7 +1020,7 @@ static void ap_sta_ps_start(struct sta_info *sta) struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); - set_sta_flags(sta, WLAN_STA_PS); + set_sta_flags(sta, WLAN_STA_PS_STA); drv_sta_notify(local, &sdata->vif, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", @@ -800,45 +1028,37 @@ static void ap_sta_ps_start(struct sta_info *sta) #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } -static int ap_sta_ps_end(struct sta_info *sta) +static void ap_sta_ps_end(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - struct ieee80211_local *local = sdata->local; - int sent, buffered; atomic_dec(&sdata->bss->num_sta_ps); - clear_sta_flags(sta, WLAN_STA_PS); - drv_sta_notify(local, &sdata->vif, STA_NOTIFY_AWAKE, &sta->sta); - - if (!skb_queue_empty(&sta->ps_tx_buf)) - sta_info_clear_tim_bit(sta); + clear_sta_flags(sta, WLAN_STA_PS_STA); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d exits power save mode\n", sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - /* Send all buffered frames to the station */ - sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); - buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); - sent += buffered; - local->total_ps_buffered -= buffered; - + if 
(test_sta_flags(sta, WLAN_STA_PS_DRIVER)) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " - "since STA not sleeping anymore\n", sdata->dev->name, - sta->sta.addr, sta->sta.aid, sent - buffered, buffered); + printk(KERN_DEBUG "%s: STA %pM aid %d driver-ps-blocked\n", + sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + return; + } - return sent; + ieee80211_sta_ps_deliver_wakeup(sta); } static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!sta) return RX_CONTINUE; @@ -869,9 +1089,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; - sta->last_signal = rx->status->signal; - sta->last_qual = rx->status->qual; - sta->last_noise = rx->status->noise; + sta->last_signal = status->signal; + sta->last_noise = status->noise; /* * Change STA power saving mode only at the end of a frame @@ -880,7 +1099,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (!ieee80211_has_morefrags(hdr->frame_control) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { - if (test_sta_flags(sta, WLAN_STA_PS)) { + if (test_sta_flags(sta, WLAN_STA_PS_STA)) { /* * Ignore doze->wake transitions that are * indicated by non-data frames, the standard @@ -891,19 +1110,24 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) */ if (ieee80211_is_data(hdr->frame_control) && !ieee80211_has_pm(hdr->frame_control)) - rx->sent_ps_buffered += ap_sta_ps_end(sta); + ap_sta_ps_end(sta); } else { if (ieee80211_has_pm(hdr->frame_control)) ap_sta_ps_start(sta); } } - /* Drop data::nullfunc frames silently, since they are used only to - * control station power saving mode. */ - if (ieee80211_is_nullfunc(hdr->frame_control)) { + /* + * Drop (qos-)data::nullfunc frames silently, since they + * are used only to control station power saving mode. + */ + if (ieee80211_is_nullfunc(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control)) { I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); - /* Update counter and free packet here to avoid counting this - * as a dropped packed. */ + /* + * Update counter and free packet here to avoid + * counting this as a dropped packed. 
+ */ sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -1103,9 +1327,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - struct sk_buff *skb; - int no_pending_pkts; + struct ieee80211_sub_if_data *sdata = rx->sdata; __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || @@ -1116,56 +1338,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return RX_DROP_UNUSABLE; - skb = skb_dequeue(&rx->sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&rx->sta->ps_tx_buf); - if (skb) - rx->local->total_ps_buffered--; - } - no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && - skb_queue_empty(&rx->sta->ps_tx_buf); - - if (skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - - /* - * Tell TX path to send this frame even though the STA may - * still remain is PS mode after this frame exchange. - */ - info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; - -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", - rx->sta->sta.addr, rx->sta->sta.aid, - skb_queue_len(&rx->sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); - - ieee80211_add_pending_skb(rx->local, skb); - - if (no_pending_pkts) - sta_info_clear_tim_bit(rx->sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - } else if (!rx->sent_ps_buffered) { - /* - * FIXME: This can be the result of a race condition between - * us expiring a frame and the station polling for it. - * Should we send it a null-func frame indicating we - * have nothing buffered for it? - */ - printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " - "though there are no buffered frames for it\n", - rx->dev->name, rx->sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - } + if (!test_sta_flags(rx->sta, WLAN_STA_PS_DRIVER)) + ieee80211_sta_ps_deliver_poll_response(rx->sta); + else + set_sta_flags(rx->sta, WLAN_STA_PSPOLL); /* Free PS Poll skb here instead of returning RX_DROP that would * count as an dropped frame. */ @@ -1206,11 +1382,14 @@ ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) { + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + /* * Pass through unencrypted frames if the hardware has * decrypted them already. */ - if (rx->status->flag & RX_FLAG_DECRYPTED) + if (status->flag & RX_FLAG_DECRYPTED) return 0; /* Drop unencrypted frames if key is set. 
*/ @@ -1224,8 +1403,8 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) rx->key)) return -EACCES; /* BIP does not use Protected field, so need to check MMIE */ - if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) - && ieee80211_get_mmie_keyidx(rx->skb) < 0 && + if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && + ieee80211_get_mmie_keyidx(rx->skb) < 0 && rx->key)) return -EACCES; /* @@ -1244,8 +1423,18 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) static int __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { - struct net_device *dev = rx->dev; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct net_device *dev = sdata->dev; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + + if (ieee80211_has_a4(hdr->frame_control) && + sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) + return -1; + + if (is_multicast_ether_addr(hdr->addr1) && + ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || + (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr))) + return -1; return ieee80211_data_to_8023(rx->skb, dev->dev_addr, sdata->vif.type); } @@ -1264,7 +1453,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) * of whether the frame was encrypted or not. */ if (ehdr->h_proto == htons(ETH_P_PAE) && - (compare_ether_addr(ehdr->h_dest, rx->dev->dev_addr) == 0 || + (compare_ether_addr(ehdr->h_dest, rx->sdata->dev->dev_addr) == 0 || compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) return true; @@ -1281,10 +1470,10 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) static void ieee80211_deliver_skb(struct ieee80211_rx_data *rx) { - struct net_device *dev = rx->dev; + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct net_device *dev = sdata->dev; struct ieee80211_local *local = rx->local; struct sk_buff *skb, *xmit_skb; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; struct sta_info *dsta; @@ -1294,7 +1483,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && - (rx->flags & IEEE80211_RX_RA_MATCH)) { + (rx->flags & IEEE80211_RX_RA_MATCH) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* * send multicast frames both to higher layers in @@ -1337,10 +1527,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb = NULL; } else { u8 *data = skb->data; - size_t len = skb->len; - u8 *new = __skb_push(skb, align); - memmove(new, data, len); - __skb_trim(skb, len); + size_t len = skb_headlen(skb); + skb->data -= align; + memmove(skb->data, data, len); + skb_set_tail_pointer(skb, len); } } #endif @@ -1365,7 +1555,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) { - struct net_device *dev = rx->dev; + struct net_device *dev = rx->sdata->dev; struct ieee80211_local *local = rx->local; u16 ethertype; u8 *payload; @@ -1490,12 +1680,11 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) unsigned int hdrlen; struct sk_buff *skb = rx->skb, *fwd_skb; struct ieee80211_local *local = rx->local; - struct ieee80211_sub_if_data *sdata; + 
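
The two checks added to __ieee80211_data_to_8023() above reject 4-address frames on an AP_VLAN that has no bound 4-addr station, and multicast frames on 4-addr VLAN or 4-addr station interfaces (my reading is that those copies are handled via the ordinary 3-address path). A condensed sketch against an invented iface descriptor, mirroring the return values of the hunk:

/*
 * Condensed version of the two new acceptance checks, written against
 * a made-up iface descriptor rather than struct ieee80211_sub_if_data.
 * Returns 0 to continue processing, -1 to drop.
 */
#include <stdbool.h>

enum iftype { IFTYPE_STATION, IFTYPE_AP, IFTYPE_AP_VLAN };

struct iface {
	enum iftype type;
	bool has_4addr_sta;	/* AP_VLAN: bound 4-addr station   */
	bool use_4addr;		/* STATION: 4-addr (WDS-like) mode */
};

int check_data_to_8023(const struct iface *inf,
		       bool frame_has_a4, bool da_is_multicast)
{
	/* 4-addr frames only make sense on a VLAN with a 4-addr STA. */
	if (frame_has_a4 &&
	    inf->type == IFTYPE_AP_VLAN && !inf->has_4addr_sta)
		return -1;

	/* Multicast is not accepted on 4-addr VLAN / 4-addr STA ifaces. */
	if (da_is_multicast &&
	    ((inf->type == IFTYPE_AP_VLAN && inf->has_4addr_sta) ||
	     (inf->type == IFTYPE_STATION && inf->use_4addr)))
		return -1;

	return 0;
}
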
struct ieee80211_sub_if_data *sdata = rx->sdata; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); - sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); if (!ieee80211_is_data(hdr->frame_control)) return RX_CONTINUE; @@ -1504,19 +1693,27 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* illegal frame */ return RX_DROP_MONITOR; - if (!is_multicast_ether_addr(hdr->addr1) && - (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6)) { + if (mesh_hdr->flags & MESH_FLAGS_AE) { struct mesh_path *mppath; + char *proxied_addr; + char *mpp_addr; + + if (is_multicast_ether_addr(hdr->addr1)) { + mpp_addr = hdr->addr3; + proxied_addr = mesh_hdr->eaddr1; + } else { + mpp_addr = hdr->addr4; + proxied_addr = mesh_hdr->eaddr2; + } rcu_read_lock(); - mppath = mpp_path_lookup(mesh_hdr->eaddr2, sdata); + mppath = mpp_path_lookup(proxied_addr, sdata); if (!mppath) { - mpp_path_add(mesh_hdr->eaddr2, hdr->addr4, sdata); + mpp_path_add(proxied_addr, mpp_addr, sdata); } else { spin_lock_bh(&mppath->state_lock); - mppath->exp_time = jiffies; - if (compare_ether_addr(mppath->mpp, hdr->addr4) != 0) - memcpy(mppath->mpp, hdr->addr4, ETH_ALEN); + if (compare_ether_addr(mppath->mpp, mpp_addr) != 0) + memcpy(mppath->mpp, mpp_addr, ETH_ALEN); spin_unlock_bh(&mppath->state_lock); } rcu_read_unlock(); @@ -1524,7 +1721,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* Frame has reached destination. Don't forward */ if (!is_multicast_ether_addr(hdr->addr1) && - compare_ether_addr(rx->dev->dev_addr, hdr->addr3) == 0) + compare_ether_addr(sdata->dev->dev_addr, hdr->addr3) == 0) return RX_CONTINUE; mesh_hdr->ttl--; @@ -1541,10 +1738,10 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb && net_ratelimit()) printk(KERN_DEBUG "%s: failed to clone mesh frame\n", - rx->dev->name); + sdata->dev->name); fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; - memcpy(fwd_hdr->addr2, rx->dev->dev_addr, ETH_ALEN); + memcpy(fwd_hdr->addr2, sdata->dev->dev_addr, ETH_ALEN); info = IEEE80211_SKB_CB(fwd_skb); memset(info, 0, sizeof(*info)); info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; @@ -1578,7 +1775,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) } if (is_multicast_ether_addr(hdr->addr1) || - rx->dev->flags & IFF_PROMISC) + sdata->dev->flags & IFF_PROMISC) return RX_CONTINUE; else return RX_DROP_MONITOR; @@ -1588,7 +1785,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline ieee80211_rx_h_data(struct ieee80211_rx_data *rx) { - struct net_device *dev = rx->dev; + struct ieee80211_sub_if_data *sdata = rx->sdata; + struct net_device *dev = sdata->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; __le16 fc = hdr->frame_control; int err; @@ -1599,6 +1797,14 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; + /* + * Allow the cooked monitor interface of an AP to see 4-addr frames so + * that a 4-addr station can be detected and moved into a separate VLAN + */ + if (ieee80211_has_a4(hdr->frame_control) && + sdata->vif.type == NL80211_IFTYPE_AP) + return RX_DROP_MONITOR; + err = __ieee80211_data_to_8023(rx); if (unlikely(err)) return RX_DROP_UNUSABLE; @@ -1617,7 +1823,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline -ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) +ieee80211_rx_h_ctrl(struct 
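
The mesh-forwarding hunk above generalizes proxy-path learning to both MESH_FLAGS_AE variants: for multicast frames the proxy/proxied address pair comes from addr3/eaddr1, for unicast from addr4/eaddr2, before the MPP table is updated. The selection alone, with plain byte arrays standing in for the real header structures:

/*
 * Address selection for mesh proxy-path learning, modelled with plain
 * byte arrays instead of struct ieee80211_hdr / ieee80211s_hdr.  The
 * pairing follows the hunk: multicast uses addr3/eaddr1, unicast uses
 * addr4/eaddr2.  Illustrative only.
 */
#include <stdint.h>

#define ETH_ALEN 6

struct mesh_frame {
	uint8_t addr1[ETH_ALEN];	/* RA */
	uint8_t addr3[ETH_ALEN];	/* proxy MP (multicast case)     */
	uint8_t addr4[ETH_ALEN];	/* proxy MP (unicast case)       */
	uint8_t eaddr1[ETH_ALEN];	/* proxied address (multicast)   */
	uint8_t eaddr2[ETH_ALEN];	/* proxied address (unicast)     */
};

static int is_multicast(const uint8_t *addr)
{
	return addr[0] & 0x01;
}

void select_proxy_addrs(const struct mesh_frame *f,
			const uint8_t **mpp_addr,
			const uint8_t **proxied_addr)
{
	if (is_multicast(f->addr1)) {
		*mpp_addr = f->addr3;
		*proxied_addr = f->eaddr1;
	} else {
		*mpp_addr = f->addr4;
		*proxied_addr = f->eaddr2;
	}
}
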
ieee80211_rx_data *rx, struct sk_buff_head *frames) { struct ieee80211_local *local = rx->local; struct ieee80211_hw *hw = &local->hw; @@ -1632,11 +1838,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) if (ieee80211_is_back_req(bar->frame_control)) { if (!rx->sta) - return RX_CONTINUE; + return RX_DROP_MONITOR; tid = le16_to_cpu(bar->control) >> 12; if (rx->sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) - return RX_CONTINUE; + return RX_DROP_MONITOR; tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid]; start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; @@ -1646,13 +1852,11 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(tid_agg_rx->timeout)); - /* manage reordering buffer according to requested */ - /* sequence number */ - rcu_read_lock(); - ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, - start_seq_num, 1); - rcu_read_unlock(); - return RX_DROP_UNUSABLE; + /* release stored frames up to start of BAR */ + ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num, + frames); + kfree_skb(skb); + return RX_QUEUED; } return RX_CONTINUE; @@ -1701,14 +1905,14 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.sa_query.trans_id, WLAN_SA_QUERY_TR_ID_LEN); - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); } static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; int len = rx->skb->len; @@ -1820,7 +2024,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) @@ -1858,11 +2062,11 @@ static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, * Some hardware seem to generate incorrect Michael MIC * reports; ignore them to avoid triggering countermeasures. */ - goto ignore; + return; } if (!ieee80211_has_protected(hdr->frame_control)) - goto ignore; + return; if (rx->sdata->vif.type == NL80211_IFTYPE_AP && keyidx) { /* @@ -1871,37 +2075,35 @@ static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, * group keys and only the AP is sending real multicast * frames in the BSS. 
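
The BAR branch above pulls the TID out of the top four bits of the BAR control field and the starting sequence number out of the upper twelve bits of start_seq_num, then releases the stored reorder-buffer frames up to that point. A standalone extraction of those two fields (host byte order assumed; the kernel code converts from little endian first):

/*
 * Extracting TID and start sequence number from a Block Ack Request,
 * as in the updated ieee80211_rx_h_ctrl().
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int bar_tid(uint16_t bar_control)
{
	return bar_control >> 12;	/* TID_INFO: bits 12..15 */
}

static unsigned int bar_start_seq(uint16_t start_seq_num)
{
	return start_seq_num >> 4;	/* fragment number: bits 0..3 */
}

int main(void)
{
	uint16_t ctl = 0x5004;	/* TID 5, example flags */
	uint16_t ssc = 0x07b0;	/* sequence 123, fragment 0 */

	printf("tid=%u start_seq=%u\n", bar_tid(ctl), bar_start_seq(ssc));
	return 0;
}
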
*/ - goto ignore; + return; } if (!ieee80211_is_data(hdr->frame_control) && !ieee80211_is_auth(hdr->frame_control)) - goto ignore; + return; mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr, NULL, GFP_ATOMIC); - ignore: - dev_kfree_skb(rx->skb); - rx->skb = NULL; } /* TODO: use IEEE80211_RX_FRAGMENTED */ -static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) +static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, + struct ieee80211_rate *rate) { struct ieee80211_sub_if_data *sdata; struct ieee80211_local *local = rx->local; struct ieee80211_rtap_hdr { struct ieee80211_radiotap_header hdr; u8 flags; - u8 rate; + u8 rate_or_pad; __le16 chan_freq; __le16 chan_flags; } __attribute__ ((packed)) *rthdr; struct sk_buff *skb = rx->skb, *skb2; struct net_device *prev_dev = NULL; - struct ieee80211_rx_status *status = rx->status; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - if (rx->flags & IEEE80211_RX_CMNTR_REPORTED) + if (status->flag & RX_FLAG_INTERNAL_CMTR) goto out_free_skb; if (skb_headroom(skb) < sizeof(*rthdr) && @@ -1913,10 +2115,13 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); rthdr->hdr.it_present = cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | (1 << IEEE80211_RADIOTAP_CHANNEL)); - rthdr->rate = rx->rate->bitrate / 5; + if (rate) { + rthdr->rate_or_pad = rate->bitrate / 5; + rthdr->hdr.it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); + } rthdr->chan_freq = cpu_to_le16(status->freq); if (status->band == IEEE80211_BAND_5GHZ) @@ -1959,7 +2164,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) } else goto out_free_skb; - rx->flags |= IEEE80211_RX_CMNTR_REPORTED; + status->flag |= RX_FLAG_INTERNAL_CMTR; return; out_free_skb: @@ -1969,62 +2174,87 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_data *rx, - struct sk_buff *skb) + struct sk_buff *skb, + struct ieee80211_rate *rate) { + struct sk_buff_head reorder_release; ieee80211_rx_result res = RX_DROP_MONITOR; + __skb_queue_head_init(&reorder_release); + rx->skb = skb; rx->sdata = sdata; - rx->dev = sdata->dev; #define CALL_RXH(rxh) \ do { \ res = rxh(rx); \ if (res != RX_CONTINUE) \ - goto rxh_done; \ + goto rxh_next; \ } while (0); + /* + * NB: the rxh_next label works even if we jump + * to it from here because then the list will + * be empty, which is a trivial check + */ CALL_RXH(ieee80211_rx_h_passive_scan) CALL_RXH(ieee80211_rx_h_check) - CALL_RXH(ieee80211_rx_h_decrypt) - CALL_RXH(ieee80211_rx_h_check_more_data) - CALL_RXH(ieee80211_rx_h_sta_process) - CALL_RXH(ieee80211_rx_h_defragment) - CALL_RXH(ieee80211_rx_h_ps_poll) - CALL_RXH(ieee80211_rx_h_michael_mic_verify) - /* must be after MMIC verify so header is counted in MPDU mic */ - CALL_RXH(ieee80211_rx_h_remove_qos_control) - CALL_RXH(ieee80211_rx_h_amsdu) + + ieee80211_rx_reorder_ampdu(rx, &reorder_release); + + while ((skb = __skb_dequeue(&reorder_release))) { + /* + * all the other fields are valid across frames + * that belong to an aMPDU since they are on the + * same TID from the same station + */ + rx->skb = skb; + + CALL_RXH(ieee80211_rx_h_decrypt) + CALL_RXH(ieee80211_rx_h_check_more_data) + CALL_RXH(ieee80211_rx_h_sta_process) + CALL_RXH(ieee80211_rx_h_defragment) + CALL_RXH(ieee80211_rx_h_ps_poll) + CALL_RXH(ieee80211_rx_h_michael_mic_verify) + /* 
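
For cooked monitor above, the radiotap RATE field is now emitted only when a usable legacy rate is known; otherwise the byte stays as padding and the present bitmap omits the bit. A minimal builder showing that conditional field, with radiotap bit numbers per the radiotap spec and a struct mirroring the one in the hunk (byte-order handling left out, helper name is mine):

/*
 * Minimal radiotap header builder mirroring the optional-rate change.
 * Radiotap present bits: FLAGS=1, RATE=2, CHANNEL=3.  Sketch only.
 */
#include <stdint.h>
#include <string.h>

struct rtap_hdr {
	uint8_t  it_version;	/* always 0 */
	uint8_t  it_pad;
	uint16_t it_len;	/* little endian on the wire */
	uint32_t it_present;	/* little endian on the wire */
	uint8_t  flags;
	uint8_t  rate_or_pad;	/* only valid if the RATE bit is set */
	uint16_t chan_freq;
	uint16_t chan_flags;
} __attribute__((packed));

#define RT_FLAGS   (1u << 1)
#define RT_RATE    (1u << 2)
#define RT_CHANNEL (1u << 3)

void build_rtap(struct rtap_hdr *h, int bitrate_100kbps, uint16_t freq)
{
	memset(h, 0, sizeof(*h));
	h->it_len = sizeof(*h);
	h->it_present = RT_FLAGS | RT_CHANNEL;

	if (bitrate_100kbps > 0) {			/* legacy rate known */
		h->rate_or_pad = bitrate_100kbps / 5;	/* 500 kbps units */
		h->it_present |= RT_RATE;
	}
	h->chan_freq = freq;
}
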
must be after MMIC verify so header is counted in MPDU mic */ + CALL_RXH(ieee80211_rx_h_remove_qos_control) + CALL_RXH(ieee80211_rx_h_amsdu) #ifdef CONFIG_MAC80211_MESH - if (ieee80211_vif_is_mesh(&sdata->vif)) - CALL_RXH(ieee80211_rx_h_mesh_fwding); + if (ieee80211_vif_is_mesh(&sdata->vif)) + CALL_RXH(ieee80211_rx_h_mesh_fwding); #endif - CALL_RXH(ieee80211_rx_h_data) - CALL_RXH(ieee80211_rx_h_ctrl) - CALL_RXH(ieee80211_rx_h_action) - CALL_RXH(ieee80211_rx_h_mgmt) + CALL_RXH(ieee80211_rx_h_data) + + /* special treatment -- needs the queue */ + res = ieee80211_rx_h_ctrl(rx, &reorder_release); + if (res != RX_CONTINUE) + goto rxh_next; + + CALL_RXH(ieee80211_rx_h_action) + CALL_RXH(ieee80211_rx_h_mgmt) #undef CALL_RXH - rxh_done: - switch (res) { - case RX_DROP_MONITOR: - I802_DEBUG_INC(sdata->local->rx_handlers_drop); - if (rx->sta) - rx->sta->rx_dropped++; - /* fall through */ - case RX_CONTINUE: - ieee80211_rx_cooked_monitor(rx); - break; - case RX_DROP_UNUSABLE: - I802_DEBUG_INC(sdata->local->rx_handlers_drop); - if (rx->sta) - rx->sta->rx_dropped++; - dev_kfree_skb(rx->skb); - break; - case RX_QUEUED: - I802_DEBUG_INC(sdata->local->rx_handlers_queued); - break; + rxh_next: + switch (res) { + case RX_DROP_MONITOR: + I802_DEBUG_INC(sdata->local->rx_handlers_drop); + if (rx->sta) + rx->sta->rx_dropped++; + /* fall through */ + case RX_CONTINUE: + ieee80211_rx_cooked_monitor(rx, rate); + break; + case RX_DROP_UNUSABLE: + I802_DEBUG_INC(sdata->local->rx_handlers_drop); + if (rx->sta) + rx->sta->rx_dropped++; + dev_kfree_skb(rx->skb); + break; + case RX_QUEUED: + I802_DEBUG_INC(sdata->local->rx_handlers_queued); + break; + } } } @@ -2034,12 +2264,14 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_data *rx, struct ieee80211_hdr *hdr) { - u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, sdata->vif.type); + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); int multicast = is_multicast_ether_addr(hdr->addr1); switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - if (!bssid) + if (!bssid && !sdata->u.mgd.use_4addr) return 0; if (!multicast && compare_ether_addr(sdata->dev->dev_addr, hdr->addr1) != 0) { @@ -2066,10 +2298,10 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } else if (!rx->sta) { int rate_idx; - if (rx->status->flag & RX_FLAG_HT) + if (status->flag & RX_FLAG_HT) rate_idx = 0; /* TODO: HT rates */ else - rate_idx = rx->status->rate_idx; + rate_idx = status->rate_idx; rx->sta = ieee80211_ibss_add_sta(sdata, bssid, hdr->addr2, BIT(rate_idx)); } @@ -2104,8 +2336,6 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, return 0; break; case NL80211_IFTYPE_MONITOR: - /* take everything */ - break; case NL80211_IFTYPE_UNSPECIFIED: case __NL80211_IFTYPE_AFTER_LAST: /* should never get here */ @@ -2138,23 +2368,9 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, rx.skb = skb; rx.local = local; - rx.status = status; - rx.rate = rate; - if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control)) local->dot11ReceivedFragmentCount++; - rx.sta = sta_info_get(local, hdr->addr2); - if (rx.sta) { - rx.sdata = rx.sta->sdata; - rx.dev = rx.sta->sdata->dev; - } - - if ((status->flag & RX_FLAG_MMIC_ERROR)) { - ieee80211_rx_michael_mic_report(hdr, &rx); - return; - } - if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) 
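
The restructured handler invocation above runs the passive-scan and check handlers once, lets the A-MPDU reorder step queue released frames, and then pushes each released frame through the remaining handlers, bailing out to rxh_next as soon as a handler returns anything other than RX_CONTINUE. A generic model of that per-frame loop (types and names are stand-ins, not the mac80211 API):

/*
 * Control-flow model of the reworked RX handler chain: handlers run
 * per released frame, and a non-CONTINUE result ends processing of
 * that frame only.
 */
#include <stdio.h>

enum rx_res { RX_CONTINUE, RX_QUEUED, RX_DROP };

struct frame { int id; struct frame *next; };
struct release_q { struct frame *head; };

static struct frame *dequeue(struct release_q *q)
{
	struct frame *f = q->head;
	if (f)
		q->head = f->next;
	return f;
}

typedef enum rx_res (*rx_handler)(struct frame *);

void invoke_rx_handlers(struct release_q *release,
			const rx_handler *handlers, int n_handlers)
{
	struct frame *f;

	while ((f = dequeue(release))) {
		enum rx_res res = RX_CONTINUE;
		int i;

		for (i = 0; i < n_handlers; i++) {
			res = handlers[i](f);
			if (res != RX_CONTINUE)
				break;	/* this frame is done */
		}

		if (res == RX_DROP)
			printf("frame %d dropped\n", f->id);
	}
}
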
|| test_bit(SCAN_OFF_CHANNEL, &local->scanning))) rx.flags |= IEEE80211_RX_IN_SCAN; @@ -2162,13 +2378,20 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_parse_qos(&rx); ieee80211_verify_alignment(&rx); - skb = rx.skb; + rx.sta = sta_info_get(local, hdr->addr2); + if (rx.sta) + rx.sdata = rx.sta->sdata; if (rx.sdata && ieee80211_is_data(hdr->frame_control)) { rx.flags |= IEEE80211_RX_RA_MATCH; prepares = prepare_for_handlers(rx.sdata, &rx, hdr); - if (prepares) - prev = rx.sdata; + if (prepares) { + if (status->flag & RX_FLAG_MMIC_ERROR) { + if (rx.flags & IEEE80211_RX_RA_MATCH) + ieee80211_rx_michael_mic_report(hdr, &rx); + } else + prev = rx.sdata; + } } else list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!netif_running(sdata->dev)) continue; @@ -2183,6 +2406,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!prepares) continue; + if (status->flag & RX_FLAG_MMIC_ERROR) { + rx.sdata = sdata; + if (rx.flags & IEEE80211_RX_RA_MATCH) + ieee80211_rx_michael_mic_report(hdr, &rx); + continue; + } + /* * frame is destined for this interface, but if it's not * also for the previous one we handle that after the @@ -2208,240 +2438,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, prev->dev->name); continue; } - ieee80211_invoke_rx_handlers(prev, &rx, skb_new); + ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate); prev = sdata; } if (prev) - ieee80211_invoke_rx_handlers(prev, &rx, skb); + ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); else dev_kfree_skb(skb); } -#define SEQ_MODULO 0x1000 -#define SEQ_MASK 0xfff - -static inline int seq_less(u16 sq1, u16 sq2) -{ - return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); -} - -static inline u16 seq_inc(u16 sq) -{ - return (sq + 1) & SEQ_MASK; -} - -static inline u16 seq_sub(u16 sq1, u16 sq2) -{ - return (sq1 - sq2) & SEQ_MASK; -} - - -static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw, - struct tid_ampdu_rx *tid_agg_rx, - int index) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *rate; - struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; - struct ieee80211_rx_status *status; - - if (!skb) - goto no_frame; - - status = IEEE80211_SKB_RXCB(skb); - - /* release the reordered frames to stack */ - sband = hw->wiphy->bands[status->band]; - if (status->flag & RX_FLAG_HT) - rate = sband->bitrates; /* TODO: HT rates */ - else - rate = &sband->bitrates[status->rate_idx]; - __ieee80211_rx_handle_packet(hw, skb, rate); - tid_agg_rx->stored_mpdu_num--; - tid_agg_rx->reorder_buf[index] = NULL; - -no_frame: - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); -} - - -/* - * Timeout (in jiffies) for skb's that are waiting in the RX reorder buffer. If - * the skb was added to the buffer longer than this time ago, the earlier - * frames that have not yet been received are assumed to be lost and the skb - * can be released for processing. This may also release other skb's from the - * reorder buffer if there are no additional gaps between the frames. - */ -#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) - -/* - * As it function blongs to Rx path it must be called with - * the proper rcu_read_lock protection for its flow. 
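
The block being removed above (the reorder logic moves out of the raw RX entry path) is built on 12-bit sequence numbers that wrap at 4096; seq_less() treats a difference larger than half the space as "older". The helpers, reproduced from the hunk with a small wraparound demo:

/*
 * The 12-bit sequence-number helpers used by the reorder buffer,
 * plus a wraparound example.
 */
#include <stdint.h>
#include <stdio.h>

#define SEQ_MODULO 0x1000
#define SEQ_MASK   0xfff

static int seq_less(uint16_t sq1, uint16_t sq2)
{
	return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1);
}

static uint16_t seq_inc(uint16_t sq)
{
	return (sq + 1) & SEQ_MASK;
}

static uint16_t seq_sub(uint16_t sq1, uint16_t sq2)
{
	return (sq1 - sq2) & SEQ_MASK;
}

int main(void)
{
	/* 4095 wraps to 0, and 5 is "newer" than 4090 across the wrap. */
	printf("inc(4095)=%u\n", seq_inc(4095));
	printf("less(4090, 5)=%d\n", seq_less(4090, 5));
	printf("sub(5, 4090)=%u\n", seq_sub(5, 4090));
	return 0;
}
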
- */ -static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, - struct tid_ampdu_rx *tid_agg_rx, - struct sk_buff *skb, - u16 mpdu_seq_num, - int bar_req) -{ - u16 head_seq_num, buf_size; - int index; - - buf_size = tid_agg_rx->buf_size; - head_seq_num = tid_agg_rx->head_seq_num; - - /* frame with out of date sequence number */ - if (seq_less(mpdu_seq_num, head_seq_num)) { - dev_kfree_skb(skb); - return 1; - } - - /* if frame sequence number exceeds our buffering window size or - * block Ack Request arrived - release stored frames */ - if ((!seq_less(mpdu_seq_num, head_seq_num + buf_size)) || (bar_req)) { - /* new head to the ordering buffer */ - if (bar_req) - head_seq_num = mpdu_seq_num; - else - head_seq_num = - seq_inc(seq_sub(mpdu_seq_num, buf_size)); - /* release stored frames up to new head to stack */ - while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = seq_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) - % tid_agg_rx->buf_size; - ieee80211_release_reorder_frame(hw, tid_agg_rx, - index); - } - if (bar_req) - return 1; - } - - /* now the new frame is always in the range of the reordering */ - /* buffer window */ - index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) - % tid_agg_rx->buf_size; - /* check if we already stored this frame */ - if (tid_agg_rx->reorder_buf[index]) { - dev_kfree_skb(skb); - return 1; - } - - /* if arrived mpdu is in the right order and nothing else stored */ - /* release it immediately */ - if (mpdu_seq_num == tid_agg_rx->head_seq_num && - tid_agg_rx->stored_mpdu_num == 0) { - tid_agg_rx->head_seq_num = - seq_inc(tid_agg_rx->head_seq_num); - return 0; - } - - /* put the frame in the reordering buffer */ - tid_agg_rx->reorder_buf[index] = skb; - tid_agg_rx->reorder_time[index] = jiffies; - tid_agg_rx->stored_mpdu_num++; - /* release the buffer until next missing frame */ - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) - % tid_agg_rx->buf_size; - if (!tid_agg_rx->reorder_buf[index] && - tid_agg_rx->stored_mpdu_num > 1) { - /* - * No buffers ready to be released, but check whether any - * frames in the reorder buffer have timed out. - */ - int j; - int skipped = 1; - for (j = (index + 1) % tid_agg_rx->buf_size; j != index; - j = (j + 1) % tid_agg_rx->buf_size) { - if (tid_agg_rx->reorder_buf[j] == NULL) { - skipped++; - continue; - } - if (!time_after(jiffies, tid_agg_rx->reorder_time[j] + - HZ / 10)) - break; - -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "%s: release an RX reorder " - "frame due to timeout on earlier " - "frames\n", - wiphy_name(hw->wiphy)); -#endif - ieee80211_release_reorder_frame(hw, tid_agg_rx, j); - - /* - * Increment the head seq# also for the skipped slots. 
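
When an MPDU lands beyond the reorder window, or a BAR moves it, the code above advances head_seq_num and flushes every slot it walks over, indexing the ring with seq_sub(seq, ssn) % buf_size. A miniature version of the window advance (buffer capped at 64 slots here, and the caller is assumed to have verified that new_head lies ahead of the current head):

/*
 * Window-advance logic of the reorder buffer in miniature: an MPDU
 * past the current window moves head_seq_num forward and releases
 * the slots it skips over.  Stand-alone sketch, not kernel code.
 */
#include <stdio.h>

#define SEQ_MASK 0xfff

static unsigned int seq_sub(unsigned int a, unsigned int b)
{
	return (a - b) & SEQ_MASK;
}

struct reorder {
	unsigned int ssn;		/* starting sequence number   */
	unsigned int head_seq_num;	/* lowest not-yet-released sn */
	unsigned int buf_size;		/* <= 64 in this sketch       */
	int stored[64];			/* 1 = frame waiting in slot  */
};

static void release_frame(struct reorder *r, unsigned int index)
{
	if (r->stored[index])
		printf("release slot %u\n", index);
	r->stored[index] = 0;
	r->head_seq_num = (r->head_seq_num + 1) & SEQ_MASK;
}

/* Advance the window head, releasing everything it passes over. */
void advance_window(struct reorder *r, unsigned int new_head)
{
	while (r->head_seq_num != new_head) {
		unsigned int index =
			seq_sub(r->head_seq_num, r->ssn) % r->buf_size;
		release_frame(r, index);
	}
}
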
- */ - tid_agg_rx->head_seq_num = - (tid_agg_rx->head_seq_num + skipped) & - SEQ_MASK; - skipped = 0; - } - } else while (tid_agg_rx->reorder_buf[index]) { - ieee80211_release_reorder_frame(hw, tid_agg_rx, index); - index = seq_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; - } - return 1; -} - -static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, - struct sk_buff *skb) -{ - struct ieee80211_hw *hw = &local->hw; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta; - struct tid_ampdu_rx *tid_agg_rx; - u16 sc; - u16 mpdu_seq_num; - u8 ret = 0; - int tid; - - sta = sta_info_get(local, hdr->addr2); - if (!sta) - return ret; - - /* filter the QoS data rx stream according to - * STA/TID and check if this STA/TID is on aggregation */ - if (!ieee80211_is_data_qos(hdr->frame_control)) - goto end_reorder; - - tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; - - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) - goto end_reorder; - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; - - /* qos null data frames are excluded */ - if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) - goto end_reorder; - - /* new un-ordered ampdu frame - process it */ - - /* reset session timer */ - if (tid_agg_rx->timeout) - mod_timer(&tid_agg_rx->session_timer, - TU_TO_EXP_TIME(tid_agg_rx->timeout)); - - /* if this mpdu is fragmented - terminate rx aggregation session */ - sc = le16_to_cpu(hdr->seq_ctrl); - if (sc & IEEE80211_SCTL_FRAG) { - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, - tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); - ret = 1; - goto end_reorder; - } - - /* according to mpdu sequence number deal with reordering buffer */ - mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; - ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, - mpdu_seq_num, 0); - end_reorder: - return ret; -} - /* * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. @@ -2481,14 +2486,22 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) goto drop; if (status->flag & RX_FLAG_HT) { - /* rate_idx is MCS index */ - if (WARN_ON(status->rate_idx < 0 || - status->rate_idx >= 76)) + /* + * rate_idx is MCS index, which can be [0-76] as documented on: + * + * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n + * + * Anything else would be some sort of driver or hardware error. + * The driver should catch hardware errors. + */ + if (WARN((status->rate_idx < 0 || + status->rate_idx > 76), + "Rate marked as an HT rate but passed " + "status->rate_idx is not " + "an MCS index [0-76]: %d (0x%02x)\n", + status->rate_idx, + status->rate_idx)) goto drop; - /* HT rates are not in the table - use the highest legacy rate - * for now since other parts of mac80211 may not yet be fully - * MCS aware. */ - rate = &sband->bitrates[sband->n_bitrates - 1]; } else { if (WARN_ON(status->rate_idx < 0 || status->rate_idx >= sband->n_bitrates)) @@ -2515,20 +2528,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) return; } - /* - * In theory, the block ack reordering should happen after duplicate - * removal (ieee80211_rx_h_check(), which is an RX handler). As such, - * the call to ieee80211_rx_reorder_ampdu() should really be moved to - * happen as a new RX handler between ieee80211_rx_h_check and - * ieee80211_rx_h_decrypt. 
This cleanup may eventually happen, but for - * the time being, the call can be here since RX reorder buf processing - * will implicitly skip duplicates. We could, in theory at least, - * process frames that ieee80211_rx_h_passive_scan would drop (e.g., - * frames from other than operational channel), but that should not - * happen in normal networks. - */ - if (!ieee80211_rx_reorder_ampdu(local, skb)) - __ieee80211_rx_handle_packet(hw, skb, rate); + __ieee80211_rx_handle_packet(hw, skb, rate); rcu_read_unlock(); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 71e10cabf81..f1a4c716030 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -12,8 +12,6 @@ * published by the Free Software Foundation. */ -/* TODO: figure out how to avoid that the "current BSS" expires */ - #include <linux/wireless.h> #include <linux/if_arp.h> #include <linux/rtnetlink.h> @@ -189,6 +187,39 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) return RX_QUEUED; } +/* return false if no more work */ +static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) +{ + struct cfg80211_scan_request *req = local->scan_req; + enum ieee80211_band band; + int i, ielen, n_chans; + + do { + if (local->hw_scan_band == IEEE80211_NUM_BANDS) + return false; + + band = local->hw_scan_band; + n_chans = 0; + for (i = 0; i < req->n_channels; i++) { + if (req->channels[i]->band == band) { + local->hw_scan_req->channels[n_chans] = + req->channels[i]; + n_chans++; + } + } + + local->hw_scan_band++; + } while (!n_chans); + + local->hw_scan_req->n_channels = n_chans; + + ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, + req->ie, req->ie_len, band); + local->hw_scan_req->ie_len = ielen; + + return true; +} + /* * inform AP that we will go to sleep so that it will buffer the frames * while we scan @@ -196,7 +227,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) static void ieee80211_scan_ps_enable(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - bool ps = false; + + local->scan_ps_enabled = false; /* FIXME: what to do when local->pspolling is true? */ @@ -204,12 +236,13 @@ static void ieee80211_scan_ps_enable(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&local->dynamic_ps_enable_work); if (local->hw.conf.flags & IEEE80211_CONF_PS) { - ps = true; + local->scan_ps_enabled = true; local->hw.conf.flags &= ~IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - if (!ps || !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + if (!(local->scan_ps_enabled) || + !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) /* * If power save was enabled, no need to send a nullfunc * frame because AP knows that we are sleeping. 
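
ieee80211_prep_hw_scan() above walks the bands one at a time, copying only the requested channels of the current band into the hardware scan request and skipping bands with no matches; a false return means the scan is finished. The same selection with plain arrays instead of the cfg80211 structures:

/*
 * Per-band channel selection as done by the new ieee80211_prep_hw_scan(),
 * using plain arrays.  Returns 0 when all bands have been consumed.
 */
#include <stddef.h>

enum band { BAND_2GHZ, BAND_5GHZ, NUM_BANDS };

struct channel { enum band band; int center_freq; };

struct scan_state {
	enum band next_band;		/* like local->hw_scan_band */
	const struct channel **out;	/* hw request channel list  */
	size_t n_out;
};

int prep_next_band(struct scan_state *s,
		   const struct channel *req, size_t n_req)
{
	size_t i, n_chans;

	do {
		if (s->next_band == NUM_BANDS)
			return 0;		/* nothing left to scan */

		n_chans = 0;
		for (i = 0; i < n_req; i++)
			if (req[i].band == s->next_band)
				s->out[n_chans++] = &req[i];

		s->next_band++;
	} while (!n_chans);			/* skip empty bands */

	s->n_out = n_chans;
	return 1;
}
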
But if the @@ -230,7 +263,7 @@ static void ieee80211_scan_ps_disable(struct ieee80211_sub_if_data *sdata) if (!local->ps_sdata) ieee80211_send_nullfunc(local, sdata, 0); - else { + else if (local->scan_ps_enabled) { /* * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware * will send a nullfunc frame with the powersave bit set @@ -246,16 +279,19 @@ static void ieee80211_scan_ps_disable(struct ieee80211_sub_if_data *sdata) */ local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } else if (local->hw.conf.dynamic_ps_timeout > 0) { + /* + * If IEEE80211_CONF_PS was not set and the dynamic_ps_timer + * had been running before leaving the operating channel, + * restart the timer now and send a nullfunc frame to inform + * the AP that we are awake. + */ + ieee80211_send_nullfunc(local, sdata, 0); + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } } -static void ieee80211_restore_scan_ies(struct ieee80211_local *local) -{ - kfree(local->scan_req->ie); - local->scan_req->ie = local->orig_ies; - local->scan_req->ie_len = local->orig_ies_len; -} - void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); @@ -264,25 +300,36 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) mutex_lock(&local->scan_mtx); - if (WARN_ON(!local->scanning)) { + /* + * It's ok to abort a not-yet-running scan (that + * we have one at all will be verified by checking + * local->scan_req next), but not to complete it + * successfully. + */ + if (WARN_ON(!local->scanning && !aborted)) + aborted = true; + + if (WARN_ON(!local->scan_req)) { mutex_unlock(&local->scan_mtx); return; } - if (WARN_ON(!local->scan_req)) { + was_hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); + if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { + ieee80211_queue_delayed_work(&local->hw, + &local->scan_work, 0); mutex_unlock(&local->scan_mtx); return; } - if (test_bit(SCAN_HW_SCANNING, &local->scanning)) - ieee80211_restore_scan_ies(local); + kfree(local->hw_scan_req); + local->hw_scan_req = NULL; if (local->scan_req != local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); local->scan_req = NULL; local->scan_sdata = NULL; - was_hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); local->scanning = 0; local->scan_channel = NULL; @@ -306,10 +353,10 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) { ieee80211_scan_ps_disable(sdata); - netif_tx_wake_all_queues(sdata->dev); + netif_wake_queue(sdata->dev); } } else - netif_tx_wake_all_queues(sdata->dev); + netif_wake_queue(sdata->dev); /* re-enable beaconing */ if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -364,7 +411,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) * are handled in the scan state machine */ if (sdata->vif.type != NL80211_IFTYPE_STATION) - netif_tx_stop_all_queues(sdata->dev); + netif_stop_queue(sdata->dev); } mutex_unlock(&local->iflist_mtx); @@ -394,19 +441,23 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->ops->hw_scan) { u8 *ies; - int ielen; - ies = kmalloc(2 + IEEE80211_MAX_SSID_LEN + - local->scan_ies_len + req->ie_len, GFP_KERNEL); - if (!ies) + local->hw_scan_req = kmalloc( + sizeof(*local->hw_scan_req) + + req->n_channels * sizeof(req->channels[0]) + + 2 + IEEE80211_MAX_SSID_LEN + local->scan_ies_len + + 
req->ie_len, GFP_KERNEL); + if (!local->hw_scan_req) return -ENOMEM; - ielen = ieee80211_build_preq_ies(local, ies, - req->ie, req->ie_len); - local->orig_ies = req->ie; - local->orig_ies_len = req->ie_len; - req->ie = ies; - req->ie_len = ielen; + local->hw_scan_req->ssids = req->ssids; + local->hw_scan_req->n_ssids = req->n_ssids; + ies = (u8 *)local->hw_scan_req + + sizeof(*local->hw_scan_req) + + req->n_channels * sizeof(req->channels[0]); + local->hw_scan_req->ie = ies; + + local->hw_scan_band = 0; } local->scan_req = req; @@ -438,16 +489,17 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); mutex_unlock(&local->scan_mtx); - if (local->ops->hw_scan) - rc = drv_hw_scan(local, local->scan_req); - else + if (local->ops->hw_scan) { + WARN_ON(!ieee80211_prep_hw_scan(local)); + rc = drv_hw_scan(local, local->hw_scan_req); + } else rc = ieee80211_start_sw_scan(local); mutex_lock(&local->scan_mtx); if (rc) { - if (local->ops->hw_scan) - ieee80211_restore_scan_ies(local); + kfree(local->hw_scan_req); + local->hw_scan_req = NULL; local->scanning = 0; ieee80211_recalc_idle(local); @@ -523,7 +575,7 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca continue; if (sdata->vif.type == NL80211_IFTYPE_STATION) { - netif_tx_stop_all_queues(sdata->dev); + netif_stop_queue(sdata->dev); if (sdata->u.mgd.associated) ieee80211_scan_ps_enable(sdata); } @@ -558,7 +610,7 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) ieee80211_scan_ps_disable(sdata); - netif_tx_wake_all_queues(sdata->dev); + netif_wake_queue(sdata->dev); } } mutex_unlock(&local->iflist_mtx); @@ -574,23 +626,14 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, { int skip; struct ieee80211_channel *chan; - struct ieee80211_sub_if_data *sdata = local->scan_sdata; skip = 0; chan = local->scan_req->channels[local->scan_channel_idx]; - if (chan->flags & IEEE80211_CHAN_DISABLED || - (sdata->vif.type == NL80211_IFTYPE_ADHOC && - chan->flags & IEEE80211_CHAN_NO_IBSS)) + local->scan_channel = chan; + if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) skip = 1; - if (!skip) { - local->scan_channel = chan; - if (ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_CHANNEL)) - skip = 1; - } - /* advance state machine to next channel/band */ local->scan_channel_idx++; @@ -656,6 +699,14 @@ void ieee80211_scan_work(struct work_struct *work) return; } + if (local->hw_scan_req) { + int rc = drv_hw_scan(local, local->hw_scan_req); + mutex_unlock(&local->scan_mtx); + if (rc) + ieee80211_scan_completed(&local->hw, true); + return; + } + if (local->scan_req && !local->scanning) { struct cfg80211_scan_request *req = local->scan_req; int rc; diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 68953033403..aa743a895cf 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -65,7 +65,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; - ieee80211_tx_skb(sdata, skb, 1); + ieee80211_tx_skb(sdata, skb); } void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 594f2318c3d..71f370dd24b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -116,14 +116,15 @@ 
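
The allocation above packs the hardware scan request, its channel-pointer array and the probe-request IE buffer into one kmalloc(), with the ie pointer computed at the tail. A generic user-space sketch of that trailing-buffer layout (struct and function names are mine, not the cfg80211 types):

/*
 * One-allocation layout: a fixed struct, a pointer array and a byte
 * buffer carved out of a single malloc() block.
 */
#include <stdint.h>
#include <stdlib.h>

struct chan;				/* opaque here */

struct hw_scan_req {
	struct chan **channels;		/* points into the same block */
	size_t n_channels;
	uint8_t *ie;			/* ditto                      */
	size_t ie_len;
};

struct hw_scan_req *alloc_scan_req(size_t n_channels, size_t max_ie_len)
{
	struct hw_scan_req *req;
	size_t size = sizeof(*req) +
		      n_channels * sizeof(struct chan *) +
		      max_ie_len;

	req = malloc(size);
	if (!req)
		return NULL;

	/* Channel array sits right after the struct, IE buffer after that. */
	req->channels = (struct chan **)(req + 1);
	req->n_channels = n_channels;
	req->ie = (uint8_t *)(req->channels + n_channels);
	req->ie_len = 0;
	return req;
}
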
struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) return sta; } -struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, - struct net_device *dev) +struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, + int idx) { + struct ieee80211_local *local = sdata->local; struct sta_info *sta; int i = 0; list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (dev && dev != sta->sdata->dev) + if (sdata != sta->sdata) continue; if (i < idx) { ++i; @@ -147,8 +148,10 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, static void __sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { - rate_control_free_sta(sta); - rate_control_put(sta->rate_ctrl); + if (sta->rate_ctrl) { + rate_control_free_sta(sta); + rate_control_put(sta->rate_ctrl); + } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Destroyed STA %pM\n", @@ -171,6 +174,8 @@ void sta_info_destroy(struct sta_info *sta) local = sta->local; + cancel_work_sync(&sta->drv_unblock_wk); + rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); @@ -259,6 +264,38 @@ static void sta_info_hash_add(struct ieee80211_local *local, rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } +static void sta_unblock(struct work_struct *wk) +{ + struct sta_info *sta; + + sta = container_of(wk, struct sta_info, drv_unblock_wk); + + if (sta->dead) + return; + + if (!test_sta_flags(sta, WLAN_STA_PS_STA)) + ieee80211_sta_ps_deliver_wakeup(sta); + else if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) + ieee80211_sta_ps_deliver_poll_response(sta); +} + +static int sta_prepare_rate_control(struct ieee80211_local *local, + struct sta_info *sta, gfp_t gfp) +{ + if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) + return 0; + + sta->rate_ctrl = rate_control_get(local->rate_ctrl); + sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, + &sta->sta, gfp); + if (!sta->rate_ctrl_priv) { + rate_control_put(sta->rate_ctrl); + return -ENOMEM; + } + + return 0; +} + struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, gfp_t gfp) { @@ -272,16 +309,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); + INIT_WORK(&sta->drv_unblock_wk, sta_unblock); memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; - sta->rate_ctrl = rate_control_get(local->rate_ctrl); - sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - &sta->sta, gfp); - if (!sta->rate_ctrl_priv) { - rate_control_put(sta->rate_ctrl); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; } @@ -478,8 +512,10 @@ static void __sta_info_unlink(struct sta_info **sta) } list_del(&(*sta)->list); + (*sta)->dead = true; - if (test_and_clear_sta_flags(*sta, WLAN_STA_PS)) { + if (test_and_clear_sta_flags(*sta, + WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) { BUG_ON(!sdata->bss); atomic_dec(&sdata->bss->num_sta_ps); @@ -489,6 +525,9 @@ static void __sta_info_unlink(struct sta_info **sta) local->num_sta--; local->sta_generation++; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + rcu_assign_pointer(sdata->u.vlan.sta, NULL); + if (local->ops->sta_notify) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -801,8 +840,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, sta_info_destroy(sta); } -struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, - const u8 *addr) 
+struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, + const u8 *addr) { struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); @@ -810,4 +849,114 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, return NULL; return &sta->sta; } +EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); + +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, + const u8 *addr) +{ + struct ieee80211_sub_if_data *sdata; + + if (!vif) + return NULL; + + sdata = vif_to_sdata(vif); + + return ieee80211_find_sta_by_hw(&sdata->local->hw, addr); +} EXPORT_SYMBOL(ieee80211_find_sta); + +/* powersave support code */ +void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + int sent, buffered; + + drv_sta_notify(local, &sdata->vif, STA_NOTIFY_AWAKE, &sta->sta); + + if (!skb_queue_empty(&sta->ps_tx_buf)) + sta_info_clear_tim_bit(sta); + + /* Send all buffered frames to the station */ + sent = ieee80211_add_pending_skbs(local, &sta->tx_filtered); + buffered = ieee80211_add_pending_skbs(local, &sta->ps_tx_buf); + sent += buffered; + local->total_ps_buffered -= buffered; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA %pM aid %d sending %d filtered/%d PS frames " + "since STA not sleeping anymore\n", sdata->dev->name, + sta->sta.addr, sta->sta.aid, sent - buffered, buffered); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ +} + +void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + int no_pending_pkts; + + skb = skb_dequeue(&sta->tx_filtered); + if (!skb) { + skb = skb_dequeue(&sta->ps_tx_buf); + if (skb) + local->total_ps_buffered--; + } + no_pending_pkts = skb_queue_empty(&sta->tx_filtered) && + skb_queue_empty(&sta->ps_tx_buf); + + if (skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + + /* + * Tell TX path to send this frame even though the STA may + * still remain is PS mode after this frame exchange. + */ + info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", + sta->sta.addr, sta->sta.aid, + skb_queue_len(&sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + /* Use MoreData flag to indicate whether there are more + * buffered frames for this STA */ + if (no_pending_pkts) + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + ieee80211_add_pending_skb(local, skb); + + if (no_pending_pkts) + sta_info_clear_tim_bit(sta); +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + } else { + /* + * FIXME: This can be the result of a race condition between + * us expiring a frame and the station polling for it. + * Should we send it a null-func frame indicating we + * have nothing buffered for it? 
+ */ + printk(KERN_DEBUG "%s: STA %pM sent PS Poll even " + "though there are no buffered frames for it\n", + sdata->dev->name, sta->sta.addr); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + } +} + +void ieee80211_sta_block_awake(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, bool block) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + if (block) + set_sta_flags(sta, WLAN_STA_PS_DRIVER); + else + ieee80211_queue_work(hw, &sta->drv_unblock_wk); +} +EXPORT_SYMBOL(ieee80211_sta_block_awake); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ccc3adf962c..b4810f6aa94 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -12,6 +12,7 @@ #include <linux/list.h> #include <linux/types.h> #include <linux/if_ether.h> +#include <linux/workqueue.h> #include "key.h" /** @@ -21,7 +22,7 @@ * * @WLAN_STA_AUTH: Station is authenticated. * @WLAN_STA_ASSOC: Station is associated. - * @WLAN_STA_PS: Station is in power-save mode + * @WLAN_STA_PS_STA: Station is in power-save mode * @WLAN_STA_AUTHORIZED: Station is authorized to send/receive traffic. * This bit is always checked so needs to be enabled for all stations * when virtual port control is not in use. @@ -36,11 +37,16 @@ * @WLAN_STA_MFP: Management frame protection is used with this STA. * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle. * Used to deny ADDBA requests (both TX and RX). + * @WLAN_STA_PS_DRIVER: driver requires keeping this station in + * power-save mode logically to flush frames that might still + * be in the queues + * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping + * station in power-save mode, reply when the driver unblocks. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, WLAN_STA_ASSOC = 1<<1, - WLAN_STA_PS = 1<<2, + WLAN_STA_PS_STA = 1<<2, WLAN_STA_AUTHORIZED = 1<<3, WLAN_STA_SHORT_PREAMBLE = 1<<4, WLAN_STA_ASSOC_AP = 1<<5, @@ -48,7 +54,9 @@ enum ieee80211_sta_info_flags { WLAN_STA_WDS = 1<<7, WLAN_STA_CLEAR_PS_FILT = 1<<9, WLAN_STA_MFP = 1<<10, - WLAN_STA_SUSPEND = 1<<11 + WLAN_STA_SUSPEND = 1<<11, + WLAN_STA_PS_DRIVER = 1<<12, + WLAN_STA_PSPOLL = 1<<13, }; #define STA_TID_NUM 16 @@ -177,6 +185,7 @@ struct sta_ampdu_mlme { * @lock: used for locking all fields that require locking, see comments * in the header file. 
* @flaglock: spinlock for flags accesses + * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags @@ -193,7 +202,6 @@ struct sta_ampdu_mlme { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA - * @last_qual: qual of last received frame from this STA * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA @@ -217,6 +225,7 @@ struct sta_ampdu_mlme { * @plink_timer_was_running: used by suspend/resume to restore timers * @debugfs: debug filesystem info * @sta: station information we share with the driver + * @dead: set to true when sta is unlinked */ struct sta_info { /* General information, mostly static */ @@ -230,8 +239,12 @@ struct sta_info { spinlock_t lock; spinlock_t flaglock; + struct work_struct drv_unblock_wk; + u16 listen_interval; + bool dead; + /* * for use by the internal lifetime management, * see __sta_info_unlink @@ -259,7 +272,6 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; - int last_qual; int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; @@ -301,28 +313,6 @@ struct sta_info { #ifdef CONFIG_MAC80211_DEBUGFS struct sta_info_debugfsdentries { struct dentry *dir; - struct dentry *flags; - struct dentry *num_ps_buf_frames; - struct dentry *inactive_ms; - struct dentry *last_seq_ctrl; - struct dentry *agg_status; - struct dentry *aid; - struct dentry *dev; - struct dentry *rx_packets; - struct dentry *tx_packets; - struct dentry *rx_bytes; - struct dentry *tx_bytes; - struct dentry *rx_duplicates; - struct dentry *rx_fragments; - struct dentry *rx_dropped; - struct dentry *tx_fragments; - struct dentry *tx_filtered; - struct dentry *tx_retry_failed; - struct dentry *tx_retry_count; - struct dentry *last_signal; - struct dentry *last_qual; - struct dentry *last_noise; - struct dentry *wep_weak_iv_count; bool add_has_run; } debugfs; #endif @@ -419,8 +409,8 @@ struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr); /* * Get STA info by index, BROKEN! */ -struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, - struct net_device *dev); +struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, + int idx); /* * Create a new STA info, caller owns returned structure * until sta_info_insert(). @@ -454,4 +444,7 @@ int sta_info_flush(struct ieee80211_local *local, void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); +void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta); +void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta); + #endif /* STA_INFO_H */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c new file mode 100644 index 00000000000..d78f36c64c7 --- /dev/null +++ b/net/mac80211/status.c @@ -0,0 +1,341 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. 
+ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "rate.h" +#include "mesh.h" +#include "led.h" + + +void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, + struct sk_buff *skb) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int tmp; + + skb->pkt_type = IEEE80211_TX_STATUS_MSG; + skb_queue_tail(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS ? + &local->skb_queue : &local->skb_queue_unreliable, skb); + tmp = skb_queue_len(&local->skb_queue) + + skb_queue_len(&local->skb_queue_unreliable); + while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT && + (skb = skb_dequeue(&local->skb_queue_unreliable))) { + dev_kfree_skb_irq(skb); + tmp--; + I802_DEBUG_INC(local->tx_status_drop); + } + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_tx_status_irqsafe); + +static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + /* + * XXX: This is temporary! + * + * The problem here is that when we get here, the driver will + * quite likely have pretty much overwritten info->control by + * using info->driver_data or info->rate_driver_data. Thus, + * when passing out the frame to the driver again, we would be + * passing completely bogus data since the driver would then + * expect a properly filled info->control. In mac80211 itself + * the same problem occurs, since we need info->control.vif + * internally. + * + * To fix this, we should send the frame through TX processing + * again. However, it's not that simple, since the frame will + * have been software-encrypted (if applicable) already, and + * encrypting it again doesn't do much good. So to properly do + * that, we not only have to skip the actual 'raw' encryption + * (key selection etc. still has to be done!) but also the + * sequence number assignment since that impacts the crypto + * encapsulation, of course. + * + * Hence, for now, fix the bug by just dropping the frame. + */ + goto drop; + + sta->tx_filtered_count++; + + /* + * Clear the TX filter mask for this STA when sending the next + * packet. If the STA went to power save mode, this will happen + * when it wakes up for the next time. + */ + set_sta_flags(sta, WLAN_STA_CLEAR_PS_FILT); + + /* + * This code races in the following way: + * + * (1) STA sends frame indicating it will go to sleep and does so + * (2) hardware/firmware adds STA to filter list, passes frame up + * (3) hardware/firmware processes TX fifo and suppresses a frame + * (4) we get TX status before having processed the frame and + * knowing that the STA has gone to sleep. + * + * This is actually quite unlikely even when both those events are + * processed from interrupts coming in quickly after one another or + * even at the same time because we queue both TX status events and + * RX frames to be processed by a tasklet and process them in the + * same order that they were received or TX status last. Hence, there + * is no race as long as the frame RX is processed before the next TX + * status, which drivers can ensure, see below. 
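
ieee80211_tx_status_irqsafe(), collected into the new status.c above, queues a status frame on the reliable or unreliable list depending on IEEE80211_TX_CTL_REQ_TX_STATUS and trims the unreliable list once the combined length passes IEEE80211_IRQSAFE_QUEUE_LIMIT. The overflow policy in miniature, with counted lists and an arbitrary stand-in limit:

/*
 * Model of the irq-safe status path's overflow policy: frames that
 * requested status go on the reliable queue, everything else on the
 * unreliable queue, and the unreliable queue is trimmed first once
 * the combined length exceeds the limit.
 */
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_LIMIT 16	/* stand-in for IEEE80211_IRQSAFE_QUEUE_LIMIT */

struct queue { int len; };

static void enqueue(struct queue *q) { q->len++; }

static bool dequeue(struct queue *q)
{
	if (!q->len)
		return false;
	q->len--;
	return true;
}

void status_irqsafe(struct queue *reliable, struct queue *unreliable,
		    bool wants_status)
{
	enqueue(wants_status ? reliable : unreliable);

	/* Trim the unreliable queue until the total fits again. */
	while (reliable->len + unreliable->len > QUEUE_LIMIT &&
	       dequeue(unreliable))
		printf("dropped one unreliable status frame\n");
}
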
+ * + * Note that this can only happen if the hardware or firmware can + * actually add STAs to the filter list, if this is done by the + * driver in response to set_tim() (which will only reduce the race + * this whole filtering tries to solve, not completely solve it) + * this situation cannot happen. + * + * To completely solve this race drivers need to make sure that they + * (a) don't mix the irq-safe/not irq-safe TX status/RX processing + * functions and + * (b) always process RX events before TX status events if ordering + * can be unknown, for example with different interrupt status + * bits. + */ + if (test_sta_flags(sta, WLAN_STA_PS_STA) && + skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { + skb_queue_tail(&sta->tx_filtered, skb); + return; + } + + if (!test_sta_flags(sta, WLAN_STA_PS_STA) && + !(info->flags & IEEE80211_TX_INTFL_RETRIED)) { + /* Software retry the packet once */ + info->flags |= IEEE80211_TX_INTFL_RETRIED; + ieee80211_add_pending_skb(local, skb); + return; + } + + drop: +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped TX filtered frame, " + "queue_len=%d PS=%d @%lu\n", + wiphy_name(local->hw.wiphy), + skb_queue_len(&sta->tx_filtered), + !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies); +#endif + dev_kfree_skb(skb); +} + +void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct sk_buff *skb2; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u16 frag, type; + __le16 fc; + struct ieee80211_supported_band *sband; + struct ieee80211_tx_status_rtap_hdr *rthdr; + struct ieee80211_sub_if_data *sdata; + struct net_device *prev_dev = NULL; + struct sta_info *sta; + int retry_count = -1, i; + bool injected; + + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* the HW cannot have attempted that rate */ + if (i >= hw->max_rates) { + info->status.rates[i].idx = -1; + info->status.rates[i].count = 0; + } + + retry_count += info->status.rates[i].count; + } + if (retry_count < 0) + retry_count = 0; + + rcu_read_lock(); + + sband = local->hw.wiphy->bands[info->band]; + + sta = sta_info_get(local, hdr->addr1); + + if (sta) { + if (!(info->flags & IEEE80211_TX_STAT_ACK) && + test_sta_flags(sta, WLAN_STA_PS_STA)) { + /* + * The STA is in power save mode, so assume + * that this TX packet failed because of that. + */ + ieee80211_handle_filtered_frame(local, sta, skb); + rcu_read_unlock(); + return; + } + + fc = hdr->frame_control; + + if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && + (ieee80211_is_data_qos(fc))) { + u16 tid, ssn; + u8 *qc; + + qc = ieee80211_get_qos_ctl(hdr); + tid = qc[0] & 0xf; + ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) + & IEEE80211_SCTL_SEQ); + ieee80211_send_bar(sta->sdata, hdr->addr1, + tid, ssn); + } + + if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { + ieee80211_handle_filtered_frame(local, sta, skb); + rcu_read_unlock(); + return; + } else { + if (!(info->flags & IEEE80211_TX_STAT_ACK)) + sta->tx_retry_failed++; + sta->tx_retry_count += retry_count; + } + + rate_control_tx_status(local, sband, sta, skb); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, skb); + } + + rcu_read_unlock(); + + ieee80211_led_tx(local, 0); + + /* SNMP counters + * Fragments are passed to low-level drivers as separate skbs, so these + * are actually fragments, not frames. 
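
ieee80211_tx_status() above computes the retry count by clearing rate slots the hardware could not have attempted, summing the per-rate try counts and discounting the initial transmission. The same arithmetic stand-alone (array size and names are stand-ins):

/*
 * Retry-count computation from a tx-status rate table: slots beyond
 * what the hardware supports are cleared, the remaining counts are
 * summed, and the total is reduced by one for the first transmission.
 */
#include <stdio.h>

#define MAX_RATES 5

struct rate_attempt {
	int idx;	/* rate index, -1 = unused */
	int count;	/* transmissions at this rate */
};

static int retry_count(struct rate_attempt rates[MAX_RATES], int hw_max_rates)
{
	int i, retries = -1;

	for (i = 0; i < MAX_RATES; i++) {
		if (i >= hw_max_rates) {
			rates[i].idx = -1;
			rates[i].count = 0;
		}
		retries += rates[i].count;
	}
	return retries < 0 ? 0 : retries;
}

int main(void)
{
	struct rate_attempt r[MAX_RATES] = {
		{ 4, 2 }, { 2, 1 }, { -1, 0 }, { -1, 0 }, { -1, 0 },
	};

	/* Two tries at rate 4 and one at rate 2: two retries in total. */
	printf("retries=%d\n", retry_count(r, 2));
	return 0;
}
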
Update frame counters only for + * the first fragment of the frame. */ + + frag = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; + type = le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_FTYPE; + + if (info->flags & IEEE80211_TX_STAT_ACK) { + if (frag == 0) { + local->dot11TransmittedFrameCount++; + if (is_multicast_ether_addr(hdr->addr1)) + local->dot11MulticastTransmittedFrameCount++; + if (retry_count > 0) + local->dot11RetryCount++; + if (retry_count > 1) + local->dot11MultipleRetryCount++; + } + + /* This counter shall be incremented for an acknowledged MPDU + * with an individual address in the address 1 field or an MPDU + * with a multicast address in the address 1 field of type Data + * or Management. */ + if (!is_multicast_ether_addr(hdr->addr1) || + type == IEEE80211_FTYPE_DATA || + type == IEEE80211_FTYPE_MGMT) + local->dot11TransmittedFragmentCount++; + } else { + if (frag == 0) + local->dot11FailedCount++; + } + + /* this was a transmitted frame, but now we want to reuse it */ + skb_orphan(skb); + + /* + * This is a bit racy but we can avoid a lot of work + * with this test... + */ + if (!local->monitors && !local->cooked_mntrs) { + dev_kfree_skb(skb); + return; + } + + /* send frame to monitor interfaces now */ + + if (skb_headroom(skb) < sizeof(*rthdr)) { + printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); + dev_kfree_skb(skb); + return; + } + + rthdr = (struct ieee80211_tx_status_rtap_hdr *) + skb_push(skb, sizeof(*rthdr)); + + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | + (1 << IEEE80211_RADIOTAP_DATA_RETRIES) | + (1 << IEEE80211_RADIOTAP_RATE)); + + if (!(info->flags & IEEE80211_TX_STAT_ACK) && + !is_multicast_ether_addr(hdr->addr1)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); + + /* + * XXX: Once radiotap gets the bitmap reset thing the vendor + * extensions proposal contains, we can actually report + * the whole set of tries we did. + */ + if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); + else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) + rthdr->rate = sband->bitrates[ + info->status.rates[0].idx].bitrate / 5; + + /* for now report the total retry_count */ + rthdr->data_retries = retry_count; + + /* Need to make a copy before skb->cb gets cleared */ + injected = !!(info->flags & IEEE80211_TX_CTL_INJECTED); + + /* XXX: is this sufficient for BPF? 
*/ + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (!netif_running(sdata->dev)) + continue; + + if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && + !injected && + (type == IEEE80211_FTYPE_DATA)) + continue; + + if (prev_dev) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + skb2->dev = prev_dev; + netif_rx(skb2); + } + } + + prev_dev = sdata->dev; + } + } + if (prev_dev) { + skb->dev = prev_dev; + netif_rx(skb); + skb = NULL; + } + rcu_read_unlock(); + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(ieee80211_tx_status); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 964b7faa7f1..4921d724b6c 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -301,9 +301,9 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, #endif if (key->local->ops->update_tkip_key && key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { - u8 bcast[ETH_ALEN] = + static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - u8 *sta_addr = key->sta->sta.addr; + const u8 *sta_addr = key->sta->sta.addr; if (is_multicast_ether_addr(ra)) sta_addr = bcast; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index eaa4118de98..8834cc93c71 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -317,12 +317,11 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (!atomic_read(&tx->sdata->bss->num_sta_ps)) return TX_CONTINUE; - /* buffered in hardware */ - if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) { - info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; + info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; + /* device releases frame after DTIM beacon */ + if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) return TX_CONTINUE; - } /* buffered in mac80211 */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -367,15 +366,16 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; u32 staflags; - if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control) - || ieee80211_is_auth(hdr->frame_control) - || ieee80211_is_assoc_resp(hdr->frame_control) - || ieee80211_is_reassoc_resp(hdr->frame_control))) + if (unlikely(!sta || + ieee80211_is_probe_resp(hdr->frame_control) || + ieee80211_is_auth(hdr->frame_control) || + ieee80211_is_assoc_resp(hdr->frame_control) || + ieee80211_is_reassoc_resp(hdr->frame_control))) return TX_CONTINUE; staflags = get_sta_flags(sta); - if (unlikely((staflags & WLAN_STA_PS) && + if (unlikely((staflags & (WLAN_STA_PS_STA | WLAN_STA_PS_DRIVER)) && !(info->flags & IEEE80211_TX_CTL_PSPOLL_RESPONSE))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %pM aid %d: PS buffer (entries " @@ -398,8 +398,13 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) } else tx->local->total_ps_buffered++; - /* Queue frame to be sent after STA sends an PS Poll frame */ - if (skb_queue_empty(&sta->ps_tx_buf)) + /* + * Queue frame to be sent after STA wakes up/polls, + * but don't set the TIM bit if the driver is blocking + * wakeup or poll response transmissions anyway. 
+ */ + if (skb_queue_empty(&sta->ps_tx_buf) && + !(staflags & WLAN_STA_PS_DRIVER)) sta_info_set_tim_bit(sta); info->control.jiffies = jiffies; @@ -409,7 +414,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) return TX_QUEUED; } #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { + else if (unlikely(staflags & WLAN_STA_PS_STA)) { printk(KERN_DEBUG "%s: STA %pM in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, sta->sta.addr); @@ -1047,7 +1052,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, hdr = (struct ieee80211_hdr *) skb->data; - tx->sta = sta_info_get(local, hdr->addr1); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + tx->sta = rcu_dereference(sdata->u.vlan.sta); + if (!tx->sta) + tx->sta = sta_info_get(local, hdr->addr1); if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { @@ -1201,23 +1209,26 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) struct sk_buff *skb = tx->skb; ieee80211_tx_result res = TX_DROP; -#define CALL_TXH(txh) \ - res = txh(tx); \ - if (res != TX_CONTINUE) \ - goto txh_done; - - CALL_TXH(ieee80211_tx_h_check_assoc) - CALL_TXH(ieee80211_tx_h_ps_buf) - CALL_TXH(ieee80211_tx_h_select_key) - CALL_TXH(ieee80211_tx_h_michael_mic_add) - CALL_TXH(ieee80211_tx_h_rate_ctrl) - CALL_TXH(ieee80211_tx_h_misc) - CALL_TXH(ieee80211_tx_h_sequence) - CALL_TXH(ieee80211_tx_h_fragment) +#define CALL_TXH(txh) \ + do { \ + res = txh(tx); \ + if (res != TX_CONTINUE) \ + goto txh_done; \ + } while (0) + + CALL_TXH(ieee80211_tx_h_check_assoc); + CALL_TXH(ieee80211_tx_h_ps_buf); + CALL_TXH(ieee80211_tx_h_select_key); + CALL_TXH(ieee80211_tx_h_michael_mic_add); + if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_rate_ctrl); + CALL_TXH(ieee80211_tx_h_misc); + CALL_TXH(ieee80211_tx_h_sequence); + CALL_TXH(ieee80211_tx_h_fragment); /* handlers after fragment must be aware of tx info fragmentation! 
*/ - CALL_TXH(ieee80211_tx_h_stats) - CALL_TXH(ieee80211_tx_h_encrypt) - CALL_TXH(ieee80211_tx_h_calculate_duration) + CALL_TXH(ieee80211_tx_h_stats); + CALL_TXH(ieee80211_tx_h_encrypt); + CALL_TXH(ieee80211_tx_h_calculate_duration); #undef CALL_TXH txh_done: @@ -1387,6 +1398,30 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, return 0; } +static bool need_dynamic_ps(struct ieee80211_local *local) +{ + /* driver doesn't support power save */ + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) + return false; + + /* hardware does dynamic power save */ + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) + return false; + + /* dynamic power save disabled */ + if (local->hw.conf.dynamic_ps_timeout <= 0) + return false; + + /* we are scanning, don't enable power save */ + if (local->scanning) + return false; + + if (!local->ps_sdata) + return false; + + return true; +} + static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { @@ -1397,11 +1432,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, int headroom; bool may_encrypt; - dev_hold(sdata->dev); - - if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && - local->hw.conf.dynamic_ps_timeout > 0 && - !(local->scanning) && local->ps_sdata) { + if (need_dynamic_ps(local)) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_PS); @@ -1413,7 +1444,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + rcu_read_lock(); if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { int hdrlen; @@ -1437,7 +1468,6 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, * support we will need a different mechanism. 
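The CALL_TXH() rework above wraps the handler invocation in do { ... } while (0), so every use now takes a terminating semicolon and expands as a single statement. A small standalone illustration of why that wrapper matters under an unbraced if/else; the handler names here are made up, not the mac80211 tx handlers.

#include <stdio.h>

/* Without the do/while wrapper, a two-statement macro body falls apart
 * under an unbraced `if`: only the first statement is guarded. */
#define RUN_UNSAFE(step)  step(); puts("  (checked result)")

/* Wrapped form: the whole body is one statement and the call site must
 * supply the trailing semicolon, like the reworked CALL_TXH(). */
#define RUN_SAFE(step)				\
	do {					\
		step();				\
		puts("  (checked result)");	\
	} while (0)

static void handler(void) { puts("handler ran"); }

int main(void)
{
	int enabled = 0;

	if (enabled)
		RUN_SAFE(handler);	/* nothing runs, as intended */
	else
		puts("skipped safely");

	/* RUN_UNSAFE(handler) in the same position would not even compile in
	 * the if/else form above, and without the else branch its second
	 * statement would always execute - the classic multi-statement
	 * macro pitfall the do/while(0) idiom avoids. */
	return 0;
}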
*/ - rcu_read_lock(); list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) { if (!netif_running(tmp_sdata->dev)) @@ -1446,13 +1476,10 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, continue; if (compare_ether_addr(tmp_sdata->dev->dev_addr, hdr->addr2) == 0) { - dev_hold(tmp_sdata->dev); - dev_put(sdata->dev); sdata = tmp_sdata; break; } } - rcu_read_unlock(); } } @@ -1466,7 +1493,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, if (ieee80211_skb_resize(local, skb, headroom, may_encrypt)) { dev_kfree_skb(skb); - dev_put(sdata->dev); + rcu_read_unlock(); return; } @@ -1477,13 +1504,13 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, !is_multicast_ether_addr(hdr->addr1)) if (mesh_nexthop_lookup(skb, sdata)) { /* skb queued: don't free */ - dev_put(sdata->dev); + rcu_read_unlock(); return; } ieee80211_select_queue(local, skb); ieee80211_tx(sdata, skb, false); - dev_put(sdata->dev); + rcu_read_unlock(); } netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, @@ -1547,6 +1574,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, memset(info, 0, sizeof(*info)); + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* pass the radiotap header up to xmit */ ieee80211_xmit(IEEE80211_DEV_TO_SUB_IF(dev), skb); return NETDEV_TX_OK; @@ -1585,7 +1614,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; - struct sta_info *sta; + struct sta_info *sta = NULL; u32 sta_flags = 0; if (unlikely(skb->len < ETH_HLEN)) { @@ -1602,8 +1631,24 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: + rcu_read_lock(); + sta = rcu_dereference(sdata->u.vlan.sta); + if (sta) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr.addr1, sta->sta.addr, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 30; + sta_flags = get_sta_flags(sta); + } + rcu_read_unlock(); + if (sta) + break; + /* fall through */ + case NL80211_IFTYPE_AP: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -1639,21 +1684,25 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, /* packet from other interface */ struct mesh_path *mppath; int is_mesh_mcast = 1; - char *mesh_da; + const u8 *mesh_da; rcu_read_lock(); if (is_multicast_ether_addr(skb->data)) /* DA TA mSA AE:SA */ mesh_da = skb->data; else { + static const u8 bcast[ETH_ALEN] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + mppath = mpp_path_lookup(skb->data, sdata); if (mppath) { /* RA TA mDA mSA AE:DA SA */ mesh_da = mppath->mpp; is_mesh_mcast = 0; - } else + } else { /* DA TA mSA AE:SA */ - mesh_da = dev->broadcast; + mesh_da = bcast; + } } hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, mesh_da, dev->dev_addr); @@ -1677,12 +1726,21 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; #endif case NL80211_IFTYPE_STATION: - fc |= cpu_to_le16(IEEE80211_FCTL_TODS); - /* BSSID SA DA */ memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - hdrlen = 24; + if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | 
IEEE80211_FCTL_TODS); + /* RA TA DA SA */ + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 30; + } else { + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + hdrlen = 24; + } break; case NL80211_IFTYPE_ADHOC: /* DA SA BSSID */ @@ -1907,12 +1965,10 @@ void ieee80211_tx_pending(unsigned long data) } sdata = vif_to_sdata(info->control.vif); - dev_hold(sdata->dev); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); txok = ieee80211_tx_pending_skb(local, skb); - dev_put(sdata->dev); if (!txok) __skb_queue_head(&local->pending[i], skb); spin_lock_irqsave(&local->queue_stop_reason_lock, @@ -1990,8 +2046,9 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, } } -struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) +struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 *tim_offset, u16 *tim_length) { struct ieee80211_local *local = hw_to_local(hw); struct sk_buff *skb = NULL; @@ -2008,6 +2065,11 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); + if (tim_offset) + *tim_offset = 0; + if (tim_length) + *tim_length = 0; + if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); @@ -2043,6 +2105,11 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, spin_unlock_irqrestore(&local->sta_lock, flags); } + if (tim_offset) + *tim_offset = beacon->head_len; + if (tim_length) + *tim_length = skb->len - beacon->head_len; + if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), beacon->tail, beacon->tail_len); @@ -2080,7 +2147,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); memset(mgmt->da, 0xff, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - /* BSSID is left zeroed, wildcard value */ + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(sdata->vif.bss_conf.beacon_int); mgmt->u.beacon.capab_info = 0x0; /* 0x0 for MPs */ @@ -2119,7 +2186,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, rcu_read_unlock(); return skb; } -EXPORT_SYMBOL(ieee80211_beacon_get); +EXPORT_SYMBOL(ieee80211_beacon_get_tim); void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, @@ -2214,17 +2281,12 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_get_buffered_bc); -void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int encrypt) +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); skb_set_mac_header(skb, 0); skb_set_network_header(skb, 0); skb_set_transport_header(skb, 0); - if (!encrypt) - info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - /* * The other path calling ieee80211_xmit is from the tasklet, * and while we can handle concurrent transmissions locking diff --git a/net/mac80211/util.c b/net/mac80211/util.c index aeb65b3d229..78a6e924c7e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -520,9 +520,9 @@ EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); */ static bool ieee80211_can_queue_work(struct ieee80211_local *local) { - if 
(WARN(local->suspended, "queueing ieee80211 work while " - "going to suspend\n")) - return false; + if (WARN(local->suspended && !local->resuming, + "queueing ieee80211 work while going to suspend\n")) + return false; return true; } @@ -579,7 +579,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, if (elen > left) break; - if (calc_crc && id < 64 && (filter & BIT(id))) + if (calc_crc && id < 64 && (filter & (1ULL << id))) crc = crc32_be(crc, pos - 2, elen + 2); switch (id) { @@ -666,8 +666,8 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->mesh_id_len = elen; break; case WLAN_EID_MESH_CONFIG: - elems->mesh_config = pos; - elems->mesh_config_len = elen; + if (elen >= sizeof(struct ieee80211_meshconf_ie)) + elems->mesh_config = (void *)pos; break; case WLAN_EID_PEER_LINK: elems->peer_link = pos; @@ -685,6 +685,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, elems->perr = pos; elems->perr_len = elen; break; + case WLAN_EID_RANN: + if (elen >= sizeof(struct ieee80211_rann_ie)) + elems->rann = (void *)pos; + break; case WLAN_EID_CHANNEL_SWITCH: elems->ch_switch_elem = pos; elems->ch_switch_elem_len = elen; @@ -868,17 +872,19 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, WARN_ON(err); } - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, - const u8 *ie, size_t ie_len) + const u8 *ie, size_t ie_len, + enum ieee80211_band band) { struct ieee80211_supported_band *sband; u8 *pos, *supp_rates_len, *esupp_rates_len = NULL; int i; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = local->hw.wiphy->bands[band]; pos = buffer; @@ -966,9 +972,11 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, memcpy(pos, ssid, ssid_len); pos += ssid_len; - skb_put(skb, ieee80211_build_preq_ies(local, pos, ie, ie_len)); + skb_put(skb, ieee80211_build_preq_ies(local, pos, ie, ie_len, + local->hw.conf.channel->band)); - ieee80211_tx_skb(sdata, skb, 0); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + ieee80211_tx_skb(sdata, skb); } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, @@ -1025,13 +1033,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) struct sta_info *sta; unsigned long flags; int res; - bool from_suspend = local->suspended; - /* - * We're going to start the hardware, at that point - * we are no longer suspended and can RX frames. - */ - local->suspended = false; + if (local->suspended) + local->resuming = true; /* restart hardware */ if (local->open_count) { @@ -1129,11 +1133,14 @@ int ieee80211_reconfig(struct ieee80211_local *local) * If this is for hw restart things are still running. * We may want to change that later, however. 
*/ - if (!from_suspend) + if (!local->suspended) return 0; #ifdef CONFIG_PM + /* first set suspended false, then resuming */ local->suspended = false; + mb(); + local->resuming = false; list_for_each_entry(sdata, &local->interfaces, list) { switch(sdata->vif.type) { diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 8a980f13694..247123fe1a7 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -281,16 +281,18 @@ bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_data(hdr->frame_control) && !ieee80211_is_auth(hdr->frame_control)) return RX_CONTINUE; - if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { + if (!(status->flag & RX_FLAG_DECRYPTED)) { if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; - } else if (!(rx->status->flag & RX_FLAG_IV_STRIPPED)) { + } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ skb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 70778694877..5332014cb22 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -85,16 +85,16 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) u8 *data, *key = NULL, key_offset; size_t data_len; unsigned int hdrlen; - struct ieee80211_hdr *hdr; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int authenticator = 1, wpa_test = 0; /* No way to verify the MIC if the hardware stripped it */ - if (rx->status->flag & RX_FLAG_MMIC_STRIPPED) + if (status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; - hdr = (struct ieee80211_hdr *)skb->data; if (!rx->key || rx->key->conf.alg != ALG_TKIP || !ieee80211_has_protected(hdr->frame_control) || !ieee80211_is_data_present(hdr->frame_control)) @@ -216,6 +216,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) int hdrlen, res, hwaccel = 0, wpa_test = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -225,8 +226,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) if (!rx->sta || skb->len - hdrlen < 12) return RX_DROP_UNUSABLE; - if (rx->status->flag & RX_FLAG_DECRYPTED) { - if (rx->status->flag & RX_FLAG_IV_STRIPPED) { + if (status->flag & RX_FLAG_DECRYPTED) { + if (status->flag & RX_FLAG_IV_STRIPPED) { /* * Hardware took care of all processing, including * replay protection, and stripped the ICV/IV so @@ -442,6 +443,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) int hdrlen; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); u8 pn[CCMP_PN_LEN]; int data_len; @@ -455,8 +457,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; - if ((rx->status->flag & RX_FLAG_DECRYPTED) && - (rx->status->flag & RX_FLAG_IV_STRIPPED)) + if ((status->flag & RX_FLAG_DECRYPTED) && + (status->flag & RX_FLAG_IV_STRIPPED)) return 
RX_CONTINUE; ccmp_hdr2pn(pn, skb->data + hdrlen); @@ -466,7 +468,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } - if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { + if (!(status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ ccmp_special_blocks(skb, pn, key->u.ccmp.rx_crypto_buf, 1); @@ -563,6 +565,7 @@ ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) { struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie *mmie; u8 aad[20], mic[8], ipn[6]; @@ -571,8 +574,8 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) if (!ieee80211_is_mgmt(hdr->frame_control)) return RX_CONTINUE; - if ((rx->status->flag & RX_FLAG_DECRYPTED) && - (rx->status->flag & RX_FLAG_IV_STRIPPED)) + if ((status->flag & RX_FLAG_DECRYPTED) && + (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; if (skb->len < 24 + sizeof(*mmie)) @@ -591,7 +594,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } - if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { + if (!(status->flag & RX_FLAG_DECRYPTED)) { /* hardware didn't decrypt/verify MIC */ bip_aad(skb, aad); ieee80211_aes_cmac(key->u.aes_cmac.tfm, diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 5bb34737501..60ec4e4bada 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -273,8 +273,8 @@ void __init netfilter_init(void) #ifdef CONFIG_SYSCTL struct ctl_path nf_net_netfilter_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, + { .procname = "net", }, + { .procname = "netfilter", }, { } }; EXPORT_SYMBOL_GPL(nf_net_netfilter_sysctl_path); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 446e9bd4b4b..e55a6861d26 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1706,12 +1706,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; const struct ctl_path net_vs_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "net", }, + { .procname = "ipv4", }, { .procname = "vs", }, { } }; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index c1757f3620c..1b9370db230 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -121,7 +121,7 @@ static ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { .ctl_name = 0 } + { } }; static struct ctl_table_header * sysctl_header; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 715b57f9540..f7476b95ab4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -302,7 +302,7 @@ static ctl_table vs_vars_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { .ctl_name = 0 } + { } }; static struct ctl_table_header * sysctl_header; diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 4a1d94aac20..018f90db511 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -30,7 +30,6 @@ MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); #ifdef CONFIG_SYSCTL static struct ctl_table acct_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = 
"nf_conntrack_acct", .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ca6e68dcd8a..0e98c3282d4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -512,11 +512,17 @@ static noinline int early_drop(struct net *net, unsigned int hash) cnt++; } - if (ct && unlikely(nf_ct_is_dying(ct) || - !atomic_inc_not_zero(&ct->ct_general.use))) - ct = NULL; - if (ct || cnt >= NF_CT_EVICTION_RANGE) + if (ct != NULL) { + if (likely(!nf_ct_is_dying(ct) && + atomic_inc_not_zero(&ct->ct_general.use))) + break; + else + ct = NULL; + } + + if (cnt >= NF_CT_EVICTION_RANGE) break; + hash = (hash + 1) % nf_conntrack_htable_size; } rcu_read_unlock(); @@ -1351,6 +1357,11 @@ err_stat: return ret; } +s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq); +EXPORT_SYMBOL_GPL(nf_ct_nat_offset); + int nf_conntrack_init(struct net *net) { int ret; @@ -1368,6 +1379,9 @@ int nf_conntrack_init(struct net *net) /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + + /* Howto get NAT offsets */ + rcu_assign_pointer(nf_ct_nat_offset, NULL); } return 0; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index aee560b4768..d5a9bcd7d61 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -151,7 +151,6 @@ static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; #ifdef CONFIG_SYSCTL static struct ctl_table event_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_events", .data = &init_net.ct.sysctl_events, .maxlen = sizeof(unsigned int), @@ -159,7 +158,6 @@ static struct ctl_table event_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_events_retry_timeout", .data = &init_net.ct.sysctl_events_retry_timeout, .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2032dfe25ca..fdf5d2a1d9b 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -202,9 +202,9 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { - return a->master == b->master && a->class == b->class - && nf_ct_tuple_equal(&a->tuple, &b->tuple) - && nf_ct_tuple_mask_equal(&a->mask, &b->mask); + return a->master == b->master && a->class == b->class && + nf_ct_tuple_equal(&a->tuple, &b->tuple) && + nf_ct_tuple_mask_equal(&a->mask, &b->mask); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 5509dd1f14c..38ea7ef3ccd 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -243,8 +243,8 @@ static int try_epsv_response(const char *data, size_t dlen, /* Three delimiters. 
*/ if (dlen <= 3) return 0; delim = data[0]; - if (isdigit(delim) || delim < 33 || delim > 126 - || data[1] != delim || data[2] != delim) + if (isdigit(delim) || delim < 33 || delim > 126 || + data[1] != delim || data[2] != delim) return 0; return get_port(data, 3, dlen, delim, &cmd->u.tcp.port); @@ -366,8 +366,8 @@ static int help(struct sk_buff *skb, typeof(nf_nat_ftp_hook) nf_nat_ftp; /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED - && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { pr_debug("ftp: Conntrackinfo = %u\n", ctinfo); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 1b816a2ea81..dd375500dcc 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -384,7 +384,7 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }; /* this module per-net specifics */ -static int dccp_net_id; +static int dccp_net_id __read_mostly; struct dccp_net { int dccp_loose; unsigned int dccp_timeout[CT_DCCP_MAX + 1]; @@ -703,64 +703,54 @@ static int dccp_nlattr_size(void) /* template, data assigned later */ static struct ctl_table dccp_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_request", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_respond", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_partopen", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_open", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closereq", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closing", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_timewait", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_loose", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { - .ctl_name = 0, - } + { } }; #endif /* CONFIG_SYSCTL */ @@ -810,12 +800,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { static __net_init int dccp_net_init(struct net *net) { - struct dccp_net *dn; - int err; - - dn = kmalloc(sizeof(*dn), GFP_KERNEL); - if (!dn) - return -ENOMEM; + struct dccp_net *dn = dccp_pernet(net); /* default values */ dn->dccp_loose = 1; @@ -827,16 +812,11 @@ static __net_init int dccp_net_init(struct net *net) dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; - err = net_assign_generic(net, dccp_net_id, dn); - if (err) - goto out; - #ifdef CONFIG_SYSCTL - err = -ENOMEM; dn->sysctl_table = kmemdup(dccp_sysctl_table, sizeof(dccp_sysctl_table), GFP_KERNEL); if (!dn->sysctl_table) - goto out; + return -ENOMEM; dn->sysctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; 
dn->sysctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; @@ -851,15 +831,11 @@ static __net_init int dccp_net_init(struct net *net) nf_net_netfilter_sysctl_path, dn->sysctl_table); if (!dn->sysctl_header) { kfree(dn->sysctl_table); - goto out; + return -ENOMEM; } #endif return 0; - -out: - kfree(dn); - return err; } static __net_exit void dccp_net_exit(struct net *net) @@ -869,21 +845,20 @@ static __net_exit void dccp_net_exit(struct net *net) unregister_net_sysctl_table(dn->sysctl_header); kfree(dn->sysctl_table); #endif - kfree(dn); - - net_assign_generic(net, dccp_net_id, NULL); } static struct pernet_operations dccp_net_ops = { .init = dccp_net_init, .exit = dccp_net_exit, + .id = &dccp_net_id, + .size = sizeof(struct dccp_net), }; static int __init nf_conntrack_proto_dccp_init(void) { int err; - err = register_pernet_gen_subsys(&dccp_net_id, &dccp_net_ops); + err = register_pernet_subsys(&dccp_net_ops); if (err < 0) goto err1; @@ -899,14 +874,14 @@ static int __init nf_conntrack_proto_dccp_init(void) err3: nf_conntrack_l4proto_unregister(&dccp_proto4); err2: - unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops); + unregister_pernet_subsys(&dccp_net_ops); err1: return err; } static void __exit nf_conntrack_proto_dccp_fini(void) { - unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops); + unregister_pernet_subsys(&dccp_net_ops); nf_conntrack_l4proto_unregister(&dccp_proto6); nf_conntrack_l4proto_unregister(&dccp_proto4); } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 829374f426c..e2091d0c7a2 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -69,9 +69,7 @@ static struct ctl_table generic_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table generic_compat_sysctl_table[] = { @@ -82,9 +80,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a54a0af0edb..c99cfba64dd 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -43,7 +43,7 @@ #define GRE_TIMEOUT (30 * HZ) #define GRE_STREAM_TIMEOUT (180 * HZ) -static int proto_gre_net_id; +static int proto_gre_net_id __read_mostly; struct netns_proto_gre { rwlock_t keymap_lock; struct list_head keymap_list; @@ -300,32 +300,24 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { static int proto_gre_net_init(struct net *net) { - struct netns_proto_gre *net_gre; - int rv; + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); - net_gre = kmalloc(sizeof(struct netns_proto_gre), GFP_KERNEL); - if (!net_gre) - return -ENOMEM; rwlock_init(&net_gre->keymap_lock); INIT_LIST_HEAD(&net_gre->keymap_list); - rv = net_assign_generic(net, proto_gre_net_id, net_gre); - if (rv < 0) - kfree(net_gre); - return rv; + return 0; } static void proto_gre_net_exit(struct net *net) { - struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); - nf_ct_gre_keymap_flush(net); - kfree(net_gre); } static struct pernet_operations proto_gre_net_ops = { .init = proto_gre_net_init, .exit = proto_gre_net_exit, + .id = &proto_gre_net_id, + .size = sizeof(struct 
netns_proto_gre), }; static int __init nf_ct_proto_gre_init(void) @@ -335,7 +327,7 @@ static int __init nf_ct_proto_gre_init(void) rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); if (rv < 0) return rv; - rv = register_pernet_gen_subsys(&proto_gre_net_id, &proto_gre_net_ops); + rv = register_pernet_subsys(&proto_gre_net_ops); if (rv < 0) nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); return rv; @@ -344,7 +336,7 @@ static int __init nf_ct_proto_gre_init(void) static void __exit nf_ct_proto_gre_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); - unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops); + unregister_pernet_subsys(&proto_gre_net_ops); } module_init(nf_ct_proto_gre_init); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c10e6f36e31..f9d930f8027 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -595,9 +595,7 @@ static struct ctl_table sctp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT @@ -651,9 +649,7 @@ static struct ctl_table sctp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a82ba7537..3c96437b45a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -492,6 +492,21 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } +#ifdef CONFIG_NF_NAT_NEEDED +static inline s16 nat_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset); + + return get_offset != NULL ? get_offset(ct, dir, seq) : 0; +} +#define NAT_OFFSET(pf, ct, dir, seq) \ + (pf == NFPROTO_IPV4 ? 
nat_offset(ct, dir, seq) : 0) +#else +#define NAT_OFFSET(pf, ct, dir, seq) 0 +#endif + static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, @@ -506,6 +521,7 @@ static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; + s16 receiver_offset; bool res; /* @@ -519,11 +535,16 @@ static bool tcp_in_window(const struct nf_conn *ct, if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); + /* Take into account NAT sequence number mangling */ + receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); + ack -= receiver_offset; + sack -= receiver_offset; + pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n", - seq, ack, sack, win, end); + pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", + seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -613,8 +634,8 @@ static bool tcp_in_window(const struct nf_conn *ct, pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); - pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n", - seq, ack, sack, win, end); + pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", + seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -700,7 +721,7 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? - after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" @@ -715,39 +736,6 @@ static bool tcp_in_window(const struct nf_conn *ct, return res; } -#ifdef CONFIG_NF_NAT_NEEDED -/* Update sender->td_end after NAT successfully mangled the packet */ -/* Caller must linearize skb at tcp header. */ -void nf_conntrack_tcp_update(const struct sk_buff *skb, - unsigned int dataoff, - struct nf_conn *ct, int dir, - s16 offset) -{ - const struct tcphdr *tcph = (const void *)skb->data + dataoff; - const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; - const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir]; - __u32 end; - - end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); - - spin_lock_bh(&ct->lock); - /* - * We have to worry for the ack in the reply packet only... 
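The receiver_offset adjustment in tcp_in_window() above relies on modulo-2^32 sequence arithmetic: subtracting a signed 16-bit NAT offset from a __u32 ACK/SACK value stays correct across the 32-bit wrap, and the wrap-safe before()/after()-style comparisons used by the window check keep working on the adjusted values. A small standalone sketch of that arithmetic; the values are picked only to show the wrap.

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe TCP sequence comparison (same idea as the before()/after()
 * helpers: take the signed difference of unsigned values). */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	uint32_t ack = 0x00000010;	/* ACK as seen on the wire, near wrap */
	int16_t  receiver_offset = 100;	/* bytes added by NAT mangling */

	/* Undo the mangling: modulo-2^32 arithmetic handles the wrap even
	 * though the unadjusted value is numerically smaller than the offset. */
	uint32_t ack_orig = ack - receiver_offset;

	printf("adjusted ack: 0x%08x\n", ack_orig);		/* 0xffffffac */
	printf("adjusted precedes on-wire value: %s\n",
	       seq_before(ack_orig, ack) ? "yes" : "no");	/* yes */
	return 0;
}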
- */ - if (ct->proto.tcp.seen[dir].td_end + offset == end) - ct->proto.tcp.seen[dir].td_end = end; - ct->proto.tcp.last_end = end; - spin_unlock_bh(&ct->lock); - pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update); -#endif - #define TH_FIN 0x01 #define TH_SYN 0x02 #define TH_RST 0x04 @@ -908,23 +896,54 @@ static int tcp_packet(struct nf_conn *ct, /* b) This SYN/ACK acknowledges a SYN that we earlier * ignored as invalid. This means that the client and * the server are both in sync, while the firewall is - * not. We kill this session and block the SYN/ACK so - * that the client cannot but retransmit its SYN and - * thus initiate a clean new session. + * not. We get in sync from the previously annotated + * values. */ - spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: killing out of sync session "); - nf_ct_kill(ct); - return NF_DROP; + old_state = TCP_CONNTRACK_SYN_SENT; + new_state = TCP_CONNTRACK_SYN_RECV; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end = + ct->proto.tcp.last_end; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend = + ct->proto.tcp.last_end; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin = + ct->proto.tcp.last_win == 0 ? + 1 : ct->proto.tcp.last_win; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale = + ct->proto.tcp.last_wscale; + ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags = + ct->proto.tcp.last_flags; + memset(&ct->proto.tcp.seen[dir], 0, + sizeof(struct ip_ct_tcp_state)); + break; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; ct->proto.tcp.last_seq = ntohl(th->seq); ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - + ct->proto.tcp.last_win = ntohs(th->window); + + /* a) This is a SYN in ORIGINAL. The client and the server + * may be in sync but we are not. In that case, we annotate + * the TCP options and let the packet go through. If it is a + * valid SYN packet, the server will reply with a SYN/ACK, and + * then we'll get in sync. Otherwise, the server ignores it. 
*/ + if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) { + struct ip_ct_tcp_state seen = {}; + + ct->proto.tcp.last_flags = + ct->proto.tcp.last_wscale = 0; + tcp_options(skb, dataoff, th, &seen); + if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) { + ct->proto.tcp.last_flags |= + IP_CT_TCP_FLAG_WINDOW_SCALE; + ct->proto.tcp.last_wscale = seen.td_scale; + } + if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) { + ct->proto.tcp.last_flags |= + IP_CT_TCP_FLAG_SACK_PERM; + } + } spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, @@ -1303,7 +1322,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, .procname = "nf_conntrack_tcp_loose", .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), @@ -1311,7 +1329,6 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, .procname = "nf_conntrack_tcp_be_liberal", .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), @@ -1319,16 +1336,13 @@ static struct ctl_table tcp_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, .procname = "nf_conntrack_tcp_max_retrans", .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT @@ -1404,7 +1418,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, .procname = "ip_conntrack_tcp_loose", .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), @@ -1412,7 +1425,6 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, .procname = "ip_conntrack_tcp_be_liberal", .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), @@ -1420,16 +1432,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, .procname = "ip_conntrack_tcp_max_retrans", .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 70809d117b9..5c5518bedb4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -154,9 +154,7 @@ static struct ctl_table udp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table udp_compat_sysctl_table[] = { @@ -174,9 +172,7 @@ static struct ctl_table udp_compat_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0badedc542d..458655bb210 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -146,7 +146,6 @@ static unsigned int udplite_sysctl_table_users; static struct ctl_table_header *udplite_sysctl_header; static struct ctl_table udplite_sysctl_table[] = { { - 
.ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_udplite_timeout", .data = &nf_ct_udplite_timeout, .maxlen = sizeof(unsigned int), @@ -154,16 +153,13 @@ static struct ctl_table udplite_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_udplite_timeout_stream", .data = &nf_ct_udplite_timeout_stream, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = 0 - } + { } }; #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 19351538197..028aba667ef 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -340,7 +340,6 @@ static struct ctl_table_header *nf_ct_netfilter_header; static ctl_table nf_ct_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_MAX, .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), @@ -348,7 +347,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_COUNT, .procname = "nf_conntrack_count", .data = &init_net.ct.count, .maxlen = sizeof(int), @@ -356,7 +354,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_BUCKETS, .procname = "nf_conntrack_buckets", .data = &nf_conntrack_htable_size, .maxlen = sizeof(unsigned int), @@ -364,7 +361,6 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_CHECKSUM, .procname = "nf_conntrack_checksum", .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), @@ -372,43 +368,39 @@ static ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &log_invalid_proto_min, .extra2 = &log_invalid_proto_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_expect_max", .data = &nf_ct_expect_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; #define NET_NF_CONNTRACK_MAX 2089 static ctl_table nf_ct_netfilter_table[] = { { - .ctl_name = NET_NF_CONNTRACK_MAX, .procname = "nf_conntrack_max", .data = &nf_conntrack_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } + { } }; static struct ctl_path nf_ct_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "net", }, { } }; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index c93494fef8e..015725a5cd5 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -128,9 +128,8 @@ EXPORT_SYMBOL(nf_log_packet); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) { - rcu_read_lock(); + mutex_lock(&nf_log_mutex); if (*pos >= ARRAY_SIZE(nf_loggers)) return NULL; @@ -149,9 +148,8 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) } static void seq_stop(struct seq_file *s, void *v) - __releases(RCU) { - rcu_read_unlock(); + mutex_unlock(&nf_log_mutex); } static int seq_show(struct seq_file *s, void *v) @@ -161,7 +159,7 @@ static int seq_show(struct seq_file *s, void *v) struct nf_logger *t; int ret; - logger = rcu_dereference(nf_loggers[*pos]); + logger = nf_loggers[*pos]; if (!logger) ret = 
seq_printf(s, "%2lld NONE (", *pos); @@ -171,22 +169,16 @@ static int seq_show(struct seq_file *s, void *v) if (ret < 0) return ret; - mutex_lock(&nf_log_mutex); list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) { ret = seq_printf(s, "%s", t->name); - if (ret < 0) { - mutex_unlock(&nf_log_mutex); + if (ret < 0) return ret; - } if (&t->list[*pos] != nf_loggers_l[*pos].prev) { ret = seq_printf(s, ","); - if (ret < 0) { - mutex_unlock(&nf_log_mutex); + if (ret < 0) return ret; - } } } - mutex_unlock(&nf_log_mutex); return seq_printf(s, ")\n"); } @@ -216,9 +208,9 @@ static const struct file_operations nflog_file_ops = { #ifdef CONFIG_SYSCTL static struct ctl_path nf_log_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, - { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "netfilter", }, + { .procname = "nf_log", }, { } }; @@ -273,7 +265,6 @@ static __init int netfilter_log_sysctl_init(void) for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); - nf_log_sysctl_table[i].ctl_name = CTL_UNNUMBERED; nf_log_sysctl_table[i].procname = nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; nf_log_sysctl_table[i].data = NULL; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f900dc3194a..9de0470d557 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -666,8 +666,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, { struct netlink_notify *n = ptr; - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_NETFILTER && n->pid) { + if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this pid */ @@ -678,7 +677,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, struct hlist_head *head = &instance_table[i]; hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if ((n->net == &init_net) && + if ((net_eq(n->net, &init_net)) && (n->pid == inst->peer_pid)) __instance_destroy(inst); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7a9dec9fb82..7e3fa410641 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -574,8 +574,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, { struct netlink_notify *n = ptr; - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_NETFILTER && n->pid) { + if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; /* destroy all instances for this pid */ diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 68098095439..38f03f75a63 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -103,7 +103,7 @@ static int count_them(struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - const struct xt_match *match) + u_int8_t family) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; @@ -113,8 +113,7 @@ static int count_them(struct xt_connlimit_data *data, bool addit = true; int matches = 0; - - if (match->family == NFPROTO_IPV6) + if (family == NFPROTO_IPV6) hash = &data->iphash[connlimit_iphash6(addr, mask)]; else hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; @@ -157,8 +156,7 @@ static int count_them(struct xt_connlimit_data *data, continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, - match->family)) + if 
(same_source_net(addr, mask, &conn->tuple.src.u3, family)) /* same source network -> be counted! */ ++matches; nf_ct_put(found_ct); @@ -207,7 +205,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) spin_lock_bh(&info->data->lock); connections = count_them(info->data, tuple_ptr, &addr, - &info->mask, par->match); + &info->mask, par->family); spin_unlock_bh(&info->data->lock); if (connections < 0) { diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 6dc4652f2fe..ae66305f0fe 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -113,7 +113,8 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, } static bool -conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) +conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, + u16 state_mask, u16 status_mask) { const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; @@ -136,7 +137,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (test_bit(IPS_DST_NAT_BIT, &ct->status)) statebit |= XT_CONNTRACK_STATE_DNAT; } - if (!!(info->state_mask & statebit) ^ + if (!!(state_mask & statebit) ^ !(info->invert_flags & XT_CONNTRACK_STATE)) return false; } @@ -172,7 +173,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return false; if ((info->match_flags & XT_CONNTRACK_STATUS) && - (!!(info->status_mask & ct->status) ^ + (!!(status_mask & ct->status) ^ !(info->invert_flags & XT_CONNTRACK_STATUS))) return false; @@ -192,11 +193,17 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) static bool conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; - struct xt_match_param newpar = *par; + const struct xt_conntrack_mtinfo1 *info = par->matchinfo; - newpar.matchinfo = *info; - return conntrack_mt(skb, &newpar); + return conntrack_mt(skb, par, info->state_mask, info->status_mask); +} + +static bool +conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; + + return conntrack_mt(skb, par, info->state_mask, info->status_mask); } static bool conntrack_mt_check(const struct xt_mtchk_param *par) @@ -209,45 +216,11 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } -static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) -{ - struct xt_conntrack_mtinfo1 *info = par->matchinfo; - struct xt_conntrack_mtinfo2 *up; - int ret = conntrack_mt_check(par); - - if (ret < 0) - return ret; - - up = kmalloc(sizeof(*up), GFP_KERNEL); - if (up == NULL) { - nf_ct_l3proto_module_put(par->family); - return -ENOMEM; - } - - /* - * The strategy here is to minimize the overhead of v1 matching, - * by prebuilding a v2 struct and putting the pointer into the - * v1 dataspace. 
- */ - memcpy(up, info, offsetof(typeof(*info), state_mask)); - up->state_mask = info->state_mask; - up->status_mask = info->status_mask; - *(void **)info = up; - return true; -} - static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } -static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) -{ - struct xt_conntrack_mtinfo2 **info = par->matchinfo; - kfree(*info); - conntrack_mt_destroy(par); -} - static struct xt_match conntrack_mt_reg[] __read_mostly = { { .name = "conntrack", @@ -255,8 +228,8 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), .match = conntrack_mt_v1, - .checkentry = conntrack_mt_check_v1, - .destroy = conntrack_mt_destroy_v1, + .checkentry = conntrack_mt_check, + .destroy = conntrack_mt_destroy, .me = THIS_MODULE, }, { @@ -264,7 +237,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 2, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo2), - .match = conntrack_mt, + .match = conntrack_mt_v2, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, .me = THIS_MODULE, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 2e8089ecd0a..2773be6a71d 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -112,7 +112,7 @@ static bool limit_mt_check(const struct xt_mtchk_param *par) priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) - return -ENOMEM; + return false; /* For SMP, we only want to use one set of state. */ r->master = priv; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 63e19050465..4d1a41bbd5d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -118,7 +118,7 @@ static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, { struct xt_osf_user_finger *f; struct xt_osf_finger *sf; - int err = ENOENT; + int err = -ENOENT; if (!osf_attrs[OSF_ATTR_FINGER]) return -EINVAL; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index eb0ceb84652..fc70a49c0af 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -482,8 +482,7 @@ static ssize_t recent_old_proc_write(struct file *file, if (copy_from_user(buf, input, size)) return -EFAULT; - while (isspace(*c)) - c++; + c = skip_spaces(c); if (size - (c - buf) < 5) return c - buf; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index ebf00ad5b19..6a902564d24 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -149,7 +149,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par, /* Ignore sockets listening on INADDR_ANY */ wildcard = (sk->sk_state != TCP_TIME_WAIT && - inet_sk(sk)->rcv_saddr == 0); + inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ @@ -192,7 +192,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .revision = 0, .family = NFPROTO_IPV4, .match = socket_mt_v0, - .hooks = 1 << NF_INET_PRE_ROUTING, + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, { @@ -201,7 +202,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .family = NFPROTO_IPV4, .match = socket_mt_v1, .matchsize = sizeof(struct xt_socket_mtinfo1), - .hooks = 1 << NF_INET_PRE_ROUTING, + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, }; diff --git a/net/netlabel/netlabel_domainhash.c 
b/net/netlabel/netlabel_domainhash.c index 7a10bbe02c1..c5d9f97ef21 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -682,7 +682,7 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, * buckets and @skip_chain entries. For each entry in the table call * @callback, if @callback returns a negative value stop 'walking' through the * table and return. Updates the values in @skip_bkt and @skip_chain on - * return. Returns zero on succcess, negative values on failure. + * return. Returns zero on success, negative values on failure. * */ int netlbl_domhsh_walk(u32 *skip_bkt, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index fb357f01018..98ed22ee2ff 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -472,13 +472,12 @@ int netlbl_unlhsh_add(struct net *net, rcu_read_lock(); if (dev_name != NULL) { - dev = dev_get_by_name(net, dev_name); + dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_add_return; } ifindex = dev->ifindex; - dev_put(dev); iface = netlbl_unlhsh_search_iface(ifindex); } else { ifindex = 0; @@ -737,13 +736,12 @@ int netlbl_unlhsh_remove(struct net *net, rcu_read_lock(); if (dev_name != NULL) { - dev = dev_get_by_name(net, dev_name); + dev = dev_get_by_name_rcu(net, dev_name); if (dev == NULL) { ret_val = -ENODEV; goto unlhsh_remove_return; } iface = netlbl_unlhsh_search_iface(dev->ifindex); - dev_put(dev); } else iface = rcu_dereference(netlbl_unlhsh_def); if (iface == NULL) { @@ -1552,7 +1550,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, struct netlbl_unlhsh_iface *iface; rcu_read_lock(); - iface = netlbl_unlhsh_search_iface_def(skb->iif); + iface = netlbl_unlhsh_search_iface_def(skb->skb_iif); if (iface == NULL) goto unlabel_getattr_nolabel; switch (family) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19e98007691..a4957bf2ca6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -428,7 +428,8 @@ static int __netlink_create(struct net *net, struct socket *sock, return 0; } -static int netlink_create(struct net *net, struct socket *sock, int protocol) +static int netlink_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct module *module = NULL; struct mutex *cb_mutex; @@ -497,7 +498,7 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid && !nlk->subscriptions) { + if (nlk->pid) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, @@ -707,7 +708,7 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, { struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; + DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr); nladdr->nl_family = AF_NETLINK; nladdr->nl_pad = 0; @@ -1091,7 +1092,7 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; - if (sock_net(sk) != sock_net(p->exclude_sk)) + if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || @@ -2050,7 +2051,7 @@ static const struct proto_ops netlink_ops = { .sendpage = sock_no_sendpage, }; -static struct net_proto_family netlink_family_ops = { +static const struct net_proto_family netlink_family_ops = { .family = PF_NETLINK, .create = netlink_create, .owner = THIS_MODULE, /* for consistency 8) */ diff 
--git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 44ff3f3810f..d07ecda0a92 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -97,25 +97,17 @@ static struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family) */ static inline u16 genl_generate_id(void) { - static u16 id_gen_idx; - int overflowed = 0; + static u16 id_gen_idx = GENL_MIN_ID; + int i; - do { - if (id_gen_idx == 0) + for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { + if (!genl_family_find_byid(id_gen_idx)) + return id_gen_idx; + if (++id_gen_idx > GENL_MAX_ID) id_gen_idx = GENL_MIN_ID; + } - if (++id_gen_idx > GENL_MAX_ID) { - if (!overflowed) { - overflowed = 1; - id_gen_idx = 0; - continue; - } else - return 0; - } - - } while (genl_family_find_byid(id_gen_idx)); - - return id_gen_idx; + return 0; } static struct genl_multicast_group notify_grp; @@ -374,11 +366,6 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } - if (genl_family_find_byid(family->id)) { - err = -EEXIST; - goto errout_locked; - } - if (family->id == GENL_ID_GENERATE) { u16 newid = genl_generate_id(); @@ -388,6 +375,9 @@ int genl_register_family(struct genl_family *family) } family->id = newid; + } else if (genl_family_find_byid(family->id)) { + err = -EEXIST; + goto errout_locked; } if (family->maxattr) { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 7a834952f67..71604c6613b 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -425,12 +425,13 @@ static struct proto nr_proto = { .obj_size = sizeof(struct nr_sock), }; -static int nr_create(struct net *net, struct socket *sock, int protocol) +static int nr_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct nr_sock *nr; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (sock->type != SOCK_SEQPACKET || protocol != 0) @@ -1372,7 +1373,7 @@ static const struct file_operations nr_info_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct net_proto_family nr_family_ops = { +static const struct net_proto_family nr_family_ops = { .family = PF_NETROM, .create = nr_create, .owner = THIS_MODULE, diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 4eb1ac9a767..aacba76070f 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -597,15 +597,15 @@ struct net_device *nr_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } if (first) dev_hold(first); - read_unlock(&dev_base_lock); + rcu_read_unlock(); return first; } @@ -617,16 +617,17 @@ struct net_device *nr_dev_get(ax25_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { - if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && + ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 7b49591fe87..1e0fa9e57aa 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -36,143 
+36,119 @@ static struct ctl_table_header *nr_table_header; static ctl_table nr_table[] = { { - .ctl_name = NET_NETROM_DEFAULT_PATH_QUALITY, .procname = "default_path_quality", .data = &sysctl_netrom_default_path_quality, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_quality, .extra2 = &max_quality }, { - .ctl_name = NET_NETROM_OBSOLESCENCE_COUNT_INITIALISER, .procname = "obsolescence_count_initialiser", .data = &sysctl_netrom_obsolescence_count_initialiser, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_obs, .extra2 = &max_obs }, { - .ctl_name = NET_NETROM_NETWORK_TTL_INITIALISER, .procname = "network_ttl_initialiser", .data = &sysctl_netrom_network_ttl_initialiser, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ttl, .extra2 = &max_ttl }, { - .ctl_name = NET_NETROM_TRANSPORT_TIMEOUT, .procname = "transport_timeout", .data = &sysctl_netrom_transport_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t1, .extra2 = &max_t1 }, { - .ctl_name = NET_NETROM_TRANSPORT_MAXIMUM_TRIES, .procname = "transport_maximum_tries", .data = &sysctl_netrom_transport_maximum_tries, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_n2, .extra2 = &max_n2 }, { - .ctl_name = NET_NETROM_TRANSPORT_ACKNOWLEDGE_DELAY, .procname = "transport_acknowledge_delay", .data = &sysctl_netrom_transport_acknowledge_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t2, .extra2 = &max_t2 }, { - .ctl_name = NET_NETROM_TRANSPORT_BUSY_DELAY, .procname = "transport_busy_delay", .data = &sysctl_netrom_transport_busy_delay, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_t4, .extra2 = &max_t4 }, { - .ctl_name = NET_NETROM_TRANSPORT_REQUESTED_WINDOW_SIZE, .procname = "transport_requested_window_size", .data = &sysctl_netrom_transport_requested_window_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, { - .ctl_name = NET_NETROM_TRANSPORT_NO_ACTIVITY_TIMEOUT, .procname = "transport_no_activity_timeout", .data = &sysctl_netrom_transport_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, { - .ctl_name = NET_NETROM_ROUTING_CONTROL, .procname = "routing_control", .data = &sysctl_netrom_routing_control, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, { - .ctl_name = NET_NETROM_LINK_FAILS_COUNT, .procname = "link_fails_count", .data = &sysctl_netrom_link_fails_count, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_fails, .extra2 = &max_fails }, { - .ctl_name = NET_NETROM_RESET, .procname = "reset", .data = &sysctl_netrom_reset_circuit, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_reset, .extra2 = &max_reset }, - { .ctl_name = 0 } + { } }; static struct ctl_path nr_path[] = { - { 
.procname = "net", .ctl_name = CTL_NET, }, - { .procname = "netrom", .ctl_name = NET_NETROM, }, + { .procname = "net", }, + { .procname = "netrom", }, { } }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f2d116a5cb3..020562164b5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -79,6 +79,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/if_vlan.h> #ifdef CONFIG_INET #include <net/inet_common.h> @@ -188,7 +189,6 @@ struct packet_sock { struct packet_ring_buffer tx_ring; int copy_thresh; #endif - struct packet_type prot_hook; spinlock_t bind_lock; struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ @@ -204,6 +204,7 @@ struct packet_sock { unsigned int tp_reserve; unsigned int tp_loss:1; #endif + struct packet_type prot_hook ____cacheline_aligned_in_smp; }; struct packet_skb_cb { @@ -364,7 +365,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, if (skb->pkt_type == PACKET_LOOPBACK) goto out; - if (dev_net(dev) != sock_net(sk)) + if (!net_eq(dev_net(dev), sock_net(sk))) goto out; skb = skb_share_check(skb, GFP_ATOMIC); @@ -436,7 +437,8 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(sock_net(sk), saddr->spkt_device); + rcu_read_lock(); + dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -490,6 +492,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; if (err) goto out_free; @@ -498,14 +501,13 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, */ dev_queue_xmit(skb); - dev_put(dev); + rcu_read_unlock(); return len; out_free: kfree_skb(skb); out_unlock: - if (dev) - dev_put(dev); + rcu_read_unlock(); return err; } @@ -551,7 +553,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev_net(dev) != sock_net(sk)) + if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; skb->dev = dev; @@ -626,15 +628,14 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, spin_lock(&sk->sk_receive_queue.lock); po->stats.tp_packets++; + skb->dropcount = atomic_read(&sk->sk_drops); __skb_queue_tail(&sk->sk_receive_queue, skb); spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, skb->len); return 0; drop_n_acct: - spin_lock(&sk->sk_receive_queue.lock); - po->stats.tp_drops++; - spin_unlock(&sk->sk_receive_queue.lock); + po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -673,7 +674,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, sk = pt->af_packet_priv; po = pkt_sk(sk); - if (dev_net(dev) != sock_net(sk)) + if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; if (dev->header_ops) { @@ -766,7 +767,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = skb->vlan_tci; + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); hdrlen = sizeof(*h.h2); break; default: @@ -856,6 +857,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->protocol = proto; skb->dev = dev; skb->priority = po->sk.sk_priority; + skb->mark = po->sk.sk_mark; skb_shinfo(skb)->destructor_arg = ph.raw; switch 
(po->tp_version) { @@ -1032,9 +1034,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) goto out_xmit; packet_increment_head(&po->tx_ring); len_sum += tp_len; - } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT)) - && (atomic_read(&po->tx_ring.pending)))) - ); + } while (likely((ph != NULL) || + ((!(msg->msg_flags & MSG_DONTWAIT)) && + (atomic_read(&po->tx_ring.pending)))) + ); err = len_sum; goto out_put; @@ -1122,6 +1125,7 @@ static int packet_snd(struct socket *sock, skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; /* * Now send it @@ -1341,7 +1345,8 @@ static struct proto packet_proto = { * Create a packet of type SOCK_PACKET. */ -static int packet_create(struct net *net, struct socket *sock, int protocol) +static int packet_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct packet_sock *po; @@ -1472,7 +1477,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, @@ -1488,7 +1493,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = skb->vlan_tci; + aux.tp_vlan_tci = vlan_tx_tag_get(skb); put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } @@ -1515,12 +1520,13 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex); - if (dev) { + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + if (dev) strlcpy(uaddr->sa_data, dev->name, 15); - dev_put(dev); - } else + else memset(uaddr->sa_data, 0, 14); + rcu_read_unlock(); *uaddr_len = sizeof(*uaddr); return 0; @@ -1532,7 +1538,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, struct net_device *dev; struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; + DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); if (peer) return -EOPNOTSUPP; @@ -1540,16 +1546,17 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; sll->sll_protocol = po->num; - dev = dev_get_by_index(sock_net(sk), po->ifindex); + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); - dev_put(dev); } else { sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } + rcu_read_unlock(); *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; return 0; @@ -1659,11 +1666,9 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) if (--ml->count == 0) { struct net_device *dev; *mlp = ml->next; - dev = dev_get_by_index(sock_net(sk), ml->ifindex); - if (dev) { + dev = __dev_get_by_index(sock_net(sk), ml->ifindex); + if (dev) packet_dev_mc(dev, ml, -1); - dev_put(dev); - } kfree(ml); } rtnl_unlock(); @@ -1687,11 +1692,9 @@ static void packet_flush_mclist(struct sock *sk) struct net_device *dev; po->mclist = ml->next; - dev = dev_get_by_index(sock_net(sk), ml->ifindex); - if (dev != NULL) { + dev = 
__dev_get_by_index(sock_net(sk), ml->ifindex); + if (dev != NULL) packet_dev_mc(dev, ml, -1); - dev_put(dev); - } kfree(ml); } rtnl_unlock(); @@ -2360,7 +2363,7 @@ static const struct proto_ops packet_ops = { .sendpage = sock_no_sendpage, }; -static struct net_proto_family packet_family_ops = { +static const struct net_proto_family packet_family_ops = { .family = PF_PACKET, .create = packet_create, .owner = THIS_MODULE, diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index f60c0c2aacb..526d0273991 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -35,7 +35,6 @@ /* Transport protocol registration */ static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; -static DEFINE_SPINLOCK(proto_tab_lock); static struct phonet_protocol *phonet_proto_get(int protocol) { @@ -44,11 +43,11 @@ static struct phonet_protocol *phonet_proto_get(int protocol) if (protocol >= PHONET_NPROTO) return NULL; - spin_lock(&proto_tab_lock); - pp = proto_tab[protocol]; + rcu_read_lock(); + pp = rcu_dereference(proto_tab[protocol]); if (pp && !try_module_get(pp->prot->owner)) pp = NULL; - spin_unlock(&proto_tab_lock); + rcu_read_unlock(); return pp; } @@ -60,7 +59,8 @@ static inline void phonet_proto_put(struct phonet_protocol *pp) /* protocol family functions */ -static int pn_socket_create(struct net *net, struct socket *sock, int protocol) +static int pn_socket_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct pn_sock *pn; @@ -118,7 +118,7 @@ out: return err; } -static struct net_proto_family phonet_proto_family = { +static const struct net_proto_family phonet_proto_family = { .family = PF_PHONET, .create = pn_socket_create, .owner = THIS_MODULE, @@ -190,9 +190,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, skb->priority = 0; skb->dev = dev; - if (pn_addr(src) == pn_addr(dst)) { + if (skb->pkt_type == PACKET_LOOPBACK) { skb_reset_mac_header(skb); - skb->pkt_type = PACKET_LOOPBACK; skb_orphan(skb); if (irq) netif_rx(skb); @@ -222,6 +221,9 @@ static int pn_raw_send(const void *data, int len, struct net_device *dev, if (skb == NULL) return -ENOMEM; + if (phonet_address_lookup(dev_net(dev), pn_addr(dst)) == 0) + skb->pkt_type = PACKET_LOOPBACK; + skb_reserve(skb, MAX_PHONET_HEADER); __skb_put(skb, len); skb_copy_to_linear_data(skb, data, len); @@ -235,6 +237,7 @@ static int pn_raw_send(const void *data, int len, struct net_device *dev, int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target) { + struct net *net = sock_net(sk); struct net_device *dev; struct pn_sock *pn = pn_sk(sk); int err; @@ -243,9 +246,13 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, err = -EHOSTUNREACH; if (sk->sk_bound_dev_if) - dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); - else - dev = phonet_device_get(sock_net(sk)); + dev = dev_get_by_index(net, sk->sk_bound_dev_if); + else if (phonet_address_lookup(net, daddr) == 0) { + dev = phonet_device_get(net); + skb->pkt_type = PACKET_LOOPBACK; + } else + dev = phonet_route_output(net, daddr); + if (!dev || !(dev->flags & IFF_UP)) goto drop; @@ -369,6 +376,12 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, pn_skb_get_dst_sockaddr(skb, &sa); + /* check if this is broadcasted */ + if (pn_sockaddr_get_addr(&sa) == PNADDR_BROADCAST) { + pn_deliver_sock_broadcast(net, skb); + goto out; + } + /* check if we are the destination */ if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { /* Phonet packet input */ @@ 
-381,6 +394,38 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, send_obj_unreachable(skb); send_reset_indications(skb); } + } else if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) + goto out; /* Race between address deletion and loopback */ + else { + /* Phonet packet routing */ + struct net_device *out_dev; + + out_dev = phonet_route_output(net, pn_sockaddr_get_addr(&sa)); + if (!out_dev) { + LIMIT_NETDEBUG(KERN_WARNING"No Phonet route to %02X\n", + pn_sockaddr_get_addr(&sa)); + goto out; + } + + __skb_push(skb, sizeof(struct phonethdr)); + skb->dev = out_dev; + if (out_dev == dev) { + LIMIT_NETDEBUG(KERN_ERR"Phonet loop to %02X on %s\n", + pn_sockaddr_get_addr(&sa), dev->name); + goto out_dev; + } + /* Some drivers (e.g. TUN) do not allocate HW header space */ + if (skb_cow_head(skb, out_dev->hard_header_len)) + goto out_dev; + + if (dev_hard_header(skb, out_dev, ETH_P_PHONET, NULL, NULL, + skb->len) < 0) + goto out_dev; + dev_queue_xmit(skb); + dev_put(out_dev); + return NET_RX_SUCCESS; +out_dev: + dev_put(out_dev); } out: @@ -393,6 +438,8 @@ static struct packet_type phonet_packet_type __read_mostly = { .func = phonet_rcv, }; +static DEFINE_MUTEX(proto_tab_lock); + int __init_or_module phonet_proto_register(int protocol, struct phonet_protocol *pp) { @@ -405,12 +452,12 @@ int __init_or_module phonet_proto_register(int protocol, if (err) return err; - spin_lock(&proto_tab_lock); + mutex_lock(&proto_tab_lock); if (proto_tab[protocol]) err = -EBUSY; else - proto_tab[protocol] = pp; - spin_unlock(&proto_tab_lock); + rcu_assign_pointer(proto_tab[protocol], pp); + mutex_unlock(&proto_tab_lock); return err; } @@ -418,10 +465,11 @@ EXPORT_SYMBOL(phonet_proto_register); void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) { - spin_lock(&proto_tab_lock); + mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); - proto_tab[protocol] = NULL; - spin_unlock(&proto_tab_lock); + rcu_assign_pointer(proto_tab[protocol], NULL); + mutex_unlock(&proto_tab_lock); + synchronize_rcu(); proto_unregister(pp->prot); } EXPORT_SYMBOL(phonet_proto_unregister); @@ -435,6 +483,7 @@ static int __init phonet_init(void) if (err) return err; + pn_sock_init(); err = sock_register(&phonet_proto_family); if (err) { printk(KERN_ALERT diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index ef5c75c372e..67f072e94d0 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -159,11 +159,9 @@ out_nofree: static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err = sock_queue_rcv_skb(sk, skb); - if (err < 0) { + + if (err < 0) kfree_skb(skb); - if (err == -ENOMEM) - atomic_inc(&sk->sk_drops); - } return err ? 
NET_RX_DROP : NET_RX_SUCCESS; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 5f32d217535..b6356f3832f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -360,8 +360,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) err = sock_queue_rcv_skb(sk, skb); if (!err) return 0; - if (err == -ENOMEM) - atomic_inc(&sk->sk_drops); break; } @@ -716,8 +714,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) return -EINVAL; lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) - && !skb_queue_empty(&pn->ctrlreq_queue)) + if (sock_flag(sk, SOCK_URGINLINE) && + !skb_queue_empty(&pn->ctrlreq_queue)) answ = skb_peek(&pn->ctrlreq_queue)->len; else if (!skb_queue_empty(&sk->sk_receive_queue)) answ = skb_peek(&sk->sk_receive_queue)->len; @@ -845,7 +843,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct pep_sock *pn = pep_sk(sk); - struct sk_buff *skb = NULL; + struct sk_buff *skb; long timeo; int flags = msg->msg_flags; int err, done; @@ -853,6 +851,16 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) return -EOPNOTSUPP; + skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, + flags & MSG_DONTWAIT, &err); + if (!skb) + return -ENOBUFS; + + skb_reserve(skb, MAX_PHONET_HEADER + 3); + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (err < 0) + goto outfree; + lock_sock(sk); timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) { @@ -896,28 +904,13 @@ disabled: goto disabled; } - if (!skb) { - skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, - flags & MSG_DONTWAIT, &err); - if (skb == NULL) - goto out; - skb_reserve(skb, MAX_PHONET_HEADER + 3); - - if (sk->sk_state != TCP_ESTABLISHED || - !atomic_read(&pn->tx_credits)) - goto disabled; /* sock_alloc_send_skb might sleep */ - } - - err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); - if (err < 0) - goto out; - err = pipe_skb_send(sk, skb); if (err >= 0) err = len; /* success! 
*/ skb = NULL; out: release_sock(sk); +outfree: kfree_skb(skb); return err; } diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5f42f30dd16..bc4a33bf2d3 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -33,11 +33,17 @@ #include <net/netns/generic.h> #include <net/phonet/pn_dev.h> +struct phonet_routes { + struct mutex lock; + struct net_device *table[64]; +}; + struct phonet_net { struct phonet_device_list pndevs; + struct phonet_routes routes; }; -int phonet_net_id; +int phonet_net_id __read_mostly; struct phonet_device_list *phonet_device_list(struct net *net) { @@ -55,7 +61,8 @@ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); - list_add(&pnd->list, &pndevs->list); + BUG_ON(!mutex_is_locked(&pndevs->lock)); + list_add_rcu(&pnd->list, &pndevs->list); return pnd; } @@ -64,6 +71,7 @@ static struct phonet_device *__phonet_get(struct net_device *dev) struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; + BUG_ON(!mutex_is_locked(&pndevs->lock)); list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; @@ -71,6 +79,18 @@ static struct phonet_device *__phonet_get(struct net_device *dev) return NULL; } +static struct phonet_device *__phonet_get_rcu(struct net_device *dev) +{ + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); + struct phonet_device *pnd; + + list_for_each_entry_rcu(pnd, &pndevs->list, list) { + if (pnd->netdev == dev) + return pnd; + } + return NULL; +} + static void phonet_device_destroy(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); @@ -78,11 +98,11 @@ static void phonet_device_destroy(struct net_device *dev) ASSERT_RTNL(); - spin_lock_bh(&pndevs->lock); + mutex_lock(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) - list_del(&pnd->list); - spin_unlock_bh(&pndevs->lock); + list_del_rcu(&pnd->list); + mutex_unlock(&pndevs->lock); if (pnd) { u8 addr; @@ -100,8 +120,8 @@ struct net_device *phonet_device_get(struct net *net) struct phonet_device *pnd; struct net_device *dev = NULL; - spin_lock_bh(&pndevs->lock); - list_for_each_entry(pnd, &pndevs->list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pnd, &pndevs->list, list) { dev = pnd->netdev; BUG_ON(!dev); @@ -112,7 +132,7 @@ struct net_device *phonet_device_get(struct net *net) } if (dev) dev_hold(dev); - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); return dev; } @@ -122,7 +142,7 @@ int phonet_address_add(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - spin_lock_bh(&pndevs->lock); + mutex_lock(&pndevs->lock); /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) @@ -131,7 +151,7 @@ int phonet_address_add(struct net_device *dev, u8 addr) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; - spin_unlock_bh(&pndevs->lock); + mutex_unlock(&pndevs->lock); return err; } @@ -141,36 +161,56 @@ int phonet_address_del(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - spin_lock_bh(&pndevs->lock); + mutex_lock(&pndevs->lock); pnd = __phonet_get(dev); - if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) + if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; - else if (bitmap_empty(pnd->addrs, 64)) { - list_del(&pnd->list); + pnd = NULL; + } else if (bitmap_empty(pnd->addrs, 64)) + list_del_rcu(&pnd->list); + else + pnd = NULL; + 
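/*
 * Illustrative sketch, not part of this patch: the phonet_address_del()
 * hunk above switches to the usual RCU removal sequence -- unlink the
 * entry under the writer-side mutex, wait for readers, then free it
 * outside the lock.  All names below (example_entry, example_remove,
 * example_lock) are invented for illustration only.
 */
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_entry {
	struct list_head list;
};

static void example_remove(struct mutex *example_lock, struct example_entry *e)
{
	mutex_lock(example_lock);
	list_del_rcu(&e->list);	/* readers in rcu_read_lock() may still see it */
	mutex_unlock(example_lock);
	synchronize_rcu();	/* wait for every such reader to finish */
	kfree(e);		/* no reference can remain, safe to free */
}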
mutex_unlock(&pndevs->lock); + + if (pnd) { + synchronize_rcu(); kfree(pnd); } - spin_unlock_bh(&pndevs->lock); return err; } /* Gets a source address toward a destination, through a interface. */ -u8 phonet_address_get(struct net_device *dev, u8 addr) +u8 phonet_address_get(struct net_device *dev, u8 daddr) { - struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; + u8 saddr; - spin_lock_bh(&pndevs->lock); - pnd = __phonet_get(dev); + rcu_read_lock(); + pnd = __phonet_get_rcu(dev); if (pnd) { BUG_ON(bitmap_empty(pnd->addrs, 64)); /* Use same source address as destination, if possible */ - if (!test_bit(addr >> 2, pnd->addrs)) - addr = find_first_bit(pnd->addrs, 64) << 2; + if (test_bit(daddr >> 2, pnd->addrs)) + saddr = daddr; + else + saddr = find_first_bit(pnd->addrs, 64) << 2; } else - addr = PN_NO_ADDR; - spin_unlock_bh(&pndevs->lock); - return addr; + saddr = PN_NO_ADDR; + rcu_read_unlock(); + + if (saddr == PN_NO_ADDR) { + /* Fallback to another device */ + struct net_device *def_dev; + + def_dev = phonet_device_get(dev_net(dev)); + if (def_dev) { + if (def_dev != dev) + saddr = phonet_address_get(def_dev, daddr); + dev_put(def_dev); + } + } + return saddr; } int phonet_address_lookup(struct net *net, u8 addr) @@ -179,8 +219,8 @@ int phonet_address_lookup(struct net *net, u8 addr) struct phonet_device *pnd; int err = -EADDRNOTAVAIL; - spin_lock_bh(&pndevs->lock); - list_for_each_entry(pnd, &pndevs->list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pnd, &pndevs->list, list) { /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) @@ -192,7 +232,7 @@ int phonet_address_lookup(struct net *net, u8 addr) } } found: - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); return err; } @@ -219,6 +259,32 @@ static int phonet_device_autoconf(struct net_device *dev) return 0; } +static void phonet_route_autodel(struct net_device *dev) +{ + struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + unsigned i; + DECLARE_BITMAP(deleted, 64); + + /* Remove left-over Phonet routes */ + bitmap_zero(deleted, 64); + mutex_lock(&pnn->routes.lock); + for (i = 0; i < 64; i++) + if (dev == pnn->routes.table[i]) { + rcu_assign_pointer(pnn->routes.table[i], NULL); + set_bit(i, deleted); + } + mutex_unlock(&pnn->routes.lock); + + if (bitmap_empty(deleted, 64)) + return; /* short-circuit RCU */ + synchronize_rcu(); + for (i = find_first_bit(deleted, 64); i < 64; + i = find_next_bit(deleted, 64, i + 1)) { + rtm_phonet_notify(RTM_DELROUTE, dev, i); + dev_put(dev); + } +} + /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, void *arg) @@ -232,6 +298,7 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, break; case NETDEV_UNREGISTER: phonet_device_destroy(dev); + phonet_route_autodel(dev); break; } return 0; @@ -246,18 +313,14 @@ static struct notifier_block phonet_device_notifier = { /* Per-namespace Phonet devices handling */ static int phonet_init_net(struct net *net) { - struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL); - if (!pnn) - return -ENOMEM; + struct phonet_net *pnn = net_generic(net, phonet_net_id); - if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) { - kfree(pnn); + if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) return -ENOMEM; - } INIT_LIST_HEAD(&pnn->pndevs.list); - spin_lock_init(&pnn->pndevs.lock); - 
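/*
 * Illustrative sketch, not part of this patch: the phonet_init_net()/
 * phonet_net_ops conversion in this hunk relies on the pernet core
 * allocating the per-namespace state when .id and .size are set, so
 * ->init() only looks it up via net_generic().  The example_* names
 * below are invented for illustration.
 */
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct example_net {
	int some_state;
};

static int example_net_id __read_mostly;

static int example_init_net(struct net *net)
{
	struct example_net *en = net_generic(net, example_net_id);

	en->some_state = 0;	/* storage was allocated (zeroed) by the core */
	return 0;
}

static void example_exit_net(struct net *net)
{
	/* per-net memory is freed by the core; only tear down extra state */
}

static struct pernet_operations example_net_ops = {
	.init = example_init_net,
	.exit = example_exit_net,
	.id   = &example_net_id,
	.size = sizeof(struct example_net),
};
/* registered with register_pernet_subsys() or, as phonet does here,
 * register_pernet_device(). */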
net_assign_generic(net, phonet_net_id, pnn); + mutex_init(&pnn->pndevs.lock); + mutex_init(&pnn->routes.lock); return 0; } @@ -265,25 +328,35 @@ static void phonet_exit_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); struct net_device *dev; + unsigned i; rtnl_lock(); for_each_netdev(net, dev) phonet_device_destroy(dev); + + for (i = 0; i < 64; i++) { + dev = pnn->routes.table[i]; + if (dev) { + rtm_phonet_notify(RTM_DELROUTE, dev, i); + dev_put(dev); + } + } rtnl_unlock(); proc_net_remove(net, "phonet"); - kfree(pnn); } static struct pernet_operations phonet_net_ops = { .init = phonet_init_net, .exit = phonet_exit_net, + .id = &phonet_net_id, + .size = sizeof(struct phonet_net), }; /* Initialize Phonet devices list */ int __init phonet_device_init(void) { - int err = register_pernet_gen_device(&phonet_net_id, &phonet_net_ops); + int err = register_pernet_device(&phonet_net_ops); if (err) return err; @@ -298,5 +371,75 @@ void phonet_device_exit(void) { rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); - unregister_pernet_gen_device(phonet_net_id, &phonet_net_ops); + unregister_pernet_device(&phonet_net_ops); +} + +int phonet_route_add(struct net_device *dev, u8 daddr) +{ + struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + int err = -EEXIST; + + daddr = daddr >> 2; + mutex_lock(&routes->lock); + if (routes->table[daddr] == NULL) { + rcu_assign_pointer(routes->table[daddr], dev); + dev_hold(dev); + err = 0; + } + mutex_unlock(&routes->lock); + return err; +} + +int phonet_route_del(struct net_device *dev, u8 daddr) +{ + struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + + daddr = daddr >> 2; + mutex_lock(&routes->lock); + if (dev == routes->table[daddr]) + rcu_assign_pointer(routes->table[daddr], NULL); + else + dev = NULL; + mutex_unlock(&routes->lock); + + if (!dev) + return -ENOENT; + synchronize_rcu(); + dev_put(dev); + return 0; +} + +struct net_device *phonet_route_get(struct net *net, u8 daddr) +{ + struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + struct net_device *dev; + + ASSERT_RTNL(); /* no need to hold the device */ + + daddr >>= 2; + rcu_read_lock(); + dev = rcu_dereference(routes->table[daddr]); + rcu_read_unlock(); + return dev; +} + +struct net_device *phonet_route_output(struct net *net, u8 daddr) +{ + struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_routes *routes = &pnn->routes; + struct net_device *dev; + + daddr >>= 2; + rcu_read_lock(); + dev = rcu_dereference(routes->table[daddr]); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + if (!dev) + dev = phonet_device_get(net); /* Default route */ + return dev; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index d21fd357661..2e6c7eb8e76 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -29,6 +29,8 @@ #include <net/sock.h> #include <net/phonet/pn_dev.h> +/* Device address handling */ + static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, u32 pid, u32 seq, int event); @@ -51,8 +53,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr) RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); return; errout: - if (err < 0) - rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); } static const struct nla_policy 
ifa_phonet_policy[IFA_MAX+1] = { @@ -130,8 +131,8 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) int addr_idx = 0, addr_start_idx = cb->args[1]; pndevs = phonet_device_list(sock_net(skb->sk)); - spin_lock_bh(&pndevs->lock); - list_for_each_entry(pnd, &pndevs->list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pnd, &pndevs->list, list) { u8 addr; if (dev_idx > dev_start_idx) @@ -153,13 +154,137 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } out: - spin_unlock_bh(&pndevs->lock); + rcu_read_unlock(); cb->args[0] = dev_idx; cb->args[1] = addr_idx; return skb->len; } +/* Routes handling */ + +static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, + u32 pid, u32 seq, int event) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = AF_PHONET; + rtm->rtm_dst_len = 6; + rtm->rtm_src_len = 0; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_MAIN; + rtm->rtm_protocol = RTPROT_STATIC; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_type = RTN_UNICAST; + rtm->rtm_flags = 0; + NLA_PUT_U8(skb, RTA_DST, dst); + NLA_PUT_U32(skb, RTA_OIF, dev->ifindex); + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); + if (skb == NULL) + goto errout; + err = fill_route(skb, dev, dst, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL); + return; +errout: + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err); +} + +static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { + [RTA_DST] = { .type = NLA_U8 }, + [RTA_OIF] = { .type = NLA_U32 }, +}; + +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[RTA_MAX+1]; + struct net_device *dev; + struct rtmsg *rtm; + int err; + u8 dst; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy); + if (err < 0) + return err; + + rtm = nlmsg_data(nlh); + if (rtm->rtm_table != RT_TABLE_MAIN || rtm->rtm_type != RTN_UNICAST) + return -EINVAL; + if (tb[RTA_DST] == NULL || tb[RTA_OIF] == NULL) + return -EINVAL; + dst = nla_get_u8(tb[RTA_DST]); + if (dst & 3) /* Phonet addresses only have 6 high-order bits */ + return -EINVAL; + + dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF])); + if (dev == NULL) + return -ENODEV; + + if (nlh->nlmsg_type == RTM_NEWROUTE) + err = phonet_route_add(dev, dst); + else + err = phonet_route_del(dev, dst); + if (!err) + rtm_phonet_notify(nlh->nlmsg_type, dev, dst); + return err; +} + +static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u8 addr, addr_idx = 0, addr_start_idx = cb->args[0]; + + for (addr = 0; addr < 64; addr++) { + struct net_device *dev; + + dev = phonet_route_get(net, addr << 2); + if (!dev) + continue; + + if (addr_idx++ < addr_start_idx) + continue; + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE)) + goto out; + } + +out: + 
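/*
 * Illustrative sketch, not part of this patch: route_dumpit() around this
 * point follows the standard netlink dump pattern -- the callback is
 * invoked repeatedly, fills as many entries as fit in the skb, and parks
 * its position in cb->args[] so the next call resumes where it left off.
 * EXAMPLE_TABLE_SIZE and example_fill_one() are invented placeholders.
 */
#include <net/netlink.h>

#define EXAMPLE_TABLE_SIZE 64

static int example_fill_one(struct sk_buff *skb, int idx, u32 pid, u32 seq);

static int example_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx;

	for (idx = cb->args[0]; idx < EXAMPLE_TABLE_SIZE; idx++) {
		if (example_fill_one(skb, idx, NETLINK_CB(cb->skb).pid,
				     cb->nlh->nlmsg_seq) < 0)
			break;		/* skb full; resume at idx next time */
	}
	cb->args[0] = idx;		/* saved for the next invocation */
	return skb->len;		/* zero means the dump is complete */
}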
cb->args[0] = addr_idx; + cb->args[1] = 0; + + return skb->len; +} + int __init phonet_netlink_register(void) { int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); @@ -169,5 +294,8 @@ int __init phonet_netlink_register(void) /* Further __rtnl_register() cannot fail */ __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); + __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL); + __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL); + __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit); return 0; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index aa5b5a972bf..69c8b826a0c 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -45,13 +45,28 @@ static int pn_socket_release(struct socket *sock) return 0; } +#define PN_HASHSIZE 16 +#define PN_HASHMASK (PN_HASHSIZE-1) + + static struct { - struct hlist_head hlist; + struct hlist_head hlist[PN_HASHSIZE]; spinlock_t lock; -} pnsocks = { - .hlist = HLIST_HEAD_INIT, - .lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock), -}; +} pnsocks; + +void __init pn_sock_init(void) +{ + unsigned i; + + for (i = 0; i < PN_HASHSIZE; i++) + INIT_HLIST_HEAD(pnsocks.hlist + i); + spin_lock_init(&pnsocks.lock); +} + +static struct hlist_head *pn_hash_list(u16 obj) +{ + return pnsocks.hlist + (obj & PN_HASHMASK); +} /* * Find address based on socket address, match only certain fields. @@ -64,10 +79,11 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) struct sock *rval = NULL; u16 obj = pn_sockaddr_get_object(spn); u8 res = spn->spn_resource; + struct hlist_head *hlist = pn_hash_list(obj); spin_lock_bh(&pnsocks.lock); - sk_for_each(sknode, node, &pnsocks.hlist) { + sk_for_each(sknode, node, hlist) { struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ @@ -82,8 +98,8 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) if (pn->resource != res) continue; } - if (pn_addr(pn->sobject) - && pn_addr(pn->sobject) != pn_addr(obj)) + if (pn_addr(pn->sobject) && + pn_addr(pn->sobject) != pn_addr(obj)) continue; rval = sknode; @@ -94,13 +110,44 @@ struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) spin_unlock_bh(&pnsocks.lock); return rval; +} + +/* Deliver a broadcast packet (only in bottom-half) */ +void pn_deliver_sock_broadcast(struct net *net, struct sk_buff *skb) +{ + struct hlist_head *hlist = pnsocks.hlist; + unsigned h; + + spin_lock(&pnsocks.lock); + for (h = 0; h < PN_HASHSIZE; h++) { + struct hlist_node *node; + struct sock *sknode; + + sk_for_each(sknode, node, hlist) { + struct sk_buff *clone; + + if (!net_eq(sock_net(sknode), net)) + continue; + if (!sock_flag(sknode, SOCK_BROADCAST)) + continue; + clone = skb_clone(skb, GFP_ATOMIC); + if (clone) { + sock_hold(sknode); + sk_receive_skb(sknode, clone, 0); + } + } + hlist++; + } + spin_unlock(&pnsocks.lock); } void pn_sock_hash(struct sock *sk) { + struct hlist_head *hlist = pn_hash_list(pn_sk(sk)->sobject); + spin_lock_bh(&pnsocks.lock); - sk_add_node(sk, &pnsocks.hlist); + sk_add_node(sk, hlist); spin_unlock_bh(&pnsocks.lock); } EXPORT_SYMBOL(pn_sock_hash); @@ -416,15 +463,20 @@ EXPORT_SYMBOL(pn_sock_get_port); static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); + struct hlist_head *hlist = pnsocks.hlist; struct hlist_node *node; struct sock *sknode; + unsigned h; - sk_for_each(sknode, node, &pnsocks.hlist) { - if (!net_eq(net, 
sock_net(sknode))) - continue; - if (!pos) - return sknode; - pos--; + for (h = 0; h < PN_HASHSIZE; h++) { + sk_for_each(sknode, node, hlist) { + if (!net_eq(net, sock_net(sknode))) + continue; + if (!pos) + return sknode; + pos--; + } + hlist++; } return NULL; } diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 2220f332232..cea1c7dbdae 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -84,20 +84,18 @@ static int proc_local_port_range(ctl_table *table, int write, static struct ctl_table phonet_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "local_port_range", .data = &local_port_range, .maxlen = sizeof(local_port_range), .mode = 0644, .proc_handler = proc_local_port_range, - .strategy = NULL, }, - { .ctl_name = 0 } + { } }; static struct ctl_path phonet_ctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "phonet", }, { }, }; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 98e05382fd3..853c52be781 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -174,8 +174,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, mask |= (POLLIN | POLLRDNORM); spin_unlock(&rs->rs_lock); } - if (!list_empty(&rs->rs_recv_queue) - || !list_empty(&rs->rs_notify_queue)) + if (!list_empty(&rs->rs_recv_queue) || + !list_empty(&rs->rs_notify_queue)) mask |= (POLLIN | POLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (POLLOUT | POLLWRNORM); @@ -265,6 +265,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, case RDS_GET_MR: ret = rds_get_mr(rs, optval, optlen); break; + case RDS_GET_MR_FOR_DEST: + ret = rds_get_mr_for_dest(rs, optval, optlen); + break; case RDS_FREE_MR: ret = rds_free_mr(rs, optval, optlen); break; @@ -305,8 +308,8 @@ static int rds_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(int)) ret = -EINVAL; else - if (put_user(rs->rs_recverr, (int __user *) optval) - || put_user(sizeof(int), optlen)) + if (put_user(rs->rs_recverr, (int __user *) optval) || + put_user(sizeof(int), optlen)) ret = -EFAULT; else ret = 0; @@ -407,7 +410,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) return 0; } -static int rds_create(struct net *net, struct socket *sock, int protocol) +static int rds_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; @@ -431,7 +435,7 @@ void rds_sock_put(struct rds_sock *rs) sock_put(rds_rs_to_sk(rs)); } -static struct net_proto_family rds_family_ops = { +static const struct net_proto_family rds_family_ops = { .family = AF_RDS, .create = rds_create, .owner = THIS_MODULE, diff --git a/net/rds/cong.c b/net/rds/cong.c index dd2711df640..6d06cac2649 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -218,6 +218,8 @@ void rds_cong_queue_updates(struct rds_cong_map *map) spin_lock_irqsave(&rds_cong_lock, flags); list_for_each_entry(conn, &map->m_conn_list, c_map_item) { + if (conn->c_loopback) + continue; if (!test_and_set_bit(0, &conn->c_map_queued)) { rds_stats_inc(s_cong_update_queued); queue_delayed_work(rds_wq, &conn->c_send_w, 0); diff --git a/net/rds/connection.c b/net/rds/connection.c index cc8b568c0c8..278f607ab60 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -133,10 +133,8 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, spin_lock_irqsave(&rds_conn_lock, flags); conn = rds_conn_lookup(head, laddr, faddr, trans); - if (conn - && 
conn->c_loopback - && conn->c_trans != &rds_loop_transport - && !is_outgoing) { + if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && + !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. * We need a second connection object into which we diff --git a/net/rds/ib.h b/net/rds/ib.h index 1378b854cac..64df4e79b29 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -98,6 +98,7 @@ struct rds_ib_connection { struct rds_ib_send_work *i_sends; /* rx */ + struct tasklet_struct i_recv_tasklet; struct mutex i_recv_mutex; struct rds_ib_work_ring i_recv_ring; struct rds_ib_incoming *i_ibinc; @@ -303,6 +304,7 @@ void rds_ib_inc_free(struct rds_incoming *inc); int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, size_t size); void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_recv_tasklet_fn(unsigned long data); void rds_ib_recv_init_ring(struct rds_ib_connection *ic); void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); void rds_ib_recv_init_ack(struct rds_ib_connection *ic); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index c2d372f13db..647cb8ffc39 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -377,8 +377,8 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) } /* Even if len is crap *now* I still want to check it. -ASG */ - if (event->param.conn.private_data_len < sizeof (*dp) - || dp->dp_protocol_major == 0) + if (event->param.conn.private_data_len < sizeof (*dp) || + dp->dp_protocol_major == 0) return RDS_PROTOCOL_3_0; common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS; @@ -694,6 +694,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) return -ENOMEM; INIT_LIST_HEAD(&ic->ib_node); + tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, + (unsigned long) ic); mutex_init(&ic->i_recv_mutex); #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&ic->i_ack_lock); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index ef3ab5b7283..4b0da865a72 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -187,11 +187,8 @@ void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) INIT_LIST_HEAD(list); spin_unlock_irq(list_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); + list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) rds_conn_destroy(ic->conn); - } } struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) @@ -573,8 +570,8 @@ void rds_ib_free_mr(void *trans_private, int invalidate) spin_unlock_irqrestore(&pool->list_lock, flags); /* If we've pinned too many pages, request a flush */ - if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned - || atomic_read(&pool->dirty_count) >= pool->max_items / 10) + if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || + atomic_read(&pool->dirty_count) >= pool->max_items / 10) queue_work(rds_wq, &pool->flush_worker); if (invalidate) { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index cd7a6cfcab0..04dc0d3f3c9 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -143,15 +143,16 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, int ret = -ENOMEM; if (recv->r_ibinc == NULL) { - if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) { + if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { rds_ib_stats_inc(s_ib_rx_alloc_limit); goto out; } recv->r_ibinc = 
kmem_cache_alloc(rds_ib_incoming_slab, kptr_gfp); - if (recv->r_ibinc == NULL) + if (recv->r_ibinc == NULL) { + atomic_dec(&rds_ib_allocation); goto out; - atomic_inc(&rds_ib_allocation); + } INIT_LIST_HEAD(&recv->r_ibinc->ii_frags); rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr); } @@ -229,8 +230,8 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, int ret = 0; u32 pos; - while ((prefill || rds_conn_up(conn)) - && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) { + while ((prefill || rds_conn_up(conn)) && + rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) { if (pos >= ic->i_recv_ring.w_nr) { printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", pos); @@ -770,10 +771,10 @@ static void rds_ib_process_recv(struct rds_connection *conn, hdr = &ibinc->ii_inc.i_hdr; /* We can't just use memcmp here; fragments of a * single message may carry different ACKs */ - if (hdr->h_sequence != ihdr->h_sequence - || hdr->h_len != ihdr->h_len - || hdr->h_sport != ihdr->h_sport - || hdr->h_dport != ihdr->h_dport) { + if (hdr->h_sequence != ihdr->h_sequence || + hdr->h_len != ihdr->h_len || + hdr->h_sport != ihdr->h_sport || + hdr->h_dport != ihdr->h_dport) { rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); return; @@ -824,17 +825,22 @@ void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) { struct rds_connection *conn = context; struct rds_ib_connection *ic = conn->c_transport_data; - struct ib_wc wc; - struct rds_ib_ack_state state = { 0, }; - struct rds_ib_recv_work *recv; rdsdebug("conn %p cq %p\n", conn, cq); rds_ib_stats_inc(s_ib_rx_cq_call); - ib_req_notify_cq(cq, IB_CQ_SOLICITED); + tasklet_schedule(&ic->i_recv_tasklet); +} + +static inline void rds_poll_cq(struct rds_ib_connection *ic, + struct rds_ib_ack_state *state) +{ + struct rds_connection *conn = ic->conn; + struct ib_wc wc; + struct rds_ib_recv_work *recv; - while (ib_poll_cq(cq, 1, &wc) > 0) { + while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, wc.byte_len, be32_to_cpu(wc.ex.imm_data)); @@ -852,7 +858,7 @@ void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) if (rds_conn_up(conn) || rds_conn_connecting(conn)) { /* We expect errors as the qp is drained during shutdown */ if (wc.status == IB_WC_SUCCESS) { - rds_ib_process_recv(conn, recv, wc.byte_len, &state); + rds_ib_process_recv(conn, recv, wc.byte_len, state); } else { rds_ib_conn_error(conn, "recv completion on " "%pI4 had status %u, disconnecting and " @@ -863,6 +869,17 @@ void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_ring_free(&ic->i_recv_ring, 1); } +} + +void rds_ib_recv_tasklet_fn(unsigned long data) +{ + struct rds_ib_connection *ic = (struct rds_ib_connection *) data; + struct rds_connection *conn = ic->conn; + struct rds_ib_ack_state state = { 0, }; + + rds_poll_cq(ic, &state); + ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); + rds_poll_cq(ic, &state); if (state.ack_next_valid) rds_ib_set_ack(ic, state.ack_next, state.ack_required); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 23bf830db2d..a10fab6886d 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -252,8 +252,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) rds_ib_ring_free(&ic->i_send_ring, completed); - if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) - || test_bit(0, &conn->c_map_queued)) + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || + test_bit(0, 
&conn->c_map_queued)) queue_delayed_work(rds_wq, &conn->c_send_w, 0); /* We expect errors as the qp is drained during shutdown */ diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 84b5ffcb280..03f01cb4e0f 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -67,68 +67,62 @@ unsigned int rds_ib_sysctl_flow_control = 0; ctl_table rds_ib_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_ib_sysctl_max_wr_min, .extra2 = &rds_ib_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_wr", .data = &rds_ib_sysctl_max_recv_wr, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_ib_sysctl_max_wr_min, .extra2 = &rds_ib_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_wr", .data = &rds_ib_sysctl_max_unsig_wrs, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_ib_sysctl_max_unsig_wr_min, .extra2 = &rds_ib_sysctl_max_unsig_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_bytes", .data = &rds_ib_sysctl_max_unsig_bytes, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_ib_sysctl_max_unsig_bytes_min, .extra2 = &rds_ib_sysctl_max_unsig_bytes_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_allocation", .data = &rds_ib_sysctl_max_recv_allocation, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "flow_control", .data = &rds_ib_sysctl_flow_control, .maxlen = sizeof(rds_ib_sysctl_flow_control), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, - { .ctl_name = 0} + { } }; static struct ctl_path rds_ib_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, - { .procname = "ib", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "rds", }, + { .procname = "ib", }, { } }; diff --git a/net/rds/iw.h b/net/rds/iw.h index dd72b62bd50..eef2f0c2847 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -119,6 +119,7 @@ struct rds_iw_connection { struct rds_iw_send_work *i_sends; /* rx */ + struct tasklet_struct i_recv_tasklet; struct mutex i_recv_mutex; struct rds_iw_work_ring i_recv_ring; struct rds_iw_incoming *i_iwinc; @@ -330,6 +331,7 @@ void rds_iw_inc_free(struct rds_incoming *inc); int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, size_t size); void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_iw_recv_tasklet_fn(unsigned long data); void rds_iw_recv_init_ring(struct rds_iw_connection *ic); void rds_iw_recv_clear_ring(struct rds_iw_connection *ic); void rds_iw_recv_init_ack(struct rds_iw_connection *ic); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a416b0d492b..394cf6b4d0a 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -696,6 +696,8 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) return -ENOMEM; INIT_LIST_HEAD(&ic->iw_node); + tasklet_init(&ic->i_recv_tasklet, rds_iw_recv_tasklet_fn, + (unsigned long) 
ic); mutex_init(&ic->i_recv_mutex); #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&ic->i_ack_lock); diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index de4a1b16bf7..9eda11cca95 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -245,11 +245,8 @@ void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock) INIT_LIST_HEAD(list); spin_unlock_irq(list_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); + list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) rds_conn_destroy(ic->conn); - } } static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, @@ -576,8 +573,8 @@ void rds_iw_free_mr(void *trans_private, int invalidate) rds_iw_free_fastreg(pool, ibmr); /* If we've pinned too many pages, request a flush */ - if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned - || atomic_read(&pool->dirty_count) >= pool->max_items / 10) + if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || + atomic_read(&pool->dirty_count) >= pool->max_items / 10) queue_work(rds_wq, &pool->flush_worker); if (invalidate) { diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index 8683f5f66c4..54af7d6b92d 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -143,15 +143,16 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn, int ret = -ENOMEM; if (recv->r_iwinc == NULL) { - if (atomic_read(&rds_iw_allocation) >= rds_iw_sysctl_max_recv_allocation) { + if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { rds_iw_stats_inc(s_iw_rx_alloc_limit); goto out; } recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, kptr_gfp); - if (recv->r_iwinc == NULL) + if (recv->r_iwinc == NULL) { + atomic_dec(&rds_iw_allocation); goto out; - atomic_inc(&rds_iw_allocation); + } INIT_LIST_HEAD(&recv->r_iwinc->ii_frags); rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); } @@ -229,8 +230,8 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, int ret = 0; u32 pos; - while ((prefill || rds_conn_up(conn)) - && rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) { + while ((prefill || rds_conn_up(conn)) && + rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) { if (pos >= ic->i_recv_ring.w_nr) { printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", pos); @@ -729,10 +730,10 @@ static void rds_iw_process_recv(struct rds_connection *conn, hdr = &iwinc->ii_inc.i_hdr; /* We can't just use memcmp here; fragments of a * single message may carry different ACKs */ - if (hdr->h_sequence != ihdr->h_sequence - || hdr->h_len != ihdr->h_len - || hdr->h_sport != ihdr->h_sport - || hdr->h_dport != ihdr->h_dport) { + if (hdr->h_sequence != ihdr->h_sequence || + hdr->h_len != ihdr->h_len || + hdr->h_sport != ihdr->h_sport || + hdr->h_dport != ihdr->h_dport) { rds_iw_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); return; @@ -783,17 +784,22 @@ void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context) { struct rds_connection *conn = context; struct rds_iw_connection *ic = conn->c_transport_data; - struct ib_wc wc; - struct rds_iw_ack_state state = { 0, }; - struct rds_iw_recv_work *recv; rdsdebug("conn %p cq %p\n", conn, cq); rds_iw_stats_inc(s_iw_rx_cq_call); - ib_req_notify_cq(cq, IB_CQ_SOLICITED); + tasklet_schedule(&ic->i_recv_tasklet); +} + +static inline void rds_poll_cq(struct rds_iw_connection *ic, + struct rds_iw_ack_state *state) +{ + struct rds_connection *conn = ic->conn; + struct ib_wc wc; + struct rds_iw_recv_work *recv; - while 
(ib_poll_cq(cq, 1, &wc) > 0) { + while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", (unsigned long long)wc.wr_id, wc.status, wc.byte_len, be32_to_cpu(wc.ex.imm_data)); @@ -811,7 +817,7 @@ void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context) if (rds_conn_up(conn) || rds_conn_connecting(conn)) { /* We expect errors as the qp is drained during shutdown */ if (wc.status == IB_WC_SUCCESS) { - rds_iw_process_recv(conn, recv, wc.byte_len, &state); + rds_iw_process_recv(conn, recv, wc.byte_len, state); } else { rds_iw_conn_error(conn, "recv completion on " "%pI4 had status %u, disconnecting and " @@ -822,6 +828,17 @@ void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context) rds_iw_ring_free(&ic->i_recv_ring, 1); } +} + +void rds_iw_recv_tasklet_fn(unsigned long data) +{ + struct rds_iw_connection *ic = (struct rds_iw_connection *) data; + struct rds_connection *conn = ic->conn; + struct rds_iw_ack_state state = { 0, }; + + rds_poll_cq(ic, &state); + ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); + rds_poll_cq(ic, &state); if (state.ack_next_valid) rds_iw_set_ack(ic, state.ack_next, state.ack_required); diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 1f5abe3cf2b..1379e9d66a7 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -288,8 +288,8 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) rds_iw_ring_free(&ic->i_send_ring, completed); - if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) - || test_bit(0, &conn->c_map_queued)) + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || + test_bit(0, &conn->c_map_queued)) queue_delayed_work(rds_wq, &conn->c_send_w, 0); /* We expect errors as the qp is drained during shutdown */ @@ -519,8 +519,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); /* Fastreg support */ - if (rds_rdma_cookie_key(rm->m_rdma_cookie) - && !ic->i_fastreg_posted) { + if (rds_rdma_cookie_key(rm->m_rdma_cookie) && !ic->i_fastreg_posted) { ret = -EAGAIN; goto out; } diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 9590678cd61..1c4428a61a0 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -57,68 +57,62 @@ unsigned int rds_iw_sysctl_flow_control = 1; ctl_table rds_iw_sysctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_iw_sysctl_max_wr_min, .extra2 = &rds_iw_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_wr", .data = &rds_iw_sysctl_max_recv_wr, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_iw_sysctl_max_wr_min, .extra2 = &rds_iw_sysctl_max_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_wr", .data = &rds_iw_sysctl_max_unsig_wrs, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = &rds_iw_sysctl_max_unsig_wr_min, .extra2 = &rds_iw_sysctl_max_unsig_wr_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unsignaled_bytes", .data = &rds_iw_sysctl_max_unsig_bytes, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, .extra1 = 
&rds_iw_sysctl_max_unsig_bytes_min, .extra2 = &rds_iw_sysctl_max_unsig_bytes_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_recv_allocation", .data = &rds_iw_sysctl_max_recv_allocation, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = proc_doulongvec_minmax, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "flow_control", .data = &rds_iw_sysctl_flow_control, .maxlen = sizeof(rds_iw_sysctl_flow_control), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, - { .ctl_name = 0} + { } }; static struct ctl_path rds_iw_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, - { .procname = "iw", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "rds", }, + { .procname = "iw", }, { } }; diff --git a/net/rds/message.c b/net/rds/message.c index ca50a8ec974..73e600ffd87 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -122,8 +122,7 @@ int rds_message_add_extension(struct rds_header *hdr, if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE) return 0; - if (type >= __RDS_EXTHDR_MAX - || len != rds_exthdr_size[type]) + if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type]) return 0; if (ext_len >= RDS_HEADER_EXT_SPACE) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 8dc83d2caa5..4c64daa1f5d 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -317,6 +317,30 @@ int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) return __rds_rdma_map(rs, &args, NULL, NULL); } +int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen) +{ + struct rds_get_mr_for_dest_args args; + struct rds_get_mr_args new_args; + + if (optlen != sizeof(struct rds_get_mr_for_dest_args)) + return -EINVAL; + + if (copy_from_user(&args, (struct rds_get_mr_for_dest_args __user *)optval, + sizeof(struct rds_get_mr_for_dest_args))) + return -EFAULT; + + /* + * Initially, just behave like get_mr(). + * TODO: Implement get_mr as wrapper around this + * and deprecate it. 
+ */ + new_args.vec = args.vec; + new_args.cookie_addr = args.cookie_addr; + new_args.flags = args.flags; + + return __rds_rdma_map(rs, &new_args, NULL, NULL); +} + /* * Free the MR indicated by the given R_Key */ @@ -607,8 +631,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, { struct rds_rdma_op *op; - if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) - || rm->m_rdma_op != NULL) + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) || + rm->m_rdma_op != NULL) return -EINVAL; op = rds_rdma_prepare(rs, CMSG_DATA(cmsg)); @@ -631,8 +655,8 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, u32 r_key; int err = 0; - if (cmsg->cmsg_len < CMSG_LEN(sizeof(rds_rdma_cookie_t)) - || rm->m_rdma_cookie != 0) + if (cmsg->cmsg_len < CMSG_LEN(sizeof(rds_rdma_cookie_t)) || + rm->m_rdma_cookie != 0) return -EINVAL; memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg), sizeof(rm->m_rdma_cookie)); @@ -668,8 +692,8 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg) { - if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_get_mr_args)) - || rm->m_rdma_cookie != 0) + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_get_mr_args)) || + rm->m_rdma_cookie != 0) return -EINVAL; return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); diff --git a/net/rds/rdma.h b/net/rds/rdma.h index 425512098b0..909c39835a5 100644 --- a/net/rds/rdma.h +++ b/net/rds/rdma.h @@ -61,6 +61,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) } int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); +int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen); int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); void rds_rdma_drop_keys(struct rds_sock *rs); int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, diff --git a/net/rds/recv.c b/net/rds/recv.c index fdff33c7b43..b426d67f760 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -195,8 +195,8 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, * XXX we could spend more on the wire to get more robust failure * detection, arguably worth it to avoid data corruption. */ - if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq - && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { + if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq && + (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { rds_stats_inc(s_recv_drop_old_seq); goto out; } @@ -432,10 +432,9 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, } timeo = wait_event_interruptible_timeout(*sk->sk_sleep, - (!list_empty(&rs->rs_notify_queue) - || rs->rs_cong_notify - || rds_next_incoming(rs, &inc)), - timeo); + (!list_empty(&rs->rs_notify_queue) || + rs->rs_cong_notify || + rds_next_incoming(rs, &inc)), timeo); rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, timeo); if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) diff --git a/net/rds/send.c b/net/rds/send.c index 28c88ff3d03..b2fccfc2076 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -235,8 +235,8 @@ int rds_send_xmit(struct rds_connection *conn) * connection. * Therefore, we never retransmit messages with RDMA ops. 
*/ - if (rm->m_rdma_op - && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { + if (rm->m_rdma_op && + test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { spin_lock_irqsave(&conn->c_lock, flags); if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) list_move(&rm->m_conn_item, &to_be_dropped); @@ -247,8 +247,8 @@ int rds_send_xmit(struct rds_connection *conn) /* Require an ACK every once in a while */ len = ntohl(rm->m_inc.i_hdr.h_len); - if (conn->c_unacked_packets == 0 - || conn->c_unacked_bytes < len) { + if (conn->c_unacked_packets == 0 || + conn->c_unacked_bytes < len) { __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); conn->c_unacked_packets = rds_sysctl_max_unacked_packets; @@ -418,8 +418,8 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) spin_lock(&rm->m_rs_lock); ro = rm->m_rdma_op; - if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) - && ro && ro->r_notify && ro->r_notifier) { + if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && + ro && ro->r_notify && ro->r_notifier) { notifier = ro->r_notifier; rs = rm->m_rs; sock_hold(rds_rs_to_sk(rs)); @@ -549,8 +549,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status) list_del_init(&rm->m_sock_item); rds_send_sndbuf_remove(rs, rm); - if (ro && ro->r_notifier - && (status || ro->r_notify)) { + if (ro && ro->r_notifier && (status || ro->r_notify)) { notifier = ro->r_notifier; list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); @@ -877,8 +876,8 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (ret) goto out; - if ((rm->m_rdma_cookie || rm->m_rdma_op) - && conn->c_trans->xmit_rdma == NULL) { + if ((rm->m_rdma_cookie || rm->m_rdma_op) && + conn->c_trans->xmit_rdma == NULL) { if (printk_ratelimit()) printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", rm->m_rdma_op, conn->c_trans->xmit_rdma); @@ -890,8 +889,8 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, * have scheduled a delayed reconnect however - in this case * we should not interfere. */ - if (rds_conn_state(conn) == RDS_CONN_DOWN - && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) + if (rds_conn_state(conn) == RDS_CONN_DOWN && + !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) queue_delayed_work(rds_wq, &conn->c_conn_w, 0); ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); @@ -973,8 +972,8 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) * have scheduled a delayed reconnect however - in this case * we should not interfere. 
*/ - if (rds_conn_state(conn) == RDS_CONN_DOWN - && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) + if (rds_conn_state(conn) == RDS_CONN_DOWN && + !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) queue_delayed_work(rds_wq, &conn->c_conn_w, 0); ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 307dc5c1be1..7829a20325d 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -51,55 +51,50 @@ unsigned int rds_sysctl_ping_enable = 1; static ctl_table rds_sysctl_rds_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, .extra1 = &rds_sysctl_reconnect_min, .extra2 = &rds_sysctl_reconnect_max_jiffies, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "reconnect_max_delay_ms", .data = &rds_sysctl_reconnect_max_jiffies, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, .extra1 = &rds_sysctl_reconnect_min_jiffies, .extra2 = &rds_sysctl_reconnect_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "ping_enable", .data = &rds_sysctl_ping_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, - { .ctl_name = 0} + { } }; static struct ctl_path rds_sysctl_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "net", }, + { .procname = "rds", }, { } }; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 24b743eb0b1..45474a43686 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -67,11 +67,11 @@ static int rds_tcp_accept_one(struct socket *sock) inet = inet_sk(new_sock->sk); rdsdebug("accepted tcp %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", - NIPQUAD(inet->saddr), ntohs(inet->sport), - NIPQUAD(inet->daddr), ntohs(inet->dport)); + NIPQUAD(inet->inet_saddr), ntohs(inet->inet_sport), + NIPQUAD(inet->inet_daddr), ntohs(inet->inet_dport)); - conn = rds_conn_create(inet->saddr, inet->daddr, &rds_tcp_transport, - GFP_KERNEL); + conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr, + &rds_tcp_transport, GFP_KERNEL); if (IS_ERR(conn)) { ret = PTR_ERR(conn); goto out; diff --git a/net/rds/threads.c b/net/rds/threads.c index dd7e0cad1e7..00fa10e59af 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -170,8 +170,8 @@ void rds_shutdown_worker(struct work_struct *work) * handler is supposed to check for state DISCONNECTING */ mutex_lock(&conn->c_cm_lock); - if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) - && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) { + if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) && + !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) { rds_conn_error(conn, "shutdown called in state %d\n", atomic_read(&conn->c_state)); 
mutex_unlock(&conn->c_cm_lock); diff --git a/net/rfkill/core.c b/net/rfkill/core.c index ba2efb960c6..c218e07e5ca 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -579,6 +579,8 @@ static ssize_t rfkill_name_show(struct device *dev, static const char *rfkill_get_type_str(enum rfkill_type type) { + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1); + switch (type) { case RFKILL_TYPE_WLAN: return "wlan"; @@ -592,11 +594,11 @@ static const char *rfkill_get_type_str(enum rfkill_type type) return "wwan"; case RFKILL_TYPE_GPS: return "gps"; + case RFKILL_TYPE_FM: + return "fm"; default: BUG(); } - - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_GPS + 1); } static ssize_t rfkill_type_show(struct device *dev, @@ -1189,6 +1191,7 @@ static long rfkill_fop_ioctl(struct file *file, unsigned int cmd, #endif static const struct file_operations rfkill_fops = { + .owner = THIS_MODULE, .open = rfkill_fop_open, .read = rfkill_fop_read, .write = rfkill_fop_write, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 502cce76621..8feb9e5d662 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -512,12 +512,13 @@ static struct proto rose_proto = { .obj_size = sizeof(struct rose_sock), }; -static int rose_create(struct net *net, struct socket *sock, int protocol) +static int rose_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct rose_sock *rose; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (sock->type != SOCK_SEQPACKET || protocol != 0) @@ -1509,7 +1510,7 @@ static const struct file_operations rose_info_fops = { }; #endif /* CONFIG_PROC_FS */ -static struct net_proto_family rose_family_ops = { +static const struct net_proto_family rose_family_ops = { .family = PF_ROSE, .create = rose_create, .owner = THIS_MODULE, diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 9478d9b3d97..795c4b025e3 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -77,8 +77,9 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { - if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0 - && rose_neigh->dev == dev) + if (ax25cmp(&rose_route->neighbour, + &rose_neigh->callsign) == 0 && + rose_neigh->dev == dev) break; rose_neigh = rose_neigh->next; } @@ -311,8 +312,9 @@ static int rose_del_node(struct rose_route_struct *rose_route, rose_neigh = rose_neigh_list; while (rose_neigh != NULL) { - if (ax25cmp(&rose_route->neighbour, &rose_neigh->callsign) == 0 - && rose_neigh->dev == dev) + if (ax25cmp(&rose_route->neighbour, + &rose_neigh->callsign) == 0 && + rose_neigh->dev == dev) break; rose_neigh = rose_neigh->next; } @@ -578,18 +580,18 @@ static int rose_clear_routes(void) /* * Check that the device given is a valid AX.25 interface that is "up". 
+ * called whith RTNL */ -static struct net_device *rose_ax25_dev_get(char *devname) +static struct net_device *rose_ax25_dev_find(char *devname) { struct net_device *dev; - if ((dev = dev_get_by_name(&init_net, devname)) == NULL) + if ((dev = __dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) return dev; - dev_put(dev); return NULL; } @@ -600,13 +602,13 @@ struct net_device *rose_dev_first(void) { struct net_device *dev, *first = NULL; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - read_unlock(&dev_base_lock); + rcu_read_unlock(); return first; } @@ -618,8 +620,8 @@ struct net_device *rose_dev_get(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; @@ -627,7 +629,7 @@ struct net_device *rose_dev_get(rose_address *addr) } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev; } @@ -635,14 +637,14 @@ static int rose_dev_exists(rose_address *addr) { struct net_device *dev; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } dev = NULL; out: - read_unlock(&dev_base_lock); + rcu_read_unlock(); return dev != NULL; } @@ -720,27 +722,23 @@ int rose_rt_ioctl(unsigned int cmd, void __user *arg) case SIOCADDRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; - if ((dev = rose_ax25_dev_get(rose_route.device)) == NULL) + if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; - if (rose_dev_exists(&rose_route.address)) { /* Can't add routes to ourself */ - dev_put(dev); + if (rose_dev_exists(&rose_route.address)) /* Can't add routes to ourself */ return -EINVAL; - } if (rose_route.mask > 10) /* Mask can't be more than 10 digits */ return -EINVAL; if (rose_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; err = rose_add_node(&rose_route, dev); - dev_put(dev); return err; case SIOCDELRT: if (copy_from_user(&rose_route, arg, sizeof(struct rose_route_struct))) return -EFAULT; - if ((dev = rose_ax25_dev_get(rose_route.device)) == NULL) + if ((dev = rose_ax25_dev_find(rose_route.device)) == NULL) return -EINVAL; err = rose_del_node(&rose_route, dev); - dev_put(dev); return err; case SIOCRSCLRRT: diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 3bfe504faf8..df6d9dac218 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -26,121 +26,101 @@ static struct ctl_table_header *rose_table_header; static ctl_table rose_table[] = { { - .ctl_name = NET_ROSE_RESTART_REQUEST_TIMEOUT, .procname = "restart_request_timeout", .data = &sysctl_rose_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_CALL_REQUEST_TIMEOUT, .procname = "call_request_timeout", .data = &sysctl_rose_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, 
.proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_RESET_REQUEST_TIMEOUT, .procname = "reset_request_timeout", .data = &sysctl_rose_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_CLEAR_REQUEST_TIMEOUT, .procname = "clear_request_timeout", .data = &sysctl_rose_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_NO_ACTIVITY_TIMEOUT, .procname = "no_activity_timeout", .data = &sysctl_rose_no_activity_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_idle, .extra2 = &max_idle }, { - .ctl_name = NET_ROSE_ACK_HOLD_BACK_TIMEOUT, .procname = "acknowledge_hold_back_timeout", .data = &sysctl_rose_ack_hold_back_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer }, { - .ctl_name = NET_ROSE_ROUTING_CONTROL, .procname = "routing_control", .data = &sysctl_rose_routing_control, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_route, .extra2 = &max_route }, { - .ctl_name = NET_ROSE_LINK_FAIL_TIMEOUT, .procname = "link_fail_timeout", .data = &sysctl_rose_link_fail_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_ftimer, .extra2 = &max_ftimer }, { - .ctl_name = NET_ROSE_MAX_VCS, .procname = "maximum_virtual_circuits", .data = &sysctl_rose_maximum_vcs, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_maxvcs, .extra2 = &max_maxvcs }, { - .ctl_name = NET_ROSE_WINDOW_SIZE, .procname = "window_size", .data = &sysctl_rose_window_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_window, .extra2 = &max_window }, - { .ctl_name = 0 } + { } }; static struct ctl_path rose_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "rose", .ctl_name = NET_ROSE, }, + { .procname = "net", }, + { .procname = "rose", }, { } }; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index a86afceaa94..287b1415cee 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -608,14 +608,15 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, /* * create an RxRPC socket */ -static int rxrpc_create(struct net *net, struct socket *sock, int protocol) +static int rxrpc_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct rxrpc_sock *rx; struct sock *sk; _enter("%p,%d", sock, protocol); - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; /* we support transport protocol UDP only */ @@ -777,7 +778,7 @@ static struct proto rxrpc_proto = { .max_header = sizeof(struct rxrpc_header), }; -static struct net_proto_family rxrpc_family_ops = { +static const struct net_proto_family rxrpc_family_ops = { .family = PF_RXRPC, .create = rxrpc_create, .owner = THIS_MODULE, diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index a39bf97f883..60c2b94e6b5 100644 --- a/net/rxrpc/ar-recvmsg.c +++ 
b/net/rxrpc/ar-recvmsg.c @@ -146,7 +146,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, memcpy(msg->msg_name, &call->conn->trans->peer->srx, sizeof(call->conn->trans->peer->srx)); - sock_recv_timestamp(msg, &rx->sk, skb); + sock_recv_ts_and_drops(msg, &rx->sk, skb); } /* receive the message */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2dfb3e7a040..64f5e328cee 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -598,7 +598,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; /* compat_mode being true specifies a call that is supposed - * to add additional backward compatiblity statistic TLVs. + * to add additional backward compatibility statistic TLVs. */ if (compat_mode) { if (a->type == TCA_OLD_COMPAT) @@ -618,7 +618,8 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &h->tcf_bstats, + &h->tcf_rate_est) < 0 || gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; @@ -968,7 +969,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1051,7 +1052,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct nlattr *kind = find_dump_kind(cb->nlh); - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; if (kind == NULL) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b9aaab4e035..d329170243c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -65,48 +65,53 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, struct tc_mirred *parm; struct tcf_mirred *m; struct tcf_common *pc; - struct net_device *dev = NULL; - int ret = 0, err; - int ok_push = 0; + struct net_device *dev; + int ret, ok_push = 0; if (nla == NULL) return -EINVAL; - - err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy); - if (err < 0) - return err; - + ret = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy); + if (ret < 0) + return ret; if (tb[TCA_MIRRED_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); - + switch (parm->eaction) { + case TCA_EGRESS_MIRROR: + case TCA_EGRESS_REDIR: + break; + default: + return -EINVAL; + } if (parm->ifindex) { dev = __dev_get_by_index(&init_net, parm->ifindex); if (dev == NULL) return -ENODEV; switch (dev->type) { - case ARPHRD_TUNNEL: - case ARPHRD_TUNNEL6: - case ARPHRD_SIT: - case ARPHRD_IPGRE: - case ARPHRD_VOID: - case ARPHRD_NONE: - ok_push = 0; - break; - default: - ok_push = 1; - break; + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + ok_push = 0; + break; + default: + ok_push = 1; + break; } + } else { + dev = NULL; } pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); if (!pc) { - if (!parm->ifindex) + if (dev == NULL) return -EINVAL; pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, &mirred_idx_gen, &mirred_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { @@ -119,12 +124,12 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est, spin_lock_bh(&m->tcf_lock); 
m->tcf_action = parm->action; m->tcfm_eaction = parm->eaction; - if (parm->ifindex) { + if (dev != NULL) { m->tcfm_ifindex = parm->ifindex; if (ret != ACT_P_CREATED) dev_put(m->tcfm_dev); - m->tcfm_dev = dev; dev_hold(dev); + m->tcfm_dev = dev; m->tcfm_ok_push = ok_push; } spin_unlock_bh(&m->tcf_lock); @@ -148,57 +153,57 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, { struct tcf_mirred *m = a->priv; struct net_device *dev; - struct sk_buff *skb2 = NULL; - u32 at = G_TC_AT(skb->tc_verd); + struct sk_buff *skb2; + u32 at; + int retval, err = 1; spin_lock(&m->tcf_lock); - - dev = m->tcfm_dev; m->tcf_tm.lastuse = jiffies; - if (!(dev->flags&IFF_UP) ) { + dev = m->tcfm_dev; + if (!(dev->flags & IFF_UP)) { if (net_ratelimit()) printk("mirred to Houston: device %s is gone!\n", dev->name); -bad_mirred: - if (skb2 != NULL) - kfree_skb(skb2); - m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += qdisc_pkt_len(skb); - m->tcf_bstats.packets++; - spin_unlock(&m->tcf_lock); - /* should we be asking for packet to be dropped? - * may make sense for redirect case only - */ - return TC_ACT_SHOT; + goto out; } skb2 = skb_act_clone(skb, GFP_ATOMIC); if (skb2 == NULL) - goto bad_mirred; - if (m->tcfm_eaction != TCA_EGRESS_MIRROR && - m->tcfm_eaction != TCA_EGRESS_REDIR) { - if (net_ratelimit()) - printk("tcf_mirred unknown action %d\n", - m->tcfm_eaction); - goto bad_mirred; - } + goto out; m->tcf_bstats.bytes += qdisc_pkt_len(skb2); m->tcf_bstats.packets++; - if (!(at & AT_EGRESS)) + at = G_TC_AT(skb->tc_verd); + if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) skb_push(skb2, skb2->dev->hard_header_len); + } /* mirror is always swallowed */ if (m->tcfm_eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; - skb2->iif = skb->dev->ifindex; + skb2->skb_iif = skb->dev->ifindex; dev_queue_xmit(skb2); + err = 0; + +out: + if (err) { + m->tcf_qstats.overlimits++; + m->tcf_bstats.bytes += qdisc_pkt_len(skb); + m->tcf_bstats.packets++; + /* should we be asking for packet to be dropped? 
+ * may make sense for redirect case only + */ + retval = TC_ACT_SHOT; + } else { + retval = m->tcf_action; + } spin_unlock(&m->tcf_lock); - return m->tcf_action; + + return retval; } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 4ab916b8074..e9607fe55b5 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -54,6 +54,8 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING && skb->dev->real_num_tx_queues > d->queue_mapping) skb_set_queue_mapping(skb, d->queue_mapping); + if (d->flags & SKBEDIT_F_MARK) + skb->mark = d->mark; spin_unlock(&d->tcf_lock); return d->tcf_action; @@ -63,6 +65,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, + [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, }; static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, @@ -72,7 +75,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, struct tc_skbedit *parm; struct tcf_skbedit *d; struct tcf_common *pc; - u32 flags = 0, *priority = NULL; + u32 flags = 0, *priority = NULL, *mark = NULL; u16 *queue_mapping = NULL; int ret = 0, err; @@ -95,6 +98,12 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, flags |= SKBEDIT_F_QUEUE_MAPPING; queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } + + if (tb[TCA_SKBEDIT_MARK] != NULL) { + flags |= SKBEDIT_F_MARK; + mark = nla_data(tb[TCA_SKBEDIT_MARK]); + } + if (!flags) return -EINVAL; @@ -124,6 +133,9 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, d->priority = *priority; if (flags & SKBEDIT_F_QUEUE_MAPPING) d->queue_mapping = *queue_mapping; + if (flags & SKBEDIT_F_MARK) + d->mark = *mark; + d->tcf_action = parm->action; spin_unlock_bh(&d->tcf_lock); @@ -161,6 +173,9 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if (d->flags & SKBEDIT_F_QUEUE_MAPPING) NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING, sizeof(d->queue_mapping), &d->queue_mapping); + if (d->flags & SKBEDIT_F_MARK) + NLA_PUT(skb, TCA_SKBEDIT_MARK, sizeof(d->mark), + &d->mark); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7cf6c0fbc7a..3725d8fa29d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -137,7 +137,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) int err; int tp_created = 0; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; replay: @@ -404,6 +404,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } +/* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); @@ -417,12 +418,12 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = 
__dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) @@ -484,7 +485,6 @@ errout: if (cl) cops->put(q, cl); out: - dev_put(dev); return skb->len; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 9402a7fd378..e054c62857e 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -171,7 +171,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) static u32 flow_get_iif(const struct sk_buff *skb) { - return skb->iif; + return skb->skb_iif; } static u32 flow_get_priority(const struct sk_buff *skb) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 7034ea4530e..dd9414e4420 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -170,21 +170,23 @@ restart: for (s = sht[h1]; s; s = s->next) { if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && protocol == s->protocol && - !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) + !(s->dpi.mask & + (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && #if RSVP_DST_LEN == 4 - && dst[0] == s->dst[0] - && dst[1] == s->dst[1] - && dst[2] == s->dst[2] + dst[0] == s->dst[0] && + dst[1] == s->dst[1] && + dst[2] == s->dst[2] && #endif - && tunnelid == s->tunnelid) { + tunnelid == s->tunnelid) { for (f = s->ht[h2]; f; f = f->next) { if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) #if RSVP_DST_LEN == 4 - && src[0] == f->src[0] - && src[1] == f->src[1] - && src[2] == f->src[2] + && + src[0] == f->src[0] && + src[1] == f->src[1] && + src[2] == f->src[2] #endif ) { *res = f->res; @@ -493,13 +495,13 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && pinfo && pinfo->protocol == s->protocol && - memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 + memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && #if RSVP_DST_LEN == 4 - && dst[0] == s->dst[0] - && dst[1] == s->dst[1] - && dst[2] == s->dst[2] + dst[0] == s->dst[0] && + dst[1] == s->dst[1] && + dst[2] == s->dst[2] && #endif - && pinfo->tunnelid == s->tunnelid) { + pinfo->tunnelid == s->tunnelid) { insert: /* OK, we found appropriate session */ diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 18d85d25910..24dce8b648a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -303,17 +303,18 @@ META_COLLECTOR(var_sk_bound_if) { SKIP_NONLOCAL(skb); - if (skb->sk->sk_bound_dev_if == 0) { + if (skb->sk->sk_bound_dev_if == 0) { dst->value = (unsigned long) "any"; dst->len = 3; - } else { + } else { struct net_device *dev; - dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(skb->sk), + skb->sk->sk_bound_dev_if); *err = var_dev(dev, dst); - if (dev) - dev_put(dev); - } + rcu_read_unlock(); + } } META_COLLECTOR(int_sk_refcnt) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 903e4188b6c..75fd1c672c6 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -947,7 +947,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) @@ -1009,7 +1009,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *q, *p; int err; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; replay: @@ -1179,7 +1179,7 @@ static int 
tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || - gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, &q->qstats) < 0) goto nla_put_failure; @@ -1274,14 +1274,15 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; - read_lock(&dev_base_lock); + + rcu_read_lock(); idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev_rcu(&init_net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) @@ -1302,7 +1303,7 @@ cont: } done: - read_unlock(&dev_base_lock); + rcu_read_unlock(); cb->args[0] = idx; cb->args[1] = q_idx; @@ -1333,7 +1334,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EINVAL; if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) @@ -1575,7 +1576,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (net != &init_net) + if (!net_eq(net, &init_net)) return 0; if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 5b132c47326..3846d65bc03 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1609,7 +1609,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 5a888af7e5d..a65604f8f2b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -280,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, } if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4ae6aa562f2..5173c1e1b19 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -119,32 +119,26 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, spin_unlock(root_lock); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_tx_queue_stopped(txq) && - !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); + HARD_TX_UNLOCK(dev, txq); spin_lock(root_lock); - switch (ret) { - case NETDEV_TX_OK: - /* Driver sent out skb successfully */ + if (dev_xmit_complete(ret)) { + /* Driver sent out skb successfully or skb was consumed */ ret = qdisc_qlen(q); - break; - - case NETDEV_TX_LOCKED: + } else if (ret == NETDEV_TX_LOCKED) { /* Driver try lock failed */ ret = handle_dev_cpu_collision(skb, txq, q); - break; - - default: + } else { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) printk(KERN_WARNING "BUG %s code %d qlen %d\n", dev->name, ret, q->q.qlen); ret = dev_requeue_skb(skb, q); - break; } if (ret && (netif_tx_queue_stopped(txq) || 
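Editor's note on the sch_generic.c hunk above: it replaces the three-way switch on the driver's transmit return code with a single "was the packet consumed?" test, so the common case is handled first and only lock contention and the busy/requeue case keep dedicated branches. The following is a minimal, self-contained user-space sketch of that dispatch pattern; the enum values, xmit_complete() and handle_xmit_return() are illustrative assumptions, not the kernel's dev_xmit_complete() or sch_direct_xmit() code.

/*
 * Sketch only (assumed names): classify the driver's transmit return code
 * once, then branch.  "Complete" means the packet was consumed by the
 * driver, successfully or not, so the caller can keep draining its queue.
 */
#include <stdbool.h>
#include <stdio.h>

enum tx_rc { TX_OK = 0, TX_DROPPED = -1, TX_LOCKED = 1, TX_BUSY = 2 };

static bool xmit_complete(enum tx_rc rc)
{
	/* Success and driver-side drops both consume the packet. */
	return rc == TX_OK || rc == TX_DROPPED;
}

static int handle_xmit_return(enum tx_rc rc, int qlen)
{
	if (xmit_complete(rc))
		return qlen;	/* keep pulling packets off the queue */
	if (rc == TX_LOCKED)
		return -1;	/* try-lock collision: back off and retry */
	return -2;		/* device busy: requeue the packet */
}

int main(void)
{
	printf("ok: %d\n", handle_xmit_return(TX_OK, 3));
	printf("busy: %d\n", handle_xmit_return(TX_BUSY, 3));
	return 0;
}

Folding the "consumed" outcomes into one predicate keeps the hot path to a single comparison and leaves only the exceptional outcomes with explicit handling, which is what the rewritten hunk does.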
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 2c5c76be18f..b38b39c6075 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1375,7 +1375,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 85acab9dc6f..508cf5f3a6d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1105,7 +1105,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.ctokens = cl->ctokens; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1344,8 +1344,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, }; /* check for valid classid */ - if (!classid || TC_H_MAJ(classid ^ sch->handle) - || htb_find(classid, sch)) + if (!classid || TC_H_MAJ(classid ^ sch->handle) || + htb_find(classid, sch)) goto failure; /* check maximal depth */ diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 2b88295cb7b..d8b10e05462 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -199,9 +199,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * do it now in software before we mangle it. */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { - if (!(skb = skb_unshare(skb, GFP_ATOMIC)) - || (skb->ip_summed == CHECKSUM_PARTIAL - && skb_checksum_help(skb))) { + if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || + (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) { sch->qstats.drops++; return NET_XMIT_DROP; } @@ -210,9 +210,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } cb = netem_skb_cb(skb); - if (q->gap == 0 /* not doing reordering */ - || q->counter < q->gap /* inside last reordering gap */ - || q->reorder < get_crandom(&q->reorder_cor)) { + if (q->gap == 0 || /* not doing reordering */ + q->counter < q->gap || /* inside last reordering gap */ + q->reorder < get_crandom(&q->reorder_cor)) { psched_time_t now; psched_tdiff_t delay; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 5a002c24723..db69637069c 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -190,10 +190,13 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) if (m->slaves) { if (m->dev->flags & IFF_UP) { - if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT)) - || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST)) - || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST)) - || dev->mtu < m->dev->mtu) + if ((m->dev->flags & IFF_POINTOPOINT && + !(dev->flags & IFF_POINTOPOINT)) || + (m->dev->flags & IFF_BROADCAST && + !(dev->flags & IFF_BROADCAST)) || + (m->dev->flags & IFF_MULTICAST && + !(dev->flags & IFF_MULTICAST)) || + dev->mtu < m->dev->mtu) return -EINVAL; } else { if (!(dev->flags&IFF_POINTOPOINT)) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8450960df24..df5abbff63e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -63,6 +63,12 @@ static void sctp_assoc_bh_rcv(struct work_struct *work); static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc); +/* Keep track of the new idr low so that we don't 
re-use association id + * numbers too fast. It is protected by they idr spin lock is in the + * range of 1 - INT_MAX. + */ +static u32 idr_low = 1; + /* 1st Level Abstractions. */ @@ -167,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - sp->autoclose * HZ; + (unsigned long)sp->autoclose * HZ; /* Initilizes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -512,7 +518,13 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, * to this destination address earlier. The sender MUST set * CYCLING_CHANGEOVER to indicate that this switch is a * double switch to the same destination address. + * + * Really, only bother is we have data queued or outstanding on + * the association. */ + if (!asoc->outqueue.outstanding_bytes && !asoc->outqueue.out_qlen) + return; + if (transport->cacc.changeover_active) transport->cacc.cycling_changeover = changeover; @@ -732,6 +744,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, peer->partial_bytes_acked = 0; peer->flight_size = 0; + peer->burst_limited = 0; /* Set the transport's RTO.initial value */ peer->rto = asoc->rto_initial; @@ -1377,8 +1390,9 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_RECEIVED: case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && - ((asoc->rwnd - asoc->a_rwnd) >= - min_t(__u32, (asoc->base.sk->sk_rcvbuf >> 1), asoc->pathmtu))) + ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, + (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + asoc->pathmtu))) return 1; break; default: @@ -1485,15 +1499,13 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) * local endpoint and the remote peer. */ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, - gfp_t gfp) + sctp_scope_t scope, gfp_t gfp) { - sctp_scope_t scope; int flags; /* Use scoping rules to determine the subset of addresses from * the endpoint. */ - scope = sctp_scope(&asoc->peer.active_path->ipaddr); flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; if (asoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; @@ -1547,7 +1559,12 @@ retry: spin_lock_bh(&sctp_assocs_id_lock); error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, - 1, &assoc_id); + idr_low, &assoc_id); + if (!error) { + idr_low = assoc_id + 1; + if (idr_low == INT_MAX) + idr_low = 1; + } spin_unlock_bh(&sctp_assocs_id_lock); if (error == -EAGAIN) goto retry; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index acf7c4d128f..8e4320040f0 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -263,9 +263,18 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (0 == i) frag |= SCTP_DATA_FIRST_FRAG; - if ((i == (whole - 1)) && !over) + if ((i == (whole - 1)) && !over) { frag |= SCTP_DATA_LAST_FRAG; + /* The application requests to set the I-bit of the + * last DATA chunk of a user message when providing + * the user message to the SCTP implementation. 
+ */ + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + } + chunk = sctp_make_datafrag_empty(asoc, sinfo, len, frag, 0); if (!chunk) @@ -297,6 +306,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, else frag = SCTP_DATA_LAST_FRAG; + if ((sinfo->sinfo_flags & SCTP_EOF) || + (sinfo->sinfo_flags & SCTP_SACK_IMMEDIATELY)) + frag |= SCTP_DATA_SACK_IMM; + chunk = sctp_make_datafrag_empty(asoc, sinfo, over, frag, 0); if (!chunk) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index bb280e60e00..cc50fbe9929 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -837,15 +837,16 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); - if (!dev) - return 0; - if (!ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, + rcu_read_lock(); + dev = dev_get_by_index_rcu(&init_net, + addr->v6.sin6_scope_id); + if (!dev || + !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr, dev, 0)) { - dev_put(dev); + rcu_read_unlock(); return 0; } - dev_put(dev); + rcu_read_unlock(); } else if (type == IPV6_ADDR_MAPPED) { if (!opt->v4mapped) return 0; @@ -873,10 +874,12 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (type & IPV6_ADDR_LINKLOCAL) { if (!addr->v6.sin6_scope_id) return 0; - dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); + rcu_read_lock(); + dev = dev_get_by_index_rcu(&init_net, + addr->v6.sin6_scope_id); + rcu_read_unlock(); if (!dev) return 0; - dev_put(dev); } af = opt->pf->af; } @@ -930,7 +933,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -939,7 +941,6 @@ static struct inet_protosw sctpv6_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctpv6_prot, .ops = &inet6_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG, }; diff --git a/net/sctp/output.c b/net/sctp/output.c index 5cbda8f1ddf..7c558936343 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,23 +429,22 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { - if (!chunk->has_tsn) { - sctp_chunk_assign_ssn(chunk); - sctp_chunk_assign_tsn(chunk); - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ + if (!chunk->resent) { + + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ if (!tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } - } else - chunk->resent = 1; + } + + chunk->resent = 1; has_data = 1; } @@ -557,8 +556,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct timer_list *timer; unsigned long timeout; - tp->last_time_used = jiffies; - /* Restart the AUTOCLOSE timer when sending data. 
*/ if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; @@ -617,7 +614,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, sctp_xmit_t retval = SCTP_XMIT_OK; size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; - __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; struct sctp_outq *q = &asoc->outqueue; @@ -650,28 +646,6 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, } } - /* sctpimpguide-05 2.14.2 - * D) When the time comes for the sender to - * transmit new DATA chunks, the protocol parameter Max.Burst MUST - * first be applied to limit how many new DATA chunks may be sent. - * The limit is applied by adjusting cwnd as follows: - * if ((flightsize + Max.Burst * MTU) < cwnd) - * cwnd = flightsize + Max.Burst * MTU - */ - max_burst_bytes = asoc->max_burst * asoc->pathmtu; - if ((flight_size + max_burst_bytes) < transport->cwnd) { - transport->cwnd = flight_size + max_burst_bytes; - SCTP_DEBUG_PRINTK("%s: cwnd limited by max_burst: " - "transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, " - "pba: %d\n", - __func__, transport, - transport->cwnd, - transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - } - /* RFC 2960 6.1 Transmission of DATA Chunks * * B) At any given time, the sender MUST NOT transmit new data @@ -747,6 +721,8 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Has been accepted for transmission. */ if (!asoc->peer.prsctp_capable) chunk->msg->can_abandon = 0; + sctp_chunk_assign_tsn(chunk); + sctp_chunk_assign_ssn(chunk); } static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c9f20e28521..229690f02a1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -191,8 +191,8 @@ static inline int sctp_cacc_skip(struct sctp_transport *primary, __u32 tsn) { if (primary->cacc.changeover_active && - (sctp_cacc_skip_3_1(primary, transport, count_of_newacks) - || sctp_cacc_skip_3_2(primary, tsn))) + (sctp_cacc_skip_3_1(primary, transport, count_of_newacks) || + sctp_cacc_skip_3_2(primary, tsn))) return 1; return 0; } @@ -423,16 +423,6 @@ void sctp_retransmit_mark(struct sctp_outq *q, if ((reason == SCTP_RTXR_FAST_RTX && (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { - /* If this chunk was sent less then 1 rto ago, do not - * retransmit this chunk, but give the peer time - * to acknowlege it. Do this only when - * retransmitting due to T3 timeout. - */ - if (reason == SCTP_RTXR_T3_RTX && - time_before(jiffies, chunk->sent_at + - transport->last_rto)) - continue; - /* RFC 2960 6.2.1 Processing a Received SACK * * C) Any time a DATA chunk is marked for @@ -931,6 +921,14 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) goto sctp_flush_out; } + /* Apply Max.Burst limitation to the current transport in + * case it will be used for new data. We are going to + * rest it before we return, but we want to apply the limit + * to the currently queued data. + */ + if (transport) + sctp_transport_burst_limited(transport); + /* Finally, transmit new packets. 
*/ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { /* RFC 2960 6.5 Every DATA chunk MUST carry a valid @@ -976,6 +974,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) packet = &transport->packet; sctp_packet_config(packet, vtag, asoc->peer.ecn_capable); + /* We've switched transports, so apply the + * Burst limit to the new transport. + */ + sctp_transport_burst_limited(transport); } SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", @@ -1011,6 +1013,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) break; case SCTP_XMIT_OK: + /* The sender is in the SHUTDOWN-PENDING state, + * The sender MAY set the I-bit in the DATA + * chunk header. + */ + if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) + chunk->chunk_hdr->flags |= SCTP_DATA_SACK_IMM; + break; default: @@ -1063,6 +1072,9 @@ sctp_flush_out: packet = &t->packet; if (!sctp_packet_empty(packet)) error = sctp_packet_transmit(packet); + + /* Clear the burst limited state, if any */ + sctp_transport_burst_reset(t); } return error; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 612dc878e05..a3c8988758b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -205,14 +205,14 @@ static void sctp_get_local_addr_list(void) struct list_head *pos; struct sctp_af *af; - read_lock(&dev_base_lock); - for_each_netdev(&init_net, dev) { + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); } } - read_unlock(&dev_base_lock); + rcu_read_unlock(); } /* Free the existing local addresses. */ @@ -296,19 +296,19 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; - addr->v4.sin_addr.s_addr = inet_sk(sk)->rcv_saddr; + addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ static void sctp_v4_to_sk_saddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->rcv_saddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_rcv_saddr = addr->v4.sin_addr.s_addr; } /* Initialize sk->sk_daddr from sctp_addr. */ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk) { - inet_sk(sk)->daddr = addr->v4.sin_addr.s_addr; + inet_sk(sk)->inet_daddr = addr->v4.sin_addr.s_addr; } /* Initialize a sctp_addr from an address parameter. 
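
The dev_get_by_index_rcu() and for_each_netdev_rcu() conversions above trade reference counting for an RCU read-side critical section: the device pointer is only valid between rcu_read_lock() and rcu_read_unlock(), and no dev_put() is needed. A generic sketch of the pattern, with ifindex as an assumed parameter (illustrative only, not part of the patch):

        #include <linux/netdevice.h>

        /* Look up a device MTU without taking a reference. */
        static unsigned int mtu_of_ifindex(int ifindex)
        {
                struct net_device *dev;
                unsigned int mtu = 0;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(&init_net, ifindex);   /* no dev_hold() */
                if (dev)
                        mtu = dev->mtu;    /* dev is only valid until rcu_read_unlock() */
                rcu_read_unlock();

                return mtu;
        }
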
*/ @@ -598,7 +598,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet = inet_sk(newsk); - newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; + newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; sk_refcnt_debug_inc(newsk); @@ -909,7 +909,6 @@ static struct inet_protosw sctp_seqpacket_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -918,7 +917,6 @@ static struct inet_protosw sctp_stream_protosw = { .protocol = IPPROTO_SCTP, .prot = &sctp_prot, .ops = &inet_seqpacket_ops, - .capability = -1, .no_check = 0, .flags = SCTP_PROTOSW_FLAG }; @@ -1260,6 +1258,9 @@ SCTP_STATIC __init int sctp_init(void) /* Set SCOPE policy to enabled */ sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; + /* Set the default rwnd update threshold */ + sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9d881a61ac0..9e732916b67 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -987,7 +987,10 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len, target = skb_put(chunk->skb, len); - memcpy(target, data, len); + if (data) + memcpy(target, data, len); + else + memset(target, 0, len); /* Adjust the chunk length field. */ chunk->chunk_hdr->length = htons(chunklen + len); @@ -1129,16 +1132,18 @@ nodata: struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, const struct sctp_chunk *chunk, __be16 cause_code, const void *payload, - size_t paylen) + size_t paylen, size_t reserve_tail) { struct sctp_chunk *retval; - retval = sctp_make_op_error_space(asoc, chunk, paylen); + retval = sctp_make_op_error_space(asoc, chunk, paylen + reserve_tail); if (!retval) goto nodata; - sctp_init_cause(retval, cause_code, paylen); + sctp_init_cause(retval, cause_code, paylen + reserve_tail); sctp_addto_chunk(retval, paylen, payload); + if (reserve_tail) + sctp_addto_param(retval, reserve_tail, NULL); nodata: return retval; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8674d491955..4e4ca65cd32 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -217,8 +217,7 @@ static int sctp_gen_sack(struct sctp_association *asoc, int force, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); } else { - if (asoc->a_rwnd > asoc->rwnd) - asoc->a_rwnd = asoc->rwnd; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (!sack) goto nomem; @@ -480,7 +479,6 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * that indicates that we have an outstanding HB. */ if (!is_hb || transport->hb_sent) { - transport->last_rto = transport->rto; transport->rto = min((transport->rto * 2), transport->asoc->rto_max); } } @@ -719,7 +717,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has - * sucessfully completed a connect() call. + * successfully completed a connect() call. 
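
sctp_addto_param() accepting a NULL data pointer is what lets the new reserve_tail argument of sctp_make_op_error() zero-fill the tail of an error cause. The first user of it (in sctp_eat_data(), further down) pads the Invalid Stream Identifier cause, which per RFC 4960 section 3.3.10.1 is a 2-byte stream number followed by 2 reserved bytes, so the cause becomes:

        Cause Code = 1 (Invalid Stream Identifier), Cause Length = 8
        +--------------------------+--------------------------+
        |  Stream Identifier (16)  |  Reserved (16, zeroed)   |
        +--------------------------+--------------------------+

The reserved half is supplied by the reserve_tail = sizeof(u16) passed by the caller, where the old call only appended the 2-byte stream number.
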
*/ if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED)) sk->sk_state = SCTP_SS_ESTABLISHED; @@ -1418,6 +1416,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc->init_last_sent_to = t; chunk->transport = t; t->init_sent_count++; + /* Set the new transport as primary */ + sctp_assoc_set_primary(asoc, t); break; case SCTP_CMD_INIT_RESTART: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c8fae1983dd..47bc20d3a85 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -384,6 +384,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (!new_asoc) goto nomem; + if (sctp_assoc_set_bind_addr_from_ep(new_asoc, + sctp_scope(sctp_source(chunk)), + GFP_ATOMIC) < 0) + goto nomem_init; + /* The call, sctp_process_init(), can fail on memory allocation. */ if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, sctp_source(chunk), @@ -401,9 +406,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, len = ntohs(err_chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); - if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) - goto nomem_init; - repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) goto nomem_init; @@ -994,14 +996,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, sctp_sf_heartbeat(ep, asoc, type, arg, commands)) return SCTP_DISPOSITION_NOMEM; + /* Set transport error counter and association error counter * when sending heartbeat. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, - SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, + SCTP_TRANSPORT(transport)); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, SCTP_TRANSPORT(transport)); @@ -1452,6 +1455,10 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!new_asoc) goto nomem; + if (sctp_assoc_set_bind_addr_from_ep(new_asoc, + sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0) + goto nomem; + /* In the outbound INIT ACK the endpoint MUST copy its current * Verification Tag and Peers Verification tag into a reserved * place (local tie-tag and per tie-tag) within the state cookie. @@ -1488,9 +1495,6 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sizeof(sctp_chunkhdr_t); } - if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) - goto nomem; - repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) goto nomem; @@ -1717,7 +1721,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_COOKIE_IN_SHUTDOWN, - NULL, 0); + NULL, 0, 0); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); @@ -2865,6 +2869,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; + sctp_arg_t force = SCTP_NOFORCE(); int error; if (!sctp_vtag_verify(chunk, asoc)) { @@ -2898,6 +2903,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, BUG(); } + if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) + force = SCTP_FORCE(); + if (asoc->autoclose) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2926,7 +2934,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, * more aggressive than the following algorithms allow. 
*/ if (chunk->end_of_packet) - sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); + sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_CONSUME; @@ -2951,7 +2959,7 @@ discard_force: discard_noforce: if (chunk->end_of_packet) - sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_NOFORCE()); + sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, force); return SCTP_DISPOSITION_DISCARD; consume: @@ -3569,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, * To do this properly, we'll set the destination address of the chunk * and at the transmit time, will try look up the transport to use. * Since ASCONFs may be bundled, the correct transport may not be - * created untill we process the entire packet, thus this workaround. + * created until we process the entire packet, thus this workaround. */ asconf_ack->dest = chunk->source; sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(asconf_ack)); @@ -3970,7 +3978,7 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, err_chunk = sctp_make_op_error(asoc, chunk, SCTP_ERROR_UNSUP_HMAC, &auth_hdr->hmac_id, - sizeof(__u16)); + sizeof(__u16), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4062,7 +4070,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -4081,7 +4090,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length))); + WORD_ROUND(ntohs(hdr->length)), + 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err_chunk)); @@ -6045,7 +6055,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, - sizeof(data_hdr->stream)); + sizeof(data_hdr->stream), + sizeof(u16)); if (err) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(err)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c8d05758661..89ab66e5474 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -394,7 +394,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Refresh ephemeral port. */ if (!bp->port) - bp->port = inet_sk(sk)->num; + bp->port = inet_sk(sk)->inet_num; /* Add the address to the bind address list. * Use GFP_ATOMIC since BHs will be disabled. @@ -403,7 +403,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Copy back into socket for getsockname() use. */ if (!ret) { - inet_sk(sk)->sport = htons(inet_sk(sk)->num); + inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); af->to_sk_saddr(addr, sk); } @@ -1080,6 +1080,13 @@ static int __sctp_connect(struct sock* sk, err = -ENOMEM; goto out_free; } + + err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, + GFP_KERNEL); + if (err < 0) { + goto out_free; + } + } /* Prime the peer's transport structures. */ @@ -1095,11 +1102,6 @@ static int __sctp_connect(struct sock* sk, walk_size += af->sockaddr_len; } - err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL); - if (err < 0) { - goto out_free; - } - /* In case the user of sctp_connectx() wants an association * id back, assign one now. 
*/ @@ -1115,7 +1117,7 @@ static int __sctp_connect(struct sock* sk, } /* Initialize sk's dport and daddr for getpeername() */ - inet_sk(sk)->dport = htons(asoc->peer.port); + inet_sk(sk)->inet_dport = htons(asoc->peer.port); af = sctp_get_af_specific(sa_addr->sa.sa_family); af->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; @@ -1274,22 +1276,30 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, } /* - * New (hopefully final) interface for the API. The option buffer is used - * both for the returned association id and the addresses. + * New (hopefully final) interface for the API. + * We use the sctp_getaddrs_old structure so that use-space library + * can avoid any unnecessary allocations. The only defferent part + * is that we store the actual length of the address buffer into the + * addrs_num structure member. That way we can re-use the existing + * code. */ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, char __user *optval, int __user *optlen) { + struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; int err = 0; - if (len < sizeof(assoc_id)) + if (len < sizeof(param)) return -EINVAL; + if (copy_from_user(¶m, optval, sizeof(param))) + return -EFAULT; + err = __sctp_setsockopt_connectx(sk, - (struct sockaddr __user *)(optval + sizeof(assoc_id)), - len - sizeof(assoc_id), &assoc_id); + (struct sockaddr __user *)param.addrs, + param.addr_num, &assoc_id); if (err == 0 || err == -EINPROGRESS) { if (copy_to_user(optval, &assoc_id, sizeof(assoc_id))) @@ -1689,6 +1699,11 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } asoc = new_asoc; + err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL); + if (err < 0) { + err = -ENOMEM; + goto out_free; + } /* If the SCTP_INIT ancillary data is specified, set all * the association init values accordingly. @@ -1718,11 +1733,6 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, err = -ENOMEM; goto out_free; } - err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL); - if (err < 0) { - err = -ENOMEM; - goto out_free; - } } /* ASSERT: we have a valid association at this point. */ @@ -1958,7 +1968,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, if (err) goto out_free; - sock_recv_timestamp(msg, sk, skb); + sock_recv_ts_and_drops(msg, sk, skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; sp->pf->event_msgname(event, msg->msg_name, addr_len); @@ -2076,6 +2086,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ + if (sp->autoclose > (MAX_SCHEDULE_TIMEOUT / HZ) ) + sp->autoclose = (__u32)(MAX_SCHEDULE_TIMEOUT / HZ) ; return 0; } @@ -2301,11 +2314,10 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, } } - /* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value - * of this field is ignored. Note also that a value of zero - * indicates the current setting should be left unchanged. + /* Note that a value of zero indicates the current setting should be + left unchanged. 
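
The autoclose change above silently caps the configured value at MAX_SCHEDULE_TIMEOUT / HZ so the idle timer cannot overflow the timer subsystem. From userspace the option is set the same way as before; a minimal sketch (SCTP_AUTOCLOSE takes seconds, 0 disables it):

        #include <sys/socket.h>
        #include <netinet/in.h>
        #include <netinet/sctp.h>

        /* Close idle associations on a one-to-many socket after 60 seconds.
         * Values above MAX_SCHEDULE_TIMEOUT/HZ are now clamped by the kernel
         * instead of arming a bogus timer.
         */
        static int set_autoclose(int fd)
        {
                int idle_secs = 60;

                return setsockopt(fd, IPPROTO_SCTP, SCTP_AUTOCLOSE,
                                  &idle_secs, sizeof(idle_secs));
        }
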
*/ - if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) { + if (params->spp_pathmaxrxt) { if (trans) { trans->pathmaxrxt = params->spp_pathmaxrxt; } else if (asoc) { @@ -2344,8 +2356,8 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, pmtud_change == SPP_PMTUD || sackdelay_change == SPP_SACKDELAY || params.spp_sackdelay > 500 || - (params.spp_pathmtu - && params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) + (params.spp_pathmtu && + params.spp_pathmtu < SCTP_DEFAULT_MINSEGMENT)) return -EINVAL; /* If an address other than INADDR_ANY is specified, and @@ -4339,90 +4351,6 @@ static int sctp_getsockopt_initmsg(struct sock *sk, int len, char __user *optval return 0; } -static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_association *asoc; - struct list_head *pos; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " - "socket option deprecated\n"); - /* For UDP-style sockets, id specifies the association to query. */ - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - - list_for_each(pos, &asoc->peer.transport_addr_list) { - cnt ++; - } - - return cnt; -} - -/* - * Old API for getting list of peer addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_transport *from; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0) return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " - "socket option deprecated\n"); - - /* For UDP-style sockets, id specifies the association to query. 
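
With the SPP_PMTUD_ENABLE coupling removed, spp_pathmaxrxt now follows the documented rule that a zero field simply means "leave unchanged". A hedged example of raising only the path retransmission limit through SCTP_PEER_ADDR_PARAMS (fd and assoc_id are assumed to exist):

        #include <string.h>
        #include <sys/socket.h>
        #include <netinet/in.h>
        #include <netinet/sctp.h>

        /* Raise only the per-path retransmission limit; every zeroed field,
         * including spp_flags, is left unchanged by the kernel.
         */
        static int set_path_max_rxt(int fd, sctp_assoc_t assoc_id)
        {
                struct sctp_paddrparams params;

                memset(&params, 0, sizeof(params));
                params.spp_assoc_id   = assoc_id;   /* 0 = socket-wide default */
                params.spp_pathmaxrxt = 10;

                return setsockopt(fd, IPPROTO_SCTP, SCTP_PEER_ADDR_PARAMS,
                                  &params, sizeof(params));
        }
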
*/ - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - - to = (void __user *)getaddrs.addrs; - list_for_each_entry(from, &asoc->peer.transport_addr_list, - transports) { - memcpy(&temp, &from->ipaddr, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; - if (copy_to_user(to, &temp, addrlen)) - return -EFAULT; - to += addrlen ; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - getaddrs.addr_num = cnt; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &getaddrs, len)) - return -EFAULT; - - return 0; -} static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -4475,125 +4403,6 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, return 0; } -static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, - char __user *optval, - int __user *optlen) -{ - sctp_assoc_t id; - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - struct sctp_sockaddr_entry *addr; - int cnt = 0; - - if (len < sizeof(sctp_assoc_t)) - return -EINVAL; - - if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) - return -EFAULT; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - rcu_read_lock(); - list_for_each_entry_rcu(addr, - &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - - cnt++; - } - rcu_read_unlock(); - } else { - cnt = 1; - } - goto done; - } - - /* Protection on the bound address list is not needed, - * since in the socket option context we hold the socket lock, - * so there is no way that the bound address list can change. - */ - list_for_each_entry(addr, &bp->address_list, list) { - cnt ++; - } -done: - return cnt; -} - -/* Helper function that copies local addresses to user and returns the number - * of addresses copied. 
- */ -static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, - int max_addrs, void *to, - int *bytes_copied) -{ - struct sctp_sockaddr_entry *addr; - union sctp_addr temp; - int cnt = 0; - int addrlen; - - rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { - if (!addr->valid) - continue; - - if ((PF_INET == sk->sk_family) && - (AF_INET6 == addr->a.sa.sa_family)) - continue; - if ((PF_INET6 == sk->sk_family) && - inet_v6_ipv6only(sk) && - (AF_INET == addr->a.sa.sa_family)) - continue; - memcpy(&temp, &addr->a, sizeof(temp)); - if (!temp.v4.sin_port) - temp.v4.sin_port = htons(port); - - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), - &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(to, &temp, addrlen); - - to += addrlen; - *bytes_copied += addrlen; - cnt ++; - if (cnt >= max_addrs) break; - } - rcu_read_unlock(); - - return cnt; -} - static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { @@ -4637,112 +4446,6 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, return cnt; } -/* Old API for getting list of local addresses. Does not work for 32-bit - * programs running on a 64-bit kernel - */ -static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - struct sctp_bind_addr *bp; - struct sctp_association *asoc; - int cnt = 0; - struct sctp_getaddrs_old getaddrs; - struct sctp_sockaddr_entry *addr; - void __user *to; - union sctp_addr temp; - struct sctp_sock *sp = sctp_sk(sk); - int addrlen; - int err = 0; - void *addrs; - void *buf; - int bytes_copied = 0; - - if (len < sizeof(struct sctp_getaddrs_old)) - return -EINVAL; - - len = sizeof(struct sctp_getaddrs_old); - if (copy_from_user(&getaddrs, optval, len)) - return -EFAULT; - - if (getaddrs.addr_num <= 0 || - getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) - return -EINVAL; - - printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " - "socket option deprecated\n"); - - /* - * For UDP-style sockets, id specifies the association to query. - * If the id field is set to the value '0' then the locally bound - * addresses are returned without regard to any particular - * association. - */ - if (0 == getaddrs.assoc_id) { - bp = &sctp_sk(sk)->ep->base.bind_addr; - } else { - asoc = sctp_id2assoc(sk, getaddrs.assoc_id); - if (!asoc) - return -EINVAL; - bp = &asoc->base.bind_addr; - } - - to = getaddrs.addrs; - - /* Allocate space for a local instance of packed array to hold all - * the data. We store addresses here first and then put write them - * to the user in one shot. - */ - addrs = kmalloc(sizeof(union sctp_addr) * getaddrs.addr_num, - GFP_KERNEL); - if (!addrs) - return -ENOMEM; - - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid - * addresses from the global local address list. - */ - if (sctp_list_single_entry(&bp->address_list)) { - addr = list_entry(bp->address_list.next, - struct sctp_sockaddr_entry, list); - if (sctp_is_any(sk, &addr->a)) { - cnt = sctp_copy_laddrs_old(sk, bp->port, - getaddrs.addr_num, - addrs, &bytes_copied); - goto copy_getaddrs; - } - } - - buf = addrs; - /* Protection on the bound address list is not needed since - * in the socket option context we hold a socket lock and - * thus the bound address list can't change. 
- */ - list_for_each_entry(addr, &bp->address_list, list) { - memcpy(&temp, &addr->a, sizeof(temp)); - sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - memcpy(buf, &temp, addrlen); - buf += addrlen; - bytes_copied += addrlen; - cnt ++; - if (cnt >= getaddrs.addr_num) break; - } - -copy_getaddrs: - /* copy the entire address list into the user provided space */ - if (copy_to_user(to, addrs, bytes_copied)) { - err = -EFAULT; - goto error; - } - - /* copy the leading structure back to user */ - getaddrs.addr_num = cnt; - if (copy_to_user(optval, &getaddrs, len)) - err = -EFAULT; - -error: - kfree(addrs); - return err; -} static int sctp_getsockopt_local_addrs(struct sock *sk, int len, char __user *optval, int __user *optlen) @@ -5593,22 +5296,6 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_INITMSG: retval = sctp_getsockopt_initmsg(sk, len, optval, optlen); break; - case SCTP_GET_PEER_ADDRS_NUM_OLD: - retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_NUM_OLD: - retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval, - optlen); - break; - case SCTP_GET_PEER_ADDRS_OLD: - retval = sctp_getsockopt_peer_addrs_old(sk, len, optval, - optlen); - break; - case SCTP_GET_LOCAL_ADDRS_OLD: - retval = sctp_getsockopt_local_addrs_old(sk, len, optval, - optlen); - break; case SCTP_GET_PEER_ADDRS: retval = sctp_getsockopt_peer_addrs(sk, len, optval, optlen); @@ -5851,7 +5538,7 @@ pp_not_found: */ success: if (!sctp_sk(sk)->bind_hash) { - inet_sk(sk)->num = snum; + inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &pp->owner); sctp_sk(sk)->bind_hash = pp; } @@ -5923,7 +5610,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) if (sctp_autobind(sk)) return -EAGAIN; } else { - if (sctp_get_port(sk, inet_sk(sk)->num)) { + if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { sk->sk_state = SCTP_SS_CLOSED; return -EADDRINUSE; } @@ -6094,14 +5781,14 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) static inline void __sctp_put_port(struct sock *sk) { struct sctp_bind_hashbucket *head = - &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)]; + &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)]; struct sctp_bind_bucket *pp; sctp_spin_lock(&head->lock); pp = sctp_sk(sk)->bind_hash; __sk_del_bind_node(sk); sctp_sk(sk)->bind_hash = NULL; - inet_sk(sk)->num = 0; + inet_sk(sk)->inet_num = 0; sctp_bucket_destroy(pp); sctp_spin_unlock(&head->lock); } @@ -6128,7 +5815,7 @@ static int sctp_autobind(struct sock *sk) /* Initialize a local sockaddr structure to INADDR_ANY. 
*/ af = sctp_sk(sk)->pf->af; - port = htons(inet_sk(sk)->num); + port = htons(inet_sk(sk)->inet_num); af->inaddr_any(&autoaddr, port); return sctp_do_bind(sk, &autoaddr, af->sockaddr_len); @@ -6697,12 +6384,12 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, /* Initialize sk's sport, dport, rcv_saddr and daddr for * getsockname() and getpeername() */ - newinet->sport = inet->sport; - newinet->saddr = inet->saddr; - newinet->rcv_saddr = inet->rcv_saddr; - newinet->dport = htons(asoc->peer.port); + newinet->inet_sport = inet->inet_sport; + newinet->inet_saddr = inet->inet_saddr; + newinet->inet_rcv_saddr = inet->inet_rcv_saddr; + newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->id = asoc->next_tsn ^ jiffies; + newinet->inet_id = asoc->next_tsn ^ jiffies; newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@ -6741,13 +6428,13 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsp->hmac = NULL; /* Hook this new socket in to the bind_hash list. */ - head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->num)]; + head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)]; sctp_local_bh_disable(); sctp_spin_lock(&head->lock); pp = sctp_sk(oldsk)->bind_hash; sk_add_bind_node(newsk, &pp->owner); sctp_sk(newsk)->bind_hash = pp; - inet_sk(newsk)->num = inet_sk(oldsk)->num; + inet_sk(newsk)->inet_num = inet_sk(oldsk)->inet_num; sctp_spin_unlock(&head->lock); sctp_local_bh_enable(); diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index ab7151da120..832590bbe0c 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -52,6 +52,7 @@ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int rwnd_scale_max = 16; extern int sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -59,180 +60,145 @@ extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { { - .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_MAX_BURST, .procname = "max_burst", .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &zero, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, .procname = "association_max_retrans", .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_SNDBUF_POLICY, .procname = 
"sndbuf_policy", .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RCVBUF_POLICY, .procname = "rcvbuf_policy", .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_PATH_MAX_RETRANS, .procname = "path_max_retrans", .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, .procname = "max_init_retransmits", .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &int_max }, { - .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &one, .extra2 = &timer_max }, { - .ctl_name = NET_SCTP_PRESERVE_ENABLE, .procname = "cookie_preserve_enable", .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_ADDIP_ENABLE, .procname = "addip_enable", .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_PRSCTP_ENABLE, .procname = "prsctp_enable", .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = NET_SCTP_SACK_TIMEOUT, .procname = "sack_timeout", .data = &sctp_sack_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_mem", .data = &sysctl_sctp_mem, .maxlen = sizeof(sysctl_sctp_mem), @@ -240,7 +206,6 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_rmem", .data = &sysctl_sctp_rmem, .maxlen = sizeof(sysctl_sctp_rmem), @@ -248,7 +213,6 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "sctp_wmem", .data = &sysctl_sctp_wmem, .maxlen = sizeof(sysctl_sctp_wmem), @@ -256,40 +220,44 @@ static ctl_table sctp_table[] = { .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "auth_enable", .data = &sctp_auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "addip_noauth_enable", .data = &sctp_addip_noauth, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, - .strategy = sysctl_intvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "addr_scope_policy", .data = &sctp_scope_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + 
.proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &addr_scope_max, }, - { .ctl_name = 0 } + { + .procname = "rwnd_update_shift", + .data = &sctp_rwnd_upd_shift, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &rwnd_scale_max, + }, + + { /* sentinel */ } }; static struct ctl_path sctp_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "sctp", .ctl_name = NET_SCTP, }, + { .procname = "net", }, + { .procname = "sctp", }, { } }; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index c256e483931..b827d21dbe5 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -74,7 +74,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->last_rto = peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rtt = 0; peer->rttvar = 0; peer->srtt = 0; @@ -83,7 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->fast_recovery = 0; peer->last_time_heard = jiffies; - peer->last_time_used = jiffies; peer->last_time_ecne_reduced = jiffies; peer->init_sent_count = 0; @@ -308,7 +307,8 @@ void sctp_transport_route(struct sctp_transport *transport, /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). */ - if (asoc && (transport == asoc->peer.active_path)) + if (asoc && (!asoc->peer.primary_path || + (transport == asoc->peer.active_path))) opt->pf->af->to_sk_saddr(&transport->saddr, asoc->base.sk); } else @@ -385,7 +385,6 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) tp->rto = tp->asoc->rto_max; tp->rtt = rtt; - tp->last_rto = tp->rto; /* Reset rto_pending so that a new RTT measurement is started when a * new data chunk is sent. @@ -564,10 +563,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * to be done every RTO interval, we do it every hearbeat * interval. */ - if (time_after(jiffies, transport->last_time_used + - transport->rto)) - transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + transport->cwnd = max(transport->cwnd/2, + 4*transport->asoc->pathmtu); break; } @@ -578,6 +575,43 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, transport->cwnd, transport->ssthresh); } +/* Apply Max.Burst limit to the congestion window: + * sctpimpguide-05 2.14.2 + * D) When the time comes for the sender to + * transmit new DATA chunks, the protocol parameter Max.Burst MUST + * first be applied to limit how many new DATA chunks may be sent. + * The limit is applied by adjusting cwnd as follows: + * if ((flightsize+ Max.Burst * MTU) < cwnd) + * cwnd = flightsize + Max.Burst * MTU + */ + +void sctp_transport_burst_limited(struct sctp_transport *t) +{ + struct sctp_association *asoc = t->asoc; + u32 old_cwnd = t->cwnd; + u32 max_burst_bytes; + + if (t->burst_limited) + return; + + max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); + if (max_burst_bytes < old_cwnd) { + t->cwnd = max_burst_bytes; + t->burst_limited = old_cwnd; + } +} + +/* Restore the old cwnd congestion window, after the burst had it's + * desired effect. + */ +void sctp_transport_burst_reset(struct sctp_transport *t) +{ + if (t->burst_limited) { + t->cwnd = t->burst_limited; + t->burst_limited = 0; + } +} + /* What is the next timeout value for this transport? 
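
To make the new burst limiting concrete: with asoc->pathmtu = 1500, asoc->max_burst = 4 and flight_size = 3000, sctp_transport_burst_limited() computes max_burst_bytes = 3000 + 4 * 1500 = 9000. If cwnd was 12000 it is clamped to 9000 for the duration of the flush and the old value is parked in t->burst_limited; sctp_transport_burst_reset() then restores cwnd to 12000 once the queued data has gone out. The clamp is applied per flush rather than permanently shrinking cwnd, which is the behavioural difference from the old check that lived in sctp_packet_can_append_data(). The new rwnd_update_shift sysctl added to the table above is reachable at /proc/sys/net/sctp/rwnd_update_shift and, per its extra1/extra2 bounds, accepts values from 1 to 16.
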
*/ unsigned long sctp_transport_timeout(struct sctp_transport *t) { @@ -600,8 +634,9 @@ void sctp_transport_reset(struct sctp_transport *t) * (see Section 6.2.1) */ t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380)); + t->burst_limited = 0; t->ssthresh = asoc->peer.i.a_rwnd; - t->last_rto = t->rto = asoc->rto_initial; + t->rto = asoc->rto_initial; t->rtt = 0; t->srtt = 0; t->rttvar = 0; diff --git a/net/socket.c b/net/socket.c index 75655365b5f..b94c3dd7101 100644 --- a/net/socket.c +++ b/net/socket.c @@ -97,6 +97,12 @@ #include <net/sock.h> #include <linux/netfilter.h> +#include <linux/if_tun.h> +#include <linux/ipv6_route.h> +#include <linux/route.h> +#include <linux/sockios.h> +#include <linux/atalk.h> + static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); @@ -668,10 +674,24 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, int flags) +inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) + put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, + sizeof(__u32), &skb->dropcount); +} + +void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + sock_recv_timestamp(msg, sk, skb); + sock_recv_drops(msg, sk, skb); +} +EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); + +static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) { - int err; struct sock_iocb *si = kiocb_to_siocb(iocb); si->sock = sock; @@ -680,13 +700,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, si->size = size; si->flags = flags; - err = security_socket_recvmsg(sock, msg, size, flags); - if (err) - return err; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + int err = security_socket_recvmsg(sock, msg, size, flags); + + return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); +} + int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { @@ -702,6 +726,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, return ret; } +static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -886,6 +925,24 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(dlci_ioctl_set); +static long sock_do_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, unsigned long arg) +{ + int err; + void __user *argp = (void __user *)arg; + + err = sock->ops->ioctl(sock, cmd, arg); + + /* + * If this ioctl is unknown try to hand it down + * to the NIC driver. 
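
sock_recv_ts_and_drops() above bundles the existing timestamp cmsg with the new SO_RXQ_OVFL drop counter, which the kernel attaches as a 32-bit SOL_SOCKET control message once the option is enabled with setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, ...). A userspace sketch of reading it, assuming a libc that already defines SO_RXQ_OVFL (older headers would need the constant from the patched asm-generic/socket.h):

        #include <string.h>
        #include <sys/socket.h>

        /* Pull the SO_RXQ_OVFL drop counter off a received message.  msg must
         * already have msg_control/msg_controllen pointing at a buffer of at
         * least CMSG_SPACE(sizeof(unsigned int)) bytes.
         */
        static unsigned int rxq_overflow_count(const struct msghdr *msg)
        {
                struct cmsghdr *cmsg;
                unsigned int dropped = 0;

                for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR((struct msghdr *)msg, cmsg)) {
                        if (cmsg->cmsg_level == SOL_SOCKET &&
                            cmsg->cmsg_type == SO_RXQ_OVFL)
                                memcpy(&dropped, CMSG_DATA(cmsg), sizeof(dropped));
                }
                return dropped;
        }
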
+ */ + if (err == -ENOIOCTLCMD) + err = dev_ioctl(net, cmd, argp); + + return err; +} + /* * With an ioctl, arg may well be a user mode pointer, but we don't know * what to do with it - that's up to the protocol still. @@ -905,11 +962,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(net, cmd, argp); } else -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(net, cmd, argp); } else -#endif /* CONFIG_WIRELESS_EXT */ +#endif switch (cmd) { case FIOSETOWN: case SIOCSPGRP: @@ -959,14 +1016,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) mutex_unlock(&dlci_ioctl_mutex); break; default: - err = sock->ops->ioctl(sock, cmd, arg); - - /* - * If this ioctl is unknown try to hand it down - * to the NIC driver. - */ - if (err == -ENOIOCTLCMD) - err = dev_ioctl(net, cmd, argp); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@ -1100,11 +1150,14 @@ static int sock_fasync(int fd, struct file *filp, int on) fna->fa_next = sock->fasync_list; write_lock_bh(&sk->sk_callback_lock); sock->fasync_list = fna; + sock_set_flag(sk, SOCK_FASYNC); write_unlock_bh(&sk->sk_callback_lock); } else { if (fa != NULL) { write_lock_bh(&sk->sk_callback_lock); *prev = fa->fa_next; + if (!sock->fasync_list) + sock_reset_flag(sk, SOCK_FASYNC); write_unlock_bh(&sk->sk_callback_lock); kfree(fa); } @@ -1216,7 +1269,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, /* Now protected by module ref count */ rcu_read_unlock(); - err = pf->create(net, sock, protocol); + err = pf->create(net, sock, protocol, kern); if (err < 0) goto out_module_put; @@ -1965,22 +2018,15 @@ out: return err; } -/* - * BSD recvmsg interface - */ - -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, - unsigned int, flags) +static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned flags, int nosec) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; - struct socket *sock; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; - struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; - int fput_needed; /* kernel mode address */ struct sockaddr_storage addr; @@ -1990,27 +2036,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, int __user *uaddr_len; if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) + if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; /* Check whether to allocate the iovec area */ err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { + iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); + if (msg_sys->msg_iovlen > UIO_FASTIOV) { iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); if (!iov) - goto out_put; + goto out; } /* @@ -2018,46 +2060,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys.msg_name; + 
uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, + err = verify_compat_iovec(msg_sys, iov, (struct sockaddr *)&addr, VERIFY_WRITE); } else - err = verify_iovec(&msg_sys, iov, + err = verify_iovec(msg_sys, iov, (struct sockaddr *)&addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; - cmsg_ptr = (unsigned long)msg_sys.msg_control; - msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + cmsg_ptr = (unsigned long)msg_sys->msg_control; + msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg_sys, total_len, flags); + err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, + total_len, flags); if (err < 0) goto out_freeiov; len = err; if (uaddr != NULL) { err = move_addr_to_user((struct sockaddr *)&addr, - msg_sys.msg_namelen, uaddr, + msg_sys->msg_namelen, uaddr, uaddr_len); if (err < 0) goto out_freeiov; } - err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg_compat->msg_controllen); else - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, &msg->msg_controllen); if (err) goto out_freeiov; @@ -2066,21 +2109,162 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); -out_put: +out: + return err; +} + +/* + * BSD recvmsg interface + */ + +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) +{ + int fput_needed, err; + struct msghdr msg_sys; + struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + + if (!sock) + goto out; + + err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); + fput_light(sock->file, fput_needed); out: return err; } -#ifdef __ARCH_WANT_SYS_SOCKETCALL +/* + * Linux recvmmsg interface + */ + +int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags, struct timespec *timeout) +{ + int fput_needed, err, datagrams; + struct socket *sock; + struct mmsghdr __user *entry; + struct compat_mmsghdr __user *compat_entry; + struct msghdr msg_sys; + struct timespec end_time; + + if (timeout && + poll_select_set_timeout(&end_time, timeout->tv_sec, + timeout->tv_nsec)) + return -EINVAL; + datagrams = 0; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = sock_error(sock->sk); + if (err) + goto out_put; + + entry = mmsg; + compat_entry = (struct compat_mmsghdr __user *)mmsg; + + while (datagrams < vlen) { + /* + * No need to ask LSM for more than the first datagram. 
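
Once the recvmmsg() path (its receive loop and syscall entry continue below) is wired up, userspace can drain several datagrams per system call. A hedged sketch that uses the raw syscall number, since no libc wrapper exists at this point; it assumes headers that define __NR_recvmmsg, declares the mmsghdr layout locally in case <sys/socket.h> does not yet carry it, and expects the caller to have set up the iovecs in each msg_hdr:

        #define _GNU_SOURCE
        #include <sys/syscall.h>
        #include <sys/socket.h>
        #include <time.h>
        #include <unistd.h>

        struct mmsghdr {                        /* layout from the patched headers */
                struct msghdr   msg_hdr;
                unsigned int    msg_len;        /* bytes returned for this entry */
        };

        #define VLEN 8

        /* Returns the number of entries filled in, or -1 on error. */
        static int recv_burst(int fd, struct mmsghdr msgs[VLEN])
        {
                struct timespec timeout = { 1, 0 };     /* give up after about 1s */

                return syscall(__NR_recvmmsg, fd, msgs, VLEN, 0, &timeout);
        }
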
+ */ + if (MSG_CMSG_COMPAT & flags) { + err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = __put_user(err, &compat_entry->msg_len); + ++compat_entry; + } else { + err = __sys_recvmsg(sock, (struct msghdr __user *)entry, + &msg_sys, flags, datagrams); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); + ++entry; + } + + if (err) + break; + ++datagrams; + + if (timeout) { + ktime_get_ts(timeout); + *timeout = timespec_sub(end_time, *timeout); + if (timeout->tv_sec < 0) { + timeout->tv_sec = timeout->tv_nsec = 0; + break; + } + + /* Timeout, return less than vlen datagrams */ + if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) + break; + } + + /* Out of band data, return right away */ + if (msg_sys.msg_flags & MSG_OOB) + break; + } + +out_put: + fput_light(sock->file, fput_needed); + + if (err == 0) + return datagrams; + + if (datagrams != 0) { + /* + * We may return less entries than requested (vlen) if the + * sock is non block and there aren't enough datagrams... + */ + if (err != -EAGAIN) { + /* + * ... or if recvmsg returns an error after we + * received some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). + */ + sock->sk->sk_err = -err; + } + + return datagrams; + } + + return err; +} + +SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct timespec __user *, timeout) +{ + int datagrams; + struct timespec timeout_sys; + + if (!timeout) + return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); + + if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys))) + return -EFAULT; + + datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); + + if (datagrams > 0 && + copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys))) + datagrams = -EFAULT; + + return datagrams; +} + +#ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[19]={ +static const unsigned char nargs[20] = { AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4) + AL(4),AL(5) }; #undef AL @@ -2100,7 +2284,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) int err; unsigned int len; - if (call < 1 || call > SYS_ACCEPT4) + if (call < 1 || call > SYS_RECVMMSG) return -EINVAL; len = nargs[call]; @@ -2178,6 +2362,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_RECVMMSG: + err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3], + (struct timespec __user *)a[4]); + break; case SYS_ACCEPT4: err = sys_accept4(a0, (struct sockaddr __user *)a1, (int __user *)a[2], a[3]); @@ -2300,6 +2488,552 @@ void socket_seq_show(struct seq_file *seq) #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_COMPAT +static int do_siocgstamp(struct net *net, struct socket *sock, + unsigned int cmd, struct compat_timeval __user *up) +{ + mm_segment_t old_fs = get_fs(); + struct timeval ktv; + int err; + + set_fs(KERNEL_DS); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); + set_fs(old_fs); + if (!err) { + err = put_user(ktv.tv_sec, &up->tv_sec); + err |= __put_user(ktv.tv_usec, &up->tv_usec); + } + return err; +} + +static int do_siocgstampns(struct net *net, struct socket *sock, + unsigned int cmd, struct 
compat_timespec __user *up) +{ + mm_segment_t old_fs = get_fs(); + struct timespec kts; + int err; + + set_fs(KERNEL_DS); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); + set_fs(old_fs); + if (!err) { + err = put_user(kts.tv_sec, &up->tv_sec); + err |= __put_user(kts.tv_nsec, &up->tv_nsec); + } + return err; +} + +static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + uifr = compat_alloc_user_space(sizeof(struct ifreq)); + if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + return -EFAULT; + + err = dev_ioctl(net, SIOCGIFNAME, uifr); + if (err) + return err; + + if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) + return -EFAULT; + + return 0; +} + +static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +{ + struct compat_ifconf ifc32; + struct ifconf ifc; + struct ifconf __user *uifc; + struct compat_ifreq __user *ifr32; + struct ifreq __user *ifr; + unsigned int i, j; + int err; + + if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) + return -EFAULT; + + if (ifc32.ifcbuf == 0) { + ifc32.ifc_len = 0; + ifc.ifc_len = 0; + ifc.ifc_req = NULL; + uifc = compat_alloc_user_space(sizeof(struct ifconf)); + } else { + size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * + sizeof (struct ifreq); + uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); + ifc.ifc_len = len; + ifr = ifc.ifc_req = (void __user *)(uifc + 1); + ifr32 = compat_ptr(ifc32.ifcbuf); + for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) + return -EFAULT; + ifr++; + ifr32++; + } + } + if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) + return -EFAULT; + + err = dev_ioctl(net, SIOCGIFCONF, uifc); + if (err) + return err; + + if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) + return -EFAULT; + + ifr = ifc.ifc_req; + ifr32 = compat_ptr(ifc32.ifcbuf); + for (i = 0, j = 0; + i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + return -EFAULT; + ifr32++; + ifr++; + } + + if (ifc32.ifcbuf == 0) { + /* Translate from 64-bit structure multiple to + * a 32-bit one. 
+ */ + i = ifc.ifc_len; + i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); + ifc32.ifc_len = i; + } else { + ifc32.ifc_len = i; + } + if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) + return -EFAULT; + + return 0; +} + +static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) +{ + struct ifreq __user *ifr; + u32 data; + void __user *datap; + + ifr = compat_alloc_user_space(sizeof(*ifr)); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + return -EFAULT; + + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; + + datap = compat_ptr(data); + if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + return -EFAULT; + + return dev_ioctl(net, SIOCETHTOOL, ifr); +} + +static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) +{ + void __user *uptr; + compat_uptr_t uptr32; + struct ifreq __user *uifr; + + uifr = compat_alloc_user_space(sizeof (*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + return -EFAULT; + + if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) + return -EFAULT; + + uptr = compat_ptr(uptr32); + + if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) + return -EFAULT; + + return dev_ioctl(net, SIOCWANDEV, uifr); +} + +static int bond_ioctl(struct net *net, unsigned int cmd, + struct compat_ifreq __user *ifr32) +{ + struct ifreq kifr; + struct ifreq __user *uifr; + mm_segment_t old_fs; + int err; + u32 data; + void __user *datap; + + switch (cmd) { + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) + return -EFAULT; + + old_fs = get_fs(); + set_fs (KERNEL_DS); + err = dev_ioctl(net, cmd, &kifr); + set_fs (old_fs); + + return err; + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + return -EFAULT; + + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; + + datap = compat_ptr(data); + if (put_user(datap, &uifr->ifr_ifru.ifru_data)) + return -EFAULT; + + return dev_ioctl(net, cmd, uifr); + default: + return -EINVAL; + }; +} + +static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, + struct compat_ifreq __user *u_ifreq32) +{ + struct ifreq __user *u_ifreq64; + char tmp_buf[IFNAMSIZ]; + void __user *data64; + u32 data32; + + if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), + IFNAMSIZ)) + return -EFAULT; + if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + return -EFAULT; + data64 = compat_ptr(data32); + + u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); + + /* Don't check these user accesses, just let that get trapped + * in the ioctl handler instead. 
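
The translators above all follow one idiom: carve a native-sized structure out of the compat user stack with compat_alloc_user_space(), widen the 32-bit fields (pointers via compat_ptr()) into it, and hand the 64-bit view to the unchanged ioctl path. A condensed sketch of that idiom with made-up types; struct foo_req32, struct foo_req and FOOIOCTL are illustrative only and do not exist in the tree:

        #include <linux/compat.h>

        struct foo_req32 {
                compat_uptr_t   data;           /* 32-bit user pointer */
        };

        struct foo_req {
                void __user     *data;
        };

        static int compat_foo_ioctl(struct net *net, struct foo_req32 __user *u32p)
        {
                struct foo_req __user *u64p;
                compat_uptr_t ptr32;

                u64p = compat_alloc_user_space(sizeof(*u64p));

                if (get_user(ptr32, &u32p->data) ||
                    put_user(compat_ptr(ptr32), &u64p->data))
                        return -EFAULT;

                return dev_ioctl(net, FOOIOCTL, u64p);
        }
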
+ */ + if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], + IFNAMSIZ)) + return -EFAULT; + if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + return -EFAULT; + + return dev_ioctl(net, cmd, u_ifreq64); +} + +static int dev_ifsioc(struct net *net, struct socket *sock, + unsigned int cmd, struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + +static int compat_sioc_ifmap(struct net *net, unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq ifr; + struct compat_ifmap __user *uifmap32; + mm_segment_t old_fs; + int err; + + uifmap32 = &uifr32->ifr_ifru.ifru_map; + err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); + err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= __get_user(ifr.ifr_map.port, &uifmap32->port); + if (err) + return -EFAULT; + + old_fs = get_fs(); + set_fs (KERNEL_DS); + err = dev_ioctl(net, cmd, (void __user *)&ifr); + set_fs (old_fs); + + if (cmd == SIOCGIFMAP && !err) { + err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); + err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= __put_user(ifr.ifr_map.port, &uifmap32->port); + if (err) + err = -EFAULT; + } + return err; +} + +static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) +{ + void __user *uptr; + compat_uptr_t uptr32; + struct ifreq __user *uifr; + + uifr = compat_alloc_user_space(sizeof (*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + return -EFAULT; + + if (get_user(uptr32, &uifr32->ifr_data)) + return -EFAULT; + + uptr = compat_ptr(uptr32); + + if (put_user(uptr, &uifr->ifr_data)) + return -EFAULT; + + return dev_ioctl(net, SIOCSHWTSTAMP, uifr); +} + +struct rtentry32 { + u32 rt_pad1; + struct sockaddr rt_dst; /* target address */ + struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ + struct sockaddr rt_genmask; /* target network mask (IP) */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! 
*/ + /* char * */ u32 rt_dev; /* forcing the device at add */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ + unsigned short rt_irtt; /* Initial RTT */ +}; + +struct in6_rtmsg32 { + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_src; + struct in6_addr rtmsg_gateway; + u32 rtmsg_type; + u16 rtmsg_dst_len; + u16 rtmsg_src_len; + u32 rtmsg_metric; + u32 rtmsg_info; + u32 rtmsg_flags; + s32 rtmsg_ifindex; +}; + +static int routing_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, void __user *argp) +{ + int ret; + void *r = NULL; + struct in6_rtmsg r6; + struct rtentry r4; + char devname[16]; + u32 rtdev; + mm_segment_t old_fs = get_fs(); + + if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ + struct in6_rtmsg32 __user *ur6 = argp; + ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + 3 * sizeof(struct in6_addr)); + ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + + r = (void *) &r6; + } else { /* ipv4 */ + struct rtentry32 __user *ur4 = argp; + ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + 3 * sizeof(struct sockaddr)); + ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); + ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); + ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); + ret |= __get_user (r4.rt_window, &(ur4->rt_window)); + ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); + ret |= __get_user (rtdev, &(ur4->rt_dev)); + if (rtdev) { + ret |= copy_from_user (devname, compat_ptr(rtdev), 15); + r4.rt_dev = devname; devname[15] = 0; + } else + r4.rt_dev = NULL; + + r = (void *) &r4; + } + + if (ret) { + ret = -EFAULT; + goto out; + } + + set_fs (KERNEL_DS); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); + set_fs (old_fs); + +out: + return ret; +} + +/* Since old style bridge ioctls end up using SIOCDEVPRIVATE + * for some operations, this forces use of the newer bridge-utils that + * use compatible ioctls + */ +static int old_bridge_ioctl(compat_ulong_t __user *argp) +{ + compat_ulong_t tmp; + + if (get_user(tmp, argp)) + return -EFAULT; + if (tmp == BRCTL_GET_VERSION) + return BRCTL_VERSION + 1; + return -EINVAL; +} + +static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, + unsigned int cmd, unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + + if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) + return siocdevprivate_ioctl(net, cmd, argp); + + switch (cmd) { + case SIOCSIFBR: + case SIOCGIFBR: + return old_bridge_ioctl(argp); + case SIOCGIFNAME: + return dev_ifname32(net, argp); + case SIOCGIFCONF: + return dev_ifconf(net, argp); + case SIOCETHTOOL: + return ethtool_ioctl(net, argp); + case SIOCWANDEV: + return compat_siocwandev(net, argp); + case SIOCGIFMAP: + case SIOCSIFMAP: + return compat_sioc_ifmap(net, cmd, argp); + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: + case SIOCBONDCHANGEACTIVE: + return bond_ioctl(net, cmd, argp); + case SIOCADDRT: + case SIOCDELRT: + return routing_ioctl(net, sock, cmd, argp); + case SIOCGSTAMP: + return
do_siocgstamp(net, sock, cmd, argp); + case SIOCGSTAMPNS: + return do_siocgstampns(net, sock, cmd, argp); + case SIOCSHWTSTAMP: + return compat_siocshwtstamp(net, argp); + + case FIOSETOWN: + case SIOCSPGRP: + case FIOGETOWN: + case SIOCGPGRP: + case SIOCBRADDBR: + case SIOCBRDELBR: + case SIOCGIFVLAN: + case SIOCSIFVLAN: + case SIOCADDDLCI: + case SIOCDELDLCI: + return sock_ioctl(file, cmd, arg); + + case SIOCGIFFLAGS: + case SIOCSIFFLAGS: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + case SIOCGIFMTU: + case SIOCSIFMTU: + case SIOCGIFMEM: + case SIOCSIFMEM: + case SIOCGIFHWADDR: + case SIOCSIFHWADDR: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCSIFADDR: + case SIOCSIFHWBROADCAST: + case SIOCDIFADDR: + case SIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCSIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCSIFPFLAGS: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCSIFTXQLEN: + case SIOCBRADDIF: + case SIOCBRDELIF: + case SIOCSIFNAME: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + return dev_ifsioc(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: + return sock_do_ioctl(net, sock, cmd, arg); + } + + /* Prevent warning from compat_sys_ioctl, these always + * result in -EINVAL in the native case anyway. */ + switch (cmd) { + case SIOCRTMSG: + case SIOCGIFCOUNT: + case SIOCSRARP: + case SIOCGRARP: + case SIOCDRARP: + case SIOCSIFLINK: + case SIOCGIFSLAVE: + case SIOCSIFSLAVE: + return -EINVAL; + } + + return -ENOIOCTLCMD; +} + static long compat_sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { @@ -2318,6 +3052,9 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) ret = compat_wext_handle_ioctl(net, cmd, arg); + if (ret == -ENOIOCTLCMD) + ret = compat_sock_ioctl_trans(file, sock, cmd, arg); + return ret; } #endif diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 22e8fd89477..6dcdd251781 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -55,16 +55,8 @@ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, /* * RFC 4291, Section 2.2.1 - * - * To keep the result as short as possible, especially - * since we don't shorthand, we don't want leading zeros - * in each halfword, so avoid %pI6. */ - return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", - ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), - ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), - ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), - ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); + return snprintf(buf, buflen, "%pI6c", addr); } static size_t rpc_ntop6(const struct sockaddr *sap, @@ -306,24 +298,25 @@ EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); * @sap: buffer into which to plant socket address * @salen: size of buffer * + * @uaddr does not have to be '\0'-terminated, but strict_strtoul() and + * rpc_pton() require proper string termination to be successful. + * * Returns the size of the socket address if successful; otherwise * zero is returned. 
*/ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, struct sockaddr *sap, const size_t salen) { - char *c, buf[RPCBIND_MAXUADDRLEN]; + char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; unsigned long portlo, porthi; unsigned short port; - if (uaddr_len > sizeof(buf)) + if (uaddr_len > RPCBIND_MAXUADDRLEN) return 0; memcpy(buf, uaddr, uaddr_len); - buf[uaddr_len] = '\n'; - buf[uaddr_len + 1] = '\0'; - + buf[uaddr_len] = '\0'; c = strrchr(buf, '.'); if (unlikely(c == NULL)) return 0; @@ -332,9 +325,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, if (unlikely(portlo > 255)) return 0; - c[0] = '\n'; - c[1] = '\0'; - + *c = '\0'; c = strrchr(buf, '.'); if (unlikely(c == NULL)) return 0; @@ -345,8 +336,7 @@ size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, port = (unsigned short)((porthi << 8) | portlo); - c[0] = '\0'; - + *c = '\0'; if (rpc_pton(buf, strlen(buf), sap, salen) == 0) return 0; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 54a4e042f10..f394fc190a4 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,16 +123,19 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred) clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } -static void +static int rpcauth_unhash_cred(struct rpc_cred *cred) { spinlock_t *cache_lock; + int ret; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); - if (atomic_read(&cred->cr_count) == 0) + ret = atomic_read(&cred->cr_count) == 0; + if (ret) rpcauth_unhash_cred_locked(cred); spin_unlock(cache_lock); + return ret; } /* @@ -332,9 +335,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, list_add_tail(&new->cr_lru, &free); spin_unlock(&cache->lock); found: - if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) - && cred->cr_ops->cr_init != NULL - && !(flags & RPCAUTH_LOOKUP_NEW)) { + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && + cred->cr_ops->cr_init != NULL && + !(flags & RPCAUTH_LOOKUP_NEW)) { int res = cred->cr_ops->cr_init(auth, cred); if (res < 0) { put_rpccred(cred); @@ -446,31 +449,35 @@ void put_rpccred(struct rpc_cred *cred) { /* Fast path for unhashed credentials */ - if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - goto need_lock; - - if (!atomic_dec_and_test(&cred->cr_count)) + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { + if (atomic_dec_and_test(&cred->cr_count)) + cred->cr_ops->crdestroy(cred); return; - goto out_destroy; -need_lock: + } + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; if (!list_empty(&cred->cr_lru)) { number_cred_unused--; list_del_init(&cred->cr_lru); } - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - rpcauth_unhash_cred(cred); if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { - cred->cr_expire = jiffies; - list_add_tail(&cred->cr_lru, &cred_unused); - number_cred_unused++; - spin_unlock(&rpc_credcache_lock); - return; + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + goto out_nodestroy; + } + if (!rpcauth_unhash_cred(cred)) { + /* We were hashed and someone looked us up... 
*/ + goto out_nodestroy; + } } spin_unlock(&rpc_credcache_lock); -out_destroy: cred->cr_ops->crdestroy(cred); + return; +out_nodestroy: + spin_unlock(&rpc_credcache_lock); } EXPORT_SYMBOL_GPL(put_rpccred); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fc6a43ccd95..3c3c50f38a1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -304,7 +304,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * -gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) +gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_inode *rpci = gss_msg->inode; struct inode *inode = &rpci->vfs_inode; @@ -445,7 +445,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); if (IS_ERR(gss_new)) return gss_new; - gss_msg = gss_add_msg(gss_auth, gss_new); + gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { struct inode *inode = &gss_new->inode->vfs_inode; int res = rpc_queue_upcall(inode, &gss_new->msg); @@ -485,7 +485,7 @@ gss_refresh_upcall(struct rpc_task *task) dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); - if (IS_ERR(gss_msg) == -EAGAIN) { + if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index f160be6c1a4..17562b4c35f 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -75,8 +75,8 @@ krb5_get_seq_num(struct crypto_blkcipher *key, if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) return code; - if ((plain[4] != plain[5]) || (plain[4] != plain[6]) - || (plain[4] != plain[7])) + if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || + (plain[4] != plain[7])) return (s32)KG_BAD_SEQ; *direction = plain[4]; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index f6c51e562a0..e34bc531fcb 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -105,8 +105,8 @@ static int rsi_match(struct cache_head *a, struct cache_head *b) { struct rsi *item = container_of(a, struct rsi, h); struct rsi *tmp = container_of(b, struct rsi, h); - return netobj_equal(&item->in_handle, &tmp->in_handle) - && netobj_equal(&item->in_token, &tmp->in_token); + return netobj_equal(&item->in_handle, &tmp->in_handle) && + netobj_equal(&item->in_token, &tmp->in_token); } static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index d6eee291a0e..39bddba53ba 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -401,9 +401,8 @@ static int cache_clean(void) for (; ch; cp= & ch->next, ch= *cp) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; - if (ch->expiry_time >= get_seconds() - && ch->last_refresh >= current_detail->flush_time - ) + if (ch->expiry_time >= get_seconds() && + ch->last_refresh >= current_detail->flush_time) continue; if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) cache_dequeue(current_detail, ch); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 38829e20500..154034b675b 100644 --- a/net/sunrpc/clnt.c +++ 
b/net/sunrpc/clnt.c @@ -79,7 +79,7 @@ static void call_connect_status(struct rpc_task *task); static __be32 *rpc_encode_header(struct rpc_task *task); static __be32 *rpc_verify_header(struct rpc_task *task); -static int rpc_ping(struct rpc_clnt *clnt, int flags); +static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -340,7 +340,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) return clnt; if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt, RPC_TASK_SOFT); + int err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); @@ -528,7 +528,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; - err = rpc_ping(clnt, RPC_TASK_SOFT); + err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); clnt = ERR_PTR(err); @@ -1060,7 +1060,7 @@ call_bind_status(struct rpc_task *task) goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u remote rpcbind service unavailable\n", + dprintk("RPC: %5u unrecognized remote rpcbind service\n", task->tk_pid); break; case -EPROTONOSUPPORT: @@ -1069,6 +1069,21 @@ call_bind_status(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_bind; return; + case -ECONNREFUSED: /* connection problems */ + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + dprintk("RPC: %5u remote rpcbind unreachable: %d\n", + task->tk_pid, task->tk_status); + if (!RPC_IS_SOFTCONN(task)) { + rpc_delay(task, 5*HZ); + goto retry_timeout; + } + status = task->tk_status; + break; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); @@ -1180,11 +1195,25 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; + + /* + * Common case: success. Force the compiler to put this + * test first. + */ + if (task->tk_status == 0) { + xprt_end_transmit(task); + rpc_task_force_reencode(task); + return; + } + switch (task->tk_status) { case -EAGAIN: break; default: + dprint_status(task); xprt_end_transmit(task); + rpc_task_force_reencode(task); + break; /* * Special cases: if we've been waiting on the * socket's write_space() callback, or if the @@ -1192,11 +1221,16 @@ call_transmit_status(struct rpc_task *task) * then hold onto the transport lock. 
*/ case -ECONNREFUSED: - case -ECONNRESET: - case -ENOTCONN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) { + xprt_end_transmit(task); + rpc_exit(task, task->tk_status); + break; + } + case -ECONNRESET: + case -ENOTCONN: case -EPIPE: rpc_task_force_reencode(task); } @@ -1346,6 +1380,10 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; + if (RPC_IS_SOFTCONN(task)) { + rpc_exit(task, -ETIMEDOUT); + return; + } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", @@ -1675,14 +1713,14 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -static int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, }; int err; msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); - err = rpc_call_sync(clnt, &msg, flags); + err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); put_rpccred(msg.rpc_cred); return err; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 830faf4d999..3e3772d8eb9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include <linux/in6.h> #include <linux/kernel.h> #include <linux/errno.h> +#include <linux/mutex.h> #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> @@ -110,6 +111,9 @@ static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; +static struct rpc_clnt * rpcb_local_clnt; +static struct rpc_clnt * rpcb_local_clnt4; + struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -163,21 +167,60 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, - size_t addrlen, u32 version) +static DEFINE_MUTEX(rpcb_create_local_mutex); + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) { struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_UDP, - .address = addr, - .addrsize = addrlen, + .protocol = XPRT_TRANSPORT_TCP, + .address = (struct sockaddr *)&rpcb_inaddr_loopback, + .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, - .version = version, + .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create local rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } - return rpc_create(&args); + /* + * This results in an RPC ping. On systems running portmapper, + * the v4 ping will fail. Proceed anyway, but disallow rpcb + * v4 upcalls. 
+ */ + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to create local rpcbind v4 " + "client (errno %ld).\n", PTR_ERR(clnt4)); + clnt4 = NULL; + } + + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; + +out: + mutex_unlock(&rpcb_create_local_mutex); + return result; } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, @@ -209,22 +252,13 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(const u32 version, struct rpc_message *msg) +static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) { - struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; - size_t addrlen = sizeof(rpcb_inaddr_loopback); - struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(addr, addrlen, version); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); - + error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -279,6 +313,11 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -288,7 +327,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_2, &msg); + return rpcb_register_call(rpcb_local_clnt, &msg); } /* @@ -313,7 +352,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -340,7 +379,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -356,7 +395,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(RPCBVERS_4, msg); + return rpcb_register_call(rpcb_local_clnt4, msg); } /** @@ -414,6 +453,13 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; + if (rpcb_local_clnt4 == NULL) + return -EPROTONOSUPPORT; if (address == NULL) return rpcb_unregister_all_protofamilies(&msg); @@ -491,7 +537,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi .rpc_message = &msg, .callback_ops = &rpcb_getport_ops, .callback_data = map, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN, }; return rpc_run_task(&task_setup_data); @@ -1027,3 +1073,15 @@ static struct rpc_program rpcb_program = { .version = rpcb_version, .stats = &rpcb_stats, }; + +/** + * cleanup_rpcb_clnt - shut down the local rpcbind clients + * + */ +void cleanup_rpcb_clnt(void) +{ + if (rpcb_local_clnt4) + rpc_shutdown_client(rpcb_local_clnt4); + if
(rpcb_local_clnt) + rpc_shutdown_client(rpcb_local_clnt); +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8cce9218901..f438347d817 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,6 +24,8 @@ extern struct cache_detail ip_map_cache, unix_gid_cache; +extern void cleanup_rpcb_clnt(void); + static int __init init_sunrpc(void) { @@ -53,6 +55,7 @@ out: static void __exit cleanup_sunrpc(void) { + cleanup_rpcb_clnt(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 952f206ff30..538ca433a56 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1103,8 +1103,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; } - if (*statp == rpc_success && (xdr = procp->pc_encode) - && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { + if (*statp == rpc_success && + (xdr = procp->pc_encode) && + !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { dprintk("svc: failed to encode reply\n"); /* serv->sv_stats->rpcsystemerr++; */ *statp = rpc_system_err; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index df124f78ee4..b845e2293df 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -129,8 +129,8 @@ static void svc_xprt_free(struct kref *kref) struct svc_xprt *xprt = container_of(kref, struct svc_xprt, xpt_ref); struct module *owner = xprt->xpt_class->xcl_owner; - if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) - && xprt->xpt_auth_cache != NULL) + if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && + xprt->xpt_auth_cache != NULL) svcauth_unix_info_release(xprt->xpt_auth_cache); xprt->xpt_ops->xpo_free(xprt); module_put(owner); @@ -846,8 +846,8 @@ static void svc_age_temp_xprts(unsigned long closure) * through, close it. 
*/ if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) continue; - if (atomic_read(&xprt->xpt_ref.refcount) > 1 - || test_bit(XPT_BUSY, &xprt->xpt_flags)) + if (atomic_read(&xprt->xpt_ref.refcount) > 1 || + test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; svc_xprt_get(xprt); list_move(le, &to_be_aged); diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index e64109b02ae..4e9393c2468 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -46,8 +46,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) dprintk("svc: svc_authenticate (%d)\n", flavor); spin_lock(&authtab_lock); - if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) - || !try_module_get(aops->owner)) { + if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor]) || + !try_module_get(aops->owner)) { spin_unlock(&authtab_lock); *authp = rpc_autherr_badcred; return SVC_DENIED; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 117f68a8aa4..4a8f6558718 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -125,8 +125,8 @@ static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { struct ip_map *orig = container_of(corig, struct ip_map, h); struct ip_map *new = container_of(cnew, struct ip_map, h); - return strcmp(orig->m_class, new->m_class) == 0 - && ipv6_addr_equal(&orig->m_addr, &new->m_addr); + return strcmp(orig->m_class, new->m_class) == 0 && + ipv6_addr_equal(&orig->m_addr, &new->m_addr); } static void ip_map_init(struct cache_head *cnew, struct cache_head *citem) { @@ -686,8 +686,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) case AF_INET: sin = svc_addr_in(rqstp); sin6 = &sin6_storage; - ipv6_addr_set(&sin6->sin6_addr, 0, 0, - htonl(0x0000FFFF), sin->sin_addr.s_addr); + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr); break; case AF_INET6: sin6 = svc_addr_in6(rqstp); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1c246a4f491..870929e08e5 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -272,14 +272,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) case PF_INET: len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", proto_name, - &inet_sk(sk)->rcv_saddr, - inet_sk(sk)->num); + &inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); break; case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", proto_name, &inet6_sk(sk)->rcv_saddr, - inet_sk(sk)->num); + inet_sk(sk)->inet_num); break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", @@ -1311,7 +1311,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, - ntohs(inet_sk(inet)->sport)); + ntohs(inet_sk(inet)->inet_sport)); if (*errp < 0) { kfree(svsk); diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 42f9748ae09..e65dcc61333 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -139,46 +139,45 @@ static ctl_table debug_table[] = { .data = &rpc_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "nfs_debug", .data = &nfs_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "nfsd_debug", .data = &nfsd_debug, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "nlm_debug", .data = &nlm_debug, .maxlen = sizeof(int), .mode 
= 0644, - .proc_handler = &proc_dodebug + .proc_handler = proc_dodebug }, { .procname = "transports", .maxlen = 256, .mode = 0444, - .proc_handler = &proc_do_xprt, + .proc_handler = proc_do_xprt, }, - { .ctl_name = 0 } + { } }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = debug_table }, - { .ctl_name = 0 } + { } }; #endif diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fd46d42afa8..469de292c23 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -700,6 +700,10 @@ void xprt_connect(struct rpc_task *task) } if (!xprt_lock_write(xprt, task)) return; + + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + xprt->ops->close(xprt); + if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 35fb68b9c8e..5b8a8ff93a2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -120,8 +120,7 @@ static ctl_table svcrdma_parm_table[] = { .data = &svcrdma_max_requests, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_max_requests, .extra2 = &max_max_requests }, @@ -130,8 +129,7 @@ static ctl_table svcrdma_parm_table[] = { .data = &svcrdma_max_req_size, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_max_inline, .extra2 = &max_max_inline }, @@ -140,8 +138,7 @@ static ctl_table svcrdma_parm_table[] = { .data = &svcrdma_ord, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_ord, .extra2 = &max_ord, }, @@ -151,67 +148,65 @@ static ctl_table svcrdma_parm_table[] = { .data = &rdma_stat_read, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_recv", .data = &rdma_stat_recv, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_write", .data = &rdma_stat_write, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_sq_starve", .data = &rdma_stat_sq_starve, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_rq_starve", .data = &rdma_stat_rq_starve, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_rq_poll", .data = &rdma_stat_rq_poll, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_rq_prod", .data = &rdma_stat_rq_prod, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_sq_poll", .data = &rdma_stat_sq_poll, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, + .proc_handler = read_reset_stat, }, { .procname = "rdma_stat_sq_prod", .data = &rdma_stat_sq_prod, .maxlen = sizeof(atomic_t), .mode = 0644, - .proc_handler = &read_reset_stat, - }, - { - .ctl_name = 0, + .proc_handler = read_reset_stat, }, + { }, }; static 
ctl_table svcrdma_table[] = { @@ -220,21 +215,16 @@ static ctl_table svcrdma_table[] = { .mode = 0555, .child = svcrdma_parm_table }, - { - .ctl_name = 0, - }, + { }, }; static ctl_table svcrdma_root_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = svcrdma_table }, - { - .ctl_name = 0, - }, + { }, }; void svc_rdma_cleanup(void) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 9e884383134..f92e37eb413 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -337,10 +337,9 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) { - if ((RDMA_TRANSPORT_IWARP == - rdma_node_get_transport(xprt->sc_cm_id-> - device->node_type)) - && sge_count > 1) + if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == + RDMA_TRANSPORT_IWARP) && + sge_count > 1) return 1; else return min_t(int, sge_count, xprt->sc_max_sge); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index f11be72a1a8..b15e1ebb2bf 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -54,7 +54,7 @@ * Assumptions: * - head[0] is physically contiguous. * - tail[0] is physically contiguous. - * - pages[] is not physically or virtually contigous and consists of + * - pages[] is not physically or virtually contiguous and consists of * PAGE_SIZE elements. * * Output: diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9a63f669ece..7018eef1dcd 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,79 +86,63 @@ static struct ctl_table_header *sunrpc_table_header; static ctl_table xr_tunables_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_read", .data = &xprt_rdma_max_inline_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_max_inline_write", .data = &xprt_rdma_max_inline_write, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_inline_write_padding", .data = &xprt_rdma_inline_write_padding, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &max_padding, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_memreg_strategy", .data = &xprt_rdma_memreg_strategy, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_memreg, .extra2 = &max_memreg, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "rdma_pad_optimize", .data = &xprt_rdma_pad_optimize, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = 0, + .proc_handler = proc_dointvec, }, + { }, }; 
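The sysctl hunks above all apply one pattern, which repeats through the sunrpc, svcrdma and xprtrdma tables and continues into xprtsock below: the binary .ctl_name numbers and .strategy callbacks are dropped, .proc_handler is assigned the plain function name rather than its address, and each array now ends with an empty sentinel entry. A minimal sketch of the resulting table shape, with hypothetical names (example_tunable, example_min, example_max) standing in for the real tunables:

#include <linux/sysctl.h>

/* Illustrative tunable and its bounds; not part of the patch. */
static unsigned int example_tunable = 10;
static unsigned int example_min = 1;
static unsigned int example_max = 128;

static ctl_table example_table[] = {
	{
		.procname	= "example_tunable",
		.data		= &example_tunable,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,	/* plain name, no .strategy */
		.extra1		= &example_min,
		.extra2		= &example_max,
	},
	{ },	/* empty entry replaces { .ctl_name = 0 } as the terminator */
};

Registration itself is untouched; such a table is still wired up with register_sysctl_table(), only the per-entry binary-sysctl metadata goes away.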
static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xr_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 465aafc2007..2209aa87d89 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -878,8 +878,8 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { * others indicate a transport condition which has already * undergone a best-effort. */ - if (ep->rep_connected == -ECONNREFUSED - && ++retry_count <= RDMA_CONNECT_RETRY_MAX) { + if (ep->rep_connected == -ECONNREFUSED && + ++retry_count <= RDMA_CONNECT_RETRY_MAX) { dprintk("RPC: %s: non-peer_reject, retry\n", __func__); goto retry; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba25..3d739e5d15d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -81,46 +81,38 @@ static struct ctl_table_header *sunrpc_table_header; */ static ctl_table xs_tunables_table[] = { { - .ctl_name = CTL_SLOTTABLE_UDP, .procname = "udp_slot_table_entries", .data = &xprt_udp_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_SLOTTABLE_TCP, .procname = "tcp_slot_table_entries", .data = &xprt_tcp_slot_table_entries, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &min_slot_table_size, .extra2 = &max_slot_table_size }, { - .ctl_name = CTL_MIN_RESVPORT, .procname = "min_resvport", .data = &xprt_min_resvport, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, { - .ctl_name = CTL_MAX_RESVPORT, .procname = "max_resvport", .data = &xprt_max_resvport, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, @@ -129,24 +121,18 @@ static ctl_table xs_tunables_table[] = { .data = &xs_tcp_fin_timeout, .maxlen = sizeof(xs_tcp_fin_timeout), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = sysctl_jiffies - }, - { - .ctl_name = 0, + .proc_handler = proc_dointvec_jiffies, }, + { }, }; static ctl_table sunrpc_table[] = { { - .ctl_name = CTL_SUNRPC, .procname = "sunrpc", .mode = 0555, .child = xs_tunables_table }, - { - .ctl_name = 0, - }, + { }, }; #endif @@ -2033,7 +2019,7 @@ static void xs_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; - if (transport->sock != NULL) { + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 689fdefe9d0..a7eac00cd36 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -437,11 +437,11 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf) break; case ROUTE_ADDITION: if (!is_slave(tipc_own_addr)) { - assert(!in_own_cluster(c_ptr->addr) - || is_slave(rem_node)); + assert(!in_own_cluster(c_ptr->addr) || + is_slave(rem_node)); } else { - 
assert(in_own_cluster(c_ptr->addr) - && !is_slave(rem_node)); + assert(in_own_cluster(c_ptr->addr) && + !is_slave(rem_node)); } n_ptr = c_ptr->nodes[tipc_node(rem_node)]; if (!n_ptr) @@ -451,11 +451,11 @@ void tipc_cltr_recv_routing_table(struct sk_buff *buf) break; case ROUTE_REMOVAL: if (!is_slave(tipc_own_addr)) { - assert(!in_own_cluster(c_ptr->addr) - || is_slave(rem_node)); + assert(!in_own_cluster(c_ptr->addr) || + is_slave(rem_node)); } else { - assert(in_own_cluster(c_ptr->addr) - && !is_slave(rem_node)); + assert(in_own_cluster(c_ptr->addr) && + !is_slave(rem_node)); } n_ptr = c_ptr->nodes[tipc_node(rem_node)]; if (n_ptr) diff --git a/net/tipc/link.c b/net/tipc/link.c index dd4c18b9a35..6f50f6423f6 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -378,8 +378,8 @@ static void link_timeout(struct link *l_ptr) struct tipc_msg *msg = buf_msg(l_ptr->first_out); u32 length = msg_size(msg); - if ((msg_user(msg) == MSG_FRAGMENTER) - && (msg_type(msg) == FIRST_FRAGMENT)) { + if ((msg_user(msg) == MSG_FRAGMENTER) && + (msg_type(msg) == FIRST_FRAGMENT)) { length = msg_size(msg_get_wrapped(msg)); } if (length) { @@ -2788,8 +2788,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, /* Is there an incomplete message waiting for this fragment? */ - while (pbuf && ((msg_seqno(buf_msg(pbuf)) != long_msg_seq_no) - || (msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) { + while (pbuf && ((msg_seqno(buf_msg(pbuf)) != long_msg_seq_no) || + (msg_orignode(fragm) != msg_orignode(buf_msg(pbuf))))) { prev = pbuf; pbuf = pbuf->next; } @@ -3325,8 +3325,8 @@ static void link_print(struct link *l_ptr, struct print_buf *buf, (l_ptr->last_out)), l_ptr->out_queue_size); if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) - msg_seqno(buf_msg(l_ptr->first_out))) - != (l_ptr->out_queue_size - 1)) - || (l_ptr->last_out->next != NULL)) { + != (l_ptr->out_queue_size - 1)) || + (l_ptr->last_out->next != NULL)) { tipc_printf(buf, "\nSend queue inconsistency\n"); tipc_printf(buf, "first_out= %x ", l_ptr->first_out); tipc_printf(buf, "next_out= %x ", l_ptr->next_out); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e6d9abf7440..1ea64f09cc4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -177,6 +177,7 @@ static void reject_rx_queue(struct sock *sk) * @net: network namespace (must be default network) * @sock: pre-allocated socket structure * @protocol: protocol indicator (must be 0) + * @kern: caused by kernel or by userspace? * * This routine creates additional data structures used by the TIPC socket, * initializes them, and links them together. @@ -184,7 +185,8 @@ static void reject_rx_queue(struct sock *sk) * Returns 0 on success, errno otherwise */ -static int tipc_create(struct net *net, struct socket *sock, int protocol) +static int tipc_create(struct net *net, struct socket *sock, int protocol, + int kern) { const struct proto_ops *ops; socket_state state; @@ -193,7 +195,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol) /* Validate arguments */ - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (unlikely(protocol != 0)) @@ -1134,13 +1136,11 @@ restart: /* Loop around if more data is required */ - if ((sz_copied < buf_len) /* didn't get all requested data */ - && (!skb_queue_empty(&sk->sk_receive_queue) || - (flags & MSG_WAITALL)) - /* ... and more is ready or required */ - && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */ - && (!err) /* ... 
and haven't reached a FIN */ - ) + if ((sz_copied < buf_len) && /* didn't get all requested data */ + (!skb_queue_empty(&sk->sk_receive_queue) || + (flags & MSG_WAITALL)) && /* and more is ready or required */ + (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ + (!err)) /* and haven't reached a FIN */ goto restart; exit: @@ -1528,7 +1528,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_create(sock_net(sock->sk), new_sock, 0); + res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); if (!res) { struct sock *new_sk = new_sock->sk; struct tipc_sock *new_tsock = tipc_sk(new_sk); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0747d8a9232..ac91f0dfa14 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -364,9 +364,9 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s, sub->seq.upper = htohl(s->seq.upper, swap); sub->timeout = htohl(s->timeout, swap); sub->filter = htohl(s->filter, swap); - if ((!(sub->filter & TIPC_SUB_PORTS) - == !(sub->filter & TIPC_SUB_SERVICE)) - || (sub->seq.lower > sub->seq.upper)) { + if ((!(sub->filter & TIPC_SUB_PORTS) == + !(sub->filter & TIPC_SUB_SERVICE)) || + (sub->seq.lower > sub->seq.upper)) { warn("Subscription rejected, illegal request\n"); kfree(sub); subscr_terminate(subscriber); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fc820cd7545..f2551190311 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -621,7 +621,8 @@ out: return sk; } -static int unix_create(struct net *net, struct socket *sock, int protocol) +static int unix_create(struct net *net, struct socket *sock, int protocol, + int kern) { if (protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; @@ -1032,8 +1033,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; addr_len = err; - if (test_bit(SOCK_PASSCRED, &sock->flags) - && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr && + (err = unix_autobind(sock)) != 0) goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); @@ -1258,7 +1259,7 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ { struct sock *sk = sock->sk; struct unix_sock *u; - struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; if (peer) { @@ -1377,8 +1378,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (test_bit(SOCK_PASSCRED, &sock->flags) - && !u->addr && (err = unix_autobind(sock)) != 0) + if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr + && (err = unix_autobind(sock)) != 0) goto out; err = -EMSGSIZE; @@ -2216,7 +2217,7 @@ static const struct file_operations unix_seq_fops = { #endif -static struct net_proto_family unix_family_ops = { +static const struct net_proto_family unix_family_ops = { .family = PF_UNIX, .create = unix_create, .owner = THIS_MODULE, diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 83c093077eb..708f5df6b7f 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -16,19 +16,18 @@ static ctl_table unix_table[] = { { - .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, .procname = "max_dgram_qlen", .data = &init_net.unx.sysctl_max_dgram_qlen, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { .ctl_name = 0 } + { } }; static struct ctl_path unix_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { 
.procname = "unix", .ctl_name = NET_UNIX, }, + { .procname = "net", }, + { .procname = "unix", }, { }, }; diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index d631a17186b..d3bfb6ef13a 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -388,6 +388,8 @@ int wimax_gnl_doit_msg_from_user(struct sk_buff *skb, struct genl_info *info) } mutex_lock(&wimax_dev->mutex); result = wimax_dev_is_ready(wimax_dev); + if (result == -ENOMEDIUM) + result = 0; if (result < 0) goto error_not_ready; result = -ENOSYS; diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index ca269178c4d..35f370091f4 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -62,7 +62,7 @@ * Called when wanting to reset the device for any reason. Device is * taken back to power on status. * - * This call blocks; on succesful return, the device has completed the + * This call blocks; on successful return, the device has completed the * reset process and is ready to operate. */ int wimax_reset(struct wimax_dev *wimax_dev) diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index 70ef4df863b..ae752a64d92 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -107,8 +107,8 @@ void wimax_report_rfkill_hw(struct wimax_dev *wimax_dev, if (state != wimax_dev->rf_hw) { wimax_dev->rf_hw = state; - if (wimax_dev->rf_hw == WIMAX_RF_ON - && wimax_dev->rf_sw == WIMAX_RF_ON) + if (wimax_dev->rf_hw == WIMAX_RF_ON && + wimax_dev->rf_sw == WIMAX_RF_ON) wimax_state = WIMAX_ST_READY; else wimax_state = WIMAX_ST_RADIO_OFF; @@ -163,8 +163,8 @@ void wimax_report_rfkill_sw(struct wimax_dev *wimax_dev, if (state != wimax_dev->rf_sw) { wimax_dev->rf_sw = state; - if (wimax_dev->rf_hw == WIMAX_RF_ON - && wimax_dev->rf_sw == WIMAX_RF_ON) + if (wimax_dev->rf_hw == WIMAX_RF_ON && + wimax_dev->rf_sw == WIMAX_RF_ON) wimax_state = WIMAX_ST_READY; else wimax_state = WIMAX_ST_RADIO_OFF; @@ -305,8 +305,15 @@ int wimax_rfkill(struct wimax_dev *wimax_dev, enum wimax_rf_state state) d_fnstart(3, dev, "(wimax_dev %p state %u)\n", wimax_dev, state); mutex_lock(&wimax_dev->mutex); result = wimax_dev_is_ready(wimax_dev); - if (result < 0) + if (result < 0) { + /* While initializing, < 1.4.3 wimax-tools versions use + * this call to check if the device is a valid WiMAX + * device; so we allow it to proceed always, + * considering the radios are all off. 
*/ + if (result == -ENOMEDIUM && state == WIMAX_RF_QUERY) + result = WIMAX_RF_OFF << 1 | WIMAX_RF_OFF; goto error_not_ready; + } switch (state) { case WIMAX_RF_ON: case WIMAX_RF_OFF: @@ -355,6 +362,7 @@ int wimax_rfkill_add(struct wimax_dev *wimax_dev) wimax_dev->rfkill = rfkill; + rfkill_init_sw_state(rfkill, 1); result = rfkill_register(wimax_dev->rfkill); if (result < 0) goto error_rfkill_register; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 79fb7d7c640..c8866412f83 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -60,6 +60,14 @@ #define D_SUBMODULE stack #include "debug-levels.h" +static char wimax_debug_params[128]; +module_param_string(debug, wimax_debug_params, sizeof(wimax_debug_params), + 0644); +MODULE_PARM_DESC(debug, + "String of space-separated NAME:VALUE pairs, where NAMEs " + "are the different debug submodules and VALUE are the " + "initial debug value to set."); + /* * Authoritative source for the RE_STATE_CHANGE attribute policy * @@ -562,6 +570,9 @@ int __init wimax_subsys_init(void) int result, cnt; d_fnstart(4, NULL, "()\n"); + d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, + "wimax.debug"); + snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), "WiMAX"); result = genl_register_family(&wimax_gnl_family); diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index abf7ca3f9ff..90e93a5701a 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,3 +1,21 @@ +config WIRELESS_EXT + bool + +config WEXT_CORE + def_bool y + depends on CFG80211_WEXT || WIRELESS_EXT + +config WEXT_PROC + def_bool y + depends on PROC_FS + depends on WEXT_CORE + +config WEXT_SPY + bool + +config WEXT_PRIV + bool + config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL @@ -67,14 +85,10 @@ config CFG80211_DEFAULT_PS applications instead -- they need to register their network latency requirement, see Documentation/power/pm_qos_interface.txt. -config CFG80211_DEFAULT_PS_VALUE - int - default 1 if CFG80211_DEFAULT_PS - default 0 - config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" - depends on CFG80211 && DEBUG_FS + depends on CFG80211 + depends on DEBUG_FS ---help--- You can enable this if you want to debugfs entries for cfg80211. @@ -83,6 +97,7 @@ config CFG80211_DEBUGFS config WIRELESS_OLD_REGULATORY bool "Old wireless static regulatory definitions" default n + depends on CFG80211 ---help--- This option enables the old static regulatory information and uses it within the new framework. This option is available @@ -94,20 +109,19 @@ config WIRELESS_OLD_REGULATORY Say N and if you say Y, please tell us why. The default is N. -config WIRELESS_EXT - bool "Wireless extensions" +config CFG80211_WEXT + bool "cfg80211 wireless extensions compatibility" + depends on CFG80211 + select WEXT_CORE default y - ---help--- - This option enables the legacy wireless extensions - (wireless network interface configuration via ioctls.) - - Say Y unless you've upgraded all your userspace to use - nl80211 instead of wireless extensions. + help + Enable this option if you need old userspace for wireless + extensions with cfg80211-based drivers. config WIRELESS_EXT_SYSFS bool "Wireless extensions sysfs files" default y - depends on WIRELESS_EXT && SYSFS + depends on WEXT_CORE && SYSFS help This option enables the deprecated wireless statistics files in /sys/class/net/*/wireless/. 
The same information diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 3ecaa917997..f07c8dc7aab 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,13 +1,17 @@ -obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o obj-$(CONFIG_LIB80211) += lib80211.o obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o +obj-$(CONFIG_WEXT_CORE) += wext-core.o +obj-$(CONFIG_WEXT_PROC) += wext-proc.o +obj-$(CONFIG_WEXT_SPY) += wext-spy.o +obj-$(CONFIG_WEXT_PRIV) += wext-priv.o + cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o -cfg80211-y += mlme.o ibss.o sme.o chan.o +cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o -cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o wext-sme.o +cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/core.c b/net/wireless/core.c index a595f712b5b..c2a2c563d21 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -22,6 +22,7 @@ #include "sysfs.h" #include "debugfs.h" #include "wext-compat.h" +#include "ethtool.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -44,6 +45,9 @@ DEFINE_MUTEX(cfg80211_mutex); /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; +/* for the cleanup, scan and event works */ +struct workqueue_struct *cfg80211_wq; + /* requires cfg80211_mutex to be held! */ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { @@ -230,7 +234,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; int err = 0; - if (!rdev->wiphy.netnsok) + if (!(rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK)) return -EOPNOTSUPP; list_for_each_entry(wdev, &rdev->netdev_list, list) { @@ -359,11 +363,17 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); +#ifdef CONFIG_CFG80211_WEXT + rdev->wiphy.wext = &cfg80211_wext_handler; +#endif + device_initialize(&rdev->wiphy.dev); rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; - rdev->wiphy.ps_default = CONFIG_CFG80211_DEFAULT_PS_VALUE; +#ifdef CONFIG_CFG80211_DEFAULT_PS + rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; +#endif wiphy_net_set(&rdev->wiphy, &init_net); @@ -478,7 +488,7 @@ int wiphy_register(struct wiphy *wiphy) if (IS_ERR(rdev->wiphy.debugfsdir)) rdev->wiphy.debugfsdir = NULL; - if (wiphy->custom_regulatory) { + if (wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) { struct regulatory_request request; request.wiphy_idx = get_wiphy_idx(wiphy); @@ -542,7 +552,7 @@ void wiphy_unregister(struct wiphy *wiphy) * First remove the hardware from everywhere, this makes * it impossible to find from userspace. 
*/ - cfg80211_debugfs_rdev_del(rdev); + debugfs_remove_recursive(rdev->wiphy.debugfsdir); list_del(&rdev->list); /* @@ -565,7 +575,6 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); - debugfs_remove(rdev->wiphy.debugfsdir); mutex_unlock(&cfg80211_mutex); @@ -626,6 +635,10 @@ static void wdev_cleanup_work(struct work_struct *work) dev_put(wdev->netdev); } +static struct device_type wiphy_type = { + .name = "wlan", +}; + static int cfg80211_netdev_notifier_call(struct notifier_block * nb, unsigned long state, void *ndev) @@ -642,6 +655,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, WARN_ON(wdev->iftype == NL80211_IFTYPE_UNSPECIFIED); switch (state) { + case NETDEV_POST_INIT: + SET_NETDEV_DEVTYPE(dev, &wiphy_type); + break; case NETDEV_REGISTER: /* * NB: cannot take rdev->mtx here because this may be @@ -666,13 +682,14 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; mutex_unlock(&rdev->devlist_mtx); -#ifdef CONFIG_WIRELESS_EXT - if (!dev->wireless_handlers) - dev->wireless_handlers = &cfg80211_wext_handler; +#ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; - wdev->wext.ps = wdev->wiphy->ps_default; + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) + wdev->wext.ps = true; + else + wdev->wext.ps = false; wdev->wext.ps_timeout = 100; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, @@ -682,6 +699,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->wext.ps = false; } #endif + if (!dev->ethtool_ops) + dev->ethtool_ops = &cfg80211_ethtool_ops; + + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) + dev->priv_flags |= IFF_DONT_BRIDGE; break; case NETDEV_GOING_DOWN: switch (wdev->iftype) { @@ -690,7 +713,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NL80211_IFTYPE_STATION: wdev_lock(wdev); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; wdev->wext.ie_len = 0; @@ -707,7 +730,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NETDEV_DOWN: dev_hold(dev); - schedule_work(&wdev->cleanup_work); + queue_work(cfg80211_wq, &wdev->cleanup_work); break; case NETDEV_UP: /* @@ -722,7 +745,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, mutex_unlock(&rdev->devlist_mtx); dev_put(dev); } -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); @@ -760,7 +783,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_init(&wdev->list); rdev->devlist_generation++; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif } @@ -825,8 +848,14 @@ static int __init cfg80211_init(void) if (err) goto out_fail_reg; + cfg80211_wq = create_singlethread_workqueue("cfg80211"); + if (!cfg80211_wq) + goto out_fail_wq; + return 0; +out_fail_wq: + regulatory_exit(); out_fail_reg: debugfs_remove(ieee80211_debugfs_dir); out_fail_nl80211: @@ -848,5 +877,6 @@ static void cfg80211_exit(void) wiphy_sysfs_exit(); regulatory_exit(); unregister_pernet_device(&cfg80211_pernet_ops); + destroy_workqueue(cfg80211_wq); } 
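With this change cfg80211_init() allocates a dedicated single-threaded workqueue, the NETDEV_DOWN handler queues the wdev cleanup work on it instead of the shared scheduler queue, and cfg80211_exit() destroys the queue last. A rough, self-contained sketch of the same create/queue/flush/destroy lifecycle, using hypothetical names (example_wq, example_work, example_work_fn) rather than the cfg80211 symbols:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* module-private queue, like cfg80211_wq */
static struct work_struct example_work;

static void example_work_fn(struct work_struct *work)
{
	/* deferred teardown runs here, serialized on the private queue */
}

static int __init example_init(void)
{
	example_wq = create_singlethread_workqueue("example");
	if (!example_wq)
		return -ENOMEM;
	INIT_WORK(&example_work, example_work_fn);
	/* hand deferred work to the private queue, as the patch does with
	 * queue_work(cfg80211_wq, &wdev->cleanup_work) */
	queue_work(example_wq, &example_work);
	return 0;
}

static void __exit example_exit(void)
{
	flush_workqueue(example_wq);	/* wait for anything still queued */
	destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");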
module_exit(cfg80211_exit); diff --git a/net/wireless/core.h b/net/wireless/core.h index 68b321997d4..4ef3efc9410 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -72,17 +72,6 @@ struct cfg80211_registered_device { /* current channel */ struct ieee80211_channel *channel; -#ifdef CONFIG_CFG80211_DEBUGFS - /* Debugfs entries */ - struct wiphy_debugfsdentries { - struct dentry *rts_threshold; - struct dentry *fragmentation_threshold; - struct dentry *short_retry_limit; - struct dentry *long_retry_limit; - struct dentry *ht40allow_map; - } debugfs; -#endif - /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); @@ -102,6 +91,8 @@ bool wiphy_idx_valid(int wiphy_idx) return (wiphy_idx >= 0); } + +extern struct workqueue_struct *cfg80211_wq; extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; @@ -284,6 +275,8 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); +int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid); diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 13d93d84f90..2e489561503 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -104,11 +104,7 @@ static const struct file_operations ht40allow_map_ops = { }; #define DEBUGFS_ADD(name) \ - rdev->debugfs.name = debugfs_create_file(#name, S_IRUGO, phyd, \ - &rdev->wiphy, &name## _ops); -#define DEBUGFS_DEL(name) \ - debugfs_remove(rdev->debugfs.name); \ - rdev->debugfs.name = NULL; + debugfs_create_file(#name, S_IRUGO, phyd, &rdev->wiphy, &name## _ops); void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) { @@ -120,12 +116,3 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) DEBUGFS_ADD(long_retry_limit); DEBUGFS_ADD(ht40allow_map); } - -void cfg80211_debugfs_rdev_del(struct cfg80211_registered_device *rdev) -{ - DEBUGFS_DEL(rts_threshold); - DEBUGFS_DEL(fragmentation_threshold); - DEBUGFS_DEL(short_retry_limit); - DEBUGFS_DEL(long_retry_limit); - DEBUGFS_DEL(ht40allow_map); -} diff --git a/net/wireless/debugfs.h b/net/wireless/debugfs.h index 6419b6d6ce3..74fdd381142 100644 --- a/net/wireless/debugfs.h +++ b/net/wireless/debugfs.h @@ -3,12 +3,9 @@ #ifdef CONFIG_CFG80211_DEBUGFS void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev); -void cfg80211_debugfs_rdev_del(struct cfg80211_registered_device *rdev); #else static inline void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) {} -static inline -void cfg80211_debugfs_rdev_del(struct cfg80211_registered_device *rdev) {} #endif #endif /* __CFG80211_DEBUGFS_H */ diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c new file mode 100644 index 00000000000..ca4c825be93 --- /dev/null +++ b/net/wireless/ethtool.c @@ -0,0 +1,45 @@ +#include <linux/utsname.h> +#include <net/cfg80211.h> +#include "ethtool.h" + +static void cfg80211_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, 
+ sizeof(info->driver)); + + strlcpy(info->version, init_utsname()->release, sizeof(info->version)); + + if (wdev->wiphy->fw_version[0]) + strncpy(info->fw_version, wdev->wiphy->fw_version, + sizeof(info->fw_version)); + else + strncpy(info->fw_version, "N/A", sizeof(info->fw_version)); + + strlcpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), + sizeof(info->bus_info)); +} + +static int cfg80211_get_regs_len(struct net_device *dev) +{ + /* For now, return 0... */ + return 0; +} + +static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *data) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + regs->version = wdev->wiphy->hw_version; + regs->len = 0; +} + +const struct ethtool_ops cfg80211_ethtool_ops = { + .get_drvinfo = cfg80211_get_drvinfo, + .get_regs_len = cfg80211_get_regs_len, + .get_regs = cfg80211_get_regs, + .get_link = ethtool_op_get_link, +}; diff --git a/net/wireless/ethtool.h b/net/wireless/ethtool.h new file mode 100644 index 00000000000..695ecad20bd --- /dev/null +++ b/net/wireless/ethtool.h @@ -0,0 +1,6 @@ +#ifndef __CFG80211_ETHTOOL__ +#define __CFG80211_ETHTOOL__ + +extern const struct ethtool_ops cfg80211_ethtool_ops; + +#endif /* __CFG80211_ETHTOOL__ */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index c8833891197..6ef5a491fb4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -15,7 +15,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -44,7 +44,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, GFP_KERNEL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); @@ -70,7 +70,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); - schedule_work(&rdev->event_work); + queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_ibss_joined); @@ -96,7 +96,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree(wdev->connect_keys); wdev->connect_keys = connkeys; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.channel = params->channel; #endif err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); @@ -154,7 +154,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) wdev->current_bss = NULL; wdev->ssid_len = 0; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!nowext) wdev->wext.ibss.ssid_len = 0; #endif @@ -169,8 +169,8 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) wdev_unlock(wdev); } -static int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext) +int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -203,7 +203,7 @@ int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, return err; } -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { diff --git a/net/wireless/mlme.c 
b/net/wireless/mlme.c index 0a6b7a0eca6..1001db4912f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -62,7 +62,6 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) u8 *ie = mgmt->u.assoc_resp.variable; int i, ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); struct cfg80211_internal_bss *bss = NULL; - bool need_connect_result = true; wdev_lock(wdev); @@ -97,7 +96,6 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) WARN_ON(!bss); } else if (wdev->conn) { cfg80211_sme_failed_assoc(wdev); - need_connect_result = false; /* * do not call connect_result() now because the * sme will schedule work that does it later. @@ -130,7 +128,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -static void __cfg80211_send_deauth(struct net_device *dev, +void __cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -139,7 +137,6 @@ static void __cfg80211_send_deauth(struct net_device *dev, struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; const u8 *bssid = mgmt->bssid; int i; - bool done = false; ASSERT_WDEV_LOCK(wdev); @@ -147,7 +144,6 @@ static void __cfg80211_send_deauth(struct net_device *dev, if (wdev->current_bss && memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) { - done = true; cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); wdev->current_bss = NULL; @@ -157,7 +153,6 @@ static void __cfg80211_send_deauth(struct net_device *dev, cfg80211_unhold_bss(wdev->auth_bsses[i]); cfg80211_put_bss(&wdev->auth_bsses[i]->pub); wdev->auth_bsses[i] = NULL; - done = true; break; } if (wdev->authtry_bsses[i] && @@ -165,13 +160,10 @@ static void __cfg80211_send_deauth(struct net_device *dev, cfg80211_unhold_bss(wdev->authtry_bsses[i]); cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); wdev->authtry_bsses[i] = NULL; - done = true; break; } } - WARN_ON(!done); - if (wdev->sme_state == CFG80211_SME_CONNECTED) { u16 reason_code; bool from_ap; @@ -186,27 +178,19 @@ static void __cfg80211_send_deauth(struct net_device *dev, false, NULL); } } +EXPORT_SYMBOL(__cfg80211_send_deauth); - -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, - void *cookie) +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - BUG_ON(cookie && wdev != cookie); - - if (cookie) { - /* called within callback */ - __cfg80211_send_deauth(dev, buf, len); - } else { - wdev_lock(wdev); - __cfg80211_send_deauth(dev, buf, len); - wdev_unlock(wdev); - } + wdev_lock(wdev); + __cfg80211_send_deauth(dev, buf, len); + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_deauth); -static void __cfg80211_send_disassoc(struct net_device *dev, +void __cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -247,40 +231,24 @@ static void __cfg80211_send_disassoc(struct net_device *dev, from_ap = memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0; __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } +EXPORT_SYMBOL(__cfg80211_send_disassoc); -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, - void *cookie) +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - BUG_ON(cookie && wdev != cookie); - - if (cookie) { 
- /* called within callback */ - __cfg80211_send_disassoc(dev, buf, len); - } else { - wdev_lock(wdev); - __cfg80211_send_disassoc(dev, buf, len); - wdev_unlock(wdev); - } + wdev_lock(wdev); + __cfg80211_send_disassoc(dev, buf, len); + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +static void __cfg80211_auth_remove(struct wireless_dev *wdev, const u8 *addr) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); int i; bool done = false; - wdev_lock(wdev); - - nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + ASSERT_WDEV_LOCK(wdev); for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { if (wdev->authtry_bsses[i] && @@ -295,6 +263,29 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) } WARN_ON(!done); +} + +void __cfg80211_auth_canceled(struct net_device *dev, const u8 *addr) +{ + __cfg80211_auth_remove(dev->ieee80211_ptr, addr); +} +EXPORT_SYMBOL(__cfg80211_auth_canceled); + +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + wdev_lock(wdev); + + nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); + if (wdev->sme_state == CFG80211_SME_CONNECTING) + __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false, NULL); + + __cfg80211_auth_remove(wdev, addr); wdev_unlock(wdev); } @@ -340,7 +331,7 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; char *buf = kmalloc(128, gfp); @@ -469,12 +460,23 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct cfg80211_assoc_request req; struct cfg80211_internal_bss *bss; int i, err, slot = -1; + bool was_connected = false; ASSERT_WDEV_LOCK(wdev); memset(&req, 0, sizeof(req)); - if (wdev->current_bss) + if (wdev->current_bss && prev_bssid && + memcmp(wdev->current_bss->pub.bssid, prev_bssid, ETH_ALEN) == 0) { + /* + * Trying to reassociate: Allow this to proceed and let the old + * association to be dropped when the new one is completed. 
+ */ + if (wdev->sme_state == CFG80211_SME_CONNECTED) { + was_connected = true; + wdev->sme_state = CFG80211_SME_CONNECTING; + } + } else if (wdev->current_bss) return -EALREADY; req.ie = ie; @@ -484,8 +486,11 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, req.prev_bssid = prev_bssid; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (!req.bss) + if (!req.bss) { + if (was_connected) + wdev->sme_state = CFG80211_SME_CONNECTED; return -ENOENT; + } bss = bss_from_pub(req.bss); @@ -503,6 +508,8 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, err = rdev->ops->assoc(&rdev->wiphy, dev, &req); out: + if (err && was_connected) + wdev->sme_state = CFG80211_SME_CONNECTED; /* still a reference in wdev->auth_bsses[slot] */ cfg80211_put_bss(req.bss); return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ca3c92a0a14..a6028433e3a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -138,6 +138,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, + [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, + [NL80211_ATTR_PMKID] = { .type = NLA_BINARY, + .len = WLAN_PMKID_LEN }, }; /* policy for the attributes */ @@ -151,6 +154,26 @@ nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, }; +/* ifidx get helper */ +static int nl80211_get_ifidx(struct netlink_callback *cb) +{ + int res; + + res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (res) + return res; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + res = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); + if (!res) + return -EINVAL; + return res; +} + /* IE validation */ static bool is_valid_ie_attr(const struct nlattr *attr) { @@ -429,6 +452,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, sizeof(u32) * dev->wiphy.n_cipher_suites, dev->wiphy.cipher_suites); + NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, + dev->wiphy.max_num_pmkids); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -540,7 +566,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); - if (dev->wiphy.netnsok) { + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); + if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); } @@ -947,6 +976,32 @@ static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) return 0; } +static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u8 use_4addr, + enum nl80211_iftype iftype) +{ + if (!use_4addr) { + if (netdev && netdev->br_port) + return -EBUSY; + return 0; + } + + switch (iftype) { + case NL80211_IFTYPE_AP_VLAN: + if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP) + return 0; + break; + case NL80211_IFTYPE_STATION: + if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_STATION) + return 0; + break; + default: + break; + } + + return -EOPNOTSUPP; +} + static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct 
cfg80211_registered_device *rdev; @@ -987,6 +1042,16 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } + if (info->attrs[NL80211_ATTR_4ADDR]) { + params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); + change = true; + err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype); + if (err) + goto unlock; + } else { + params.use_4addr = -1; + } + if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { if (ntype != NL80211_IFTYPE_MONITOR) { err = -EINVAL; @@ -1006,6 +1071,9 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) else err = 0; + if (!err && params.use_4addr != -1) + dev->ieee80211_ptr->use_4addr = params.use_4addr; + unlock: dev_put(dev); cfg80211_unlock_rdev(rdev); @@ -1053,6 +1121,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } + if (info->attrs[NL80211_ATTR_4ADDR]) { + params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); + err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); + if (err) + goto unlock; + } + err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); @@ -1264,7 +1339,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!err) err = func(&rdev->wiphy, dev, key.idx); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!err) { if (func == rdev->ops->set_default_key) dev->ieee80211_ptr->wext.default_key = key.idx; @@ -1365,7 +1440,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (!err) err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!err) { if (key.idx == dev->ieee80211_ptr->wext.default_key) dev->ieee80211_ptr->wext.default_key = -1; @@ -1682,20 +1757,10 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[1]; int err; - if (!ifidx) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); - if (err) - return err; - - if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) - return -EINVAL; - - ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); - if (!ifidx) - return -EINVAL; - } + if (!ifidx) + ifidx = nl80211_get_ifidx(cb); + if (ifidx < 0) + return ifidx; rtnl_lock(); @@ -1800,7 +1865,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) } /* - * Get vlan interface making sure it is on the right wiphy. + * Get vlan interface making sure it is running and on the right wiphy. 
*/ static int get_vlan(struct genl_info *info, struct cfg80211_registered_device *rdev, @@ -1818,6 +1883,8 @@ static int get_vlan(struct genl_info *info, return -EINVAL; if ((*vlan)->ieee80211_ptr->wiphy != &rdev->wiphy) return -EINVAL; + if (!netif_running(*vlan)) + return -ENETDOWN; } return 0; } @@ -2105,9 +2172,9 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, if (pinfo->filled & MPATH_INFO_FRAME_QLEN) NLA_PUT_U32(msg, NL80211_MPATH_INFO_FRAME_QLEN, pinfo->frame_qlen); - if (pinfo->filled & MPATH_INFO_DSN) - NLA_PUT_U32(msg, NL80211_MPATH_INFO_DSN, - pinfo->dsn); + if (pinfo->filled & MPATH_INFO_SN) + NLA_PUT_U32(msg, NL80211_MPATH_INFO_SN, + pinfo->sn); if (pinfo->filled & MPATH_INFO_METRIC) NLA_PUT_U32(msg, NL80211_MPATH_INFO_METRIC, pinfo->metric); @@ -2145,20 +2212,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[1]; int err; - if (!ifidx) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); - if (err) - return err; - - if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) - return -EINVAL; - - ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); - if (!ifidx) - return -EINVAL; - } + if (!ifidx) + ifidx = nl80211_get_ifidx(cb); + if (ifidx < 0) + return ifidx; rtnl_lock(); @@ -2605,6 +2662,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, cur_params.dot11MeshHWMPpreqMinInterval); NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, cur_params.dot11MeshHWMPnetDiameterTraversalTime); + NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_ROOTMODE, + cur_params.dot11MeshHWMPRootMode); nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); err = genlmsg_reply(msg, info); @@ -2715,6 +2774,10 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) dot11MeshHWMPnetDiameterTraversalTime, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPRootMode, mask, + NL80211_MESHCONF_HWMP_ROOTMODE, + nla_get_u8); /* Apply changes */ err = rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask); @@ -2988,7 +3051,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out; } - request->n_channels = n_channels; if (n_ssids) request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; @@ -2999,32 +3061,53 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ie = (void *)(request->channels + n_channels); } + i = 0; if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { /* user specified, bail out if channel not found */ - request->n_channels = n_channels; - i = 0; nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) { - request->channels[i] = ieee80211_get_channel(wiphy, nla_get_u32(attr)); - if (!request->channels[i]) { + struct ieee80211_channel *chan; + + chan = ieee80211_get_channel(wiphy, nla_get_u32(attr)); + + if (!chan) { err = -EINVAL; goto out_free; } + + /* ignore disabled channels */ + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + request->channels[i] = chan; i++; } } else { /* all channels */ - i = 0; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; for (j = 0; j < wiphy->bands[band]->n_channels; j++) { - request->channels[i] = &wiphy->bands[band]->channels[j]; + struct ieee80211_channel *chan; + + chan = &wiphy->bands[band]->channels[j]; + + if (chan->flags & IEEE80211_CHAN_DISABLED) + continue; + + 
request->channels[i] = chan; i++; } } } + if (!i) { + err = -EINVAL; + goto out_free; + } + + request->n_channels = i; + i = 0; if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { @@ -3105,6 +3188,8 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval); NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability); NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq); + NLA_PUT_U32(msg, NL80211_BSS_SEEN_MS_AGO, + jiffies_to_msecs(jiffies - intbss->ts)); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: @@ -3159,21 +3244,11 @@ static int nl80211_dump_scan(struct sk_buff *skb, int start = cb->args[1], idx = 0; int err; - if (!ifidx) { - err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - nl80211_fam.attrbuf, nl80211_fam.maxattr, - nl80211_policy); - if (err) - return err; - - if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) - return -EINVAL; - - ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); - if (!ifidx) - return -EINVAL; - cb->args[0] = ifidx; - } + if (!ifidx) + ifidx = nl80211_get_ifidx(cb); + if (ifidx < 0) + return ifidx; + cb->args[0] = ifidx; dev = dev_get_by_index(sock_net(skb->sk), ifidx); if (!dev) @@ -3216,6 +3291,106 @@ static int nl80211_dump_scan(struct sk_buff *skb, return err; } +static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, + int flags, struct net_device *dev, + struct survey_info *survey) +{ + void *hdr; + struct nlattr *infoattr; + + /* Survey without a channel doesn't make sense */ + if (!survey->channel) + return -EINVAL; + + hdr = nl80211hdr_put(msg, pid, seq, flags, + NL80211_CMD_NEW_SURVEY_RESULTS); + if (!hdr) + return -ENOMEM; + + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + + infoattr = nla_nest_start(msg, NL80211_ATTR_SURVEY_INFO); + if (!infoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_SURVEY_INFO_FREQUENCY, + survey->channel->center_freq); + if (survey->filled & SURVEY_INFO_NOISE_DBM) + NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE, + survey->noise); + + nla_nest_end(msg, infoattr); + + return genlmsg_end(msg, hdr); + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_dump_survey(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct survey_info survey; + struct cfg80211_registered_device *dev; + struct net_device *netdev; + int ifidx = cb->args[0]; + int survey_idx = cb->args[1]; + int res; + + if (!ifidx) + ifidx = nl80211_get_ifidx(cb); + if (ifidx < 0) + return ifidx; + cb->args[0] = ifidx; + + rtnl_lock(); + + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) { + res = -ENODEV; + goto out_rtnl; + } + + dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); + if (IS_ERR(dev)) { + res = PTR_ERR(dev); + goto out_rtnl; + } + + if (!dev->ops->dump_survey) { + res = -EOPNOTSUPP; + goto out_err; + } + + while (1) { + res = dev->ops->dump_survey(&dev->wiphy, netdev, survey_idx, + &survey); + if (res == -ENOENT) + break; + if (res) + goto out_err; + + if (nl80211_send_survey(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + netdev, + &survey) < 0) + goto out; + survey_idx++; + } + + out: + cb->args[1] = survey_idx; + res = skb->len; + out_err: + cfg80211_unlock_rdev(dev); + out_rtnl: + rtnl_unlock(); + + return res; +} + static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type) { return auth_type <= 
NL80211_AUTHTYPE_MAX; @@ -4054,6 +4229,99 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_pmksa *pmksa) = NULL; + int err; + struct net_device *dev; + struct cfg80211_pmksa pmksa; + + memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); + + if (!info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_PMKID]) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto out_rtnl; + + pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); + pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + switch (info->genlhdr->cmd) { + case NL80211_CMD_SET_PMKSA: + rdev_ops = rdev->ops->set_pmksa; + break; + case NL80211_CMD_DEL_PMKSA: + rdev_ops = rdev->ops->del_pmksa; + break; + default: + WARN_ON(1); + break; + } + + if (!rdev_ops) { + err = -EOPNOTSUPP; + goto out; + } + + err = rdev_ops(&rdev->wiphy, dev, &pmksa); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + out_rtnl: + rtnl_unlock(); + + return err; +} + +static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int err; + struct net_device *dev; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto out_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!rdev->ops->flush_pmksa) { + err = -EOPNOTSUPP; + goto out; + } + + err = rdev->ops->flush_pmksa(&rdev->wiphy, dev); + + out: + cfg80211_unlock_rdev(rdev); + dev_put(dev); + out_rtnl: + rtnl_unlock(); + + return err; + +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4293,6 +4561,30 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_GET_SURVEY, + .policy = nl80211_policy, + .dumpit = nl80211_dump_survey, + }, + { + .cmd = NL80211_CMD_SET_PMKSA, + .doit = nl80211_setdel_pmksa, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DEL_PMKSA, + .doit = nl80211_setdel_pmksa, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_FLUSH_PMKSA, + .doit = nl80211_flush_pmksa, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f256dfffbf4..baa898add28 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -141,62 +141,35 @@ static const struct ieee80211_regdomain us_regdom = { .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), - /* IEEE 802.11a, channel 36 */ - REG_RULE(5180-10, 5180+10, 40, 6, 23, 0), - /* IEEE 802.11a, channel 40 */ - REG_RULE(5200-10, 5200+10, 40, 6, 23, 0), - /* IEEE 802.11a, channel 44 */ - REG_RULE(5220-10, 5220+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 36..48 */ + REG_RULE(5180-10, 5240+10, 40, 6, 17, 0), /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5240-10, 5320+10, 40, 6, 23, 0), + REG_RULE(5260-10, 5320+10, 40, 6, 20, NL80211_RRF_DFS), + /* IEEE 802.11a, channels 100..124 */ + REG_RULE(5500-10, 5590+10, 40, 6, 20, 
NL80211_RRF_DFS), + /* IEEE 802.11a, channels 132..144 */ + REG_RULE(5660-10, 5700+10, 40, 6, 20, NL80211_RRF_DFS), /* IEEE 802.11a, channels 149..165, outdoor */ REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), } }; static const struct ieee80211_regdomain jp_regdom = { - .n_reg_rules = 3, + .n_reg_rules = 6, .alpha2 = "JP", .reg_rules = { - /* IEEE 802.11b/g, channels 1..14 */ - REG_RULE(2412-10, 2484+10, 40, 6, 20, 0), - /* IEEE 802.11a, channels 34..48 */ - REG_RULE(5170-10, 5240+10, 40, 6, 20, - NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11b/g, channels 1..11 */ + REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), + /* IEEE 802.11b/g, channels 12..13 */ + REG_RULE(2467-10, 2472+10, 20, 6, 20, 0), + /* IEEE 802.11b/g, channel 14 */ + REG_RULE(2484-10, 2484+10, 20, 6, 20, NL80211_RRF_NO_OFDM), + /* IEEE 802.11a, channels 36..48 */ + REG_RULE(5180-10, 5240+10, 40, 6, 20, 0), /* IEEE 802.11a, channels 52..64 */ - REG_RULE(5260-10, 5320+10, 40, 6, 20, - NL80211_RRF_NO_IBSS | - NL80211_RRF_DFS), - } -}; - -static const struct ieee80211_regdomain eu_regdom = { - .n_reg_rules = 6, - /* - * This alpha2 is bogus, we leave it here just for stupid - * backward compatibility - */ - .alpha2 = "EU", - .reg_rules = { - /* IEEE 802.11b/g, channels 1..13 */ - REG_RULE(2412-10, 2472+10, 40, 6, 20, 0), - /* IEEE 802.11a, channel 36 */ - REG_RULE(5180-10, 5180+10, 40, 6, 23, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channel 40 */ - REG_RULE(5200-10, 5200+10, 40, 6, 23, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channel 44 */ - REG_RULE(5220-10, 5220+10, 40, 6, 23, - NL80211_RRF_PASSIVE_SCAN), - /* IEEE 802.11a, channels 48..64 */ - REG_RULE(5240-10, 5320+10, 40, 6, 20, - NL80211_RRF_NO_IBSS | - NL80211_RRF_DFS), - /* IEEE 802.11a, channels 100..140 */ - REG_RULE(5500-10, 5700+10, 40, 6, 30, - NL80211_RRF_NO_IBSS | - NL80211_RRF_DFS), + REG_RULE(5260-10, 5320+10, 40, 6, 20, NL80211_RRF_DFS), + /* IEEE 802.11a, channels 100..144 */ + REG_RULE(5500-10, 5700+10, 40, 6, 23, NL80211_RRF_DFS), } }; @@ -206,15 +179,17 @@ static const struct ieee80211_regdomain *static_regdom(char *alpha2) return &us_regdom; if (alpha2[0] == 'J' && alpha2[1] == 'P') return &jp_regdom; + /* Use world roaming rules for "EU", since it was a pseudo + domain anyway... 
*/ if (alpha2[0] == 'E' && alpha2[1] == 'U') - return &eu_regdom; - /* Default, as per the old rules */ - return &us_regdom; + return &world_regdom; + /* Default, world roaming rules */ + return &world_regdom; } static bool is_old_static_regdom(const struct ieee80211_regdomain *rd) { - if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) + if (rd == &us_regdom || rd == &jp_regdom || rd == &world_regdom) return true; return false; } @@ -1008,7 +983,7 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && - request_wiphy->strict_regulatory) { + request_wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) { /* * This gaurantees the driver's requested regulatory domain * will always be used as a base for further regulatory @@ -1051,13 +1026,13 @@ static bool ignore_reg_update(struct wiphy *wiphy, if (!last_request) return true; if (initiator == NL80211_REGDOM_SET_BY_CORE && - wiphy->custom_regulatory) + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) return true; /* * wiphy->regd will be set once the device has its own * desired regulatory domain set */ - if (wiphy->strict_regulatory && !wiphy->regd && + if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && !is_world_regdom(last_request->alpha2)) return true; return false; } @@ -1093,7 +1068,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; - if (wiphy->disable_beacon_hints) + if (wiphy->flags & WIPHY_FLAG_DISABLE_BEACON_HINTS) return; chan_before.center_freq = chan->center_freq; @@ -1164,7 +1139,7 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) return true; if (last_request && last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && - wiphy->custom_regulatory) + wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY) return true; return false; } @@ -1591,7 +1566,8 @@ static void reg_process_hint(struct regulatory_request *reg_request) r = __regulatory_hint(wiphy, reg_request); /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy && wiphy->strict_regulatory) + if (r == -EALREADY && wiphy && + wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) wiphy_update_regulatory(wiphy, reg_request->initiator); out: mutex_unlock(&reg_mutex); } @@ -1930,7 +1906,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) const struct ieee80211_freq_range *freq_range = NULL; const struct ieee80211_power_rule *power_rule = NULL; - printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), " + printk(KERN_INFO " (start_freq - end_freq @ bandwidth), " "(max_antenna_gain, max_eirp)\n"); for (i = 0; i < rd->n_reg_rules; i++) { @@ -1943,7 +1919,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) * in certain regions */ if (power_rule->max_antenna_gain) - printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " "(%d mBi, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, @@ -1951,7 +1927,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) power_rule->max_antenna_gain, power_rule->max_eirp); else - printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + printk(KERN_INFO " (%d KHz - %d KHz @ %d KHz), " "(N/A, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, diff --git a/net/wireless/scan.c index e5f92ee758f..12dfa62aad1 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -22,7 +22,7 @@ void ___cfg80211_scan_done(struct
cfg80211_registered_device *rdev, bool leak) { struct cfg80211_scan_request *request; struct net_device *dev; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -47,7 +47,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) else nl80211_send_scan_done(rdev, dev); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (!request->aborted) { memset(&wrqu, 0, sizeof(wrqu)); @@ -88,7 +88,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; - schedule_work(&wiphy_to_dev(request->wiphy)->scan_done_wk); + queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -217,7 +217,7 @@ static bool is_mesh(struct cfg80211_bss *a, a->len_information_elements); if (!ie) return false; - if (ie[1] != IEEE80211_MESH_CONFIG_LEN) + if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) return false; /* @@ -225,7 +225,8 @@ static bool is_mesh(struct cfg80211_bss *a, * comparing since that may differ between stations taking * part in the same mesh. */ - return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0; + return memcmp(ie + 2, meshcfg, + sizeof(struct ieee80211_meshconf_ie) - 2) == 0; } static int cmp_bss(struct cfg80211_bss *a, @@ -399,7 +400,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->pub.information_elements, res->pub.len_information_elements); if (!meshid || !meshcfg || - meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) { + meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { /* bogus mesh */ kref_put(&res->ref, bss_release); return NULL; @@ -592,7 +593,7 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) } EXPORT_SYMBOL(cfg80211_unlink_bss); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) @@ -650,9 +651,15 @@ int cfg80211_wext_siwscan(struct net_device *dev, i = 0; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { int j; + if (!wiphy->bands[band]) continue; + for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + /* ignore disabled channels */ + if (wiphy->bands[band]->channels[j].flags & + IEEE80211_CHAN_DISABLED) + continue; /* If we have a wireless request structure and the * wireless request specifies frequencies, then search @@ -859,7 +866,7 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, break; case WLAN_EID_MESH_CONFIG: ismesh = true; - if (ie[1] != IEEE80211_MESH_CONFIG_LEN) + if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) break; buf = kmalloc(50, GFP_ATOMIC); if (!buf) @@ -867,35 +874,40 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, cfg = ie + 2; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "Mesh network (version %d)", cfg[0]); + sprintf(buf, "Mesh Network Path Selection Protocol ID: " + "0x%02X", cfg[0]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Metric ID: 0x%02X", + cfg[1]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Congestion Control Mode ID: 0x%02X", + cfg[2]); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); - sprintf(buf, "Path Selection Protocol ID: " - 
"0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], - cfg[4]); + sprintf(buf, "Synchronization ID: 0x%02X", cfg[3]); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); - sprintf(buf, "Path Selection Metric ID: " - "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], - cfg[8]); + sprintf(buf, "Authentication ID: 0x%02X", cfg[4]); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); - sprintf(buf, "Congestion Control Mode ID: " - "0x%02X%02X%02X%02X", cfg[9], cfg[10], - cfg[11], cfg[12]); + sprintf(buf, "Formation Info: 0x%02X", cfg[5]); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); - sprintf(buf, "Channel Precedence: " - "0x%02X%02X%02X%02X", cfg[13], cfg[14], - cfg[15], cfg[16]); + sprintf(buf, "Capabilities: 0x%02X", cfg[6]); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, @@ -925,8 +937,8 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, ie += ie[1] + 2; } - if (bss->pub.capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) - || ismesh) { + if (bss->pub.capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) || + ismesh) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWMODE; if (ismesh) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 9f0b2800a9d..2333d78187e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -365,7 +365,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, { struct wireless_dev *wdev = dev->ieee80211_ptr; u8 *country_ie; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -382,7 +382,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, resp_ie, resp_ie_len, status, GFP_KERNEL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (wextev) { if (req_ie && status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); @@ -488,7 +488,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); - schedule_work(&rdev->event_work); + queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_connect_result); @@ -497,7 +497,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len) { struct cfg80211_bss *bss; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -532,7 +532,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, GFP_KERNEL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT if (req_ie) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = req_ie_len; @@ -583,7 +583,7 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); - schedule_work(&rdev->event_work); + queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_roamed); @@ -593,7 +593,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); int i; -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -651,7 +651,7 @@ void 
__cfg80211_disconnected(struct net_device *dev, const u8 *ie, for (i = 0; i < 6; i++) rdev->ops->del_key(wdev->wiphy, dev, i, NULL); -#ifdef CONFIG_WIRELESS_EXT +#ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); @@ -681,7 +681,7 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, flags); - schedule_work(&rdev->event_work); + queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_disconnected); diff --git a/net/wireless/util.c b/net/wireless/util.c index 3fc2df86278..59361fdcb5d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -320,7 +320,9 @@ int ieee80211_data_to_8023(struct sk_buff *skb, u8 *addr, break; case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): if (unlikely(iftype != NL80211_IFTYPE_WDS && - iftype != NL80211_IFTYPE_MESH_POINT)) + iftype != NL80211_IFTYPE_MESH_POINT && + iftype != NL80211_IFTYPE_AP_VLAN && + iftype != NL80211_IFTYPE_STATION)) return -1; if (iftype == NL80211_IFTYPE_MESH_POINT) { struct ieee80211s_hdr *meshdr = @@ -656,7 +658,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; + /* if it's part of a bridge, reject changing type to station/ibss */ + if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION)) + return -EBUSY; + if (ntype != otype) { + dev->ieee80211_ptr->use_4addr = false; + switch (otype) { case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, false); @@ -680,5 +689,34 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, WARN_ON(!err && dev->ieee80211_ptr->iftype != ntype); + if (!err && params && params->use_4addr != -1) + dev->ieee80211_ptr->use_4addr = params->use_4addr; + + if (!err) { + dev->priv_flags &= ~IFF_DONT_BRIDGE; + switch (ntype) { + case NL80211_IFTYPE_STATION: + if (dev->ieee80211_ptr->use_4addr) + break; + /* fall through */ + case NL80211_IFTYPE_ADHOC: + dev->priv_flags |= IFF_DONT_BRIDGE; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: + /* bridging OK */ + break; + case NL80211_IFTYPE_MONITOR: + /* monitor can't bridge anyway */ + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + /* not happening */ + break; + } + } + return err; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 561a45cf2a6..54face3d442 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -437,6 +437,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i; + bool rejoin = false; if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), @@ -466,8 +467,25 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (remove) { err = 0; - if (wdev->current_bss) + if (wdev->current_bss) { + /* + * If removing the current TX key, we will need to + * join a new IBSS without the privacy bit clear. 
+ */ + if (idx == wdev->wext.default_key && + wdev->iftype == NL80211_IFTYPE_ADHOC) { + __cfg80211_leave_ibss(rdev, wdev->netdev, true); + rejoin = true; + } err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); + } + wdev->wext.connect.privacy = false; + /* + * Applications using wireless extensions expect to be + * able to delete keys that don't exist, so allow that. + */ + if (err == -ENOENT) + err = 0; if (!err) { if (!addr) { wdev->wext.keys->params[idx].key_len = 0; @@ -478,12 +496,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, else if (idx == wdev->wext.default_mgmt_key) wdev->wext.default_mgmt_key = -1; } - /* - * Applications using wireless extensions expect to be - * able to delete keys that don't exist, so allow that. - */ - if (err == -ENOENT) - return 0; + + if (!err && rejoin) + err = cfg80211_ibss_wext_join(rdev, wdev); return err; } @@ -511,11 +526,25 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if ((params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104) && (tx_key || (!addr && wdev->wext.default_key == -1))) { - if (wdev->current_bss) + if (wdev->current_bss) { + /* + * If we are getting a new TX key from not having + * had one before we need to join a new IBSS with + * the privacy bit set. + */ + if (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->wext.default_key == -1) { + __cfg80211_leave_ibss(rdev, wdev->netdev, true); + rejoin = true; + } err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx); - if (!err) + } + if (!err) { wdev->wext.default_key = idx; + if (rejoin) + err = cfg80211_ibss_wext_join(rdev, wdev); + } return err; } @@ -539,10 +568,13 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, { int err; + /* devlist mutex needed for possible IBSS re-join */ + mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_set_encryption(rdev, dev, addr, remove, tx_key, idx, params); wdev_unlock(dev->ieee80211_ptr); + mutex_unlock(&rdev->devlist_mtx); return err; } @@ -904,8 +936,6 @@ static int cfg80211_set_auth_alg(struct wireless_dev *wdev, static int cfg80211_set_wpa_version(struct wireless_dev *wdev, u32 wpa_versions) { - wdev->wext.connect.crypto.wpa_versions = 0; - if (wpa_versions & ~(IW_AUTH_WPA_VERSION_WPA | IW_AUTH_WPA_VERSION_WPA2| IW_AUTH_WPA_VERSION_DISABLED)) @@ -933,8 +963,6 @@ static int cfg80211_set_wpa_version(struct wireless_dev *wdev, u32 wpa_versions) static int cfg80211_set_cipher_group(struct wireless_dev *wdev, u32 cipher) { - wdev->wext.connect.crypto.cipher_group = 0; - if (cipher & IW_AUTH_CIPHER_WEP40) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_WEP40; @@ -950,6 +978,8 @@ static int cfg80211_set_cipher_group(struct wireless_dev *wdev, u32 cipher) else if (cipher & IW_AUTH_CIPHER_AES_CMAC) wdev->wext.connect.crypto.cipher_group = WLAN_CIPHER_SUITE_AES_CMAC; + else if (cipher & IW_AUTH_CIPHER_NONE) + wdev->wext.connect.crypto.cipher_group = 0; else return -EINVAL; @@ -1372,6 +1402,47 @@ int cfg80211_wext_giwessid(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwessid); +int cfg80211_wext_siwpmksa(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_pmksa cfg_pmksa; + struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; + + memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa)); + + if 
(wdev->iftype != NL80211_IFTYPE_STATION) + return -EINVAL; + + cfg_pmksa.bssid = pmksa->bssid.sa_data; + cfg_pmksa.pmkid = pmksa->pmkid; + + switch (pmksa->cmd) { + case IW_PMKSA_ADD: + if (!rdev->ops->set_pmksa) + return -EOPNOTSUPP; + + return rdev->ops->set_pmksa(&rdev->wiphy, dev, &cfg_pmksa); + + case IW_PMKSA_REMOVE: + if (!rdev->ops->del_pmksa) + return -EOPNOTSUPP; + + return rdev->ops->del_pmksa(&rdev->wiphy, dev, &cfg_pmksa); + + case IW_PMKSA_FLUSH: + if (!rdev->ops->flush_pmksa) + return -EOPNOTSUPP; + + return rdev->ops->flush_pmksa(&rdev->wiphy, dev); + + default: + return -EOPNOTSUPP; + } +} + static const iw_handler cfg80211_handlers[] = { [IW_IOCTL_IDX(SIOCGIWNAME)] = (iw_handler) cfg80211_wext_giwname, [IW_IOCTL_IDX(SIOCSIWFREQ)] = (iw_handler) cfg80211_wext_siwfreq, @@ -1404,6 +1475,7 @@ static const iw_handler cfg80211_handlers[] = { [IW_IOCTL_IDX(SIOCSIWAUTH)] = (iw_handler) cfg80211_wext_siwauth, [IW_IOCTL_IDX(SIOCGIWAUTH)] = (iw_handler) cfg80211_wext_giwauth, [IW_IOCTL_IDX(SIOCSIWENCODEEXT)]= (iw_handler) cfg80211_wext_siwencodeext, + [IW_IOCTL_IDX(SIOCSIWPMKSA)] = (iw_handler) cfg80211_wext_siwpmksa, }; const struct iw_handler_def cfg80211_wext_handler = { diff --git a/net/wireless/wext.c b/net/wireless/wext-core.c index 60fe57761ca..5e1656bdf23 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext-core.c @@ -1,112 +1,28 @@ /* - * This file implement the Wireless Extensions APIs. + * This file implement the Wireless Extensions core API. * * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * * (As all part of the Linux kernel, this file is GPL) */ - -/************************** DOCUMENTATION **************************/ -/* - * API definition : - * -------------- - * See <linux/wireless.h> for details of the APIs and the rest. - * - * History : - * ------- - * - * v1 - 5.12.01 - Jean II - * o Created this file. - * - * v2 - 13.12.01 - Jean II - * o Move /proc/net/wireless stuff from net/core/dev.c to here - * o Make Wireless Extension IOCTLs go through here - * o Added iw_handler handling ;-) - * o Added standard ioctl description - * o Initial dumb commit strategy based on orinoco.c - * - * v3 - 19.12.01 - Jean II - * o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call - * o Add event dispatcher function - * o Add event description - * o Propagate events as rtnetlink IFLA_WIRELESS option - * o Generate event on selected SET requests - * - * v4 - 18.04.02 - Jean II - * o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1 - * - * v5 - 21.06.02 - Jean II - * o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup) - * o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes - * o Add IWEVCUSTOM for driver specific event/scanning token - * o Turn on WE_STRICT_WRITE by default + kernel warning - * o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num) - * o Fix off-by-one in test (extra_size <= IFNAMSIZ) - * - * v6 - 9.01.03 - Jean II - * o Add common spy support : iw_handler_set_spy(), wireless_spy_update() - * o Add enhanced spy support : iw_handler_set_thrspy() and event. 
- * o Add WIRELESS_EXT version display in /proc/net/wireless - * - * v6 - 18.06.04 - Jean II - * o Change get_spydata() method for added safety - * o Remove spy #ifdef, they are always on -> cleaner code - * o Allow any size GET request if user specifies length > max - * and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV - * o Start migrating get_wireless_stats to struct iw_handler_def - * o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus - * Based on patch from Pavel Roskin <proski@gnu.org> : - * o Fix kernel data leak to user space in private handler handling - * - * v7 - 18.3.05 - Jean II - * o Remove (struct iw_point *)->pointer from events and streams - * o Remove spy_offset from struct iw_handler_def - * o Start deprecating dev->get_wireless_stats, output a warning - * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless - * o Don't lose INVALID/DBM flags when clearing UPDATED flags (iwstats) - * - * v8 - 17.02.06 - Jean II - * o RtNetlink requests support (SET/GET) - * - * v8b - 03.08.06 - Herbert Xu - * o Fix Wireless Event locking issues. - * - * v9 - 14.3.06 - Jean II - * o Change length in ESSID and NICK to strlen() instead of strlen()+1 - * o Make standard_ioctl_num and standard_event_num unsigned - * o Remove (struct net_device *)->get_wireless_stats() - * - * v10 - 16.3.07 - Jean II - * o Prevent leaking of kernel space in stream on 64 bits. - */ - -/***************************** INCLUDES *****************************/ - -#include <linux/module.h> -#include <linux/types.h> /* off_t */ -#include <linux/netdevice.h> /* struct ifreq, dev_get_by_name() */ -#include <linux/proc_fs.h> -#include <linux/rtnetlink.h> /* rtnetlink stuff */ -#include <linux/seq_file.h> -#include <linux/init.h> /* for __init */ -#include <linux/if_arp.h> /* ARPHRD_ETHER */ -#include <linux/etherdevice.h> /* compare_ether_addr */ -#include <linux/interrupt.h> -#include <net/net_namespace.h> - -#include <linux/wireless.h> /* Pretty obvious */ -#include <net/iw_handler.h> /* New driver API */ +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/wireless.h> +#include <linux/uaccess.h> +#include <net/cfg80211.h> +#include <net/iw_handler.h> #include <net/netlink.h> #include <net/wext.h> +#include <net/net_namespace.h> + +typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, + unsigned int, struct iw_request_info *, + iw_handler); -#include <asm/uaccess.h> /* copy_to_user() */ -/************************* GLOBAL VARIABLES *************************/ -/* - * You should not use global variables, because of re-entrancy. - * On our case, it's only const, so it's OK... - */ /* * Meta-data about all the standard Wireless Extension request we * know about. 
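For orientation, the renamed wext-core.c keeps servicing the same driver-facing call, wireless_send_event(); a minimal sketch of its use (the helper name is hypothetical, the SIOCGIWAP pattern mirrors the ibss.c and sme.c hunks earlier in this diff) before the event-queue plumbing that follows:

/*
 * Illustrative only: a driver reporting a new BSSID through the wext
 * event path.  With the changes below, the event is queued on the
 * per-netns wext_nlevents list and later pushed out over rtnetlink
 * (RTNLGRP_LINK) by the wireless_nlevent_work item.
 */
static void example_report_bssid(struct net_device *dev, const u8 *bssid)
{
	union iwreq_data wrqu;

	memset(&wrqu, 0, sizeof(wrqu));
	wrqu.ap_addr.sa_family = ARPHRD_ETHER;
	memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
	wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL);
}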
@@ -390,18 +306,6 @@ static const struct iw_ioctl_description standard_event[] = { }; static const unsigned standard_event_num = ARRAY_SIZE(standard_event); -/* Size (in bytes) of the various private data types */ -static const char iw_priv_type_size[] = { - 0, /* IW_PRIV_TYPE_NONE */ - 1, /* IW_PRIV_TYPE_BYTE */ - 1, /* IW_PRIV_TYPE_CHAR */ - 0, /* Not defined */ - sizeof(__u32), /* IW_PRIV_TYPE_INT */ - sizeof(struct iw_freq), /* IW_PRIV_TYPE_FLOAT */ - sizeof(struct sockaddr), /* IW_PRIV_TYPE_ADDR */ - 0, /* Not defined */ -}; - /* Size (in bytes) of various events */ static const int event_type_size[] = { IW_EV_LCP_LEN, /* IW_HEADER_TYPE_NULL */ @@ -433,323 +337,346 @@ static const int compat_event_type_size[] = { }; #endif -/************************ COMMON SUBROUTINES ************************/ -/* - * Stuff that may be used in various place or doesn't fit in one - * of the section below. - */ - -/* ---------------------------------------------------------------- */ -/* - * Return the driver handler associated with a specific Wireless Extension. - */ -static iw_handler get_handler(struct net_device *dev, unsigned int cmd) -{ - /* Don't "optimise" the following variable, it will crash */ - unsigned int index; /* *MUST* be unsigned */ - /* Check if we have some wireless handlers defined */ - if (dev->wireless_handlers == NULL) - return NULL; - - /* Try as a standard command */ - index = cmd - SIOCIWFIRST; - if (index < dev->wireless_handlers->num_standard) - return dev->wireless_handlers->standard[index]; - - /* Try as a private command */ - index = cmd - SIOCIWFIRSTPRIV; - if (index < dev->wireless_handlers->num_private) - return dev->wireless_handlers->private[index]; +/* IW event code */ - /* Not found */ - return NULL; -} - -/* ---------------------------------------------------------------- */ -/* - * Get statistics out of the driver - */ -struct iw_statistics *get_wireless_stats(struct net_device *dev) +static int __net_init wext_pernet_init(struct net *net) { - /* New location */ - if ((dev->wireless_handlers != NULL) && - (dev->wireless_handlers->get_wireless_stats != NULL)) - return dev->wireless_handlers->get_wireless_stats(dev); - - /* Not found */ - return NULL; + skb_queue_head_init(&net->wext_nlevents); + return 0; } -/* ---------------------------------------------------------------- */ -/* - * Call the commit handler in the driver - * (if exist and if conditions are right) - * - * Note : our current commit strategy is currently pretty dumb, - * but we will be able to improve on that... - * The goal is to try to agreagate as many changes as possible - * before doing the commit. Drivers that will define a commit handler - * are usually those that need a reset after changing parameters, so - * we want to minimise the number of reset. - * A cool idea is to use a timer : at each "set" command, we re-set the - * timer, when the timer eventually fires, we call the driver. - * Hopefully, more on that later. - * - * Also, I'm waiting to see how many people will complain about the - * netif_running(dev) test. I'm open on that one... 
- * Hopefully, the driver will remember to do a commit in "open()" ;-) - */ -static int call_commit_handler(struct net_device *dev) +static void __net_exit wext_pernet_exit(struct net *net) { - if ((netif_running(dev)) && - (dev->wireless_handlers->standard[0] != NULL)) - /* Call the commit handler on the driver */ - return dev->wireless_handlers->standard[0](dev, NULL, - NULL, NULL); - else - return 0; /* Command completed successfully */ + skb_queue_purge(&net->wext_nlevents); } -/* ---------------------------------------------------------------- */ -/* - * Calculate size of private arguments - */ -static int get_priv_size(__u16 args) -{ - int num = args & IW_PRIV_SIZE_MASK; - int type = (args & IW_PRIV_TYPE_MASK) >> 12; +static struct pernet_operations wext_pernet_ops = { + .init = wext_pernet_init, + .exit = wext_pernet_exit, +}; - return num * iw_priv_type_size[type]; +static int __init wireless_nlevent_init(void) +{ + return register_pernet_subsys(&wext_pernet_ops); } -/* ---------------------------------------------------------------- */ -/* - * Re-calculate the size of private arguments - */ -static int adjust_priv_size(__u16 args, struct iw_point *iwp) +subsys_initcall(wireless_nlevent_init); + +/* Process events generated by the wireless layer or the driver. */ +static void wireless_nlevent_process(struct work_struct *work) { - int num = iwp->length; - int max = args & IW_PRIV_SIZE_MASK; - int type = (args & IW_PRIV_TYPE_MASK) >> 12; + struct sk_buff *skb; + struct net *net; - /* Make sure the driver doesn't goof up */ - if (max < num) - num = max; + rtnl_lock(); + + for_each_net(net) { + while ((skb = skb_dequeue(&net->wext_nlevents))) + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, + GFP_KERNEL); + } - return num * iw_priv_type_size[type]; + rtnl_unlock(); } -/* ---------------------------------------------------------------- */ -/* - * Standard Wireless Handler : get wireless stats - * Allow programatic access to /proc/net/wireless even if /proc - * doesn't exist... Also more efficient... - */ -static int iw_handler_get_iwstats(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) +static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); + +static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev, + struct sk_buff *skb) { - /* Get stats from the driver */ - struct iw_statistics *stats; + struct ifinfomsg *r; + struct nlmsghdr *nlh; - stats = get_wireless_stats(dev); - if (stats) { - /* Copy statistics to extra */ - memcpy(extra, stats, sizeof(struct iw_statistics)); - wrqu->data.length = sizeof(struct iw_statistics); + nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*r), 0); + if (!nlh) + return NULL; - /* Check if we need to clear the updated flag */ - if (wrqu->data.flags != 0) - stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; - return 0; - } else - return -EOPNOTSUPP; + r = nlmsg_data(nlh); + r->ifi_family = AF_UNSPEC; + r->__ifi_pad = 0; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev_get_flags(dev); + r->ifi_change = 0; /* Wireless changes don't affect those flags */ + + NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); + + return nlh; + nla_put_failure: + nlmsg_cancel(skb, nlh); + return NULL; } -/* ---------------------------------------------------------------- */ + /* - * Standard Wireless Handler : get iwpriv definitions - * Export the driver private handler definition - * They will be picked up by tools like iwpriv... + * Main event dispatcher. 
Called from other parts and drivers. + * Send the event on the appropriate channels. + * May be called from interrupt context. */ -static int iw_handler_get_private(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) +void wireless_send_event(struct net_device * dev, + unsigned int cmd, + union iwreq_data * wrqu, + const char * extra) { - /* Check if the driver has something to export */ - if ((dev->wireless_handlers->num_private_args == 0) || - (dev->wireless_handlers->private_args == NULL)) - return -EOPNOTSUPP; + const struct iw_ioctl_description * descr = NULL; + int extra_len = 0; + struct iw_event *event; /* Mallocated whole event */ + int event_len; /* Its size */ + int hdr_len; /* Size of the event header */ + int wrqu_off = 0; /* Offset in wrqu */ + /* Don't "optimise" the following variable, it will crash */ + unsigned cmd_index; /* *MUST* be unsigned */ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct nlattr *nla; +#ifdef CONFIG_COMPAT + struct __compat_iw_event *compat_event; + struct compat_iw_point compat_wrqu; + struct sk_buff *compskb; +#endif - /* Check if there is enough buffer up there */ - if (wrqu->data.length < dev->wireless_handlers->num_private_args) { - /* User space can't know in advance how large the buffer - * needs to be. Give it a hint, so that we can support - * any size buffer we want somewhat efficiently... */ - wrqu->data.length = dev->wireless_handlers->num_private_args; - return -E2BIG; + /* + * Nothing in the kernel sends scan events with data, be safe. + * This is necessary because we cannot fix up scan event data + * for compat, due to being contained in 'extra', but normally + * applications are required to retrieve the scan data anyway + * and no data is included in the event, this codifies that + * practice. + */ + if (WARN_ON(cmd == SIOCGIWSCAN && extra)) + extra = NULL; + + /* Get the description of the Event */ + if (cmd <= SIOCIWLAST) { + cmd_index = cmd - SIOCIWFIRST; + if (cmd_index < standard_ioctl_num) + descr = &(standard_ioctl[cmd_index]); + } else { + cmd_index = cmd - IWEVFIRST; + if (cmd_index < standard_event_num) + descr = &(standard_event[cmd_index]); + } + /* Don't accept unknown events */ + if (descr == NULL) { + /* Note : we don't return an error to the driver, because + * the driver would not know what to do about it. It can't + * return an error to the user, because the event is not + * initiated by a user request. + * The best the driver could do is to log an error message. + * We will do it ourselves instead... + */ + printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", + dev->name, cmd); + return; } - /* Set the number of available ioctls. */ - wrqu->data.length = dev->wireless_handlers->num_private_args; + /* Check extra parameters and set extra_len */ + if (descr->header_type == IW_HEADER_TYPE_POINT) { + /* Check if number of token fits within bounds */ + if (wrqu->data.length > descr->max_tokens) { + printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); + return; + } + if (wrqu->data.length < descr->min_tokens) { + printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); + return; + } + /* Calculate extra_len - extra is NULL for restricted events */ + if (extra != NULL) + extra_len = wrqu->data.length * descr->token_size; + /* Always at an offset in wrqu */ + wrqu_off = IW_EV_POINT_OFF; + } - /* Copy structure to the user buffer. 
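/*
 * Editor's illustration (not from this patch): what the -E2BIG "length
 * hint" convention of SIOCGIWPRIV above looks like from user space.  The
 * caller passes a guessed buffer; if the kernel answers E2BIG it also
 * writes back the required element count, so one realloc-and-retry is
 * enough.  Function and variable names here are invented.
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/wireless.h>

static struct iw_priv_args *fetch_priv_args(int sock, const char *ifname,
                                            int *count)
{
        struct iwreq iwr;
        struct iw_priv_args *args = NULL, *tmp;
        int len = 16;                   /* arbitrary first guess */

        memset(&iwr, 0, sizeof(iwr));
        strncpy(iwr.ifr_name, ifname, IFNAMSIZ - 1);

        for (;;) {
                tmp = realloc(args, len * sizeof(*args));
                if (!tmp) {
                        free(args);
                        return NULL;
                }
                args = tmp;
                iwr.u.data.pointer = args;
                iwr.u.data.length = len;
                iwr.u.data.flags = 0;
                if (ioctl(sock, SIOCGIWPRIV, &iwr) == 0)
                        break;
                if (errno != E2BIG) {
                        free(args);
                        return NULL;
                }
                len = iwr.u.data.length;   /* size hinted by the kernel */
        }
        *count = iwr.u.data.length;
        return args;
}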
*/ - memcpy(extra, dev->wireless_handlers->private_args, - sizeof(struct iw_priv_args) * wrqu->data.length); + /* Total length of the event */ + hdr_len = event_type_size[descr->header_type]; + event_len = hdr_len + extra_len; - return 0; -} + /* + * The problem for 64/32 bit. + * + * On 64-bit, a regular event is laid out as follows: + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | event.len | event.cmd | p a d d i n g | + * | wrqu data ... (with the correct size) | + * + * This padding exists because we manipulate event->u, + * and 'event' is not packed. + * + * An iw_point event is laid out like this instead: + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | event.len | event.cmd | p a d d i n g | + * | iwpnt.len | iwpnt.flg | p a d d i n g | + * | extra data ... + * + * The second padding exists because struct iw_point is extended, + * but this depends on the platform... + * + * On 32-bit, all the padding shouldn't be there. + */ + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return; -/******************** /proc/net/wireless SUPPORT ********************/ -/* - * The /proc/net/wireless file is a human readable user-space interface - * exporting various wireless specific statistics from the wireless devices. - * This is the most popular part of the Wireless Extensions ;-) - * - * This interface is a pure clone of /proc/net/dev (in net/core/dev.c). - * The content of the file is basically the content of "struct iw_statistics". - */ + /* Send via the RtNetlink event channel */ + nlh = rtnetlink_ifinfo_prep(dev, skb); + if (WARN_ON(!nlh)) { + kfree_skb(skb); + return; + } -#ifdef CONFIG_PROC_FS + /* Add the wireless events in the netlink packet */ + nla = nla_reserve(skb, IFLA_WIRELESS, event_len); + if (!nla) { + kfree_skb(skb); + return; + } + event = nla_data(nla); -/* ---------------------------------------------------------------- */ -/* - * Print one entry (line) of /proc/net/wireless - */ -static void wireless_seq_printf_stats(struct seq_file *seq, - struct net_device *dev) -{ - /* Get stats from the driver */ - struct iw_statistics *stats = get_wireless_stats(dev); - static struct iw_statistics nullstats = {}; + /* Fill event - first clear to avoid data leaking */ + memset(event, 0, hdr_len); + event->len = event_len; + event->cmd = cmd; + memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); + if (extra_len) + memcpy(((char *) event) + hdr_len, extra, extra_len); - /* show device if it's wireless regardless of current stats */ - if (!stats && dev->wireless_handlers) - stats = &nullstats; + nlmsg_end(skb, nlh); +#ifdef CONFIG_COMPAT + hdr_len = compat_event_type_size[descr->header_type]; + event_len = hdr_len + extra_len; - if (stats) { - seq_printf(seq, "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d " - "%6d %6d %6d\n", - dev->name, stats->status, stats->qual.qual, - stats->qual.updated & IW_QUAL_QUAL_UPDATED - ? '.' : ' ', - ((__s32) stats->qual.level) - - ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), - stats->qual.updated & IW_QUAL_LEVEL_UPDATED - ? '.' : ' ', - ((__s32) stats->qual.noise) - - ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), - stats->qual.updated & IW_QUAL_NOISE_UPDATED - ? '.' 
: ' ', - stats->discard.nwid, stats->discard.code, - stats->discard.fragment, stats->discard.retries, - stats->discard.misc, stats->miss.beacon); - - if (stats != &nullstats) - stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; + compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!compskb) { + kfree_skb(skb); + return; } -} -/* ---------------------------------------------------------------- */ -/* - * Print info for /proc/net/wireless (print all entries) - */ -static int wireless_dev_seq_show(struct seq_file *seq, void *v) -{ - might_sleep(); - - if (v == SEQ_START_TOKEN) - seq_printf(seq, "Inter-| sta-| Quality | Discarded " - "packets | Missed | WE\n" - " face | tus | link level noise | nwid " - "crypt frag retry misc | beacon | %d\n", - WIRELESS_EXT); - else - wireless_seq_printf_stats(seq, v); - return 0; + /* Send via the RtNetlink event channel */ + nlh = rtnetlink_ifinfo_prep(dev, compskb); + if (WARN_ON(!nlh)) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + + /* Add the wireless events in the netlink packet */ + nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); + if (!nla) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + compat_event = nla_data(nla); + + compat_event->len = event_len; + compat_event->cmd = cmd; + if (descr->header_type == IW_HEADER_TYPE_POINT) { + compat_wrqu.length = wrqu->data.length; + compat_wrqu.flags = wrqu->data.flags; + memcpy(&compat_event->pointer, + ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, + hdr_len - IW_EV_COMPAT_LCP_LEN); + if (extra_len) + memcpy(((char *) compat_event) + hdr_len, + extra, extra_len); + } else { + /* extra_len must be zero, so no if (extra) needed */ + memcpy(&compat_event->pointer, wrqu, + hdr_len - IW_EV_COMPAT_LCP_LEN); + } + + nlmsg_end(compskb, nlh); + + skb_shinfo(skb)->frag_list = compskb; +#endif + skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); + schedule_work(&wireless_nlevent_work); } +EXPORT_SYMBOL(wireless_send_event); + + + +/* IW handlers */ -static void *wireless_dev_seq_start(struct seq_file *seq, loff_t *pos) +struct iw_statistics *get_wireless_stats(struct net_device *dev) { - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; +#ifdef CONFIG_WIRELESS_EXT + if ((dev->wireless_handlers != NULL) && + (dev->wireless_handlers->get_wireless_stats != NULL)) + return dev->wireless_handlers->get_wireless_stats(dev); +#endif - rtnl_lock(); - if (!*pos) - return SEQ_START_TOKEN; +#ifdef CONFIG_CFG80211_WEXT + if (dev->ieee80211_ptr && dev->ieee80211_ptr && + dev->ieee80211_ptr->wiphy && + dev->ieee80211_ptr->wiphy->wext && + dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) + return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev); +#endif - off = 1; - for_each_netdev(net, dev) - if (off++ == *pos) - return dev; + /* not found */ return NULL; } -static void *wireless_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static int iw_handler_get_iwstats(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) { - struct net *net = seq_file_net(seq); + /* Get stats from the driver */ + struct iw_statistics *stats; - ++*pos; + stats = get_wireless_stats(dev); + if (stats) { + /* Copy statistics to extra */ + memcpy(extra, stats, sizeof(struct iw_statistics)); + wrqu->data.length = sizeof(struct iw_statistics); - return v == SEQ_START_TOKEN ? 
- first_net_device(net) : next_net_device(v); + /* Check if we need to clear the updated flag */ + if (wrqu->data.flags != 0) + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; + return 0; + } else + return -EOPNOTSUPP; } -static void wireless_dev_seq_stop(struct seq_file *seq, void *v) +static iw_handler get_handler(struct net_device *dev, unsigned int cmd) { - rtnl_unlock(); -} - -static const struct seq_operations wireless_seq_ops = { - .start = wireless_dev_seq_start, - .next = wireless_dev_seq_next, - .stop = wireless_dev_seq_stop, - .show = wireless_dev_seq_show, -}; + /* Don't "optimise" the following variable, it will crash */ + unsigned int index; /* *MUST* be unsigned */ + const struct iw_handler_def *handlers = NULL; -static int seq_open_wireless(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &wireless_seq_ops, - sizeof(struct seq_net_private)); -} +#ifdef CONFIG_CFG80211_WEXT + if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) + handlers = dev->ieee80211_ptr->wiphy->wext; +#endif +#ifdef CONFIG_WIRELESS_EXT + if (dev->wireless_handlers) + handlers = dev->wireless_handlers; +#endif -static const struct file_operations wireless_seq_fops = { - .owner = THIS_MODULE, - .open = seq_open_wireless, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; + if (!handlers) + return NULL; -int wext_proc_init(struct net *net) -{ - /* Create /proc/net/wireless entry */ - if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) - return -ENOMEM; + /* Try as a standard command */ + index = cmd - SIOCIWFIRST; + if (index < handlers->num_standard) + return handlers->standard[index]; - return 0; -} +#ifdef CONFIG_WEXT_PRIV + /* Try as a private command */ + index = cmd - SIOCIWFIRSTPRIV; + if (index < handlers->num_private) + return handlers->private[index]; +#endif -void wext_proc_exit(struct net *net) -{ - proc_net_remove(net, "wireless"); + /* Not found */ + return NULL; } -#endif /* CONFIG_PROC_FS */ -/************************** IOCTL SUPPORT **************************/ -/* - * The original user space API to configure all those Wireless Extensions - * is through IOCTLs. - * In there, we check if we need to call the new driver API (iw_handler) - * or just call the driver ioctl handler. - */ - -/* ---------------------------------------------------------------- */ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, const struct iw_ioctl_description *descr, iw_handler handler, struct net_device *dev, @@ -875,7 +802,8 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, } /* Generate an event to notify listeners of the change */ - if ((descr->flags & IW_DESCR_FLAG_EVENT) && err == -EIWCOMMIT) { + if ((descr->flags & IW_DESCR_FLAG_EVENT) && + ((err == 0) || (err == -EIWCOMMIT))) { union iwreq_data *data = (union iwreq_data *) iwp; if (descr->flags & IW_DESCR_FLAG_RESTRICT) @@ -893,188 +821,39 @@ out: } /* - * Wrapper to call a standard Wireless Extension handler. - * We do various checks and also take care of moving data between - * user space and kernel space. 
- */ -static int ioctl_standard_call(struct net_device * dev, - struct iwreq *iwr, - unsigned int cmd, - struct iw_request_info *info, - iw_handler handler) -{ - const struct iw_ioctl_description * descr; - int ret = -EINVAL; - - /* Get the description of the IOCTL */ - if ((cmd - SIOCIWFIRST) >= standard_ioctl_num) - return -EOPNOTSUPP; - descr = &(standard_ioctl[cmd - SIOCIWFIRST]); - - /* Check if we have a pointer to user space data or not */ - if (descr->header_type != IW_HEADER_TYPE_POINT) { - - /* No extra arguments. Trivial to handle */ - ret = handler(dev, info, &(iwr->u), NULL); - - /* Generate an event to notify listeners of the change */ - if ((descr->flags & IW_DESCR_FLAG_EVENT) && - ((ret == 0) || (ret == -EIWCOMMIT))) - wireless_send_event(dev, cmd, &(iwr->u), NULL); - } else { - ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, - handler, dev, info); - } - - /* Call commit handler if needed and defined */ - if (ret == -EIWCOMMIT) - ret = call_commit_handler(dev); - - /* Here, we will generate the appropriate event if needed */ - - return ret; -} - -/* ---------------------------------------------------------------- */ -/* - * Wrapper to call a private Wireless Extension handler. - * We do various checks and also take care of moving data between - * user space and kernel space. - * It's not as nice and slimline as the standard wrapper. The cause - * is struct iw_priv_args, which was not really designed for the - * job we are going here. + * Call the commit handler in the driver + * (if exist and if conditions are right) + * + * Note : our current commit strategy is currently pretty dumb, + * but we will be able to improve on that... + * The goal is to try to agreagate as many changes as possible + * before doing the commit. Drivers that will define a commit handler + * are usually those that need a reset after changing parameters, so + * we want to minimise the number of reset. + * A cool idea is to use a timer : at each "set" command, we re-set the + * timer, when the timer eventually fires, we call the driver. + * Hopefully, more on that later. * - * IMPORTANT : This function prevent to set and get data on the same - * IOCTL and enforce the SET/GET convention. Not doing it would be - * far too hairy... - * If you need to set and get data at the same time, please don't use - * a iw_handler but process it in your ioctl handler (i.e. use the - * old driver API). + * Also, I'm waiting to see how many people will complain about the + * netif_running(dev) test. I'm open on that one... + * Hopefully, the driver will remember to do a commit in "open()" ;-) */ -static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd, - const struct iw_priv_args **descrp) -{ - const struct iw_priv_args *descr; - int i, extra_size; - - descr = NULL; - for (i = 0; i < dev->wireless_handlers->num_private_args; i++) { - if (cmd == dev->wireless_handlers->private_args[i].cmd) { - descr = &dev->wireless_handlers->private_args[i]; - break; - } - } - - extra_size = 0; - if (descr) { - if (IW_IS_SET(cmd)) { - int offset = 0; /* For sub-ioctls */ - /* Check for sub-ioctl handler */ - if (descr->name[0] == '\0') - /* Reserve one int for sub-ioctl index */ - offset = sizeof(__u32); - - /* Size of set arguments */ - extra_size = get_priv_size(descr->set_args); - - /* Does it fits in iwr ? 
*/ - if ((descr->set_args & IW_PRIV_SIZE_FIXED) && - ((extra_size + offset) <= IFNAMSIZ)) - extra_size = 0; - } else { - /* Size of get arguments */ - extra_size = get_priv_size(descr->get_args); - - /* Does it fits in iwr ? */ - if ((descr->get_args & IW_PRIV_SIZE_FIXED) && - (extra_size <= IFNAMSIZ)) - extra_size = 0; - } - } - *descrp = descr; - return extra_size; -} - -static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, - const struct iw_priv_args *descr, - iw_handler handler, struct net_device *dev, - struct iw_request_info *info, int extra_size) -{ - char *extra; - int err; - - /* Check what user space is giving us */ - if (IW_IS_SET(cmd)) { - if (!iwp->pointer && iwp->length != 0) - return -EFAULT; - - if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK)) - return -E2BIG; - } else if (!iwp->pointer) - return -EFAULT; - - extra = kmalloc(extra_size, GFP_KERNEL); - if (!extra) - return -ENOMEM; - - /* If it is a SET, get all the extra data in here */ - if (IW_IS_SET(cmd) && (iwp->length != 0)) { - if (copy_from_user(extra, iwp->pointer, extra_size)) { - err = -EFAULT; - goto out; - } - } - - /* Call the handler */ - err = handler(dev, info, (union iwreq_data *) iwp, extra); - - /* If we have something to return to the user */ - if (!err && IW_IS_GET(cmd)) { - /* Adjust for the actual length if it's variable, - * avoid leaking kernel bits outside. - */ - if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) - extra_size = adjust_priv_size(descr->get_args, iwp); - - if (copy_to_user(iwp->pointer, extra, extra_size)) - err = -EFAULT; - } - -out: - kfree(extra); - return err; -} - -static int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, - unsigned int cmd, struct iw_request_info *info, - iw_handler handler) +int call_commit_handler(struct net_device *dev) { - int extra_size = 0, ret = -EINVAL; - const struct iw_priv_args *descr; - - extra_size = get_priv_descr_and_size(dev, cmd, &descr); - - /* Check if we have a pointer to user space data or not. */ - if (extra_size == 0) { - /* No extra arguments. Trivial to handle */ - ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); - } else { - ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr, - handler, dev, info, extra_size); - } - - /* Call commit handler if needed and defined */ - if (ret == -EIWCOMMIT) - ret = call_commit_handler(dev); - - return ret; +#ifdef CONFIG_WIRELESS_EXT + if ((netif_running(dev)) && + (dev->wireless_handlers->standard[0] != NULL)) + /* Call the commit handler on the driver */ + return dev->wireless_handlers->standard[0](dev, NULL, + NULL, NULL); + else + return 0; /* Command completed successfully */ +#else + /* cfg80211 has no commit */ + return 0; +#endif } -/* ---------------------------------------------------------------- */ -typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *, - unsigned int, struct iw_request_info *, - iw_handler); - /* * Main IOCTl dispatcher. * Check the type of IOCTL and call the appropriate wrapper... 
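/*
 * Editor's sketch of the "aggregate sets, commit on a timer" idea the
 * comment above floats; nothing like this exists in the patch and all
 * names are invented.  A delayed work item stands in for a raw timer so
 * the commit runs in process context.  Setup would do
 * INIT_DELAYED_WORK(&dc->work, deferred_commit_fn) once per device.
 */
struct deferred_commit {
        struct net_device *dev;
        struct delayed_work work;
};

static void deferred_commit_fn(struct work_struct *work)
{
        struct deferred_commit *dc =
                container_of(work, struct deferred_commit, work.work);

        call_commit_handler(dc->dev);   /* same helper as above */
}

/* Each SET path would re-arm the delay instead of committing right away */
static void deferred_commit_kick(struct deferred_commit *dc)
{
        cancel_delayed_work(&dc->work);
        schedule_delayed_work(&dc->work, msecs_to_jiffies(100));
}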
@@ -1103,9 +882,11 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return standard(dev, iwr, cmd, info, &iw_handler_get_iwstats); +#ifdef CONFIG_WEXT_PRIV if (cmd == SIOCGIWPRIV && dev->wireless_handlers) return standard(dev, iwr, cmd, info, - &iw_handler_get_private); + iw_handler_get_private); +#endif /* Basic check */ if (!netif_device_present(dev)) @@ -1117,7 +898,7 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, /* Standard and private are not the same */ if (cmd < SIOCIWFIRSTPRIV) return standard(dev, iwr, cmd, info, handler); - else + else if (private) return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ @@ -1131,8 +912,9 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, */ static int wext_permission_check(unsigned int cmd) { - if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT) - && !capable(CAP_NET_ADMIN)) + if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || + cmd == SIOCGIWENCODEEXT) && + !capable(CAP_NET_ADMIN)) return -EPERM; return 0; @@ -1157,6 +939,50 @@ static int wext_ioctl_dispatch(struct net *net, struct ifreq *ifr, return ret; } +/* + * Wrapper to call a standard Wireless Extension handler. + * We do various checks and also take care of moving data between + * user space and kernel space. + */ +static int ioctl_standard_call(struct net_device * dev, + struct iwreq *iwr, + unsigned int cmd, + struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_ioctl_description * descr; + int ret = -EINVAL; + + /* Get the description of the IOCTL */ + if ((cmd - SIOCIWFIRST) >= standard_ioctl_num) + return -EOPNOTSUPP; + descr = &(standard_ioctl[cmd - SIOCIWFIRST]); + + /* Check if we have a pointer to user space data or not */ + if (descr->header_type != IW_HEADER_TYPE_POINT) { + + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), NULL); + + /* Generate an event to notify listeners of the change */ + if ((descr->flags & IW_DESCR_FLAG_EVENT) && + ((ret == 0) || (ret == -EIWCOMMIT))) + wireless_send_event(dev, cmd, &(iwr->u), NULL); + } else { + ret = ioctl_standard_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info); + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + /* Here, we will generate the appropriate event if needed */ + + return ret; +} + + int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, void __user *arg) { @@ -1205,43 +1031,6 @@ static int compat_standard_call(struct net_device *dev, return err; } -static int compat_private_call(struct net_device *dev, struct iwreq *iwr, - unsigned int cmd, struct iw_request_info *info, - iw_handler handler) -{ - const struct iw_priv_args *descr; - int ret, extra_size; - - extra_size = get_priv_descr_and_size(dev, cmd, &descr); - - /* Check if we have a pointer to user space data or not. */ - if (extra_size == 0) { - /* No extra arguments. 
Trivial to handle */ - ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); - } else { - struct compat_iw_point *iwp_compat; - struct iw_point iwp; - - iwp_compat = (struct compat_iw_point *) &iwr->u.data; - iwp.pointer = compat_ptr(iwp_compat->pointer); - iwp.length = iwp_compat->length; - iwp.flags = iwp_compat->flags; - - ret = ioctl_private_iw_point(&iwp, cmd, descr, - handler, dev, info, extra_size); - - iwp_compat->pointer = ptr_to_compat(iwp.pointer); - iwp_compat->length = iwp.length; - iwp_compat->flags = iwp.flags; - } - - /* Call commit handler if needed and defined */ - if (ret == -EIWCOMMIT) - ret = call_commit_handler(dev); - - return ret; -} - int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, unsigned long arg) { @@ -1274,502 +1063,3 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, return ret; } #endif - -static int __net_init wext_pernet_init(struct net *net) -{ - skb_queue_head_init(&net->wext_nlevents); - return 0; -} - -static void __net_exit wext_pernet_exit(struct net *net) -{ - skb_queue_purge(&net->wext_nlevents); -} - -static struct pernet_operations wext_pernet_ops = { - .init = wext_pernet_init, - .exit = wext_pernet_exit, -}; - -static int __init wireless_nlevent_init(void) -{ - return register_pernet_subsys(&wext_pernet_ops); -} - -subsys_initcall(wireless_nlevent_init); - -/* Process events generated by the wireless layer or the driver. */ -static void wireless_nlevent_process(struct work_struct *work) -{ - struct sk_buff *skb; - struct net *net; - - rtnl_lock(); - - for_each_net(net) { - while ((skb = skb_dequeue(&net->wext_nlevents))) - rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, - GFP_KERNEL); - } - - rtnl_unlock(); -} - -static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); - -static struct nlmsghdr *rtnetlink_ifinfo_prep(struct net_device *dev, - struct sk_buff *skb) -{ - struct ifinfomsg *r; - struct nlmsghdr *nlh; - - nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*r), 0); - if (!nlh) - return NULL; - - r = nlmsg_data(nlh); - r->ifi_family = AF_UNSPEC; - r->__ifi_pad = 0; - r->ifi_type = dev->type; - r->ifi_index = dev->ifindex; - r->ifi_flags = dev_get_flags(dev); - r->ifi_change = 0; /* Wireless changes don't affect those flags */ - - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - - return nlh; - nla_put_failure: - nlmsg_cancel(skb, nlh); - return NULL; -} - - -/* - * Main event dispatcher. Called from other parts and drivers. - * Send the event on the appropriate channels. - * May be called from interrupt context. - */ -void wireless_send_event(struct net_device * dev, - unsigned int cmd, - union iwreq_data * wrqu, - const char * extra) -{ - const struct iw_ioctl_description * descr = NULL; - int extra_len = 0; - struct iw_event *event; /* Mallocated whole event */ - int event_len; /* Its size */ - int hdr_len; /* Size of the event header */ - int wrqu_off = 0; /* Offset in wrqu */ - /* Don't "optimise" the following variable, it will crash */ - unsigned cmd_index; /* *MUST* be unsigned */ - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct nlattr *nla; -#ifdef CONFIG_COMPAT - struct __compat_iw_event *compat_event; - struct compat_iw_point compat_wrqu; - struct sk_buff *compskb; -#endif - - /* - * Nothing in the kernel sends scan events with data, be safe. 
- * This is necessary because we cannot fix up scan event data - * for compat, due to being contained in 'extra', but normally - * applications are required to retrieve the scan data anyway - * and no data is included in the event, this codifies that - * practice. - */ - if (WARN_ON(cmd == SIOCGIWSCAN && extra)) - extra = NULL; - - /* Get the description of the Event */ - if (cmd <= SIOCIWLAST) { - cmd_index = cmd - SIOCIWFIRST; - if (cmd_index < standard_ioctl_num) - descr = &(standard_ioctl[cmd_index]); - } else { - cmd_index = cmd - IWEVFIRST; - if (cmd_index < standard_event_num) - descr = &(standard_event[cmd_index]); - } - /* Don't accept unknown events */ - if (descr == NULL) { - /* Note : we don't return an error to the driver, because - * the driver would not know what to do about it. It can't - * return an error to the user, because the event is not - * initiated by a user request. - * The best the driver could do is to log an error message. - * We will do it ourselves instead... - */ - printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n", - dev->name, cmd); - return; - } - - /* Check extra parameters and set extra_len */ - if (descr->header_type == IW_HEADER_TYPE_POINT) { - /* Check if number of token fits within bounds */ - if (wrqu->data.length > descr->max_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length); - return; - } - if (wrqu->data.length < descr->min_tokens) { - printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length); - return; - } - /* Calculate extra_len - extra is NULL for restricted events */ - if (extra != NULL) - extra_len = wrqu->data.length * descr->token_size; - /* Always at an offset in wrqu */ - wrqu_off = IW_EV_POINT_OFF; - } - - /* Total length of the event */ - hdr_len = event_type_size[descr->header_type]; - event_len = hdr_len + extra_len; - - /* - * The problem for 64/32 bit. - * - * On 64-bit, a regular event is laid out as follows: - * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | - * | event.len | event.cmd | p a d d i n g | - * | wrqu data ... (with the correct size) | - * - * This padding exists because we manipulate event->u, - * and 'event' is not packed. - * - * An iw_point event is laid out like this instead: - * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | - * | event.len | event.cmd | p a d d i n g | - * | iwpnt.len | iwpnt.flg | p a d d i n g | - * | extra data ... - * - * The second padding exists because struct iw_point is extended, - * but this depends on the platform... - * - * On 32-bit, all the padding shouldn't be there. 
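/*
 * Editor's note, a worked instance of the layout comment above.  The
 * numbers assume a typical LP64 build and are illustrative only:
 *
 *   IW_EV_LCP_LEN   = sizeof(struct iw_event) - sizeof(union iwreq_data)
 *                   = 8   (len:2 + cmd:2 + 4 bytes of padding before u)
 *   IW_EV_POINT_OFF = offsetof(struct iw_point, length)
 *                   = 8   (the user-space pointer field, padded)
 *
 * On a 32-bit build both values are 4, which is why the compat path
 * repacks the header.  The POINT offset is also why the send path copies
 * from ((char *)wrqu) + IW_EV_POINT_OFF: the pointer itself is never sent
 * to user space, only length/flags plus the extra payload.
 */
static void wext_layout_example(void)
{
        size_t lcp_len   = sizeof(struct iw_event) - sizeof(union iwreq_data);
        size_t point_off = offsetof(struct iw_point, length);

        /* With the kernel macros: lcp_len == IW_EV_LCP_LEN and
         * point_off == IW_EV_POINT_OFF. */
        (void)lcp_len;
        (void)point_off;
}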
- */ - - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!skb) - return; - - /* Send via the RtNetlink event channel */ - nlh = rtnetlink_ifinfo_prep(dev, skb); - if (WARN_ON(!nlh)) { - kfree_skb(skb); - return; - } - - /* Add the wireless events in the netlink packet */ - nla = nla_reserve(skb, IFLA_WIRELESS, event_len); - if (!nla) { - kfree_skb(skb); - return; - } - event = nla_data(nla); - - /* Fill event - first clear to avoid data leaking */ - memset(event, 0, hdr_len); - event->len = event_len; - event->cmd = cmd; - memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); - if (extra_len) - memcpy(((char *) event) + hdr_len, extra, extra_len); - - nlmsg_end(skb, nlh); -#ifdef CONFIG_COMPAT - hdr_len = compat_event_type_size[descr->header_type]; - event_len = hdr_len + extra_len; - - compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); - if (!compskb) { - kfree_skb(skb); - return; - } - - /* Send via the RtNetlink event channel */ - nlh = rtnetlink_ifinfo_prep(dev, compskb); - if (WARN_ON(!nlh)) { - kfree_skb(skb); - kfree_skb(compskb); - return; - } - - /* Add the wireless events in the netlink packet */ - nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); - if (!nla) { - kfree_skb(skb); - kfree_skb(compskb); - return; - } - compat_event = nla_data(nla); - - compat_event->len = event_len; - compat_event->cmd = cmd; - if (descr->header_type == IW_HEADER_TYPE_POINT) { - compat_wrqu.length = wrqu->data.length; - compat_wrqu.flags = wrqu->data.flags; - memcpy(&compat_event->pointer, - ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, - hdr_len - IW_EV_COMPAT_LCP_LEN); - if (extra_len) - memcpy(((char *) compat_event) + hdr_len, - extra, extra_len); - } else { - /* extra_len must be zero, so no if (extra) needed */ - memcpy(&compat_event->pointer, wrqu, - hdr_len - IW_EV_COMPAT_LCP_LEN); - } - - nlmsg_end(compskb, nlh); - - skb_shinfo(skb)->frag_list = compskb; -#endif - skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); - schedule_work(&wireless_nlevent_work); -} -EXPORT_SYMBOL(wireless_send_event); - -/********************** ENHANCED IWSPY SUPPORT **********************/ -/* - * In the old days, the driver was handling spy support all by itself. - * Now, the driver can delegate this task to Wireless Extensions. - * It needs to use those standard spy iw_handler in struct iw_handler_def, - * push data to us via wireless_spy_update() and include struct iw_spy_data - * in its private part (and export it in net_device->wireless_data->spy_data). - * One of the main advantage of centralising spy support here is that - * it becomes much easier to improve and extend it without having to touch - * the drivers. One example is the addition of the Spy-Threshold events. - */ - -/* ---------------------------------------------------------------- */ -/* - * Return the pointer to the spy data in the driver. - * Because this is called on the Rx path via wireless_spy_update(), - * we want it to be efficient... 
- */ -static inline struct iw_spy_data *get_spydata(struct net_device *dev) -{ - /* This is the new way */ - if (dev->wireless_data) - return dev->wireless_data->spy_data; - return NULL; -} - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : set Spy List - */ -int iw_handler_set_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Disable spy collection while we copy the addresses. - * While we copy addresses, any call to wireless_spy_update() - * will NOP. This is OK, as anyway the addresses are changing. */ - spydata->spy_number = 0; - - /* We want to operate without locking, because wireless_spy_update() - * most likely will happen in the interrupt handler, and therefore - * have its own locking constraints and needs performance. - * The rtnl_lock() make sure we don't race with the other iw_handlers. - * This make sure wireless_spy_update() "see" that the spy list - * is temporarily disabled. */ - smp_wmb(); - - /* Are there are addresses to copy? */ - if (wrqu->data.length > 0) { - int i; - - /* Copy addresses */ - for (i = 0; i < wrqu->data.length; i++) - memcpy(spydata->spy_address[i], address[i].sa_data, - ETH_ALEN); - /* Reset stats */ - memset(spydata->spy_stat, 0, - sizeof(struct iw_quality) * IW_MAX_SPY); - } - - /* Make sure above is updated before re-enabling */ - smp_wmb(); - - /* Enable addresses */ - spydata->spy_number = wrqu->data.length; - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_spy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : get Spy List - */ -int iw_handler_get_spy(struct net_device * dev, - struct iw_request_info * info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct sockaddr * address = (struct sockaddr *) extra; - int i; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - wrqu->data.length = spydata->spy_number; - - /* Copy addresses. */ - for (i = 0; i < spydata->spy_number; i++) { - memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); - address[i].sa_family = AF_UNIX; - } - /* Copy stats to the user buffer (just after). */ - if (spydata->spy_number > 0) - memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), - spydata->spy_stat, - sizeof(struct iw_quality) * spydata->spy_number); - /* Reset updated flags. 
*/ - for (i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; - return 0; -} -EXPORT_SYMBOL(iw_handler_get_spy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : set spy threshold - */ -int iw_handler_set_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - memcpy(&(spydata->spy_thr_low), &(threshold->low), - 2 * sizeof(struct iw_quality)); - - /* Clear flag */ - memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); - - return 0; -} -EXPORT_SYMBOL(iw_handler_set_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Standard Wireless Handler : get spy threshold - */ -int iw_handler_get_thrspy(struct net_device * dev, - struct iw_request_info *info, - union iwreq_data * wrqu, - char * extra) -{ - struct iw_spy_data * spydata = get_spydata(dev); - struct iw_thrspy * threshold = (struct iw_thrspy *) extra; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return -EOPNOTSUPP; - - /* Just do it */ - memcpy(&(threshold->low), &(spydata->spy_thr_low), - 2 * sizeof(struct iw_quality)); - - return 0; -} -EXPORT_SYMBOL(iw_handler_get_thrspy); - -/*------------------------------------------------------------------*/ -/* - * Prepare and send a Spy Threshold event - */ -static void iw_send_thrspy_event(struct net_device * dev, - struct iw_spy_data * spydata, - unsigned char * address, - struct iw_quality * wstats) -{ - union iwreq_data wrqu; - struct iw_thrspy threshold; - - /* Init */ - wrqu.data.length = 1; - wrqu.data.flags = 0; - /* Copy address */ - memcpy(threshold.addr.sa_data, address, ETH_ALEN); - threshold.addr.sa_family = ARPHRD_ETHER; - /* Copy stats */ - memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); - /* Copy also thresholds */ - memcpy(&(threshold.low), &(spydata->spy_thr_low), - 2 * sizeof(struct iw_quality)); - - /* Send event to user space */ - wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); -} - -/* ---------------------------------------------------------------- */ -/* - * Call for the driver to update the spy data. - * For now, the spy data is a simple array. As the size of the array is - * small, this is good enough. If we wanted to support larger number of - * spy addresses, we should use something more efficient... - */ -void wireless_spy_update(struct net_device * dev, - unsigned char * address, - struct iw_quality * wstats) -{ - struct iw_spy_data * spydata = get_spydata(dev); - int i; - int match = -1; - - /* Make sure driver is not buggy or using the old API */ - if (!spydata) - return; - - /* Update all records that match */ - for (i = 0; i < spydata->spy_number; i++) - if (!compare_ether_addr(address, spydata->spy_address[i])) { - memcpy(&(spydata->spy_stat[i]), wstats, - sizeof(struct iw_quality)); - match = i; - } - - /* Generate an event if we cross the spy threshold. - * To avoid event storms, we have a simple hysteresis : we generate - * event only when we go under the low threshold or above the - * high threshold. 
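/*
 * Editor's sketch of the hysteresis described above, reduced to a
 * standalone helper (illustrative only; the real logic is the
 * spy_thr_under handling in wireless_spy_update()).
 */
static int spy_threshold_crossed(u8 *under, int level, int low, int high)
{
        if (*under) {
                if (level > high) {
                        *under = 0;     /* climbed back above high: report */
                        return 1;
                }
        } else if (level < low) {
                *under = 1;             /* dropped below low: report */
                return 1;
        }
        return 0;                       /* still inside the band: stay quiet */
}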
*/ - if (match >= 0) { - if (spydata->spy_thr_under[match]) { - if (wstats->level > spydata->spy_thr_high.level) { - spydata->spy_thr_under[match] = 0; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } else { - if (wstats->level < spydata->spy_thr_low.level) { - spydata->spy_thr_under[match] = 1; - iw_send_thrspy_event(dev, spydata, - address, wstats); - } - } - } -} -EXPORT_SYMBOL(wireless_spy_update); diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c new file mode 100644 index 00000000000..a3c2277de9e --- /dev/null +++ b/net/wireless/wext-priv.c @@ -0,0 +1,248 @@ +/* + * This file implement the Wireless Extensions priv API. + * + * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> + * + * (As all part of the Linux kernel, this file is GPL) + */ +#include <linux/wireless.h> +#include <linux/netdevice.h> +#include <net/iw_handler.h> +#include <net/wext.h> + +int iw_handler_get_private(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + /* Check if the driver has something to export */ + if ((dev->wireless_handlers->num_private_args == 0) || + (dev->wireless_handlers->private_args == NULL)) + return -EOPNOTSUPP; + + /* Check if there is enough buffer up there */ + if (wrqu->data.length < dev->wireless_handlers->num_private_args) { + /* User space can't know in advance how large the buffer + * needs to be. Give it a hint, so that we can support + * any size buffer we want somewhat efficiently... */ + wrqu->data.length = dev->wireless_handlers->num_private_args; + return -E2BIG; + } + + /* Set the number of available ioctls. */ + wrqu->data.length = dev->wireless_handlers->num_private_args; + + /* Copy structure to the user buffer. */ + memcpy(extra, dev->wireless_handlers->private_args, + sizeof(struct iw_priv_args) * wrqu->data.length); + + return 0; +} + +/* Size (in bytes) of the various private data types */ +static const char iw_priv_type_size[] = { + 0, /* IW_PRIV_TYPE_NONE */ + 1, /* IW_PRIV_TYPE_BYTE */ + 1, /* IW_PRIV_TYPE_CHAR */ + 0, /* Not defined */ + sizeof(__u32), /* IW_PRIV_TYPE_INT */ + sizeof(struct iw_freq), /* IW_PRIV_TYPE_FLOAT */ + sizeof(struct sockaddr), /* IW_PRIV_TYPE_ADDR */ + 0, /* Not defined */ +}; + +static int get_priv_size(__u16 args) +{ + int num = args & IW_PRIV_SIZE_MASK; + int type = (args & IW_PRIV_TYPE_MASK) >> 12; + + return num * iw_priv_type_size[type]; +} + +static int adjust_priv_size(__u16 args, struct iw_point *iwp) +{ + int num = iwp->length; + int max = args & IW_PRIV_SIZE_MASK; + int type = (args & IW_PRIV_TYPE_MASK) >> 12; + + /* Make sure the driver doesn't goof up */ + if (max < num) + num = max; + + return num * iw_priv_type_size[type]; +} + +/* + * Wrapper to call a private Wireless Extension handler. + * We do various checks and also take care of moving data between + * user space and kernel space. + * It's not as nice and slimline as the standard wrapper. The cause + * is struct iw_priv_args, which was not really designed for the + * job we are going here. + * + * IMPORTANT : This function prevent to set and get data on the same + * IOCTL and enforce the SET/GET convention. Not doing it would be + * far too hairy... + * If you need to set and get data at the same time, please don't use + * a iw_handler but process it in your ioctl handler (i.e. use the + * old driver API). 
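/*
 * Editor's note with a made-up descriptor to make the size math above
 * concrete.  A driver declares its private ioctls roughly like this:
 */
static const struct iw_priv_args example_priv[] = {
        { SIOCIWFIRSTPRIV, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 2,
          0, "set_example" },
};
/*
 * The argument type sits in bits 12-14 of set_args/get_args and the
 * element count in the low bits, so get_priv_size() above yields
 * 2 * sizeof(__u32) = 8 for this entry.  Because the size is FIXED and
 * 8 bytes fit in IFNAMSIZ, get_priv_descr_and_size() below reports
 * extra_size == 0 and the two integers travel inline in struct iwreq,
 * with no separate user-space buffer needed.
 */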
+ */ +static int get_priv_descr_and_size(struct net_device *dev, unsigned int cmd, + const struct iw_priv_args **descrp) +{ + const struct iw_priv_args *descr; + int i, extra_size; + + descr = NULL; + for (i = 0; i < dev->wireless_handlers->num_private_args; i++) { + if (cmd == dev->wireless_handlers->private_args[i].cmd) { + descr = &dev->wireless_handlers->private_args[i]; + break; + } + } + + extra_size = 0; + if (descr) { + if (IW_IS_SET(cmd)) { + int offset = 0; /* For sub-ioctls */ + /* Check for sub-ioctl handler */ + if (descr->name[0] == '\0') + /* Reserve one int for sub-ioctl index */ + offset = sizeof(__u32); + + /* Size of set arguments */ + extra_size = get_priv_size(descr->set_args); + + /* Does it fits in iwr ? */ + if ((descr->set_args & IW_PRIV_SIZE_FIXED) && + ((extra_size + offset) <= IFNAMSIZ)) + extra_size = 0; + } else { + /* Size of get arguments */ + extra_size = get_priv_size(descr->get_args); + + /* Does it fits in iwr ? */ + if ((descr->get_args & IW_PRIV_SIZE_FIXED) && + (extra_size <= IFNAMSIZ)) + extra_size = 0; + } + } + *descrp = descr; + return extra_size; +} + +static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, + const struct iw_priv_args *descr, + iw_handler handler, struct net_device *dev, + struct iw_request_info *info, int extra_size) +{ + char *extra; + int err; + + /* Check what user space is giving us */ + if (IW_IS_SET(cmd)) { + if (!iwp->pointer && iwp->length != 0) + return -EFAULT; + + if (iwp->length > (descr->set_args & IW_PRIV_SIZE_MASK)) + return -E2BIG; + } else if (!iwp->pointer) + return -EFAULT; + + extra = kmalloc(extra_size, GFP_KERNEL); + if (!extra) + return -ENOMEM; + + /* If it is a SET, get all the extra data in here */ + if (IW_IS_SET(cmd) && (iwp->length != 0)) { + if (copy_from_user(extra, iwp->pointer, extra_size)) { + err = -EFAULT; + goto out; + } + } + + /* Call the handler */ + err = handler(dev, info, (union iwreq_data *) iwp, extra); + + /* If we have something to return to the user */ + if (!err && IW_IS_GET(cmd)) { + /* Adjust for the actual length if it's variable, + * avoid leaking kernel bits outside. + */ + if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) + extra_size = adjust_priv_size(descr->get_args, iwp); + + if (copy_to_user(iwp->pointer, extra, extra_size)) + err = -EFAULT; + } + +out: + kfree(extra); + return err; +} + +int ioctl_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + int extra_size = 0, ret = -EINVAL; + const struct iw_priv_args *descr; + + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + ret = ioctl_private_iw_point(&iwr->u.data, cmd, descr, + handler, dev, info, extra_size); + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + return ret; +} + +#ifdef CONFIG_COMPAT +int compat_private_call(struct net_device *dev, struct iwreq *iwr, + unsigned int cmd, struct iw_request_info *info, + iw_handler handler) +{ + const struct iw_priv_args *descr; + int ret, extra_size; + + extra_size = get_priv_descr_and_size(dev, cmd, &descr); + + /* Check if we have a pointer to user space data or not. */ + if (extra_size == 0) { + /* No extra arguments. 
Trivial to handle */ + ret = handler(dev, info, &(iwr->u), (char *) &(iwr->u)); + } else { + struct compat_iw_point *iwp_compat; + struct iw_point iwp; + + iwp_compat = (struct compat_iw_point *) &iwr->u.data; + iwp.pointer = compat_ptr(iwp_compat->pointer); + iwp.length = iwp_compat->length; + iwp.flags = iwp_compat->flags; + + ret = ioctl_private_iw_point(&iwp, cmd, descr, + handler, dev, info, extra_size); + + iwp_compat->pointer = ptr_to_compat(iwp.pointer); + iwp_compat->length = iwp.length; + iwp_compat->flags = iwp.flags; + } + + /* Call commit handler if needed and defined */ + if (ret == -EIWCOMMIT) + ret = call_commit_handler(dev); + + return ret; +} +#endif diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c new file mode 100644 index 00000000000..273a7f77c83 --- /dev/null +++ b/net/wireless/wext-proc.c @@ -0,0 +1,155 @@ +/* + * This file implement the Wireless Extensions proc API. + * + * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * + * (As all part of the Linux kernel, this file is GPL) + */ + +/* + * The /proc/net/wireless file is a human readable user-space interface + * exporting various wireless specific statistics from the wireless devices. + * This is the most popular part of the Wireless Extensions ;-) + * + * This interface is a pure clone of /proc/net/dev (in net/core/dev.c). + * The content of the file is basically the content of "struct iw_statistics". + */ + +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/wireless.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <net/iw_handler.h> +#include <net/wext.h> + + +static void wireless_seq_printf_stats(struct seq_file *seq, + struct net_device *dev) +{ + /* Get stats from the driver */ + struct iw_statistics *stats = get_wireless_stats(dev); + static struct iw_statistics nullstats = {}; + + /* show device if it's wireless regardless of current stats */ + if (!stats) { +#ifdef CONFIG_WIRELESS_EXT + if (dev->wireless_handlers) + stats = &nullstats; +#endif +#ifdef CONFIG_CFG80211 + if (dev->ieee80211_ptr) + stats = &nullstats; +#endif + } + + if (stats) { + seq_printf(seq, "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d " + "%6d %6d %6d\n", + dev->name, stats->status, stats->qual.qual, + stats->qual.updated & IW_QUAL_QUAL_UPDATED + ? '.' : ' ', + ((__s32) stats->qual.level) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), + stats->qual.updated & IW_QUAL_LEVEL_UPDATED + ? '.' : ' ', + ((__s32) stats->qual.noise) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), + stats->qual.updated & IW_QUAL_NOISE_UPDATED + ? '.' 
: ' ', + stats->discard.nwid, stats->discard.code, + stats->discard.fragment, stats->discard.retries, + stats->discard.misc, stats->miss.beacon); + + if (stats != &nullstats) + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; + } +} + +/* ---------------------------------------------------------------- */ +/* + * Print info for /proc/net/wireless (print all entries) + */ +static int wireless_dev_seq_show(struct seq_file *seq, void *v) +{ + might_sleep(); + + if (v == SEQ_START_TOKEN) + seq_printf(seq, "Inter-| sta-| Quality | Discarded " + "packets | Missed | WE\n" + " face | tus | link level noise | nwid " + "crypt frag retry misc | beacon | %d\n", + WIRELESS_EXT); + else + wireless_seq_printf_stats(seq, v); + return 0; +} + +static void *wireless_dev_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + loff_t off; + struct net_device *dev; + + rtnl_lock(); + if (!*pos) + return SEQ_START_TOKEN; + + off = 1; + for_each_netdev(net, dev) + if (off++ == *pos) + return dev; + return NULL; +} + +static void *wireless_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + + ++*pos; + + return v == SEQ_START_TOKEN ? + first_net_device(net) : next_net_device(v); +} + +static void wireless_dev_seq_stop(struct seq_file *seq, void *v) +{ + rtnl_unlock(); +} + +static const struct seq_operations wireless_seq_ops = { + .start = wireless_dev_seq_start, + .next = wireless_dev_seq_next, + .stop = wireless_dev_seq_stop, + .show = wireless_dev_seq_show, +}; + +static int seq_open_wireless(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &wireless_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations wireless_seq_fops = { + .owner = THIS_MODULE, + .open = seq_open_wireless, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +int wext_proc_init(struct net *net) +{ + /* Create /proc/net/wireless entry */ + if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) + return -ENOMEM; + + return 0; +} + +void wext_proc_exit(struct net *net) +{ + proc_net_remove(net, "wireless"); +} diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c new file mode 100644 index 00000000000..6dcfe65a2d1 --- /dev/null +++ b/net/wireless/wext-spy.c @@ -0,0 +1,231 @@ +/* + * This file implement the Wireless Extensions spy API. + * + * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> + * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. + * + * (As all part of the Linux kernel, this file is GPL) + */ + +#include <linux/wireless.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <net/iw_handler.h> +#include <net/arp.h> +#include <net/wext.h> + +static inline struct iw_spy_data *get_spydata(struct net_device *dev) +{ + /* This is the new way */ + if (dev->wireless_data) + return dev->wireless_data->spy_data; + return NULL; +} + +int iw_handler_set_spy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct sockaddr * address = (struct sockaddr *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Disable spy collection while we copy the addresses. + * While we copy addresses, any call to wireless_spy_update() + * will NOP. This is OK, as anyway the addresses are changing. 
*/ + spydata->spy_number = 0; + + /* We want to operate without locking, because wireless_spy_update() + * most likely will happen in the interrupt handler, and therefore + * have its own locking constraints and needs performance. + * The rtnl_lock() make sure we don't race with the other iw_handlers. + * This make sure wireless_spy_update() "see" that the spy list + * is temporarily disabled. */ + smp_wmb(); + + /* Are there are addresses to copy? */ + if (wrqu->data.length > 0) { + int i; + + /* Copy addresses */ + for (i = 0; i < wrqu->data.length; i++) + memcpy(spydata->spy_address[i], address[i].sa_data, + ETH_ALEN); + /* Reset stats */ + memset(spydata->spy_stat, 0, + sizeof(struct iw_quality) * IW_MAX_SPY); + } + + /* Make sure above is updated before re-enabling */ + smp_wmb(); + + /* Enable addresses */ + spydata->spy_number = wrqu->data.length; + + return 0; +} +EXPORT_SYMBOL(iw_handler_set_spy); + +int iw_handler_get_spy(struct net_device * dev, + struct iw_request_info * info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct sockaddr * address = (struct sockaddr *) extra; + int i; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + wrqu->data.length = spydata->spy_number; + + /* Copy addresses. */ + for (i = 0; i < spydata->spy_number; i++) { + memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN); + address[i].sa_family = AF_UNIX; + } + /* Copy stats to the user buffer (just after). */ + if (spydata->spy_number > 0) + memcpy(extra + (sizeof(struct sockaddr) *spydata->spy_number), + spydata->spy_stat, + sizeof(struct iw_quality) * spydata->spy_number); + /* Reset updated flags. */ + for (i = 0; i < spydata->spy_number; i++) + spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; + return 0; +} +EXPORT_SYMBOL(iw_handler_get_spy); + +/*------------------------------------------------------------------*/ +/* + * Standard Wireless Handler : set spy threshold + */ +int iw_handler_set_thrspy(struct net_device * dev, + struct iw_request_info *info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct iw_thrspy * threshold = (struct iw_thrspy *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Just do it */ + memcpy(&(spydata->spy_thr_low), &(threshold->low), + 2 * sizeof(struct iw_quality)); + + /* Clear flag */ + memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under)); + + return 0; +} +EXPORT_SYMBOL(iw_handler_set_thrspy); + +/*------------------------------------------------------------------*/ +/* + * Standard Wireless Handler : get spy threshold + */ +int iw_handler_get_thrspy(struct net_device * dev, + struct iw_request_info *info, + union iwreq_data * wrqu, + char * extra) +{ + struct iw_spy_data * spydata = get_spydata(dev); + struct iw_thrspy * threshold = (struct iw_thrspy *) extra; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return -EOPNOTSUPP; + + /* Just do it */ + memcpy(&(threshold->low), &(spydata->spy_thr_low), + 2 * sizeof(struct iw_quality)); + + return 0; +} +EXPORT_SYMBOL(iw_handler_get_thrspy); + +/*------------------------------------------------------------------*/ +/* + * Prepare and send a Spy Threshold event + */ +static void iw_send_thrspy_event(struct net_device * dev, + struct iw_spy_data * spydata, + unsigned char * address, + struct iw_quality * wstats) +{ + union iwreq_data 
wrqu; + struct iw_thrspy threshold; + + /* Init */ + wrqu.data.length = 1; + wrqu.data.flags = 0; + /* Copy address */ + memcpy(threshold.addr.sa_data, address, ETH_ALEN); + threshold.addr.sa_family = ARPHRD_ETHER; + /* Copy stats */ + memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality)); + /* Copy also thresholds */ + memcpy(&(threshold.low), &(spydata->spy_thr_low), + 2 * sizeof(struct iw_quality)); + + /* Send event to user space */ + wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold); +} + +/* ---------------------------------------------------------------- */ +/* + * Call for the driver to update the spy data. + * For now, the spy data is a simple array. As the size of the array is + * small, this is good enough. If we wanted to support larger number of + * spy addresses, we should use something more efficient... + */ +void wireless_spy_update(struct net_device * dev, + unsigned char * address, + struct iw_quality * wstats) +{ + struct iw_spy_data * spydata = get_spydata(dev); + int i; + int match = -1; + + /* Make sure driver is not buggy or using the old API */ + if (!spydata) + return; + + /* Update all records that match */ + for (i = 0; i < spydata->spy_number; i++) + if (!compare_ether_addr(address, spydata->spy_address[i])) { + memcpy(&(spydata->spy_stat[i]), wstats, + sizeof(struct iw_quality)); + match = i; + } + + /* Generate an event if we cross the spy threshold. + * To avoid event storms, we have a simple hysteresis : we generate + * event only when we go under the low threshold or above the + * high threshold. */ + if (match >= 0) { + if (spydata->spy_thr_under[match]) { + if (wstats->level > spydata->spy_thr_high.level) { + spydata->spy_thr_under[match] = 0; + iw_send_thrspy_event(dev, spydata, + address, wstats); + } + } else { + if (wstats->level < spydata->spy_thr_low.level) { + spydata->spy_thr_under[match] = 1; + iw_send_thrspy_event(dev, spydata, + address, wstats); + } + } + } +} +EXPORT_SYMBOL(wireless_spy_update); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 7fa9c7ad3d3..e3219e4cd04 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -415,6 +415,7 @@ static int x25_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int rc = -ENOPROTOOPT; + lock_kernel(); if (level != SOL_X25 || optname != X25_QBITINCL) goto out; @@ -429,6 +430,7 @@ static int x25_setsockopt(struct socket *sock, int level, int optname, x25_sk(sk)->qbitincl = !!opt; rc = 0; out: + unlock_kernel(); return rc; } @@ -438,6 +440,7 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; int val, len, rc = -ENOPROTOOPT; + lock_kernel(); if (level != SOL_X25 || optname != X25_QBITINCL) goto out; @@ -458,6 +461,7 @@ static int x25_getsockopt(struct socket *sock, int level, int optname, val = x25_sk(sk)->qbitincl; rc = copy_to_user(optval, &val, len) ? 
-EFAULT : 0; out: + unlock_kernel(); return rc; } @@ -466,12 +470,14 @@ static int x25_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; int rc = -EOPNOTSUPP; + lock_kernel(); if (sk->sk_state != TCP_LISTEN) { memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; rc = 0; } + unlock_kernel(); return rc; } @@ -501,13 +507,14 @@ out: return sk; } -static int x25_create(struct net *net, struct socket *sock, int protocol) +static int x25_create(struct net *net, struct socket *sock, int protocol, + int kern) { struct sock *sk; struct x25_sock *x25; int rc = -ESOCKTNOSUPPORT; - if (net != &init_net) + if (!net_eq(net, &init_net)) return -EAFNOSUPPORT; if (sock->type != SOCK_SEQPACKET || protocol) @@ -597,6 +604,7 @@ static int x25_release(struct socket *sock) struct sock *sk = sock->sk; struct x25_sock *x25; + lock_kernel(); if (!sk) goto out; @@ -627,6 +635,7 @@ static int x25_release(struct socket *sock) sock_orphan(sk); out: + unlock_kernel(); return 0; } @@ -634,18 +643,23 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; + int rc = 0; + lock_kernel(); if (!sock_flag(sk, SOCK_ZAPPED) || addr_len != sizeof(struct sockaddr_x25) || - addr->sx25_family != AF_X25) - return -EINVAL; + addr->sx25_family != AF_X25) { + rc = -EINVAL; + goto out; + } x25_sk(sk)->source_addr = addr->sx25_addr; x25_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); - - return 0; +out: + unlock_kernel(); + return rc; } static int x25_wait_for_connection_establishment(struct sock *sk) @@ -686,6 +700,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, struct x25_route *rt; int rc = 0; + lock_kernel(); lock_sock(sk); if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { sock->state = SS_CONNECTED; @@ -763,6 +778,7 @@ out_put_route: x25_route_put(rt); out: release_sock(sk); + unlock_kernel(); return rc; } @@ -802,6 +818,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags) struct sk_buff *skb; int rc = -EINVAL; + lock_kernel(); if (!sk || sk->sk_state != TCP_LISTEN) goto out; @@ -829,6 +846,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags) out2: release_sock(sk); out: + unlock_kernel(); return rc; } @@ -838,10 +856,14 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr, struct sockaddr_x25 *sx25 = (struct sockaddr_x25 *)uaddr; struct sock *sk = sock->sk; struct x25_sock *x25 = x25_sk(sk); + int rc = 0; + lock_kernel(); if (peer) { - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; + if (sk->sk_state != TCP_ESTABLISHED) { + rc = -ENOTCONN; + goto out; + } sx25->sx25_addr = x25->dest_addr; } else sx25->sx25_addr = x25->source_addr; @@ -849,7 +871,21 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr, sx25->sx25_family = AF_X25; *uaddr_len = sizeof(*sx25); - return 0; +out: + unlock_kernel(); + return rc; +} + +static unsigned int x25_datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + int rc; + + lock_kernel(); + rc = datagram_poll(file, sock, wait); + unlock_kernel(); + + return rc; } int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, @@ -1002,6 +1038,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, size_t size; int qbit = 0, rc = -EINVAL; + lock_kernel(); if (msg->msg_flags & 
~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT)) goto out; @@ -1166,6 +1203,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, release_sock(sk); rc = len; out: + unlock_kernel(); return rc; out_kfree_skb: kfree_skb(skb); @@ -1186,6 +1224,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int rc = -ENOTCONN; + lock_kernel(); /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though @@ -1259,6 +1298,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, out_free_dgram: skb_free_datagram(sk, skb); out: + unlock_kernel(); return rc; } @@ -1270,6 +1310,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; int rc; + lock_kernel(); switch (cmd) { case TIOCOUTQ: { int amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); @@ -1363,7 +1404,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) facilities.throughput > 0xDD) break; if (facilities.reverse && - (facilities.reverse | 0x81)!= 0x81) + (facilities.reverse & 0x81) != 0x81) break; x25->facilities = facilities; rc = 0; @@ -1430,6 +1471,17 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + case SIOCX25SCAUSEDIAG: { + struct x25_causediag causediag; + rc = -EFAULT; + if (copy_from_user(&causediag, argp, sizeof(causediag))) + break; + x25->causediag = causediag; + rc = 0; + break; + + } + case SIOCX25SCUDMATCHLEN: { struct x25_subaddr sub_addr; rc = -EINVAL; @@ -1472,11 +1524,12 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -ENOIOCTLCMD; break; } + unlock_kernel(); return rc; } -static struct net_proto_family x25_family_ops = { +static const struct net_proto_family x25_family_ops = { .family = AF_X25, .create = x25_create, .owner = THIS_MODULE, @@ -1542,15 +1595,19 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, break; case SIOCGSTAMP: rc = -EINVAL; + lock_kernel(); if (sk) rc = compat_sock_get_timestamp(sk, (struct timeval __user*)argp); + unlock_kernel(); break; case SIOCGSTAMPNS: rc = -EINVAL; + lock_kernel(); if (sk) rc = compat_sock_get_timestampns(sk, (struct timespec __user*)argp); + unlock_kernel(); break; case SIOCGIFADDR: case SIOCSIFADDR: @@ -1569,16 +1626,22 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; + lock_kernel(); rc = x25_route_ioctl(cmd, argp); + unlock_kernel(); break; case SIOCX25GSUBSCRIP: + lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); + unlock_kernel(); break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; + lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); + unlock_kernel(); break; case SIOCX25GFACILITIES: case SIOCX25SFACILITIES: @@ -1587,6 +1650,7 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, case SIOCX25GCALLUSERDATA: case SIOCX25SCALLUSERDATA: case SIOCX25GCAUSEDIAG: + case SIOCX25SCAUSEDIAG: case SIOCX25SCUDMATCHLEN: case SIOCX25CALLACCPTAPPRV: case SIOCX25SENDCALLACCPT: @@ -1600,7 +1664,7 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, } #endif -static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { +static const struct proto_ops x25_proto_ops = { .family = AF_X25, .owner = THIS_MODULE, .release = x25_release, @@ -1609,7 +1673,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .socketpair = 
sock_no_socketpair, .accept = x25_accept, .getname = x25_getname, - .poll = datagram_poll, + .poll = x25_datagram_poll, .ioctl = x25_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_x25_ioctl, @@ -1624,8 +1688,6 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { .sendpage = sock_no_sendpage, }; -SOCKOPS_WRAP(x25_proto, AF_X25); - static struct packet_type x25_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_X25), .func = x25_lapb_receive_frame, @@ -1659,20 +1721,31 @@ static int __init x25_init(void) if (rc != 0) goto out; - sock_register(&x25_family_ops); + rc = sock_register(&x25_family_ops); + if (rc != 0) + goto out_proto; dev_add_pack(&x25_packet_type); - register_netdevice_notifier(&x25_dev_notifier); + rc = register_netdevice_notifier(&x25_dev_notifier); + if (rc != 0) + goto out_sock; printk(KERN_INFO "X.25 for Linux Version 0.2\n"); -#ifdef CONFIG_SYSCTL x25_register_sysctl(); -#endif - x25_proc_init(); + rc = x25_proc_init(); + if (rc != 0) + goto out_dev; out: return rc; +out_dev: + unregister_netdevice_notifier(&x25_dev_notifier); +out_sock: + sock_unregister(AF_X25); +out_proto: + proto_unregister(&x25_proto); + goto out; } module_init(x25_init); @@ -1682,9 +1755,7 @@ static void __exit x25_exit(void) x25_link_free(); x25_route_free(); -#ifdef CONFIG_SYSCTL x25_unregister_sysctl(); -#endif unregister_netdevice_notifier(&x25_dev_notifier); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index a5d3416522d..d2efd29f434 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -19,62 +19,51 @@ static struct ctl_table_header *x25_table_header; static struct ctl_table x25_table[] = { { - .ctl_name = NET_X25_RESTART_REQUEST_TIMEOUT, .procname = "restart_request_timeout", .data = &sysctl_x25_restart_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_CALL_REQUEST_TIMEOUT, .procname = "call_request_timeout", .data = &sysctl_x25_call_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_RESET_REQUEST_TIMEOUT, .procname = "reset_request_timeout", .data = &sysctl_x25_reset_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_CLEAR_REQUEST_TIMEOUT, .procname = "clear_request_timeout", .data = &sysctl_x25_clear_request_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_ACK_HOLD_BACK_TIMEOUT, .procname = "acknowledgement_hold_back_timeout", .data = &sysctl_x25_ack_holdback_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .strategy = sysctl_intvec, .extra1 = &min_timer, .extra2 = &max_timer, }, { - .ctl_name = NET_X25_FORWARD, .procname = "x25_forward", .data = &sysctl_x25_forward, .maxlen = sizeof(int), @@ -85,8 +74,8 @@ static struct ctl_table x25_table[] = { }; static struct ctl_path x25_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "x25", .ctl_name = NET_X25, }, + { .procname = "net", }, + { .procname = "x25", }, { } }; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 7d7c3abf38b..96d92278354 100644 --- a/net/x25/x25_in.c 
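As a side note on the sysctl conversions in this series (the tr, x25 and xfrm tables all drop .ctl_name and .strategy): a hedged, self-contained sketch of a table written in the new style and registered from a module. The "example" names are invented; register_sysctl_paths() and unregister_sysctl_table() are the registration interfaces available in this kernel generation.

#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/init.h>

static int example_value = 1;

static struct ctl_table example_table[] = {
	{
		/* no .ctl_name, no .strategy: the binary sysctl path is gone */
		.procname	= "example_value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ },				/* terminator, "{ }" rather than "{ 0 }" */
};

static struct ctl_path example_path[] = {
	{ .procname = "net", },
	{ .procname = "example", },
	{ },
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	/* creates /proc/sys/net/example/example_value */
	example_header = register_sysctl_paths(example_path, example_table);
	return example_header ? 0 : -ENOMEM;
}

static void __exit example_sysctl_exit(void)
{
	unregister_sysctl_table(example_header);
}

module_init(example_sysctl_init);
module_exit(example_sysctl_exit);
MODULE_LICENSE("GPL");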
+++ b/net/x25/x25_in.c @@ -114,7 +114,7 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp /* * Copy any Call User Data. */ - if (skb->len >= 0) { + if (skb->len > 0) { skb_copy_from_linear_data(skb, x25->calluserdata.cuddata, skb->len); diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c index 2c999ccf504..b95fae9ab39 100644 --- a/net/x25/x25_route.c +++ b/net/x25/x25_route.c @@ -136,8 +136,10 @@ struct net_device *x25_dev_get(char *devname) #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) && dev->type != ARPHRD_ETHER #endif - ))) + ))){ dev_put(dev); + dev = NULL; + } return dev; } @@ -190,7 +192,7 @@ int x25_route_ioctl(unsigned int cmd, void __user *arg) goto out; rc = -EINVAL; - if (rt.sigdigits < 0 || rt.sigdigits > 15) + if (rt.sigdigits > 15) goto out; dev = x25_dev_get(rt.device); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 511a5986af3..352b32d216f 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -225,6 +225,12 @@ void x25_write_internal(struct sock *sk, int frametype) break; case X25_CLEAR_REQUEST: + dptr = skb_put(skb, 3); + *dptr++ = frametype; + *dptr++ = x25->causediag.cause; + *dptr++ = x25->causediag.diagnostic; + break; + case X25_RESET_REQUEST: dptr = skb_put(skb, 3); *dptr++ = frametype; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index faf54c6bf96..743c0134a6a 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -200,6 +200,40 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { + .name = "hmac(sha384)", + + .uinfo = { + .auth = { + .icv_truncbits = 192, + .icv_fullbits = 384, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_384HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 384, + .sadb_alg_maxbits = 384 + } +}, +{ + .name = "hmac(sha512)", + + .uinfo = { + .auth = { + .icv_truncbits = 256, + .icv_fullbits = 512, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_SHA2_512HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 512, + .sadb_alg_maxbits = 512 + } +}, +{ .name = "hmac(rmd160)", .compat = "rmd160", @@ -365,6 +399,7 @@ static struct xfrm_algo_desc ealg_list[] = { }, { .name = "cbc(camellia)", + .compat = "camellia", .uinfo = { .encr = { @@ -689,84 +724,6 @@ int xfrm_count_enc_supported(void) } EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); -/* Move to common area: it is shared with AH. */ - -int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, - int offset, int len, icv_update_fn_t icv_update) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - struct sk_buff *frag_iter; - struct scatterlist sg; - int err; - - /* Checksum header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - - sg_init_one(&sg, skb->data + offset, copy); - - err = icv_update(desc, &sg, copy); - if (unlikely(err)) - return err; - - if ((len -= copy) == 0) - return 0; - offset += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - WARN_ON(start > offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - - sg_init_table(&sg, 1); - sg_set_page(&sg, frag->page, copy, - frag->page_offset + offset-start); - - err = icv_update(desc, &sg, copy); - if (unlikely(err)) - return err; - - if (!(len -= copy)) - return 0; - offset += copy; - } - start = end; - } - - skb_walk_frags(skb, frag_iter) { - int end; - - WARN_ON(start > offset + len); - - end = start + frag_iter->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - err = skb_icv_walk(frag_iter, desc, offset-start, - copy, icv_update); - if (unlikely(err)) - return err; - if ((len -= copy) == 0) - return 0; - offset += copy; - } - start = end; - } - BUG_ON(len); - return 0; -} -EXPORT_SYMBOL_GPL(skb_icv_walk); - #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f2f7c638083..d847f1a52b4 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -21,6 +21,9 @@ #include <linux/cache.h> #include <linux/audit.h> #include <asm/uaccess.h> +#include <linux/ktime.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> #include "xfrm_hash.h" @@ -352,7 +355,7 @@ static void xfrm_put_mode(struct xfrm_mode *mode) static void xfrm_state_gc_destroy(struct xfrm_state *x) { - del_timer_sync(&x->timer); + tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); @@ -398,9 +401,10 @@ static inline unsigned long make_jiffies(long secs) return secs*HZ; } -static void xfrm_timer_handler(unsigned long data) +static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) { - struct xfrm_state *x = (struct xfrm_state*)data; + struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); + struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); struct net *net = xs_net(x); unsigned long now = get_seconds(); long next = LONG_MAX; @@ -451,8 +455,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX) - mod_timer(&x->timer, jiffies + make_jiffies(next)); + if (next != LONG_MAX){ + tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); + } goto out; @@ -474,6 +479,7 @@ expired: out: spin_unlock(&x->lock); + return HRTIMER_NORESTART; } static void xfrm_replay_timer_handler(unsigned long data); @@ -492,7 +498,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); - setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x); + tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS); setup_timer(&x->rtimer, xfrm_replay_timer_handler, (unsigned long)x); x->curlft.add_time = get_seconds(); @@ -843,8 +849,7 @@ found: hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; - x->timer.expires = jiffies + 
net->xfrm.sysctl_acq_expires*HZ; - add_timer(&x->timer); + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { @@ -921,7 +926,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - mod_timer(&x->timer, jiffies + HZ); + tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); @@ -1019,8 +1024,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.reqid = reqid; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; - add_timer(&x->timer); + tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); h = xfrm_src_hash(net, daddr, saddr, family); @@ -1110,7 +1114,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) x->props.saddr = orig->props.saddr; if (orig->aalg) { - x->aalg = xfrm_algo_clone(orig->aalg); + x->aalg = xfrm_algo_auth_clone(orig->aalg); if (!x->aalg) goto error; } @@ -1300,7 +1304,7 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - mod_timer(&x1->timer, jiffies + HZ); + tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -1325,7 +1329,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - mod_timer(&x->timer, jiffies); + tasklet_hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL); return -EINVAL; } diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 2e6ffb66f06..2e221f2cad7 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -13,28 +13,24 @@ static void __xfrm_sysctl_init(struct net *net) #ifdef CONFIG_SYSCTL static struct ctl_table xfrm_table[] = { { - .ctl_name = NET_CORE_AEVENT_ETIME, .procname = "xfrm_aevent_etime", .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_dointvec }, { - .ctl_name = NET_CORE_AEVENT_RSEQTH, .procname = "xfrm_aevent_rseqth", .maxlen = sizeof(u32), .mode = 0644, .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_larval_drop", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, { - .ctl_name = CTL_UNNUMBERED, .procname = "xfrm_acq_expires", .maxlen = sizeof(int), .mode = 0644, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b95a2d64eb5..1ada6186933 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -62,6 +62,22 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; } +static int verify_auth_trunc(struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC]; + struct xfrm_algo_auth *algp; + + if (!rt) + return 0; + + algp = nla_data(rt); + if (nla_len(rt) < xfrm_alg_auth_len(algp)) + return -EINVAL; + + algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0'; + return 0; +} + static int verify_aead(struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; @@ -128,7 +144,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if 
(!attrs[XFRMA_ALG_AUTH] || + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC]) || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP]) @@ -139,10 +156,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if (attrs[XFRMA_ALG_COMP]) goto out; if (!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC] && !attrs[XFRMA_ALG_CRYPT] && !attrs[XFRMA_ALG_AEAD]) goto out; if ((attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) && attrs[XFRMA_ALG_AEAD]) goto out; @@ -152,6 +171,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if (!attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) goto out; break; @@ -161,6 +181,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case IPPROTO_ROUTING: if (attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || @@ -176,6 +197,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, if ((err = verify_aead(attrs))) goto out; + if ((err = verify_auth_trunc(attrs))) + goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) @@ -229,6 +252,66 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } +static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo *ualg; + struct xfrm_algo_auth *p; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + *props = algo->desc.sadb_alg_id; + + p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + p->alg_key_len = ualg->alg_key_len; + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + memcpy(p->alg_key, ualg->alg_key, (ualg->alg_key_len + 7) / 8); + + *algpp = p; + return 0; +} + +static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, + struct nlattr *rta) +{ + struct xfrm_algo_auth *p, *ualg; + struct xfrm_algo_desc *algo; + + if (!rta) + return 0; + + ualg = nla_data(rta); + + algo = xfrm_aalg_get_byname(ualg->alg_name, 1); + if (!algo) + return -ENOSYS; + if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) + return -EINVAL; + *props = algo->desc.sadb_alg_id; + + p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL); + if (!p) + return -ENOMEM; + + strcpy(p->alg_name, algo->name); + if (!p->alg_trunc_len) + p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; + + *algpp = p; + return 0; +} + static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, struct nlattr *rta) { @@ -332,10 +415,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if ((err = attach_aead(&x->aead, &x->props.ealgo, attrs[XFRMA_ALG_AEAD]))) goto error; - if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, - xfrm_aalg_get_byname, - attrs[XFRMA_ALG_AUTH]))) + if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH_TRUNC]))) goto error; + if (!x->props.aalgo) { + if ((err = attach_auth(&x->aalg, &x->props.aalgo, + attrs[XFRMA_ALG_AUTH]))) + goto error; + } if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, attrs[XFRMA_ALG_CRYPT]))) @@ -548,6 +635,24 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } +static int copy_to_user_auth(struct 
xfrm_algo_auth *auth, struct sk_buff *skb) +{ + struct xfrm_algo *algo; + struct nlattr *nla; + + nla = nla_reserve(skb, XFRMA_ALG_AUTH, + sizeof(*algo) + (auth->alg_key_len + 7) / 8); + if (!nla) + return -EMSGSIZE; + + algo = nla_data(nla); + strcpy(algo->alg_name, auth->alg_name); + memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); + algo->alg_key_len = auth->alg_key_len; + + return 0; +} + /* Don't change this without updating xfrm_sa_len! */ static int copy_to_user_state_extra(struct xfrm_state *x, struct xfrm_usersa_info *p, @@ -563,8 +668,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->aead) NLA_PUT(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); - if (x->aalg) - NLA_PUT(skb, XFRMA_ALG_AUTH, xfrm_alg_len(x->aalg), x->aalg); + if (x->aalg) { + if (copy_to_user_auth(x->aalg, skb)) + goto nla_put_failure; + + NLA_PUT(skb, XFRMA_ALG_AUTH_TRUNC, + xfrm_alg_auth_len(x->aalg), x->aalg); + } if (x->ealg) NLA_PUT(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); if (x->calg) @@ -2117,8 +2227,11 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) size_t l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); - if (x->aalg) - l += nla_total_size(xfrm_alg_len(x->aalg)); + if (x->aalg) { + l += nla_total_size(sizeof(struct xfrm_algo) + + (x->aalg->alg_key_len + 7) / 8); + l += nla_total_size(xfrm_alg_auth_len(x->aalg)); + } if (x->ealg) l += nla_total_size(xfrm_alg_len(x->ealg)); if (x->calg) @@ -2608,22 +2721,24 @@ static int __net_init xfrm_user_net_init(struct net *net) xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; + net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } -static void __net_exit xfrm_user_net_exit(struct net *net) +static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { - struct sock *nlsk = net->xfrm.nlsk; - - rcu_assign_pointer(net->xfrm.nlsk, NULL); - synchronize_rcu(); - netlink_kernel_release(nlsk); + struct net *net; + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->xfrm.nlsk, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->xfrm.nlsk_stash); } static struct pernet_operations xfrm_user_net_ops = { - .init = xfrm_user_net_init, - .exit = xfrm_user_net_exit, + .init = xfrm_user_net_init, + .exit_batch = xfrm_user_net_exit, }; static int __init xfrm_user_init(void) |
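Finally, for the XFRMA_ALG_AUTH_TRUNC plumbing added to xfrm_user above: a hedged user-space sketch of the variable-length payload that attribute carries, assuming the companion include/linux/xfrm.h change (outside this net-only diffstat) that defines struct xfrm_algo_auth and the new attribute. Netlink framing is omitted and the key bytes are placeholders.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/xfrm.h>

int main(void)
{
	const unsigned int key_bits  = 256;               /* hmac(sha256) key */
	const unsigned int key_bytes = (key_bits + 7) / 8;

	/* Same rounding rule the kernel's xfrm_alg_auth_len() helper uses:
	 * fixed header plus the key rounded up to whole bytes. */
	size_t payload = sizeof(struct xfrm_algo_auth) + key_bytes;
	struct xfrm_algo_auth *auth = calloc(1, payload);

	if (!auth)
		return 1;

	strncpy(auth->alg_name, "hmac(sha256)", sizeof(auth->alg_name) - 1);
	auth->alg_key_len   = key_bits;   /* in bits */
	auth->alg_trunc_len = 128;        /* ask for a 128-bit ICV (RFC 4868)
					   * instead of the legacy 96-bit
					   * in-kernel default truncation  */
	memset(auth->alg_key, 0xAA, key_bytes);    /* placeholder key only */

	printf("XFRMA_ALG_AUTH_TRUNC payload: %zu bytes\n", payload);
	free(auth);
	return 0;
}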