From ab73b751303bc60d7d9fba875c958dedfe14754c Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 10 Apr 2012 12:51:52 +0200 Subject: NFC: Export LLCP general bytes getter Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index b7ca4a2a1d7..3116f923f60 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -188,6 +188,7 @@ struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp); int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gt, u8 gt_len); +u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len); int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets, int ntargets); -- cgit v1.2.3-70-g09d2 From fe7c580073280c15bb4eb4f82bf20dddc1a68383 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 15 May 2012 15:57:06 +0200 Subject: NFC: Add target mode protocols to the polling loop startup routine Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 39 +++++++++++++++++++++++++++++---------- drivers/nfc/pn544_hci.c | 10 ++++++---- include/linux/nfc.h | 4 ++++ include/net/nfc/hci.h | 3 ++- include/net/nfc/nfc.h | 3 ++- include/net/nfc/shdlc.h | 3 ++- net/nfc/core.c | 10 +++++----- net/nfc/hci/core.c | 5 +++-- net/nfc/hci/shdlc.c | 6 ++++-- net/nfc/nci/core.c | 7 ++++--- net/nfc/netlink.c | 19 +++++++++++++++---- net/nfc/nfc.h | 2 +- 12 files changed, 77 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 19110f0eb15..38a523c6213 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1078,27 +1078,23 @@ stop_poll: return 0; } -static int pn533_start_poll(struct nfc_dev *nfc_dev, u32 protocols) +static int pn533_init_target(struct nfc_dev *nfc_dev, u32 protocols) +{ + return 0; +} + +static int pn533_start_im_poll(struct nfc_dev *nfc_dev, u32 protocols) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_poll_modulations *start_mod; int rc; - nfc_dev_dbg(&dev->interface->dev, "%s - protocols=0x%x", __func__, - protocols); - if (dev->poll_mod_count) { nfc_dev_err(&dev->interface->dev, "Polling operation already" " active"); return -EBUSY; } - if (dev->tgt_active_prot) { - nfc_dev_err(&dev->interface->dev, "Cannot poll with a target" - " already activated"); - return -EBUSY; - } - pn533_poll_create_mod_list(dev, protocols); if (!dev->poll_mod_count) { @@ -1135,6 +1131,29 @@ error: return rc; } +static int pn533_start_poll(struct nfc_dev *nfc_dev, + u32 im_protocols, u32 tm_protocols) +{ + struct pn533 *dev = nfc_get_drvdata(nfc_dev); + + nfc_dev_dbg(&dev->interface->dev, + "%s: im protocols 0x%x tm protocols 0x%x", + __func__, im_protocols, tm_protocols); + + if (dev->tgt_active_prot) { + nfc_dev_err(&dev->interface->dev, + "Cannot poll with a target already activated"); + return -EBUSY; + } + + if (!tm_protocols) + return pn533_start_im_poll(nfc_dev, im_protocols); + else if (!im_protocols) + return pn533_init_target(nfc_dev, tm_protocols); + else + return -EINVAL; +} + static void pn533_stop_poll(struct nfc_dev *nfc_dev) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c index 281f18c2fb8..457eac35dc7 100644 --- a/drivers/nfc/pn544_hci.c +++ b/drivers/nfc/pn544_hci.c @@ -576,7 +576,8 @@ static int pn544_hci_xmit(struct nfc_shdlc *shdlc, struct sk_buff *skb) return pn544_hci_i2c_write(client, skb->data, skb->len); } -static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, u32 protocols) +static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, + u32 im_protocols, u32 tm_protocols) { struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc); u8 phases = 0; @@ -584,7 +585,8 @@ static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, u32 protocols) u8 duration[2]; u8 activated; - pr_info(DRIVER_DESC ": %s protocols = %d\n", __func__, protocols); + pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n", + __func__, im_protocols, tm_protocols); r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, NFC_HCI_EVT_END_OPERATION, NULL, 0); @@ -604,10 +606,10 @@ static int pn544_hci_start_poll(struct nfc_shdlc *shdlc, u32 protocols) if (r < 0) return r; - if (protocols & (NFC_PROTO_ISO14443_MASK | NFC_PROTO_MIFARE_MASK | + if (im_protocols & (NFC_PROTO_ISO14443_MASK | NFC_PROTO_MIFARE_MASK | NFC_PROTO_JEWEL_MASK)) phases |= 1; /* Type A */ - if (protocols & NFC_PROTO_FELICA_MASK) { + if (im_protocols & NFC_PROTO_FELICA_MASK) { phases |= (1 << 2); /* Type F 212 */ phases |= (1 << 3); /* Type F 424 */ } diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 0ae9b5857c8..548715881fb 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -94,6 +94,8 @@ enum nfc_commands { * @NFC_ATTR_TARGET_SENSF_RES: NFC-F targets extra information, max 18 bytes * @NFC_ATTR_COMM_MODE: Passive or active mode * @NFC_ATTR_RF_MODE: Initiator or target + * @NFC_ATTR_IM_PROTOCOLS: Initiator mode protocols to poll for + * @NFC_ATTR_TM_PROTOCOLS: Target mode protocols to listen for */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -109,6 +111,8 @@ enum nfc_attrs { NFC_ATTR_COMM_MODE, NFC_ATTR_RF_MODE, NFC_ATTR_DEVICE_POWERED, + NFC_ATTR_IM_PROTOCOLS, + NFC_ATTR_TM_PROTOCOLS, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 4467c946085..e30e6a86971 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -31,7 +31,8 @@ struct nfc_hci_ops { void (*close) (struct nfc_hci_dev *hdev); int (*hci_ready) (struct nfc_hci_dev *hdev); int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb); - int (*start_poll) (struct nfc_hci_dev *hdev, u32 protocols); + int (*start_poll) (struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols); int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate, struct nfc_target *target); int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 3116f923f60..97aa0e81aa8 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -53,7 +53,8 @@ struct nfc_target; struct nfc_ops { int (*dev_up)(struct nfc_dev *dev); int (*dev_down)(struct nfc_dev *dev); - int (*start_poll)(struct nfc_dev *dev, u32 protocols); + int (*start_poll)(struct nfc_dev *dev, + u32 im_protocols, u32 tm_protocols); void (*stop_poll)(struct nfc_dev *dev); int (*dep_link_up)(struct nfc_dev *dev, struct nfc_target *target, u8 comm_mode, u8 *gb, size_t gb_len); diff --git a/include/net/nfc/shdlc.h b/include/net/nfc/shdlc.h index ab06afd462d..35e930d2f63 100644 --- a/include/net/nfc/shdlc.h +++ b/include/net/nfc/shdlc.h @@ -27,7 +27,8 @@ struct nfc_shdlc_ops { void (*close) (struct nfc_shdlc *shdlc); int (*hci_ready) (struct nfc_shdlc *shdlc); int (*xmit) (struct nfc_shdlc *shdlc, struct sk_buff *skb); - int (*start_poll) (struct nfc_shdlc *shdlc, u32 protocols); + int (*start_poll) (struct nfc_shdlc *shdlc, + u32 im_protocols, u32 tm_protocols); int (*target_from_gate) (struct nfc_shdlc *shdlc, u8 gate, struct nfc_target *target); int (*complete_target_discovered) (struct nfc_shdlc *shdlc, u8 gate, diff --git a/net/nfc/core.c b/net/nfc/core.c index f5a43f701a9..c83717bfcb8 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -121,14 +121,14 @@ error: * The device remains polling for targets until a target is found or * the nfc_stop_poll function is called. */ -int nfc_start_poll(struct nfc_dev *dev, u32 protocols) +int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) { int rc; - pr_debug("dev_name=%s protocols=0x%x\n", - dev_name(&dev->dev), protocols); + pr_debug("dev_name %s initiator protocols 0x%x target protocols 0x%x\n", + dev_name(&dev->dev), im_protocols, tm_protocols); - if (!protocols) + if (!im_protocols && !tm_protocols) return -EINVAL; device_lock(&dev->dev); @@ -143,7 +143,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 protocols) goto error; } - rc = dev->ops->start_poll(dev, protocols); + rc = dev->ops->start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->polling = true; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e1a640d2b58..281f3a3bec0 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -481,12 +481,13 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) return 0; } -static int hci_start_poll(struct nfc_dev *nfc_dev, u32 protocols) +static int hci_start_poll(struct nfc_dev *nfc_dev, + u32 im_protocols, u32 tm_protocols) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); if (hdev->ops->start_poll) - return hdev->ops->start_poll(hdev, protocols); + return hdev->ops->start_poll(hdev, im_protocols, tm_protocols); else return nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE, NFC_HCI_EVT_READER_REQUESTED, NULL, 0); diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 5665dc6d893..6b836e6242b 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -765,14 +765,16 @@ static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb) return 0; } -static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev, u32 protocols) +static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev, + u32 im_protocols, u32 tm_protocols) { struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev); pr_debug("\n"); if (shdlc->ops->start_poll) - return shdlc->ops->start_poll(shdlc, protocols); + return shdlc->ops->start_poll(shdlc, + im_protocols, tm_protocols); return 0; } diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index d560e6f1307..0f718982f80 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -387,7 +387,8 @@ static int nci_dev_down(struct nfc_dev *nfc_dev) return nci_close_device(ndev); } -static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 protocols) +static int nci_start_poll(struct nfc_dev *nfc_dev, + __u32 im_protocols, __u32 tm_protocols) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; @@ -413,11 +414,11 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, __u32 protocols) return -EBUSY; } - rc = nci_request(ndev, nci_rf_discover_req, protocols, + rc = nci_request(ndev, nci_rf_discover_req, im_protocols, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) - ndev->poll_prots = protocols; + ndev->poll_prots = im_protocols; return rc; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 581d419083a..a18fd56798f 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -49,6 +49,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, + [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -519,16 +521,25 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) struct nfc_dev *dev; int rc; u32 idx; - u32 protocols; + u32 im_protocols = 0, tm_protocols = 0; pr_debug("Poll start\n"); if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || - !info->attrs[NFC_ATTR_PROTOCOLS]) + ((!info->attrs[NFC_ATTR_IM_PROTOCOLS] && + !info->attrs[NFC_ATTR_PROTOCOLS]) && + !info->attrs[NFC_ATTR_TM_PROTOCOLS])) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); - protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); + + if (info->attrs[NFC_ATTR_TM_PROTOCOLS]) + tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_TM_PROTOCOLS]); + else if (info->attrs[NFC_ATTR_PROTOCOLS]) + tm_protocols = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); + + if (info->attrs[NFC_ATTR_IM_PROTOCOLS]) + im_protocols = nla_get_u32(info->attrs[NFC_ATTR_IM_PROTOCOLS]); dev = nfc_get_device(idx); if (!dev) @@ -536,7 +547,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - rc = nfc_start_poll(dev, protocols); + rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) dev->genl_data.poll_req_pid = info->snd_pid; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 3dd4232ae66..7d9708f2a66 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -158,7 +158,7 @@ int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); -int nfc_start_poll(struct nfc_dev *dev, u32 protocols); +int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols); int nfc_stop_poll(struct nfc_dev *dev); -- cgit v1.2.3-70-g09d2 From fc40a8c1a06ab7db45da790693dd9802612a055c Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Fri, 1 Jun 2012 13:21:13 +0200 Subject: NFC: Add target mode activation netlink event Userspace gets a netlink event upon target mode activation. The LLCP layer is also signaled when we get an ATR_REQ in order to get the remote general bytes. Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 27 ++++++++++++++++++++-- include/linux/nfc.h | 7 ++++++ include/net/nfc/nfc.h | 4 ++++ net/nfc/core.c | 35 +++++++++++++++++++++++++++++ net/nfc/netlink.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 3 +++ 6 files changed, 136 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 605a08a62e4..c6b9bc5ac6e 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -260,6 +260,10 @@ struct pn533_cmd_jump_dep_response { #define PN533_INIT_TARGET_PASSIVE 0x1 #define PN533_INIT_TARGET_DEP 0x2 +#define PN533_INIT_TARGET_RESP_FRAME_MASK 0x3 +#define PN533_INIT_TARGET_RESP_ACTIVE 0x1 +#define PN533_INIT_TARGET_RESP_DEP 0x4 + struct pn533_cmd_init_target { u8 mode; u8 mifare[6]; @@ -1128,10 +1132,13 @@ static int pn533_init_target_frame(struct pn533_frame *frame, return 0; } +#define ATR_REQ_GB_OFFSET 17 static int pn533_init_target_complete(struct pn533 *dev, void *arg, u8 *params, int params_len) { struct pn533_cmd_init_target_response *resp; + u8 frame, comm_mode = NFC_COMM_PASSIVE, *gb; + size_t gb_len; nfc_dev_dbg(&dev->interface->dev, "%s", __func__); @@ -1143,11 +1150,27 @@ static int pn533_init_target_complete(struct pn533 *dev, void *arg, return params_len; } + if (params_len < ATR_REQ_GB_OFFSET + 1) + return -EINVAL; + resp = (struct pn533_cmd_init_target_response *) params; - nfc_dev_dbg(&dev->interface->dev, "Target mode 0x%x\n", resp->mode); + nfc_dev_dbg(&dev->interface->dev, "Target mode 0x%x param len %d\n", + resp->mode, params_len); - return 0; + frame = resp->mode & PN533_INIT_TARGET_RESP_FRAME_MASK; + if (frame == PN533_INIT_TARGET_RESP_ACTIVE) + comm_mode = NFC_COMM_ACTIVE; + + /* Again, only DEP */ + if ((resp->mode & PN533_INIT_TARGET_RESP_DEP) == 0) + return -EOPNOTSUPP; + + gb = resp->cmd + ATR_REQ_GB_OFFSET; + gb_len = params_len - (ATR_REQ_GB_OFFSET + 1); + + return nfc_tm_activated(dev->nfc_dev, NFC_PROTO_NFC_DEP_MASK, + comm_mode, gb, gb_len); } static int pn533_init_target(struct nfc_dev *nfc_dev, u32 protocols) diff --git a/include/linux/nfc.h b/include/linux/nfc.h index 548715881fb..d124e9273fc 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -56,6 +56,10 @@ * %NFC_ATTR_PROTOCOLS) * @NFC_EVENT_DEVICE_REMOVED: event emitted when a device is removed * (it sends %NFC_ATTR_DEVICE_INDEX) + * @NFC_EVENT_TM_ACTIVATED: event emitted when the adapter is activated in + * target mode. + * @NFC_EVENT_DEVICE_DEACTIVATED: event emitted when the adapter is deactivated + * from target mode. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -71,6 +75,8 @@ enum nfc_commands { NFC_EVENT_DEVICE_ADDED, NFC_EVENT_DEVICE_REMOVED, NFC_EVENT_TARGET_LOST, + NFC_EVENT_TM_ACTIVATED, + NFC_EVENT_TM_DEACTIVATED, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -122,6 +128,7 @@ enum nfc_attrs { #define NFC_NFCID1_MAXSIZE 10 #define NFC_SENSB_RES_MAXSIZE 12 #define NFC_SENSF_RES_MAXSIZE 18 +#define NFC_GB_MAXSIZE 48 /* NFC protocols */ #define NFC_PROTO_JEWEL 1 diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 97aa0e81aa8..41573b4bd78 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -198,4 +198,8 @@ int nfc_target_lost(struct nfc_dev *dev, u32 target_idx); int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); +int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, + u8 *gb, size_t gb_len); +int nfc_tm_deactivated(struct nfc_dev *dev); + #endif /* __NET_NFC_H */ diff --git a/net/nfc/core.c b/net/nfc/core.c index c83717bfcb8..17f147430b7 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -455,6 +455,41 @@ u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len) } EXPORT_SYMBOL(nfc_get_local_general_bytes); +int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, + u8 *gb, size_t gb_len) +{ + int rc; + + device_lock(&dev->dev); + + dev->polling = false; + + if (gb != NULL) { + rc = nfc_set_remote_general_bytes(dev, gb, gb_len); + if (rc < 0) + goto out; + } + + if (protocol == NFC_PROTO_NFC_DEP_MASK) + nfc_dep_link_is_up(dev, 0, comm_mode, NFC_RF_TARGET); + + rc = nfc_genl_tm_activated(dev, protocol); + +out: + device_unlock(&dev->dev); + + return rc; +} +EXPORT_SYMBOL(nfc_tm_activated); + +int nfc_tm_deactivated(struct nfc_dev *dev) +{ + dev->dep_link_up = false; + + return nfc_genl_tm_deactivated(dev); +} +EXPORT_SYMBOL(nfc_tm_deactivated); + /** * nfc_alloc_send_skb - allocate a skb for data exchange responses * diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index a18fd56798f..21eaa9b5c6b 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -221,6 +221,68 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_TM_ACTIVATED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + if (nla_put_u32(msg, NFC_ATTR_TM_PROTOCOLS, protocol)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_tm_deactivated(struct nfc_dev *dev) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_TM_DEACTIVATED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + int nfc_genl_device_added(struct nfc_dev *dev) { struct sk_buff *msg; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 7d9708f2a66..cd9fcbe5746 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -128,6 +128,9 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_genl_dep_link_down_event(struct nfc_dev *dev); +int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); +int nfc_genl_tm_deactivated(struct nfc_dev *dev); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) -- cgit v1.2.3-70-g09d2 From f212ad5e993e7efb996fc8ce94a5de8f0bd06d41 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 31 May 2012 00:02:26 +0200 Subject: NFC: Set the NFC device RF mode appropriately Signed-off-by: Samuel Ortiz --- include/linux/nfc.h | 1 + include/net/nfc/nfc.h | 2 +- net/nfc/core.c | 14 ++++++++++---- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/linux/nfc.h b/include/linux/nfc.h index d124e9273fc..f4e6dd915b1 100644 --- a/include/linux/nfc.h +++ b/include/linux/nfc.h @@ -146,6 +146,7 @@ enum nfc_attrs { /* NFC RF modes */ #define NFC_RF_INITIATOR 0 #define NFC_RF_TARGET 1 +#define NFC_RF_NONE 2 /* NFC protocols masks used in bitsets */ #define NFC_PROTO_JEWEL_MASK (1 << NFC_PROTO_JEWEL) diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 41573b4bd78..a6a7b49a3e2 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -100,10 +100,10 @@ struct nfc_dev { int targets_generation; struct device dev; bool dev_up; + u8 rf_mode; bool polling; struct nfc_target *active_target; bool dep_link_up; - u32 dep_rf_mode; struct nfc_genl_data genl_data; u32 supported_protocols; diff --git a/net/nfc/core.c b/net/nfc/core.c index 17f147430b7..722a0c76c66 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -144,8 +144,10 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols) } rc = dev->ops->start_poll(dev, im_protocols, tm_protocols); - if (!rc) + if (!rc) { dev->polling = true; + dev->rf_mode = NFC_RF_NONE; + } error: device_unlock(&dev->dev); @@ -235,8 +237,10 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode) } rc = dev->ops->dep_link_up(dev, target, comm_mode, gb, gb_len); - if (!rc) + if (!rc) { dev->active_target = target; + dev->rf_mode = NFC_RF_INITIATOR; + } error: device_unlock(&dev->dev); @@ -264,7 +268,7 @@ int nfc_dep_link_down(struct nfc_dev *dev) goto error; } - if (dev->dep_rf_mode == NFC_RF_TARGET) { + if (dev->rf_mode == NFC_RF_TARGET) { rc = -EOPNOTSUPP; goto error; } @@ -286,7 +290,6 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode) { dev->dep_link_up = true; - dev->dep_rf_mode = rf_mode; nfc_llcp_mac_is_up(dev, target_idx, comm_mode, rf_mode); @@ -330,6 +333,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol) rc = dev->ops->activate_target(dev, target, protocol); if (!rc) { dev->active_target = target; + dev->rf_mode = NFC_RF_INITIATOR; if (dev->ops->check_presence) mod_timer(&dev->check_pres_timer, jiffies + @@ -470,6 +474,8 @@ int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, goto out; } + dev->rf_mode = NFC_RF_TARGET; + if (protocol == NFC_PROTO_NFC_DEP_MASK) nfc_dep_link_is_up(dev, 0, comm_mode, NFC_RF_TARGET); -- cgit v1.2.3-70-g09d2 From be9ae4ce4ee66e211815122ab4f41913efed4fec Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 16 May 2012 15:55:48 +0200 Subject: NFC: Introduce target mode tx ops And rename the initiator mode data exchange ops for consistency sake. Signed-off-by: Samuel Ortiz --- drivers/nfc/pn533.c | 8 ++++---- include/net/nfc/nfc.h | 3 ++- net/nfc/core.c | 37 ++++++++++++++++++++----------------- net/nfc/hci/core.c | 8 ++++---- net/nfc/nci/core.c | 8 ++++---- 5 files changed, 34 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index c6b9bc5ac6e..fd94c6f5d6a 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1691,9 +1691,9 @@ error: return 0; } -static int pn533_data_exchange(struct nfc_dev *nfc_dev, - struct nfc_target *target, struct sk_buff *skb, - data_exchange_cb_t cb, void *cb_context) +static int pn533_transceive(struct nfc_dev *nfc_dev, + struct nfc_target *target, struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context) { struct pn533 *dev = nfc_get_drvdata(nfc_dev); struct pn533_frame *out_frame, *in_frame; @@ -1853,7 +1853,7 @@ struct nfc_ops pn533_nfc_ops = { .stop_poll = pn533_stop_poll, .activate_target = pn533_activate_target, .deactivate_target = pn533_deactivate_target, - .data_exchange = pn533_data_exchange, + .im_transceive = pn533_transceive, }; static int pn533_probe(struct usb_interface *interface, diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index a6a7b49a3e2..45c4c970575 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -63,9 +63,10 @@ struct nfc_ops { u32 protocol); void (*deactivate_target)(struct nfc_dev *dev, struct nfc_target *target); - int (*data_exchange)(struct nfc_dev *dev, struct nfc_target *target, + int (*im_transceive)(struct nfc_dev *dev, struct nfc_target *target, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); + int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb); int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target); }; diff --git a/net/nfc/core.c b/net/nfc/core.c index 722a0c76c66..76c1e207d29 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -413,27 +413,30 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, goto error; } - if (dev->active_target == NULL) { - rc = -ENOTCONN; - kfree_skb(skb); - goto error; - } + if (dev->rf_mode == NFC_RF_INITIATOR && dev->active_target != NULL) { + if (dev->active_target->idx != target_idx) { + rc = -EADDRNOTAVAIL; + kfree_skb(skb); + goto error; + } - if (dev->active_target->idx != target_idx) { - rc = -EADDRNOTAVAIL; + if (dev->ops->check_presence) + del_timer_sync(&dev->check_pres_timer); + + rc = dev->ops->im_transceive(dev, dev->active_target, skb, cb, + cb_context); + + if (!rc && dev->ops->check_presence) + mod_timer(&dev->check_pres_timer, jiffies + + msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); + } else if (dev->rf_mode == NFC_RF_TARGET && dev->ops->tm_send != NULL) { + rc = dev->ops->tm_send(dev, skb); + } else { + rc = -ENOTCONN; kfree_skb(skb); goto error; } - if (dev->ops->check_presence) - del_timer_sync(&dev->check_pres_timer); - - rc = dev->ops->data_exchange(dev, dev->active_target, skb, cb, - cb_context); - - if (!rc && dev->ops->check_presence) - mod_timer(&dev->check_pres_timer, jiffies + - msecs_to_jiffies(NFC_CHECK_PRES_FREQ_MS)); error: device_unlock(&dev->dev); @@ -727,7 +730,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, struct nfc_dev *dev; if (!ops->start_poll || !ops->stop_poll || !ops->activate_target || - !ops->deactivate_target || !ops->data_exchange) + !ops->deactivate_target || !ops->im_transceive) return NULL; if (!supported_protocols) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 281f3a3bec0..a8b0b71e8f8 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -512,9 +512,9 @@ static void hci_deactivate_target(struct nfc_dev *nfc_dev, { } -static int hci_data_exchange(struct nfc_dev *nfc_dev, struct nfc_target *target, - struct sk_buff *skb, data_exchange_cb_t cb, - void *cb_context) +static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, + struct sk_buff *skb, data_exchange_cb_t cb, + void *cb_context) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); int r; @@ -580,7 +580,7 @@ static struct nfc_ops hci_nfc_ops = { .stop_poll = hci_stop_poll, .activate_target = hci_activate_target, .deactivate_target = hci_deactivate_target, - .data_exchange = hci_data_exchange, + .im_transceive = hci_transceive, .check_presence = hci_check_presence, }; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 0f718982f80..766a02b1dfa 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -522,9 +522,9 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } } -static int nci_data_exchange(struct nfc_dev *nfc_dev, struct nfc_target *target, - struct sk_buff *skb, - data_exchange_cb_t cb, void *cb_context) +static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, + struct sk_buff *skb, + data_exchange_cb_t cb, void *cb_context) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; @@ -557,7 +557,7 @@ static struct nfc_ops nci_nfc_ops = { .stop_poll = nci_stop_poll, .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, - .data_exchange = nci_data_exchange, + .im_transceive = nci_transceive, }; /* ---- Interface to NCI drivers ---- */ -- cgit v1.2.3-70-g09d2 From 73167ced31d15c04e57b9e0885ac05675e9195a4 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 31 May 2012 00:05:50 +0200 Subject: NFC: Introduce target mode rx data callback This routine will be called by drivers whenever they receive data in target mode. This should be unexpected events and as such should be handled by a standalone API (i.e. not as a callback pointer from an existing API). Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 1 + net/nfc/core.c | 12 ++++++++++++ net/nfc/llcp/llcp.c | 15 +++++++++++++++ net/nfc/nfc.h | 7 +++++++ 4 files changed, 35 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 45c4c970575..180964b954a 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -202,5 +202,6 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, u8 *gb, size_t gb_len); int nfc_tm_deactivated(struct nfc_dev *dev); +int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); #endif /* __NET_NFC_H */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 76c1e207d29..6a3799eebc3 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -462,6 +462,18 @@ u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len) } EXPORT_SYMBOL(nfc_get_local_general_bytes); +int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb) +{ + /* Only LLCP target mode for now */ + if (dev->dep_link_up == false) { + kfree_skb(skb); + return -ENOLINK; + } + + return nfc_llcp_data_received(dev, skb); +} +EXPORT_SYMBOL(nfc_tm_data_received); + int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, u8 *gb, size_t gb_len) { diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 5f7aa3f632f..5705e6dffb3 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -937,6 +937,21 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err) return; } +int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) +{ + struct nfc_llcp_local *local; + + local = nfc_llcp_find_local(dev); + if (local == NULL) + return -ENODEV; + + local->rx_pending = skb_get(skb); + del_timer(&local->link_timer); + queue_work(local->rx_wq, &local->rx_work); + + return 0; +} + void nfc_llcp_mac_is_down(struct nfc_dev *dev) { struct nfc_llcp_local *local; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index cd9fcbe5746..c5e42b79a41 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -55,6 +55,7 @@ int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); +int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); @@ -90,6 +91,12 @@ static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *gb_len) return NULL; } +static inline int nfc_llcp_data_received(struct nfc_dev *dev, + struct sk_buff *skb) +{ + return 0; +} + static inline int nfc_llcp_init(void) { return 0; -- cgit v1.2.3-70-g09d2 From 2827011f666e157f3307d55070a75e1d1110b194 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 21:14:09 -0700 Subject: Bluetooth: Fix early return from l2cap_chan_del This fixes a regression from commit 2ead70b8390d199ca04cd35311b51f5f3676079e that is present in all kernels starting at v3.0. When L2CAP information was moved to struct l2cap_chan, a check was added to l2cap_chan_del to avoid certain cleanup operations when ERTM or streaming mode had not yet been initialized. The logic in the check did not take in to account that chan->conf_state is set to 0 in l2cap_chan_ready, so l2cap_chan_del failed to cancel timers and leaked memory any time the ERTM queues or lists were not empty. This change makes sure that l2cap_chan_del only returns early if ERTM initialization was not performed. Signed-off-by: Mat Martineau Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 1c7d1cd5e67..452fcc4c0ff 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -597,6 +597,7 @@ enum { CONF_EWS_RECV, CONF_LOC_CONF_PEND, CONF_REM_CONF_PEND, + CONF_NOT_COMPLETE, }; #define L2CAP_CONF_MAX_CONF_REQ 2 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 078bf805cd9..d9f215f3f8e 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -392,6 +392,9 @@ struct l2cap_chan *l2cap_chan_create(void) atomic_set(&chan->refcnt, 1); + /* This flag is cleared in l2cap_chan_ready() */ + set_bit(CONF_NOT_COMPLETE, &chan->conf_state); + BT_DBG("chan %p", chan); return chan; @@ -509,8 +512,7 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) release_sock(sk); - if (!(test_bit(CONF_OUTPUT_DONE, &chan->conf_state) && - test_bit(CONF_INPUT_DONE, &chan->conf_state))) + if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; skb_queue_purge(&chan->tx_q); @@ -923,6 +925,7 @@ static void l2cap_chan_ready(struct l2cap_chan *chan) BT_DBG("sk %p, parent %p", sk, parent); + /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); -- cgit v1.2.3-70-g09d2 From f5dbb0772df3feb2bb5eda8a9f0e0acdeb25653f Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 20:53:38 -0700 Subject: Bluetooth: Remove receive code that has been superceded This deletes the receive code that had handlers for each frame type at the top level, and then had logic to determine the receive state within each handler. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 8 - net/bluetooth/l2cap_core.c | 492 ------------------------------------------ 2 files changed, 500 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 452fcc4c0ff..7d1da5a7d11 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -419,11 +419,6 @@ struct l2cap_seq_list { #define L2CAP_SEQ_LIST_CLEAR 0xFFFF #define L2CAP_SEQ_LIST_TAIL 0x8000 -struct srej_list { - __u16 tx_seq; - struct list_head list; -}; - struct l2cap_chan { struct sock *sk; @@ -475,14 +470,12 @@ struct l2cap_chan { __u16 expected_ack_seq; __u16 expected_tx_seq; __u16 buffer_seq; - __u16 buffer_seq_srej; __u16 srej_save_reqseq; __u16 last_acked_seq; __u16 frames_sent; __u16 unacked_frames; __u8 retry_count; __u16 srej_queue_next; - __u8 num_acked; __u16 sdu_len; struct sk_buff *sdu; struct sk_buff *sdu_last_frag; @@ -515,7 +508,6 @@ struct l2cap_chan { struct sk_buff_head srej_q; struct l2cap_seq_list srej_list; struct l2cap_seq_list retrans_list; - struct list_head srej_l; struct list_head list; struct list_head global_l; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0a195ab4a38..d795d15cadf 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -534,8 +534,6 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) skb_queue_purge(&chan->tx_q); if (chan->mode == L2CAP_MODE_ERTM) { - struct srej_list *l, *tmp; - __clear_retrans_timer(chan); __clear_monitor_timer(chan); __clear_ack_timer(chan); @@ -544,10 +542,6 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) l2cap_seq_list_free(&chan->srej_list); l2cap_seq_list_free(&chan->retrans_list); - list_for_each_entry_safe(l, tmp, &chan->srej_l, list) { - list_del(&l->list); - kfree(l); - } } } @@ -1658,25 +1652,6 @@ static void l2cap_retrans_timeout(struct work_struct *work) l2cap_chan_put(chan); } -static void l2cap_drop_acked_frames(struct l2cap_chan *chan) -{ - struct sk_buff *skb; - - while ((skb = skb_peek(&chan->tx_q)) && - chan->unacked_frames) { - if (bt_cb(skb)->control.txseq == chan->expected_ack_seq) - break; - - skb = skb_dequeue(&chan->tx_q); - kfree_skb(skb); - - chan->unacked_frames--; - } - - if (!chan->unacked_frames) - __clear_retrans_timer(chan); -} - static int l2cap_streaming_send(struct l2cap_chan *chan, struct sk_buff_head *skbs) { @@ -1718,53 +1693,6 @@ static int l2cap_streaming_send(struct l2cap_chan *chan, return 0; } -static void l2cap_retransmit_one_frame(struct l2cap_chan *chan, u16 tx_seq) -{ - struct sk_buff *skb, *tx_skb; - u16 fcs; - u32 control; - - skb = skb_peek(&chan->tx_q); - if (!skb) - return; - - while (bt_cb(skb)->control.txseq != tx_seq) { - if (skb_queue_is_last(&chan->tx_q, skb)) - return; - - skb = skb_queue_next(&chan->tx_q, skb); - } - - if (bt_cb(skb)->control.retries == chan->remote_max_tx && - chan->remote_max_tx) { - l2cap_send_disconn_req(chan->conn, chan, ECONNABORTED); - return; - } - - tx_skb = skb_clone(skb, GFP_ATOMIC); - bt_cb(skb)->control.retries++; - - control = __get_control(chan, tx_skb->data + L2CAP_HDR_SIZE); - control &= __get_sar_mask(chan); - - if (test_and_clear_bit(CONN_SEND_FBIT, &chan->conn_state)) - control |= __set_ctrl_final(chan); - - control |= __set_reqseq(chan, chan->buffer_seq); - control |= __set_txseq(chan, tx_seq); - - __put_control(chan, control, tx_skb->data + L2CAP_HDR_SIZE); - - if (chan->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)tx_skb->data, - tx_skb->len - L2CAP_FCS_SIZE); - put_unaligned_le16(fcs, - tx_skb->data + tx_skb->len - L2CAP_FCS_SIZE); - } - - l2cap_do_send(chan, tx_skb); -} - static int l2cap_ertm_send(struct l2cap_chan *chan) { struct sk_buff *skb, *tx_skb; @@ -1868,18 +1796,6 @@ static void l2cap_send_ack(struct l2cap_chan *chan) __l2cap_send_ack(chan); } -static void l2cap_send_srejtail(struct l2cap_chan *chan) -{ - struct srej_list *tail; - u32 control; - - control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); - control |= __set_ctrl_final(chan); - - tail = list_entry((&chan->srej_l)->prev, struct srej_list, list); - control |= __set_reqseq(chan, tail->tx_seq); -} - static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan, struct msghdr *msg, int len, int count, struct sk_buff *skb) @@ -2639,7 +2555,6 @@ static inline int l2cap_ertm_init(struct l2cap_chan *chan) chan->expected_ack_seq = 0; chan->unacked_frames = 0; chan->buffer_seq = 0; - chan->num_acked = 0; chan->frames_sent = 0; chan->last_acked_seq = 0; chan->sdu = NULL; @@ -2660,7 +2575,6 @@ static inline int l2cap_ertm_init(struct l2cap_chan *chan) skb_queue_head_init(&chan->srej_q); - INIT_LIST_HEAD(&chan->srej_l); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); if (err < 0) return err; @@ -4277,41 +4191,6 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct l2cap_chan *chan) } } -static int l2cap_add_to_srej_queue(struct l2cap_chan *chan, struct sk_buff *skb, u16 tx_seq, u8 sar) -{ - struct sk_buff *next_skb; - int tx_seq_offset, next_tx_seq_offset; - - bt_cb(skb)->control.txseq = tx_seq; - bt_cb(skb)->control.sar = sar; - - next_skb = skb_peek(&chan->srej_q); - - tx_seq_offset = __seq_offset(chan, tx_seq, chan->buffer_seq); - - while (next_skb) { - if (bt_cb(next_skb)->control.txseq == tx_seq) - return -EINVAL; - - next_tx_seq_offset = __seq_offset(chan, - bt_cb(next_skb)->control.txseq, chan->buffer_seq); - - if (next_tx_seq_offset > tx_seq_offset) { - __skb_queue_before(&chan->srej_q, next_skb, skb); - return 0; - } - - if (skb_queue_is_last(&chan->srej_q, next_skb)) - next_skb = NULL; - else - next_skb = skb_queue_next(&chan->srej_q, next_skb); - } - - __skb_queue_tail(&chan->srej_q, skb); - - return 0; -} - static void append_skb_frag(struct sk_buff *skb, struct sk_buff *new_frag, struct sk_buff **last_frag) { @@ -4457,377 +4336,6 @@ void l2cap_chan_busy(struct l2cap_chan *chan, int busy) } } -static void l2cap_check_srej_gap(struct l2cap_chan *chan, u16 tx_seq) -{ - struct sk_buff *skb; - u32 control; - - while ((skb = skb_peek(&chan->srej_q)) && - !test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - int err; - - if (bt_cb(skb)->control.txseq != tx_seq) - break; - - skb = skb_dequeue(&chan->srej_q); - control = __set_ctrl_sar(chan, bt_cb(skb)->control.sar); - - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - break; - } - - chan->buffer_seq_srej = __next_seq(chan, chan->buffer_seq_srej); - tx_seq = __next_seq(chan, tx_seq); - } -} - -static void l2cap_resend_srejframe(struct l2cap_chan *chan, u16 tx_seq) -{ - struct srej_list *l, *tmp; - u32 control; - - list_for_each_entry_safe(l, tmp, &chan->srej_l, list) { - if (l->tx_seq == tx_seq) { - list_del(&l->list); - kfree(l); - return; - } - control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); - control |= __set_reqseq(chan, l->tx_seq); - list_del(&l->list); - list_add_tail(&l->list, &chan->srej_l); - } -} - -static int l2cap_send_srejframe(struct l2cap_chan *chan, u16 tx_seq) -{ - struct srej_list *new; - u32 control; - - while (tx_seq != chan->expected_tx_seq) { - control = __set_ctrl_super(chan, L2CAP_SUPER_SREJ); - control |= __set_reqseq(chan, chan->expected_tx_seq); - l2cap_seq_list_append(&chan->srej_list, chan->expected_tx_seq); - - new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); - if (!new) - return -ENOMEM; - - new->tx_seq = chan->expected_tx_seq; - - chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); - - list_add_tail(&new->list, &chan->srej_l); - } - - chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); - - return 0; -} - -static inline int l2cap_data_channel_iframe(struct l2cap_chan *chan, u32 rx_control, struct sk_buff *skb) -{ - u16 tx_seq = __get_txseq(chan, rx_control); - u16 req_seq = __get_reqseq(chan, rx_control); - u8 sar = __get_ctrl_sar(chan, rx_control); - int tx_seq_offset, expected_tx_seq_offset; - int num_to_ack = (chan->tx_win/6) + 1; - int err = 0; - - BT_DBG("chan %p len %d tx_seq %d rx_control 0x%8.8x", chan, skb->len, - tx_seq, rx_control); - - if (__is_ctrl_final(chan, rx_control) && - test_bit(CONN_WAIT_F, &chan->conn_state)) { - __clear_monitor_timer(chan); - if (chan->unacked_frames > 0) - __set_retrans_timer(chan); - clear_bit(CONN_WAIT_F, &chan->conn_state); - } - - chan->expected_ack_seq = req_seq; - l2cap_drop_acked_frames(chan); - - tx_seq_offset = __seq_offset(chan, tx_seq, chan->buffer_seq); - - /* invalid tx_seq */ - if (tx_seq_offset >= chan->tx_win) { - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - goto drop; - } - - if (test_bit(CONN_LOCAL_BUSY, &chan->conn_state)) { - if (!test_bit(CONN_RNR_SENT, &chan->conn_state)) - l2cap_send_ack(chan); - goto drop; - } - - if (tx_seq == chan->expected_tx_seq) - goto expected; - - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - struct srej_list *first; - - first = list_first_entry(&chan->srej_l, - struct srej_list, list); - if (tx_seq == first->tx_seq) { - l2cap_add_to_srej_queue(chan, skb, tx_seq, sar); - l2cap_check_srej_gap(chan, tx_seq); - - list_del(&first->list); - kfree(first); - - if (list_empty(&chan->srej_l)) { - chan->buffer_seq = chan->buffer_seq_srej; - clear_bit(CONN_SREJ_SENT, &chan->conn_state); - l2cap_send_ack(chan); - BT_DBG("chan %p, Exit SREJ_SENT", chan); - } - } else { - struct srej_list *l; - - /* duplicated tx_seq */ - if (l2cap_add_to_srej_queue(chan, skb, tx_seq, sar) < 0) - goto drop; - - list_for_each_entry(l, &chan->srej_l, list) { - if (l->tx_seq == tx_seq) { - l2cap_resend_srejframe(chan, tx_seq); - return 0; - } - } - - err = l2cap_send_srejframe(chan, tx_seq); - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, -err); - return err; - } - } - } else { - expected_tx_seq_offset = __seq_offset(chan, - chan->expected_tx_seq, chan->buffer_seq); - - /* duplicated tx_seq */ - if (tx_seq_offset < expected_tx_seq_offset) - goto drop; - - set_bit(CONN_SREJ_SENT, &chan->conn_state); - - BT_DBG("chan %p, Enter SREJ", chan); - - INIT_LIST_HEAD(&chan->srej_l); - chan->buffer_seq_srej = chan->buffer_seq; - - __skb_queue_head_init(&chan->srej_q); - l2cap_add_to_srej_queue(chan, skb, tx_seq, sar); - - /* Set P-bit only if there are some I-frames to ack. */ - if (__clear_ack_timer(chan)) - set_bit(CONN_SEND_PBIT, &chan->conn_state); - - err = l2cap_send_srejframe(chan, tx_seq); - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, -err); - return err; - } - } - return 0; - -expected: - chan->expected_tx_seq = __next_seq(chan, chan->expected_tx_seq); - - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - bt_cb(skb)->control.txseq = tx_seq; - bt_cb(skb)->control.sar = sar; - __skb_queue_tail(&chan->srej_q, skb); - return 0; - } - - chan->buffer_seq = __next_seq(chan, chan->buffer_seq); - - if (err < 0) { - l2cap_send_disconn_req(chan->conn, chan, ECONNRESET); - return err; - } - - if (__is_ctrl_final(chan, rx_control)) { - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) - l2cap_retransmit_frames(chan); - } - - - chan->num_acked = (chan->num_acked + 1) % num_to_ack; - if (chan->num_acked == num_to_ack - 1) - l2cap_send_ack(chan); - else - __set_ack_timer(chan); - - return 0; - -drop: - kfree_skb(skb); - return 0; -} - -static inline void l2cap_data_channel_rrframe(struct l2cap_chan *chan, u32 rx_control) -{ - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, - __get_reqseq(chan, rx_control), rx_control); - - chan->expected_ack_seq = __get_reqseq(chan, rx_control); - l2cap_drop_acked_frames(chan); - - if (__is_ctrl_poll(chan, rx_control)) { - set_bit(CONN_SEND_FBIT, &chan->conn_state); - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state) && - (chan->unacked_frames > 0)) - __set_retrans_timer(chan); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - l2cap_send_srejtail(chan); - } else { - l2cap_send_i_or_rr_or_rnr(chan); - } - - } else if (__is_ctrl_final(chan, rx_control)) { - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) - l2cap_retransmit_frames(chan); - - } else { - if (test_bit(CONN_REMOTE_BUSY, &chan->conn_state) && - (chan->unacked_frames > 0)) - __set_retrans_timer(chan); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - if (test_bit(CONN_SREJ_SENT, &chan->conn_state)) - l2cap_send_ack(chan); - else - l2cap_ertm_send(chan); - } -} - -static inline void l2cap_data_channel_rejframe(struct l2cap_chan *chan, u32 rx_control) -{ - u16 tx_seq = __get_reqseq(chan, rx_control); - - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - - chan->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(chan); - - if (__is_ctrl_final(chan, rx_control)) { - if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) - l2cap_retransmit_frames(chan); - } else { - l2cap_retransmit_frames(chan); - - if (test_bit(CONN_WAIT_F, &chan->conn_state)) - set_bit(CONN_REJ_ACT, &chan->conn_state); - } -} -static inline void l2cap_data_channel_srejframe(struct l2cap_chan *chan, u32 rx_control) -{ - u16 tx_seq = __get_reqseq(chan, rx_control); - - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); - - clear_bit(CONN_REMOTE_BUSY, &chan->conn_state); - - if (__is_ctrl_poll(chan, rx_control)) { - chan->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(chan); - - set_bit(CONN_SEND_FBIT, &chan->conn_state); - l2cap_retransmit_one_frame(chan, tx_seq); - - l2cap_ertm_send(chan); - - if (test_bit(CONN_WAIT_F, &chan->conn_state)) { - chan->srej_save_reqseq = tx_seq; - set_bit(CONN_SREJ_ACT, &chan->conn_state); - } - } else if (__is_ctrl_final(chan, rx_control)) { - if (test_bit(CONN_SREJ_ACT, &chan->conn_state) && - chan->srej_save_reqseq == tx_seq) - clear_bit(CONN_SREJ_ACT, &chan->conn_state); - else - l2cap_retransmit_one_frame(chan, tx_seq); - } else { - l2cap_retransmit_one_frame(chan, tx_seq); - if (test_bit(CONN_WAIT_F, &chan->conn_state)) { - chan->srej_save_reqseq = tx_seq; - set_bit(CONN_SREJ_ACT, &chan->conn_state); - } - } -} - -static inline void l2cap_data_channel_rnrframe(struct l2cap_chan *chan, u32 rx_control) -{ - u16 tx_seq = __get_reqseq(chan, rx_control); - - BT_DBG("chan %p, req_seq %d ctrl 0x%8.8x", chan, tx_seq, rx_control); - - set_bit(CONN_REMOTE_BUSY, &chan->conn_state); - chan->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(chan); - - if (__is_ctrl_poll(chan, rx_control)) - set_bit(CONN_SEND_FBIT, &chan->conn_state); - - if (!test_bit(CONN_SREJ_SENT, &chan->conn_state)) { - __clear_retrans_timer(chan); - if (__is_ctrl_poll(chan, rx_control)) - l2cap_send_rr_or_rnr(chan, L2CAP_CTRL_FINAL); - return; - } - - if (__is_ctrl_poll(chan, rx_control)) { - l2cap_send_srejtail(chan); - } else { - rx_control = __set_ctrl_super(chan, L2CAP_SUPER_RR); - } -} - -static inline int l2cap_data_channel_sframe(struct l2cap_chan *chan, u32 rx_control, struct sk_buff *skb) -{ - BT_DBG("chan %p rx_control 0x%8.8x len %d", chan, rx_control, skb->len); - - if (__is_ctrl_final(chan, rx_control) && - test_bit(CONN_WAIT_F, &chan->conn_state)) { - __clear_monitor_timer(chan); - if (chan->unacked_frames > 0) - __set_retrans_timer(chan); - clear_bit(CONN_WAIT_F, &chan->conn_state); - } - - switch (__get_ctrl_super(chan, rx_control)) { - case L2CAP_SUPER_RR: - l2cap_data_channel_rrframe(chan, rx_control); - break; - - case L2CAP_SUPER_REJ: - l2cap_data_channel_rejframe(chan, rx_control); - break; - - case L2CAP_SUPER_SREJ: - l2cap_data_channel_srejframe(chan, rx_control); - break; - - case L2CAP_SUPER_RNR: - l2cap_data_channel_rnrframe(chan, rx_control); - break; - } - - kfree_skb(skb); - return 0; -} - static u8 l2cap_classify_txseq(struct l2cap_chan *chan, u16 txseq) { BT_DBG("chan %p, txseq %d", chan, txseq); -- cgit v1.2.3-70-g09d2 From 4239d16f360ce4c8a1798508dd171ebce93985ba Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 20:53:49 -0700 Subject: Bluetooth: Check rules when setting retransmit or monitor timers The ERTM specification requires the retransmit timer to be cancelled when the monitor timer is set. The retransmit timer cannot be set again while the monitor timer is pending. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ---- net/bluetooth/l2cap_core.c | 22 ++++++++++++++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7d1da5a7d11..117db8e4a5f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -706,11 +706,7 @@ static inline bool l2cap_clear_timer(struct l2cap_chan *chan, #define __set_chan_timer(c, t) l2cap_set_timer(c, &c->chan_timer, (t)) #define __clear_chan_timer(c) l2cap_clear_timer(c, &c->chan_timer) -#define __set_retrans_timer(c) l2cap_set_timer(c, &c->retrans_timer, \ - msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); #define __clear_retrans_timer(c) l2cap_clear_timer(c, &c->retrans_timer) -#define __set_monitor_timer(c) l2cap_set_timer(c, &c->monitor_timer, \ - msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO)); #define __clear_monitor_timer(c) l2cap_clear_timer(c, &c->monitor_timer) #define __set_ack_timer(c) l2cap_set_timer(c, &chan->ack_timer, \ msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO)); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8ea9ec648bf..38e9a0ea4f4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -227,6 +227,24 @@ static inline void l2cap_chan_set_err(struct l2cap_chan *chan, int err) release_sock(sk); } +static void __set_retrans_timer(struct l2cap_chan *chan) +{ + if (!delayed_work_pending(&chan->monitor_timer) && + chan->retrans_timeout) { + l2cap_set_timer(chan, &chan->retrans_timer, + msecs_to_jiffies(chan->retrans_timeout)); + } +} + +static void __set_monitor_timer(struct l2cap_chan *chan) +{ + __clear_retrans_timer(chan); + if (chan->monitor_timeout) { + l2cap_set_timer(chan, &chan->monitor_timer, + msecs_to_jiffies(chan->monitor_timeout)); + } +} + static struct sk_buff *l2cap_ertm_seq_in_queue(struct sk_buff_head *head, u16 seq) { @@ -1619,7 +1637,7 @@ int __l2cap_wait_ack(struct sock *sk) static void l2cap_monitor_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, - monitor_timer.work); + monitor_timer.work); BT_DBG("chan %p", chan); @@ -1643,7 +1661,7 @@ static void l2cap_monitor_timeout(struct work_struct *work) static void l2cap_retrans_timeout(struct work_struct *work) { struct l2cap_chan *chan = container_of(work, struct l2cap_chan, - retrans_timer.work); + retrans_timer.work); BT_DBG("chan %p", chan); -- cgit v1.2.3-70-g09d2 From 522cc2ee6e55ba49f4df338e0dfcfb989b46eb8c Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 17 May 2012 20:53:54 -0700 Subject: Bluetooth: Remove unused ERTM control field macros Now that l2cap_ctrl is used to set up control fields, these macros are not needed. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 168 ------------------------------------------ 1 file changed, 168 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 117db8e4a5f..7bc40198f14 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -725,174 +725,6 @@ static inline __u16 __next_seq(struct l2cap_chan *chan, __u16 seq) return (seq + 1) % (chan->tx_win_max + 1); } -static inline int l2cap_tx_window_full(struct l2cap_chan *ch) -{ - int sub; - - sub = (ch->next_tx_seq - ch->expected_ack_seq) % 64; - - if (sub < 0) - sub += 64; - - return sub == ch->remote_tx_win; -} - -static inline __u16 __get_reqseq(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_REQSEQ) >> - L2CAP_EXT_CTRL_REQSEQ_SHIFT; - else - return (ctrl & L2CAP_CTRL_REQSEQ) >> L2CAP_CTRL_REQSEQ_SHIFT; -} - -static inline __u32 __set_reqseq(struct l2cap_chan *chan, __u32 reqseq) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (reqseq << L2CAP_EXT_CTRL_REQSEQ_SHIFT) & - L2CAP_EXT_CTRL_REQSEQ; - else - return (reqseq << L2CAP_CTRL_REQSEQ_SHIFT) & L2CAP_CTRL_REQSEQ; -} - -static inline __u16 __get_txseq(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_TXSEQ) >> - L2CAP_EXT_CTRL_TXSEQ_SHIFT; - else - return (ctrl & L2CAP_CTRL_TXSEQ) >> L2CAP_CTRL_TXSEQ_SHIFT; -} - -static inline __u32 __set_txseq(struct l2cap_chan *chan, __u32 txseq) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (txseq << L2CAP_EXT_CTRL_TXSEQ_SHIFT) & - L2CAP_EXT_CTRL_TXSEQ; - else - return (txseq << L2CAP_CTRL_TXSEQ_SHIFT) & L2CAP_CTRL_TXSEQ; -} - -static inline bool __is_sframe(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return ctrl & L2CAP_EXT_CTRL_FRAME_TYPE; - else - return ctrl & L2CAP_CTRL_FRAME_TYPE; -} - -static inline __u32 __set_sframe(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_FRAME_TYPE; - else - return L2CAP_CTRL_FRAME_TYPE; -} - -static inline __u8 __get_ctrl_sar(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_SAR) >> L2CAP_EXT_CTRL_SAR_SHIFT; - else - return (ctrl & L2CAP_CTRL_SAR) >> L2CAP_CTRL_SAR_SHIFT; -} - -static inline __u32 __set_ctrl_sar(struct l2cap_chan *chan, __u32 sar) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (sar << L2CAP_EXT_CTRL_SAR_SHIFT) & L2CAP_EXT_CTRL_SAR; - else - return (sar << L2CAP_CTRL_SAR_SHIFT) & L2CAP_CTRL_SAR; -} - -static inline bool __is_sar_start(struct l2cap_chan *chan, __u32 ctrl) -{ - return __get_ctrl_sar(chan, ctrl) == L2CAP_SAR_START; -} - -static inline __u32 __get_sar_mask(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_SAR; - else - return L2CAP_CTRL_SAR; -} - -static inline __u8 __get_ctrl_super(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (ctrl & L2CAP_EXT_CTRL_SUPERVISE) >> - L2CAP_EXT_CTRL_SUPER_SHIFT; - else - return (ctrl & L2CAP_CTRL_SUPERVISE) >> L2CAP_CTRL_SUPER_SHIFT; -} - -static inline __u32 __set_ctrl_super(struct l2cap_chan *chan, __u32 super) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return (super << L2CAP_EXT_CTRL_SUPER_SHIFT) & - L2CAP_EXT_CTRL_SUPERVISE; - else - return (super << L2CAP_CTRL_SUPER_SHIFT) & - L2CAP_CTRL_SUPERVISE; -} - -static inline __u32 __set_ctrl_final(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_FINAL; - else - return L2CAP_CTRL_FINAL; -} - -static inline bool __is_ctrl_final(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return ctrl & L2CAP_EXT_CTRL_FINAL; - else - return ctrl & L2CAP_CTRL_FINAL; -} - -static inline __u32 __set_ctrl_poll(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_CTRL_POLL; - else - return L2CAP_CTRL_POLL; -} - -static inline bool __is_ctrl_poll(struct l2cap_chan *chan, __u32 ctrl) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return ctrl & L2CAP_EXT_CTRL_POLL; - else - return ctrl & L2CAP_CTRL_POLL; -} - -static inline __u32 __get_control(struct l2cap_chan *chan, void *p) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return get_unaligned_le32(p); - else - return get_unaligned_le16(p); -} - -static inline void __put_control(struct l2cap_chan *chan, __u32 control, - void *p) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return put_unaligned_le32(control, p); - else - return put_unaligned_le16(control, p); -} - -static inline __u8 __ctrl_size(struct l2cap_chan *chan) -{ - if (test_bit(FLAG_EXT_CTRL, &chan->flags)) - return L2CAP_EXT_HDR_SIZE - L2CAP_HDR_SIZE; - else - return L2CAP_ENH_HDR_SIZE - L2CAP_HDR_SIZE; -} extern bool disable_ertm; -- cgit v1.2.3-70-g09d2 From 38351c66e407e610283e5332b819822055db473c Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 22 May 2012 19:00:20 -0300 Subject: Bluetooth: Fix trailing whitespaces in license text As reported by checkpatch.pl Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 961669b648f..b98181bd2b3 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -1,4 +1,4 @@ -/* +/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated @@ -12,13 +12,13 @@ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ -- cgit v1.2.3-70-g09d2 From a6c511c636848f871f5b7aef38e25e5b894b3b48 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Wed, 23 May 2012 12:35:46 +0200 Subject: Bluetooth: Rename HCI_QUIRK_NO_RESET to HCI_QUIRK_RESET_ON_CLOSE HCI_QUIRK_NO_RESET name is misleading - purpose of this quirk is to reset device on close instead of init, not to not reset at all. Rename it to HCI_QUIRK_RESET_ON_CLOSE to avoid confusion. Signed-off-by: Szymon Janc Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bpa10x.c | 2 +- drivers/bluetooth/btusb.c | 6 +++--- drivers/bluetooth/hci_ldisc.c | 2 +- include/net/bluetooth/hci.h | 2 +- net/bluetooth/hci_core.c | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 609861a53c2..29caaed2d71 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -470,7 +470,7 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id * hdev->flush = bpa10x_flush; hdev->send = bpa10x_send_frame; - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); err = hci_register_dev(hdev); if (err < 0) { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c9463af8e56..3a6cdc9b75a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1026,7 +1026,7 @@ static int btusb_probe(struct usb_interface *intf, data->isoc = usb_ifnum_to_if(data->udev, 1); if (!reset) - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) { if (!disable_scofix) @@ -1038,7 +1038,7 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); } if (id->driver_info & BTUSB_CSR) { @@ -1046,7 +1046,7 @@ static int btusb_probe(struct usb_interface *intf, /* Old firmware would otherwise execute USB reset */ if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x117) - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); } if (id->driver_info & BTUSB_SNIFFER) { diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index e564579a611..2f9b796e106 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -394,7 +394,7 @@ static int hci_uart_register_dev(struct hci_uart *hu) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); if (!test_bit(HCI_UART_RESET_ON_INIT, &hu->hdev_flags)) - set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks); if (test_bit(HCI_UART_CREATE_AMP, &hu->hdev_flags)) hdev->dev_type = HCI_AMP; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 66a7b579e31..97c57aa938f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -58,7 +58,7 @@ /* HCI device quirks */ enum { - HCI_QUIRK_NO_RESET, + HCI_QUIRK_RESET_ON_CLOSE, HCI_QUIRK_RAW_DEVICE, HCI_QUIRK_FIXUP_BUFFER_SIZE }; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d0a960dabd5..0ed4edf0f77 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -203,7 +203,7 @@ static void bredr_init(struct hci_dev *hdev) /* Mandatory initialization */ /* Reset */ - if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { + if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_RESET, &hdev->flags); hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); } @@ -792,7 +792,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); if (!test_bit(HCI_RAW, &hdev->flags) && - test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) { + test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); __hci_request(hdev, hci_reset_req, 0, msecs_to_jiffies(250)); -- cgit v1.2.3-70-g09d2 From 9b3b44604ac8e06d299718c5d0fa0b91b675ae0b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 May 2012 11:31:20 +0300 Subject: Bluetooth: Use defined link key size Remove magic number with defined link key size. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 6 ++++-- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/mgmt.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 97c57aa938f..0bc5555510f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -30,6 +30,8 @@ #define HCI_MAX_EVENT_SIZE 260 #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) +#define HCI_LINK_KEY_SIZE 16 + /* HCI dev events */ #define HCI_DEV_REG 1 #define HCI_DEV_UNREG 2 @@ -371,7 +373,7 @@ struct hci_cp_reject_conn_req { #define HCI_OP_LINK_KEY_REPLY 0x040b struct hci_cp_link_key_reply { bdaddr_t bdaddr; - __u8 link_key[16]; + __u8 link_key[HCI_LINK_KEY_SIZE]; } __packed; #define HCI_OP_LINK_KEY_NEG_REPLY 0x040c @@ -1048,7 +1050,7 @@ struct hci_ev_link_key_req { #define HCI_EV_LINK_KEY_NOTIFY 0x18 struct hci_ev_link_key_notify { bdaddr_t bdaddr; - __u8 link_key[16]; + __u8 link_key[HCI_LINK_KEY_SIZE]; __u8 key_type; } __packed; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9fc7728f94e..6c658fc7ac9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -105,7 +105,7 @@ struct link_key { struct list_head list; bdaddr_t bdaddr; u8 type; - u8 val[16]; + u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; }; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0ed4edf0f77..027257d4b52 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1291,7 +1291,7 @@ int hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, int new_key, } bacpy(&key->bdaddr, bdaddr); - memcpy(key->val, val, 16); + memcpy(key->val, val, HCI_LINK_KEY_SIZE); key->pin_len = pin_len; if (type == HCI_LK_CHANGED_COMBINATION) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6c2d7ccf26e..1795c0c9b41 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2739,7 +2739,7 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, } bacpy(&cp.bdaddr, &ev->bdaddr); - memcpy(cp.link_key, key->val, 16); + memcpy(cp.link_key, key->val, HCI_LINK_KEY_SIZE); hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6a7e926c418..1fd49e65269 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2955,7 +2955,7 @@ int mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bacpy(&ev.key.addr.bdaddr, &key->bdaddr); ev.key.addr.type = BDADDR_BREDR; ev.key.type = key->type; - memcpy(ev.key.val, key->val, 16); + memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; return mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL); -- cgit v1.2.3-70-g09d2 From c3c7ea65941a0b7a4f1b9655e7aaaab6ce1874d2 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Wed, 23 May 2012 04:04:20 -0300 Subject: Bluetooth: Fix coding style in include/net/bluetooth Fix all warning and errors reported by checkpatch but license trailing whitespace and bdaddr_t definition. Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 26 ++++++++++++++------------ include/net/bluetooth/hci.h | 4 ++-- include/net/bluetooth/hci_core.h | 12 +++++++----- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b98181bd2b3..7a9f9612db5 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -25,7 +25,7 @@ #ifndef __BLUETOOTH_H #define __BLUETOOTH_H -#include +#include #include #include #include @@ -168,8 +168,8 @@ typedef struct { #define BDADDR_LE_PUBLIC 0x01 #define BDADDR_LE_RANDOM 0x02 -#define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0}}) -#define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff}}) +#define BDADDR_ANY (&(bdaddr_t) {{0, 0, 0, 0, 0, 0} }) +#define BDADDR_LOCAL (&(bdaddr_t) {{0, 0, 0, 0xff, 0xff, 0xff} }) /* Copy, swap, convert BD Address */ static inline int bacmp(bdaddr_t *ba1, bdaddr_t *ba2) @@ -215,7 +215,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); -uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait); +uint bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); @@ -225,12 +225,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct l2cap_ctrl { - unsigned int sframe : 1, - poll : 1, - final : 1, - fcs : 1, - sar : 2, - super : 2; + unsigned int sframe:1, + poll:1, + final:1, + fcs:1, + sar:2, + super:2; __u16 reqseq; __u16 txseq; __u8 retries; @@ -249,7 +249,8 @@ static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { struct sk_buff *skb; - if ((skb = alloc_skb(len + BT_SKB_RESERVE, how))) { + skb = alloc_skb(len + BT_SKB_RESERVE, how); + if (skb) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } @@ -261,7 +262,8 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, { struct sk_buff *skb; - if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { + skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err); + if (skb) { skb_reserve(skb, BT_SKB_RESERVE); bt_cb(skb)->incoming = 0; } diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0bc5555510f..5de351e49d4 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1309,12 +1309,12 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) } /* Command opcode pack/unpack */ -#define hci_opcode_pack(ogf, ocf) (__u16) ((ocf & 0x03ff)|(ogf << 10)) +#define hci_opcode_pack(ogf, ocf) ((__u16) ((ocf & 0x03ff)|(ogf << 10))) #define hci_opcode_ogf(op) (op >> 10) #define hci_opcode_ocf(op) (op & 0x03ff) /* ACL handle and flags pack/unpack */ -#define hci_handle_pack(h, f) (__u16) ((h & 0x0fff)|(f << 12)) +#define hci_handle_pack(h, f) ((__u16) ((h & 0x0fff)|(f << 12))) #define hci_handle(h) (h & 0x0fff) #define hci_flags(h) (h >> 12) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6c658fc7ac9..a8ba50d7a81 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -65,7 +65,7 @@ struct discovery_state { DISCOVERY_RESOLVING, DISCOVERY_STOPPING, } state; - struct list_head all; /* All devices found during inquiry */ + struct list_head all; /* All devices found during inquiry */ struct list_head unknown; /* Name state not known */ struct list_head resolve; /* Name needs to be resolved */ __u32 timestamp; @@ -360,7 +360,8 @@ extern int l2cap_connect_cfm(struct hci_conn *hcon, u8 status); extern int l2cap_disconn_ind(struct hci_conn *hcon); extern int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason); extern int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt); -extern int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags); +extern int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, + u16 flags); extern int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr); extern int sco_connect_cfm(struct hci_conn *hcon, __u8 status); @@ -429,8 +430,8 @@ enum { static inline bool hci_conn_ssp_enabled(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - return (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && - test_bit(HCI_CONN_SSP_ENABLED, &conn->flags)); + return test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && + test_bit(HCI_CONN_SSP_ENABLED, &conn->flags); } static inline void hci_conn_hash_init(struct hci_dev *hdev) @@ -661,7 +662,8 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg); int hci_get_auth_info(struct hci_dev *hdev, void __user *arg); int hci_inquiry(void __user *arg); -struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr); +struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, + bdaddr_t *bdaddr); int hci_blacklist_clear(struct hci_dev *hdev); int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type); -- cgit v1.2.3-70-g09d2 From 8c520a59927a5600973782505dbb750d985057c4 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Wed, 23 May 2012 04:04:22 -0300 Subject: Bluetooth: Remove unnecessary headers include Most of the include were unnecessary or already included by some other header. Replace module.h by export.h where possible. Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 8 -------- include/net/bluetooth/bluetooth.h | 3 --- include/net/bluetooth/hci.h | 1 - include/net/bluetooth/hci_core.h | 1 - net/bluetooth/af_bluetooth.c | 11 ----------- net/bluetooth/bnep/core.c | 17 ----------------- net/bluetooth/bnep/netdev.c | 10 +--------- net/bluetooth/bnep/sock.c | 18 +----------------- net/bluetooth/hci_conn.c | 16 +--------------- net/bluetooth/hci_core.c | 23 ++--------------------- net/bluetooth/hci_event.c | 15 +-------------- net/bluetooth/hci_sock.c | 20 +------------------- net/bluetooth/hci_sysfs.c | 4 ---- net/bluetooth/hidp/core.c | 19 ------------------- net/bluetooth/hidp/sock.c | 16 +--------------- net/bluetooth/l2cap_core.c | 19 ------------------- net/bluetooth/l2cap_sock.c | 1 - net/bluetooth/lib.c | 7 +------ net/bluetooth/mgmt.c | 2 -- net/bluetooth/rfcomm/core.c | 14 -------------- net/bluetooth/rfcomm/sock.c | 21 +-------------------- net/bluetooth/rfcomm/tty.c | 5 ----- net/bluetooth/sco.c | 18 ------------------ net/bluetooth/smp.c | 7 ++++--- 24 files changed, 14 insertions(+), 262 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3a6cdc9b75a..a45e717f5f8 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -21,15 +21,7 @@ * */ -#include #include -#include -#include -#include -#include -#include -#include - #include #include diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7a9f9612db5..565d4bee1e4 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -25,9 +25,6 @@ #ifndef __BLUETOOTH_H #define __BLUETOOTH_H -#include -#include -#include #include #include diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5de351e49d4..edb66390812 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1292,7 +1292,6 @@ struct hci_sco_hdr { __u8 dlen; } __packed; -#include static inline struct hci_event_hdr *hci_event_hdr(const struct sk_buff *skb) { return (struct hci_event_hdr *) skb->data; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a8ba50d7a81..d584a47d1c8 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -25,7 +25,6 @@ #ifndef __HCI_CORE_H #define __HCI_CORE_H -#include #include /* HCI priority */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index e31a20f5b6b..251747269d3 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -25,18 +25,7 @@ /* Bluetooth address family and sockets. */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index a918f6e4f00..4a6620bc157 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -26,26 +26,9 @@ */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include - -#include #include - -#include #include -#include - #include #include diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 46c9ece7b04..98f86f91d47 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -25,16 +25,8 @@ SOFTWARE IS DISCLAIMED. */ -#include -#include - -#include -#include +#include #include -#include -#include - -#include #include #include diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 180bfc45810..5e5f5b410e0 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -24,24 +24,8 @@ SOFTWARE IS DISCLAIMED. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include - #include "bnep.h" diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 231fc4400f3..3bb2d552a88 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -24,21 +24,7 @@ /* Bluetooth HCI connection handling. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include +#include #include #include diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bee425ad25b..3431ec908c0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -25,28 +25,9 @@ /* Bluetooth HCI core. */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include -#include -#include +#include #include #include diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 87e6f74af6f..5e24a57a461 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -24,20 +24,7 @@ /* Bluetooth HCI event handling. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include +#include #include #include diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 9d8e1c39955..a7f04de03d7 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -24,25 +24,7 @@ /* Bluetooth HCI sockets. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include +#include #include #include diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ee8d9ea6bf3..a20e61c3653 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -1,10 +1,6 @@ /* Bluetooth HCI driver model support. */ -#include -#include -#include #include -#include #include #include diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index c8625b8ccb6..8a4afc7515a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -21,27 +21,8 @@ */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include - -#include -#include #include #include diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 73a32d705c1..18b3f6892a3 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -20,22 +20,8 @@ SOFTWARE IS DISCLAIMED. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include #include "hidp.h" diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index db76a7750ee..f6b785593ec 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -30,27 +30,8 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include #include -#include - -#include #include #include diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3bb1611b9d4..4d3660540c0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -27,7 +27,6 @@ /* Bluetooth L2CAP sockets. */ -#include #include #include diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index 994bc3c7ddc..e1c97527e16 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -26,12 +26,7 @@ #define pr_fmt(fmt) "Bluetooth: " fmt -#include - -#include -#include -#include -#include +#include #include diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 984afe4ef40..205574edff2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -24,8 +24,6 @@ /* Bluetooth HCI Management interface */ -#include -#include #include #include diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 585d3916d3d..c75107ef892 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -26,22 +26,8 @@ */ #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include - -#include -#include #include #include diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index e8707debb86..7e1e59645c0 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -25,27 +25,8 @@ * RFCOMM sockets. */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include - -#include #include #include diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index f2f4d064df9..cb960773c00 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -31,11 +31,6 @@ #include #include -#include -#include -#include -#include - #include #include #include diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 6401ccae204..40bbe25dcff 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -25,26 +25,8 @@ /* Bluetooth SCO sockets. */ #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include -#include - -#include #include #include diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 6fc7c4708f3..ff4835b61de 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,14 +20,15 @@ SOFTWARE IS DISCLAIMED. */ +#include +#include +#include + #include #include #include #include #include -#include -#include -#include #define SMP_TIMEOUT msecs_to_jiffies(30000) -- cgit v1.2.3-70-g09d2 From 59e54bd15d63f102c71c3ce695bca5ed90926e46 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 23 May 2012 15:44:06 +0300 Subject: Bluetooth: Define L2CAP conf continuation flag Define Continuation flag which the only flag used from Flags field in L2CAP Configuration Request and Response. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 3 +++ net/bluetooth/l2cap_core.c | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7bc40198f14..01422578cc7 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -271,6 +271,9 @@ struct l2cap_conf_rsp { #define L2CAP_CONF_PENDING 0x0004 #define L2CAP_CONF_EFS_REJECT 0x0005 +/* configuration req/rsp continuation flag */ +#define L2CAP_CONF_FLAG_CONTINUATION 0x0001 + struct l2cap_conf_opt { __u8 type; __u8 len; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f6b785593ec..e31b005f982 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2944,7 +2944,7 @@ done: } req->dcid = cpu_to_le16(chan->dcid); - req->flags = cpu_to_le16(0); + req->flags = __constant_cpu_to_le16(0); return ptr - data; } @@ -3164,7 +3164,7 @@ done: } rsp->scid = cpu_to_le16(chan->dcid); rsp->result = cpu_to_le16(result); - rsp->flags = cpu_to_le16(0x0000); + rsp->flags = __constant_cpu_to_le16(0); return ptr - data; } @@ -3263,7 +3263,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi } req->dcid = cpu_to_le16(chan->dcid); - req->flags = cpu_to_le16(0x0000); + req->flags = __constant_cpu_to_le16(0); return ptr - data; } @@ -3618,7 +3618,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr memcpy(chan->conf_req + chan->conf_len, req->data, len); chan->conf_len += len; - if (flags & 0x0001) { + if (flags & L2CAP_CONF_FLAG_CONTINUATION) { /* Incomplete config. Send empty response. */ l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, l2cap_build_conf_rsp(chan, rsp, @@ -3769,7 +3769,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr goto done; } - if (flags & 0x01) + if (flags & L2CAP_CONF_FLAG_CONTINUATION) goto done; set_bit(CONF_INPUT_DONE, &chan->conf_state); -- cgit v1.2.3-70-g09d2 From 2983fd682444180e45567ce8147a612b97ba69da Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Thu, 24 May 2012 15:42:50 +0300 Subject: Bluetooth: Define and use PSM identifiers Define assigned Protocol and Service Multiplexor (PSM) identifiers and use them instead of magic numbers. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ++++ net/bluetooth/l2cap_core.c | 4 ++-- net/bluetooth/l2cap_sock.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 01422578cc7..f44344b92d2 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -229,6 +229,10 @@ struct l2cap_conn_rsp { __le16 status; } __packed; +/* protocol/service multiplexer (PSM) */ +#define L2CAP_PSM_SDP 0x0001 +#define L2CAP_PSM_RFCOMM 0x0003 + /* channel indentifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c9e6ae4a336..65c3f4e1396 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -648,7 +648,7 @@ static inline u8 l2cap_get_auth_type(struct l2cap_chan *chan) default: return HCI_AT_NO_BONDING; } - } else if (chan->psm == cpu_to_le16(0x0001)) { + } else if (chan->psm == __constant_cpu_to_le16(L2CAP_PSM_SDP)) { if (chan->sec_level == BT_SECURITY_LOW) chan->sec_level = BT_SECURITY_SDP; @@ -3393,7 +3393,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd lock_sock(parent); /* Check if the ACL is secure enough (if not SDP) */ - if (psm != cpu_to_le16(0x0001) && + if (psm != __constant_cpu_to_le16(L2CAP_PSM_SDP) && !hci_conn_check_link_mode(conn->hcon)) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4d3660540c0..d244361a455 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -88,8 +88,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (err < 0) goto done; - if (__le16_to_cpu(la.l2_psm) == 0x0001 || - __le16_to_cpu(la.l2_psm) == 0x0003) + if (__le16_to_cpu(la.l2_psm) == L2CAP_PSM_SDP || + __le16_to_cpu(la.l2_psm) == L2CAP_PSM_RFCOMM) chan->sec_level = BT_SECURITY_SDP; bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); -- cgit v1.2.3-70-g09d2 From 523e93cdb39086b25af2ed19d2a69248510727a2 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 25 May 2012 15:09:26 +0300 Subject: Bluetooth: Define HCI AMP cmd struct Add HCI commands to deal with Bluetooth AMP controllers. Those commands will be used by bluetooth and softamp code. Signed-off-by: Andrei Emeltchenko Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 81 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index edb66390812..de09a26e422 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -31,6 +31,7 @@ #define HCI_MAX_FRAME_SIZE (HCI_MAX_ACL_SIZE + 4) #define HCI_LINK_KEY_SIZE 16 +#define HCI_AMP_LINK_KEY_SIZE (2 * HCI_LINK_KEY_SIZE) /* HCI dev events */ #define HCI_DEV_REG 1 @@ -525,6 +526,28 @@ struct hci_cp_io_capability_neg_reply { __u8 reason; } __packed; +#define HCI_OP_CREATE_PHY_LINK 0x0435 +struct hci_cp_create_phy_link { + __u8 phy_handle; + __u8 key_len; + __u8 key_type; + __u8 key[HCI_AMP_LINK_KEY_SIZE]; +} __packed; + +#define HCI_OP_ACCEPT_PHY_LINK 0x0436 +struct hci_cp_accept_phy_link { + __u8 phy_handle; + __u8 key_len; + __u8 key_type; + __u8 key[HCI_AMP_LINK_KEY_SIZE]; +} __packed; + +#define HCI_OP_DISCONN_PHY_LINK 0x0437 +struct hci_cp_disconn_phy_link { + __u8 phy_handle; + __u8 reason; +} __packed; + #define HCI_OP_SNIFF_MODE 0x0803 struct hci_cp_sniff_mode { __le16 handle; @@ -820,6 +843,31 @@ struct hci_rp_read_local_amp_info { __le32 be_flush_to; } __packed; +#define HCI_OP_READ_LOCAL_AMP_ASSOC 0x140a +struct hci_cp_read_local_amp_assoc { + __u8 phy_handle; + __le16 len_so_far; + __le16 max_len; +} __packed; +struct hci_rp_read_local_amp_assoc { + __u8 status; + __u8 phy_handle; + __le16 rem_len; + __u8 frag[0]; +} __packed; + +#define HCI_OP_WRITE_REMOTE_AMP_ASSOC 0x140b +struct hci_cp_write_remote_amp_assoc { + __u8 phy_handle; + __le16 len_so_far; + __le16 rem_len; + __u8 frag[0]; +} __packed; +struct hci_rp_write_remote_amp_assoc { + __u8 status; + __u8 phy_handle; +} __packed; + #define HCI_OP_LE_SET_EVENT_MASK 0x2001 struct hci_cp_le_set_event_mask { __u8 mask[8]; @@ -1192,6 +1240,39 @@ struct hci_ev_le_meta { __u8 subevent; } __packed; +#define HCI_EV_PHY_LINK_COMPLETE 0x40 +struct hci_ev_phy_link_complete { + __u8 status; + __u8 phy_handle; +} __packed; + +#define HCI_EV_CHANNEL_SELECTED 0x41 +struct hci_ev_channel_selected { + __u8 phy_handle; +} __packed; + +#define HCI_EV_DISCONN_PHY_LINK_COMPLETE 0x42 +struct hci_ev_disconn_phy_link_complete { + __u8 status; + __u8 phy_handle; + __u8 reason; +} __packed; + +#define HCI_EV_LOGICAL_LINK_COMPLETE 0x45 +struct hci_ev_logical_link_complete { + __u8 status; + __le16 handle; + __u8 phy_handle; + __u8 flow_spec_id; +} __packed; + +#define HCI_EV_DISCONN_LOGICAL_LINK_COMPLETE 0x46 +struct hci_ev_disconn_logical_link_complete { + __u8 status; + __le16 handle; + __u8 reason; +} __packed; + #define HCI_EV_NUM_COMP_BLOCKS 0x48 struct hci_comp_blocks_info { __le16 handle; -- cgit v1.2.3-70-g09d2 From 80b980279508edd1a92d8d77ec99b0ddad00c5fe Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Sun, 27 May 2012 22:27:51 -0300 Subject: Bluetooth: Use chan as parameters for l2cap chan ops Use chan instead of void * makes more sense here. Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 10 ++++++---- net/bluetooth/l2cap_core.c | 30 +++++++++++++++--------------- net/bluetooth/l2cap_sock.c | 16 ++++++++-------- 3 files changed, 29 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index f44344b92d2..aa2dbc680d5 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -527,10 +527,12 @@ struct l2cap_chan { struct l2cap_ops { char *name; - struct l2cap_chan *(*new_connection) (void *data); - int (*recv) (void *data, struct sk_buff *skb); - void (*close) (void *data); - void (*state_change) (void *data, int state); + struct l2cap_chan *(*new_connection) (struct l2cap_chan *chan); + int (*recv) (struct l2cap_chan * chan, + struct sk_buff *skb); + void (*close) (struct l2cap_chan *chan); + void (*state_change) (struct l2cap_chan *chan, + int state); struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, unsigned long len, int nb); }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index de0dc9ec986..7edc8146db2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -180,7 +180,7 @@ static void __l2cap_state_change(struct l2cap_chan *chan, int state) state_to_string(state)); chan->state = state; - chan->ops->state_change(chan->data, state); + chan->ops->state_change(chan, state); } static void l2cap_state_change(struct l2cap_chan *chan, int state) @@ -381,7 +381,7 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); mutex_unlock(&conn->chan_lock); l2cap_chan_put(chan); @@ -569,7 +569,7 @@ static void l2cap_chan_cleanup_listen(struct sock *parent) l2cap_chan_close(chan, ECONNRESET); l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); } } @@ -1213,7 +1213,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) goto clean; } - chan = pchan->ops->new_connection(pchan->data); + chan = pchan->ops->new_connection(pchan); if (!chan) goto clean; @@ -1324,7 +1324,7 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); l2cap_chan_put(chan); } @@ -2568,7 +2568,7 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) if (!nskb) continue; - if (chan->ops->recv(chan->data, nskb)) + if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } @@ -3411,7 +3411,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto response; } - chan = pchan->ops->new_connection(pchan->data); + chan = pchan->ops->new_connection(pchan); if (!chan) goto response; @@ -3420,7 +3420,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { sock_set_flag(sk, SOCK_ZAPPED); - chan->ops->close(chan->data); + chan->ops->close(chan); goto response; } @@ -3831,7 +3831,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -3865,7 +3865,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd l2cap_chan_unlock(chan); - chan->ops->close(chan->data); + chan->ops->close(chan); l2cap_chan_put(chan); mutex_unlock(&conn->chan_lock); @@ -4435,7 +4435,7 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, if (chan->sdu) break; - err = chan->ops->recv(chan->data, skb); + err = chan->ops->recv(chan, skb); break; case L2CAP_SAR_START: @@ -4485,7 +4485,7 @@ static int l2cap_reassemble_sdu(struct l2cap_chan *chan, struct sk_buff *skb, if (chan->sdu->len != chan->sdu_len) break; - err = chan->ops->recv(chan->data, chan->sdu); + err = chan->ops->recv(chan, chan->sdu); if (!err) { /* Reassembly complete */ @@ -5207,7 +5207,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (chan->imtu < skb->len) goto drop; - if (!chan->ops->recv(chan->data, skb)) + if (!chan->ops->recv(chan, skb)) goto done; break; @@ -5246,7 +5246,7 @@ static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, str if (chan->imtu < skb->len) goto drop; - if (!chan->ops->recv(chan->data, skb)) + if (!chan->ops->recv(chan, skb)) return 0; drop: @@ -5272,7 +5272,7 @@ static inline int l2cap_att_channel(struct l2cap_conn *conn, u16 cid, if (chan->imtu < skb->len) goto drop; - if (!chan->ops->recv(chan->data, skb)) + if (!chan->ops->recv(chan, skb)) return 0; drop: diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index d244361a455..db787f67c52 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -872,9 +872,9 @@ static int l2cap_sock_release(struct socket *sock) return err; } -static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) +static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { - struct sock *sk, *parent = data; + struct sock *sk, *parent = chan->data; sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); @@ -888,10 +888,10 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(void *data) return l2cap_pi(sk)->chan; } -static int l2cap_sock_recv_cb(void *data, struct sk_buff *skb) +static int l2cap_sock_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { int err; - struct sock *sk = data; + struct sock *sk = chan->data; struct l2cap_pinfo *pi = l2cap_pi(sk); lock_sock(sk); @@ -924,16 +924,16 @@ done: return err; } -static void l2cap_sock_close_cb(void *data) +static void l2cap_sock_close_cb(struct l2cap_chan *chan) { - struct sock *sk = data; + struct sock *sk = chan->data; l2cap_sock_kill(sk); } -static void l2cap_sock_state_change_cb(void *data, int state) +static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state) { - struct sock *sk = data; + struct sock *sk = chan->data; sk->sk_state = state; } -- cgit v1.2.3-70-g09d2 From c0df7f6e06e1aeccee39c801af7f78cadeb9f345 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Sun, 27 May 2012 22:27:52 -0300 Subject: Bluetooth: Move clean up code and set of SOCK_ZAPPED to l2cap_sock.c This remove a bit more of socket code from l2cap core, this calls set the SOCK_ZAPPED and do some clean up depending on the socket state. Reported-by: Mat Martineau Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 55 +++++++------------------------------- net/bluetooth/l2cap_sock.c | 61 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 46 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index aa2dbc680d5..76b0e7e5dec 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -530,6 +530,7 @@ struct l2cap_ops { struct l2cap_chan *(*new_connection) (struct l2cap_chan *chan); int (*recv) (struct l2cap_chan * chan, struct sk_buff *skb); + void (*teardown) (struct l2cap_chan *chan, int err); void (*close) (struct l2cap_chan *chan); void (*state_change) (struct l2cap_chan *chan, int state); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7edc8146db2..1f4c7207415 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -493,9 +493,7 @@ static void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) static void l2cap_chan_del(struct l2cap_chan *chan, int err) { - struct sock *sk = chan->sk; struct l2cap_conn *conn = chan->conn; - struct sock *parent = bt_sk(sk)->parent; __clear_chan_timer(chan); @@ -511,21 +509,8 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) hci_conn_put(conn->hcon); } - lock_sock(sk); - - __l2cap_state_change(chan, BT_CLOSED); - sock_set_flag(sk, SOCK_ZAPPED); - - if (err) - __l2cap_chan_set_err(chan, err); - - if (parent) { - bt_accept_unlink(sk); - parent->sk_data_ready(parent, 0); - } else - sk->sk_state_change(sk); - - release_sock(sk); + if (chan->ops->teardown) + chan->ops->teardown(chan, err); if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state)) return; @@ -554,25 +539,6 @@ static void l2cap_chan_del(struct l2cap_chan *chan, int err) return; } -static void l2cap_chan_cleanup_listen(struct sock *parent) -{ - struct sock *sk; - - BT_DBG("parent %p", parent); - - /* Close not yet accepted channels */ - while ((sk = bt_accept_dequeue(parent, NULL))) { - struct l2cap_chan *chan = l2cap_pi(sk)->chan; - - l2cap_chan_lock(chan); - __clear_chan_timer(chan); - l2cap_chan_close(chan, ECONNRESET); - l2cap_chan_unlock(chan); - - chan->ops->close(chan); - } -} - void l2cap_chan_close(struct l2cap_chan *chan, int reason) { struct l2cap_conn *conn = chan->conn; @@ -583,12 +549,8 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) switch (chan->state) { case BT_LISTEN: - lock_sock(sk); - l2cap_chan_cleanup_listen(sk); - - __l2cap_state_change(chan, BT_CLOSED); - sock_set_flag(sk, SOCK_ZAPPED); - release_sock(sk); + if (chan->ops->teardown) + chan->ops->teardown(chan, 0); break; case BT_CONNECTED: @@ -630,9 +592,8 @@ void l2cap_chan_close(struct l2cap_chan *chan, int reason) break; default: - lock_sock(sk); - sock_set_flag(sk, SOCK_ZAPPED); - release_sock(sk); + if (chan->ops->teardown) + chan->ops->teardown(chan, 0); break; } } @@ -3419,7 +3380,9 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - sock_set_flag(sk, SOCK_ZAPPED); + if (chan->ops->teardown) + chan->ops->teardown(chan, 0); + chan->ops->close(chan); goto response; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index db787f67c52..3f5946351fb 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -872,6 +872,25 @@ static int l2cap_sock_release(struct socket *sock) return err; } +static void l2cap_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + BT_DBG("parent %p", parent); + + /* Close not yet accepted channels */ + while ((sk = bt_accept_dequeue(parent, NULL))) { + struct l2cap_chan *chan = l2cap_pi(sk)->chan; + + l2cap_chan_lock(chan); + __clear_chan_timer(chan); + l2cap_chan_close(chan, ECONNRESET); + l2cap_chan_unlock(chan); + + l2cap_sock_kill(sk); + } +} + static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) { struct sock *sk, *parent = chan->data; @@ -931,6 +950,47 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan) l2cap_sock_kill(sk); } +static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err) +{ + struct sock *sk = chan->data; + struct sock *parent; + + lock_sock(sk); + + parent = bt_sk(sk)->parent; + + sock_set_flag(sk, SOCK_ZAPPED); + + switch (chan->state) { + case BT_OPEN: + case BT_BOUND: + case BT_CLOSED: + break; + case BT_LISTEN: + l2cap_sock_cleanup_listen(sk); + sk->sk_state = BT_CLOSED; + chan->state = BT_CLOSED; + + break; + default: + sk->sk_state = BT_CLOSED; + chan->state = BT_CLOSED; + + sk->sk_err = err; + + if (parent) { + bt_accept_unlink(sk); + parent->sk_data_ready(parent, 0); + } else { + sk->sk_state_change(sk); + } + + break; + } + + release_sock(sk); +} + static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state) { struct sock *sk = chan->data; @@ -959,6 +1019,7 @@ static struct l2cap_ops l2cap_chan_ops = { .new_connection = l2cap_sock_new_connection_cb, .recv = l2cap_sock_recv_cb, .close = l2cap_sock_close_cb, + .teardown = l2cap_sock_teardown_cb, .state_change = l2cap_sock_state_change_cb, .alloc_skb = l2cap_sock_alloc_skb_cb, }; -- cgit v1.2.3-70-g09d2 From 54a59aa2b562872781d6a8fc89f300d360941691 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Sun, 27 May 2012 22:27:53 -0300 Subject: Bluetooth: Add l2cap_chan->ops->ready() This move socket specific code to l2cap_sock.c. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 18 +++--------------- net/bluetooth/l2cap_sock.c | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 76b0e7e5dec..c5726c24ee0 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -534,6 +534,7 @@ struct l2cap_ops { void (*close) (struct l2cap_chan *chan); void (*state_change) (struct l2cap_chan *chan, int state); + void (*ready) (struct l2cap_chan *chan); struct sk_buff *(*alloc_skb) (struct l2cap_chan *chan, unsigned long len, int nb); }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1f4c7207415..5947eb1c1be 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -931,26 +931,14 @@ static void l2cap_send_conn_req(struct l2cap_chan *chan) static void l2cap_chan_ready(struct l2cap_chan *chan) { - struct sock *sk = chan->sk; - struct sock *parent; - - lock_sock(sk); - - parent = bt_sk(sk)->parent; - - BT_DBG("sk %p, parent %p", sk, parent); - /* This clears all conf flags, including CONF_NOT_COMPLETE */ chan->conf_state = 0; __clear_chan_timer(chan); - __l2cap_state_change(chan, BT_CONNECTED); - sk->sk_state_change(sk); + chan->state = BT_CONNECTED; - if (parent) - parent->sk_data_ready(parent, 0); - - release_sock(sk); + if (chan->ops->ready) + chan->ops->ready(chan); } static void l2cap_do_start(struct l2cap_chan *chan) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3f5946351fb..5563023001c 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1014,6 +1014,26 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, return skb; } +static void l2cap_sock_ready_cb(struct l2cap_chan *chan) +{ + struct sock *sk = chan->data; + struct sock *parent; + + lock_sock(sk); + + parent = bt_sk(sk)->parent; + + BT_DBG("sk %p, parent %p", sk, parent); + + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + + if (parent) + parent->sk_data_ready(parent, 0); + + release_sock(sk); +} + static struct l2cap_ops l2cap_chan_ops = { .name = "L2CAP Socket Interface", .new_connection = l2cap_sock_new_connection_cb, @@ -1021,6 +1041,7 @@ static struct l2cap_ops l2cap_chan_ops = { .close = l2cap_sock_close_cb, .teardown = l2cap_sock_teardown_cb, .state_change = l2cap_sock_state_change_cb, + .ready = l2cap_sock_ready_cb, .alloc_skb = l2cap_sock_alloc_skb_cb, }; -- cgit v1.2.3-70-g09d2 From 466f8004f364e9cb46d9124109972489eccfb404 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:01 +0300 Subject: Bluetooth: A2MP: Create A2MP channel Create and initialize fixed A2MP channel Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 6 ++++ net/bluetooth/Makefile | 3 +- net/bluetooth/a2mp.c | 69 +++++++++++++++++++++++++++++++++++++++++++ net/bluetooth/l2cap_core.c | 6 ++-- 4 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 net/bluetooth/a2mp.c (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index c5726c24ee0..aaba222306b 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -52,6 +52,8 @@ #define L2CAP_CONN_TIMEOUT msecs_to_jiffies(40000) #define L2CAP_INFO_TIMEOUT msecs_to_jiffies(4000) +#define L2CAP_A2MP_DEFAULT_MTU 670 + /* L2CAP socket address */ struct sockaddr_l2 { sa_family_t l2_family; @@ -236,6 +238,7 @@ struct l2cap_conn_rsp { /* channel indentifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 +#define L2CAP_CID_A2MP 0x0003 #define L2CAP_CID_LE_DATA 0x0004 #define L2CAP_CID_LE_SIGNALING 0x0005 #define L2CAP_CID_SMP 0x0006 @@ -758,5 +761,8 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len, void l2cap_chan_busy(struct l2cap_chan *chan, int busy); int l2cap_chan_check_security(struct l2cap_chan *chan); void l2cap_chan_set_defaults(struct l2cap_chan *chan); +int l2cap_ertm_init(struct l2cap_chan *chan); +void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan); +void l2cap_chan_del(struct l2cap_chan *chan, int err); #endif /* __L2CAP_H */ diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 2dc5a5700f5..fa6d94a4602 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -9,4 +9,5 @@ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ - hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o + hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ + a2mp.o diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c new file mode 100644 index 00000000000..de455a26445 --- /dev/null +++ b/net/bluetooth/a2mp.c @@ -0,0 +1,69 @@ +/* + Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved. + Copyright (c) 2011,2012 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#include +#include +#include + +static struct l2cap_ops a2mp_chan_ops = { + .name = "L2CAP A2MP channel", +}; + +static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) +{ + struct l2cap_chan *chan; + int err; + + chan = l2cap_chan_create(); + if (!chan) + return NULL; + + BT_DBG("chan %p", chan); + + hci_conn_hold(conn->hcon); + + chan->omtu = L2CAP_A2MP_DEFAULT_MTU; + chan->imtu = L2CAP_A2MP_DEFAULT_MTU; + chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; + + chan->ops = &a2mp_chan_ops; + + l2cap_chan_set_defaults(chan); + chan->remote_max_tx = chan->max_tx; + chan->remote_tx_win = chan->tx_win; + + chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; + chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + + skb_queue_head_init(&chan->tx_q); + + chan->mode = L2CAP_MODE_ERTM; + + err = l2cap_ertm_init(chan); + if (err < 0) { + l2cap_chan_del(chan, 0); + return NULL; + } + + chan->conf_state = 0; + + l2cap_chan_add(conn, chan); + + chan->remote_mps = chan->omtu; + chan->mps = chan->omtu; + + chan->state = BT_CONNECTED; + + return chan; +} diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 778c0c8cdc5..2c616cf24c7 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -484,14 +484,14 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) list_add(&chan->list, &conn->chan_l); } -static void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) +void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { mutex_lock(&conn->chan_lock); __l2cap_chan_add(conn, chan); mutex_unlock(&conn->chan_lock); } -static void l2cap_chan_del(struct l2cap_chan *chan, int err) +void l2cap_chan_del(struct l2cap_chan *chan, int err) { struct l2cap_conn *conn = chan->conn; @@ -2691,7 +2691,7 @@ static void l2cap_ack_timeout(struct work_struct *work) l2cap_chan_put(chan); } -static inline int l2cap_ertm_init(struct l2cap_chan *chan) +int l2cap_ertm_init(struct l2cap_chan *chan) { int err; -- cgit v1.2.3-70-g09d2 From 9740e49d17e55f3832661fd99a8e0a17e921a82e Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:02 +0300 Subject: Bluetooth: A2MP: AMP Manager basic functions Define AMP Manager and some basic functions. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 30 ++++++++++++++++++++++ include/net/bluetooth/hci_core.h | 1 + net/bluetooth/a2mp.c | 54 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_conn.c | 4 +++ 4 files changed, 89 insertions(+) create mode 100644 include/net/bluetooth/a2mp.h (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h new file mode 100644 index 00000000000..ff4754000cf --- /dev/null +++ b/include/net/bluetooth/a2mp.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2010,2011 Code Aurora Forum. All rights reserved. + Copyright (c) 2011,2012 Intel Corp. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 and + only version 2 as published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +*/ + +#ifndef __A2MP_H +#define __A2MP_H + +struct amp_mgr { + struct l2cap_conn *l2cap_conn; + struct l2cap_chan *a2mp_chan; + struct kref kref; + __u8 ident; + __u8 handle; + unsigned long flags; +}; + +void amp_mgr_get(struct amp_mgr *mgr); +int amp_mgr_put(struct amp_mgr *mgr); + +#endif /* __A2MP_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d584a47d1c8..6e64b76e30a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -332,6 +332,7 @@ struct hci_conn { void *l2cap_data; void *sco_data; void *smp_conn; + struct amp_mgr *amp_mgr; struct hci_conn *link; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index de455a26445..3c241c2b3e1 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -15,6 +15,7 @@ #include #include #include +#include static struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", @@ -67,3 +68,56 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) return chan; } + +/* AMP Manager functions */ +void amp_mgr_get(struct amp_mgr *mgr) +{ + BT_DBG("mgr %p", mgr); + + kref_get(&mgr->kref); +} + +static void amp_mgr_destroy(struct kref *kref) +{ + struct amp_mgr *mgr = container_of(kref, struct amp_mgr, kref); + + BT_DBG("mgr %p", mgr); + + kfree(mgr); +} + +int amp_mgr_put(struct amp_mgr *mgr) +{ + BT_DBG("mgr %p", mgr); + + return kref_put(&mgr->kref, &_mgr_destroy); +} + +static struct amp_mgr *amp_mgr_create(struct l2cap_conn *conn) +{ + struct amp_mgr *mgr; + struct l2cap_chan *chan; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + BT_DBG("conn %p mgr %p", conn, mgr); + + mgr->l2cap_conn = conn; + + chan = a2mp_chan_open(conn); + if (!chan) { + kfree(mgr); + return NULL; + } + + mgr->a2mp_chan = chan; + chan->data = mgr; + + conn->hcon->amp_mgr = mgr; + + kref_init(&mgr->kref); + + return mgr; +} diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 126876d915f..1458667b284 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -28,6 +28,7 @@ #include #include +#include static void hci_le_connect(struct hci_conn *conn) { @@ -411,6 +412,9 @@ int hci_conn_del(struct hci_conn *conn) hci_chan_list_flush(conn); + if (conn->amp_mgr) + amp_mgr_put(conn->amp_mgr); + hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); -- cgit v1.2.3-70-g09d2 From f6d3c6e783b0e9f75b18232f8ff8cd5dbc3f7301 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:03 +0300 Subject: Bluetooth: A2MP: Build and Send msg helpers Helper function to build and send A2MP messages. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 7 +++++++ net/bluetooth/a2mp.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index ff4754000cf..654df60cfd6 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -24,6 +24,13 @@ struct amp_mgr { unsigned long flags; }; +struct a2mp_cmd { + __u8 code; + __u8 ident; + __le16 len; + __u8 data[0]; +} __packed; + void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 3c241c2b3e1..53f49a0b7f9 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -17,6 +17,52 @@ #include #include +/* A2MP build & send command helper functions */ +static struct a2mp_cmd *__a2mp_build(u8 code, u8 ident, u16 len, void *data) +{ + struct a2mp_cmd *cmd; + int plen; + + plen = sizeof(*cmd) + len; + cmd = kzalloc(plen, GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->code = code; + cmd->ident = ident; + cmd->len = cpu_to_le16(len); + + memcpy(cmd->data, data, len); + + return cmd; +} + +static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, + void *data) +{ + struct l2cap_chan *chan = mgr->a2mp_chan; + struct a2mp_cmd *cmd; + u16 total_len = len + sizeof(*cmd); + struct kvec iv; + struct msghdr msg; + + cmd = __a2mp_build(code, ident, len, data); + if (!cmd) + return; + + iv.iov_base = cmd; + iv.iov_len = total_len; + + memset(&msg, 0, sizeof(msg)); + + msg.msg_iov = (struct iovec *) &iv; + msg.msg_iovlen = 1; + + l2cap_chan_send(chan, &msg, total_len, 0); + + kfree(cmd); +} + static struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", }; -- cgit v1.2.3-70-g09d2 From b9058fb67c42851b4f852d90b11f43279586aae9 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:05 +0300 Subject: Bluetooth: A2MP: Definitions for A2MP commands Define A2MP command IDs and packet structures. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 73 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 654df60cfd6..7cbeb911fbd 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -31,6 +31,79 @@ struct a2mp_cmd { __u8 data[0]; } __packed; +/* A2MP command codes */ +#define A2MP_COMMAND_REJ 0x01 +struct a2mp_cmd_rej { + __le16 reason; + __u8 data[0]; +} __packed; + +#define A2MP_DISCOVER_REQ 0x02 +struct a2mp_discov_req { + __le16 mtu; + __le16 ext_feat; +} __packed; + +struct a2mp_cl { + __u8 id; + __u8 type; + __u8 status; +} __packed; + +#define A2MP_DISCOVER_RSP 0x03 +struct a2mp_discov_rsp { + __le16 mtu; + __le16 ext_feat; + struct a2mp_cl cl[0]; +} __packed; + +#define A2MP_CHANGE_NOTIFY 0x04 +#define A2MP_CHANGE_RSP 0x05 + +#define A2MP_GETINFO_REQ 0x06 +struct a2mp_info_req { + __u8 id; +} __packed; + +#define A2MP_GETINFO_RSP 0x07 +struct a2mp_info_rsp { + __u8 id; + __u8 status; + __le32 total_bw; + __le32 max_bw; + __le32 min_latency; + __le16 pal_cap; + __le16 assoc_size; +} __packed; + +#define A2MP_GETAMPASSOC_REQ 0x08 +struct a2mp_amp_assoc_req { + __u8 id; +} __packed; + +#define A2MP_GETAMPASSOC_RSP 0x09 +struct a2mp_amp_assoc_rsp { + __u8 id; + __u8 status; + __u8 amp_assoc[0]; +} __packed; + +#define A2MP_CREATEPHYSLINK_REQ 0x0A +#define A2MP_DISCONNPHYSLINK_REQ 0x0C +struct a2mp_physlink_req { + __u8 local_id; + __u8 remote_id; + __u8 amp_assoc[0]; +} __packed; + +#define A2MP_CREATEPHYSLINK_RSP 0x0B +#define A2MP_DISCONNPHYSLINK_RSP 0x0D +struct a2mp_physlink_rsp { + __u8 local_id; + __u8 remote_id; + __u8 status; +} __packed; + void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); -- cgit v1.2.3-70-g09d2 From e7af522e04bcf68caae6802722efc5c6e8fa63a7 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:06 +0300 Subject: Bluetooth: A2MP: Define A2MP status codes A2MP status codes copied from Bluez patch sent by Peter Krystad . Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 7cbeb911fbd..391acd7a67d 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -104,6 +104,16 @@ struct a2mp_physlink_rsp { __u8 status; } __packed; +/* A2MP response status */ +#define A2MP_STATUS_SUCCESS 0x00 +#define A2MP_STATUS_INVALID_CTRL_ID 0x01 +#define A2MP_STATUS_UNABLE_START_LINK_CREATION 0x02 +#define A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS 0x02 +#define A2MP_STATUS_COLLISION_OCCURED 0x03 +#define A2MP_STATUS_DISCONN_REQ_RECVD 0x04 +#define A2MP_STATUS_PHYS_LINK_EXISTS 0x05 +#define A2MP_STATUS_SECURITY_VIOLATION 0x06 + void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); -- cgit v1.2.3-70-g09d2 From 8598d064cbf22b2d84c7cd8a9fcb97138baffe3f Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:09 +0300 Subject: Bluetooth: A2MP: Process A2MP Discover Request Adds helper functions to count HCI devs and process A2MP Discover Request, code makes sure that first controller in the list is BREDR one. Trace is shown below: ... > ACL data: handle 11 flags 0x02 dlen 16 A2MP: Discover req: mtu/mps 670 mask: 0x0000 < ACL data: handle 11 flags 0x00 dlen 22 A2MP: Discover rsp: mtu/mps 670 mask: 0x0000 Controller list: id 0 type 0 (BR-EDR) status 0x01 (Bluetooth only) id 1 type 1 (802.11 AMP) status 0x01 (Bluetooth only) ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 2 + include/net/bluetooth/hci.h | 3 ++ include/net/bluetooth/hci_core.h | 13 ++++++ net/bluetooth/a2mp.c | 85 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 391acd7a67d..96f9cc2cf59 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -15,6 +15,8 @@ #ifndef __A2MP_H #define __A2MP_H +#define A2MP_FEAT_EXT 0x8000 + struct amp_mgr { struct l2cap_conn *l2cap_conn; struct l2cap_chan *a2mp_chan; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index de09a26e422..66af2c6193d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -59,6 +59,9 @@ #define HCI_BREDR 0x00 #define HCI_AMP 0x01 +/* First BR/EDR Controller shall have ID = 0 */ +#define HCI_BREDR_ID 0 + /* HCI device quirks */ enum { HCI_QUIRK_RESET_ON_CLOSE, diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6e64b76e30a..20fd57367dd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -641,6 +641,19 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data) dev_set_drvdata(&hdev->dev, data); } +/* hci_dev_list shall be locked */ +static inline uint8_t __hci_num_ctrl(void) +{ + uint8_t count = 0; + struct list_head *p; + + list_for_each(p, &hci_dev_list) { + count++; + } + + return count; +} + struct hci_dev *hci_dev_get(int index); struct hci_dev *hci_get_route(bdaddr_t *src, bdaddr_t *dst); diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 188b4212007..1cc920a62b0 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -63,6 +63,36 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, kfree(cmd); } +static inline void __a2mp_cl_bredr(struct a2mp_cl *cl) +{ + cl->id = 0; + cl->type = 0; + cl->status = 1; +} + +/* hci_dev_list shall be locked */ +static void __a2mp_add_cl(struct amp_mgr *mgr, struct a2mp_cl *cl, u8 num_ctrl) +{ + int i = 0; + struct hci_dev *hdev; + + __a2mp_cl_bredr(cl); + + list_for_each_entry(hdev, &hci_dev_list, list) { + /* Iterate through AMP controllers */ + if (hdev->id == HCI_BREDR_ID) + continue; + + /* Starting from second entry */ + if (++i >= num_ctrl) + return; + + cl[i].id = hdev->id; + cl[i].type = hdev->amp_type; + cl[i].status = hdev->amp_status; + } +} + /* Processing A2MP messages */ static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb, struct a2mp_cmd *hdr) @@ -79,6 +109,58 @@ static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb, return 0; } +static int a2mp_discover_req(struct amp_mgr *mgr, struct sk_buff *skb, + struct a2mp_cmd *hdr) +{ + struct a2mp_discov_req *req = (void *) skb->data; + u16 len = le16_to_cpu(hdr->len); + struct a2mp_discov_rsp *rsp; + u16 ext_feat; + u8 num_ctrl; + + if (len < sizeof(*req)) + return -EINVAL; + + skb_pull(skb, sizeof(*req)); + + ext_feat = le16_to_cpu(req->ext_feat); + + BT_DBG("mtu %d efm 0x%4.4x", le16_to_cpu(req->mtu), ext_feat); + + /* check that packet is not broken for now */ + while (ext_feat & A2MP_FEAT_EXT) { + if (len < sizeof(ext_feat)) + return -EINVAL; + + ext_feat = get_unaligned_le16(skb->data); + BT_DBG("efm 0x%4.4x", ext_feat); + len -= sizeof(ext_feat); + skb_pull(skb, sizeof(ext_feat)); + } + + read_lock(&hci_dev_list_lock); + + num_ctrl = __hci_num_ctrl(); + len = num_ctrl * sizeof(struct a2mp_cl) + sizeof(*rsp); + rsp = kmalloc(len, GFP_ATOMIC); + if (!rsp) { + read_unlock(&hci_dev_list_lock); + return -ENOMEM; + } + + rsp->mtu = __constant_cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU); + rsp->ext_feat = 0; + + __a2mp_add_cl(mgr, rsp->cl, num_ctrl); + + read_unlock(&hci_dev_list_lock); + + a2mp_send(mgr, A2MP_DISCOVER_RSP, hdr->ident, len, rsp); + + kfree(rsp); + return 0; +} + /* Handle A2MP signalling */ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) { @@ -109,6 +191,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb) break; case A2MP_DISCOVER_REQ: + err = a2mp_discover_req(mgr, skb, hdr); + break; + case A2MP_CHANGE_NOTIFY: case A2MP_GETINFO_REQ: case A2MP_GETAMPASSOC_REQ: -- cgit v1.2.3-70-g09d2 From 416fa7527d6bf658e5517ea36d2de9270be2c11e Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:16 +0300 Subject: Bluetooth: A2MP: Handling fixed channels A2MP fixed channel do not have sk Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/a2mp.c | 3 +-- net/bluetooth/l2cap_core.c | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index aaba222306b..a00b43ecbc7 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -581,6 +581,7 @@ struct l2cap_conn { #define L2CAP_CHAN_RAW 1 #define L2CAP_CHAN_CONN_LESS 2 #define L2CAP_CHAN_CONN_ORIENTED 3 +#define L2CAP_CHAN_CONN_FIX_A2MP 4 /* ----- L2CAP socket info ----- */ #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk) diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 6a933dab1b7..f1ec1b1d308 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -483,8 +483,7 @@ static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) hci_conn_hold(conn->hcon); - chan->omtu = L2CAP_A2MP_DEFAULT_MTU; - chan->imtu = L2CAP_A2MP_DEFAULT_MTU; + chan->chan_type = L2CAP_CHAN_CONN_FIX_A2MP; chan->flush_to = L2CAP_DEFAULT_FLUSH_TO; chan->ops = &a2mp_chan_ops; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2c616cf24c7..fc572795497 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -465,6 +465,13 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) chan->omtu = L2CAP_DEFAULT_MTU; break; + case L2CAP_CHAN_CONN_FIX_A2MP: + chan->scid = L2CAP_CID_A2MP; + chan->dcid = L2CAP_CID_A2MP; + chan->omtu = L2CAP_A2MP_DEFAULT_MTU; + chan->imtu = L2CAP_A2MP_DEFAULT_MTU; + break; + default: /* Raw socket can send/recv signalling messages only */ chan->scid = L2CAP_CID_SIGNALING; @@ -1001,6 +1008,11 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c __clear_ack_timer(chan); } + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + __l2cap_state_change(chan, BT_DISCONN); + return; + } + req.dcid = cpu_to_le16(chan->dcid); req.scid = cpu_to_le16(chan->scid); l2cap_send_cmd(conn, l2cap_get_ident(conn), @@ -1195,6 +1207,11 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) l2cap_chan_lock(chan); + if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) { + l2cap_chan_unlock(chan); + continue; + } + if (conn->hcon->type == LE_LINK) { if (smp_conn_security(conn, chan->sec_level)) l2cap_chan_ready(chan); -- cgit v1.2.3-70-g09d2 From 97e8e89d2d8185b7644c9941636d3682eedc517b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 29 May 2012 13:59:17 +0300 Subject: Bluetooth: A2MP: Manage incoming connections Handle incoming A2MP connection by creating AMP manager and processing A2MP messages. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/a2mp.h | 4 ++++ net/bluetooth/a2mp.c | 16 ++++++++++++++++ net/bluetooth/l2cap_core.c | 19 +++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/a2mp.h b/include/net/bluetooth/a2mp.h index 96f9cc2cf59..6a76e0a0705 100644 --- a/include/net/bluetooth/a2mp.h +++ b/include/net/bluetooth/a2mp.h @@ -15,6 +15,8 @@ #ifndef __A2MP_H #define __A2MP_H +#include + #define A2MP_FEAT_EXT 0x8000 struct amp_mgr { @@ -118,5 +120,7 @@ struct a2mp_physlink_rsp { void amp_mgr_get(struct amp_mgr *mgr); int amp_mgr_put(struct amp_mgr *mgr); +struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, + struct sk_buff *skb); #endif /* __A2MP_H */ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index f1ec1b1d308..e08ca2ac31a 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -569,3 +569,19 @@ static struct amp_mgr *amp_mgr_create(struct l2cap_conn *conn) return mgr; } + +struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn, + struct sk_buff *skb) +{ + struct amp_mgr *mgr; + + mgr = amp_mgr_create(conn); + if (!mgr) { + BT_ERR("Could not create AMP manager"); + return NULL; + } + + BT_DBG("mgr: %p chan %p", mgr, mgr->a2mp_chan); + + return mgr->a2mp_chan; +} diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fc572795497..3daac2c6b7b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -37,6 +37,7 @@ #include #include #include +#include bool disable_ertm; @@ -5132,10 +5133,20 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk chan = l2cap_get_chan_by_scid(conn, cid); if (!chan) { - BT_DBG("unknown cid 0x%4.4x", cid); - /* Drop packet and return */ - kfree_skb(skb); - return 0; + if (cid == L2CAP_CID_A2MP) { + chan = a2mp_channel_create(conn, skb); + if (!chan) { + kfree_skb(skb); + return 0; + } + + l2cap_chan_lock(chan); + } else { + BT_DBG("unknown cid 0x%4.4x", cid); + /* Drop packet and return */ + kfree_skb(skb); + return 0; + } } BT_DBG("chan %p, len %d", chan, skb->len); -- cgit v1.2.3-70-g09d2 From 1afd5be87e30997a5623260d9177ed62cf88adbe Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 28 May 2012 11:47:20 +0200 Subject: Bluetooth: Remove unused HCI timeouts definitions Those are not used anywhere in code (and never were since introduction in 2006) so just remove them. Signed-off-by: Szymon Janc Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 66af2c6193d..3f5d682e866 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -139,10 +139,8 @@ enum { #define HCIINQUIRY _IOR('H', 240, int) /* HCI timeouts */ -#define HCI_CONNECT_TIMEOUT (40000) /* 40 seconds */ #define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */ #define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */ -#define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */ #define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ #define HCI_CMD_TIMEOUT (1000) /* 1 seconds */ #define HCI_ACL_TX_TIMEOUT (45000) /* 45 seconds */ -- cgit v1.2.3-70-g09d2 From 8c3a4f004e706fd7e681c68c6de4946c8c76b976 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Thu, 31 May 2012 17:01:35 -0300 Subject: Bluetooth: Rename L2CAP_LE_DEFAULT_MTU This patch renames L2CAP_LE_DEFAULT_MTU macro to L2CAP_LE_MIN_MTU since it represents the minimum MTU value, not the default MTU value for LE. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 2 +- net/bluetooth/l2cap_sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index a00b43ecbc7..ce99c5683d9 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -40,11 +40,11 @@ #define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1009 /* Sized for 3-DH5 packet */ #define L2CAP_DEFAULT_ACK_TO 200 -#define L2CAP_LE_DEFAULT_MTU 23 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF #define L2CAP_DEFAULT_SDU_ITIME 0xFFFFFFFF #define L2CAP_DEFAULT_ACC_LAT 0xFFFFFFFF #define L2CAP_BREDR_MAX_PAYLOAD 1019 /* 3-DH5 packet */ +#define L2CAP_LE_MIN_MTU 23 #define L2CAP_DISC_TIMEOUT msecs_to_jiffies(100) #define L2CAP_DISC_REJ_TIMEOUT msecs_to_jiffies(5000) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ab5868d9430..a4bb27e8427 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -449,7 +449,7 @@ static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { case L2CAP_CID_LE_DATA: - if (mtu < L2CAP_LE_DEFAULT_MTU) + if (mtu < L2CAP_LE_MIN_MTU) return false; break; -- cgit v1.2.3-70-g09d2 From 7e1af8a3a51dbf5dc7392fb294a0830f7e853aa8 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 29 May 2012 13:19:26 -0300 Subject: Bluetooth: Create empty l2cap ops function A2MP doesn't use part of the L2CAP chan ops API so we just create general empty function instead of the A2MP specific one. Signed-off-by: Gustavo Padovan Signed-off-by: Johan Hedberg --- include/net/bluetooth/l2cap.h | 12 ++++++++++++ net/bluetooth/a2mp.c | 23 +++-------------------- 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index ce99c5683d9..d80e3f0691b 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -740,6 +740,18 @@ static inline __u16 __next_seq(struct l2cap_chan *chan, __u16 seq) return (seq + 1) % (chan->tx_win_max + 1); } +static inline struct l2cap_chan *l2cap_chan_no_new_connection(struct l2cap_chan *chan) +{ + return NULL; +} + +static inline void l2cap_chan_no_teardown(struct l2cap_chan *chan, int err) +{ +} + +static inline void l2cap_chan_no_ready(struct l2cap_chan *chan) +{ +} extern bool disable_ertm; diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index 0772c680abe..fb93250b393 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -440,23 +440,6 @@ static struct sk_buff *a2mp_chan_alloc_skb_cb(struct l2cap_chan *chan, return bt_skb_alloc(len, GFP_KERNEL); } -static struct l2cap_chan *a2mp_chan_no_new_conn_cb(struct l2cap_chan *chan) -{ - BT_ERR("new_connection for chan %p not implemented", chan); - - return NULL; -} - -static void a2mp_chan_no_teardown_cb(struct l2cap_chan *chan, int err) -{ - BT_ERR("teardown for chan %p not implemented", chan); -} - -static void a2mp_chan_no_ready(struct l2cap_chan *chan) -{ - BT_ERR("ready for chan %p not implemented", chan); -} - static struct l2cap_ops a2mp_chan_ops = { .name = "L2CAP A2MP channel", .recv = a2mp_chan_recv_cb, @@ -465,9 +448,9 @@ static struct l2cap_ops a2mp_chan_ops = { .alloc_skb = a2mp_chan_alloc_skb_cb, /* Not implemented for A2MP */ - .new_connection = a2mp_chan_no_new_conn_cb, - .teardown = a2mp_chan_no_teardown_cb, - .ready = a2mp_chan_no_ready, + .new_connection = l2cap_chan_no_new_connection, + .teardown = l2cap_chan_no_teardown, + .ready = l2cap_chan_no_ready, }; static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn) -- cgit v1.2.3-70-g09d2 From 72d7872852e1734e94686012a2e9deade3457329 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 10 May 2012 16:18:26 +0300 Subject: mac80211: allow low-level drivers to set netdev feature bits Low level drivers can now set certain netdev feature bits in netdev_features member of the ieee80211_hw struct. These will be propagated to every netdev created from this HW. The white-listed features currently include only ones related to HW checksumming. Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 +++++ net/mac80211/iface.c | 2 ++ net/mac80211/main.c | 7 +++++++ 3 files changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1937c7d9830..0286c0476e4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1297,6 +1297,10 @@ enum ieee80211_hw_flags { * reports, by default it is set to _MCS, _GI and _BW but doesn't * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only * adding _BW is supported today. + * + * @netdev_features: netdev features to be set in each netdev created + * from this HW. Note only HW checksum features are currently + * compatible with mac80211. Other feature bits will be rejected. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1319,6 +1323,7 @@ struct ieee80211_hw { u8 max_tx_aggregation_subframes; u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; + netdev_features_t netdev_features; }; /** diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d4c19a7773d..f970e0b3c4b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1352,6 +1352,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->u.mgd.use_4addr = params->use_4addr; } + ndev->features |= local->hw.netdev_features; + ret = register_netdevice(ndev); if (ret) goto fail; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f5548e95325..779ac613ee5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -682,6 +682,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) enum ieee80211_band band; int channels, max_bitrates; bool supp_ht; + netdev_features_t feature_whitelist; static const u32 cipher_suites[] = { /* keep WEP first, it may be removed below */ WLAN_CIPHER_SUITE_WEP40, @@ -708,6 +709,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) return -EINVAL; + /* Only HW csum features are currently compatible with mac80211 */ + feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_HW_CSUM; + if (WARN_ON(hw->netdev_features & ~feature_whitelist)) + return -EINVAL; + if (hw->max_report_rates == 0) hw->max_report_rates = hw->max_rates; -- cgit v1.2.3-70-g09d2 From d63e9ae3b12fd0c6a3795c9b08de6b476f80b8c3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 15 May 2012 14:20:31 -0700 Subject: net: mac80211: Add and use ht_vdbg debugging macro Simplify the use of #ifdef CONFIG_MAC80211_HT_DEBUG/#endif by adding a logging macro to encapsulate the test. Convert the appropriate uses too. Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/mac80211.h | 13 +++++++ net/mac80211/agg-rx.c | 24 ++++-------- net/mac80211/agg-tx.c | 103 ++++++++++++++----------------------------------- 3 files changed, 50 insertions(+), 90 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0286c0476e4..808462e2a71 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3844,4 +3844,17 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); +/* Extra debugging macros */ + +#ifdef CONFIG_MAC80211_HT_DEBUG +#define ht_vdbg(fmt, ...) \ + pr_debug(fmt, ##__VA_ARGS__) +#else +#define ht_vdbg(fmt, ...) \ +do { \ + if (0) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) +#endif + #endif /* MAC80211_H */ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index ec55f42705b..a096b0dae37 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -74,12 +74,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Rx BA session stop requested for %pM tid %u %s reason: %d\n", - sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", - (int)reason); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Rx BA session stop requested for %pM tid %u %s reason: %d\n", + sta->sta.addr, tid, + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + (int)reason); if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) @@ -154,9 +152,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) return; } -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("rx session timer expired on tid %d\n", (u16)*ptid); -#endif + ht_vdbg("rx session timer expired on tid %d\n", (u16)*ptid); + set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); } @@ -243,9 +240,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Suspend in progress - Denying ADDBA request\n"); -#endif + ht_vdbg("Suspend in progress - Denying ADDBA request\n"); goto end_no_lock; } @@ -317,10 +312,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Rx A-MPDU request on tid %d result %d\n", tid, ret); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - + ht_vdbg("Rx A-MPDU request on tid %d result %d\n", tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 90b2c0ffd5b..da07f01cfe4 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -184,10 +184,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); del_timer_sync(&tid_tx->addba_resp_timer); del_timer_sync(&tid_tx->session_timer); @@ -253,16 +251,12 @@ static void sta_addba_resp_timer_expired(unsigned long data) if (!tid_tx || test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); -#endif + ht_vdbg("timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", + tid); return; } -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("addBA response timer expired on tid %d\n", tid); -#endif + ht_vdbg("addBA response timer expired on tid %d\n", tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); @@ -371,10 +365,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, &sta->sta, tid, &start_seq_num, 0); if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - HW unavailable for tid %d\n", - tid); -#endif + ht_vdbg("BA request denied - HW unavailable for tid %d\n", tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -387,9 +378,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("activated addBA response timer on tid %d\n", tid); -#endif + ht_vdbg("activated addBA response timer on tid %d\n", tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -436,9 +425,7 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("tx session timer expired on tid %d\n", (u16)*ptid); -#endif + ht_vdbg("tx session timer expired on tid %d\n", (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -462,10 +449,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) return -EINVAL; -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Open BA session requested for %pM tid %u\n", - pubsta->addr, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Open BA session requested for %pM tid %u\n", + pubsta->addr, tid); if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -475,9 +460,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA sessions blocked - Denying BA session request\n"); -#endif + ht_vdbg("BA sessions blocked - Denying BA session request\n"); return -EINVAL; } @@ -495,10 +478,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && !sta->sta.ht_cap.ht_supported) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - IBSS STA %pM does not advertise HT support\n", - pubsta->addr); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("BA request denied - IBSS STA %pM does not advertise HT support\n", + pubsta->addr); return -EINVAL; } @@ -518,10 +499,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES && time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("BA request denied - waiting a grace period after %d failed requests on tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], tid); ret = -EBUSY; goto err_unlock_sta; } @@ -529,10 +508,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("BA request denied - session is not idle on tid %u\n", - tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("BA request denied - session is not idle on tid %u\n", + tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -587,9 +564,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Aggregation is on for tid %d\n", tid); -#endif + ht_vdbg("Aggregation is on for tid %d\n", tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -623,9 +598,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) trace_api_start_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); -#endif + ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); return; } @@ -633,9 +606,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) sta = sta_info_get_bss(sdata, ra); if (!sta) { mutex_unlock(&local->sta_mtx); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Could not find station: %pM\n", ra); -#endif + ht_vdbg("Could not find station: %pM\n", ra); return; } @@ -643,9 +614,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (WARN_ON(!tid_tx)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("addBA was not requested!\n"); -#endif + ht_vdbg("addBA was not requested!\n"); goto unlock; } @@ -745,23 +714,17 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) trace_api_stop_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); -#endif + ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); return; } -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Stopping Tx BA session for %pM tid %d\n", ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_vdbg("Stopping Tx BA session for %pM tid %d\n", ra, tid); mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, ra); if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("Could not find station: %pM\n", ra); -#endif + ht_vdbg("Could not find station: %pM\n", ra); goto unlock; } @@ -770,9 +733,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("unexpected callback to A-MPDU stop\n"); -#endif + ht_vdbg("unexpected callback to A-MPDU stop\n"); goto unlock_sta; } @@ -848,17 +809,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("wrong addBA response token, tid %d\n", tid); -#endif + ht_vdbg("wrong addBA response token, tid %d\n", tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("switched off addBA timer for tid %d\n", tid); -#endif + ht_vdbg("switched off addBA timer for tid %d\n", tid); /* * addba_resp_timer may have fired before we got here, and @@ -867,10 +824,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, */ if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - pr_debug("got addBA resp for tid %d but we already gave up\n", - tid); -#endif + ht_vdbg("got addBA resp for tid %d but we already gave up\n", + tid); goto out; } -- cgit v1.2.3-70-g09d2 From 499f42bb03a9bd8a23f73e7c3886f70f52e7edc5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 15 May 2012 14:20:32 -0700 Subject: net: mac80211: Add and use ibss_vdbg debugging macro Simplify the use of #ifdef CONFIG_MAC80211_IBSS_DEBUG/#endif by adding a logging macro to encapsulate the test. Convert the appropriate uses too. Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +++++++ net/mac80211/ibss.c | 79 ++++++++++++++++++------------------------------- net/mac80211/sta_info.c | 6 ++-- 3 files changed, 41 insertions(+), 55 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 808462e2a71..98da61e52a4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3857,4 +3857,15 @@ do { \ } while (0) #endif +#ifdef CONFIG_MAC80211_IBSS_DEBUG +#define ibss_vdbg(fmt, ...) \ + pr_debug(fmt, ##__VA_ARGS__) +#else +#define ibss_vdbg(fmt, ...) \ +do { \ + if (0) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) +#endif + #endif /* MAC80211_H */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 148c2755302..0bc47a82569 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -281,10 +281,8 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); if (auth) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, sdata->u.ibss.bssid, addr); -#endif + ibss_vdbg("TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", + sdata->vif.addr, sdata->u.ibss.bssid, addr); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0, addr, sdata->u.ibss.bssid, NULL, 0, 0); } @@ -354,11 +352,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, - auth_transaction); -#endif + ibss_vdbg("%s: RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", + sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, + auth_transaction); sta_info_destroy_addr(sdata, mgmt->sa); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); rcu_read_unlock(); @@ -421,12 +417,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_mandatory_rates(local, band); if (sta->sta.supp_rates[band] != prev_rates) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", - sdata->name, sta->sta.addr, - prev_rates, - sta->sta.supp_rates[band]); -#endif + ibss_vdbg("%s: updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", + sdata->name, sta->sta.addr, + prev_rates, + sta->sta.supp_rates[band]); rates_updated = true; } } else { @@ -541,20 +535,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, - (unsigned long long)beacon_timestamp, - (unsigned long long)(rx_timestamp - beacon_timestamp), - jiffies); -#endif + ibss_vdbg("RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, + (unsigned long long)rx_timestamp, + (unsigned long long)beacon_timestamp, + (unsigned long long)(rx_timestamp - beacon_timestamp), + jiffies); if (beacon_timestamp > rx_timestamp) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", - sdata->name, mgmt->bssid); -#endif + ibss_vdbg("%s: beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", + sdata->name, mgmt->bssid); ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, @@ -717,10 +707,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&ifibss->mtx); active_ibss = ieee80211_sta_active_ibss(sdata); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: sta_find_ibss (active_ibss=%d)\n", - sdata->name, active_ibss); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg("%s: sta_find_ibss (active_ibss=%d)\n", + sdata->name, active_ibss); if (active_ibss) return; @@ -743,11 +731,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) struct ieee80211_bss *bss; bss = (void *)cbss->priv; -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug(" sta_find_ibss: selected %pM current %pM\n", - cbss->bssid, ifibss->bssid); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - + ibss_vdbg(" sta_find_ibss: selected %pM current %pM\n", + cbss->bssid, ifibss->bssid); pr_debug("%s: Selected IBSS BSSID %pM based on configured SSID\n", sdata->name, cbss->bssid); @@ -756,9 +741,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; } -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug(" did not try to join ibss\n"); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg(" did not try to join ibss\n"); /* Selected IBSS not found in current scan results - try to scan */ if (time_after(jiffies, ifibss->last_scan_completed + @@ -815,11 +798,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - sdata->name, mgmt->sa, mgmt->da, - mgmt->bssid, tx_last_beacon); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg("%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", + sdata->name, mgmt->sa, mgmt->da, + mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; @@ -832,10 +813,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: Invalid SSID IE in ProbeReq from %pM\n", - sdata->name, mgmt->sa); -#endif + ibss_vdbg("%s: Invalid SSID IE in ProbeReq from %pM\n", + sdata->name, mgmt->sa); return; } if (pos[1] != 0 && @@ -852,9 +831,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: Sending ProbeResp to %pM\n", sdata->name, resp->da); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ibss_vdbg("%s: Sending ProbeResp to %pM\n", sdata->name, resp->da); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4be50980760..0a70f797dcf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -887,10 +887,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, continue; if (time_after(jiffies, sta->last_rx + exp_time)) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - pr_debug("%s: expiring inactive STA %pM\n", - sdata->name, sta->sta.addr); -#endif + ibss_vdbg("%s: expiring inactive STA %pM\n", + sdata->name, sta->sta.addr); WARN_ON(__sta_info_destroy(sta)); } } -- cgit v1.2.3-70-g09d2 From 51ca9d8db280b960345e7306e6a036dd3880ecff Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 16 May 2012 19:09:48 +0300 Subject: mac80211: remove ieee80211_get_operstate() ieee80211_get_operstate() was used by drivers in order to know whether the sta link is up, but it's no longer needed (nor used) as mac80211 notifies the drivers about authorization changes (via the sta_state callback) Signed-off-by: Eliad Peller Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 ---------- net/mac80211/mlme.c | 7 ------- 2 files changed, 17 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 98da61e52a4..d2ed86da8e4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3555,16 +3555,6 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); -/** - * ieee80211_get_operstate - get the operstate of the vif - * - * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * - * The driver might need to know the operstate of the net_device - * (specifically, whether the link is IF_OPER_UP after resume) - */ -unsigned char ieee80211_get_operstate(struct ieee80211_vif *vif); - /** * ieee80211_chswitch_done - Complete channel switch process * @vif: &struct ieee80211_vif pointer from the add_interface callback. diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 419301c6958..0c4e26ff7d9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3530,10 +3530,3 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp); } EXPORT_SYMBOL(ieee80211_cqm_rssi_notify); - -unsigned char ieee80211_get_operstate(struct ieee80211_vif *vif) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - return sdata->dev->operstate; -} -EXPORT_SYMBOL(ieee80211_get_operstate); -- cgit v1.2.3-70-g09d2 From d58e7e37aac0465b08527adadc8016421bd4060e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 May 2012 23:50:17 +0200 Subject: cfg80211: simplify cfg80211_can_beacon_sec_chan API Change cfg80211_can_beacon_sec_chan() to return true if there is no secondary channel to simplify all the current users of it. They all check the channel type before calling the function because it returns false if there's no secondary channel. Also actually document the return value. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 5 ++++- net/mac80211/ibss.c | 3 +-- net/wireless/chan.c | 22 ++++++---------------- 3 files changed, 11 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0289d4ce707..a8496f4ff5f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3359,11 +3359,14 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, const u8 *frame, size_t len, int freq, int sig_dbm, gfp_t gfp); -/* +/** * cfg80211_can_beacon_sec_chan - test if ht40 on extension channel can be used * @wiphy: the wiphy * @chan: main channel * @channel_type: HT mode + * + * This function returns true if there is no secondary channel or the secondary + * channel can be used for beaconing (i.e. is not a radar channel etc.) */ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, struct ieee80211_channel *chan, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0bc47a82569..725cb4be229 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -82,8 +82,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, local->oper_channel = chan; channel_type = ifibss->channel_type; - if (channel_type > NL80211_CHAN_HT20 && - !cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type)) + if (!cfg80211_can_beacon_sec_chan(local->hw.wiphy, chan, channel_type)) channel_type = NL80211_CHAN_HT20; if (!ieee80211_set_channel_type(local, sdata, channel_type)) { /* can only fail due to HT40+/- mismatch */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 884801ac4dd..20b87d89572 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -60,7 +60,7 @@ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, diff = -20; break; default: - return false; + return true; } sec_chan = ieee80211_get_channel(wiphy, chan->center_freq + diff); @@ -107,21 +107,11 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev, wdev->iftype == NL80211_IFTYPE_AP || wdev->iftype == NL80211_IFTYPE_AP_VLAN || wdev->iftype == NL80211_IFTYPE_MESH_POINT || - wdev->iftype == NL80211_IFTYPE_P2P_GO)) { - switch (channel_type) { - case NL80211_CHAN_HT40PLUS: - case NL80211_CHAN_HT40MINUS: - if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, - channel_type)) { - printk(KERN_DEBUG - "cfg80211: Secondary channel not " - "allowed to initiate communication\n"); - return -EINVAL; - } - break; - default: - break; - } + wdev->iftype == NL80211_IFTYPE_P2P_GO) && + !cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, channel_type)) { + printk(KERN_DEBUG + "cfg80211: Secondary channel not allowed to beacon\n"); + return -EINVAL; } result = rdev->ops->set_channel(&rdev->wiphy, -- cgit v1.2.3-70-g09d2 From aa430da41019c1694f6a8e3b8bef1d12ed52b0ad Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 May 2012 23:50:18 +0200 Subject: cfg80211: provide channel to start_ap function Instead of setting the channel first and then starting the AP, let cfg80211 store the channel and provide it as one of the AP settings. This means that now you have to set the channel before you can start an AP interface, but since hostapd/wpa_supplicant always do that we're OK with this change. Alternatively, it's now possible to give the channel as an attribute to the start-ap nl80211 command, overriding any preset channel. Cc: Kalle Valo Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 36 ++------------------ drivers/net/wireless/ath/ath6kl/core.h | 3 -- drivers/net/wireless/ath/ath6kl/main.c | 1 - include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 12 ++++++- net/mac80211/cfg.c | 5 +++ net/wireless/nl80211.c | 53 ++++++++++++++++++++++++++++-- 7 files changed, 72 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index b869a358ce4..f27e9732951 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2585,35 +2585,6 @@ static int ath6kl_set_ies(struct ath6kl_vif *vif, return 0; } -static int ath6kl_set_channel(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type) -{ - struct ath6kl_vif *vif; - - /* - * 'dev' could be NULL if a channel change is required for the hardware - * device itself, instead of a particular VIF. - * - * FIXME: To be handled properly when monitor mode is supported. - */ - if (!dev) - return -EBUSY; - - vif = netdev_priv(dev); - - if (!ath6kl_cfg80211_ready(vif)) - return -EIO; - - ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: center_freq=%u hw_value=%u\n", - __func__, chan->center_freq, chan->hw_value); - vif->next_chan = chan->center_freq; - vif->next_ch_type = channel_type; - vif->next_ch_band = chan->band; - - return 0; -} - static int ath6kl_get_rsn_capab(struct cfg80211_beacon_data *beacon, u8 *rsn_capab) { @@ -2791,7 +2762,7 @@ static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, p.ssid_len = vif->ssid_len; memcpy(p.ssid, vif->ssid, vif->ssid_len); p.dot11_auth_mode = vif->dot11_auth_mode; - p.ch = cpu_to_le16(vif->next_chan); + p.ch = cpu_to_le16(info->channel->center_freq); /* Enable uAPSD support by default */ res = ath6kl_wmi_ap_set_apsd(ar->wmi, vif->fw_vif_idx, true); @@ -2815,8 +2786,8 @@ static int ath6kl_start_ap(struct wiphy *wiphy, struct net_device *dev, return res; } - if (ath6kl_set_htcap(vif, vif->next_ch_band, - vif->next_ch_type != NL80211_CHAN_NO_HT)) + if (ath6kl_set_htcap(vif, info->channel->band, + info->channel_type != NL80211_CHAN_NO_HT)) return -EIO; /* @@ -3271,7 +3242,6 @@ static struct cfg80211_ops ath6kl_cfg80211_ops = { .suspend = __ath6kl_cfg80211_suspend, .resume = __ath6kl_cfg80211_resume, #endif - .set_channel = ath6kl_set_channel, .start_ap = ath6kl_start_ap, .change_beacon = ath6kl_change_beacon, .stop_ap = ath6kl_stop_ap, diff --git a/drivers/net/wireless/ath/ath6kl/core.h b/drivers/net/wireless/ath/ath6kl/core.h index 4d9c6f14269..8443b2a4133 100644 --- a/drivers/net/wireless/ath/ath6kl/core.h +++ b/drivers/net/wireless/ath/ath6kl/core.h @@ -553,9 +553,6 @@ struct ath6kl_vif { u32 last_cancel_roc_id; u32 send_action_id; bool probe_req_report; - u16 next_chan; - enum nl80211_channel_type next_ch_type; - enum ieee80211_band next_ch_band; u16 assoc_bss_beacon_int; u16 listen_intvl_t; u16 bmiss_time_t; diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index e5524470529..b836f279511 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -598,7 +598,6 @@ static int ath6kl_commit_ch_switch(struct ath6kl_vif *vif, u16 channel) struct ath6kl *ar = vif->ar; - vif->next_chan = channel; vif->profile.ch = cpu_to_le16(channel); switch (vif->nw_type) { diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 6930dddad18..85e5037a218 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -170,6 +170,8 @@ * %NL80211_ATTR_CIPHER_GROUP, %NL80211_ATTR_WPA_VERSIONS, * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. + * The channel to use can be set on the interface or be given using the + * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_CHANNEL_TYPE attrs. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a8496f4ff5f..a54fb895f61 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -404,6 +404,8 @@ struct cfg80211_beacon_data { * * Used to configure an AP interface. * + * @channel: the channel to start the AP on + * @channel_type: the channel type to use * @beacon: beacon data * @beacon_interval: beacon interval * @dtim_period: DTIM period @@ -417,6 +419,9 @@ struct cfg80211_beacon_data { * @inactivity_timeout: time in seconds to determine station's inactivity. */ struct cfg80211_ap_settings { + struct ieee80211_channel *channel; + enum nl80211_channel_type channel_type; + struct cfg80211_beacon_data beacon; int beacon_interval, dtim_period; @@ -2263,7 +2268,10 @@ struct cfg80211_cached_keys; * @netdev: (private) Used to reference back to the netdev * @current_bss: (private) Used by the internal configuration code * @channel: (private) Used by the internal configuration code to track - * user-set AP, monitor and WDS channels for wireless extensions + * the user-set AP, monitor and WDS channel + * @preset_chan: (private) Used by the internal configuration code to + * track the channel to be used for AP later + * @preset_chantype: (private) the corresponding channel type * @bssid: (private) Used by the internal configuration code * @ssid: (private) Used by the internal configuration code * @ssid_len: (private) Used by the internal configuration code @@ -2314,6 +2322,8 @@ struct wireless_dev { struct cfg80211_internal_bss *current_bss; /* associated / joined */ struct ieee80211_channel *channel; + struct ieee80211_channel *preset_chan; + enum nl80211_channel_type preset_chantype; bool ps; int ps_timeout; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9aab849fd6c..8e9d525c465 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -823,6 +823,11 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) return -EALREADY; + err = ieee80211_set_channel(wiphy, dev, params->channel, + params->channel_type); + if (err) + return err; + /* * Apply control port protocol, this allows us to * not encrypt dynamic WEP control frames. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 206465dc0ca..74f4a8f9393 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,7 +921,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - CMD(set_channel, SET_CHANNEL); + if (dev->ops->set_channel || dev->ops->start_ap) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } CMD(set_wds_peer, SET_WDS_PEER); if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); @@ -1170,6 +1174,9 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) * Monitors are special as they are normally slaved to * whatever else is going on, so they behave as though * you tried setting the wiphy channel itself. + * + * For AP/GO modes, it's only for compatibility, you can + * also give the channel to the start-AP command. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || @@ -1204,6 +1211,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct genl_info *info) { + struct ieee80211_channel *channel; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; u32 freq; int result; @@ -1221,7 +1229,25 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); mutex_lock(&rdev->devlist_mtx); - if (wdev) { + if (wdev) switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + if (wdev->beacon_interval) { + result = -EBUSY; + break; + } + channel = rdev_freq_to_chan(rdev, freq, channel_type); + if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, + channel, + channel_type)) { + result = -EINVAL; + break; + } + wdev->preset_chan = channel; + wdev->preset_chantype = channel_type; + result = 0; + break; + default: wdev_lock(wdev); result = cfg80211_set_freq(rdev, wdev, freq, channel_type); wdev_unlock(wdev); @@ -2299,6 +2325,29 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && + !nl80211_valid_channel_type(info, &channel_type)) + return -EINVAL; + + params.channel = rdev_freq_to_chan(rdev, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]), + channel_type); + if (!params.channel) + return -EINVAL; + params.channel_type = channel_type; + } else if (wdev->preset_chan) { + params.channel = wdev->preset_chan; + params.channel_type = wdev->preset_chantype; + } else + return -EINVAL; + + if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, params.channel, + params.channel_type)) + return -EINVAL; + err = rdev->ops->start_ap(&rdev->wiphy, dev, ¶ms); if (!err) wdev->beacon_interval = params.beacon_interval; -- cgit v1.2.3-70-g09d2 From cc1d2806bf06ab92268343d26eb3d8d8f00f8bc9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 May 2012 23:50:20 +0200 Subject: cfg80211: provide channel to join_mesh function Just like the AP mode patch, instead of setting the channel and then joining the mesh network, provide the channel to join the network on to the join_mesh() function. Like in AP mode, you can also give the channel to the join-mesh nl80211 command now. Unlike AP mode, it picks a default channel if none was given. As libertas uses mesh mode interfaces but has no join_mesh callback and we can't simply break it, keep some compatibility code for that case and configure the channel directly for it. In the non-libertas case, where we store the channel until join, allow setting it while the interface is down. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++ net/mac80211/cfg.c | 6 +++ net/wireless/core.h | 7 +++- net/wireless/mesh.c | 91 +++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.c | 43 +++++++++++++++++----- net/wireless/wext-compat.c | 12 +++++- 6 files changed, 148 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a54fb895f61..4c90c44b8b7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -831,6 +831,8 @@ struct mesh_config { /** * struct mesh_setup - 802.11s mesh setup configuration + * @channel: the channel to start the mesh network on + * @channel_type: the channel type to use * @mesh_id: the mesh ID * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes * @sync_method: which synchronization method to use @@ -845,6 +847,8 @@ struct mesh_config { * These parameters are fixed when the mesh is created. */ struct mesh_setup { + struct ieee80211_channel *channel; + enum nl80211_channel_type channel_type; const u8 *mesh_id; u8 mesh_id_len; u8 sync_method; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8e9d525c465..f47af8b3185 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1598,6 +1598,12 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, err = copy_mesh_setup(ifmsh, setup); if (err) return err; + + err = ieee80211_set_channel(wiphy, dev, setup->channel, + setup->channel_type); + if (err) + return err; + ieee80211_start_mesh(sdata); return 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index 8523f387867..1d3d2412694 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -303,14 +303,17 @@ extern const struct mesh_config default_mesh_config; extern const struct mesh_setup default_mesh_setup; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); +int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int freq, + enum nl80211_channel_type channel_type); /* MLME */ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2749cb86b46..2e3b700eba3 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -65,6 +65,9 @@ const struct mesh_config default_mesh_config = { }; const struct mesh_setup default_mesh_setup = { + /* cfg80211_join_mesh() will pick a channel if needed */ + .channel = NULL, + .channel_type = NL80211_CHAN_NO_HT, .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, @@ -75,7 +78,7 @@ const struct mesh_setup default_mesh_setup = { int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -101,6 +104,51 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!rdev->ops->join_mesh) return -EOPNOTSUPP; + if (!setup->channel) { + /* if no channel explicitly given, use preset channel */ + setup->channel = wdev->preset_chan; + setup->channel_type = wdev->preset_chantype; + } + + if (!setup->channel) { + /* if we don't have that either, use the first usable channel */ + enum ieee80211_band band; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + int i; + + sband = rdev->wiphy.bands[band]; + if (!sband) + continue; + + for (i = 0; i < sband->n_channels; i++) { + chan = &sband->channels[i]; + if (chan->flags & (IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_PASSIVE_SCAN | + IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_RADAR)) + continue; + setup->channel = chan; + break; + } + + if (setup->channel) + break; + } + + /* no usable channel ... */ + if (!setup->channel) + return -EINVAL; + + setup->channel_type = NL80211_CHAN_NO_HT; + } + + if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, setup->channel, + setup->channel_type)) + return -EINVAL; + err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup); if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); @@ -112,7 +160,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const struct mesh_setup *setup, + struct mesh_setup *setup, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -125,6 +173,45 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, return err; } +int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int freq, + enum nl80211_channel_type channel_type) +{ + struct ieee80211_channel *channel; + + /* + * Workaround for libertas (only!), it puts the interface + * into mesh mode but doesn't implement join_mesh. Instead, + * it is configured via sysfs and then joins the mesh when + * you set the channel. Note that the libertas mesh isn't + * compatible with 802.11 mesh. + */ + if (!rdev->ops->join_mesh) { + int err; + + if (!netif_running(wdev->netdev)) + return -ENETDOWN; + wdev_lock(wdev); + err = cfg80211_set_freq(rdev, wdev, freq, channel_type); + wdev_unlock(wdev); + + return err; + } + + if (wdev->mesh_id_len) + return -EBUSY; + + channel = rdev_freq_to_chan(rdev, freq, channel_type); + if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, + channel, + channel_type)) { + return -EINVAL; + } + wdev->preset_chan = channel; + wdev->preset_chantype = channel_type; + return 0; +} + void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 089a5204dad..b22f1f87688 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,7 +921,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_channel || dev->ops->start_ap) { + if (dev->ops->set_channel || dev->ops->start_ap || + dev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; @@ -1166,17 +1167,19 @@ static int parse_txq_params(struct nlattr *tb[], static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) { /* - * You can only set the channel explicitly for AP and - * mesh type interfaces; all others have their channel - * managed via their respective "establish a connection" - * command (connect, join, ...) + * You can only set the channel explicitly for WDS interfaces, + * all others have their channel managed via their respective + * "establish a connection" command (connect, join, ...) + * + * For AP/GO and mesh mode, the channel can be set with the + * channel userspace API, but is only stored and passed to the + * low-level driver when the AP starts or the mesh is joined. + * This is for backward compatibility, userspace can also give + * the channel in the start-ap or join-mesh commands instead. * * Monitors are special as they are normally slaved to * whatever else is going on, so they behave as though * you tried setting the wiphy channel itself. - * - * For AP/GO modes, it's only for compatibility, you can - * also give the channel to the start-AP command. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || @@ -1246,6 +1249,9 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, wdev->preset_chantype = channel_type; result = 0; break; + case NL80211_IFTYPE_MESH_POINT: + result = cfg80211_set_mesh_freq(rdev, wdev, freq, channel_type); + break; default: wdev_lock(wdev); result = cfg80211_set_freq(rdev, wdev, freq, channel_type); @@ -1335,8 +1341,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = 0; mutex_lock(&rdev->mtx); - } else if (netif_running(netdev) && - nl80211_can_set_dev_channel(netdev->ieee80211_ptr)) + } else if (nl80211_can_set_dev_channel(netdev->ieee80211_ptr)) wdev = netdev->ieee80211_ptr; else wdev = NULL; @@ -6080,6 +6085,24 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; + + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && + !nl80211_valid_channel_type(info, &channel_type)) + return -EINVAL; + + setup.channel = rdev_freq_to_chan(rdev, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]), + channel_type); + if (!setup.channel) + return -EINVAL; + setup.channel_type = channel_type; + } else { + /* cfg80211_join_mesh() will sort it out */ + setup.channel = NULL; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index b082fcc26f0..faeb03548aa 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -796,7 +796,6 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: - case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) return freq; @@ -808,6 +807,17 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, wdev_unlock(wdev); mutex_unlock(&rdev->devlist_mtx); return err; + case NL80211_IFTYPE_MESH_POINT: + freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); + if (freq < 0) + return freq; + if (freq == 0) + return -EINVAL; + mutex_lock(&rdev->devlist_mtx); + err = cfg80211_set_mesh_freq(rdev, wdev, freq, + NL80211_CHAN_NO_HT); + mutex_unlock(&rdev->devlist_mtx); + return err; default: return -EOPNOTSUPP; } -- cgit v1.2.3-70-g09d2 From e8c9bd5b8d807cfe6c923265969a523b1ba1e6c2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Jun 2012 08:18:22 +0200 Subject: cfg80211: clarify set_channel APIs Now that we've removed all uses of the set_channel API except for the monitor channel and in libertas, clarify this. Split the libertas mesh use into a new libertas_set_mesh_channel() operation, just to keep backward compatibility, and rename the normal set_channel() to set_monitor_channel(). Also describe the desired set_monitor_channel() semantics more clearly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/cfg.c | 39 +++++++++++++++++++++++--------- drivers/net/wireless/libertas/dev.h | 1 + drivers/net/wireless/libertas/mesh.c | 7 ++---- drivers/net/wireless/orinoco/cfg.c | 9 ++++---- include/net/cfg80211.h | 24 ++++++++++++-------- net/mac80211/cfg.c | 9 +++++++- net/wireless/chan.c | 43 +++++------------------------------- net/wireless/core.h | 5 ++--- net/wireless/mesh.c | 26 +++++++++++----------- net/wireless/mlme.c | 2 -- net/wireless/nl80211.c | 21 ++++++++++-------- net/wireless/wext-compat.c | 10 ++------- net/wireless/wext-sme.c | 10 +++++++-- 13 files changed, 100 insertions(+), 106 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 2fa879b015b..f4a203049fb 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -435,24 +435,40 @@ static int lbs_add_wpa_tlv(u8 *tlv, const u8 *ie, u8 ie_len) * Set Channel */ -static int lbs_cfg_set_channel(struct wiphy *wiphy, - struct net_device *netdev, - struct ieee80211_channel *channel, - enum nl80211_channel_type channel_type) +static int lbs_cfg_set_monitor_channel(struct wiphy *wiphy, + struct ieee80211_channel *channel, + enum nl80211_channel_type channel_type) { struct lbs_private *priv = wiphy_priv(wiphy); int ret = -ENOTSUPP; - lbs_deb_enter_args(LBS_DEB_CFG80211, "iface %s freq %d, type %d", - netdev_name(netdev), channel->center_freq, channel_type); + lbs_deb_enter_args(LBS_DEB_CFG80211, "freq %d, type %d", + channel->center_freq, channel_type); if (channel_type != NL80211_CHAN_NO_HT) goto out; - if (netdev == priv->mesh_dev) - ret = lbs_mesh_set_channel(priv, channel->hw_value); - else - ret = lbs_set_channel(priv, channel->hw_value); + ret = lbs_set_channel(priv, channel->hw_value); + + out: + lbs_deb_leave_args(LBS_DEB_CFG80211, "ret %d", ret); + return ret; +} + +static int lbs_cfg_set_mesh_channel(struct wiphy *wiphy, + struct net_device *netdev, + struct ieee80211_channel *channel) +{ + struct lbs_private *priv = wiphy_priv(wiphy); + int ret = -ENOTSUPP; + + lbs_deb_enter_args(LBS_DEB_CFG80211, "iface %s freq %d", + netdev_name(netdev), channel->center_freq); + + if (netdev != priv->mesh_dev) + goto out; + + ret = lbs_mesh_set_channel(priv, channel->hw_value); out: lbs_deb_leave_args(LBS_DEB_CFG80211, "ret %d", ret); @@ -2029,7 +2045,8 @@ static int lbs_leave_ibss(struct wiphy *wiphy, struct net_device *dev) */ static struct cfg80211_ops lbs_cfg80211_ops = { - .set_channel = lbs_cfg_set_channel, + .set_monitor_channel = lbs_cfg_set_monitor_channel, + .libertas_set_mesh_channel = lbs_cfg_set_mesh_channel, .scan = lbs_cfg_scan, .connect = lbs_cfg_connect, .disconnect = lbs_cfg_disconnect, diff --git a/drivers/net/wireless/libertas/dev.h b/drivers/net/wireless/libertas/dev.h index 672005430ac..60996ce89f7 100644 --- a/drivers/net/wireless/libertas/dev.h +++ b/drivers/net/wireless/libertas/dev.h @@ -58,6 +58,7 @@ struct lbs_private { uint16_t mesh_tlv; u8 mesh_ssid[IEEE80211_MAX_SSID_LEN + 1]; u8 mesh_ssid_len; + u8 mesh_channel; #endif /* Debugfs */ diff --git a/drivers/net/wireless/libertas/mesh.c b/drivers/net/wireless/libertas/mesh.c index e87c031b298..97807751ebc 100644 --- a/drivers/net/wireless/libertas/mesh.c +++ b/drivers/net/wireless/libertas/mesh.c @@ -131,16 +131,13 @@ static int lbs_mesh_config(struct lbs_private *priv, uint16_t action, int lbs_mesh_set_channel(struct lbs_private *priv, u8 channel) { + priv->mesh_channel = channel; return lbs_mesh_config(priv, CMD_ACT_MESH_CONFIG_START, channel); } static uint16_t lbs_mesh_get_channel(struct lbs_private *priv) { - struct wireless_dev *mesh_wdev = priv->mesh_dev->ieee80211_ptr; - if (mesh_wdev->channel) - return mesh_wdev->channel->hw_value; - else - return 1; + return priv->mesh_channel ?: 1; } /*************************************************************************** diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c index f7b15b8934f..e15675585fb 100644 --- a/drivers/net/wireless/orinoco/cfg.c +++ b/drivers/net/wireless/orinoco/cfg.c @@ -160,10 +160,9 @@ static int orinoco_scan(struct wiphy *wiphy, struct net_device *dev, return err; } -static int orinoco_set_channel(struct wiphy *wiphy, - struct net_device *netdev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type) +static int orinoco_set_monitor_channel(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) { struct orinoco_private *priv = wiphy_priv(wiphy); int err = 0; @@ -286,7 +285,7 @@ static int orinoco_set_wiphy_params(struct wiphy *wiphy, u32 changed) const struct cfg80211_ops orinoco_cfg_ops = { .change_virtual_intf = orinoco_change_vif, - .set_channel = orinoco_set_channel, + .set_monitor_channel = orinoco_set_monitor_channel, .scan = orinoco_scan, .set_wiphy_params = orinoco_set_wiphy_params, }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4c90c44b8b7..7319f25250b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1420,11 +1420,14 @@ struct cfg80211_gtk_rekey_data { * * @set_txq_params: Set TX queue parameters * - * @set_channel: Set channel for a given wireless interface. Some devices - * may support multi-channel operation (by channel hopping) so cfg80211 - * doesn't verify much. Note, however, that the passed netdev may be - * %NULL as well if the user requested changing the channel for the - * device itself, or for a monitor interface. + * @libertas_set_mesh_channel: Only for backward compatibility for libertas, + * as it doesn't implement join_mesh and needs to set the channel to + * join the mesh instead. + * + * @set_monitor_channel: Set the monitor mode channel for the device. If other + * interfaces are active this callback should reject the configuration. + * If no interfaces are active or the device is down, the channel should + * be stored for when a monitor interface becomes active. * @get_channel: Get the current operating channel, should return %NULL if * there's no single defined operating channel if for example the * device implements channel hopping for multi-channel virtual interfaces. @@ -1614,9 +1617,13 @@ struct cfg80211_ops { int (*set_txq_params)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_txq_params *params); - int (*set_channel)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type); + int (*libertas_set_mesh_channel)(struct wiphy *wiphy, + struct net_device *dev, + struct ieee80211_channel *chan); + + int (*set_monitor_channel)(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type); int (*scan)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_scan_request *request); @@ -2325,7 +2332,6 @@ struct wireless_dev { spinlock_t event_lock; struct cfg80211_internal_bss *current_bss; /* associated / joined */ - struct ieee80211_channel *channel; struct ieee80211_channel *preset_chan; enum nl80211_channel_type preset_chantype; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 50aea1ac7e0..d99359a6f76 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -709,6 +709,13 @@ static int ieee80211_set_channel(struct wiphy *wiphy, return 0; } +static int ieee80211_set_monitor_channel(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type) +{ + return ieee80211_set_channel(wiphy, NULL, chan, channel_type); +} + static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, const u8 *resp, size_t resp_len) { @@ -2932,7 +2939,7 @@ struct cfg80211_ops mac80211_config_ops = { #endif .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, - .set_channel = ieee80211_set_channel, + .set_monitor_channel = ieee80211_set_monitor_channel, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 20b87d89572..c1999e45a07 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -78,50 +78,17 @@ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_can_beacon_sec_chan); -int cfg80211_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, int freq, - enum nl80211_channel_type channel_type) +int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type chantype) { struct ieee80211_channel *chan; - int result; - if (wdev && wdev->iftype == NL80211_IFTYPE_MONITOR) - wdev = NULL; - - if (wdev) { - ASSERT_WDEV_LOCK(wdev); - - if (!netif_running(wdev->netdev)) - return -ENETDOWN; - } - - if (!rdev->ops->set_channel) + if (!rdev->ops->set_monitor_channel) return -EOPNOTSUPP; - chan = rdev_freq_to_chan(rdev, freq, channel_type); + chan = rdev_freq_to_chan(rdev, freq, chantype); if (!chan) return -EINVAL; - /* Both channels should be able to initiate communication */ - if (wdev && (wdev->iftype == NL80211_IFTYPE_ADHOC || - wdev->iftype == NL80211_IFTYPE_AP || - wdev->iftype == NL80211_IFTYPE_AP_VLAN || - wdev->iftype == NL80211_IFTYPE_MESH_POINT || - wdev->iftype == NL80211_IFTYPE_P2P_GO) && - !cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan, channel_type)) { - printk(KERN_DEBUG - "cfg80211: Secondary channel not allowed to beacon\n"); - return -EINVAL; - } - - result = rdev->ops->set_channel(&rdev->wiphy, - wdev ? wdev->netdev : NULL, - chan, channel_type); - if (result) - return result; - - if (wdev) - wdev->channel = chan; - - return 0; + return rdev->ops->set_monitor_channel(&rdev->wiphy, chan, chantype); } diff --git a/net/wireless/core.h b/net/wireless/core.h index 1d3d2412694..9348a47562a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -444,9 +444,8 @@ cfg80211_can_add_interface(struct cfg80211_registered_device *rdev, struct ieee80211_channel * rdev_freq_to_chan(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type channel_type); -int cfg80211_set_freq(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, int freq, - enum nl80211_channel_type channel_type); +int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, + int freq, enum nl80211_channel_type chantype); int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, const u8 *rates, unsigned int n_rates, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2e3b700eba3..b44c736bf9c 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -179,6 +179,13 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, { struct ieee80211_channel *channel; + channel = rdev_freq_to_chan(rdev, freq, channel_type); + if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, + channel, + channel_type)) { + return -EINVAL; + } + /* * Workaround for libertas (only!), it puts the interface * into mesh mode but doesn't implement join_mesh. Instead, @@ -186,27 +193,20 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, * you set the channel. Note that the libertas mesh isn't * compatible with 802.11 mesh. */ - if (!rdev->ops->join_mesh) { - int err; + if (rdev->ops->libertas_set_mesh_channel) { + if (channel_type != NL80211_CHAN_NO_HT) + return -EINVAL; if (!netif_running(wdev->netdev)) return -ENETDOWN; - wdev_lock(wdev); - err = cfg80211_set_freq(rdev, wdev, freq, channel_type); - wdev_unlock(wdev); - - return err; + return rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, + wdev->netdev, + channel); } if (wdev->mesh_id_len) return -EBUSY; - channel = rdev_freq_to_chan(rdev, freq, channel_type); - if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, - channel, - channel_type)) { - return -EINVAL; - } wdev->preset_chan = channel; wdev->preset_chantype = channel_type; return 0; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index eb90988bbd3..da4406f1192 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -947,8 +947,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq, if (WARN_ON(!chan)) goto out; - wdev->channel = chan; - nl80211_ch_switch_notify(rdev, dev, freq, type, GFP_KERNEL); out: wdev_unlock(wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b22f1f87688..5e29bd38e7d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,7 +921,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } - if (dev->ops->set_channel || dev->ops->start_ap || + if (dev->ops->set_monitor_channel || dev->ops->start_ap || dev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) @@ -1178,8 +1178,8 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) * the channel in the start-ap or join-mesh commands instead. * * Monitors are special as they are normally slaved to - * whatever else is going on, so they behave as though - * you tried setting the wiphy channel itself. + * whatever else is going on, so they have their own special + * operation to set the monitor channel if possible. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || @@ -1217,6 +1217,10 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; u32 freq; int result; + enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; + + if (wdev) + iftype = wdev->iftype; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; @@ -1231,7 +1235,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); mutex_lock(&rdev->devlist_mtx); - if (wdev) switch (wdev->iftype) { + switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (wdev->beacon_interval) { @@ -1252,12 +1256,11 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: result = cfg80211_set_mesh_freq(rdev, wdev, freq, channel_type); break; + case NL80211_IFTYPE_MONITOR: + result = cfg80211_set_monitor_channel(rdev, freq, channel_type); + break; default: - wdev_lock(wdev); - result = cfg80211_set_freq(rdev, wdev, freq, channel_type); - wdev_unlock(wdev); - } else { - result = cfg80211_set_freq(rdev, NULL, freq, channel_type); + result = -EINVAL; } mutex_unlock(&rdev->devlist_mtx); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index faeb03548aa..bc879833b21 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -802,9 +802,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, if (freq == 0) return -EINVAL; mutex_lock(&rdev->devlist_mtx); - wdev_lock(wdev); - err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); - wdev_unlock(wdev); + err = cfg80211_set_monitor_channel(rdev, freq, NL80211_CHAN_NO_HT); mutex_unlock(&rdev->devlist_mtx); return err; case NL80211_IFTYPE_MESH_POINT: @@ -848,11 +846,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, freq->e = 6; return 0; default: - if (!wdev->channel) - return -EINVAL; - freq->m = wdev->channel->center_freq; - freq->e = 6; - return 0; + return -EINVAL; } } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 7decbd357d5..1f773f668d1 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -111,9 +111,15 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, wdev->wext.connect.channel = chan; - /* SSID is not set, we just want to switch channel */ + /* + * SSID is not set, we just want to switch monitor channel, + * this is really just backward compatibility, if the SSID + * is set then we use the channel to select the BSS to use + * to connect to instead. If we were connected on another + * channel we disconnected above and reconnect below. + */ if (chan && !wdev->wext.connect.ssid_len) { - err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); + err = cfg80211_set_monitor_channel(rdev, freq, NL80211_CHAN_NO_HT); goto out; } -- cgit v1.2.3-70-g09d2 From 196ac1c13d4db6c276dbb1c9190c8d7d45a83f1f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 14:28:40 +0200 Subject: mac80211: do remain-on-channel while idle The IDLE handling in HW off-channel is broken right now since we turn off IDLE only when the off-channel period already started. Therefore, all drivers that use it today (only iwlwifi!) must support off-channel while idle, so playing with idle isn't needed at all. Off-channel in general, since it's no longer used for authentication/association, shouldn't affect PS, so also remove that logic. Also document a small caveat for reporting TX status from off-channel frames in HW remain-on-channel. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 8 +++++++- net/mac80211/cfg.c | 7 +++---- net/mac80211/iface.c | 17 +++++++---------- net/mac80211/mlme.c | 6 ------ net/mac80211/offchannel.c | 4 ---- 5 files changed, 17 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d2ed86da8e4..6e700bf8d4a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2183,7 +2183,13 @@ enum ieee80211_rate_control_changed { * offload. Frames to transmit on the off-channel channel are transmitted * normally except for the %IEEE80211_TX_CTL_TX_OFFCHAN flag. When the * duration (which will always be non-zero) expires, the driver must call - * ieee80211_remain_on_channel_expired(). This callback may sleep. + * ieee80211_remain_on_channel_expired(). + * The driver must not call ieee80211_remain_on_channel_expired() before + * the TX status for a frame that was sent off-channel, otherwise the TX + * status is reported to userspace in an invalid way. + * Note that this callback may be called while the device is in IDLE and + * must be accepted in this case. + * This callback may sleep. * @cancel_remain_on_channel: Requests that an ongoing off-channel period is * aborted before it expires. This callback may sleep. * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d99359a6f76..a1690791970 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2187,8 +2187,6 @@ static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local, local->hw_roc_cookie = 0; local->hw_roc_channel = NULL; - ieee80211_recalc_idle(local); - return 0; } @@ -2248,7 +2246,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_work *wk; const struct ieee80211_mgmt *mgmt = (void *)buf; u32 flags; - bool is_offchan = false; + bool is_offchan = false, in_hw_roc = false; if (dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -2268,6 +2266,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, if (chan == local->hw_roc_channel) { /* TODO: check channel type? */ is_offchan = false; + in_hw_roc = true; flags |= IEEE80211_TX_CTL_TX_OFFCHAN; } @@ -2370,7 +2369,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, * wait is involved, we might otherwise not be on * the right channel for long enough! */ - if (!is_offchan && !wait && !sdata->vif.bss_conf.idle) { + if (!is_offchan && !wait && (in_hw_roc || !sdata->vif.bss_conf.idle)) { ieee80211_tx_skb(sdata, skb); return 0; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ede5f495990..968d71c5071 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1456,7 +1456,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; - bool working = false, scanning = false, hw_roc = false; + bool working = false, scanning = false; struct ieee80211_work *wk; unsigned int led_trig_start = 0, led_trig_stop = 0; @@ -1493,9 +1493,11 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) count++; } - list_for_each_entry(wk, &local->work_list, list) { - working = true; - wk->sdata->vif.bss_conf.idle = false; + if (!local->ops->remain_on_channel) { + list_for_each_entry(wk, &local->work_list, list) { + working = true; + wk->sdata->vif.bss_conf.idle = false; + } } if (local->scan_sdata && @@ -1504,9 +1506,6 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) local->scan_sdata->vif.bss_conf.idle = false; } - if (local->hw_roc_channel) - hw_roc = true; - list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -1518,7 +1517,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); } - if (working || scanning || hw_roc) + if (working || scanning) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; @@ -1530,8 +1529,6 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); - if (hw_roc) - return ieee80211_idle_off(local, "hw remain-on-channel"); if (working) return ieee80211_idle_off(local, "working"); if (scanning) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e7c4ec4ce16..0f45d02e0ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -930,11 +930,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) return; } - if (!list_empty(&local->work_list)) { - local->ps_sdata = NULL; - goto change; - } - list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -1007,7 +1002,6 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) local->ps_sdata = NULL; } - change: ieee80211_change_ps(local); } diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 935aa4b6dee..8f482b15bc5 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -207,8 +207,6 @@ static void ieee80211_hw_roc_start(struct work_struct *work) GFP_KERNEL); } - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } @@ -260,8 +258,6 @@ static void ieee80211_hw_roc_done(struct work_struct *work) local->hw_roc_channel = NULL; local->hw_roc_cookie = 0; - ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); } -- cgit v1.2.3-70-g09d2 From 2eb278e083549f4eb29838037004054b3b55df62 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Jun 2012 14:28:42 +0200 Subject: mac80211: unify SW/offload remain-on-channel Redesign all the off-channel code, getting rid of the generic off-channel work concept, replacing it with a simple remain-on-channel list. This fixes a number of small issues with the ROC implementation: * offloaded remain-on-channel couldn't be queued, now we can queue it as well, if needed * in iwlwifi (the only user) offloaded ROC is mutually exclusive with scanning, use the new queue to handle that case -- I expect that it will later depend on a HW flag The bigger issue though is that there's a bad bug in the current implementation: if we get a mgmt TX request while HW roc is active, and this new request has a wait time, we actually schedule a software ROC instead since we can't guarantee the existing offloaded ROC will still be that long. To fix this, the queuing mechanism was needed. The queuing mechanism for offloaded ROC isn't yet optimal, ideally we should add API to have the HW extend the ROC if needed. We could add that later but for now use a software implementation. Overall, this unifies the behaviour between the offloaded and software-implemented case as much as possible. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 - net/mac80211/Makefile | 1 - net/mac80211/cfg.c | 478 ++++++++++++++++++++++++--------------------- net/mac80211/ieee80211_i.h | 89 +++------ net/mac80211/iface.c | 23 +-- net/mac80211/main.c | 10 +- net/mac80211/offchannel.c | 280 ++++++++++++++++++++++---- net/mac80211/scan.c | 4 +- net/mac80211/status.c | 28 +-- net/mac80211/work.c | 370 ----------------------------------- 10 files changed, 532 insertions(+), 754 deletions(-) delete mode 100644 net/mac80211/work.c (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6e700bf8d4a..d152f54064f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2184,9 +2184,6 @@ enum ieee80211_rate_control_changed { * normally except for the %IEEE80211_TX_CTL_TX_OFFCHAN flag. When the * duration (which will always be non-zero) expires, the driver must call * ieee80211_remain_on_channel_expired(). - * The driver must not call ieee80211_remain_on_channel_expired() before - * the TX status for a frame that was sent off-channel, otherwise the TX - * status is reported to userspace in an invalid way. * Note that this callback may be called while the device is in IDLE and * must be accepted in this case. * This callback may sleep. diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 3e9d931bba3..2b1470bac17 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -9,7 +9,6 @@ mac80211-y := \ scan.o offchannel.o \ ht.o agg-tx.o agg-rx.o \ ibss.o \ - work.o \ iface.o \ rate.o \ michael.o \ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a1690791970..498c94e3442 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2112,35 +2112,171 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } -static int ieee80211_remain_on_channel_hw(struct ieee80211_local *local, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type chantype, - unsigned int duration, u64 *cookie) -{ +static int ieee80211_start_roc_work(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + enum nl80211_channel_type channel_type, + unsigned int duration, u64 *cookie, + struct sk_buff *txskb) +{ + struct ieee80211_roc_work *roc, *tmp; + bool queued = false; int ret; - u32 random_cookie; lockdep_assert_held(&local->mtx); - if (local->hw_roc_cookie) - return -EBUSY; - /* must be nonzero */ - random_cookie = random32() | 1; - - *cookie = random_cookie; - local->hw_roc_dev = dev; - local->hw_roc_cookie = random_cookie; - local->hw_roc_channel = chan; - local->hw_roc_channel_type = chantype; - local->hw_roc_duration = duration; - ret = drv_remain_on_channel(local, chan, chantype, duration); + roc = kzalloc(sizeof(*roc), GFP_KERNEL); + if (!roc) + return -ENOMEM; + + roc->chan = channel; + roc->chan_type = channel_type; + roc->duration = duration; + roc->req_duration = duration; + roc->frame = txskb; + roc->mgmt_tx_cookie = (unsigned long)txskb; + roc->sdata = sdata; + INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); + INIT_LIST_HEAD(&roc->dependents); + + /* if there's one pending or we're scanning, queue this one */ + if (!list_empty(&local->roc_list) || local->scanning) + goto out_check_combine; + + /* if not HW assist, just queue & schedule work */ + if (!local->ops->remain_on_channel) { + ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); + goto out_queue; + } + + /* otherwise actually kick it off here (for error handling) */ + + /* + * If the duration is zero, then the driver + * wouldn't actually do anything. Set it to + * 10 for now. + * + * TODO: cancel the off-channel operation + * when we get the SKB's TX status and + * the wait time was zero before. + */ + if (!duration) + duration = 10; + + ret = drv_remain_on_channel(local, channel, channel_type, duration); if (ret) { - local->hw_roc_channel = NULL; - local->hw_roc_cookie = 0; + kfree(roc); + return ret; } - return ret; + roc->started = true; + goto out_queue; + + out_check_combine: + list_for_each_entry(tmp, &local->roc_list, list) { + if (tmp->chan != channel || tmp->chan_type != channel_type) + continue; + + /* + * Extend this ROC if possible: + * + * If it hasn't started yet, just increase the duration + * and add the new one to the list of dependents. + */ + if (!tmp->started) { + list_add_tail(&roc->list, &tmp->dependents); + tmp->duration = max(tmp->duration, roc->duration); + queued = true; + break; + } + + /* If it has already started, it's more difficult ... */ + if (local->ops->remain_on_channel) { + unsigned long j = jiffies; + + /* + * In the offloaded ROC case, if it hasn't begun, add + * this new one to the dependent list to be handled + * when the the master one begins. If it has begun, + * check that there's still a minimum time left and + * if so, start this one, transmitting the frame, but + * add it to the list directly after this one with a + * a reduced time so we'll ask the driver to execute + * it right after finishing the previous one, in the + * hope that it'll also be executed right afterwards, + * effectively extending the old one. + * If there's no minimum time left, just add it to the + * normal list. + */ + if (!tmp->hw_begun) { + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + break; + } + + if (time_before(j + IEEE80211_ROC_MIN_LEFT, + tmp->hw_start_time + + msecs_to_jiffies(tmp->duration))) { + int new_dur; + + ieee80211_handle_roc_started(roc); + + new_dur = roc->duration - + jiffies_to_msecs(tmp->hw_start_time + + msecs_to_jiffies( + tmp->duration) - + j); + + if (new_dur > 0) { + /* add right after tmp */ + list_add(&roc->list, &tmp->list); + } else { + list_add_tail(&roc->list, + &tmp->dependents); + } + queued = true; + } + } else if (del_timer_sync(&tmp->work.timer)) { + unsigned long new_end; + + /* + * In the software ROC case, cancel the timer, if + * that fails then the finish work is already + * queued/pending and thus we queue the new ROC + * normally, if that succeeds then we can extend + * the timer duration and TX the frame (if any.) + */ + + list_add_tail(&roc->list, &tmp->dependents); + queued = true; + + new_end = jiffies + msecs_to_jiffies(roc->duration); + + /* ok, it was started & we canceled timer */ + if (time_after(new_end, tmp->work.timer.expires)) + mod_timer(&tmp->work.timer, new_end); + else + add_timer(&tmp->work.timer); + + ieee80211_handle_roc_started(roc); + } + break; + } + + out_queue: + if (!queued) + list_add_tail(&roc->list, &local->roc_list); + + /* + * cookie is either the roc (for normal roc) + * or the SKB (for mgmt TX) + */ + if (txskb) + *cookie = (unsigned long)txskb; + else + *cookie = (unsigned long)roc; + + return 0; } static int ieee80211_remain_on_channel(struct wiphy *wiphy, @@ -2152,84 +2288,76 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; + int ret; - if (local->ops->remain_on_channel) { - int ret; - - mutex_lock(&local->mtx); - ret = ieee80211_remain_on_channel_hw(local, dev, - chan, channel_type, - duration, cookie); - local->hw_roc_for_tx = false; - mutex_unlock(&local->mtx); - - return ret; - } + mutex_lock(&local->mtx); + ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + duration, cookie, NULL); + mutex_unlock(&local->mtx); - return ieee80211_wk_remain_on_channel(sdata, chan, channel_type, - duration, cookie); + return ret; } -static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local, - u64 cookie) +static int ieee80211_cancel_roc(struct ieee80211_local *local, + u64 cookie, bool mgmt_tx) { + struct ieee80211_roc_work *roc, *tmp, *found = NULL; int ret; - lockdep_assert_held(&local->mtx); + mutex_lock(&local->mtx); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (!mgmt_tx && (unsigned long)roc != cookie) + continue; + else if (mgmt_tx && roc->mgmt_tx_cookie != cookie) + continue; - if (local->hw_roc_cookie != cookie) - return -ENOENT; + found = roc; + break; + } - ret = drv_cancel_remain_on_channel(local); - if (ret) - return ret; + if (!found) { + mutex_unlock(&local->mtx); + return -ENOENT; + } - local->hw_roc_cookie = 0; - local->hw_roc_channel = NULL; + if (local->ops->remain_on_channel) { + if (found->started) { + ret = drv_cancel_remain_on_channel(local); + if (WARN_ON_ONCE(ret)) { + mutex_unlock(&local->mtx); + return ret; + } + } - return 0; -} + list_del(&found->list); -static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, - u64 cookie) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + ieee80211_run_deferred_scan(local); + ieee80211_start_next_roc(local); + mutex_unlock(&local->mtx); - if (local->ops->cancel_remain_on_channel) { - int ret; + ieee80211_roc_notify_destroy(found); + } else { + /* work may be pending so use it all the time */ + found->abort = true; + ieee80211_queue_delayed_work(&local->hw, &found->work, 0); - mutex_lock(&local->mtx); - ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); mutex_unlock(&local->mtx); - return ret; + /* work will clean up etc */ + flush_delayed_work(&found->work); } - return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); + return 0; } -static enum work_done_result -ieee80211_offchan_tx_done(struct ieee80211_work *wk, struct sk_buff *skb) +static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie) { - /* - * Use the data embedded in the work struct for reporting - * here so if the driver mangled the SKB before dropping - * it (which is the only way we really should get here) - * then we don't report mangled data. - * - * If there was no wait time, then by the time we get here - * the driver will likely not have reported the status yet, - * so in that case userspace will have to deal with it. - */ - - if (wk->offchan_tx.wait && !wk->offchan_tx.status) - cfg80211_mgmt_tx_status(wk->sdata->dev, - (unsigned long) wk->offchan_tx.frame, - wk->data, wk->data_len, false, GFP_KERNEL); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; - return WORK_DONE_DESTROY; + return ieee80211_cancel_roc(local, cookie, false); } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, @@ -2243,10 +2371,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; - struct ieee80211_work *wk; const struct ieee80211_mgmt *mgmt = (void *)buf; + bool need_offchan = false; u32 flags; - bool is_offchan = false, in_hw_roc = false; + int ret; if (dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; @@ -2254,34 +2382,28 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; - /* Check that we are on the requested channel for transmission */ - if (chan != local->tmp_channel && - chan != local->oper_channel) - is_offchan = true; - if (channel_type_valid && - (channel_type != local->tmp_channel_type && - channel_type != local->_oper_channel_type)) - is_offchan = true; - - if (chan == local->hw_roc_channel) { - /* TODO: check channel type? */ - is_offchan = false; - in_hw_roc = true; - flags |= IEEE80211_TX_CTL_TX_OFFCHAN; - } - if (no_cck) flags |= IEEE80211_TX_CTL_NO_CCK_RATE; - if (is_offchan && !offchan) - return -EBUSY; - switch (sdata->vif.type) { case NL80211_IFTYPE_ADHOC: + if (!sdata->vif.bss_conf.ibss_joined) + need_offchan = true; + /* fall through */ +#ifdef CONFIG_MAC80211_MESH + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif) && + !sdata->u.mesh.mesh_id_len) + need_offchan = true; + /* fall through */ +#endif case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_MESH_POINT: + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + !ieee80211_vif_is_mesh(&sdata->vif) && + !rcu_access_pointer(sdata->bss->beacon)) + need_offchan = true; if (!ieee80211_is_action(mgmt->frame_control) || mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) break; @@ -2293,105 +2415,60 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: + if (!sdata->u.mgd.associated) + need_offchan = true; break; default: return -EOPNOTSUPP; } + mutex_lock(&local->mtx); + + /* Check if the operating channel is the requested channel */ + if (!need_offchan) { + need_offchan = chan != local->oper_channel; + if (channel_type_valid && + channel_type != local->_oper_channel_type) + need_offchan = true; + } + + if (need_offchan && !offchan) { + ret = -EBUSY; + goto out_unlock; + } + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); - if (!skb) - return -ENOMEM; + if (!skb) { + ret = -ENOMEM; + goto out_unlock; + } skb_reserve(skb, local->hw.extra_tx_headroom); memcpy(skb_put(skb, len), buf, len); IEEE80211_SKB_CB(skb)->flags = flags; - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL && - flags & IEEE80211_TX_CTL_TX_OFFCHAN) - IEEE80211_SKB_CB(skb)->hw_queue = - local->hw.offchannel_tx_hw_queue; - skb->dev = sdata->dev; - *cookie = (unsigned long) skb; - - if (is_offchan && local->ops->remain_on_channel) { - unsigned int duration; - int ret; - - mutex_lock(&local->mtx); - /* - * If the duration is zero, then the driver - * wouldn't actually do anything. Set it to - * 100 for now. - * - * TODO: cancel the off-channel operation - * when we get the SKB's TX status and - * the wait time was zero before. - */ - duration = 100; - if (wait) - duration = wait; - ret = ieee80211_remain_on_channel_hw(local, dev, chan, - channel_type, - duration, cookie); - if (ret) { - kfree_skb(skb); - mutex_unlock(&local->mtx); - return ret; - } - - local->hw_roc_for_tx = true; - local->hw_roc_duration = wait; - - /* - * queue up frame for transmission after - * ieee80211_ready_on_channel call - */ - - /* modify cookie to prevent API mismatches */ - *cookie ^= 2; - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; - if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) - IEEE80211_SKB_CB(skb)->hw_queue = - local->hw.offchannel_tx_hw_queue; - local->hw_roc_skb = skb; - local->hw_roc_skb_for_status = skb; - mutex_unlock(&local->mtx); - - return 0; - } - - /* - * Can transmit right away if the channel was the - * right one and there's no wait involved... If a - * wait is involved, we might otherwise not be on - * the right channel for long enough! - */ - if (!is_offchan && !wait && (in_hw_roc || !sdata->vif.bss_conf.idle)) { + if (!need_offchan) { ieee80211_tx_skb(sdata, skb); - return 0; - } - - wk = kzalloc(sizeof(*wk) + len, GFP_KERNEL); - if (!wk) { - kfree_skb(skb); - return -ENOMEM; + ret = 0; + goto out_unlock; } - wk->type = IEEE80211_WORK_OFFCHANNEL_TX; - wk->chan = chan; - wk->chan_type = channel_type; - wk->sdata = sdata; - wk->done = ieee80211_offchan_tx_done; - wk->offchan_tx.frame = skb; - wk->offchan_tx.wait = wait; - wk->data_len = len; - memcpy(wk->data, buf, len); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + if (local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + IEEE80211_SKB_CB(skb)->hw_queue = + local->hw.offchannel_tx_hw_queue; - ieee80211_add_work(wk); - return 0; + /* This will handle all kinds of coalescing and immediate TX */ + ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + wait, cookie, skb); + if (ret) + kfree_skb(skb); + out_unlock: + mutex_unlock(&local->mtx); + return ret; } static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, @@ -2400,45 +2477,8 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk; - int ret = -ENOENT; - mutex_lock(&local->mtx); - - if (local->ops->cancel_remain_on_channel) { - cookie ^= 2; - ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); - - if (ret == 0) { - kfree_skb(local->hw_roc_skb); - local->hw_roc_skb = NULL; - local->hw_roc_skb_for_status = NULL; - } - - mutex_unlock(&local->mtx); - - return ret; - } - - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) - continue; - - if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) - continue; - - if (cookie != (unsigned long) wk->offchan_tx.frame) - continue; - - wk->timeout = jiffies; - - ieee80211_queue_work(&local->hw, &local->work_work); - ret = 0; - break; - } - mutex_unlock(&local->mtx); - - return ret; + return ieee80211_cancel_roc(local, cookie, true); } static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8c026abcb8d..e6cbf5b68c8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -317,55 +317,30 @@ struct mesh_preq_queue { u8 flags; }; -enum ieee80211_work_type { - IEEE80211_WORK_ABORT, - IEEE80211_WORK_REMAIN_ON_CHANNEL, - IEEE80211_WORK_OFFCHANNEL_TX, -}; - -/** - * enum work_done_result - indicates what to do after work was done - * - * @WORK_DONE_DESTROY: This work item is no longer needed, destroy. - * @WORK_DONE_REQUEUE: This work item was reset to be reused, and - * should be requeued. - */ -enum work_done_result { - WORK_DONE_DESTROY, - WORK_DONE_REQUEUE, -}; +#if HZ/100 == 0 +#define IEEE80211_ROC_MIN_LEFT 1 +#else +#define IEEE80211_ROC_MIN_LEFT (HZ/100) +#endif -struct ieee80211_work { +struct ieee80211_roc_work { struct list_head list; + struct list_head dependents; - struct rcu_head rcu_head; + struct delayed_work work; struct ieee80211_sub_if_data *sdata; - enum work_done_result (*done)(struct ieee80211_work *wk, - struct sk_buff *skb); - struct ieee80211_channel *chan; enum nl80211_channel_type chan_type; - unsigned long timeout; - enum ieee80211_work_type type; + bool started, abort, hw_begun, notified; - bool started; + unsigned long hw_start_time; - union { - struct { - u32 duration; - } remain; - struct { - struct sk_buff *frame; - u32 wait; - bool status; - } offchan_tx; - }; - - size_t data_len; - u8 data[]; + u32 duration, req_duration; + struct sk_buff *frame; + u64 mgmt_tx_cookie; }; /* flags used in struct ieee80211_if_managed.flags */ @@ -847,13 +822,6 @@ struct ieee80211_local { const struct ieee80211_ops *ops; - /* - * work stuff, potentially off-channel (in the future) - */ - struct list_head work_list; - struct timer_list work_timer; - struct work_struct work_work; - /* * private workqueue to mac80211. mac80211 makes this accessible * via ieee80211_queue_work() @@ -1088,14 +1056,12 @@ struct ieee80211_local { } debugfs; #endif - struct ieee80211_channel *hw_roc_channel; - struct net_device *hw_roc_dev; - struct sk_buff *hw_roc_skb, *hw_roc_skb_for_status; + /* + * Remain-on-channel support + */ + struct list_head roc_list; struct work_struct hw_roc_start, hw_roc_done; - enum nl80211_channel_type hw_roc_channel_type; - unsigned int hw_roc_duration; - u32 hw_roc_cookie; - bool hw_roc_for_tx; + unsigned long hw_roc_start_time; struct idr ack_status_frames; spinlock_t ack_status_lock; @@ -1291,7 +1257,12 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, bool offchannel_ps_enable); void ieee80211_offchannel_return(struct ieee80211_local *local, bool offchannel_ps_disable); -void ieee80211_hw_roc_setup(struct ieee80211_local *local); +void ieee80211_roc_setup(struct ieee80211_local *local); +void ieee80211_start_next_roc(struct ieee80211_local *local); +void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc); +void ieee80211_sw_roc_work(struct work_struct *work); +void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); /* interface handling */ int ieee80211_iface_init(void); @@ -1501,18 +1472,6 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, enum nl80211_channel_type channel_type, u16 prot_mode); -/* internal work items */ -void ieee80211_work_init(struct ieee80211_local *local); -void ieee80211_add_work(struct ieee80211_work *wk); -void free_work(struct ieee80211_work *wk); -void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata); -int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - unsigned int duration, u64 *cookie); -int ieee80211_wk_cancel_remain_on_channel( - struct ieee80211_sub_if_data *sdata, u64 cookie); - /* channel management */ enum ieee80211_chan_mode { CHAN_MODE_UNDEFINED, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 968d71c5071..87aeb4f21ff 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -528,10 +528,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, */ netif_tx_stop_all_queues(sdata->dev); - /* - * Purge work for this interface. - */ - ieee80211_work_purge(sdata); + ieee80211_roc_purge(sdata); /* * Remove all stations associated with this interface. @@ -637,18 +634,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_configure_filter(local); break; default: - mutex_lock(&local->mtx); - if (local->hw_roc_dev == sdata->dev && - local->hw_roc_channel) { - /* ignore return value since this is racy */ - drv_cancel_remain_on_channel(local); - ieee80211_queue_work(&local->hw, &local->hw_roc_done); - } - mutex_unlock(&local->mtx); - - flush_work(&local->hw_roc_start); - flush_work(&local->hw_roc_done); - flush_work(&sdata->work); /* * When we get here, the interface is marked down. @@ -1457,8 +1442,8 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; int count = 0; bool working = false, scanning = false; - struct ieee80211_work *wk; unsigned int led_trig_start = 0, led_trig_stop = 0; + struct ieee80211_roc_work *roc; #ifdef CONFIG_PROVE_LOCKING WARN_ON(debug_locks && !lockdep_rtnl_is_held() && @@ -1494,9 +1479,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) } if (!local->ops->remain_on_channel) { - list_for_each_entry(wk, &local->work_list, list) { + list_for_each_entry(roc, &local->roc_list, list) { working = true; - wk->sdata->vif.bss_conf.idle = false; + roc->sdata->vif.bss_conf.idle = false; } } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 779ac613ee5..d81c178c771 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -625,8 +625,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); - ieee80211_work_init(local); - INIT_WORK(&local->restart_work, ieee80211_restart_work); INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); @@ -669,7 +667,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, ieee80211_led_names(local); - ieee80211_hw_roc_setup(local); + ieee80211_roc_setup(local); return &local->hw; } @@ -1016,12 +1014,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); - /* - * Now all work items will be gone, but the - * timer might still be armed, so delete it - */ - del_timer_sync(&local->work_timer); - cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 8f482b15bc5..abb226dc475 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -16,6 +16,7 @@ #include #include "ieee80211_i.h" #include "driver-trace.h" +#include "driver-ops.h" /* * Tell our hardware to disable PS. @@ -181,32 +182,58 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); } +void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) +{ + if (roc->notified) + return; + + if (roc->mgmt_tx_cookie) { + if (!WARN_ON(!roc->frame)) { + ieee80211_tx_skb(roc->sdata, roc->frame); + roc->frame = NULL; + } + } else { + cfg80211_ready_on_channel(roc->sdata->dev, (unsigned long)roc, + roc->chan, roc->chan_type, + roc->req_duration, GFP_KERNEL); + } + + roc->notified = true; +} + static void ieee80211_hw_roc_start(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_start); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_roc_work *roc, *dep, *tmp; mutex_lock(&local->mtx); - if (!local->hw_roc_channel) { - mutex_unlock(&local->mtx); - return; - } + if (list_empty(&local->roc_list)) + goto out_unlock; - if (local->hw_roc_skb) { - sdata = IEEE80211_DEV_TO_SUB_IF(local->hw_roc_dev); - ieee80211_tx_skb(sdata, local->hw_roc_skb); - local->hw_roc_skb = NULL; - } else { - cfg80211_ready_on_channel(local->hw_roc_dev, - local->hw_roc_cookie, - local->hw_roc_channel, - local->hw_roc_channel_type, - local->hw_roc_duration, - GFP_KERNEL); - } + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); + + if (!roc->started) + goto out_unlock; + + roc->hw_begun = true; + roc->hw_start_time = local->hw_roc_start_time; + ieee80211_handle_roc_started(roc); + list_for_each_entry_safe(dep, tmp, &roc->dependents, list) { + ieee80211_handle_roc_started(dep); + + if (dep->duration > roc->duration) { + u32 dur = dep->duration; + dep->duration = dur - roc->duration; + roc->duration = dur; + list_del(&dep->list); + list_add(&dep->list, &roc->list); + } + } + out_unlock: mutex_unlock(&local->mtx); } @@ -214,50 +241,179 @@ void ieee80211_ready_on_channel(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); + local->hw_roc_start_time = jiffies; + trace_api_ready_on_channel(local); ieee80211_queue_work(hw, &local->hw_roc_start); } EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); -static void ieee80211_hw_roc_done(struct work_struct *work) +void ieee80211_start_next_roc(struct ieee80211_local *local) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, hw_roc_done); + struct ieee80211_roc_work *roc; - mutex_lock(&local->mtx); + lockdep_assert_held(&local->mtx); - if (!local->hw_roc_channel) { - mutex_unlock(&local->mtx); + if (list_empty(&local->roc_list)) { + ieee80211_run_deferred_scan(local); return; } - /* was never transmitted */ - if (local->hw_roc_skb) { - u64 cookie; + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); - cookie = local->hw_roc_cookie ^ 2; + if (local->ops->remain_on_channel) { + int ret, duration = roc->duration; - cfg80211_mgmt_tx_status(local->hw_roc_dev, cookie, - local->hw_roc_skb->data, - local->hw_roc_skb->len, false, - GFP_KERNEL); + /* XXX: duplicated, see ieee80211_start_roc_work() */ + if (!duration) + duration = 10; - kfree_skb(local->hw_roc_skb); - local->hw_roc_skb = NULL; - local->hw_roc_skb_for_status = NULL; + ret = drv_remain_on_channel(local, roc->chan, + roc->chan_type, + duration); + + roc->started = true; + + if (ret) { + wiphy_warn(local->hw.wiphy, + "failed to start next HW ROC (%d)\n", ret); + /* + * queue the work struct again to avoid recursion + * when multiple failures occur + */ + ieee80211_remain_on_channel_expired(&local->hw); + } + } else { + /* delay it a bit */ + ieee80211_queue_delayed_work(&local->hw, &roc->work, + round_jiffies_relative(HZ/2)); + } +} + +void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) +{ + struct ieee80211_roc_work *dep, *tmp; + + /* was never transmitted */ + if (roc->frame) { + cfg80211_mgmt_tx_status(roc->sdata->dev, + (unsigned long)roc->frame, + roc->frame->data, roc->frame->len, + false, GFP_KERNEL); + kfree_skb(roc->frame); } - if (!local->hw_roc_for_tx) - cfg80211_remain_on_channel_expired(local->hw_roc_dev, - local->hw_roc_cookie, - local->hw_roc_channel, - local->hw_roc_channel_type, + if (!roc->mgmt_tx_cookie) + cfg80211_remain_on_channel_expired(roc->sdata->dev, + (unsigned long)roc, + roc->chan, roc->chan_type, GFP_KERNEL); - local->hw_roc_channel = NULL; - local->hw_roc_cookie = 0; + list_for_each_entry_safe(dep, tmp, &roc->dependents, list) + ieee80211_roc_notify_destroy(dep); + + kfree(roc); +} + +void ieee80211_sw_roc_work(struct work_struct *work) +{ + struct ieee80211_roc_work *roc = + container_of(work, struct ieee80211_roc_work, work.work); + struct ieee80211_sub_if_data *sdata = roc->sdata; + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->mtx); + + if (roc->abort) + goto finish; + + if (WARN_ON(list_empty(&local->roc_list))) + goto out_unlock; + + if (WARN_ON(roc != list_first_entry(&local->roc_list, + struct ieee80211_roc_work, + list))) + goto out_unlock; + + if (!roc->started) { + struct ieee80211_roc_work *dep; + + /* start this ROC */ + /* switch channel etc */ + ieee80211_recalc_idle(local); + + local->tmp_channel = roc->chan; + local->tmp_channel_type = roc->chan_type; + ieee80211_hw_config(local, 0); + + /* tell userspace or send frame */ + ieee80211_handle_roc_started(roc); + list_for_each_entry(dep, &roc->dependents, list) + ieee80211_handle_roc_started(dep); + + /* if it was pure TX, just finish right away */ + if (!roc->duration) + goto finish; + + roc->started = true; + ieee80211_queue_delayed_work(&local->hw, &roc->work, + msecs_to_jiffies(roc->duration)); + } else { + /* finish this ROC */ + finish: + list_del(&roc->list); + ieee80211_roc_notify_destroy(roc); + + if (roc->started) { + drv_flush(local, false); + + local->tmp_channel = NULL; + ieee80211_hw_config(local, 0); + + ieee80211_offchannel_return(local, true); + } + + ieee80211_recalc_idle(local); + + ieee80211_start_next_roc(local); + ieee80211_run_deferred_scan(local); + } + + out_unlock: + mutex_unlock(&local->mtx); +} + +static void ieee80211_hw_roc_done(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, hw_roc_done); + struct ieee80211_roc_work *roc; + + mutex_lock(&local->mtx); + + if (list_empty(&local->roc_list)) + goto out_unlock; + + roc = list_first_entry(&local->roc_list, struct ieee80211_roc_work, + list); + + if (!roc->started) + goto out_unlock; + + list_del(&roc->list); + + ieee80211_roc_notify_destroy(roc); + + /* if there's another roc, start it now */ + ieee80211_start_next_roc(local); + + /* or scan maybe */ + ieee80211_run_deferred_scan(local); + + out_unlock: mutex_unlock(&local->mtx); } @@ -271,8 +427,48 @@ void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); -void ieee80211_hw_roc_setup(struct ieee80211_local *local) +void ieee80211_roc_setup(struct ieee80211_local *local) { INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); + INIT_LIST_HEAD(&local->roc_list); +} + +void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_roc_work *roc, *tmp; + LIST_HEAD(tmp_list); + + mutex_lock(&local->mtx); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { + if (roc->sdata != sdata) + continue; + + if (roc->started && local->ops->remain_on_channel) { + /* can race, so ignore return value */ + drv_cancel_remain_on_channel(local); + } + + list_move_tail(&roc->list, &tmp_list); + roc->abort = true; + } + + ieee80211_start_next_roc(local); + ieee80211_run_deferred_scan(local); + mutex_unlock(&local->mtx); + + list_for_each_entry_safe(roc, tmp, &tmp_list, list) { + if (local->ops->remain_on_channel) { + list_del(&roc->list); + ieee80211_roc_notify_destroy(roc); + } else { + ieee80211_queue_delayed_work(&local->hw, &roc->work, 0); + + /* work will clean up etc */ + flush_delayed_work(&roc->work); + } + } + + WARN_ON_ONCE(!list_empty(&tmp_list)); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 169da0742c8..379f178eab5 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -323,7 +323,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); - ieee80211_queue_work(&local->hw, &local->work_work); + ieee80211_start_next_roc(local); } void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) @@ -376,7 +376,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) static bool ieee80211_can_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - if (!list_empty(&local->work_list)) + if (!list_empty(&local->roc_list)) return false; if (sdata->vif.type == NL80211_IFTYPE_STATION && diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 63a76901506..6b4f4252788 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -520,36 +520,16 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = (unsigned long)skb; + acked = info->flags & IEEE80211_TX_STAT_ACK; if (ieee80211_is_nullfunc(hdr->frame_control) || - ieee80211_is_qos_nullfunc(hdr->frame_control)) { - acked = info->flags & IEEE80211_TX_STAT_ACK; - + ieee80211_is_qos_nullfunc(hdr->frame_control)) cfg80211_probe_status(skb->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); - } else { - struct ieee80211_work *wk; - - rcu_read_lock(); - list_for_each_entry_rcu(wk, &local->work_list, list) { - if (wk->type != IEEE80211_WORK_OFFCHANNEL_TX) - continue; - if (wk->offchan_tx.frame != skb) - continue; - wk->offchan_tx.status = true; - break; - } - rcu_read_unlock(); - if (local->hw_roc_skb_for_status == skb) { - cookie = local->hw_roc_cookie ^ 2; - local->hw_roc_skb_for_status = NULL; - } - + else cfg80211_mgmt_tx_status( skb->dev, cookie, skb->data, skb->len, - !!(info->flags & IEEE80211_TX_STAT_ACK), - GFP_ATOMIC); - } + acked, GFP_ATOMIC); } if (unlikely(info->ack_frame_id)) { diff --git a/net/mac80211/work.c b/net/mac80211/work.c deleted file mode 100644 index b2650a9d45f..00000000000 --- a/net/mac80211/work.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * mac80211 work implementation - * - * Copyright 2003-2008, Jouni Malinen - * Copyright 2004, Instant802 Networks, Inc. - * Copyright 2005, Devicescape Software, Inc. - * Copyright 2006-2007 Jiri Benc - * Copyright 2007, Michael Wu - * Copyright 2009, Johannes Berg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ieee80211_i.h" -#include "rate.h" -#include "driver-ops.h" - -enum work_action { - WORK_ACT_NONE, - WORK_ACT_TIMEOUT, -}; - - -/* utils */ -static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) -{ - lockdep_assert_held(&local->mtx); -} - -/* - * We can have multiple work items (and connection probing) - * scheduling this timer, but we need to take care to only - * reschedule it when it should fire _earlier_ than it was - * asked for before, or if it's not pending right now. This - * function ensures that. Note that it then is required to - * run this function for all timeouts after the first one - * has happened -- the work that runs from this timer will - * do that. - */ -static void run_again(struct ieee80211_local *local, - unsigned long timeout) -{ - ASSERT_WORK_MTX(local); - - if (!timer_pending(&local->work_timer) || - time_before(timeout, local->work_timer.expires)) - mod_timer(&local->work_timer, timeout); -} - -void free_work(struct ieee80211_work *wk) -{ - kfree_rcu(wk, rcu_head); -} - -static enum work_action __must_check -ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk) -{ - /* - * First time we run, do nothing -- the generic code will - * have switched to the right channel etc. - */ - if (!wk->started) { - wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration); - - cfg80211_ready_on_channel(wk->sdata->dev, (unsigned long) wk, - wk->chan, wk->chan_type, - wk->remain.duration, GFP_KERNEL); - - return WORK_ACT_NONE; - } - - return WORK_ACT_TIMEOUT; -} - -static enum work_action __must_check -ieee80211_offchannel_tx(struct ieee80211_work *wk) -{ - if (!wk->started) { - wk->timeout = jiffies + msecs_to_jiffies(wk->offchan_tx.wait); - - /* - * After this, offchan_tx.frame remains but now is no - * longer a valid pointer -- we still need it as the - * cookie for canceling this work/status matching. - */ - ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame); - - return WORK_ACT_NONE; - } - - return WORK_ACT_TIMEOUT; -} - -static void ieee80211_work_timer(unsigned long data) -{ - struct ieee80211_local *local = (void *) data; - - if (local->quiescing) - return; - - ieee80211_queue_work(&local->hw, &local->work_work); -} - -static void ieee80211_work_work(struct work_struct *work) -{ - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, work_work); - struct ieee80211_work *wk, *tmp; - LIST_HEAD(free_work); - enum work_action rma; - bool remain_off_channel = false; - - /* - * ieee80211_queue_work() should have picked up most cases, - * here we'll pick the rest. - */ - if (WARN(local->suspended, "work scheduled while going to suspend\n")) - return; - - mutex_lock(&local->mtx); - - if (local->scanning) { - mutex_unlock(&local->mtx); - return; - } - - ieee80211_recalc_idle(local); - - list_for_each_entry_safe(wk, tmp, &local->work_list, list) { - bool started = wk->started; - - /* mark work as started if it's on the current off-channel */ - if (!started && local->tmp_channel && - wk->chan == local->tmp_channel && - wk->chan_type == local->tmp_channel_type) { - started = true; - wk->timeout = jiffies; - } - - if (!started && !local->tmp_channel) { - ieee80211_offchannel_stop_vifs(local, true); - - local->tmp_channel = wk->chan; - local->tmp_channel_type = wk->chan_type; - - ieee80211_hw_config(local, 0); - - started = true; - wk->timeout = jiffies; - } - - /* don't try to work with items that aren't started */ - if (!started) - continue; - - if (time_is_after_jiffies(wk->timeout)) { - /* - * This work item isn't supposed to be worked on - * right now, but take care to adjust the timer - * properly. - */ - run_again(local, wk->timeout); - continue; - } - - switch (wk->type) { - default: - WARN_ON(1); - /* nothing */ - rma = WORK_ACT_NONE; - break; - case IEEE80211_WORK_ABORT: - rma = WORK_ACT_TIMEOUT; - break; - case IEEE80211_WORK_REMAIN_ON_CHANNEL: - rma = ieee80211_remain_on_channel_timeout(wk); - break; - case IEEE80211_WORK_OFFCHANNEL_TX: - rma = ieee80211_offchannel_tx(wk); - break; - } - - wk->started = started; - - switch (rma) { - case WORK_ACT_NONE: - /* might have changed the timeout */ - run_again(local, wk->timeout); - break; - case WORK_ACT_TIMEOUT: - list_del_rcu(&wk->list); - synchronize_rcu(); - list_add(&wk->list, &free_work); - break; - default: - WARN(1, "unexpected: %d", rma); - } - } - - list_for_each_entry(wk, &local->work_list, list) { - if (!wk->started) - continue; - if (wk->chan != local->tmp_channel || - wk->chan_type != local->tmp_channel_type) - continue; - remain_off_channel = true; - } - - if (!remain_off_channel && local->tmp_channel) { - local->tmp_channel = NULL; - ieee80211_hw_config(local, 0); - - ieee80211_offchannel_return(local, true); - - /* give connection some time to breathe */ - run_again(local, jiffies + HZ/2); - } - - ieee80211_recalc_idle(local); - ieee80211_run_deferred_scan(local); - - mutex_unlock(&local->mtx); - - list_for_each_entry_safe(wk, tmp, &free_work, list) { - wk->done(wk, NULL); - list_del(&wk->list); - kfree(wk); - } -} - -void ieee80211_add_work(struct ieee80211_work *wk) -{ - struct ieee80211_local *local; - - if (WARN_ON(!wk->chan)) - return; - - if (WARN_ON(!wk->sdata)) - return; - - if (WARN_ON(!wk->done)) - return; - - if (WARN_ON(!ieee80211_sdata_running(wk->sdata))) - return; - - wk->started = false; - - local = wk->sdata->local; - mutex_lock(&local->mtx); - list_add_tail(&wk->list, &local->work_list); - mutex_unlock(&local->mtx); - - ieee80211_queue_work(&local->hw, &local->work_work); -} - -void ieee80211_work_init(struct ieee80211_local *local) -{ - INIT_LIST_HEAD(&local->work_list); - setup_timer(&local->work_timer, ieee80211_work_timer, - (unsigned long)local); - INIT_WORK(&local->work_work, ieee80211_work_work); -} - -void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk; - bool cleanup = false; - - mutex_lock(&local->mtx); - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) - continue; - cleanup = true; - wk->type = IEEE80211_WORK_ABORT; - wk->started = true; - wk->timeout = jiffies; - } - mutex_unlock(&local->mtx); - - /* run cleanups etc. */ - if (cleanup) - ieee80211_work_work(&local->work_work); - - mutex_lock(&local->mtx); - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) - continue; - WARN_ON(1); - break; - } - mutex_unlock(&local->mtx); -} - -static enum work_done_result ieee80211_remain_done(struct ieee80211_work *wk, - struct sk_buff *skb) -{ - /* - * We are done serving the remain-on-channel command. - */ - cfg80211_remain_on_channel_expired(wk->sdata->dev, (unsigned long) wk, - wk->chan, wk->chan_type, - GFP_KERNEL); - - return WORK_DONE_DESTROY; -} - -int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - unsigned int duration, u64 *cookie) -{ - struct ieee80211_work *wk; - - wk = kzalloc(sizeof(*wk), GFP_KERNEL); - if (!wk) - return -ENOMEM; - - wk->type = IEEE80211_WORK_REMAIN_ON_CHANNEL; - wk->chan = chan; - wk->chan_type = channel_type; - wk->sdata = sdata; - wk->done = ieee80211_remain_done; - - wk->remain.duration = duration; - - *cookie = (unsigned long) wk; - - ieee80211_add_work(wk); - - return 0; -} - -int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata, - u64 cookie) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_work *wk, *tmp; - bool found = false; - - mutex_lock(&local->mtx); - list_for_each_entry_safe(wk, tmp, &local->work_list, list) { - if ((unsigned long) wk == cookie) { - wk->timeout = jiffies; - found = true; - break; - } - } - mutex_unlock(&local->mtx); - - if (!found) - return -ENOENT; - - ieee80211_queue_work(&local->hw, &local->work_work); - - return 0; -} -- cgit v1.2.3-70-g09d2 From 2c352f444ccfa966a1aa4fd8e9ee29381c467448 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:09 +0000 Subject: netfilter: nf_conntrack: prepare namespace support for l4 protocol trackers This patch prepares the namespace support for layer 4 protocol trackers. Basically, this modifies the following interfaces: * nf_ct_[un]register_sysctl * nf_conntrack_l4proto_[un]register to include the namespace parameter. We still use init_net in this patch to prepare the ground for follow-up patches for each layer 4 protocol tracker. We add a new net_id field to struct nf_conntrack_l4proto that is used to store the pernet_operations id for each layer 4 protocol tracker. Note that AF_INET6's protocols do not need to do sysctl compat. Thus, we only register compat sysctl when l4proto.l3proto != AF_INET6. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 11 +- include/net/netns/conntrack.h | 12 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 18 ++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 18 ++-- net/netfilter/nf_conntrack_proto.c | 143 ++++++++++++++++++------- net/netfilter/nf_conntrack_proto_dccp.c | 10 +- net/netfilter/nf_conntrack_proto_gre.c | 6 +- net/netfilter/nf_conntrack_proto_sctp.c | 10 +- net/netfilter/nf_conntrack_proto_udplite.c | 10 +- 9 files changed, 159 insertions(+), 79 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 3b572bb20aa..d621c91de5c 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -12,6 +12,7 @@ #include #include #include +#include struct seq_file; @@ -103,6 +104,10 @@ struct nf_conntrack_l4proto { struct ctl_table *ctl_compat_table; #endif #endif + int *net_id; + /* Init l4proto pernet data */ + int (*init_net)(struct net *net); + /* Protocol name */ const char *name; @@ -123,8 +128,10 @@ nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol registration. */ -extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto); -extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto); +extern int nf_conntrack_l4proto_register(struct net *net, + struct nf_conntrack_l4proto *proto); +extern void nf_conntrack_l4proto_unregister(struct net *net, + struct nf_conntrack_l4proto *proto); /* Generic netlink helpers */ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a053a19870c..1f53038b0d1 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -8,6 +8,18 @@ struct ctl_table_header; struct nf_conntrack_ecache; +struct nf_proto_net { +#ifdef CONFIG_SYSCTL + struct ctl_table_header *ctl_table_header; + struct ctl_table *ctl_table; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct ctl_table_header *ctl_compat_header; + struct ctl_table *ctl_compat_table; +#endif +#endif + unsigned int users; +}; + struct netns_ct { atomic_t count; unsigned int expect_count; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 91747d4ebc2..46ec515db12 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -391,19 +391,19 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) return ret; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register tcp.\n"); goto cleanup_sockopt; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udp4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register udp.\n"); goto cleanup_tcp; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_icmp); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register icmp.\n"); goto cleanup_udp; @@ -434,11 +434,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) cleanup_ipv4: nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); cleanup_icmp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); cleanup_udp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp4); cleanup_tcp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp4); cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst); return ret; @@ -452,9 +452,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) #endif nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp4); nf_unregister_sockopt(&so_getorigdst); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3224ef90a21..51ad9f10442 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -340,19 +340,19 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); nf_defrag_ipv6_enable(); - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_tcp6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register tcp.\n"); return ret; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udp6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register udp.\n"); goto cleanup_tcp; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_icmpv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register icmpv6.\n"); goto cleanup_udp; @@ -376,11 +376,11 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_ipv6: nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); cleanup_icmpv6: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); cleanup_udp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp6); cleanup_tcp: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp6); return ret; } @@ -389,9 +389,9 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp6); } module_init(nf_conntrack_l3proto_ipv6_init); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8b631b07a64..7ee31ac0a12 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -36,28 +36,35 @@ static DEFINE_MUTEX(nf_ct_proto_mutex); #ifdef CONFIG_SYSCTL static int -nf_ct_register_sysctl(struct ctl_table_header **header, const char *path, - struct ctl_table *table, unsigned int *users) +nf_ct_register_sysctl(struct net *net, + struct ctl_table_header **header, + const char *path, + struct ctl_table *table, + unsigned int *users) { if (*header == NULL) { - *header = register_net_sysctl(&init_net, path, table); + *header = register_net_sysctl(net, path, table); if (*header == NULL) return -ENOMEM; } if (users != NULL) (*users)++; + return 0; } static void nf_ct_unregister_sysctl(struct ctl_table_header **header, - struct ctl_table *table, unsigned int *users) + struct ctl_table **table, + unsigned int *users) { if (users != NULL && --*users > 0) return; unregister_net_sysctl_table(*header); + kfree(*table); *header = NULL; + *table = NULL; } #endif @@ -167,7 +174,8 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto) #ifdef CONFIG_SYSCTL if (l3proto->ctl_table != NULL) { - err = nf_ct_register_sysctl(&l3proto->ctl_table_header, + err = nf_ct_register_sysctl(&init_net, + &l3proto->ctl_table_header, l3proto->ctl_table_path, l3proto->ctl_table, NULL); } @@ -180,7 +188,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto #ifdef CONFIG_SYSCTL if (l3proto->ctl_table_header != NULL) nf_ct_unregister_sysctl(&l3proto->ctl_table_header, - l3proto->ctl_table, NULL); + &l3proto->ctl_table, NULL); #endif } @@ -243,29 +251,54 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); -static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto) +static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, + struct nf_conntrack_l4proto *l4proto) +{ + if (l4proto->net_id) + return net_generic(net, *l4proto->net_id); + else + return NULL; +} + +static +int nf_ct_l4proto_register_sysctl(struct net *net, + struct nf_conntrack_l4proto *l4proto) { int err = 0; + struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); + if (pn == NULL) + return 0; #ifdef CONFIG_SYSCTL - if (l4proto->ctl_table != NULL) { - err = nf_ct_register_sysctl(l4proto->ctl_table_header, + if (pn->ctl_table != NULL) { + err = nf_ct_register_sysctl(net, + &pn->ctl_table_header, "net/netfilter", - l4proto->ctl_table, - l4proto->ctl_table_users); - if (err < 0) + pn->ctl_table, + &pn->users); + if (err < 0) { + if (!pn->users) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + } goto out; + } } #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->ctl_compat_table != NULL) { - err = nf_ct_register_sysctl(&l4proto->ctl_compat_table_header, + if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { + err = nf_ct_register_sysctl(net, + &pn->ctl_compat_header, "net/ipv4/netfilter", - l4proto->ctl_compat_table, NULL); + pn->ctl_compat_table, + NULL); if (err == 0) goto out; - nf_ct_unregister_sysctl(l4proto->ctl_table_header, - l4proto->ctl_table, - l4proto->ctl_table_users); + + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + nf_ct_unregister_sysctl(&pn->ctl_table_header, + &pn->ctl_table, + &pn->users); } #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ out: @@ -273,25 +306,34 @@ out: return err; } -static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto) +static +void nf_ct_l4proto_unregister_sysctl(struct net *net, + struct nf_conntrack_l4proto *l4proto) { + struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); + if (pn == NULL) + return; #ifdef CONFIG_SYSCTL - if (l4proto->ctl_table_header != NULL && - *l4proto->ctl_table_header != NULL) - nf_ct_unregister_sysctl(l4proto->ctl_table_header, - l4proto->ctl_table, - l4proto->ctl_table_users); + if (pn->ctl_table_header != NULL) + nf_ct_unregister_sysctl(&pn->ctl_table_header, + &pn->ctl_table, + &pn->users); + #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->ctl_compat_table_header != NULL) - nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header, - l4proto->ctl_compat_table, NULL); + if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) + nf_ct_unregister_sysctl(&pn->ctl_compat_header, + &pn->ctl_compat_table, + NULL); #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#else + pn->users--; #endif /* CONFIG_SYSCTL */ } /* FIXME: Allow NULL functions and sub in pointers to generic for them. --RR */ -int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) +static int +nf_conntrack_l4proto_register_net(struct nf_conntrack_l4proto *l4proto) { int ret = 0; @@ -333,10 +375,6 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) goto out_unlock; } - ret = nf_ct_l4proto_register_sysctl(l4proto); - if (ret < 0) - goto out_unlock; - l4proto->nla_size = 0; if (l4proto->nlattr_size) l4proto->nla_size += l4proto->nlattr_size(); @@ -345,17 +383,34 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); - out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; } -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); -void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) +int nf_conntrack_l4proto_register(struct net *net, + struct nf_conntrack_l4proto *l4proto) { - struct net *net; + int ret = 0; + if (net == &init_net) + ret = nf_conntrack_l4proto_register_net(l4proto); + + if (ret < 0) + return ret; + + if (l4proto->init_net) + ret = l4proto->init_net(net); + if (ret < 0) + return ret; + + return nf_ct_l4proto_register_sysctl(net, l4proto); +} +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); + +static void +nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto) +{ BUG_ON(l4proto->l3proto >= PF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -365,15 +420,21 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) ) != l4proto); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], &nf_conntrack_l4proto_generic); - nf_ct_l4proto_unregister_sysctl(l4proto); mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); +} +void nf_conntrack_l4proto_unregister(struct net *net, + struct nf_conntrack_l4proto *l4proto) +{ + if (net == &init_net) + nf_conntrack_l4proto_unregister_net(l4proto); + + nf_ct_l4proto_unregister_sysctl(net, l4proto); /* Remove all contrack entries for this protocol */ rtnl_lock(); - for_each_net(net) - nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); @@ -383,7 +444,7 @@ int nf_conntrack_proto_init(void) unsigned int i; int err; - err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic); + err = nf_ct_l4proto_register_sysctl(&init_net, &nf_conntrack_l4proto_generic); if (err < 0) return err; @@ -397,7 +458,7 @@ void nf_conntrack_proto_fini(void) { unsigned int i; - nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic); + nf_ct_l4proto_unregister_sysctl(&init_net, &nf_conntrack_l4proto_generic); /* free l3proto protocol tables */ for (i = 0; i < PF_MAX; i++) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ef706a485be..5a8e0372428 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -945,17 +945,17 @@ static int __init nf_conntrack_proto_dccp_init(void) if (err < 0) goto err1; - err = nf_conntrack_l4proto_register(&dccp_proto4); + err = nf_conntrack_l4proto_register(&init_net, &dccp_proto4); if (err < 0) goto err2; - err = nf_conntrack_l4proto_register(&dccp_proto6); + err = nf_conntrack_l4proto_register(&init_net, &dccp_proto6); if (err < 0) goto err3; return 0; err3: - nf_conntrack_l4proto_unregister(&dccp_proto4); + nf_conntrack_l4proto_unregister(&init_net, &dccp_proto4); err2: unregister_pernet_subsys(&dccp_net_ops); err1: @@ -965,8 +965,8 @@ err1: static void __exit nf_conntrack_proto_dccp_fini(void) { unregister_pernet_subsys(&dccp_net_ops); - nf_conntrack_l4proto_unregister(&dccp_proto6); - nf_conntrack_l4proto_unregister(&dccp_proto4); + nf_conntrack_l4proto_unregister(&init_net, &dccp_proto6); + nf_conntrack_l4proto_unregister(&init_net, &dccp_proto4); } module_init(nf_conntrack_proto_dccp_init); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 4bf6b4e4b77..132f0d2d82c 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -396,18 +396,18 @@ static int __init nf_ct_proto_gre_init(void) { int rv; - rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); + rv = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_gre4); if (rv < 0) return rv; rv = register_pernet_subsys(&proto_gre_net_ops); if (rv < 0) - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_gre4); return rv; } static void __exit nf_ct_proto_gre_fini(void) { - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_gre4); unregister_pernet_subsys(&proto_gre_net_ops); } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 996db2fa21f..97bbc20e1a2 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -791,12 +791,12 @@ static int __init nf_conntrack_proto_sctp_init(void) { int ret; - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_sctp4); if (ret) { pr_err("nf_conntrack_l4proto_sctp4: protocol register failed\n"); goto out; } - ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6); + ret = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_sctp6); if (ret) { pr_err("nf_conntrack_l4proto_sctp6: protocol register failed\n"); goto cleanup_sctp4; @@ -805,15 +805,15 @@ static int __init nf_conntrack_proto_sctp_init(void) return ret; cleanup_sctp4: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_sctp4); out: return ret; } static void __exit nf_conntrack_proto_sctp_fini(void) { - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_sctp4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_sctp6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_sctp4); } module_init(nf_conntrack_proto_sctp_init); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4d60a5376aa..fa142a81496 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -299,23 +299,23 @@ static int __init nf_conntrack_proto_udplite_init(void) { int err; - err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite4); + err = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udplite4); if (err < 0) goto err1; - err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite6); + err = nf_conntrack_l4proto_register(&init_net, &nf_conntrack_l4proto_udplite6); if (err < 0) goto err2; return 0; err2: - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udplite4); err1: return err; } static void __exit nf_conntrack_proto_udplite_exit(void) { - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6); - nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udplite6); + nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udplite4); } module_init(nf_conntrack_proto_udplite_init); -- cgit v1.2.3-70-g09d2 From 524a53e5ad5f34f64ed34281e8b0eca19437db5b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:10 +0000 Subject: netfilter: nf_conntrack: prepare namespace support for l3 protocol trackers This patch prepares the namespace support for layer 3 protocol trackers. Basically, this modifies the following interfaces: * nf_ct_l3proto_[un]register_sysctl. * nf_conntrack_l3proto_[un]register. We add a new nf_ct_l3proto_net is used to get the pernet data of l3proto. This adds rhe new struct nf_ip_net that is used to store the sysctl header and l3proto_ipv4,l4proto_tcp(6),l4proto_udp(6),l4proto_icmp(v6) because the protos such tcp and tcp6 use the same data,so making nf_ip_net as a field of netns_ct is the easiest way to manager it. This patch also adds init_net to struct nf_conntrack_l3proto to initial the layer 3 protocol pernet data. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 9 ++- include/net/netns/conntrack.h | 8 +++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 +- net/netfilter/nf_conntrack_proto.c | 92 ++++++++++++++++++++------ 5 files changed, 91 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 9699c028b74..d6df8c71a7f 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -69,6 +69,9 @@ struct nf_conntrack_l3proto { struct ctl_table *ctl_table; #endif /* CONFIG_SYSCTL */ + /* Init l3proto pernet data */ + int (*init_net)(struct net *net); + /* Module (if any) which this is connected to. */ struct module *me; }; @@ -76,8 +79,10 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; /* Protocol registration. */ -extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); -extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); +extern int nf_conntrack_l3proto_register(struct net *net, + struct nf_conntrack_l3proto *proto); +extern void nf_conntrack_l3proto_unregister(struct net *net, + struct nf_conntrack_l3proto *proto); extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 1f53038b0d1..b2dbcc5cd81 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -20,6 +20,13 @@ struct nf_proto_net { unsigned int users; }; +struct nf_ip_net { +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + struct ctl_table_header *ctl_table_header; + struct ctl_table *ctl_table; +#endif +}; + struct netns_ct { atomic_t count; unsigned int expect_count; @@ -40,6 +47,7 @@ struct netns_ct { unsigned int sysctl_log_invalid; /* Log invalid packets */ int sysctl_auto_assign_helper; bool auto_assign_helper_warned; + struct nf_ip_net nf_ct_proto; #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; struct ctl_table_header *acct_sysctl_header; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 46ec515db12..0c0fb906c19 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -409,7 +409,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_udp; } - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); + ret = nf_conntrack_l3proto_register(&init_net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { pr_err("nf_conntrack_ipv4: can't register ipv4\n"); goto cleanup_icmp; @@ -432,7 +432,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); #endif cleanup_ipv4: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv4); cleanup_icmp: nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); cleanup_udp: @@ -451,7 +451,7 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) nf_conntrack_ipv4_compat_fini(); #endif nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv4); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmp); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp4); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp4); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 51ad9f10442..7334cbfd600 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -358,7 +358,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) goto cleanup_udp; } - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); + ret = nf_conntrack_l3proto_register(&init_net, &nf_conntrack_l3proto_ipv6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register ipv6\n"); goto cleanup_icmpv6; @@ -374,7 +374,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) return ret; cleanup_ipv6: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv6); cleanup_icmpv6: nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); cleanup_udp: @@ -388,7 +388,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) { synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + nf_conntrack_l3proto_unregister(&init_net, &nf_conntrack_l3proto_ipv6); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_icmpv6); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_udp6); nf_conntrack_l4proto_unregister(&init_net, &nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 7ee31ac0a12..a8daf0faadb 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -168,31 +168,57 @@ static int kill_l4proto(struct nf_conn *i, void *data) nf_ct_l3num(i) == l4proto->l3proto; } -static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto) +static struct nf_ip_net *nf_ct_l3proto_net(struct net *net, + struct nf_conntrack_l3proto *l3proto) +{ + if (l3proto->l3proto == PF_INET) + return &net->ct.nf_ct_proto; + else + return NULL; +} + +static int nf_ct_l3proto_register_sysctl(struct net *net, + struct nf_conntrack_l3proto *l3proto) { int err = 0; + struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); + /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */ + if (in == NULL) + return 0; -#ifdef CONFIG_SYSCTL - if (l3proto->ctl_table != NULL) { - err = nf_ct_register_sysctl(&init_net, - &l3proto->ctl_table_header, +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + if (in->ctl_table != NULL) { + err = nf_ct_register_sysctl(net, + &in->ctl_table_header, l3proto->ctl_table_path, - l3proto->ctl_table, NULL); + in->ctl_table, + NULL); + if (err < 0) { + kfree(in->ctl_table); + in->ctl_table = NULL; + } } #endif return err; } -static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto) +static void nf_ct_l3proto_unregister_sysctl(struct net *net, + struct nf_conntrack_l3proto *l3proto) { -#ifdef CONFIG_SYSCTL - if (l3proto->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&l3proto->ctl_table_header, - &l3proto->ctl_table, NULL); + struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); + + if (in == NULL) + return; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + if (in->ctl_table_header != NULL) + nf_ct_unregister_sysctl(&in->ctl_table_header, + &in->ctl_table, + NULL); #endif } -int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) +static int +nf_conntrack_l3proto_register_net(struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -211,10 +237,6 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) goto out_unlock; } - ret = nf_ct_l3proto_register_sysctl(proto); - if (ret < 0) - goto out_unlock; - if (proto->nlattr_tuple_size) proto->nla_size = 3 * proto->nlattr_tuple_size(); @@ -223,13 +245,32 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) out_unlock: mutex_unlock(&nf_ct_proto_mutex); return ret; + } -EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); -void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) +int nf_conntrack_l3proto_register(struct net *net, + struct nf_conntrack_l3proto *proto) { - struct net *net; + int ret = 0; + + if (net == &init_net) + ret = nf_conntrack_l3proto_register_net(proto); + if (ret < 0) + return ret; + + if (proto->init_net) { + ret = proto->init_net(net); + if (ret < 0) + return ret; + } + return nf_ct_l3proto_register_sysctl(net, proto); +} +EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); + +static void +nf_conntrack_l3proto_unregister_net(struct nf_conntrack_l3proto *proto) +{ BUG_ON(proto->l3proto >= AF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -238,15 +279,22 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) ) != proto); rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], &nf_conntrack_l3proto_generic); - nf_ct_l3proto_unregister_sysctl(proto); mutex_unlock(&nf_ct_proto_mutex); synchronize_rcu(); +} + +void nf_conntrack_l3proto_unregister(struct net *net, + struct nf_conntrack_l3proto *proto) +{ + if (net == &init_net) + nf_conntrack_l3proto_unregister_net(proto); + + nf_ct_l3proto_unregister_sysctl(net, proto); /* Remove all contrack entries for this protocol */ rtnl_lock(); - for_each_net(net) - nf_ct_iterate_cleanup(net, kill_l3proto, proto); + nf_ct_iterate_cleanup(net, kill_l3proto, proto); rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); -- cgit v1.2.3-70-g09d2 From 15f585bd76b6bd2974b23c9e69ff038a0826a0be Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:11 +0000 Subject: netfilter: nf_ct_generic: add namespace support This patch adds namespace support for the generic layer 4 protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 4 +-- include/net/netns/conntrack.h | 6 ++++ net/netfilter/nf_conntrack_core.c | 17 ++++------- net/netfilter/nf_conntrack_proto.c | 46 +++++++++++++++++++----------- net/netfilter/nf_conntrack_proto_generic.c | 38 ++++++++++++++++++++++-- 5 files changed, 78 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index aced085132e..d8f5b9f5216 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -28,8 +28,8 @@ extern unsigned int nf_conntrack_in(struct net *net, extern int nf_conntrack_init(struct net *net); extern void nf_conntrack_cleanup(struct net *net); -extern int nf_conntrack_proto_init(void); -extern void nf_conntrack_proto_fini(void); +extern int nf_conntrack_proto_init(struct net *net); +extern void nf_conntrack_proto_fini(struct net *net); extern bool nf_ct_get_tuple(const struct sk_buff *skb, diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index b2dbcc5cd81..0ef8592d48b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -20,7 +20,13 @@ struct nf_proto_net { unsigned int users; }; +struct nf_generic_net { + struct nf_proto_net pn; + unsigned int timeout; +}; + struct nf_ip_net { + struct nf_generic_net generic; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ac3af97cc46..068f2e0ec58 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1333,7 +1333,6 @@ static void nf_conntrack_cleanup_init_net(void) while (untrack_refs() > 0) schedule(); - nf_conntrack_proto_fini(); #ifdef CONFIG_NF_CONNTRACK_ZONES nf_ct_extend_unregister(&nf_ct_zone_extend); #endif @@ -1372,7 +1371,7 @@ void nf_conntrack_cleanup(struct net *net) netfilter framework. Roll on, two-stage module delete... */ synchronize_net(); - + nf_conntrack_proto_fini(net); nf_conntrack_cleanup_net(net); if (net_eq(net, &init_net)) { @@ -1496,11 +1495,6 @@ static int nf_conntrack_init_init_net(void) printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - - ret = nf_conntrack_proto_init(); - if (ret < 0) - goto err_proto; - #ifdef CONFIG_NF_CONNTRACK_ZONES ret = nf_ct_extend_register(&nf_ct_zone_extend); if (ret < 0) @@ -1518,9 +1512,7 @@ static int nf_conntrack_init_init_net(void) #ifdef CONFIG_NF_CONNTRACK_ZONES err_extend: - nf_conntrack_proto_fini(); #endif -err_proto: return ret; } @@ -1583,9 +1575,7 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_helper_init(net); if (ret < 0) goto err_helper; - return 0; - err_helper: nf_conntrack_timeout_fini(net); err_timeout: @@ -1622,6 +1612,9 @@ int nf_conntrack_init(struct net *net) if (ret < 0) goto out_init_net; } + ret = nf_conntrack_proto_init(net); + if (ret < 0) + goto out_proto; ret = nf_conntrack_init_net(net); if (ret < 0) goto out_net; @@ -1637,6 +1630,8 @@ int nf_conntrack_init(struct net *net) return 0; out_net: + nf_conntrack_proto_fini(net); +out_proto: if (net_eq(net, &init_net)) nf_conntrack_cleanup_init_net(); out_init_net: diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a8daf0faadb..b095b4aefd7 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -302,10 +302,16 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { - if (l4proto->net_id) - return net_generic(net, *l4proto->net_id); - else - return NULL; + switch (l4proto->l4proto) { + case 255: /* l4proto_generic */ + return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; + default: + if (l4proto->net_id) + return net_generic(net, *l4proto->net_id); + else + return NULL; + } + return NULL; } static @@ -487,28 +493,34 @@ void nf_conntrack_l4proto_unregister(struct net *net, } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); -int nf_conntrack_proto_init(void) +int nf_conntrack_proto_init(struct net *net) { unsigned int i; int err; - - err = nf_ct_l4proto_register_sysctl(&init_net, &nf_conntrack_l4proto_generic); + err = nf_conntrack_l4proto_generic.init_net(net); + if (err < 0) + return err; + err = nf_ct_l4proto_register_sysctl(net, + &nf_conntrack_l4proto_generic); if (err < 0) return err; - for (i = 0; i < AF_MAX; i++) - rcu_assign_pointer(nf_ct_l3protos[i], - &nf_conntrack_l3proto_generic); + if (net == &init_net) { + for (i = 0; i < AF_MAX; i++) + rcu_assign_pointer(nf_ct_l3protos[i], + &nf_conntrack_l3proto_generic); + } return 0; } -void nf_conntrack_proto_fini(void) +void nf_conntrack_proto_fini(struct net *net) { unsigned int i; - - nf_ct_l4proto_unregister_sysctl(&init_net, &nf_conntrack_l4proto_generic); - - /* free l3proto protocol tables */ - for (i = 0; i < PF_MAX; i++) - kfree(nf_ct_protos[i]); + nf_ct_l4proto_unregister_sysctl(net, + &nf_conntrack_l4proto_generic); + if (net == &init_net) { + /* free l3proto protocol tables */ + for (i = 0; i < PF_MAX; i++) + kfree(nf_ct_protos[i]); + } } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d8923d54b35..19bc880eb4e 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -14,6 +14,11 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static inline struct nf_generic_net *generic_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.generic; +} + static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -42,7 +47,7 @@ static int generic_print_tuple(struct seq_file *s, static unsigned int *generic_get_timeouts(struct net *net) { - return &nf_ct_generic_timeout; + return &(generic_pernet(net)->timeout); } /* Returns verdict for packet, or -1 for invalid. */ @@ -110,7 +115,6 @@ static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { { .procname = "nf_conntrack_generic_timeout", - .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -121,7 +125,6 @@ static struct ctl_table generic_sysctl_table[] = { static struct ctl_table generic_compat_sysctl_table[] = { { .procname = "ip_conntrack_generic_timeout", - .data = &nf_ct_generic_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -131,6 +134,34 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int generic_init_net(struct net *net) +{ + struct nf_generic_net *gn = generic_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)gn; + gn->timeout = nf_ct_generic_timeout; +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(generic_sysctl_table, + sizeof(generic_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &gn->timeout; + +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, + sizeof(generic_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + return -ENOMEM; + } + pn->ctl_compat_table[0].data = &gn->timeout; +#endif +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, @@ -158,4 +189,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .ctl_compat_table = generic_compat_sysctl_table, #endif #endif + .init_net = generic_init_net, }; -- cgit v1.2.3-70-g09d2 From d2ba1fde42af44fbce361202e9af13daff9e4381 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:12 +0000 Subject: netfilter: nf_ct_tcp: add namespace support This patch adds namespace support for TCP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 10 ++ net/netfilter/nf_conntrack_proto.c | 2 + net/netfilter/nf_conntrack_proto_tcp.c | 162 +++++++++++++++++++++++++++------ 3 files changed, 145 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0ef8592d48b..680d799ece8 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -4,6 +4,7 @@ #include #include #include +#include struct ctl_table_header; struct nf_conntrack_ecache; @@ -25,8 +26,17 @@ struct nf_generic_net { unsigned int timeout; }; +struct nf_tcp_net { + struct nf_proto_net pn; + unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; + unsigned int tcp_loose; + unsigned int tcp_be_liberal; + unsigned int tcp_max_retrans; +}; + struct nf_ip_net { struct nf_generic_net generic; + struct nf_tcp_net tcp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b095b4aefd7..8a71e8bb0d6 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -303,6 +303,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { switch (l4proto->l4proto) { + case IPPROTO_TCP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 21ff1a99f53..a053f6786b3 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -270,6 +270,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { } }; +static inline struct nf_tcp_net *tcp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp; +} + static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -516,6 +521,7 @@ static bool tcp_in_window(const struct nf_conn *ct, u_int8_t pf) { struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -720,7 +726,7 @@ static bool tcp_in_window(const struct nf_conn *ct, } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || - nf_ct_tcp_be_liberal) + tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, @@ -828,6 +834,7 @@ static int tcp_packet(struct nf_conn *ct, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; @@ -1020,7 +1027,7 @@ static int tcp_packet(struct nf_conn *ct, && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & @@ -1065,6 +1072,8 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, enum tcp_conntrack new_state; const struct tcphdr *th; struct tcphdr _tcph; + struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; @@ -1093,7 +1102,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[0].td_end; tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); - } else if (nf_ct_tcp_loose == 0) { + } else if (tn->tcp_loose == 0) { /* Don't try to pick up connections. */ return false; } else { @@ -1360,91 +1369,78 @@ static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { .procname = "nf_conntrack_tcp_timeout_syn_sent", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_syn_recv", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_established", - .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_fin_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_last_ack", - .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_time_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_max_retrans", - .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_unacknowledged", - .data = &tcp_timeouts[TCP_CONNTRACK_UNACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -1456,91 +1452,78 @@ static struct ctl_table tcp_sysctl_table[] = { static struct ctl_table tcp_compat_sysctl_table[] = { { .procname = "ip_conntrack_tcp_timeout_syn_sent", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_established", - .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_fin_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_last_ack", - .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_time_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_max_retrans", - .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -1550,6 +1533,125 @@ static struct ctl_table tcp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL + struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; + + if (pn->ctl_table) + return 0; + + pn->ctl_table = kmemdup(tcp_sysctl_table, + sizeof(tcp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; + pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; + pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; + pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; + pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; + pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; + pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; + pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; + pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK]; + pn->ctl_table[10].data = &tn->tcp_loose; + pn->ctl_table[11].data = &tn->tcp_be_liberal; + pn->ctl_table[12].data = &tn->tcp_max_retrans; +#endif + return 0; +} + +static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; + pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, + sizeof(tcp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; + pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; + pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; + pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; + pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; + pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; + pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; + pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; + pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; + pn->ctl_compat_table[10].data = &tn->tcp_loose; + pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; + pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; +#endif +#endif + return 0; +} + +static int tcpv4_init_net(struct net *net) +{ + int i; + int ret = 0; + struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)tn; + +#ifdef CONFIG_SYSCTL + if (!pn->ctl_table) { +#else + if (!pn->user++) { +#endif + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; + + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + } + + ret = tcp_kmemdup_compat_sysctl_table(pn); + + if (ret < 0) + return ret; + + ret = tcp_kmemdup_sysctl_table(pn); + +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (ret < 0) { + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + } +#endif +#endif + return ret; +} + +static int tcpv6_init_net(struct net *net) +{ + int i; + struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)tn; + +#ifdef CONFIG_SYSCTL + if (!pn->ctl_table) { +#else + if (!pn->user++) { +#endif + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + } + + return tcp_kmemdup_sysctl_table(pn); +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, @@ -1590,6 +1692,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .ctl_compat_table = tcp_compat_sysctl_table, #endif #endif + .init_net = tcpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1630,5 +1733,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .ctl_table_header = &tcp_sysctl_header, .ctl_table = tcp_sysctl_table, #endif + .init_net = tcpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); -- cgit v1.2.3-70-g09d2 From 0ce490ad4387a67ee8ca5253476272d508fc0b6f Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:13 +0000 Subject: netfilter: nf_ct_udp: add namespace support This patch adds namespace support for UDP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 12 ++++ net/netfilter/nf_conntrack_proto.c | 2 + net/netfilter/nf_conntrack_proto_udp.c | 100 +++++++++++++++++++++++++++++---- 3 files changed, 103 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 680d799ece8..7bd14ab8ce1 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -34,9 +34,21 @@ struct nf_tcp_net { unsigned int tcp_max_retrans; }; +enum udp_conntrack { + UDP_CT_UNREPLIED, + UDP_CT_REPLIED, + UDP_CT_MAX +}; + +struct nf_udp_net { + struct nf_proto_net pn; + unsigned int timeouts[UDP_CT_MAX]; +}; + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; + struct nf_udp_net udp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 8a71e8bb0d6..9c6aee51dea 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -305,6 +305,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, switch (l4proto->l4proto) { case IPPROTO_TCP: return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; + case IPPROTO_UDP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 7259a6bdeb4..f56c8905ddf 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -25,17 +25,16 @@ #include #include -enum udp_conntrack { - UDP_CT_UNREPLIED, - UDP_CT_REPLIED, - UDP_CT_MAX -}; - static unsigned int udp_timeouts[UDP_CT_MAX] = { [UDP_CT_UNREPLIED] = 30*HZ, [UDP_CT_REPLIED] = 180*HZ, }; +static inline struct nf_udp_net *udp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.udp; +} + static bool udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -73,7 +72,7 @@ static int udp_print_tuple(struct seq_file *s, static unsigned int *udp_get_timeouts(struct net *net) { - return udp_timeouts; + return udp_pernet(net)->timeouts; } /* Returns verdict for packet, and may modify conntracktype */ @@ -205,14 +204,12 @@ static struct ctl_table_header *udp_sysctl_header; static struct ctl_table udp_sysctl_table[] = { { .procname = "nf_conntrack_udp_timeout", - .data = &udp_timeouts[UDP_CT_UNREPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_udp_timeout_stream", - .data = &udp_timeouts[UDP_CT_REPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -223,14 +220,12 @@ static struct ctl_table udp_sysctl_table[] = { static struct ctl_table udp_compat_sysctl_table[] = { { .procname = "ip_conntrack_udp_timeout", - .data = &udp_timeouts[UDP_CT_UNREPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_udp_timeout_stream", - .data = &udp_timeouts[UDP_CT_REPLIED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -240,6 +235,87 @@ static struct ctl_table udp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL + struct nf_udp_net *un = (struct nf_udp_net *)pn; + if (pn->ctl_table) + return 0; + pn->ctl_table = kmemdup(udp_sysctl_table, + sizeof(udp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; + pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED]; +#endif + return 0; +} + +static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct nf_udp_net *un = (struct nf_udp_net *)pn; + pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, + sizeof(udp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; + pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED]; +#endif +#endif + return 0; +} + +static void udp_init_net_data(struct nf_udp_net *un) +{ + int i; +#ifdef CONFIG_SYSCTL + if (!un->pn.ctl_table) { +#else + if (!un->pn.user++) { +#endif + for (i = 0; i < UDP_CT_MAX; i++) + un->timeouts[i] = udp_timeouts[i]; + } +} + +static int udpv4_init_net(struct net *net) +{ + int ret; + struct nf_udp_net *un = udp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)un; + + udp_init_net_data(un); + + ret = udp_kmemdup_compat_sysctl_table(pn); + if (ret < 0) + return ret; + + ret = udp_kmemdup_sysctl_table(pn); +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (ret < 0) { + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + } +#endif +#endif + return ret; +} + +static int udpv6_init_net(struct net *net) +{ + struct nf_udp_net *un = udp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)un; + + udp_init_net_data(un); + return udp_kmemdup_sysctl_table(pn); +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, @@ -275,6 +351,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .ctl_compat_table = udp_compat_sysctl_table, #endif #endif + .init_net = udpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -310,5 +387,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .ctl_table_header = &udp_sysctl_header, .ctl_table = udp_sysctl_table, #endif + .init_net = udpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); -- cgit v1.2.3-70-g09d2 From 4b626b9c5d35b4f99b073dc5d6457abddcbcf429 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:14 +0000 Subject: netfilter: nf_ct_icmp: add namespace support This patch adds namespace support for ICMP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 6 +++++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 38 +++++++++++++++++++++++++--- net/netfilter/nf_conntrack_proto.c | 2 ++ 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 7bd14ab8ce1..3d8e9e3b08a 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -45,10 +45,16 @@ struct nf_udp_net { unsigned int timeouts[UDP_CT_MAX]; }; +struct nf_icmp_net { + struct nf_proto_net pn; + unsigned int timeout; +}; + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; struct nf_udp_net udp; + struct nf_icmp_net icmp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 0847e373d33..a0eabeb36b9 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -23,6 +23,11 @@ static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; +static inline struct nf_icmp_net *icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} + static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -77,7 +82,7 @@ static int icmp_print_tuple(struct seq_file *s, static unsigned int *icmp_get_timeouts(struct net *net) { - return &nf_ct_icmp_timeout; + return &icmp_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -312,7 +317,6 @@ static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { .procname = "nf_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -323,7 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = { static struct ctl_table icmp_compat_sysctl_table[] = { { .procname = "ip_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -333,6 +336,34 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int icmp_init_net(struct net *net) +{ + struct nf_icmp_net *in = icmp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)in; + in->timeout = nf_ct_icmp_timeout; + +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmp_sysctl_table, + sizeof(icmp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, + sizeof(icmp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) { + kfree(pn->ctl_table); + pn->ctl_table = NULL; + return -ENOMEM; + } + pn->ctl_compat_table[0].data = &in->timeout; +#endif +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, @@ -369,4 +400,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .ctl_compat_table = icmp_compat_sysctl_table, #endif #endif + .init_net = icmp_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9c6aee51dea..dbade5f2b1d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -307,6 +307,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; case IPPROTO_UDP: return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; + case IPPROTO_ICMP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: -- cgit v1.2.3-70-g09d2 From 7080ba0955438ecd2885c1b73fbd9760b1594a41 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:15 +0000 Subject: netfilter: nf_ct_icmp: add namespace support This patch adds namespace support for ICMPv6 protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 + net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 25 +++++++++++++++++++++++-- net/netfilter/nf_conntrack_proto.c | 2 ++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 3d8e9e3b08a..3aecdc7a84f 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -55,6 +55,7 @@ struct nf_ip_net { struct nf_tcp_net tcp; struct nf_udp_net udp; struct nf_icmp_net icmp; + struct nf_icmp_net icmpv6; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 3e81904fbbc..f606355200d 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -29,6 +29,11 @@ static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6; +} + static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) @@ -90,7 +95,7 @@ static int icmpv6_print_tuple(struct seq_file *s, static unsigned int *icmpv6_get_timeouts(struct net *net) { - return &nf_ct_icmpv6_timeout; + return &icmpv6_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -319,7 +324,6 @@ static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { .procname = "nf_conntrack_icmpv6_timeout", - .data = &nf_ct_icmpv6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -328,6 +332,22 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ +static int icmpv6_init_net(struct net *net) +{ + struct nf_icmp_net *in = icmpv6_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)in; + in->timeout = nf_ct_icmpv6_timeout; +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmpv6_sysctl_table, + sizeof(icmpv6_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + pn->ctl_table[0].data = &in->timeout; +#endif + return 0; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, @@ -359,4 +379,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .ctl_table_header = &icmpv6_sysctl_header, .ctl_table = icmpv6_sysctl_table, #endif + .init_net = icmpv6_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index dbade5f2b1d..1ea919450fc 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -309,6 +309,8 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; case IPPROTO_ICMP: return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; + case IPPROTO_ICMPV6: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmpv6; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: -- cgit v1.2.3-70-g09d2 From e76d0af5e45f4152e3fdcc103b753a8aff93fcb5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 Jun 2012 18:35:48 +0200 Subject: netfilter: nf_conntrack: remove now unused sysctl for nf_conntrack_l[3|4]proto Since the sysctl data for l[3|4]proto now resides in pernet nf_proto_net. We can now remove this unused fields from struct nf_contrack_l[3,4]proto. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 2 -- include/net/netfilter/nf_conntrack_l4proto.h | 10 ---------- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1 - net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 -------- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 ----- net/netfilter/nf_conntrack_proto_generic.c | 8 -------- net/netfilter/nf_conntrack_proto_sctp.c | 15 --------------- net/netfilter/nf_conntrack_proto_tcp.c | 15 --------------- net/netfilter/nf_conntrack_proto_udp.c | 15 --------------- net/netfilter/nf_conntrack_proto_udplite.c | 12 ------------ 10 files changed, 91 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index d6df8c71a7f..6f7c13f4ac0 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,9 +64,7 @@ struct nf_conntrack_l3proto { size_t nla_size; #ifdef CONFIG_SYSCTL - struct ctl_table_header *ctl_table_header; const char *ctl_table_path; - struct ctl_table *ctl_table; #endif /* CONFIG_SYSCTL */ /* Init l3proto pernet data */ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index d621c91de5c..cfa2f89b031 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -93,16 +93,6 @@ struct nf_conntrack_l4proto { unsigned int nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; -#endif - -#ifdef CONFIG_SYSCTL - struct ctl_table_header **ctl_table_header; - struct ctl_table *ctl_table; - unsigned int *ctl_table_users; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct ctl_table_header *ctl_compat_table_header; - struct ctl_table *ctl_compat_table; -#endif #endif int *net_id; /* Init l4proto pernet data */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5c66203af51..d79b961a800 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -380,7 +380,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { #endif #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) .ctl_table_path = "net/ipv4/netfilter", - .ctl_table = ip_ct_sysctl_table, #endif .init_net = ipv4_init_net, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index a0eabeb36b9..2bca7a5e422 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -313,7 +313,6 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { { .procname = "nf_conntrack_icmp_timeout", @@ -393,12 +392,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .nla_policy = icmp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &icmp_sysctl_header, - .ctl_table = icmp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = icmp_compat_sysctl_table, -#endif -#endif .init_net = icmp_init_net, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index f606355200d..1b7818f15f3 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -320,7 +320,6 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { { .procname = "nf_conntrack_icmpv6_timeout", @@ -375,9 +374,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .nla_policy = icmpv6_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &icmpv6_sysctl_header, - .ctl_table = icmpv6_sysctl_table, -#endif .init_net = icmpv6_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 19bc880eb4e..e4e2d2a38d3 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -111,7 +111,6 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { { .procname = "nf_conntrack_generic_timeout", @@ -182,12 +181,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .nla_policy = generic_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_header = &generic_sysctl_header, - .ctl_table = generic_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = generic_compat_sysctl_table, -#endif -#endif .init_net = generic_init_net, }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 9e5738db34d..d785f2c4182 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -610,8 +610,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { #ifdef CONFIG_SYSCTL -static unsigned int sctp_sysctl_table_users; -static struct ctl_table_header *sctp_sysctl_header; static struct ctl_table sctp_sysctl_table[] = { { .procname = "nf_conntrack_sctp_timeout_closed", @@ -832,14 +830,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &sctp_sysctl_table_users, - .ctl_table_header = &sctp_sysctl_header, - .ctl_table = sctp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = sctp_compat_sysctl_table, -#endif -#endif .net_id = &sctp_net_id, .init_net = sctpv4_init_net, }; @@ -873,11 +863,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#endif -#ifdef CONFIG_SYSCTL - .ctl_table_users = &sctp_sysctl_table_users, - .ctl_table_header = &sctp_sysctl_header, - .ctl_table = sctp_sysctl_table, #endif .net_id = &sctp_net_id, .init_net = sctpv6_init_net, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a053f6786b3..e57f2a888da 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1364,8 +1364,6 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static unsigned int tcp_sysctl_table_users; -static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { .procname = "nf_conntrack_tcp_timeout_syn_sent", @@ -1684,14 +1682,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &tcp_sysctl_table_users, - .ctl_table_header = &tcp_sysctl_header, - .ctl_table = tcp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = tcp_compat_sysctl_table, -#endif -#endif .init_net = tcpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1728,11 +1718,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &tcp_sysctl_table_users, - .ctl_table_header = &tcp_sysctl_header, - .ctl_table = tcp_sysctl_table, -#endif .init_net = tcpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index f56c8905ddf..db7abad44bc 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -199,8 +199,6 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static unsigned int udp_sysctl_table_users; -static struct ctl_table_header *udp_sysctl_header; static struct ctl_table udp_sysctl_table[] = { { .procname = "nf_conntrack_udp_timeout", @@ -343,14 +341,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udp_sysctl_table_users, - .ctl_table_header = &udp_sysctl_header, - .ctl_table = udp_sysctl_table, -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - .ctl_compat_table = udp_compat_sysctl_table, -#endif -#endif .init_net = udpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -382,11 +372,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .nla_policy = udp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udp_sysctl_table_users, - .ctl_table_header = &udp_sysctl_header, - .ctl_table = udp_sysctl_table, -#endif .init_net = udpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 7f85b0850a4..2e25e985e8c 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -215,8 +215,6 @@ udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL -static unsigned int udplite_sysctl_table_users; -static struct ctl_table_header *udplite_sysctl_header; static struct ctl_table udplite_sysctl_table[] = { { .procname = "nf_conntrack_udplite_timeout", @@ -287,11 +285,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udplite_sysctl_table_users, - .ctl_table_header = &udplite_sysctl_header, - .ctl_table = udplite_sysctl_table, -#endif .net_id = &udplite_net_id, .init_net = udplite_init_net, }; @@ -324,11 +317,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -#ifdef CONFIG_SYSCTL - .ctl_table_users = &udplite_sysctl_table_users, - .ctl_table_header = &udplite_sysctl_header, - .ctl_table = udplite_sysctl_table, -#endif .net_id = &udplite_net_id, .init_net = udplite_init_net, }; -- cgit v1.2.3-70-g09d2 From 8264deb81853462da5cbcfb19b54c4fd9f3d88ba Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:23 +0000 Subject: netfilter: nf_conntrack: add namespace support for cttimeout This patch adds namespace support for cttimeout. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 ++- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 ++++-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++++-- net/netfilter/nf_conntrack_proto_dccp.c | 5 +++-- net/netfilter/nf_conntrack_proto_generic.c | 6 ++++-- net/netfilter/nf_conntrack_proto_gre.c | 8 +++++--- net/netfilter/nf_conntrack_proto_sctp.c | 6 ++++-- net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++-- net/netfilter/nf_conntrack_proto_udp.c | 8 +++++--- net/netfilter/nf_conntrack_proto_udplite.c | 8 +++++--- net/netfilter/nfnetlink_cttimeout.c | 13 ++++++++----- 11 files changed, 48 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index cfa2f89b031..81c52b5205f 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -87,7 +87,8 @@ struct nf_conntrack_l4proto { #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { size_t obj_size; - int (*nlattr_to_obj)(struct nlattr *tb[], void *data); + int (*nlattr_to_obj)(struct nlattr *tb[], + struct net *net, void *data); int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); unsigned int nlattr_max; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 2bca7a5e422..041923cb67a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -279,16 +279,18 @@ static int icmp_nlattr_tuple_size(void) #include #include -static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_icmp_net *in = icmp_pernet(net); if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; } else { /* Set default ICMP timeout. */ - *timeout = nf_ct_icmp_timeout; + *timeout = in->timeout; } return 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 1b7818f15f3..63ed0121836 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -286,16 +286,18 @@ static int icmpv6_nlattr_tuple_size(void) #include #include -static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_icmp_net *in = icmpv6_pernet(net); if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; } else { /* Set default ICMPv6 timeout. */ - *timeout = nf_ct_icmpv6_timeout; + *timeout = in->timeout; } return 0; } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8d798a613e3..c33f76af913 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -712,9 +712,10 @@ static int dccp_nlattr_size(void) #include #include -static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { - struct dccp_net *dn = dccp_pernet(&init_net); + struct dccp_net *dn = dccp_pernet(net); unsigned int *timeouts = data; int i; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e4e2d2a38d3..bb0e74fe0fa 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -75,16 +75,18 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, #include #include -static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeout = data; + struct nf_generic_net *gn = generic_pernet(net); if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; else { /* Set default generic timeout. */ - *timeout = nf_ct_generic_timeout; + *timeout = gn->timeout; } return 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index e36973f9ef5..25ba5a2f5ed 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -304,13 +304,15 @@ static void gre_destroy(struct nf_conn *ct) #include #include -static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct netns_proto_gre *net_gre = gre_pernet(net); /* set default timeouts for GRE. */ - timeouts[GRE_CT_UNREPLIED] = gre_timeouts[GRE_CT_UNREPLIED]; - timeouts[GRE_CT_REPLIED] = gre_timeouts[GRE_CT_REPLIED]; + timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { timeouts[GRE_CT_UNREPLIED] = diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d785f2c4182..8fb0582ad39 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -562,14 +562,16 @@ static int sctp_nlattr_size(void) #include #include -static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct sctp_net *sn = sctp_pernet(net); int i; /* set default SCTP timeouts. */ for (i=0; itimeouts[i]; /* there's a 1:1 mapping between attributes and protocol states. */ for (i=CTA_TIMEOUT_SCTP_UNSPEC+1; i #include -static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct nf_tcp_net *tn = tcp_pernet(net); int i; /* set default TCP timeouts. */ for (i=0; itimeouts[i]; if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) { timeouts[TCP_CONNTRACK_SYN_SENT] = diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index db7abad44bc..360565a95de 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -156,13 +156,15 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, #include #include -static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct nf_udp_net *un = udp_pernet(net); /* set default timeouts for UDP. */ - timeouts[UDP_CT_UNREPLIED] = udp_timeouts[UDP_CT_UNREPLIED]; - timeouts[UDP_CT_REPLIED] = udp_timeouts[UDP_CT_REPLIED]; + timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; + timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { timeouts[UDP_CT_UNREPLIED] = diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 2e25e985e8c..b32e700f8dd 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -172,13 +172,15 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, #include #include -static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) { unsigned int *timeouts = data; + struct udplite_net *un = udplite_pernet(net); /* set default timeouts for UDPlite. */ - timeouts[UDPLITE_CT_UNREPLIED] = udplite_timeouts[UDPLITE_CT_UNREPLIED]; - timeouts[UDPLITE_CT_REPLIED] = udplite_timeouts[UDPLITE_CT_REPLIED]; + timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; + timeouts[UDPLITE_CT_REPLIED] = un->timeouts[UDPLITE_CT_REPLIED]; if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { timeouts[UDPLITE_CT_UNREPLIED] = diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3e655288d1d..cdecbc8fe96 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -49,8 +49,9 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { static int ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, - struct nf_conntrack_l4proto *l4proto, - const struct nlattr *attr) + struct nf_conntrack_l4proto *l4proto, + struct net *net, + const struct nlattr *attr) { int ret = 0; @@ -60,7 +61,8 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, attr, l4proto->ctnl_timeout.nla_policy); - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, &timeout->data); + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, + &timeout->data); } return ret; } @@ -74,6 +76,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, __u8 l4num; struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; + struct net *net = sock_net(skb->sk); char *name; int ret; @@ -117,7 +120,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(matching, l4proto, + ret = ctnl_timeout_parse_policy(matching, l4proto, net, cda[CTA_TIMEOUT_DATA]); return ret; } @@ -132,7 +135,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(timeout, l4proto, + ret = ctnl_timeout_parse_policy(timeout, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; -- cgit v1.2.3-70-g09d2 From c8a627ed06d6d49bf65015a2185c519335c4c83f Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 8 Jun 2012 01:20:41 +0000 Subject: inetpeer: add namespace support for inetpeer now inetpeer doesn't support namespace,the information will be leaking across namespace. this patch move the global vars v4_peers and v6_peers to netns_ipv4 and netns_ipv6 as a field peers. add struct pernet_operations inetpeer_ops to initial pernet inetpeer data. and change family_to_base and inet_getpeer to support namespace. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/net/inetpeer.h | 10 ++++--- include/net/netns/ipv4.h | 2 +- include/net/netns/ipv6.h | 1 + net/ipv4/inetpeer.c | 68 +++++++++++++++++++++++++++++++++++------------- net/ipv4/route.c | 2 +- 5 files changed, 59 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 2040bff945d..fef9dfab5d4 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -75,7 +75,9 @@ static inline bool inet_metrics_new(const struct inet_peer *p) } /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create); +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create); static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) { @@ -83,7 +85,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(&daddr, create); + return inet_getpeer(&init_net, &daddr, create); } static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create) @@ -92,14 +94,14 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(&daddr, create); + return inet_getpeer(&init_net, &daddr, create); } /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); -extern void inetpeer_invalidate_tree(int family); +extern void inetpeer_invalidate_tree(struct net *net, int family); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index bbd023a1c9b..227f0cd9d3f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -30,7 +30,7 @@ struct netns_ipv4 { struct sock **icmp_sk; struct sock *tcp_sock; - + struct inet_peer_base *peers; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index b42be53587b..df0a5456a3f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -33,6 +33,7 @@ struct netns_ipv6 { struct netns_sysctl_ipv6 sysctl; struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; + struct inet_peer_base *peers; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *ip6table_filter; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dfba343b250..1c8527349c8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -88,18 +88,6 @@ struct inet_peer_base { int total; }; -static struct inet_peer_base v4_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), - .total = 0, -}; - -static struct inet_peer_base v6_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), - .total = 0, -}; - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -153,6 +141,46 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } +static int __net_init inetpeer_net_init(struct net *net) +{ + net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv4.peers == NULL) + return -ENOMEM; + + net->ipv4.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv4.peers->lock); + + net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv6.peers == NULL) + goto out_ipv6; + + net->ipv6.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv6.peers->lock); + + return 0; +out_ipv6: + kfree(net->ipv4.peers); + return -ENOMEM; +} + +static void __net_exit inetpeer_net_exit(struct net *net) +{ + inetpeer_invalidate_tree(net, AF_INET); + kfree(net->ipv4.peers); + net->ipv4.peers = NULL; + + inetpeer_invalidate_tree(net, AF_INET6); + kfree(net->ipv6.peers); + net->ipv6.peers = NULL; +} + +static struct pernet_operations inetpeer_ops = { + .init = inetpeer_net_init, + .exit = inetpeer_net_exit, +}; + /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -177,6 +205,7 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -401,9 +430,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(int family) +static struct inet_peer_base *family_to_base(struct net *net, + int family) { - return family == AF_INET ? &v4_peers : &v6_peers; + return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; } /* perform garbage collect on all items stacked during a lookup */ @@ -443,10 +473,12 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(daddr->family); + struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; @@ -571,10 +603,10 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(int family) +void inetpeer_invalidate_tree(struct net *net, int family) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(family); + struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 98b30d08efe..006c21cc232 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -938,7 +938,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(AF_INET); + inetpeer_invalidate_tree(net, AF_INET); } /* -- cgit v1.2.3-70-g09d2 From 54db0cc2ba0d38166acc2d6bae21721405305537 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 8 Jun 2012 01:21:40 +0000 Subject: inetpeer: add parameter net for inet_getpeer_v4,v6 add struct net as a parameter of inet_getpeer_v[4,6], use net to replace &init_net. and modify some places to provide net for inet_getpeer_v[4,6] Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/net/inetpeer.h | 12 ++++++++---- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 6 +++++- net/ipv4/route.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 6 ++++-- net/ipv6/route.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 ++++-- 7 files changed, 29 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index fef9dfab5d4..20e67db69ac 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -79,22 +79,26 @@ struct inet_peer *inet_getpeer(struct net *net, const struct inetpeer_addr *daddr, int create); -static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) +static inline struct inet_peer *inet_getpeer_v4(struct net *net, + __be32 v4daddr, + int create) { struct inetpeer_addr daddr; daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(&init_net, &daddr, create); + return inet_getpeer(net, &daddr, create); } -static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create) +static inline struct inet_peer *inet_getpeer_v6(struct net *net, + const struct in6_addr *v6daddr, + int create) { struct inetpeer_addr daddr; *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(&init_net, &daddr, create); + return inet_getpeer(net, &daddr, create); } /* can be called from BH context or outside */ diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5ff2a51b6d0..85190e69297 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, if (q == NULL) return NULL; + q->net = nf; f->constructor(q, arg); atomic_add(f->qsize, &nf->mem); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); atomic_set(&q->refcnt, 1); - q->net = nf; return q; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9dbd3dd6022..22c6bab9717 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) static void ip4_frag_init(struct inet_frag_queue *q, void *a) { struct ipq *qp = container_of(q, struct ipq, q); + struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, + frags); + struct net *net = container_of(ipv4, struct net, ipv4); + struct ip4_create_arg *arg = a; qp->protocol = arg->iph->protocol; @@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 006c21cc232..2c9f73f3b7c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1328,9 +1328,10 @@ static u32 rt_peer_genid(void) void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v4(daddr, create); + peer = inet_getpeer_v4(net, daddr, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1694,7 +1695,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, unsigned short est_mtu = 0; struct inet_peer *peer; - peer = inet_getpeer_v4(iph->daddr, 1); + peer = inet_getpeer_v4(net, iph->daddr, 1); if (peer) { unsigned short mtu = new_mtu; @@ -1935,6 +1936,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; int create = 0; @@ -1944,7 +1946,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); + rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create); if (peer) { rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3d9c1a4b881..f485b451f92 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1824,11 +1824,12 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) { struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct inet_peer *peer; if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { - peer = inet_getpeer_v4(inet->inet_daddr, 1); + peer = inet_getpeer_v4(net, inet->inet_daddr, 1); *release_it = true; } else { if (!rt->peer) @@ -1844,8 +1845,9 @@ EXPORT_SYMBOL(tcp_v4_get_peer); void *tcp_v4_tw_get_peer(struct sock *sk) { const struct inet_timewait_sock *tw = inet_twsk(sk); + struct net *net = sock_net(sk); - return inet_getpeer_v4(tw->tw_daddr, 1); + return inet_getpeer_v4(net, tw->tw_daddr, 1); } EXPORT_SYMBOL(tcp_v4_tw_get_peer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982ad3f..4eca0130cce 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -306,9 +306,10 @@ static u32 rt6_peer_genid(void) void rt6_bind_peer(struct rt6_info *rt, int create) { + struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); else diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 80758255556..1a9cdd09f11 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1736,11 +1736,12 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct inet_peer *peer; if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { - peer = inet_getpeer_v6(&np->daddr, 1); + peer = inet_getpeer_v6(net, &np->daddr, 1); *release_it = true; } else { if (!rt->rt6i_peer) @@ -1756,11 +1757,12 @@ static void *tcp_v6_tw_get_peer(struct sock *sk) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); const struct inet_timewait_sock *tw = inet_twsk(sk); + struct net *net = sock_net(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); - return inet_getpeer_v6(&tw6->tw_v6_daddr, 1); + return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1); } static struct timewait_sock_ops tcp6_timewait_sock_ops = { -- cgit v1.2.3-70-g09d2 From 7123aaa3a1416529ce461e98108e6b343b294643 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Jun 2012 05:03:21 +0000 Subject: af_unix: speedup /proc/net/unix /proc/net/unix has quadratic behavior, and can hold unix_table_lock for a while if high number of unix sockets are alive. (90 ms for 200k sockets...) We already have a hash table, so its quite easy to use it. Problem is unbound sockets are still hashed in a single hash slot (unix_socket_table[UNIX_HASH_TABLE]) This patch also spreads unbound sockets to 256 hash slots, to speedup both /proc/net/unix and unix_diag. Time to read /proc/net/unix with 200k unix sockets : (time dd if=/proc/net/unix of=/dev/null bs=4k) before : 520 secs after : 2 secs Signed-off-by: Eric Dumazet Cc: Steven Whitehouse Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/af_unix.h | 3 +- net/unix/af_unix.c | 110 +++++++++++++++++++++++++++++--------------------- net/unix/diag.c | 6 ++- 3 files changed, 70 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2ee33da36a7..b5f8988e428 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -14,10 +14,11 @@ extern struct sock *unix_get_socket(struct file *filp); extern struct sock *unix_peer_get(struct sock *); #define UNIX_HASH_SIZE 256 +#define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; extern spinlock_t unix_table_lock; -extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { atomic_t refcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 641f2e47f16..cf83f6b5ac9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,15 +115,24 @@ #include #include -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -645,7 +654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -2239,47 +2248,58 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) -{ - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} -static struct sock *next_unix_socket(int *i, struct sock *s) -{ - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) struct unix_iter_state { struct seq_net_private p; - int i; }; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) continue; - if (off == pos) - return s; - ++off; + if (++count == offset) + break; } + + return sk; +} + +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) +{ + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; + } + + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; + +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); + return NULL; } @@ -2287,22 +2307,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) diff --git a/net/unix/diag.c b/net/unix/diag.c index 47d3002737f..7e8a24bff34 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -195,7 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = s_num = cb->args[1]; spin_lock(&unix_table_lock); - for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + for (slot = s_slot; + slot < ARRAY_SIZE(unix_socket_table); + s_num = 0, slot++) { struct sock *sk; struct hlist_node *node; @@ -228,7 +230,7 @@ static struct sock *unix_lookup_by_ino(int ino) struct sock *sk; spin_lock(&unix_table_lock); - for (i = 0; i <= UNIX_HASH_SIZE; i++) { + for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { struct hlist_node *node; sk_for_each(sk, node, &unix_socket_table[i]) -- cgit v1.2.3-70-g09d2 From 1c2e004183178e1947882cd2e74f37826f45230e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 8 Jun 2012 23:31:13 +0800 Subject: Bluetooth: Add support for encryption key refresh With LE/SMP the completion of a security level elavation from medium to high is indicated by a HCI Encryption Key Refresh Complete event. The necessary behavior upon receiving this event is a mix of what's done for auth_complete and encryption_change, which is also where most of the event handling code has been copied from. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 6 ++++++ net/bluetooth/hci_event.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 66a7b579e31..3def64ba77f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1144,6 +1144,12 @@ struct extended_inquiry_info { __u8 data[240]; } __packed; +#define HCI_EV_KEY_REFRESH_COMPLETE 0x30 +struct hci_ev_key_refresh_complete { + __u8 status; + __le16 handle; +} __packed; + #define HCI_EV_IO_CAPA_REQUEST 0x31 struct hci_ev_io_capa_request { bdaddr_t bdaddr; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4eefb7f65cf..94ad124a4ea 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3043,6 +3043,50 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +static void hci_key_refresh_complete_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_key_refresh_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %u handle %u", hdev->name, ev->status, + __le16_to_cpu(ev->handle)); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (!conn) + goto unlock; + + if (!ev->status) + conn->sec_level = conn->pending_sec_level; + + clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); + + if (ev->status && conn->state == BT_CONNECTED) { + hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_put(conn); + goto unlock; + } + + if (conn->state == BT_CONFIG) { + if (!ev->status) + conn->state = BT_CONNECTED; + + hci_proto_connect_cfm(conn, ev->status); + hci_conn_put(conn); + } else { + hci_auth_cfm(conn, ev->status); + + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + +unlock: + hci_dev_unlock(hdev); +} + static inline u8 hci_get_auth_req(struct hci_conn *conn) { /* If remote requests dedicated bonding follow that lead */ @@ -3559,6 +3603,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_extended_inquiry_result_evt(hdev, skb); break; + case HCI_EV_KEY_REFRESH_COMPLETE: + hci_key_refresh_complete_evt(hdev, skb); + break; + case HCI_EV_IO_CAPA_REQUEST: hci_io_capa_request_evt(hdev, skb); break; -- cgit v1.2.3-70-g09d2 From fbfe95a42e90b3dd079cc9019ba7d7700feee0f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 Jun 2012 23:24:18 -0700 Subject: inet: Create and use rt{,6}_get_peer_create(). There's a lot of places that open-code rt{,6}_get_peer() only because they want to set 'create' to one. So add an rt{,6}_get_peer_create() for their sake. There were also a few spots open-coding plain rt{,6}_get_peer() and those are transformed here as well. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 17 +++++++++++++---- include/net/route.h | 14 ++++++++++++-- net/ipv4/icmp.c | 5 ++--- net/ipv4/route.c | 35 +++++++++-------------------------- net/ipv4/tcp_ipv4.c | 4 +--- net/ipv6/icmp.c | 6 +++--- net/ipv6/ip6_output.c | 11 ++++------- net/ipv6/ndisc.c | 6 +++--- net/ipv6/route.c | 5 +---- net/ipv6/tcp_ipv6.c | 4 +--- 10 files changed, 49 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 37c1a1ed82c..73d75028812 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -53,18 +53,27 @@ static inline unsigned int rt6_flags2srcprefs(int flags) return (flags >> 3) & 7; } -extern void rt6_bind_peer(struct rt6_info *rt, - int create); +extern void rt6_bind_peer(struct rt6_info *rt, int create); -static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) +static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) { if (rt->rt6i_peer) return rt->rt6i_peer; - rt6_bind_peer(rt, 0); + rt6_bind_peer(rt, create); return rt->rt6i_peer; } +static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 0); +} + +static inline struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct net *net, diff --git a/include/net/route.h b/include/net/route.h index ed2b78e2375..433fc6c1d40 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -296,15 +296,25 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); -static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) +static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) { if (rt->peer) return rt->peer; - rt_bind_peer(rt, daddr, 0); + rt_bind_peer(rt, daddr, create); return rt->peer; } +static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) +{ + return __rt_get_peer(rt, daddr, 0); +} + +static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr) +{ + return __rt_get_peer(rt, daddr, 1); +} + static inline int inet_iif(const struct sk_buff *skb) { return skb_rtable(skb)->rt_iif; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c75efbdc71c..0c78ef1e5dd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -253,9 +253,8 @@ static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, /* Limit if icmp type is enabled in ratemask. */ if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - if (!rt->peer) - rt_bind_peer(rt, fl4->daddr, 1); - rc = inet_peer_xrlim_allow(rt->peer, + struct inet_peer *peer = rt_get_peer_create(rt, fl4->daddr); + rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); } out: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c9f73f3b7c..7a4d724765e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -162,10 +162,7 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) struct inet_peer *peer; u32 *p = NULL; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; @@ -1364,14 +1361,13 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) struct rtable *rt = (struct rtable *) dst; if (rt && !(rt->dst.flags & DST_NOPEER)) { - if (rt->peer == NULL) - rt_bind_peer(rt, rt->rt_dst, 1); + struct inet_peer *peer = rt_get_peer_create(rt, rt->rt_dst); /* If peer is attached to destination, it is never detached, so that we need not to grab a lock to dereference it. */ - if (rt->peer) { - iph->id = htons(inet_getid(rt->peer, more)); + if (peer) { + iph->id = htons(inet_getid(peer, more)); return; } } else if (!rt) @@ -1481,10 +1477,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway != old_gw) continue; - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { if (peer->redirect_learned.a4 != new_gw) { peer->redirect_learned.a4 = new_gw; @@ -1579,9 +1572,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); return; @@ -1646,9 +1637,7 @@ static int ip_error(struct sk_buff *skb) break; } - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); send = true; if (peer) { @@ -1754,9 +1743,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_confirm(dst); - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, rt->rt_dst); if (peer) { unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); @@ -1782,12 +1769,8 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static void ipv4_validate_peer(struct rtable *rt) { if (rt->rt_peer_genid != rt_peer_genid()) { - struct inet_peer *peer; - - if (!rt->peer) - rt_bind_peer(rt, rt->rt_dst, 0); + struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); - peer = rt->peer; if (peer) { check_peer_pmtu(&rt->dst, peer); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f485b451f92..833e8d96a63 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1832,9 +1832,7 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) peer = inet_getpeer_v4(net, inet->inet_daddr, 1); *release_it = true; } else { - if (!rt->peer) - rt_bind_peer(rt, inet->inet_daddr, 1); - peer = rt->peer; + peer = rt_get_peer_create(rt, inet->inet_daddr); *release_it = false; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 091a2971c7b..ed89bba745a 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -188,14 +188,14 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; + struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); + peer = rt6_get_peer_create(rt); + res = inet_peer_xrlim_allow(peer, tmo); } dst_release(dst); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 17b8c67998b..62fcf3e48ac 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -463,6 +463,7 @@ int ip6_forward(struct sk_buff *skb) */ if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; + struct inet_peer *peer; struct rt6_info *rt; /* @@ -476,13 +477,12 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); + peer = rt6_get_peer_create(rt); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); @@ -602,11 +602,8 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) int old, new; if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; + struct inet_peer *peer = rt6_get_peer_create(rt); - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - peer = rt->rt6i_peer; if (peer) { fhdr->identification = htonl(inet_getid(peer, 0)); return; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 54f62d3b8dd..69a6330dea9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1472,6 +1472,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + struct inet_peer *peer; struct sk_buff *buff; struct icmp6hdr *icmph; struct in6_addr saddr_buf; @@ -1518,9 +1519,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) + peer = rt6_get_peer_create(rt); + if (!inet_peer_xrlim_allow(peer, 1*HZ)) goto release; if (dev->addr_len) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4eca0130cce..8a986be4aed 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -99,10 +99,7 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) if (!(rt->dst.flags & DST_HOST)) return NULL; - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - - peer = rt->rt6i_peer; + peer = rt6_get_peer_create(rt); if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1a9cdd09f11..218433cb992 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1744,9 +1744,7 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) peer = inet_getpeer_v6(net, &np->daddr, 1); *release_it = true; } else { - if (!rt->rt6i_peer) - rt6_bind_peer(rt, 1); - peer = rt->rt6i_peer; + peer = rt6_get_peer_create(rt); *release_it = false; } -- cgit v1.2.3-70-g09d2 From 4670fd819e7f47392c7c6fc6168ea2857c66d163 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 01:25:47 -0700 Subject: tcp: Get rid of inetpeer special cases. The get_peer method TCP uses is full of special cases that make no sense accommodating, and it also gets in the way of doing more reasonable things here. First of all, if the socket doesn't have a usable cached route, there is no sense in trying to optimize timewait recycling. Likewise for the case where we have IP options, such as SRR enabled, that make the IP header destination address (and thus the destination address of the route key) differ from that of the connection's destination address. Just return a NULL peer in these cases, and thus we're also able to get rid of the clumsy inetpeer release logic. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 +- include/net/tcp.h | 2 +- net/ipv4/tcp_ipv4.c | 21 ++++++++------------- net/ipv4/tcp_minisocks.c | 5 +---- net/ipv6/tcp_ipv6.c | 21 ++++++++------------- 5 files changed, 19 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7d83f90f203..e1b7734c456 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -43,7 +43,7 @@ struct inet_connection_sock_af_ops { struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); - struct inet_peer *(*get_peer)(struct sock *sk, bool *release_it); + struct inet_peer *(*get_peer)(struct sock *sk); u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; diff --git a/include/net/tcp.h b/include/net/tcp.h index e79aa48d9fc..42459186603 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -327,7 +327,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); -extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); +extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 833e8d96a63..77f049d00db 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1820,23 +1820,18 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) +struct inet_peer *tcp_v4_get_peer(struct sock *sk) { struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - struct inet_peer *peer; - - if (!rt || - inet->cork.fl.u.ip4.daddr != inet->inet_daddr) { - peer = inet_getpeer_v4(net, inet->inet_daddr, 1); - *release_it = true; - } else { - peer = rt_get_peer_create(rt, inet->inet_daddr); - *release_it = false; - } - return peer; + /* If we don't have a valid cached route, or we're doing IP + * options which make the IPv4 header destination address + * different from our peer's, do not bother with this. + */ + if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) + return NULL; + return rt_get_peer_create(rt, inet->inet_daddr); } EXPORT_SYMBOL(tcp_v4_get_peer); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b85d9fe7d66..fef9dbf3af0 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,9 +60,8 @@ static bool tcp_remember_stamp(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct inet_peer *peer; - bool release_it; - peer = icsk->icsk_af_ops->get_peer(sk, &release_it); + peer = icsk->icsk_af_ops->get_peer(sk); if (peer) { if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && @@ -70,8 +69,6 @@ static bool tcp_remember_stamp(struct sock *sk) peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; peer->tcp_ts = tp->rx_opt.ts_recent; } - if (release_it) - inet_putpeer(peer); return true; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 218433cb992..b5ecf37b61a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1732,23 +1732,18 @@ do_time_wait: goto discard_it; } -static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) +static struct inet_peer *tcp_v6_get_peer(struct sock *sk) { struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sock_net(sk); - struct inet_peer *peer; - - if (!rt || - !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) { - peer = inet_getpeer_v6(net, &np->daddr, 1); - *release_it = true; - } else { - peer = rt6_get_peer_create(rt); - *release_it = false; - } - return peer; + /* If we don't have a valid cached route, or we're doing IP + * options which make the IPv6 header destination address + * different from our peer's, do not bother with this. + */ + if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) + return NULL; + return rt6_get_peer_create(rt); } static void *tcp_v6_tw_get_peer(struct sock *sk) -- cgit v1.2.3-70-g09d2 From d13e14148154e5ce58467e76321eef1dd912c416 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 9 Jun 2012 10:31:09 +0200 Subject: mac80211: add some missing kernel-doc Add a few kernel-doc descriptions that were missed during development. Reported-by: Randy Dunlap Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/sta_info.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1937c7d9830..95e39b6a02e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1940,6 +1940,11 @@ enum ieee80211_rate_control_changed { * to also unregister the device. If it returns 1, then mac80211 * will also go through the regular complete restart on resume. * + * @set_wakeup: Enable or disable wakeup when WoWLAN configuration is + * modified. The reason is that device_set_wakeup_enable() is + * supposed to be called when the configuration changes, not only + * in suspend(). + * * @add_interface: Called when a netdevice attached to the hardware is * enabled. Because it is not called for monitor mode devices, @start * and @stop must be implemented. @@ -2966,6 +2971,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, * ieee80211_generic_frame_duration - Calculate the duration field for a frame * @hw: pointer obtained from ieee80211_alloc_hw(). * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @band: the band to calculate the frame duration on * @frame_len: the length of the frame. * @rate: the rate at which the frame is going to be transmitted. * diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3bb24a121c9..525ce5077e1 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -278,6 +278,8 @@ struct sta_ampdu_mlme { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered + * @supports_40mhz: tracks whether the station advertised 40 MHz support + * as we overwrite its HT parameters with the currently used value */ struct sta_info { /* General information, mostly static */ -- cgit v1.2.3-70-g09d2 From 2397849baa7c44c242e5d5142d5d16d1e7ed53d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 14:56:12 -0700 Subject: [PATCH] tcp: Cache inetpeer in timewait socket, and only when necessary. Since it's guarenteed that we will access the inetpeer if we're trying to do timewait recycling and TCP options were enabled on the connection, just cache the peer in the timewait socket. In the future, inetpeer lookups will be context dependent (per routing realm), and this helps facilitate that as well. Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- include/net/tcp.h | 1 - include/net/timewait_sock.h | 8 -------- net/ipv4/tcp_ipv4.c | 10 ---------- net/ipv4/tcp_minisocks.c | 27 ++++++++++++++++++++------- net/ipv6/tcp_ipv6.c | 13 ------------- 6 files changed, 22 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4c5b6328337..23e8234f75a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -506,8 +506,9 @@ struct tcp_timewait_sock { u32 tw_rcv_wnd; u32 tw_ts_recent; long tw_ts_recent_stamp; + struct inet_peer *tw_peer; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *tw_md5_key; + struct tcp_md5sig_key *tw_md5_key; #endif /* Few sockets in timewait have cookies; in that case, then this * object holds a reference to them (tw_cookie_values->kref). diff --git a/include/net/tcp.h b/include/net/tcp.h index 42459186603..9332f342259 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -328,7 +328,6 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); -extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 8d6689cb2c6..68f0ecad6c6 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -22,7 +22,6 @@ struct timewait_sock_ops { int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); void (*twsk_destructor)(struct sock *sk); - void *(*twsk_getpeer)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -41,11 +40,4 @@ static inline void twsk_destructor(struct sock *sk) sk->sk_prot->twsk_prot->twsk_destructor(sk); } -static inline void *twsk_getpeer(struct sock *sk) -{ - if (sk->sk_prot->twsk_prot->twsk_getpeer) - return sk->sk_prot->twsk_prot->twsk_getpeer(sk); - return NULL; -} - #endif /* _TIMEWAIT_SOCK_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 77f049d00db..fda2ca17135 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1835,20 +1835,10 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk) } EXPORT_SYMBOL(tcp_v4_get_peer); -void *tcp_v4_tw_get_peer(struct sock *sk) -{ - const struct inet_timewait_sock *tw = inet_twsk(sk); - struct net *net = sock_net(sk); - - return inet_getpeer_v4(net, tw->tw_daddr, 1); -} -EXPORT_SYMBOL(tcp_v4_tw_get_peer); - static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, - .twsk_getpeer = tcp_v4_tw_get_peer, }; const struct inet_connection_sock_af_ops ipv4_specific = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fef9dbf3af0..cb015317c9f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -77,20 +77,19 @@ static bool tcp_remember_stamp(struct sock *sk) static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) { + const struct tcp_timewait_sock *tcptw; struct sock *sk = (struct sock *) tw; struct inet_peer *peer; - peer = twsk_getpeer(sk); + tcptw = tcp_twsk(sk); + peer = tcptw->tw_peer; if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; peer->tcp_ts = tcptw->tw_ts_recent; } - inet_putpeer(peer); return true; } return false; @@ -314,9 +313,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); bool recycle_ok = false; + bool recycle_on = false; - if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) + if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) { recycle_ok = tcp_remember_stamp(sk); + recycle_on = true; + } if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); @@ -324,8 +326,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + struct inet_sock *inet = inet_sk(sk); + struct inet_peer *peer = NULL; - tw->tw_transparent = inet_sk(sk)->transparent; + tw->tw_transparent = inet->transparent; tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; @@ -347,6 +351,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } #endif + if (recycle_on) + peer = icsk->icsk_af_ops->get_peer(sk); + tcptw->tw_peer = peer; + if (peer) + atomic_inc(&peer->refcnt); + #ifdef CONFIG_TCP_MD5SIG /* * The timewait bucket does not have the key DB from the @@ -398,8 +408,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) void tcp_twsk_destructor(struct sock *sk) { -#ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); + + if (twsk->tw_peer) + inet_putpeer(twsk->tw_peer); +#ifdef CONFIG_TCP_MD5SIG if (twsk->tw_md5_key) { tcp_free_md5sig_pool(); kfree_rcu(twsk->tw_md5_key, rcu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5ecf37b61a..f91b0bfd12d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1746,23 +1746,10 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk) return rt6_get_peer_create(rt); } -static void *tcp_v6_tw_get_peer(struct sock *sk) -{ - const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - const struct inet_timewait_sock *tw = inet_twsk(sk); - struct net *net = sock_net(sk); - - if (tw->tw_family == AF_INET) - return tcp_v4_tw_get_peer(sk); - - return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1); -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, .twsk_destructor= tcp_twsk_destructor, - .twsk_getpeer = tcp_v6_tw_get_peer, }; static const struct inet_connection_sock_af_ops ipv6_specific = { -- cgit v1.2.3-70-g09d2 From c3426b47190d7c6785230c91a706fd42208b4120 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 16:27:05 -0700 Subject: inet: Initialize per-netns inetpeer roots in net/ipv{4,6}/route.c Instead of net/ipv4/inetpeer.c Signed-off-by: David S. Miller --- include/net/inetpeer.h | 9 +++++++ net/ipv4/inetpeer.c | 64 +++++++++++++------------------------------------- net/ipv4/route.c | 25 ++++++++++++++++++++ net/ipv6/route.c | 34 ++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 20e67db69ac..5a96c8edc52 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -65,6 +65,14 @@ struct inet_peer { atomic_t refcnt; }; +struct inet_peer_base { + struct inet_peer __rcu *root; + seqlock_t lock; + int total; +}; + +extern void inet_peer_base_init(struct inet_peer_base *); + void inet_initpeers(void) __init; #define INETPEER_METRICS_NEW (~(u32) 0) @@ -105,6 +113,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net, extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); +extern void __inetpeer_invalidate_tree(struct inet_peer_base *); extern void inetpeer_invalidate_tree(struct net *net, int family); /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 1c8527349c8..d0d72f89b27 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -82,11 +82,13 @@ static const struct inet_peer peer_fake_node = { .avl_height = 0 }; -struct inet_peer_base { - struct inet_peer __rcu *root; - seqlock_t lock; - int total; -}; +void inet_peer_base_init(struct inet_peer_base *bp) +{ + bp->root = peer_avl_empty_rcu; + seqlock_init(&bp->lock); + bp->total = 0; +} +EXPORT_SYMBOL_GPL(inet_peer_base_init); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ @@ -141,46 +143,6 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } -static int __net_init inetpeer_net_init(struct net *net) -{ - net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), - GFP_KERNEL); - if (net->ipv4.peers == NULL) - return -ENOMEM; - - net->ipv4.peers->root = peer_avl_empty_rcu; - seqlock_init(&net->ipv4.peers->lock); - - net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), - GFP_KERNEL); - if (net->ipv6.peers == NULL) - goto out_ipv6; - - net->ipv6.peers->root = peer_avl_empty_rcu; - seqlock_init(&net->ipv6.peers->lock); - - return 0; -out_ipv6: - kfree(net->ipv4.peers); - return -ENOMEM; -} - -static void __net_exit inetpeer_net_exit(struct net *net) -{ - inetpeer_invalidate_tree(net, AF_INET); - kfree(net->ipv4.peers); - net->ipv4.peers = NULL; - - inetpeer_invalidate_tree(net, AF_INET6); - kfree(net->ipv6.peers); - net->ipv6.peers = NULL; -} - -static struct pernet_operations inetpeer_ops = { - .init = inetpeer_net_init, - .exit = inetpeer_net_exit, -}; - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -205,7 +167,6 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); - register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -603,10 +564,9 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(struct net *net, int family) +void __inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); @@ -625,4 +585,12 @@ void inetpeer_invalidate_tree(struct net *net, int family) out: write_sequnlock_bh(&base->lock); } +EXPORT_SYMBOL(__inetpeer_invalidate_tree); + +void inetpeer_invalidate_tree(struct net *net, int family) +{ + struct inet_peer_base *base = family_to_base(net, family); + + __inetpeer_invalidate_tree(base); +} EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7a4d724765e..4cd35c3904d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3385,6 +3385,30 @@ static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, }; +static int __net_init ipv4_inetpeer_init(struct net *net) +{ + struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + + if (!bp) + return -ENOMEM; + inet_peer_base_init(bp); + net->ipv4.peers = bp; + return 0; +} + +static void __net_exit ipv4_inetpeer_exit(struct net *net) +{ + struct inet_peer_base *bp = net->ipv4.peers; + + net->ipv4.peers = NULL; + __inetpeer_invalidate_tree(bp); + kfree(bp); +} + +static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { + .init = ipv4_inetpeer_init, + .exit = ipv4_inetpeer_exit, +}; #ifdef CONFIG_IP_ROUTE_CLASSID struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; @@ -3465,6 +3489,7 @@ int __init ip_rt_init(void) register_pernet_subsys(&sysctl_route_ops); #endif register_pernet_subsys(&rt_genid_ops); + register_pernet_subsys(&ipv4_inetpeer_ops); return rc; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8a986be4aed..3e1e4e0da09 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2996,6 +2996,31 @@ static struct pernet_operations ip6_route_net_ops = { .exit = ip6_route_net_exit, }; +static int __net_init ipv6_inetpeer_init(struct net *net) +{ + struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); + + if (!bp) + return -ENOMEM; + inet_peer_base_init(bp); + net->ipv6.peers = bp; + return 0; +} + +static void __net_exit ipv6_inetpeer_exit(struct net *net) +{ + struct inet_peer_base *bp = net->ipv6.peers; + + net->ipv6.peers = NULL; + __inetpeer_invalidate_tree(bp); + kfree(bp); +} + +static __net_initdata struct pernet_operations ipv6_inetpeer_ops = { + .init = ipv6_inetpeer_init, + .exit = ipv6_inetpeer_exit, +}; + static struct notifier_block ip6_route_dev_notifier = { .notifier_call = ip6_route_dev_notify, .priority = 0, @@ -3020,6 +3045,10 @@ int __init ip6_route_init(void) if (ret) goto out_dst_entries; + ret = register_pernet_subsys(&ipv6_inetpeer_ops); + if (ret) + goto out_register_subsys; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -3035,7 +3064,7 @@ int __init ip6_route_init(void) #endif ret = fib6_init(); if (ret) - goto out_register_subsys; + goto out_register_inetpeer; ret = xfrm6_init(); if (ret) @@ -3064,6 +3093,8 @@ xfrm6_init: xfrm6_fini(); out_fib6_init: fib6_gc_cleanup(); +out_register_inetpeer: + unregister_pernet_subsys(&ipv6_inetpeer_ops); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); out_dst_entries: @@ -3079,6 +3110,7 @@ void ip6_route_cleanup(void) fib6_rules_cleanup(); xfrm6_fini(); fib6_gc_cleanup(); + unregister_pernet_subsys(&ipv6_inetpeer_ops); unregister_pernet_subsys(&ip6_route_net_ops); dst_entries_destroy(&ip6_dst_blackhole_ops); kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); -- cgit v1.2.3-70-g09d2 From 56a6b248eb345c1948ee60bf426de1ff7dd81509 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 16:32:41 -0700 Subject: inet: Consolidate inetpeer_invalidate_tree() interfaces. We only need one interface for this operation, since we always know which inetpeer root we want to flush. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 3 +-- net/ipv4/inetpeer.c | 10 +--------- net/ipv4/route.c | 4 ++-- net/ipv6/route.c | 2 +- 4 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 5a96c8edc52..733edc641b7 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -113,8 +113,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net, extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); -extern void __inetpeer_invalidate_tree(struct inet_peer_base *); -extern void inetpeer_invalidate_tree(struct net *net, int family); +extern void inetpeer_invalidate_tree(struct inet_peer_base *); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d0d72f89b27..9d89a381f0e 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -564,7 +564,7 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void __inetpeer_invalidate_tree(struct inet_peer_base *base) +void inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; @@ -585,12 +585,4 @@ void __inetpeer_invalidate_tree(struct inet_peer_base *base) out: write_sequnlock_bh(&base->lock); } -EXPORT_SYMBOL(__inetpeer_invalidate_tree); - -void inetpeer_invalidate_tree(struct net *net, int family) -{ - struct inet_peer_base *base = family_to_base(net, family); - - __inetpeer_invalidate_tree(base); -} EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4cd35c3904d..cf78343940d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -935,7 +935,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(net, AF_INET); + inetpeer_invalidate_tree(net->ipv4.peers); } /* @@ -3401,7 +3401,7 @@ static void __net_exit ipv4_inetpeer_exit(struct net *net) struct inet_peer_base *bp = net->ipv4.peers; net->ipv4.peers = NULL; - __inetpeer_invalidate_tree(bp); + inetpeer_invalidate_tree(bp); kfree(bp); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3e1e4e0da09..7f346d7492d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3012,7 +3012,7 @@ static void __net_exit ipv6_inetpeer_exit(struct net *net) struct inet_peer_base *bp = net->ipv6.peers; net->ipv6.peers = NULL; - __inetpeer_invalidate_tree(bp); + inetpeer_invalidate_tree(bp); kfree(bp); } -- cgit v1.2.3-70-g09d2 From c0efc887dcadbdbfe171f028acfab9c7c00e9dde Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 19:12:36 -0700 Subject: inet: Pass inetpeer root into inet_getpeer*() interfaces. Otherwise we reference potentially non-existing members when ipv6 is disabled. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 10 +++++----- net/ipv4/inetpeer.c | 9 +-------- net/ipv4/ip_fragment.c | 2 +- net/ipv4/route.c | 6 +++--- net/ipv6/route.c | 2 +- 5 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 733edc641b7..b84b32fd5df 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -83,11 +83,11 @@ static inline bool inet_metrics_new(const struct inet_peer *p) } /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(struct net *net, +struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create); -static inline struct inet_peer *inet_getpeer_v4(struct net *net, +static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, int create) { @@ -95,10 +95,10 @@ static inline struct inet_peer *inet_getpeer_v4(struct net *net, daddr.addr.a4 = v4daddr; daddr.family = AF_INET; - return inet_getpeer(net, &daddr, create); + return inet_getpeer(base, &daddr, create); } -static inline struct inet_peer *inet_getpeer_v6(struct net *net, +static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, const struct in6_addr *v6daddr, int create) { @@ -106,7 +106,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct net *net, *(struct in6_addr *)daddr.addr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(net, &daddr, create); + return inet_getpeer(base, &daddr, create); } /* can be called from BH context or outside */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9d89a381f0e..e4cba56a534 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -391,12 +391,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(struct net *net, - int family) -{ - return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; -} - /* perform garbage collect on all items stacked during a lookup */ static int inet_peer_gc(struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH], @@ -434,12 +428,11 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(struct net *net, +struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 22c6bab9717..8d07c973409 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -184,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cf78343940d..2aa663a6ae9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1328,7 +1328,7 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v4(net, daddr, create); + peer = inet_getpeer_v4(net->ipv4.peers, daddr, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); @@ -1684,7 +1684,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, unsigned short est_mtu = 0; struct inet_peer *peer; - peer = inet_getpeer_v4(net, iph->daddr, 1); + peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); if (peer) { unsigned short mtu = new_mtu; @@ -1929,7 +1929,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create); + rt->peer = peer = inet_getpeer_v4(net->ipv4.peers, rt->rt_dst, create); if (peer) { rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9586c27e069..8fc41d502bb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -306,7 +306,7 @@ void rt6_bind_peer(struct rt6_info *rt, int create) struct net *net = dev_net(rt->dst.dev); struct inet_peer *peer; - peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create); + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); else -- cgit v1.2.3-70-g09d2 From 3ddd53f392c4f8e19e1110d1bdef770008b128b8 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Mon, 11 Jun 2012 11:59:10 +0800 Subject: cfg80211: add missing kernel-doc for mesh configuration structure Add the missing kernel-doc for mesh configuration parameters as pointed out by Johannes Berg. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 59 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7319f25250b..a129ee2f548 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -790,26 +790,69 @@ struct bss_parameters { int ht_opmode; }; -/* +/** * struct mesh_config - 802.11s mesh configuration * * These parameters can be changed while the mesh is active. + * + * @dot11MeshRetryTimeout: the initial retry timeout in millisecond units used + * by the Mesh Peering Open message + * @dot11MeshConfirmTimeout: the initial retry timeout in millisecond units + * used by the Mesh Peering Open message + * @dot11MeshHoldingTimeout: the confirm timeout in millisecond units used by + * the mesh peering management to close a mesh peering + * @dot11MeshMaxPeerLinks: the maximum number of peer links allowed on this + * mesh interface + * @dot11MeshMaxRetries: the maximum number of peer link open retries that can + * be sent to establish a new peer link instance in a mesh + * @dot11MeshTTL: the value of TTL field set at a source mesh STA + * @element_ttl: the value of TTL field set at a mesh STA for path selection + * elements + * @auto_open_plinks: whether we should automatically open peer links when we + * detect compatible mesh peers + * @dot11MeshNbrOffsetMaxNeighbor: the maximum number of neighbors to + * synchronize to for 11s default synchronization method + * @dot11MeshHWMPmaxPREQretries: the number of action frames containing a PREQ + * that an originator mesh STA can send to a particular path target + * @path_refresh_time: how frequently to refresh mesh paths in milliseconds + * @min_discovery_timeout: the minimum length of time to wait until giving up on + * a path discovery in milliseconds + * @dot11MeshHWMPactivePathTimeout: the time (in TUs) for which mesh STAs + * receiving a PREQ shall consider the forwarding information from the + * root to be valid. (TU = time unit) + * @dot11MeshHWMPpreqMinInterval: the minimum interval of time (in TUs) during + * which a mesh STA can send only one action frame containing a PREQ + * element + * @dot11MeshHWMPperrMinInterval: the minimum interval of time (in TUs) during + * which a mesh STA can send only one Action frame containing a PERR + * element + * @dot11MeshHWMPnetDiameterTraversalTime: the interval of time (in TUs) that + * it takes for an HWMP information element to propagate across the mesh + * @dot11MeshHWMPRootMode: the configuration of a mesh STA as root mesh STA + * @dot11MeshHWMPRannInterval: the interval of time (in TUs) between root + * announcements are transmitted + * @dot11MeshGateAnnouncementProtocol: whether to advertise that this mesh + * station has access to a broader network beyond the MBSS. (This is + * missnamed in draft 12.0: dot11MeshGateAnnouncementProtocol set to true + * only means that the station will announce others it's a mesh gate, but + * not necessarily using the gate announcement protocol. Still keeping the + * same nomenclature to be in sync with the spec) + * @dot11MeshForwarding: whether the Mesh STA is forwarding or non-forwarding + * entity (default is TRUE - forwarding entity) + * @rssi_threshold: the threshold for average signal strength of candidate + * station to establish a peer link + * @ht_opmode: mesh HT protection mode */ struct mesh_config { - /* Timeouts in ms */ - /* Mesh plink management parameters */ u16 dot11MeshRetryTimeout; u16 dot11MeshConfirmTimeout; u16 dot11MeshHoldingTimeout; u16 dot11MeshMaxPeerLinks; u8 dot11MeshMaxRetries; u8 dot11MeshTTL; - /* ttl used in path selection information elements */ u8 element_ttl; bool auto_open_plinks; - /* neighbor offset synchronization */ u32 dot11MeshNbrOffsetMaxNeighbor; - /* HWMP parameters */ u8 dot11MeshHWMPmaxPREQretries; u32 path_refresh_time; u16 min_discovery_timeout; @@ -819,10 +862,6 @@ struct mesh_config { u16 dot11MeshHWMPnetDiameterTraversalTime; u8 dot11MeshHWMPRootMode; u16 dot11MeshHWMPRannInterval; - /* This is missnamed in draft 12.0: dot11MeshGateAnnouncementProtocol - * set to true only means that the station will announce others it's a - * mesh gate, but not necessarily using the gate announcement protocol. - * Still keeping the same nomenclature to be in sync with the spec. */ bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; s32 rssi_threshold; -- cgit v1.2.3-70-g09d2 From a4f606ea73d56d15f28653d2242e54d58bb612e5 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Mon, 11 Jun 2012 11:59:36 +0800 Subject: {nl,cfg,mac}80211: fix the coding style related to mesh parameters fix the coding style related to mesh parameters, especially the indentation, as pointed out by Johannes Berg. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 6 +-- include/linux/nl80211.h | 98 ++++++++++++++++++++++--------------------- include/net/cfg80211.h | 12 +++--- net/mac80211/cfg.c | 3 +- net/mac80211/debugfs_netdev.c | 34 +++++++-------- net/wireless/nl80211.c | 97 ++++++++++++++++++++++-------------------- 6 files changed, 130 insertions(+), 120 deletions(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce9af891851..f831078182d 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1443,7 +1443,7 @@ enum ieee80211_tdls_actioncode { * * @IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET: the default synchronization method * @IEEE80211_SYNC_METHOD_VENDOR: a vendor specific synchronization method - * that will be specified in a vendor specific information element + * that will be specified in a vendor specific information element */ enum { IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET = 1, @@ -1455,7 +1455,7 @@ enum { * * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will - * be specified in a vendor specific information element + * be specified in a vendor specific information element */ enum { IEEE80211_PATH_PROTOCOL_HWMP = 1, @@ -1467,7 +1467,7 @@ enum { * * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be - * specified in a vendor specific information element + * specified in a vendor specific information element */ enum { IEEE80211_PATH_METRIC_AIRTIME = 1, diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 970afdf5a60..c61e1621822 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2090,78 +2090,80 @@ enum nl80211_mntr_flags { * @__NL80211_MESHCONF_INVALID: internal use * * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in - * millisecond units, used by the Peer Link Open message + * millisecond units, used by the Peer Link Open message * * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the initial confirm timeout, in - * millisecond units, used by the peer link management to close a peer link + * millisecond units, used by the peer link management to close a peer link * * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in - * millisecond units + * millisecond units * * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed - * on this mesh interface + * on this mesh interface * * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link - * open retries that can be sent to establish a new peer link instance in a - * mesh + * open retries that can be sent to establish a new peer link instance in a + * mesh * * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh - * point. + * point. * * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically - * open peer links when we detect compatible mesh peers. + * open peer links when we detect compatible mesh peers. * * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames - * containing a PREQ that an MP can send to a particular destination (path - * target) + * containing a PREQ that an MP can send to a particular destination (path + * target) * * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths - * (in milliseconds) + * (in milliseconds) * * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait - * until giving up on a path discovery (in milliseconds) + * until giving up on a path discovery (in milliseconds) * * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh - * points receiving a PREQ shall consider the forwarding information from the - * root to be valid. (TU = time unit) + * points receiving a PREQ shall consider the forwarding information from + * the root to be valid. (TU = time unit) * * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in - * TUs) during which an MP can send only one action frame containing a PREQ - * reference element + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element * * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) - * that it takes for an HWMP information element to propagate across the mesh + * that it takes for an HWMP information element to propagate across the + * mesh * * @NL80211_MESHCONF_HWMP_ROOTMODE: whether root mode is enabled or not * * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a - * source mesh point for path selection elements. + * source mesh point for path selection elements. * * @NL80211_MESHCONF_HWMP_RANN_INTERVAL: The interval of time (in TUs) between - * root announcements are transmitted. + * root announcements are transmitted. * * @NL80211_MESHCONF_GATE_ANNOUNCEMENTS: Advertise that this mesh station has - * access to a broader network beyond the MBSS. This is done via Root - * Announcement frames. + * access to a broader network beyond the MBSS. This is done via Root + * Announcement frames. * * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in - * TUs) during which a mesh STA can send only one Action frame containing a - * PERR element. + * TUs) during which a mesh STA can send only one Action frame containing a + * PERR element. * * @NL80211_MESHCONF_FORWARDING: set Mesh STA as forwarding or non-forwarding - * or forwarding entity (default is TRUE - forwarding entity) + * or forwarding entity (default is TRUE - forwarding entity) * * @NL80211_MESHCONF_RSSI_THRESHOLD: RSSI threshold in dBm. This specifies the - * threshold for average signal strength of candidate station to establish - * a peer link. - * - * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * threshold for average signal strength of candidate station to establish + * a peer link. * * @NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR: maximum number of neighbors - * to synchronize to for 11s default synchronization method (see 11C.12.2.2) + * to synchronize to for 11s default synchronization method + * (see 11C.12.2.2) * * @NL80211_MESHCONF_HT_OPMODE: set mesh HT protection mode. * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2203,34 +2205,36 @@ enum nl80211_meshconf_params { * @__NL80211_MESH_SETUP_INVALID: Internal use * * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a - * vendor specific path selection algorithm or disable it to use the default - * HWMP. + * vendor specific path selection algorithm or disable it to use the + * default HWMP. * * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a - * vendor specific path metric or disable it to use the default Airtime - * metric. + * vendor specific path metric or disable it to use the default Airtime + * metric. * * @NL80211_MESH_SETUP_IE: Information elements for this mesh, for instance, a - * robust security network ie, or a vendor specific information element that - * vendors will use to identify the path selection methods and metrics in use. + * robust security network ie, or a vendor specific information element + * that vendors will use to identify the path selection methods and + * metrics in use. * * @NL80211_MESH_SETUP_USERSPACE_AUTH: Enable this option if an authentication - * daemon will be authenticating mesh candidates. + * daemon will be authenticating mesh candidates. * * @NL80211_MESH_SETUP_USERSPACE_AMPE: Enable this option if an authentication - * daemon will be securing peer link frames. AMPE is a secured version of Mesh - * Peering Management (MPM) and is implemented with the assistance of a - * userspace daemon. When this flag is set, the kernel will send peer - * management frames to a userspace daemon that will implement AMPE - * functionality (security capabilities selection, key confirmation, and key - * management). When the flag is unset (default), the kernel can autonomously - * complete (unsecured) mesh peering without the need of a userspace daemon. - * - * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number + * daemon will be securing peer link frames. AMPE is a secured version of + * Mesh Peering Management (MPM) and is implemented with the assistance of + * a userspace daemon. When this flag is set, the kernel will send peer + * management frames to a userspace daemon that will implement AMPE + * functionality (security capabilities selection, key confirmation, and + * key management). When the flag is unset (default), the kernel can + * autonomously complete (unsecured) mesh peering without the need of a + * userspace daemon. * * @NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC: Enable this option to use a - * vendor specific synchronization method or disable it to use the default - * neighbor offset synchronization + * vendor specific synchronization method or disable it to use the default + * neighbor offset synchronization + * + * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number * * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a129ee2f548..778e533a973 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -848,21 +848,21 @@ struct mesh_config { u16 dot11MeshConfirmTimeout; u16 dot11MeshHoldingTimeout; u16 dot11MeshMaxPeerLinks; - u8 dot11MeshMaxRetries; - u8 dot11MeshTTL; - u8 element_ttl; + u8 dot11MeshMaxRetries; + u8 dot11MeshTTL; + u8 element_ttl; bool auto_open_plinks; u32 dot11MeshNbrOffsetMaxNeighbor; - u8 dot11MeshHWMPmaxPREQretries; + u8 dot11MeshHWMPmaxPREQretries; u32 path_refresh_time; u16 min_discovery_timeout; u32 dot11MeshHWMPactivePathTimeout; u16 dot11MeshHWMPpreqMinInterval; u16 dot11MeshHWMPperrMinInterval; u16 dot11MeshHWMPnetDiameterTraversalTime; - u8 dot11MeshHWMPRootMode; + u8 dot11MeshHWMPRootMode; u16 dot11MeshHWMPRannInterval; - bool dot11MeshGateAnnouncementProtocol; + bool dot11MeshGateAnnouncementProtocol; bool dot11MeshForwarding; s32 rssi_threshold; u16 ht_opmode; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 498c94e3442..f41f9bea242 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1571,10 +1571,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, conf->dot11MeshGateAnnouncementProtocol = nconf->dot11MeshGateAnnouncementProtocol; } - if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) { + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_RANN_INTERVAL, mask)) conf->dot11MeshHWMPRannInterval = nconf->dot11MeshHWMPRannInterval; - } if (_chg_mesh_attr(NL80211_MESHCONF_FORWARDING, mask)) conf->dot11MeshForwarding = nconf->dot11MeshForwarding; if (_chg_mesh_attr(NL80211_MESHCONF_RSSI_THRESHOLD, mask)) { diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index d4272ff43f7..c429417e132 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -468,45 +468,45 @@ IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC); IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC); IEEE80211_IF_FILE(dropped_frames_congestion, - u.mesh.mshstats.dropped_frames_congestion, DEC); + u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, - u.mesh.mshstats.dropped_frames_no_route, DEC); + u.mesh.mshstats.dropped_frames_no_route, DEC); IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_FILE(dot11MeshMaxRetries, - u.mesh.mshcfg.dot11MeshMaxRetries, DEC); + u.mesh.mshcfg.dot11MeshMaxRetries, DEC); IEEE80211_IF_FILE(dot11MeshRetryTimeout, - u.mesh.mshcfg.dot11MeshRetryTimeout, DEC); + u.mesh.mshcfg.dot11MeshRetryTimeout, DEC); IEEE80211_IF_FILE(dot11MeshConfirmTimeout, - u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC); + u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHoldingTimeout, - u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); + u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC); IEEE80211_IF_FILE(element_ttl, u.mesh.mshcfg.element_ttl, DEC); IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC); IEEE80211_IF_FILE(dot11MeshMaxPeerLinks, - u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); + u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); IEEE80211_IF_FILE(dot11MeshHWMPactivePathTimeout, - u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC); + u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPpreqMinInterval, - u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC); + u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPperrMinInterval, - u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC); + u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC); IEEE80211_IF_FILE(dot11MeshHWMPnetDiameterTraversalTime, - u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC); + u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC); IEEE80211_IF_FILE(dot11MeshHWMPmaxPREQretries, - u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC); + u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC); IEEE80211_IF_FILE(path_refresh_time, - u.mesh.mshcfg.path_refresh_time, DEC); + u.mesh.mshcfg.path_refresh_time, DEC); IEEE80211_IF_FILE(min_discovery_timeout, - u.mesh.mshcfg.min_discovery_timeout, DEC); + u.mesh.mshcfg.min_discovery_timeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMPRootMode, - u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); + u.mesh.mshcfg.dot11MeshHWMPRootMode, DEC); IEEE80211_IF_FILE(dot11MeshGateAnnouncementProtocol, - u.mesh.mshcfg.dot11MeshGateAnnouncementProtocol, DEC); + u.mesh.mshcfg.dot11MeshGateAnnouncementProtocol, DEC); IEEE80211_IF_FILE(dot11MeshHWMPRannInterval, - u.mesh.mshcfg.dot11MeshHWMPRannInterval, DEC); + u.mesh.mshcfg.dot11MeshHWMPRannInterval, DEC); IEEE80211_IF_FILE(dot11MeshForwarding, u.mesh.mshcfg.dot11MeshForwarding, DEC); IEEE80211_IF_FILE(rssi_threshold, u.mesh.mshcfg.rssi_threshold, DEC); IEEE80211_IF_FILE(ht_opmode, u.mesh.mshcfg.ht_opmode, DEC); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ae54b82291..dd94ee5fb40 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -115,7 +115,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, - .len = IEEE80211_MAX_MESH_ID_LEN }, + .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, @@ -3492,7 +3492,6 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 }, [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 }, [NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] = { .type = NLA_U32 }, - [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, [NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 }, [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 }, @@ -3504,8 +3503,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = { .type = NLA_U8 }, [NL80211_MESHCONF_FORWARDING] = { .type = NLA_U8 }, - [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32}, - [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16}, + [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3515,7 +3514,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, - .len = IEEE80211_MAX_DATA_LEN }, + .len = IEEE80211_MAX_DATA_LEN }, [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, }; @@ -3548,63 +3547,71 @@ do {\ /* Fill in the params struct */ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, - mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); + mask, NL80211_MESHCONF_RETRY_TIMEOUT, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, - mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); + mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, - mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); + mask, NL80211_MESHCONF_HOLDING_TIMEOUT, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, - mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); + mask, NL80211_MESHCONF_MAX_PEER_LINKS, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, - mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); + mask, NL80211_MESHCONF_MAX_RETRIES, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, - mask, NL80211_MESHCONF_TTL, nla_get_u8); + mask, NL80211_MESHCONF_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, - mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); + mask, NL80211_MESHCONF_ELEMENT_TTL, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, - mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, - mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, - nla_get_u32); + mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, + NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, - mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, - nla_get_u8); + mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, - mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); + mask, NL80211_MESHCONF_PATH_REFRESH_TIME, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, - mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, - nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, - mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, - nla_get_u32); + mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, - mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, - nla_get_u16); + mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, - mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, - nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPnetDiameterTraversalTime, - mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, - nla_get_u16); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPRootMode, mask, - NL80211_MESHCONF_HWMP_ROOTMODE, - nla_get_u8); + mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshHWMPRannInterval, mask, - NL80211_MESHCONF_HWMP_RANN_INTERVAL, - nla_get_u16); + dot11MeshHWMPnetDiameterTraversalTime, mask, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, + NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, + NL80211_MESHCONF_HWMP_RANN_INTERVAL, + nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, - dot11MeshGateAnnouncementProtocol, mask, - NL80211_MESHCONF_GATE_ANNOUNCEMENTS, - nla_get_u8); + dot11MeshGateAnnouncementProtocol, mask, + NL80211_MESHCONF_GATE_ANNOUNCEMENTS, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, - mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); + mask, NL80211_MESHCONF_FORWARDING, + nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, - mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_u32); + mask, NL80211_MESHCONF_RSSI_THRESHOLD, + nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, - mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); + mask, NL80211_MESHCONF_HT_OPMODE, + nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3-70-g09d2 From 97bab73f987e2781129cd6f4b6379bf44d808cc6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 22:36:36 -0700 Subject: inet: Hide route peer accesses behind helpers. We encode the pointer(s) into an unsigned long with one state bit. The state bit is used so we can store the inetpeer tree root to use when resolving the peer later. Later the peer roots will be per-FIB table, and this change works to facilitate that. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ include/net/ip6_fib.h | 32 +++++++++++++++++++++++++++- include/net/ip6_route.h | 6 +++--- include/net/route.h | 42 +++++++++++++++++++++++++++++++++---- net/ipv4/route.c | 56 +++++++++++++++++++++++++++++-------------------- net/ipv4/xfrm4_policy.c | 10 ++++----- net/ipv6/route.c | 42 +++++++++++++++++++++---------------- net/ipv6/xfrm6_policy.c | 10 ++++----- 8 files changed, 193 insertions(+), 59 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index b84b32fd5df..d432489e710 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -71,6 +71,60 @@ struct inet_peer_base { int total; }; +#define INETPEER_BASE_BIT 0x1UL + +static inline struct inet_peer *inetpeer_ptr(unsigned long val) +{ + BUG_ON(val & INETPEER_BASE_BIT); + return (struct inet_peer *) val; +} + +static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val) +{ + if (!(val & INETPEER_BASE_BIT)) + return NULL; + val &= ~INETPEER_BASE_BIT; + return (struct inet_peer_base *) val; +} + +static inline bool inetpeer_ptr_is_peer(unsigned long val) +{ + return !(val & INETPEER_BASE_BIT); +} + +static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer) +{ + /* This implicitly clears INETPEER_BASE_BIT */ + *val = (unsigned long) peer; +} + +static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer) +{ + unsigned long val = (unsigned long) peer; + unsigned long orig = *ptr; + + if (!(orig & INETPEER_BASE_BIT) || !val || + cmpxchg(ptr, orig, val) != orig) + return false; + return true; +} + +static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base) +{ + *ptr = (unsigned long) base | INETPEER_BASE_BIT; +} + +static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from) +{ + unsigned long val = *from; + + *to = val; + if (inetpeer_ptr_is_peer(val)) { + struct inet_peer *peer = inetpeer_ptr(val); + atomic_inc(&peer->refcnt); + } +} + extern void inet_peer_base_init(struct inet_peer_base *); void inet_initpeers(void) __init; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0ae759a6c76..3ac5f155c69 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -107,7 +107,7 @@ struct rt6_info { u32 rt6i_peer_genid; struct inet6_dev *rt6i_idev; - struct inet_peer *rt6i_peer; + unsigned long _rt6i_peer; #ifdef CONFIG_XFRM u32 rt6i_flow_cache_genid; @@ -118,6 +118,36 @@ struct rt6_info { u8 rt6i_protocol; }; +static inline struct inet_peer *rt6_peer_ptr(struct rt6_info *rt) +{ + return inetpeer_ptr(rt->_rt6i_peer); +} + +static inline bool rt6_has_peer(struct rt6_info *rt) +{ + return inetpeer_ptr_is_peer(rt->_rt6i_peer); +} + +static inline void __rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) +{ + __inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); +} + +static inline bool rt6_set_peer(struct rt6_info *rt, struct inet_peer *peer) +{ + return inetpeer_ptr_set_peer(&rt->_rt6i_peer, peer); +} + +static inline void rt6_init_peer(struct rt6_info *rt, struct inet_peer_base *base) +{ + inetpeer_init_ptr(&rt->_rt6i_peer, base); +} + +static inline void rt6_transfer_peer(struct rt6_info *rt, struct rt6_info *ort) +{ + inetpeer_transfer_peer(&rt->_rt6i_peer, &ort->_rt6i_peer); +} + static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 73d75028812..f88a85cf31c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -57,11 +57,11 @@ extern void rt6_bind_peer(struct rt6_info *rt, int create); static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) { - if (rt->rt6i_peer) - return rt->rt6i_peer; + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); rt6_bind_peer(rt, create); - return rt->rt6i_peer; + return rt6_peer_ptr(rt); } static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) diff --git a/include/net/route.h b/include/net/route.h index 433fc6c1d40..6340c37677f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -67,10 +67,44 @@ struct rtable { /* Miscellaneous cached information */ __be32 rt_spec_dst; /* RFC1122 specific destination */ u32 rt_peer_genid; - struct inet_peer *peer; /* long-living peer info */ + unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ }; +static inline struct inet_peer *rt_peer_ptr(struct rtable *rt) +{ + return inetpeer_ptr(rt->_peer); +} + +static inline bool rt_has_peer(struct rtable *rt) +{ + return inetpeer_ptr_is_peer(rt->_peer); +} + +static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer) +{ + __inetpeer_ptr_set_peer(&rt->_peer, peer); +} + +static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer) +{ + return inetpeer_ptr_set_peer(&rt->_peer, peer); +} + +static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base) +{ + inetpeer_init_ptr(&rt->_peer, base); +} + +static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort) +{ + rt->_peer = ort->_peer; + if (rt_has_peer(ort)) { + struct inet_peer *peer = rt_peer_ptr(ort); + atomic_inc(&peer->refcnt); + } +} + static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_route_iif != 0; @@ -298,11 +332,11 @@ extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) { - if (rt->peer) - return rt->peer; + if (rt_has_peer(rt)) + return rt_peer_ptr(rt); rt_bind_peer(rt, daddr, create); - return rt->peer; + return rt_peer_ptr(rt); } static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2aa663a6ae9..03e5b614370 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -677,7 +677,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - (rth->peer && rth->peer->pmtu_expires); + (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -1325,12 +1325,16 @@ static u32 rt_peer_genid(void) void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { - struct net *net = dev_net(rt->dst.dev); + struct inet_peer_base *base; struct inet_peer *peer; - peer = inet_getpeer_v4(net->ipv4.peers, daddr, create); + base = inetpeer_base_ptr(rt->_peer); + if (!base) + return; + + peer = inet_getpeer_v4(base, daddr, create); - if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) + if (!rt_set_peer(rt, peer)) inet_putpeer(peer); else rt->rt_peer_genid = rt_peer_genid(); @@ -1533,8 +1537,10 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt->peer && peer_pmtu_expired(rt->peer)) { - dst_metric_set(dst, RTAX_MTU, rt->peer->pmtu_orig); + } else if (rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); + if (peer_pmtu_expired(peer)) + dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); } } return ret; @@ -1796,14 +1802,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_dst_destroy(struct dst_entry *dst) { struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer = rt->peer; if (rt->fi) { fib_info_put(rt->fi); rt->fi = NULL; } - if (peer) { - rt->peer = NULL; + if (rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); inet_putpeer(peer); } } @@ -1816,8 +1821,11 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && rt->peer && peer_pmtu_cleaned(rt->peer)) - dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); + if (rt && rt_has_peer(rt)) { + struct inet_peer *peer = rt_peer_ptr(rt); + if (peer_pmtu_cleaned(peer)) + dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); + } } static int ip_rt_bug(struct sk_buff *skb) @@ -1919,7 +1927,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { - struct net *net = dev_net(rt->dst.dev); + struct inet_peer_base *base; struct inet_peer *peer; int create = 0; @@ -1929,8 +1937,12 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; - rt->peer = peer = inet_getpeer_v4(net->ipv4.peers, rt->rt_dst, create); + base = inetpeer_base_ptr(rt->_peer); + BUG_ON(!base); + + peer = inet_getpeer_v4(base, rt->rt_dst, create); if (peer) { + __rt_set_peer(rt, peer); rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, @@ -2046,7 +2058,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -2174,7 +2186,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(rth->dst.dev)->ipv4.peers); rth->fi = NULL; rth->dst.input = ip_forward; @@ -2357,7 +2369,7 @@ local_input: rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -2561,7 +2573,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; - rth->peer = NULL; + rt_init_peer(rth, dev_net(dev_out)->ipv4.peers); rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2898,9 +2910,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; rt->rt_spec_dst = ort->rt_spec_dst; - rt->peer = ort->peer; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); + rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) atomic_inc(&rt->fi->fib_clntref); @@ -2938,7 +2948,6 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - const struct inet_peer *peer = rt->peer; u32 id = 0, ts = 0, tsage = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); @@ -2994,8 +3003,9 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; error = rt->dst.error; - if (peer) { - inet_peer_refcheck(rt->peer); + if (rt_has_peer(rt)) { + const struct inet_peer *peer = rt_peer_ptr(rt); + inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; if (peer->tcp_ts_stamp) { ts = peer->tcp_ts; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0d3426cb5c4..8855d826855 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -90,9 +90,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - xdst->u.rt.peer = rt->peer; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); + rt_transfer_peer(&xdst->u.rt, rt); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ @@ -212,8 +210,10 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); - if (likely(xdst->u.rt.peer)) - inet_putpeer(xdst->u.rt.peer); + if (rt_has_peer(&xdst->u.rt)) { + struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); + inet_putpeer(peer); + } xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8fc41d502bb..17a9b8687f2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -258,16 +258,18 @@ static struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, +static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { - struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); + struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, + 0, 0, flags); - if (rt) + if (rt) { memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); - + rt6_init_peer(rt, net->ipv6.peers); + } return rt; } @@ -275,7 +277,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct inet_peer *peer = rt->rt6i_peer; if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -288,8 +289,8 @@ static void ip6_dst_destroy(struct dst_entry *dst) if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) dst_release(dst->from); - if (peer) { - rt->rt6i_peer = NULL; + if (rt6_has_peer(rt)) { + struct inet_peer *peer = rt6_peer_ptr(rt); inet_putpeer(peer); } } @@ -303,11 +304,15 @@ static u32 rt6_peer_genid(void) void rt6_bind_peer(struct rt6_info *rt, int create) { - struct net *net = dev_net(rt->dst.dev); + struct inet_peer_base *base; struct inet_peer *peer; - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, create); - if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (!rt6_set_peer(rt, peer)) inet_putpeer(peer); else rt->rt6i_peer_genid = rt6_peer_genid(); @@ -950,6 +955,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); if (rt) { memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); + rt6_init_peer(rt, net->ipv6.peers); new = &rt->dst; @@ -994,7 +1000,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt->rt6i_peer) + if (!rt6_has_peer(rt)) rt6_bind_peer(rt, 0); rt->rt6i_peer_genid = rt6_peer_genid(); } @@ -1108,7 +1114,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); + rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1290,7 +1296,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); + rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT); if (!rt) { err = -ENOMEM; @@ -1812,8 +1818,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - ort->dst.dev, 0); + struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0); if (rt) { rt->dst.input = ort->dst.input; @@ -2097,8 +2102,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, - net->loopback_dev, 0); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0); int err; if (!rt) { @@ -2519,7 +2523,9 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - peer = rt->rt6i_peer; + peer = NULL; + if (rt6_has_peer(rt)) + peer = rt6_peer_ptr(rt); ts = tsage = 0; if (peer && peer->tcp_ts_stamp) { ts = peer->tcp_ts; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8625fba96db..d7494845efb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -99,9 +99,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, if (!xdst->u.rt6.rt6i_idev) return -ENODEV; - xdst->u.rt6.rt6i_peer = rt->rt6i_peer; - if (rt->rt6i_peer) - atomic_inc(&rt->rt6i_peer->refcnt); + rt6_transfer_peer(&xdst->u.rt6, rt); /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ @@ -223,8 +221,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (likely(xdst->u.rt6.rt6i_peer)) - inet_putpeer(xdst->u.rt6.rt6i_peer); + if (rt6_has_peer(&xdst->u.rt6)) { + struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); + inet_putpeer(peer); + } xfrm_dst_destroy(xdst); } -- cgit v1.2.3-70-g09d2 From 46517008e1168dc926cf2c47d529efc07eca85c0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2012 00:04:12 -0700 Subject: ipv4: Kill ip_rt_frag_needed(). There is zero point to this function. It's only real substance is to perform an extremely outdated BSD4.2 ICMP check, which we can safely remove. If you really have a MTU limited link being routed by a BSD4.2 derived system, here's a nickel go buy yourself a real router. The other actions of ip_rt_frag_needed(), checking and conditionally updating the peer, are done by the per-protocol handlers of the ICMP event. TCP, UDP, et al. have a handler which will receive this event and transmit it back into the associated route via dst_ops->update_pmtu(). This simplification is important, because it eliminates the one place where we do not have a proper route context in which to make an inetpeer lookup. Signed-off-by: David S. Miller --- include/net/route.h | 2 -- net/ipv4/icmp.c | 4 +--- net/ipv4/route.c | 61 ---------------------------------------------------- net/rxrpc/ar-error.c | 4 ---- 4 files changed, 1 insertion(+), 70 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6340c37677f..cc693a5bb20 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -215,8 +215,6 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s return ip_route_input_common(skb, dst, src, tos, devin, true); } -extern unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, - unsigned short new_mtu, struct net_device *dev); extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0c78ef1e5dd..e1caa1abe5d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -673,9 +673,7 @@ static void icmp_unreach(struct sk_buff *skb) LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); } else { - info = ip_rt_frag_needed(net, iph, - ntohs(icmph->un.frag.mtu), - skb->dev); + info = ntohs(icmph->un.frag.mtu); if (!info) goto out; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 03e5b614370..4f5834c4a66 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1664,67 +1664,6 @@ out: kfree_skb(skb); return 0; } -/* - * The last two values are not from the RFC but - * are needed for AMPRnet AX.25 paths. - */ - -static const unsigned short mtu_plateau[] = -{32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128 }; - -static inline unsigned short guess_mtu(unsigned short old_mtu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mtu_plateau); i++) - if (old_mtu > mtu_plateau[i]) - return mtu_plateau[i]; - return 68; -} - -unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph, - unsigned short new_mtu, - struct net_device *dev) -{ - unsigned short old_mtu = ntohs(iph->tot_len); - unsigned short est_mtu = 0; - struct inet_peer *peer; - - peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); - if (peer) { - unsigned short mtu = new_mtu; - - if (new_mtu < 68 || new_mtu >= old_mtu) { - /* BSD 4.2 derived systems incorrectly adjust - * tot_len by the IP header length, and report - * a zero MTU in the ICMP message. - */ - if (mtu == 0 && - old_mtu >= 68 + (iph->ihl << 2)) - old_mtu -= iph->ihl << 2; - mtu = guess_mtu(old_mtu); - } - - if (mtu < ip_rt_min_pmtu) - mtu = ip_rt_min_pmtu; - if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { - unsigned long pmtu_expires; - - pmtu_expires = jiffies + ip_rt_mtu_expires; - if (!pmtu_expires) - pmtu_expires = 1UL; - - est_mtu = mtu; - peer->pmtu_learned = mtu; - peer->pmtu_expires = pmtu_expires; - atomic_inc(&__rt_peer_genid); - } - - inet_putpeer(peer); - } - return est_mtu ? : new_mtu; -} - static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 5d6b572a670..a9206087b4d 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -81,10 +81,6 @@ void rxrpc_UDP_error_report(struct sock *sk) _net("I/F MTU %u", mtu); } - /* ip_rt_frag_needed() may have eaten the info */ - if (mtu == 0) - mtu = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (mtu == 0) { /* they didn't give us a size, estimate one */ if (mtu > 1500) { -- cgit v1.2.3-70-g09d2 From b48c80ece973e9eddb042f6685b482b261ff0d47 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2012 00:24:21 -0700 Subject: inet: Add family scope inetpeer flushes. This implementation can deal with having many inetpeer roots, which is a necessary prerequisite for per-FIB table rooted peer tables. Each family (AF_INET, AF_INET6) has a sequence number which we bump when we get a family invalidation request. Each peer lookup cheaply checks whether the flush sequence of the root we are using is out of date, and if so flushes it and updates the sequence number. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 ++ net/ipv4/inetpeer.c | 28 ++++++++++++++++++++++++++++ net/ipv4/route.c | 2 +- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index d432489e710..e15c0862a68 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -68,6 +68,7 @@ struct inet_peer { struct inet_peer_base { struct inet_peer __rcu *root; seqlock_t lock; + u32 flush_seq; int total; }; @@ -168,6 +169,7 @@ extern void inet_putpeer(struct inet_peer *p); extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); extern void inetpeer_invalidate_tree(struct inet_peer_base *); +extern void inetpeer_invalidate_family(int family); /* * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e4cba56a534..cac02ad1425 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -86,10 +86,36 @@ void inet_peer_base_init(struct inet_peer_base *bp) { bp->root = peer_avl_empty_rcu; seqlock_init(&bp->lock); + bp->flush_seq = ~0U; bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); +static atomic_t v4_seq = ATOMIC_INIT(0); +static atomic_t v6_seq = ATOMIC_INIT(0); + +static atomic_t *inetpeer_seq_ptr(int family) +{ + return (family == AF_INET ? &v4_seq : &v6_seq); +} + +static inline void flush_check(struct inet_peer_base *base, int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + if (unlikely(base->flush_seq != atomic_read(fp))) { + inetpeer_invalidate_tree(base); + base->flush_seq = atomic_read(fp); + } +} + +void inetpeer_invalidate_family(int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + atomic_inc(fp); +} + #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -437,6 +463,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, unsigned int sequence; int invalidated, gccnt = 0; + flush_check(base, daddr->family); + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4f5834c4a66..456a9470fb5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -935,7 +935,7 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_tree(net->ipv4.peers); + inetpeer_invalidate_family(AF_INET); } /* -- cgit v1.2.3-70-g09d2 From 8e77327783c753689a1a766ab9d301b81c2529f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Jun 2012 00:01:52 -0700 Subject: inet: Add inetpeer tree roots to the FIB tables. Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 + include/net/ip_fib.h | 12 +++++++----- net/ipv4/fib_trie.c | 3 +++ net/ipv6/ip6_fib.c | 5 +++++ 4 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 3ac5f155c69..a192f780765 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -237,6 +237,7 @@ struct fib6_table { u32 tb6_id; rwlock_t tb6_lock; struct fib6_node tb6_root; + struct inet_peer_base tb6_peers; }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 78df0866cc3..4b347c0ca09 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -157,11 +158,12 @@ extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); FIB_RES_SADDR(net, res)) struct fib_table { - struct hlist_node tb_hlist; - u32 tb_id; - int tb_default; - int tb_num_default; - unsigned long tb_data[0]; + struct hlist_node tb_hlist; + u32 tb_id; + int tb_default; + int tb_num_default; + struct inet_peer_base tb_peers; + unsigned long tb_data[0]; }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18cbc15b20d..9b0f25930fb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1843,6 +1843,8 @@ int fib_table_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll); + inetpeer_invalidate_tree(&tb->tb_peers); + pr_debug("trie_flush found=%d\n", found); return found; } @@ -1991,6 +1993,7 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; + inet_peer_base_init(&tb->tb_peers); t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0c220a41662..7ef0743f06f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -197,6 +197,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) table->tb6_id = id; table->tb6_root.leaf = net->ipv6.ip6_null_entry; table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&table->tb6_peers); } return table; @@ -1633,6 +1634,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl), @@ -1643,6 +1645,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); #endif fib6_tables_init(net); @@ -1666,8 +1669,10 @@ static void fib6_net_exit(struct net *net) del_timer_sync(&net->ipv6.ip6_fib_timer); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); kfree(net->ipv6.fib6_local_tbl); #endif + inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); kfree(net->ipv6.fib6_main_tbl); kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); -- cgit v1.2.3-70-g09d2 From 7b34ca2ac7063f4ebf07f85fd75253ed84d5c648 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Jun 2012 04:13:57 -0700 Subject: inet: Avoid potential NULL peer dereference. We handle NULL in rt{,6}_set_peer but then our caller will try to pass that NULL pointer into inet_putpeer() which isn't ready for it. Fix this by moving the NULL check one level up, and then remove the now unnecessary NULL check from inetpeer_ptr_set_peer(). Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 2 +- net/ipv4/route.c | 11 ++++++----- net/ipv6/route.c | 10 ++++++---- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index e15c0862a68..c27c8f10ebd 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -104,7 +104,7 @@ static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *p unsigned long val = (unsigned long) peer; unsigned long orig = *ptr; - if (!(orig & INETPEER_BASE_BIT) || !val || + if (!(orig & INETPEER_BASE_BIT) || cmpxchg(ptr, orig, val) != orig) return false; return true; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4c33ce3000e..842510d5045 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1333,11 +1333,12 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) return; peer = inet_getpeer_v4(base, daddr, create); - - if (!rt_set_peer(rt, peer)) - inet_putpeer(peer); - else - rt->rt_peer_genid = rt_peer_genid(); + if (peer) { + if (!rt_set_peer(rt, peer)) + inet_putpeer(peer); + else + rt->rt_peer_genid = rt_peer_genid(); + } } /* diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9ba4808f26..58a3ec23da2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -313,10 +313,12 @@ void rt6_bind_peer(struct rt6_info *rt, int create) return; peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + else + rt->rt6i_peer_genid = rt6_peer_genid(); + } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, -- cgit v1.2.3-70-g09d2 From 55afabaa0df0dd139c8796a71beb43d1216fbe43 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 11 Jun 2012 15:52:29 -0700 Subject: inet: Fix BUG triggered by __rt{,6}_get_peer(). If no peer actually gets attached (either because create is zero or the peer allocation fails) we'll trigger a BUG because we unconditionally do an rt{,6}_peer_ptr() afterwards. Fix this by guarding it with the proper check. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- include/net/route.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f88a85cf31c..a2cda240ca9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -61,7 +61,7 @@ static inline struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) return rt6_peer_ptr(rt); rt6_bind_peer(rt, create); - return rt6_peer_ptr(rt); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); } static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) diff --git a/include/net/route.h b/include/net/route.h index cc693a5bb20..2bfbc9329ea 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -334,7 +334,7 @@ static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, i return rt_peer_ptr(rt); rt_bind_peer(rt, daddr, create); - return rt_peer_ptr(rt); + return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL); } static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) -- cgit v1.2.3-70-g09d2 From 5f246e890502fed387e0f959e2224ea680c03423 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Mon, 11 Jun 2012 11:13:07 +0300 Subject: Bluetooth: Update HCI timeouts constants to use msecs_to_jiffies The HCI constants are always used in form of jiffies. So just include the conversion from msecs in the define itself. This has the advantage of making the code where the timeout is used more readable and avoiding unnecessary conversions. The patch is similar to commit ba13ccd9 doing the same job for L2CAP Reported-by: Marcel Holtmann Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 10 +++++----- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_core.c | 25 +++++++++++-------------- 3 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 2a6b0b8b712..7dcd3495edd 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -139,11 +139,11 @@ enum { #define HCIINQUIRY _IOR('H', 240, int) /* HCI timeouts */ -#define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */ -#define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */ -#define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ -#define HCI_CMD_TIMEOUT (1000) /* 1 seconds */ -#define HCI_ACL_TX_TIMEOUT (45000) /* 45 seconds */ +#define HCI_DISCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_PAIRING_TIMEOUT msecs_to_jiffies(60000) /* 60 seconds */ +#define HCI_INIT_TIMEOUT msecs_to_jiffies(10000) /* 10 seconds */ +#define HCI_CMD_TIMEOUT msecs_to_jiffies(1000) /* 1 seconds */ +#define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 20fd57367dd..75766b7f0dc 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -598,7 +598,7 @@ static inline void hci_conn_put(struct hci_conn *conn) if (conn->type == ACL_LINK || conn->type == LE_LINK) { del_timer(&conn->idle_timer); if (conn->state == BT_CONNECTED) { - timeo = msecs_to_jiffies(conn->disc_timeout); + timeo = conn->disc_timeout; if (!conn->out) timeo *= 2; } else { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 471e4fb1b6e..e91bf7e1566 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -690,12 +690,11 @@ int hci_dev_open(__u16 dev) set_bit(HCI_INIT, &hdev->flags); hdev->init_last_cmd = 0; - ret = __hci_request(hdev, hci_init_req, 0, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); if (lmp_host_le_capable(hdev)) ret = __hci_request(hdev, hci_le_init_req, 0, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); clear_bit(HCI_INIT, &hdev->flags); } @@ -782,8 +781,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) if (!test_bit(HCI_RAW, &hdev->flags) && test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { set_bit(HCI_INIT, &hdev->flags); - __hci_request(hdev, hci_reset_req, 0, - msecs_to_jiffies(HCI_CMD_TIMEOUT)); + __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); clear_bit(HCI_INIT, &hdev->flags); } @@ -872,8 +870,7 @@ int hci_dev_reset(__u16 dev) hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) - ret = __hci_request(hdev, hci_reset_req, 0, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); done: hci_req_unlock(hdev); @@ -913,7 +910,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: err = hci_request(hdev, hci_auth_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETENCRYPT: @@ -925,23 +922,23 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ err = hci_request(hdev, hci_auth_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); if (err) break; } err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETSCAN: err = hci_request(hdev, hci_scan_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETLINKPOL: err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, - msecs_to_jiffies(HCI_INIT_TIMEOUT)); + HCI_INIT_TIMEOUT); break; case HCISETLINKMODE: @@ -2455,7 +2452,7 @@ static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) /* ACL tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ if (!cnt && time_after(jiffies, hdev->acl_last_tx + - msecs_to_jiffies(HCI_ACL_TX_TIMEOUT))) + HCI_ACL_TX_TIMEOUT)) hci_link_tx_to(hdev, ACL_LINK); } } @@ -2839,7 +2836,7 @@ static void hci_cmd_work(struct work_struct *work) del_timer(&hdev->cmd_timer); else mod_timer(&hdev->cmd_timer, - jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); + jiffies + HCI_CMD_TIMEOUT); } else { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); -- cgit v1.2.3-70-g09d2 From af7985bf85840e3dc90ba108a679db044f91f00e Mon Sep 17 00:00:00 2001 From: Jefferson Delfes Date: Mon, 11 Jun 2012 09:18:51 -0400 Subject: Bluetooth: Fix flags of mgmt_device_found event Change flags field to matches userspace structure. This field needs to be converted to little endian before forward it. Signed-off-by: Jefferson Delfes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 23fd0546fcc..4348ee8bda6 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -444,7 +444,7 @@ struct mgmt_ev_auth_failed { struct mgmt_ev_device_found { struct mgmt_addr_info addr; __s8 rssi; - __u8 flags[4]; + __le32 flags; __le16 eir_len; __u8 eir[0]; } __packed; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c72307cc25f..b4816632d72 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3546,9 +3546,9 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, ev->addr.type = link_to_bdaddr(link_type, addr_type); ev->rssi = rssi; if (cfm_name) - ev->flags[0] |= MGMT_DEV_FOUND_CONFIRM_NAME; + ev->flags |= MGMT_DEV_FOUND_CONFIRM_NAME; if (!ssp) - ev->flags[0] |= MGMT_DEV_FOUND_LEGACY_PAIRING; + ev->flags |= MGMT_DEV_FOUND_LEGACY_PAIRING; if (eir_len > 0) memcpy(ev->eir, eir, eir_len); @@ -3558,6 +3558,7 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, dev_class, 3); ev->eir_len = cpu_to_le16(eir_len); + ev->flags = cpu_to_le32(ev->flags); ev_size = sizeof(*ev) + eir_len; -- cgit v1.2.3-70-g09d2 From 73c3df3ba3f2d7fe3ea47f944282f3cda31c5505 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Jun 2012 11:17:14 +0200 Subject: cfg80211/nl80211: fix kernel-doc Add missing entries to nl80211.h and fix the kernel-doc notation in cfg80211.h. Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 33 ++++++++++++++++++++++++++++----- include/net/cfg80211.h | 8 ++++---- 2 files changed, 32 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e7b1fc1fe26..e4f41bdebc0 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -277,6 +277,12 @@ * @NL80211_CMD_NEW_SURVEY_RESULTS: survey data notification (as a reply to * NL80211_CMD_GET_SURVEY and on the "scan" multicast group) * + * @NL80211_CMD_SET_PMKSA: Add a PMKSA cache entry, using %NL80211_ATTR_MAC + * (for the BSSID) and %NL80211_ATTR_PMKID. + * @NL80211_CMD_DEL_PMKSA: Delete a PMKSA cache entry, using %NL80211_ATTR_MAC + * (for the BSSID) and %NL80211_ATTR_PMKID. + * @NL80211_CMD_FLUSH_PMKSA: Flush all PMKSA cache entries. + * * @NL80211_CMD_REG_CHANGE: indicates to userspace the regulatory domain * has been changed and provides details of the request information * that caused the change such as who initiated the regulatory request @@ -456,6 +462,10 @@ * the frame. * @NL80211_CMD_ACTION_TX_STATUS: Alias for @NL80211_CMD_FRAME_TX_STATUS for * backward compatibility. + * + * @NL80211_CMD_SET_POWER_SAVE: Set powersave, using %NL80211_ATTR_PS_STATE + * @NL80211_CMD_GET_POWER_SAVE: Get powersave status in %NL80211_ATTR_PS_STATE + * * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command * is used to configure connection quality monitoring notification trigger * levels. @@ -771,6 +781,13 @@ enum nl80211_commands { * section 7.3.2.25.1, e.g. 0x000FAC04) * @NL80211_ATTR_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and * CCMP keys, each six bytes in little endian + * @NL80211_ATTR_KEY_DEFAULT: Flag attribute indicating the key is default key + * @NL80211_ATTR_KEY_DEFAULT_MGMT: Flag attribute indicating the key is the + * default management key + * @NL80211_ATTR_CIPHER_SUITES_PAIRWISE: For crypto settings for connect or + * other commands, indicates which pairwise cipher suites are used + * @NL80211_ATTR_CIPHER_SUITE_GROUP: For crypto settings for connect or + * other commands, indicates which group cipher suite is used * * @NL80211_ATTR_BEACON_INTERVAL: beacon interval in TU * @NL80211_ATTR_DTIM_PERIOD: DTIM period for beaconing @@ -1006,6 +1023,8 @@ enum nl80211_commands { * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. * + * @NL80211_ATTR_PS_STATE: powersave state, using &enum nl80211_ps_state values. + * * @NL80211_ATTR_CQM: connection quality monitor configuration in a * nested attribute with %NL80211_ATTR_CQM_* sub-attributes. * @@ -1063,7 +1082,7 @@ enum nl80211_commands { * flag isn't set, the frame will be rejected. This is also used as an * nl80211 capability flag. * - * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16) + * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16) * * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags * attributes, specifying what a key should be set as default as. @@ -1087,10 +1106,10 @@ enum nl80211_commands { * indicate which WoW triggers should be enabled. This is also * used by %NL80211_CMD_GET_WOWLAN to get the currently enabled WoWLAN * triggers. - + * * @NL80211_ATTR_SCHED_SCAN_INTERVAL: Interval between scheduled scan * cycles, in msecs. - + * * @NL80211_ATTR_SCHED_SCAN_MATCH: Nested attribute with one or more * sets of attributes to match during scheduled scans. Only BSSs * that match any of the sets will be reported. These are @@ -1117,7 +1136,7 @@ enum nl80211_commands { * are managed in software: interfaces of these types aren't subject to * any restrictions in their number or combinations. * - * @%NL80211_ATTR_REKEY_DATA: nested attribute containing the information + * @NL80211_ATTR_REKEY_DATA: nested attribute containing the information * necessary for GTK rekeying in the device, see &enum nl80211_rekey_data. * * @NL80211_ATTR_SCAN_SUPP_RATES: rates per to be advertised as supported in scan, @@ -1184,7 +1203,6 @@ enum nl80211_commands { * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from * &enum nl80211_feature_flags and is advertised in wiphy information. * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe - * * requests while operating in AP-mode. * This attribute holds a bitmap of the supported protocols for * offloading (see &enum nl80211_probe_resp_offload_support_attr). @@ -2507,6 +2525,11 @@ enum nl80211_band { NL80211_BAND_5GHZ, }; +/** + * enum nl80211_ps_state - powersave state + * @NL80211_PS_DISABLED: powersave is disabled + * @NL80211_PS_ENABLED: powersave is enabled + */ enum nl80211_ps_state { NL80211_PS_DISABLED, NL80211_PS_ENABLED, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 778e533a973..76d54725ea3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -627,10 +627,10 @@ struct sta_bss_parameters { * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state - * @signal: the signal strength, type depends on the wiphy's signal_type - NOTE: For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. - * @signal_avg: avg signal strength, type depends on the wiphy's signal_type - NOTE: For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. + * @signal: The signal strength, type depends on the wiphy's signal_type. + * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. + * @signal_avg: Average signal strength, type depends on the wiphy's signal_type. + * For CFG80211_SIGNAL_TYPE_MBM, value is expressed in _dBm_. * @txrate: current unicast bitrate from this station * @rxrate: current unicast bitrate to this station * @rx_packets: packets received from this station -- cgit v1.2.3-70-g09d2 From ac1073a61d73b6277794d2efc872eb7e1b706b5c Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 14 Jun 2012 02:06:06 +0800 Subject: {nl,cfg,mac}80211: implement dot11MeshHWMProotInterval and dot11MeshHWMPactivePathToRootTimeout Add the mesh configuration parameters dot11MeshHWMProotInterval and dot11MeshHWMPactivePathToRootTimeout to be used by proactive PREQ mechanism. Signed-off-by: Chun-Yeow Yeoh [line-break commit log] Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 9 +++++++++ net/mac80211/cfg.c | 6 ++++++ net/mac80211/debugfs_netdev.c | 6 ++++++ net/wireless/mesh.c | 4 ++++ net/wireless/nl80211.c | 15 ++++++++++++++- 6 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e4f41bdebc0..6936fabe879 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2185,6 +2185,13 @@ enum nl80211_mntr_flags { * * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute * + * @NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT: The time (in TUs) for + * which mesh STAs receiving a proactive PREQ shall consider the forwarding + * information to the root mesh STA to be valid. + * + * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between + * proactive PREQs are transmitted. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2211,6 +2218,8 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_RSSI_THRESHOLD, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, NL80211_MESHCONF_HT_OPMODE, + NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + NL80211_MESHCONF_HWMP_ROOT_INTERVAL, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 76d54725ea3..e52b38d7b1b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -842,6 +842,13 @@ struct bss_parameters { * @rssi_threshold: the threshold for average signal strength of candidate * station to establish a peer link * @ht_opmode: mesh HT protection mode + * + * @dot11MeshHWMPactivePathToRootTimeout: The time (in TUs) for which mesh STAs + * receiving a proactive PREQ shall consider the forwarding information to + * the root mesh STA to be valid. + * + * @dot11MeshHWMProotInterval: The interval of time (in TUs) between proactive + * PREQs are transmitted. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -866,6 +873,8 @@ struct mesh_config { bool dot11MeshForwarding; s32 rssi_threshold; u16 ht_opmode; + u32 dot11MeshHWMPactivePathToRootTimeout; + u16 dot11MeshHWMProotInterval; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index cd8b1fb05d4..d93cda1c421 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1590,6 +1590,12 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, sdata->vif.bss_conf.ht_operation_mode = nconf->ht_opmode; ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); } + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, mask)) + conf->dot11MeshHWMPactivePathToRootTimeout = + nconf->dot11MeshHWMPactivePathToRootTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) + conf->dot11MeshHWMProotInterval = + nconf->dot11MeshHWMProotInterval; return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c429417e132..a8cea70902e 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -510,6 +510,10 @@ IEEE80211_IF_FILE(dot11MeshHWMPRannInterval, IEEE80211_IF_FILE(dot11MeshForwarding, u.mesh.mshcfg.dot11MeshForwarding, DEC); IEEE80211_IF_FILE(rssi_threshold, u.mesh.mshcfg.rssi_threshold, DEC); IEEE80211_IF_FILE(ht_opmode, u.mesh.mshcfg.ht_opmode, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPactivePathToRootTimeout, + u.mesh.mshcfg.dot11MeshHWMPactivePathToRootTimeout, DEC); +IEEE80211_IF_FILE(dot11MeshHWMProotInterval, + u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -611,6 +615,8 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshGateAnnouncementProtocol); MESHPARAMS_ADD(rssi_threshold); MESHPARAMS_ADD(ht_opmode); + MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); + MESHPARAMS_ADD(dot11MeshHWMProotInterval); #undef MESHPARAMS_ADD } #endif diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index b44c736bf9c..2f141cfd581 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -14,6 +14,8 @@ #define MESH_PATH_TIMEOUT 5000 #define MESH_RANN_INTERVAL 5000 +#define MESH_PATH_TO_ROOT_TIMEOUT 6000 +#define MESH_ROOT_INTERVAL 5000 /* * Minimum interval between two consecutive PREQs originated by the same @@ -62,6 +64,8 @@ const struct mesh_config default_mesh_config = { .dot11MeshForwarding = true, .rssi_threshold = MESH_RSSI_THRESHOLD, .ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED, + .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, + .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7db0aee8cd5..f8930db613d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3469,7 +3469,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_RSSI_THRESHOLD, cur_params.rssi_threshold) || nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE, - cur_params.ht_opmode)) + cur_params.ht_opmode) || + nla_put_u32(msg, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + cur_params.dot11MeshHWMPactivePathToRootTimeout) || + nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + cur_params.dot11MeshHWMProotInterval)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3505,6 +3509,8 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_FORWARDING] = { .type = NLA_U8 }, [NL80211_MESHCONF_RSSI_THRESHOLD] = { .type = NLA_U32 }, [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, + [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3612,6 +3618,13 @@ do {\ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, + mask, + NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, + mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3-70-g09d2 From 36393395536064e483b73d173f6afc103eadfbc4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 14 Jun 2012 22:21:46 -0700 Subject: ipv4: Handle PMTU in all ICMP error handlers. With ip_rt_frag_needed() removed, we have to explicitly update PMTU information in every ICMP error handler. Create two helper functions to facilitate this. 1) ipv4_sk_update_pmtu() This updates the PMTU when we have a socket context to work with. 2) ipv4_update_pmtu() Raw version, used when no socket context is available. For this interface, we essentially just pass in explicit arguments for the flow identity information we would have extracted from the socket. And you'll notice that ipv4_sk_update_pmtu() is simply implemented in terms of ipv4_update_pmtu() Note that __ip_route_output_key() is used, rather than something like ip_route_output_flow() or ip_route_output_key(). This is because we absolutely do not want to end up with a route that does IPSEC encapsulation and the like. Instead, we only want the route that would get us to the node described by the outermost IP header. Reported-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/route.h | 5 ++++- net/ipv4/ah4.c | 1 + net/ipv4/esp4.c | 1 + net/ipv4/ip_gre.c | 14 ++++++++++---- net/ipv4/ipcomp.c | 1 + net/ipv4/ipip.c | 15 +++++++++++---- net/ipv4/ping.c | 1 + net/ipv4/raw.c | 3 +++ net/ipv4/route.c | 28 ++++++++++++++++++++++++++++ net/ipv4/udp.c | 1 + net/ipv6/sit.c | 15 +++++++++++---- 11 files changed, 72 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index a36ae429ed5..47eb25ac1f7 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -215,7 +215,10 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s return ip_route_input_common(skb, dst, src, tos, devin, true); } -extern void ip_rt_send_redirect(struct sk_buff *skb); +extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, u32 mark, u8 protocol, int flow_flags); +extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); +extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); extern unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index e8f2617ecd4..916d5ecaf6c 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) return; pr_debug("pmtu discovery on SA AH/%08x/%08x\n", ntohl(ah->spi), ntohl(iph->daddr)); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index cb982a61536..7b95b49a36c 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", ntohl(esph->spi), ntohl(iph->daddr)); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f49047b7960..594cec35ac4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info) flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL || t->parms.iph.daddr == 0 || + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->parms.link, 0, IPPROTO_GRE, 0); + goto out; + } + + if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 63b64c45a82..b91375482d8 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", spi, &iph->daddr); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2d0f99bf61b..715338a1b20 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPIP, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2c00e8bf684..340fcf29a96 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4032b818f3e..659ddfb1094 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) int err = 0; int harderr = 0; + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + ipv4_sk_update_pmtu(skb, sk, info); + /* Report error on raw socket, if: 1. User requested ip_recverr. 2. Socket is connected (otherwise the error indication diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 655506af47c..41df5297a41 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1711,6 +1711,34 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } +void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, u32 mark, u8 protocol, int flow_flags) +{ + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct flowi4 fl4; + struct rtable *rt; + + flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, + protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + iph->daddr, iph->saddr, 0, 0); + rt = __ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) { + ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_put(rt); + } +} +EXPORT_SYMBOL_GPL(ipv4_update_pmtu); + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct inet_sock *inet = inet_sk(sk); + + return ipv4_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk)); +} +EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); static void ipv4_validate_peer(struct rtable *rt) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eaca73644e7..db017efb76e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -615,6 +615,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); if (inet->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 60415711563..49aea94c9be 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -551,7 +548,17 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPV6, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; -- cgit v1.2.3-70-g09d2 From 81aded24675ebda5de8a68843250ad15584ac38a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 15 Jun 2012 14:54:11 -0700 Subject: ipv6: Handle PMTU in ICMP error handlers. One tricky issue on the ipv6 side vs. ipv4 is that the ICMP callouts to handle the error pass the 32-bit info cookie in network byte order whereas ipv4 passes it around in host byte order. Like the ipv4 side, we have two helper functions. One for when we have a socket context and one for when we do not. ip6ip6 tunnels are not handled here, because they handle PMTU events by essentially relaying another ICMP packet-too-big message back to the original sender. This patch allows us to get rid of rt6_do_pmtu_disc(). It handles all kinds of situations that simply cannot happen when we do the PMTU update directly using a fully resolved route. In fact, the "plen == 128" check in ip6_rt_update_pmtu() can very likely be removed or changed into a BUG_ON() check. We should never have a prefixed ipv6 route when we get there. Another piece of strange history here is that TCP and DCCP, unlike in ipv4, never invoke the update_pmtu() method from their ICMP error handlers. This is incredibly astonishing since this is the context where we have the most accurate context in which to make a PMTU update, namely we have a fully connected socket and associated cached socket route. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 8 +-- net/dccp/ipv6.c | 2 + net/ipv6/ah6.c | 3 +- net/ipv6/esp6.c | 2 + net/ipv6/icmp.c | 6 +- net/ipv6/ipcomp6.c | 2 + net/ipv6/raw.c | 5 +- net/ipv6/route.c | 143 +++++++++++------------------------------------- net/ipv6/tcp_ipv6.c | 2 + net/ipv6/udp.c | 3 + 10 files changed, 54 insertions(+), 122 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a2cda240ca9..58cb3fc3487 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,10 +140,10 @@ extern void rt6_redirect(const struct in6_addr *dest, u8 *lladdr, int on_link); -extern void rt6_pmtu_discovery(const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct net_device *dev, - u32 pmtu); +extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, + int oif, u32 mark); +extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, + __be32 mtu); struct netlink_callback; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fa9512d86f3..9991be083ad 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f1a4a2c28ed..49d4d26bda8 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); - + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index db1521fcda5..89a615ba84f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ed89bba745a..5247d5c211f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; - const struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; u8 type; @@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb) XFRM_STATE_ICMP)) goto drop_no_count; - if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); @@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); - orig_hdr = (struct ipv6hdr *) (hdr + 1); - rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, - ntohl(hdr->icmp6_mtu)); /* * Drop through to notify diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5cb75bfe45b..92832385a8e 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 93d69836fde..43b0042f15f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, return; harderr = icmpv6_err_convert(type, code, &err); - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); - + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58a3ec23da2..0d41f68daff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; + dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + struct net *net = dev_net(dst->dev); + rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } +void ip6_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, __be32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + ip6_rt_update_pmtu(dst, ntohl(mtu)); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_update_pmtu); + +void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) +{ + ip6_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -1703,116 +1736,6 @@ out: dst_release(&rt->dst); } -/* - * Handle ICMP "packet too big" messages - * i.e. Path MTU discovery - */ - -static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net *net, u32 pmtu, int ifindex) -{ - struct rt6_info *rt, *nrt; - int allfrag = 0; -again: - rt = rt6_lookup(net, daddr, saddr, ifindex, 0); - if (!rt) - return; - - if (rt6_check_expired(rt)) { - ip6_del_rt(rt); - goto again; - } - - if (pmtu >= dst_mtu(&rt->dst)) - goto out; - - if (pmtu < IPV6_MIN_MTU) { - /* - * According to RFC2460, PMTU is set to the IPv6 Minimum Link - * MTU (1280) and a fragment header should always be included - * after a node receiving Too Big message reporting PMTU is - * less than the IPv6 Minimum Link MTU. - */ - pmtu = IPV6_MIN_MTU; - allfrag = 1; - } - - /* New mtu received -> path was valid. - They are sent only in response to data packets, - so that this nexthop apparently is reachable. --ANK - */ - dst_confirm(&rt->dst); - - /* Host route. If it is static, it would be better - not to override it, but add new one, so that - when cache entry will expire old pmtu - would return automatically. - */ - if (rt->rt6i_flags & RTF_CACHE) { - dst_metric_set(&rt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&rt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&rt->dst, RTAX_FEATURES, features); - } - rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); - rt->rt6i_flags |= RTF_MODIFIED; - goto out; - } - - /* Network route. - Two cases are possible: - 1. It is connected route. Action: COW - 2. It is gatewayed route or NONEXTHOP route. Action: clone it. - */ - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, daddr, saddr); - else - nrt = rt6_alloc_clone(rt, daddr); - - if (nrt) { - dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&nrt->dst, RTAX_FEATURES, features); - } - - /* According to RFC 1981, detecting PMTU increase shouldn't be - * happened within 5 mins, the recommended timer is 10 mins. - * Here this route expiration time is set to ip6_rt_mtu_expires - * which is 10 mins. After 10 mins the decreased pmtu is expired - * and detecting PMTU increase will be automatically happened. - */ - rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); - nrt->rt6i_flags |= RTF_DYNAMIC; - ip6_ins_rt(nrt); - } -out: - dst_release(&rt->dst); -} - -void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) -{ - struct net *net = dev_net(dev); - - /* - * RFC 1981 states that a node "MUST reduce the size of the packets it - * is sending along the path" that caused the Packet Too Big message. - * Since it's not possible in the general case to determine which - * interface was used to send the original packet, we update the MTU - * on the interface that will be used to send future packets. We also - * update the MTU on the interface that received the Packet Too Big in - * case the original packet was forced out that interface with - * SO_BINDTODEVICE or similar. This is the next best thing to the - * correct behaviour, which would be to update the MTU on all - * interfaces. - */ - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); -} - /* * Misc support functions */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f91b0bfd12d..26a88623940 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f05099fc590..051ad481973 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) + ip6_sk_update_pmtu(skb, sk, info); + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) -- cgit v1.2.3-70-g09d2 From 2a0c451ade8e1783c5d453948289e4a978d417c9 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 14 Jun 2012 23:00:17 +0000 Subject: ipv6: Prevent access to uninitialized fib_table_hash via /proc/net/ipv6_route /proc/net/ipv6_route reflects the contents of fib_table_hash. The proc handler is installed in ip6_route_net_init() whereas fib_table_hash is allocated in fib6_net_init() _after_ the proc handler has been installed. This opens up a short time frame to access fib_table_hash with its pants down. fib6_init() as a whole can't be moved to an earlier position as it also registers the rtnetlink message handlers which should be registered at the end. Therefore split it into fib6_init() which is run early and fib6_init_late() to register the rtnetlink message handlers. Signed-off-by: Thomas Graf Reviewed-by: Neil Horman Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_fib.c | 18 +++++++++++------- net/ipv6/route.c | 16 +++++++++++----- 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0ae759a6c76..209af13b033 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -271,6 +271,8 @@ extern void fib6_run_gc(unsigned long expires, extern void fib6_gc_cleanup(void); extern int fib6_init(void); +extern int fib6_init_late(void); +extern void fib6_cleanup_late(void); #ifdef CONFIG_IPV6_MULTIPLE_TABLES extern int fib6_rules_init(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 74c21b924a7..fbd4afff05f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1692,21 +1692,25 @@ int __init fib6_init(void) ret = register_pernet_subsys(&fib6_net_ops); if (ret) goto out_kmem_cache_create; - - ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); - if (ret) - goto out_unregister_subsys; out: return ret; -out_unregister_subsys: - unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; } +int __init fib6_init_late(void) +{ + return __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); +} + +void fib6_cleanup_late(void) +{ + rtnl_unregister(PF_INET6, RTM_GETROUTE); +} + void fib6_gc_cleanup(void) { unregister_pernet_subsys(&fib6_net_ops); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 999a982ad3f..dc60bf58596 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3018,10 +3018,14 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = fib6_init(); if (ret) goto out_dst_entries; + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_fib6_init; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -3035,13 +3039,13 @@ int __init ip6_route_init(void) init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif - ret = fib6_init(); + ret = fib6_init_late(); if (ret) goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_fib6_init; + goto out_fib6_init_late; ret = fib6_rules_init(); if (ret) @@ -3064,10 +3068,12 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_fib6_init: - fib6_gc_cleanup(); +out_fib6_init_late: + fib6_cleanup_late(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_fib6_init: + fib6_gc_cleanup(); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: -- cgit v1.2.3-70-g09d2 From e8803b6c387129059e04d9e14d49efda250a7361 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 16 Jun 2012 01:12:19 -0700 Subject: Revert "ipv6: Prevent access to uninitialized fib_table_hash via /proc/net/ipv6_route" This reverts commit 2a0c451ade8e1783c5d453948289e4a978d417c9. It causes crashes, because now ip6_null_entry is used before it is initialized. Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 -- net/ipv6/ip6_fib.c | 18 +++++++----------- net/ipv6/route.c | 16 +++++----------- 3 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 209af13b033..0ae759a6c76 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -271,8 +271,6 @@ extern void fib6_run_gc(unsigned long expires, extern void fib6_gc_cleanup(void); extern int fib6_init(void); -extern int fib6_init_late(void); -extern void fib6_cleanup_late(void); #ifdef CONFIG_IPV6_MULTIPLE_TABLES extern int fib6_rules_init(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fbd4afff05f..74c21b924a7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1692,25 +1692,21 @@ int __init fib6_init(void) ret = register_pernet_subsys(&fib6_net_ops); if (ret) goto out_kmem_cache_create; + + ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, + NULL); + if (ret) + goto out_unregister_subsys; out: return ret; +out_unregister_subsys: + unregister_pernet_subsys(&fib6_net_ops); out_kmem_cache_create: kmem_cache_destroy(fib6_node_kmem); goto out; } -int __init fib6_init_late(void) -{ - return __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); -} - -void fib6_cleanup_late(void) -{ - rtnl_unregister(PF_INET6, RTM_GETROUTE); -} - void fib6_gc_cleanup(void) { unregister_pernet_subsys(&fib6_net_ops); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc60bf58596..999a982ad3f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3018,13 +3018,9 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = fib6_init(); - if (ret) - goto out_dst_entries; - ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) - goto out_fib6_init; + goto out_dst_entries; ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; @@ -3039,13 +3035,13 @@ int __init ip6_route_init(void) init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #endif - ret = fib6_init_late(); + ret = fib6_init(); if (ret) goto out_register_subsys; ret = xfrm6_init(); if (ret) - goto out_fib6_init_late; + goto out_fib6_init; ret = fib6_rules_init(); if (ret) @@ -3068,12 +3064,10 @@ fib6_rules_init: fib6_rules_cleanup(); xfrm6_init: xfrm6_fini(); -out_fib6_init_late: - fib6_cleanup_late(); -out_register_subsys: - unregister_pernet_subsys(&ip6_route_net_ops); out_fib6_init: fib6_gc_cleanup(); +out_register_subsys: + unregister_pernet_subsys(&ip6_route_net_ops); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: -- cgit v1.2.3-70-g09d2 From 3a8fc53a45c444400259e2e285ba414a87061e3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 15 Jan 2012 16:34:08 +0100 Subject: netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names This patch modifies the struct nf_conntrack_helper to allocate the room for the helper name. The maximum length is 16 bytes (this was already introduced in 2.6.24). For the maximum length for expectation policy names, I have also selected 16 bytes. This patch is required by the follow-up patch to support user-space connection tracking helpers. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 4 +++- include/net/netfilter/nf_conntrack_helper.h | 2 +- net/netfilter/nf_conntrack_ftp.c | 8 ++------ net/netfilter/nf_conntrack_irc.c | 8 ++------ net/netfilter/nf_conntrack_sane.c | 8 ++------ net/netfilter/nf_conntrack_sip.c | 7 ++----- net/netfilter/nf_conntrack_tftp.c | 8 ++------ 7 files changed, 14 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4619caadd9d..983f0026324 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -59,10 +59,12 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) return nf_ct_net(exp->master); } +#define NF_CT_EXP_POLICY_NAME_LEN 16 + struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; - const char *name; + char name[NF_CT_EXP_POLICY_NAME_LEN]; }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1d1889409b9..5f5a4d9d4df 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -19,7 +19,7 @@ struct module; struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ - const char *name; /* name of the module */ + char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 8c5c95c6d34..44e47c9e14f 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -512,7 +512,6 @@ out_update_nl: } static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; -static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { .max_expected = 1, @@ -541,7 +540,6 @@ static void nf_conntrack_ftp_fini(void) static int __init nf_conntrack_ftp_init(void) { int i, j = -1, ret = 0; - char *tmpname; ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) @@ -561,12 +559,10 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][j].expect_policy = &ftp_exp_policy; ftp[i][j].me = THIS_MODULE; ftp[i][j].help = help; - tmpname = &ftp_names[i][j][0]; if (ports[i] == FTP_PORT) - sprintf(tmpname, "ftp"); + sprintf(ftp[i][j].name, "ftp"); else - sprintf(tmpname, "ftp-%d", ports[i]); - ftp[i][j].name = tmpname; + sprintf(ftp[i][j].name, "ftp-%d", ports[i]); pr_debug("nf_ct_ftp: registering helper for pf: %d " "port: %d\n", diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 81366c11827..009c52cfd1e 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -221,7 +221,6 @@ static int help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly; -static char irc_names[MAX_PORTS][sizeof("irc-65535")] __read_mostly; static struct nf_conntrack_expect_policy irc_exp_policy; static void nf_conntrack_irc_fini(void); @@ -229,7 +228,6 @@ static void nf_conntrack_irc_fini(void); static int __init nf_conntrack_irc_init(void) { int i, ret; - char *tmpname; if (max_dcc_channels < 1) { printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n"); @@ -255,12 +253,10 @@ static int __init nf_conntrack_irc_init(void) irc[i].me = THIS_MODULE; irc[i].help = help; - tmpname = &irc_names[i][0]; if (ports[i] == IRC_PORT) - sprintf(tmpname, "irc"); + sprintf(irc[i].name, "irc"); else - sprintf(tmpname, "irc-%u", i); - irc[i].name = tmpname; + sprintf(irc[i].name, "irc-%u", i); ret = nf_conntrack_helper_register(&irc[i]); if (ret) { diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 8501823b3f9..ec3fc18c4ef 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -163,7 +163,6 @@ out: } static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; -static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sane_exp_policy = { .max_expected = 1, @@ -190,7 +189,6 @@ static void nf_conntrack_sane_fini(void) static int __init nf_conntrack_sane_init(void) { int i, j = -1, ret = 0; - char *tmpname; sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) @@ -210,12 +208,10 @@ static int __init nf_conntrack_sane_init(void) sane[i][j].expect_policy = &sane_exp_policy; sane[i][j].me = THIS_MODULE; sane[i][j].help = help; - tmpname = &sane_names[i][j][0]; if (ports[i] == SANE_PORT) - sprintf(tmpname, "sane"); + sprintf(sane[i][j].name, "sane"); else - sprintf(tmpname, "sane-%d", ports[i]); - sane[i][j].name = tmpname; + sprintf(sane[i][j].name, "sane-%d", ports[i]); pr_debug("nf_ct_sane: registering helper for pf: %d " "port: %d\n", diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 93faf6a3a63..dfd3ff38224 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1556,7 +1556,6 @@ static void nf_conntrack_sip_fini(void) static int __init nf_conntrack_sip_init(void) { int i, j, ret; - char *tmpname; if (ports_c == 0) ports[ports_c++] = SIP_PORT; @@ -1584,12 +1583,10 @@ static int __init nf_conntrack_sip_init(void) sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; - tmpname = &sip_names[i][j][0]; if (ports[i] == SIP_PORT) - sprintf(tmpname, "sip"); + sprintf(sip_names[i][j], "sip"); else - sprintf(tmpname, "sip-%u", i); - sip[i][j].name = tmpname; + sprintf(sip_names[i][j], "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 75466fd72f4..81fc61c0526 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -92,7 +92,6 @@ static int tftp_help(struct sk_buff *skb, } static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly; -static char tftp_names[MAX_PORTS][2][sizeof("tftp-65535")] __read_mostly; static const struct nf_conntrack_expect_policy tftp_exp_policy = { .max_expected = 1, @@ -112,7 +111,6 @@ static void nf_conntrack_tftp_fini(void) static int __init nf_conntrack_tftp_init(void) { int i, j, ret; - char *tmpname; if (ports_c == 0) ports[ports_c++] = TFTP_PORT; @@ -129,12 +127,10 @@ static int __init nf_conntrack_tftp_init(void) tftp[i][j].me = THIS_MODULE; tftp[i][j].help = tftp_help; - tmpname = &tftp_names[i][j][0]; if (ports[i] == TFTP_PORT) - sprintf(tmpname, "tftp"); + sprintf(tftp[i][j].name, "tftp"); else - sprintf(tmpname, "tftp-%u", i); - tftp[i][j].name = tmpname; + sprintf(tftp[i][j].name, "tftp-%u", i); ret = nf_conntrack_helper_register(&tftp[i][j]); if (ret) { -- cgit v1.2.3-70-g09d2 From 3cf4c7e381d9a98a44fd86207b950bd8fef55d20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Feb 2012 16:18:31 +0100 Subject: netfilter: nf_ct_ext: support variable length extensions We can now define conntrack extensions of variable size. This patch is useful to get rid of these unions: union nf_conntrack_help union nf_conntrack_proto union nf_conntrack_nat_help Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_extend.h | 9 ++++++--- net/netfilter/nf_conntrack_extend.c | 16 +++++++++------- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 96755c3798a..8b4d1fc2909 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -80,10 +80,13 @@ static inline void nf_ct_ext_free(struct nf_conn *ct) } /* Add this type, returns pointer to data or NULL. */ -void * -__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); +void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp); + #define nf_ct_ext_add(ct, id, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp))) + ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp))) +#define nf_ct_ext_add_length(ct, id, len, gfp) \ + ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp))) #define NF_CT_EXT_F_PREALLOC 0x0001 diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 641ff5f9671..1a9545965c0 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -44,7 +44,8 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) EXPORT_SYMBOL(__nf_ct_ext_destroy); static void * -nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) +nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp) { unsigned int off, len; struct nf_ct_ext_type *t; @@ -54,8 +55,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); - len = off + t->len; - alloc_size = t->alloc_size; + len = off + t->len + var_alloc_len; + alloc_size = t->alloc_size + var_alloc_len; rcu_read_unlock(); *ext = kzalloc(alloc_size, gfp); @@ -68,7 +69,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) return (void *)(*ext) + off; } -void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) +void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp) { struct nf_ct_ext *old, *new; int i, newlen, newoff; @@ -79,7 +81,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) old = ct->ext; if (!old) - return nf_ct_ext_create(&ct->ext, id, gfp); + return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp); if (__nf_ct_ext_exist(old, id)) return NULL; @@ -89,7 +91,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) BUG_ON(t == NULL); newoff = ALIGN(old->len, t->align); - newlen = newoff + t->len; + newlen = newoff + t->len + var_alloc_len; rcu_read_unlock(); new = __krealloc(old, newlen, gfp); @@ -117,7 +119,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) memset((void *)new + newoff, 0, newlen - newoff); return (void *)new + newoff; } -EXPORT_SYMBOL(__nf_ct_ext_add); +EXPORT_SYMBOL(__nf_ct_ext_add_length); static void update_alloc_size(struct nf_ct_ext_type *type) { -- cgit v1.2.3-70-g09d2 From 1afc56794e03229fa53cfa3c5012704d226e1dec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Jun 2012 12:11:50 +0200 Subject: netfilter: nf_ct_helper: implement variable length helper private data This patch uses the new variable length conntrack extensions. Instead of using union nf_conntrack_help that contain all the helper private data information, we allocate variable length area to store the private helper data. This patch includes the modification of all existing helpers. It also includes a couple of include header to avoid compilation warnings. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sip.h | 2 ++ include/net/netfilter/nf_conntrack.h | 35 ++--------------------- include/net/netfilter/nf_conntrack_helper.h | 15 +++++++++- net/ipv4/netfilter/nf_nat_amanda.c | 4 +-- net/ipv4/netfilter/nf_nat_h323.c | 8 +++--- net/ipv4/netfilter/nf_nat_pptp.c | 6 ++-- net/ipv4/netfilter/nf_nat_tftp.c | 4 +-- net/netfilter/nf_conntrack_core.c | 3 +- net/netfilter/nf_conntrack_ftp.c | 3 +- net/netfilter/nf_conntrack_h323_main.c | 16 +++++++---- net/netfilter/nf_conntrack_helper.c | 11 +++++--- net/netfilter/nf_conntrack_netlink.c | 4 +-- net/netfilter/nf_conntrack_pptp.c | 17 +++++------ net/netfilter/nf_conntrack_proto_gre.c | 16 +++++------ net/netfilter/nf_conntrack_sane.c | 4 +-- net/netfilter/nf_conntrack_sip.c | 25 ++++++++-------- net/netfilter/xt_CT.c | 44 +++++++++++++++++------------ 17 files changed, 111 insertions(+), 106 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 0ce91d56a5f..0dfc8b7210a 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -2,6 +2,8 @@ #define __NF_CONNTRACK_SIP_H__ #ifdef __KERNEL__ +#include + #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cce7f6a798b..f1494feba79 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -39,36 +39,6 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; -/* Add protocol helper include file here */ -#include -#include -#include -#include -#include - -/* per conntrack: application helper private data */ -union nf_conntrack_help { - /* insert conntrack helper private data (master) here */ -#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE) - struct nf_ct_ftp_master ct_ftp_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_PPTP) || \ - defined(CONFIG_NF_CONNTRACK_PPTP_MODULE) - struct nf_ct_pptp_master ct_pptp_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_H323) || \ - defined(CONFIG_NF_CONNTRACK_H323_MODULE) - struct nf_ct_h323_master ct_h323_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_SANE) || \ - defined(CONFIG_NF_CONNTRACK_SANE_MODULE) - struct nf_ct_sane_master ct_sane_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE) - struct nf_ct_sip_master ct_sip_info; -#endif -}; - #include #include #include @@ -89,12 +59,13 @@ struct nf_conn_help { /* Helper. if any */ struct nf_conntrack_helper __rcu *helper; - union nf_conntrack_help help; - struct hlist_head expectations; /* Current number of expected connections */ u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[]; }; #include diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 5f5a4d9d4df..061352f71a8 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -11,6 +11,7 @@ #define _NF_CONNTRACK_HELPER_H #include #include +#include struct module; @@ -23,6 +24,9 @@ struct nf_conntrack_helper { struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; + /* length of internal data, ie. sizeof(struct nf_ct_*_master) */ + size_t data_len; + /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -48,7 +52,7 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); -extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); +extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags); @@ -60,6 +64,15 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); } +static inline void *nfct_help_data(const struct nf_conn *ct) +{ + struct nf_conn_help *help; + + help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); + + return (void *)help->data; +} + extern int nf_conntrack_helper_init(struct net *net); extern void nf_conntrack_helper_fini(struct net *net); diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 7b22382ff0e..3c04d24e297 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -13,10 +13,10 @@ #include #include -#include -#include #include #include +#include +#include #include MODULE_AUTHOR("Brian J. Murrell "); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index cad29c12131..c6784a18c1c 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -95,7 +95,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int i; __be16 port; @@ -178,7 +178,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int i; u_int16_t nated_port; @@ -330,7 +330,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = ntohs(port); @@ -419,7 +419,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = ntohs(port); union nf_inet_addr addr; diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index c273d58980a..388140881eb 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -49,7 +49,7 @@ static void pptp_nat_expected(struct nf_conn *ct, const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_ipv4_range range; - ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; /* And here goes the grand finale of corrosion... */ @@ -123,7 +123,7 @@ pptp_outbound_pkt(struct sk_buff *skb, __be16 new_callid; unsigned int cid_off; - ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(ct); nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; new_callid = ct_pptp_info->pns_call_id; @@ -192,7 +192,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; - ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(ct); nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; /* save original PAC call ID in nat_info */ diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index a2901bf829c..9dbb8d284f9 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -8,10 +8,10 @@ #include #include -#include -#include #include #include +#include +#include #include MODULE_AUTHOR("Magnus Boden "); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1ee2082b81b..cf4875565d6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -819,7 +819,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = exp->master; if (exp->helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, exp->helper, + GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 44e47c9e14f..4bb771d1f57 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -358,7 +358,7 @@ static int help(struct sk_buff *skb, u32 seq; int dir = CTINFO2DIR(ctinfo); unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); - struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; + struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; struct nf_conntrack_man cmd = {}; @@ -554,6 +554,7 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][0].tuple.src.l3num = PF_INET; ftp[i][1].tuple.src.l3num = PF_INET6; for (j = 0; j < 2; j++) { + ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master); ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; ftp[i][j].expect_policy = &ftp_exp_policy; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 46d69d7f1bb..ed219928052 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -114,7 +114,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); const struct tcphdr *th; struct tcphdr _tcph; @@ -618,6 +618,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, @@ -1170,6 +1171,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { { .name = "Q.931", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -1245,7 +1247,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; @@ -1360,7 +1362,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int ret; typeof(set_ras_addr_hook) set_ras_addr; @@ -1395,7 +1397,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret; struct nf_conntrack_expect *exp; @@ -1444,7 +1446,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret; typeof(set_sig_addr_hook) set_sig_addr; @@ -1476,7 +1478,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { - const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); __be16 port; union nf_inet_addr addr; @@ -1743,6 +1745,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1752,6 +1755,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4fa2ff961f5..9c18ecb0ab8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -161,11 +161,14 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); -struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) +struct nf_conn_help * +nf_ct_helper_ext_add(struct nf_conn *ct, + struct nf_conntrack_helper *helper, gfp_t gfp) { struct nf_conn_help *help; - help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); + help = nf_ct_ext_add_length(ct, NF_CT_EXT_HELPER, + helper->data_len, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else @@ -218,13 +221,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, } if (help == NULL) { - help = nf_ct_helper_ext_add(ct, flags); + help = nf_ct_helper_ext_add(ct, helper, flags); if (help == NULL) { ret = -ENOMEM; goto out; } } else { - memset(&help->help, 0, sizeof(help->help)); + memset(help->data, 0, helper->data_len); } rcu_assign_pointer(help->helper, helper); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6f4b00a8fc7..a08892048b4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1218,7 +1218,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) if (help->helper) return -EBUSY; /* need to zero data of old helper */ - memset(&help->help, 0, sizeof(help->help)); + memset(help->data, 0, help->helper->data_len); } else { /* we cannot set a helper for an existing conntrack */ return -EOPNOTSUPP; @@ -1440,7 +1440,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } else { struct nf_conn_help *help; - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC); if (help == NULL) { err = -ENOMEM; goto err2; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 31d56b23b9e..6fed9ec3524 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -174,7 +174,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, static void pptp_destroy_siblings(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - const struct nf_conn_help *help = nfct_help(ct); + const struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_conntrack_tuple t; nf_ct_gre_keymap_destroy(ct); @@ -182,16 +182,16 @@ static void pptp_destroy_siblings(struct nf_conn *ct) /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; - t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; + t.src.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.u.gre.key = ct_pptp_info->pac_call_id; if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; - t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } @@ -269,7 +269,7 @@ pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct nf_ct_pptp_master *info = nfct_help_data(ct); u_int16_t msg; __be16 cid = 0, pcid = 0; typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; @@ -396,7 +396,7 @@ pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct nf_ct_pptp_master *info = nfct_help_data(ct); u_int16_t msg; __be16 cid = 0, pcid = 0; typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; @@ -506,7 +506,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, { int dir = CTINFO2DIR(ctinfo); - const struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + const struct nf_ct_pptp_master *info = nfct_help_data(ct); const struct tcphdr *tcph; struct tcphdr _tcph; const struct pptp_pkt_hdr *pptph; @@ -592,6 +592,7 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = { static struct nf_conntrack_helper pptp __read_mostly = { .name = "pptp", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_pptp_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 25ba5a2f5ed..5cac41c2fa0 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -117,10 +117,10 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, { struct net *net = nf_ct_net(ct); struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; - kmp = &help->help.ct_pptp_info.keymap[dir]; + kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ read_lock_bh(&net_gre->keymap_lock); @@ -158,19 +158,19 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); write_lock_bh(&net_gre->keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { - if (help->help.ct_pptp_info.keymap[dir]) { + if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", - help->help.ct_pptp_info.keymap[dir]); - list_del(&help->help.ct_pptp_info.keymap[dir]->list); - kfree(help->help.ct_pptp_info.keymap[dir]); - help->help.ct_pptp_info.keymap[dir] = NULL; + ct_pptp_info->keymap[dir]); + list_del(&ct_pptp_info->keymap[dir]->list); + kfree(ct_pptp_info->keymap[dir]); + ct_pptp_info->keymap[dir] = NULL; } } write_unlock_bh(&net_gre->keymap_lock); diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index ec3fc18c4ef..295429f3908 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -69,13 +69,12 @@ static int help(struct sk_buff *skb, void *sb_ptr; int ret = NF_ACCEPT; int dir = CTINFO2DIR(ctinfo); - struct nf_ct_sane_master *ct_sane_info; + struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; struct sane_request *req; struct sane_reply_net_start *reply; - ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -203,6 +202,7 @@ static int __init nf_conntrack_sane_init(void) sane[i][0].tuple.src.l3num = PF_INET; sane[i][1].tuple.src.l3num = PF_INET6; for (j = 0; j < 2; j++) { + sane[i][j].data_len = sizeof(struct nf_ct_sane_master); sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); sane[i][j].tuple.dst.protonum = IPPROTO_TCP; sane[i][j].expect_policy = &sane_exp_policy; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index dfd3ff38224..758a1bacc12 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1075,12 +1075,12 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1091,12 +1091,12 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1107,12 +1107,12 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1123,13 +1123,13 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int ret; flush_expectations(ct, true); ret = process_sdp(skb, dataoff, dptr, datalen, cseq); if (ret == NF_ACCEPT) - help->help.ct_sip_info.invite_cseq = cseq; + ct_sip_info->invite_cseq = cseq; return ret; } @@ -1154,7 +1154,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; @@ -1235,7 +1235,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, store_cseq: if (ret == NF_ACCEPT) - help->help.ct_sip_info.register_cseq = cseq; + ct_sip_info->register_cseq = cseq; return ret; } @@ -1245,7 +1245,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; @@ -1262,7 +1262,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, * responses, so we store the sequence number of the last valid * request and compare it here. */ - if (help->help.ct_sip_info.register_cseq != cseq) + if (ct_sip_info->register_cseq != cseq) return NF_ACCEPT; if (code >= 100 && code <= 199) @@ -1578,6 +1578,7 @@ static int __init nf_conntrack_sip_init(void) sip[i][3].help = sip_help_tcp; for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { + sip[i][j].data_len = sizeof(struct nf_ct_sip_master); sip[i][j].tuple.src.u.udp.port = htons(ports[i]); sip[i][j].expect_policy = sip_exp_policy; sip[i][j].expect_class_max = SIP_EXPECT_MAX; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a51de9b052b..116018560c6 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -112,6 +112,8 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { + struct nf_conntrack_helper *helper; + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { @@ -120,19 +122,21 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; } - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) - goto err3; - ret = -ENOENT; - help->helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (help->helper == NULL) { + helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (helper == NULL) { pr_info("No such helper \"%s\"\n", info->helper); goto err3; } + + ret = -ENOMEM; + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = helper; } __set_bit(IPS_TEMPLATE_BIT, &ct->status); @@ -202,6 +206,8 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { + struct nf_conntrack_helper *helper; + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { @@ -210,19 +216,21 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; } - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) - goto err3; - ret = -ENOENT; - help->helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (help->helper == NULL) { + helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (helper == NULL) { pr_info("No such helper \"%s\"\n", info->helper); goto err3; } + + ret = -ENOMEM; + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = helper; } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -- cgit v1.2.3-70-g09d2 From 8c88f87cb27ad09086940bdd3e6955e5325ec89a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Jun 2012 13:31:25 +0200 Subject: netfilter: nfnetlink_queue: add NAT TCP sequence adjustment if packet mangled User-space programs that receive traffic via NFQUEUE may mangle packets. If NAT is enabled, this usually puzzles sequence tracking, leading to traffic disruptions. With this patch, nfnl_queue will make the corresponding NAT TCP sequence adjustment if: 1) The packet has been mangled, 2) the NFQA_CFG_F_CONNTRACK flag has been set, and 3) NAT is detected. There are some records on the Internet complaning about this issue: http://stackoverflow.com/questions/260757/packet-mangling-utilities-besides-iptables By now, we only support TCP since we have no helpers for DCCP or SCTP. Better to add this if we ever have some helper over those layer 4 protocols. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 ++ include/net/netfilter/nf_nat_helper.h | 4 ++++ net/ipv4/netfilter/nf_nat_helper.c | 13 +++++++++++++ net/netfilter/nf_conntrack_netlink.c | 4 ++++ net/netfilter/nfnetlink_queue.c | 19 +++++++++++-------- 5 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ba65bfbd7f7..dca19e61b30 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -401,6 +401,8 @@ struct nfq_ct_hook { size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); + void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off); }; extern struct nfq_ct_hook *nfq_ct_hook; #else diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 02bb6c29dc3..7d8fb7b46c4 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -54,4 +54,8 @@ extern void nf_nat_follow_master(struct nf_conn *ct, extern s16 nf_nat_get_offset(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); + +extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 dir, int off); + #endif diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index af65958f630..2e59ad0b90c 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -153,6 +153,19 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); +void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); + static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, int datalen, __sum16 *check, int oldlen) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d304d591795..8be0ab9b475 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -46,6 +46,7 @@ #ifdef CONFIG_NF_NAT_NEEDED #include #include +#include #endif #include @@ -1751,6 +1752,9 @@ static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .build_size = ctnetlink_nfqueue_build_size, .build = ctnetlink_nfqueue_build, .parse = ctnetlink_nfqueue_parse, +#ifdef CONFIG_NF_NAT_NEEDED + .seq_adjust = nf_nat_tcp_seq_adjust, +#endif }; #endif /* CONFIG_NETFILTER_NETLINK_QUEUE */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 647923ae923..ff82c7933df 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -502,12 +502,10 @@ err_out: } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; - int diff; - diff = data_len - e->skb->len; if (diff < 0) { if (pskb_trim(e->skb, data_len)) return -ENOMEM; @@ -767,6 +765,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, unsigned int verdict; struct nf_queue_entry *entry; struct nfq_ct_hook *nfq_ct; + enum ip_conntrack_info uninitialized_var(ctinfo); + struct nf_conn *ct = NULL; queue = instance_lookup(queue_num); if (!queue) @@ -789,20 +789,23 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, nfq_ct = rcu_dereference(nfq_ct_hook); if (nfq_ct != NULL && (queue->flags & NFQA_CFG_F_CONNTRACK) && nfqa[NFQA_CT]) { - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - ct = nf_ct_get(entry->skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) nfq_ct->parse(nfqa[NFQA_CT], ct); } - rcu_read_unlock(); if (nfqa[NFQA_PAYLOAD]) { + u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); + int diff = payload_len - entry->skb->len; + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), - nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) + payload_len, entry, diff) < 0) verdict = NF_DROP; + + if (ct && (ct->status & IPS_NAT_MASK) && diff) + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); } + rcu_read_unlock(); if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); -- cgit v1.2.3-70-g09d2 From ae243bee397102c51fbf9db440eca3b077e0e702 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 7 Jun 2012 14:19:42 +0200 Subject: netfilter: ctnetlink: add CTA_HELP_INFO attribute This attribute can be used to modify and to dump the internal protocol information. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + include/net/netfilter/nf_conntrack_helper.h | 1 + net/netfilter/nf_conntrack_netlink.c | 23 ++++++++++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index e58e4b93c10..76888337008 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -191,6 +191,7 @@ enum ctattr_expect_nat { enum ctattr_help { CTA_HELP_UNSPEC, CTA_HELP_NAME, + CTA_HELP_INFO, __CTA_HELP_MAX }; #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 061352f71a8..84b24c3a383 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -39,6 +39,7 @@ struct nf_conntrack_helper { void (*destroy)(struct nf_conn *ct); + int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct); int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct); unsigned int expect_class_max; }; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8be0ab9b475..ae156dff488 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -902,7 +902,8 @@ static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { }; static inline int -ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) +ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, + struct nlattr **helpinfo) { struct nlattr *tb[CTA_HELP_MAX+1]; @@ -913,6 +914,9 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) *helper_name = nla_data(tb[CTA_HELP_NAME]); + if (tb[CTA_HELP_INFO]) + *helpinfo = tb[CTA_HELP_INFO]; + return 0; } @@ -1173,13 +1177,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); char *helpname = NULL; + struct nlattr *helpinfo = NULL; int err; /* don't change helper of sibling connections */ if (ct->master) return -EBUSY; - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) return err; @@ -1214,8 +1219,12 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } if (help) { - if (help->helper == helper) + if (help->helper == helper) { + /* update private helper data if allowed. */ + if (helper->from_nlattr && helpinfo) + helper->from_nlattr(helpinfo, ct); return 0; + } if (help->helper) return -EBUSY; /* need to zero data of old helper */ @@ -1411,8 +1420,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, rcu_read_lock(); if (cda[CTA_HELP]) { char *helpname = NULL; - - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + struct nlattr *helpinfo = NULL; + + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) goto err2; @@ -1446,6 +1456,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, err = -ENOMEM; goto err2; } + /* set private helper data if allowed. */ + if (helper->from_nlattr && helpinfo) + helper->from_nlattr(helpinfo, ct); /* not in hash table yet so not strictly necessary */ RCU_INIT_POINTER(help->helper, helper); -- cgit v1.2.3-70-g09d2 From 12f7a505331e6b2754684b509f2ac8f0011ce644 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 13 May 2012 21:44:54 +0200 Subject: netfilter: add user-space connection tracking helper infrastructure There are good reasons to supports helpers in user-space instead: * Rapid connection tracking helper development, as developing code in user-space is usually faster. * Reliability: A buggy helper does not crash the kernel. Moreover, we can monitor the helper process and restart it in case of problems. * Security: Avoid complex string matching and mangling in kernel-space running in privileged mode. Going further, we can even think about running user-space helpers as a non-root process. * Extensibility: It allows the development of very specific helpers (most likely non-standard proprietary protocols) that are very likely not to be accepted for mainline inclusion in the form of kernel-space connection tracking helpers. This patch adds the infrastructure to allow the implementation of user-space conntrack helpers by means of the new nfnetlink subsystem `nfnetlink_cthelper' and the existing queueing infrastructure (nfnetlink_queue). I had to add the new hook NF_IP6_PRI_CONNTRACK_HELPER to register ipv[4|6]_helper which results from splitting ipv[4|6]_confirm into two pieces. This change is required not to break NAT sequence adjustment and conntrack confirmation for traffic that is enqueued to our user-space conntrack helpers. Basic operation, in a few steps: 1) Register user-space helper by means of `nfct': nfct helper add ftp inet tcp [ It must be a valid existing helper supported by conntrack-tools ] 2) Add rules to enable the FTP user-space helper which is used to track traffic going to TCP port 21. For locally generated packets: iptables -I OUTPUT -t raw -p tcp --dport 21 -j CT --helper ftp For non-locally generated packets: iptables -I PREROUTING -t raw -p tcp --dport 21 -j CT --helper ftp 3) Run the test conntrackd in helper mode (see example files under doc/helper/conntrackd.conf conntrackd 4) Generate FTP traffic going, if everything is OK, then conntrackd should create expectations (you can check that with `conntrack': conntrack -E expect [NEW] 301 proto=6 src=192.168.1.136 dst=130.89.148.12 sport=0 dport=54037 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.1.136 master-dst=130.89.148.12 sport=57127 dport=21 class=0 helper=ftp [DESTROY] 301 proto=6 src=192.168.1.136 dst=130.89.148.12 sport=0 dport=54037 mask-src=255.255.255.255 mask-dst=255.255.255.255 sport=0 dport=65535 master-src=192.168.1.136 master-dst=130.89.148.12 sport=57127 dport=21 class=0 helper=ftp This confirms that our test helper is receiving packets including the conntrack information, and adding expectations in kernel-space. The user-space helper can also store its private tracking information in the conntrack structure in the kernel via the CTA_HELP_INFO. The kernel will consider this a binary blob whose layout is unknown. This information will be included in the information that is transfered to user-space via glue code that integrates nfnetlink_queue and ctnetlink. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_cthelper.h | 55 ++ include/linux/netfilter_ipv4.h | 1 + include/linux/netfilter_ipv6.h | 1 + include/net/netfilter/nf_conntrack_helper.h | 11 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 48 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 43 +- net/netfilter/Kconfig | 8 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_helper.c | 21 +- net/netfilter/nfnetlink_cthelper.c | 672 +++++++++++++++++++++++++ 12 files changed, 839 insertions(+), 26 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_cthelper.h create mode 100644 net/netfilter/nfnetlink_cthelper.c (limited to 'include/net') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 1697036336b..874ae8f2706 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,7 @@ header-y += nfnetlink.h header-y += nfnetlink_acct.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h +header-y += nfnetlink_cthelper.h header-y += nfnetlink_cttimeout.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index a1048c1587d..18341cdb244 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -50,7 +50,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_IPSET 6 #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 -#define NFNL_SUBSYS_COUNT 9 +#define NFNL_SUBSYS_CTHELPER 9 +#define NFNL_SUBSYS_COUNT 10 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink_cthelper.h b/include/linux/netfilter/nfnetlink_cthelper.h new file mode 100644 index 00000000000..33659f6fad3 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_cthelper.h @@ -0,0 +1,55 @@ +#ifndef _NFNL_CTHELPER_H_ +#define _NFNL_CTHELPER_H_ + +#define NFCT_HELPER_STATUS_DISABLED 0 +#define NFCT_HELPER_STATUS_ENABLED 1 + +enum nfnl_acct_msg_types { + NFNL_MSG_CTHELPER_NEW, + NFNL_MSG_CTHELPER_GET, + NFNL_MSG_CTHELPER_DEL, + NFNL_MSG_CTHELPER_MAX +}; + +enum nfnl_cthelper_type { + NFCTH_UNSPEC, + NFCTH_NAME, + NFCTH_TUPLE, + NFCTH_QUEUE_NUM, + NFCTH_POLICY, + NFCTH_PRIV_DATA_LEN, + NFCTH_STATUS, + __NFCTH_MAX +}; +#define NFCTH_MAX (__NFCTH_MAX - 1) + +enum nfnl_cthelper_policy_type { + NFCTH_POLICY_SET_UNSPEC, + NFCTH_POLICY_SET_NUM, + NFCTH_POLICY_SET, + NFCTH_POLICY_SET1 = NFCTH_POLICY_SET, + NFCTH_POLICY_SET2, + NFCTH_POLICY_SET3, + NFCTH_POLICY_SET4, + __NFCTH_POLICY_SET_MAX +}; +#define NFCTH_POLICY_SET_MAX (__NFCTH_POLICY_SET_MAX - 1) + +enum nfnl_cthelper_pol_type { + NFCTH_POLICY_UNSPEC, + NFCTH_POLICY_NAME, + NFCTH_POLICY_EXPECT_MAX, + NFCTH_POLICY_EXPECT_TIMEOUT, + __NFCTH_POLICY_MAX +}; +#define NFCTH_POLICY_MAX (__NFCTH_POLICY_MAX - 1) + +enum nfnl_cthelper_tuple_type { + NFCTH_TUPLE_UNSPEC, + NFCTH_TUPLE_L3PROTONUM, + NFCTH_TUPLE_L4PROTONUM, + __NFCTH_TUPLE_MAX, +}; +#define NFCTH_TUPLE_MAX (__NFCTH_TUPLE_MAX - 1) + +#endif /* _NFNL_CTHELPER_H */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fa0946c549d..e2b12801378 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -66,6 +66,7 @@ enum nf_ip_hook_priorities { NF_IP_PRI_SECURITY = 50, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, + NF_IP_PRI_CONNTRACK_HELPER = 300, NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, NF_IP_PRI_LAST = INT_MAX, }; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 57c025127f1..7c8a513ce7a 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,6 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_SECURITY = 50, NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, + NF_IP6_PRI_CONNTRACK_HELPER = 300, NF_IP6_PRI_LAST = INT_MAX, }; diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 84b24c3a383..9aad956d100 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -15,6 +15,11 @@ struct module; +enum nf_ct_helper_flags { + NF_CT_HELPER_F_USERSPACE = (1 << 0), + NF_CT_HELPER_F_CONFIGURED = (1 << 1), +}; + #define NF_CT_HELPER_NAME_LEN 16 struct nf_conntrack_helper { @@ -42,6 +47,9 @@ struct nf_conntrack_helper { int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct); int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct); unsigned int expect_class_max; + + unsigned int flags; + unsigned int queue_num; /* For user-space helpers. */ }; extern struct nf_conntrack_helper * @@ -96,4 +104,7 @@ nf_ct_helper_expectfn_find_by_name(const char *name); struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol); +extern struct hlist_head *nf_ct_helper_hash; +extern unsigned int nf_ct_helper_hsize; + #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d79b961a800..e7ff2dcab6c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -95,11 +95,11 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv4_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -110,24 +110,38 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } + return ret; +} + +static unsigned int ipv4_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && @@ -184,6 +198,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, { .hook = ipv4_confirm, .owner = THIS_MODULE, @@ -191,6 +212,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, { .hook = ipv4_confirm, .owner = THIS_MODULE, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fca10da80ea..4794f96cf2e 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -143,11 +143,11 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv6_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -161,15 +161,15 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); @@ -179,12 +179,19 @@ static unsigned int ipv6_confirm(unsigned int hooknum, } ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } -out: + return ret; +} + +static unsigned int ipv6_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -253,6 +260,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, + { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, { .hook = ipv6_confirm, .owner = THIS_MODULE, @@ -260,6 +274,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, + { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, { .hook = ipv6_confirm, .owner = THIS_MODULE, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 209c1ed4336..aae6c628991 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -12,6 +12,14 @@ tristate "Netfilter NFACCT over NFNETLINK interface" If this option is enabled, the kernel will include support for extended accounting via NFNETLINK. +config NETFILTER_NETLINK_CTHELPER +tristate "Netfilter CTHELPER over NFNETLINK interface" + depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for user-space connection tracking helpers via NFNETLINK. + config NETFILTER_NETLINK_QUEUE tristate "Netfilter NFQUEUE over NFNETLINK interface" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4e7960cc7b9..2f3bc0f647b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o +obj-$(CONFIG_NETFILTER_NETLINK_CTHELPER) += nfnetlink_cthelper.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9c18ecb0ab8..2918ec2e450 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -30,8 +30,10 @@ #include static DEFINE_MUTEX(nf_ct_helper_mutex); -static struct hlist_head *nf_ct_helper_hash __read_mostly; -static unsigned int nf_ct_helper_hsize __read_mostly; +struct hlist_head *nf_ct_helper_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_helper_hash); +unsigned int nf_ct_helper_hsize __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static bool nf_ct_auto_assign_helper __read_mostly = true; @@ -322,6 +324,9 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { + int ret = 0; + struct nf_conntrack_helper *cur; + struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -329,11 +334,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); + hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && + cur->tuple.src.l3num == me->tuple.src.l3num && + cur->tuple.dst.protonum == me->tuple.dst.protonum) { + ret = -EEXIST; + goto out; + } + } hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; +out: mutex_unlock(&nf_ct_helper_mutex); - - return 0; + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c new file mode 100644 index 00000000000..d6836193d47 --- /dev/null +++ b/net/netfilter/nfnetlink_cthelper.c @@ -0,0 +1,672 @@ +/* + * (C) 2012 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + * + * This software has been sponsored by Vyatta Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); + +static int +nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + const struct nf_conn_help *help; + struct nf_conntrack_helper *helper; + + help = nfct_help(ct); + if (help == NULL) + return NF_DROP; + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (helper == NULL) + return NF_DROP; + + /* This is an user-space helper not yet configured, skip. */ + if ((helper->flags & + (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == + NF_CT_HELPER_F_USERSPACE) + return NF_ACCEPT; + + /* If the user-space helper is not available, don't block traffic. */ + return NF_QUEUE_NR(helper->queue_num) | NF_VERDICT_FLAG_QUEUE_BYPASS; +} + +static const struct nla_policy nfnl_cthelper_tuple_pol[NFCTH_TUPLE_MAX+1] = { + [NFCTH_TUPLE_L3PROTONUM] = { .type = NLA_U16, }, + [NFCTH_TUPLE_L4PROTONUM] = { .type = NLA_U8, }, +}; + +static int +nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_TUPLE_MAX+1]; + + nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + + if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) + return -EINVAL; + + tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); + tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); + + return 0; +} + +static int +nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) +{ + const struct nf_conn_help *help = nfct_help(ct); + + if (help->helper->data_len == 0) + return -EINVAL; + + memcpy(&help->data, nla_data(attr), help->helper->data_len); + return 0; +} + +static int +nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct) +{ + const struct nf_conn_help *help = nfct_help(ct); + + if (help->helper->data_len && + nla_put(skb, CTA_HELP_INFO, help->helper->data_len, &help->data)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy nfnl_cthelper_expect_pol[NFCTH_POLICY_MAX+1] = { + [NFCTH_POLICY_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN-1 }, + [NFCTH_POLICY_EXPECT_MAX] = { .type = NLA_U32, }, + [NFCTH_POLICY_EXPECT_TIMEOUT] = { .type = NLA_U32, }, +}; + +static int +nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX+1]; + + nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + strncpy(expect_policy->name, + nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); + expect_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + expect_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static const struct nla_policy +nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { + [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, +}; + +static int +nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + int i, ret; + struct nf_conntrack_expect_policy *expect_policy; + struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + + nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + helper->expect_class_max = + ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + + if (helper->expect_class_max != 0 && + helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + return -EOVERFLOW; + + expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * + helper->expect_class_max, GFP_KERNEL); + if (expect_policy == NULL) + return -ENOMEM; + + for (i=0; iexpect_class_max; i++) { + if (!tb[NFCTH_POLICY_SET+i]) + goto err; + + ret = nfnl_cthelper_expect_policy(&expect_policy[i], + tb[NFCTH_POLICY_SET+i]); + if (ret < 0) + goto err; + } + helper->expect_policy = expect_policy; + return 0; +err: + kfree(expect_policy); + return -EINVAL; +} + +static int +nfnl_cthelper_create(const struct nlattr * const tb[], + struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *helper; + int ret; + + if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) + return -EINVAL; + + helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); + if (helper == NULL) + return -ENOMEM; + + ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); + if (ret < 0) + goto err; + + strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); + helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + helper->flags |= NF_CT_HELPER_F_USERSPACE; + memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); + + helper->me = THIS_MODULE; + helper->help = nfnl_userspace_cthelper; + helper->from_nlattr = nfnl_cthelper_from_nlattr; + helper->to_nlattr = nfnl_cthelper_to_nlattr; + + /* Default to queue number zero, this can be updated at any time. */ + if (tb[NFCTH_QUEUE_NUM]) + helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); + + if (tb[NFCTH_STATUS]) { + int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); + + switch(status) { + case NFCT_HELPER_STATUS_ENABLED: + helper->flags |= NF_CT_HELPER_F_CONFIGURED; + break; + case NFCT_HELPER_STATUS_DISABLED: + helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; + break; + } + } + + ret = nf_conntrack_helper_register(helper); + if (ret < 0) + goto err; + + return 0; +err: + kfree(helper); + return ret; +} + +static int +nfnl_cthelper_update(const struct nlattr * const tb[], + struct nf_conntrack_helper *helper) +{ + int ret; + + if (tb[NFCTH_PRIV_DATA_LEN]) + return -EBUSY; + + if (tb[NFCTH_POLICY]) { + ret = nfnl_cthelper_parse_expect_policy(helper, + tb[NFCTH_POLICY]); + if (ret < 0) + return ret; + } + if (tb[NFCTH_QUEUE_NUM]) + helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); + + if (tb[NFCTH_STATUS]) { + int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); + + switch(status) { + case NFCT_HELPER_STATUS_ENABLED: + helper->flags |= NF_CT_HELPER_F_CONFIGURED; + break; + case NFCT_HELPER_STATUS_DISABLED: + helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; + break; + } + } + return 0; +} + +static int +nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + const char *helper_name; + struct nf_conntrack_helper *cur, *helper = NULL; + struct nf_conntrack_tuple tuple; + struct hlist_node *n; + int ret = 0, i; + + if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) + return -EINVAL; + + helper_name = nla_data(tb[NFCTH_NAME]); + + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + rcu_read_lock(); + for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { + hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) + continue; + + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) { + ret = -EEXIST; + goto err; + } + helper = cur; + break; + } + } + rcu_read_unlock(); + + if (helper == NULL) + ret = nfnl_cthelper_create(tb, &tuple); + else + ret = nfnl_cthelper_update(tb, helper); + + return ret; +err: + rcu_read_unlock(); + return ret; +} + +static int +nfnl_cthelper_dump_tuple(struct sk_buff *skb, + struct nf_conntrack_helper *helper) +{ + struct nlattr *nest_parms; + + nest_parms = nla_nest_start(skb, NFCTH_TUPLE | NLA_F_NESTED); + if (nest_parms == NULL) + goto nla_put_failure; + + if (nla_put_be16(skb, NFCTH_TUPLE_L3PROTONUM, + htons(helper->tuple.src.l3num))) + goto nla_put_failure; + + if (nla_put_u8(skb, NFCTH_TUPLE_L4PROTONUM, helper->tuple.dst.protonum)) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + return 0; + +nla_put_failure: + return -1; +} + +static int +nfnl_cthelper_dump_policy(struct sk_buff *skb, + struct nf_conntrack_helper *helper) +{ + int i; + struct nlattr *nest_parms1, *nest_parms2; + + nest_parms1 = nla_nest_start(skb, NFCTH_POLICY | NLA_F_NESTED); + if (nest_parms1 == NULL) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, + htonl(helper->expect_class_max))) + goto nla_put_failure; + + for (i=0; iexpect_class_max; i++) { + nest_parms2 = nla_nest_start(skb, + (NFCTH_POLICY_SET+i) | NLA_F_NESTED); + if (nest_parms2 == NULL) + goto nla_put_failure; + + if (nla_put_string(skb, NFCTH_POLICY_NAME, + helper->expect_policy[i].name)) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_MAX, + htonl(helper->expect_policy[i].max_expected))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_TIMEOUT, + htonl(helper->expect_policy[i].timeout))) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms2); + } + nla_nest_end(skb, nest_parms1); + return 0; + +nla_put_failure: + return -1; +} + +static int +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct nf_conntrack_helper *helper) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + int status; + + event |= NFNL_SUBSYS_CTHELPER << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFCTH_NAME, helper->name)) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_QUEUE_NUM, htonl(helper->queue_num))) + goto nla_put_failure; + + if (nfnl_cthelper_dump_tuple(skb, helper) < 0) + goto nla_put_failure; + + if (nfnl_cthelper_dump_policy(skb, helper) < 0) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_PRIV_DATA_LEN, htonl(helper->data_len))) + goto nla_put_failure; + + if (helper->flags & NF_CT_HELPER_F_CONFIGURED) + status = NFCT_HELPER_STATUS_ENABLED; + else + status = NFCT_HELPER_STATUS_DISABLED; + + if (nla_put_be32(skb, NFCTH_STATUS, htonl(status))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_helper *cur, *last; + struct hlist_node *n; + + rcu_read_lock(); + last = (struct nf_conntrack_helper *)cb->args[1]; + for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { +restart: + hlist_for_each_entry_rcu(cur, n, + &nf_ct_helper_hash[cb->args[0]], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (cb->args[1]) { + if (cur != last) + continue; + cb->args[1] = 0; + } + if (nfnl_cthelper_fill_info(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + goto out; + } + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } +out: + rcu_read_unlock(); + return skb->len; +} + +static int +nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = -ENOENT, i; + struct nf_conntrack_helper *cur; + struct hlist_node *n; + struct sk_buff *skb2; + char *helper_name = NULL; + struct nf_conntrack_tuple tuple; + bool tuple_set = false; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nfnl_cthelper_dump_table, + }; + return netlink_dump_start(nfnl, skb, nlh, &c); + } + + if (tb[NFCTH_NAME]) + helper_name = nla_data(tb[NFCTH_NAME]); + + if (tb[NFCTH_TUPLE]) { + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + tuple_set = true; + } + + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (helper_name && strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) { + continue; + } + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } + + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; + } + } + return ret; +} + +static int +nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + char *helper_name = NULL; + struct nf_conntrack_helper *cur; + struct hlist_node *n, *tmp; + struct nf_conntrack_tuple tuple; + bool tuple_set = false, found = false; + int i, j = 0, ret; + + if (tb[NFCTH_NAME]) + helper_name = nla_data(tb[NFCTH_NAME]); + + if (tb[NFCTH_TUPLE]) { + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + tuple_set = true; + } + + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hnode) { + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + j++; + + if (helper_name && strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) { + continue; + } + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + found = true; + nf_conntrack_helper_unregister(cur); + } + } + /* Make sure we return success if we flush and there is no helpers */ + return (found || j == 0) ? 0 : -ENOENT; +} + +static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { + [NFCTH_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN-1 }, + [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, +}; + +static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { + [NFNL_MSG_CTHELPER_NEW] = { .call = nfnl_cthelper_new, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, + [NFNL_MSG_CTHELPER_GET] = { .call = nfnl_cthelper_get, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, + [NFNL_MSG_CTHELPER_DEL] = { .call = nfnl_cthelper_del, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_cthelper_subsys = { + .name = "cthelper", + .subsys_id = NFNL_SUBSYS_CTHELPER, + .cb_count = NFNL_MSG_CTHELPER_MAX, + .cb = nfnl_cthelper_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTHELPER); + +static int __init nfnl_cthelper_init(void) +{ + int ret; + + ret = nfnetlink_subsys_register(&nfnl_cthelper_subsys); + if (ret < 0) { + pr_err("nfnl_cthelper: cannot register with nfnetlink.\n"); + goto err_out; + } + return 0; +err_out: + return ret; +} + +static void __exit nfnl_cthelper_exit(void) +{ + struct nf_conntrack_helper *cur; + struct hlist_node *n, *tmp; + int i; + + nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); + + for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) + continue; + + nf_conntrack_helper_unregister(cur); + } + } +} + +module_init(nfnl_cthelper_init); +module_exit(nfnl_cthelper_exit); -- cgit v1.2.3-70-g09d2 From 7f95e1880e70bb351b992b01ef70ff083fc00d30 Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Sat, 16 Jun 2012 15:14:49 +0200 Subject: include/net/dst.h: neaten asterisk placement Fix code style - place the asterisk where it belongs. Signed-off-by: Eldad Zack Signed-off-by: David S. Miller --- include/net/dst.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 8197eadca81..f0bf3b8d591 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -48,8 +48,8 @@ struct dst_entry { #else void *__pad1; #endif - int (*input)(struct sk_buff*); - int (*output)(struct sk_buff*); + int (*input)(struct sk_buff *); + int (*output)(struct sk_buff *); int flags; #define DST_HOST 0x0001 @@ -241,7 +241,7 @@ dst_metric_locked(const struct dst_entry *dst, int metric) return dst_metric(dst, RTAX_LOCK) & (1<lastuse = time; } -static inline -struct dst_entry * dst_clone(struct dst_entry * dst) +static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) atomic_inc(&dst->__refcnt); @@ -371,12 +370,12 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) } extern int dst_discard(struct sk_buff *skb); -extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, +extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, int flags); -extern void __dst_free(struct dst_entry * dst); -extern struct dst_entry *dst_destroy(struct dst_entry * dst); +extern void __dst_free(struct dst_entry *dst); +extern struct dst_entry *dst_destroy(struct dst_entry *dst); -static inline void dst_free(struct dst_entry * dst) +static inline void dst_free(struct dst_entry *dst) { if (dst->obsolete > 1) return; -- cgit v1.2.3-70-g09d2 From 31fdc5553b42abd7e29bb7b89f6ba07514eb4763 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 13 Jun 2012 22:29:03 +0000 Subject: net: remove my future former mail address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Cc: Sakari Ailus Signed-off-by: David S. Miller --- include/net/phonet/gprs.h | 2 +- net/caif/caif_dev.c | 3 +-- net/phonet/af_phonet.c | 4 ++-- net/phonet/datagram.c | 4 ++-- net/phonet/pep-gprs.c | 2 +- net/phonet/pep.c | 2 +- net/phonet/pn_dev.c | 4 ++-- net/phonet/pn_netlink.c | 4 ++-- net/phonet/socket.c | 4 ++-- net/phonet/sysctl.c | 2 +- 10 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/phonet/gprs.h b/include/net/phonet/gprs.h index 928daf595be..bcd525e39a0 100644 --- a/include/net/phonet/gprs.h +++ b/include/net/phonet/gprs.h @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Author: Rémi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index aa6f716524f..554b3128960 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -4,8 +4,7 @@ * Author: Sjur Brendeland/sjur.brandeland@stericsson.com * License terms: GNU General Public License (GPL) version 2 * - * Borrowed heavily from file: pn_dev.c. Thanks to - * Remi Denis-Courmont + * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont * and Sakari Ailus */ diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 779ce4ff92e..5a940dbd74a 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index bf35b4e1a14..12c30f3e643 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index d01208968c8..a2fba7edfd1 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Author: Rémi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 9dd4f926f7d..576f22c9c76 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Author: Rémi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 36f75a9e2c3..5bf6341e2dd 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index cfdf135fcd6..7dd762a464e 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Remi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 89cfa9ce493..0acc943f713 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -5,8 +5,8 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont - * Original author: Sakari Ailus + * Authors: Sakari Ailus + * Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 696348fd31a..d6bbbbd0af1 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -5,7 +5,7 @@ * * Copyright (C) 2008 Nokia Corporation. * - * Contact: Remi Denis-Courmont + * Author: Rémi Denis-Courmont * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3-70-g09d2 From 728b19e5fb9bbebbd580784a092b786fe379ed8e Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Thu, 14 Jun 2012 02:06:10 +0800 Subject: {nl,cfg,mac}80211: implement dot11MeshHWMPconfirmationInterval As defined in section 13.10.9.3 Case D (802.11-2012), this control variable is used to limit the mesh STA to send only one PREQ to a root mesh STA within this interval of time (in TUs). The default value for this variable is set to 2000 TUs. However, for current implementation, the maximum configurable of dot11MeshHWMPconfirmationInterval is restricted by dot11MeshHWMPactivePathTimeout. Signed-off-by: Chun-Yeow Yeoh [line-break commit log] Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 5 +++++ include/net/cfg80211.h | 4 ++++ net/mac80211/cfg.c | 3 +++ net/mac80211/debugfs_netdev.c | 3 +++ net/mac80211/mesh.h | 2 ++ net/mac80211/mesh_hwmp.c | 7 ++++++- net/wireless/mesh.c | 2 ++ net/wireless/nl80211.c | 9 ++++++++- 8 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 6936fabe879..b7c3b737ddd 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2192,6 +2192,10 @@ enum nl80211_mntr_flags { * @NL80211_MESHCONF_HWMP_ROOT_INTERVAL: The interval of time (in TUs) between * proactive PREQs are transmitted. * + * @NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL: The minimum interval of time + * (in TUs) during which a mesh STA can send only one Action frame + * containing a PREQ element for root path confirmation. + * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use */ enum nl80211_meshconf_params { @@ -2220,6 +2224,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HT_OPMODE, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, + NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e52b38d7b1b..f0163a10b8c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -849,6 +849,9 @@ struct bss_parameters { * * @dot11MeshHWMProotInterval: The interval of time (in TUs) between proactive * PREQs are transmitted. + * @dot11MeshHWMPconfirmationInterval: The minimum interval of time (in TUs) + * during which a mesh STA can send only one Action frame containing + * a PREQ element for root path confirmation. */ struct mesh_config { u16 dot11MeshRetryTimeout; @@ -875,6 +878,7 @@ struct mesh_config { u16 ht_opmode; u32 dot11MeshHWMPactivePathToRootTimeout; u16 dot11MeshHWMProotInterval; + u16 dot11MeshHWMPconfirmationInterval; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5bd316c0a63..6e25ac4873c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1596,6 +1596,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ROOT_INTERVAL, mask)) conf->dot11MeshHWMProotInterval = nconf->dot11MeshHWMProotInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, mask)) + conf->dot11MeshHWMPconfirmationInterval = + nconf->dot11MeshHWMPconfirmationInterval; return 0; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index a8cea70902e..512c894893d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -514,6 +514,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPactivePathToRootTimeout, u.mesh.mshcfg.dot11MeshHWMPactivePathToRootTimeout, DEC); IEEE80211_IF_FILE(dot11MeshHWMProotInterval, u.mesh.mshcfg.dot11MeshHWMProotInterval, DEC); +IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval, + u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval, DEC); #endif #define DEBUGFS_ADD_MODE(name, mode) \ @@ -617,6 +619,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(ht_opmode); MESHPARAMS_ADD(dot11MeshHWMPactivePathToRootTimeout); MESHPARAMS_ADD(dot11MeshHWMProotInterval); + MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval); #undef MESHPARAMS_ADD } #endif diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c7400a23b64..faaa39bcfd1 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -104,6 +104,7 @@ enum mesh_deferred_task_flags { * an mpath to a hash bucket on a path table. * @rann_snd_addr: the RANN sender address * @rann_metric: the aggregated path metric towards the root node + * @last_preq_to_root: Timestamp of last PREQ sent to root * @is_root: the destination station of this path is a root node * @is_gate: the destination station of this path is a mesh gate * @@ -131,6 +132,7 @@ struct mesh_path { spinlock_t state_lock; u8 rann_snd_addr[ETH_ALEN]; u32 rann_metric; + unsigned long last_preq_to_root; bool is_root; bool is_gate; }; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 35e3acbe226..bea52479e3a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -98,6 +98,8 @@ static inline u32 u16_field_get(u8 *preq_elem, int offset, bool ae) #define max_preq_retries(s) (s->u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries) #define disc_timeout_jiff(s) \ msecs_to_jiffies(sdata->u.mesh.mshcfg.min_discovery_timeout) +#define root_path_confirmation_jiffies(s) \ + msecs_to_jiffies(sdata->u.mesh.mshcfg.dot11MeshHWMPconfirmationInterval) enum mpath_frame_type { MPATH_PREQ = 0, @@ -811,11 +813,14 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, } if ((!(mpath->flags & (MESH_PATH_ACTIVE | MESH_PATH_RESOLVING)) || - time_after(jiffies, mpath->exp_time - 1*HZ)) && + (time_after(jiffies, mpath->last_preq_to_root + + root_path_confirmation_jiffies(sdata)) || + time_before(jiffies, mpath->last_preq_to_root))) && !(mpath->flags & MESH_PATH_FIXED)) { mhwmp_dbg("%s time to refresh root mpath %pM", sdata->name, orig_addr); mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); + mpath->last_preq_to_root = jiffies; } if ((SN_LT(mpath->sn, orig_sn) || (mpath->sn == orig_sn && diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 2f141cfd581..3b73b07486c 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -16,6 +16,7 @@ #define MESH_RANN_INTERVAL 5000 #define MESH_PATH_TO_ROOT_TIMEOUT 6000 #define MESH_ROOT_INTERVAL 5000 +#define MESH_ROOT_CONFIRMATION_INTERVAL 2000 /* * Minimum interval between two consecutive PREQs originated by the same @@ -66,6 +67,7 @@ const struct mesh_config default_mesh_config = { .ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED, .dot11MeshHWMPactivePathToRootTimeout = MESH_PATH_TO_ROOT_TIMEOUT, .dot11MeshHWMProotInterval = MESH_ROOT_INTERVAL, + .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, }; const struct mesh_setup default_mesh_setup = { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f8930db613d..a363ca17bfc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3473,7 +3473,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, cur_params.dot11MeshHWMPactivePathToRootTimeout) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, - cur_params.dot11MeshHWMProotInterval)) + cur_params.dot11MeshHWMProotInterval) || + nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + cur_params.dot11MeshHWMPconfirmationInterval)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -3511,6 +3513,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, }; static const struct nla_policy @@ -3625,6 +3628,10 @@ do {\ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPconfirmationInterval, mask, + NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, + nla_get_u16); if (mask_out) *mask_out = mask; -- cgit v1.2.3-70-g09d2 From 7c62234547255ce4c385a218915965bc2f14fe45 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Jun 2012 02:10:57 +0200 Subject: netfilter: nfnetlink_queue: fix compilation with NF_CONNTRACK disabled In "9cb0176 netfilter: add glue code to integrate nfnetlink_queue and ctnetlink" the compilation with NF_CONNTRACK disabled is broken. This patch fixes this issue. I have moved the conntrack part into nfnetlink_queue_ct.c to avoid peppering the entire nfnetlink_queue.c code with ifdefs. I also needed to rename nfnetlink_queue.c to nfnetlink_queue_pkt.c to update the net/netfilter/Makefile to support conditional compilation of the conntrack integration. This patch also adds CONFIG_NETFILTER_QUEUE_CT in case you want to explicitly disable the integration between nf_conntrack and nfnetlink_queue. Reported-by: Andrew Morton Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nfnetlink_queue.h | 43 ++ net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_netlink.c | 11 +- net/netfilter/nfnetlink_queue.c | 1121 ------------------------------- net/netfilter/nfnetlink_queue_core.c | 1090 ++++++++++++++++++++++++++++++ net/netfilter/nfnetlink_queue_ct.c | 97 +++ 7 files changed, 1245 insertions(+), 1128 deletions(-) create mode 100644 include/net/netfilter/nfnetlink_queue.h delete mode 100644 net/netfilter/nfnetlink_queue.c create mode 100644 net/netfilter/nfnetlink_queue_core.c create mode 100644 net/netfilter/nfnetlink_queue_ct.c (limited to 'include/net') diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h new file mode 100644 index 00000000000..9f8095c108e --- /dev/null +++ b/include/net/netfilter/nfnetlink_queue.h @@ -0,0 +1,43 @@ +#ifndef _NET_NFNL_QUEUE_H_ +#define _NET_NFNL_QUEUE_H_ + +#include + +struct nf_conn; + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, + enum ip_conntrack_info *ctinfo); +struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, + const struct nlattr *attr, + enum ip_conntrack_info *ctinfo); +int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo); +void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff); +#else +inline struct nf_conn * +nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo) +{ + return NULL; +} + +inline struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, + const struct nlattr *attr, + enum ip_conntrack_info *ctinfo) +{ + return NULL; +} + +inline int +nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + return 0; +} + +inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ +} +#endif /* NF_CONNTRACK */ +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f1a52ba3e4c..c19b214ffd5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -340,6 +340,7 @@ config NF_CT_NETLINK_HELPER select NETFILTER_NETLINK depends on NF_CT_NETLINK depends on NETFILTER_NETLINK_QUEUE + depends on NETFILTER_NETLINK_QUEUE_CT depends on NETFILTER_ADVANCED help This option enables the user-space connection tracking helpers @@ -347,6 +348,14 @@ config NF_CT_NETLINK_HELPER If unsure, say `N'. +config NETFILTER_NETLINK_QUEUE_CT + bool "NFQUEUE integration with Connection Tracking" + default n + depends on NETFILTER_NETLINK_QUEUE + help + If this option is enabled, NFQUEUE can include Connection Tracking + information together with the packet is the enqueued via NFNETLINK. + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7cc20199fe8..1c5160f2278 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -9,6 +9,8 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o +nfnetlink_queue-y := nfnetlink_queue_core.o +nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 76271a1301a..31d1d8f3a6c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1627,8 +1627,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } -#if defined(CONFIG_NETFILTER_NETLINK_QUEUE) || \ - defined(CONFIG_NETFILTER_NETLINK_QUEUE_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT static size_t ctnetlink_nfqueue_build_size(const struct nf_conn *ct) { @@ -1762,7 +1761,7 @@ static struct nfq_ct_hook ctnetlink_nfqueue_hook = { .seq_adjust = nf_nat_tcp_seq_adjust, #endif }; -#endif /* CONFIG_NETFILTER_NETLINK_QUEUE */ +#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ /*********************************************************************** * EXPECT @@ -2568,8 +2567,7 @@ static int __init ctnetlink_init(void) pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } -#if defined(CONFIG_NETFILTER_NETLINK_QUEUE) || \ - defined(CONFIG_NETFILTER_NETLINK_QUEUE_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT /* setup interaction between nf_queue and nf_conntrack_netlink. */ RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook); #endif @@ -2590,8 +2588,7 @@ static void __exit ctnetlink_exit(void) unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); -#if defined(CONFIG_NETFILTER_NETLINK_QUEUE) || \ - defined(CONFIG_NETFILTER_NETLINK_QUEUE_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT RCU_INIT_POINTER(nfq_ct_hook, NULL); #endif } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c deleted file mode 100644 index ff82c7933df..00000000000 --- a/net/netfilter/nfnetlink_queue.c +++ /dev/null @@ -1,1121 +0,0 @@ -/* - * This is a module which is used for queueing packets and communicating with - * userspace via nfnetlink. - * - * (C) 2005 by Harald Welte - * (C) 2007 by Patrick McHardy - * - * Based on the old ipv4-only ip_queue.c: - * (C) 2000-2002 James Morris - * (C) 2003-2005 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_BRIDGE_NETFILTER -#include "../bridge/br_private.h" -#endif - -#define NFQNL_QMAX_DEFAULT 1024 - -struct nfqnl_instance { - struct hlist_node hlist; /* global list of queues */ - struct rcu_head rcu; - - int peer_pid; - unsigned int queue_maxlen; - unsigned int copy_range; - unsigned int queue_dropped; - unsigned int queue_user_dropped; - - - u_int16_t queue_num; /* number of this queue */ - u_int8_t copy_mode; - u_int32_t flags; /* Set using NFQA_CFG_FLAGS */ -/* - * Following fields are dirtied for each queued packet, - * keep them in same cache line if possible. - */ - spinlock_t lock; - unsigned int queue_total; - unsigned int id_sequence; /* 'sequence' of pkt ids */ - struct list_head queue_list; /* packets in queue */ -}; - -typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); - -static DEFINE_SPINLOCK(instances_lock); - -#define INSTANCE_BUCKETS 16 -static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; - -static inline u_int8_t instance_hashfn(u_int16_t queue_num) -{ - return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; -} - -static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) -{ - struct hlist_head *head; - struct hlist_node *pos; - struct nfqnl_instance *inst; - - head = &instance_table[instance_hashfn(queue_num)]; - hlist_for_each_entry_rcu(inst, pos, head, hlist) { - if (inst->queue_num == queue_num) - return inst; - } - return NULL; -} - -static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) -{ - struct nfqnl_instance *inst; - unsigned int h; - int err; - - spin_lock(&instances_lock); - if (instance_lookup(queue_num)) { - err = -EEXIST; - goto out_unlock; - } - - inst = kzalloc(sizeof(*inst), GFP_ATOMIC); - if (!inst) { - err = -ENOMEM; - goto out_unlock; - } - - inst->queue_num = queue_num; - inst->peer_pid = pid; - inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xfffff; - inst->copy_mode = NFQNL_COPY_NONE; - spin_lock_init(&inst->lock); - INIT_LIST_HEAD(&inst->queue_list); - - if (!try_module_get(THIS_MODULE)) { - err = -EAGAIN; - goto out_free; - } - - h = instance_hashfn(queue_num); - hlist_add_head_rcu(&inst->hlist, &instance_table[h]); - - spin_unlock(&instances_lock); - - return inst; - -out_free: - kfree(inst); -out_unlock: - spin_unlock(&instances_lock); - return ERR_PTR(err); -} - -static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, - unsigned long data); - -static void -instance_destroy_rcu(struct rcu_head *head) -{ - struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, - rcu); - - nfqnl_flush(inst, NULL, 0); - kfree(inst); - module_put(THIS_MODULE); -} - -static void -__instance_destroy(struct nfqnl_instance *inst) -{ - hlist_del_rcu(&inst->hlist); - call_rcu(&inst->rcu, instance_destroy_rcu); -} - -static void -instance_destroy(struct nfqnl_instance *inst) -{ - spin_lock(&instances_lock); - __instance_destroy(inst); - spin_unlock(&instances_lock); -} - -static inline void -__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) -{ - list_add_tail(&entry->list, &queue->queue_list); - queue->queue_total++; -} - -static void -__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) -{ - list_del(&entry->list); - queue->queue_total--; -} - -static struct nf_queue_entry * -find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) -{ - struct nf_queue_entry *entry = NULL, *i; - - spin_lock_bh(&queue->lock); - - list_for_each_entry(i, &queue->queue_list, list) { - if (i->id == id) { - entry = i; - break; - } - } - - if (entry) - __dequeue_entry(queue, entry); - - spin_unlock_bh(&queue->lock); - - return entry; -} - -static void -nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) -{ - struct nf_queue_entry *entry, *next; - - spin_lock_bh(&queue->lock); - list_for_each_entry_safe(entry, next, &queue->queue_list, list) { - if (!cmpfn || cmpfn(entry, data)) { - list_del(&entry->list); - queue->queue_total--; - nf_reinject(entry, NF_DROP); - } - } - spin_unlock_bh(&queue->lock); -} - -static struct sk_buff * -nfqnl_build_packet_message(struct nfqnl_instance *queue, - struct nf_queue_entry *entry, - __be32 **packet_id_ptr) -{ - sk_buff_data_t old_tail; - size_t size; - size_t data_len = 0; - struct sk_buff *skb; - struct nlattr *nla; - struct nfqnl_msg_packet_hdr *pmsg; - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - struct sk_buff *entskb = entry->skb; - struct net_device *indev; - struct net_device *outdev; - struct nfq_ct_hook *nfq_ct; - struct nf_conn *ct = NULL; - enum ip_conntrack_info uninitialized_var(ctinfo); - - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#ifdef CONFIG_BRIDGE_NETFILTER - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ - + nla_total_size(sizeof(u_int32_t)) /* ifindex */ -#endif - + nla_total_size(sizeof(u_int32_t)) /* mark */ - + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); - - outdev = entry->outdev; - - switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { - case NFQNL_COPY_META: - case NFQNL_COPY_NONE: - break; - - case NFQNL_COPY_PACKET: - if (entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) - return NULL; - - data_len = ACCESS_ONCE(queue->copy_range); - if (data_len == 0 || data_len > entskb->len) - data_len = entskb->len; - - size += nla_total_size(data_len); - break; - } - - /* rcu_read_lock()ed by __nf_queue already. */ - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct != NULL && (queue->flags & NFQA_CFG_F_CONNTRACK)) { - ct = nf_ct_get(entskb, &ctinfo); - if (ct) { - if (!nf_ct_is_untracked(ct)) - size += nfq_ct->build_size(ct); - else - ct = NULL; - } - } - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - goto nlmsg_failure; - - old_tail = skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, - NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, - sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); - nfmsg->nfgen_family = entry->pf; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = htons(queue->queue_num); - - nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); - pmsg = nla_data(nla); - pmsg->hw_protocol = entskb->protocol; - pmsg->hook = entry->hook; - *packet_id_ptr = &pmsg->packet_id; - - indev = entry->indev; - if (indev) { -#ifndef CONFIG_BRIDGE_NETFILTER - if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) - goto nla_put_failure; -#else - if (entry->pf == PF_BRIDGE) { - /* Case 1: indev is physical input device, we need to - * look for bridge group (when called from - * netfilter_bridge) */ - if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(indev->ifindex)) || - /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by __nf_queue */ - nla_put_be32(skb, NFQA_IFINDEX_INDEV, - htonl(br_port_get_rcu(indev)->br->dev->ifindex))) - goto nla_put_failure; - } else { - /* Case 2: indev is bridge group, we need to look for - * physical device (when called from ipv4) */ - if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, - htonl(indev->ifindex))) - goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physindev && - nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(entskb->nf_bridge->physindev->ifindex))) - goto nla_put_failure; - } -#endif - } - - if (outdev) { -#ifndef CONFIG_BRIDGE_NETFILTER - if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) - goto nla_put_failure; -#else - if (entry->pf == PF_BRIDGE) { - /* Case 1: outdev is physical output device, we need to - * look for bridge group (when called from - * netfilter_bridge) */ - if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(outdev->ifindex)) || - /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by __nf_queue */ - nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, - htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) - goto nla_put_failure; - } else { - /* Case 2: outdev is bridge group, we need to look for - * physical output device (when called from ipv4) */ - if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, - htonl(outdev->ifindex))) - goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && - nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(entskb->nf_bridge->physoutdev->ifindex))) - goto nla_put_failure; - } -#endif - } - - if (entskb->mark && - nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark))) - goto nla_put_failure; - - if (indev && entskb->dev && - entskb->mac_header != entskb->network_header) { - struct nfqnl_msg_packet_hw phw; - int len = dev_parse_header(entskb, phw.hw_addr); - if (len) { - phw.hw_addrlen = htons(len); - if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) - goto nla_put_failure; - } - } - - if (entskb->tstamp.tv64) { - struct nfqnl_msg_packet_timestamp ts; - struct timeval tv = ktime_to_timeval(entskb->tstamp); - ts.sec = cpu_to_be64(tv.tv_sec); - ts.usec = cpu_to_be64(tv.tv_usec); - - if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) - goto nla_put_failure; - } - - if (data_len) { - struct nlattr *nla; - int sz = nla_attr_size(data_len); - - if (skb_tailroom(skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nf_queue: no tailroom!\n"); - goto nlmsg_failure; - } - - nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); - nla->nla_type = NFQA_PAYLOAD; - nla->nla_len = sz; - - if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) - BUG(); - } - - if (ct) { - struct nlattr *nest_parms; - u_int32_t tmp; - - nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED); - if (!nest_parms) - goto nla_put_failure; - - if (nfq_ct->build(skb, ct) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest_parms); - - tmp = ctinfo; - if (nla_put_u32(skb, NFQA_CT_INFO, htonl(ctinfo))) - goto nla_put_failure; - } - - nlh->nlmsg_len = skb->tail - old_tail; - return skb; - -nlmsg_failure: -nla_put_failure: - if (skb) - kfree_skb(skb); - net_err_ratelimited("nf_queue: error creating packet message\n"); - return NULL; -} - -static int -nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) -{ - struct sk_buff *nskb; - struct nfqnl_instance *queue; - int err = -ENOBUFS; - __be32 *packet_id_ptr; - int failopen = 0; - - /* rcu_read_lock()ed by nf_hook_slow() */ - queue = instance_lookup(queuenum); - if (!queue) { - err = -ESRCH; - goto err_out; - } - - if (queue->copy_mode == NFQNL_COPY_NONE) { - err = -EINVAL; - goto err_out; - } - - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); - if (nskb == NULL) { - err = -ENOMEM; - goto err_out; - } - spin_lock_bh(&queue->lock); - - if (!queue->peer_pid) { - err = -EINVAL; - goto err_out_free_nskb; - } - if (queue->queue_total >= queue->queue_maxlen) { - if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { - failopen = 1; - err = 0; - } else { - queue->queue_dropped++; - net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n", - queue->queue_total); - } - goto err_out_free_nskb; - } - entry->id = ++queue->id_sequence; - *packet_id_ptr = htonl(entry->id); - - /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); - if (err < 0) { - queue->queue_user_dropped++; - goto err_out_unlock; - } - - __enqueue_entry(queue, entry); - - spin_unlock_bh(&queue->lock); - return 0; - -err_out_free_nskb: - kfree_skb(nskb); -err_out_unlock: - spin_unlock_bh(&queue->lock); - if (failopen) - nf_reinject(entry, NF_ACCEPT); -err_out: - return err; -} - -static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) -{ - struct sk_buff *nskb; - - if (diff < 0) { - if (pskb_trim(e->skb, data_len)) - return -ENOMEM; - } else if (diff > 0) { - if (data_len > 0xFFFF) - return -EINVAL; - if (diff > skb_tailroom(e->skb)) { - nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), - diff, GFP_ATOMIC); - if (!nskb) { - printk(KERN_WARNING "nf_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; - } - kfree_skb(e->skb); - e->skb = nskb; - } - skb_put(e->skb, diff); - } - if (!skb_make_writable(e->skb, data_len)) - return -ENOMEM; - skb_copy_to_linear_data(e->skb, data, data_len); - e->skb->ip_summed = CHECKSUM_NONE; - return 0; -} - -static int -nfqnl_set_mode(struct nfqnl_instance *queue, - unsigned char mode, unsigned int range) -{ - int status = 0; - - spin_lock_bh(&queue->lock); - switch (mode) { - case NFQNL_COPY_NONE: - case NFQNL_COPY_META: - queue->copy_mode = mode; - queue->copy_range = 0; - break; - - case NFQNL_COPY_PACKET: - queue->copy_mode = mode; - /* we're using struct nlattr which has 16bit nla_len */ - if (range > 0xffff) - queue->copy_range = 0xffff; - else - queue->copy_range = range; - break; - - default: - status = -EINVAL; - - } - spin_unlock_bh(&queue->lock); - - return status; -} - -static int -dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) -{ - if (entry->indev) - if (entry->indev->ifindex == ifindex) - return 1; - if (entry->outdev) - if (entry->outdev->ifindex == ifindex) - return 1; -#ifdef CONFIG_BRIDGE_NETFILTER - if (entry->skb->nf_bridge) { - if (entry->skb->nf_bridge->physindev && - entry->skb->nf_bridge->physindev->ifindex == ifindex) - return 1; - if (entry->skb->nf_bridge->physoutdev && - entry->skb->nf_bridge->physoutdev->ifindex == ifindex) - return 1; - } -#endif - return 0; -} - -/* drop all packets with either indev or outdev == ifindex from all queue - * instances */ -static void -nfqnl_dev_drop(int ifindex) -{ - int i; - - rcu_read_lock(); - - for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp; - struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; - - hlist_for_each_entry_rcu(inst, tmp, head, hlist) - nfqnl_flush(inst, dev_cmp, ifindex); - } - - rcu_read_unlock(); -} - -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) - -static int -nfqnl_rcv_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - - /* Drop any packets associated with the downed device */ - if (event == NETDEV_DOWN) - nfqnl_dev_drop(dev->ifindex); - return NOTIFY_DONE; -} - -static struct notifier_block nfqnl_dev_notifier = { - .notifier_call = nfqnl_rcv_dev_event, -}; - -static int -nfqnl_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct netlink_notify *n = ptr; - - if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { - int i; - - /* destroy all instances for this pid */ - spin_lock(&instances_lock); - for (i = 0; i < INSTANCE_BUCKETS; i++) { - struct hlist_node *tmp, *t2; - struct nfqnl_instance *inst; - struct hlist_head *head = &instance_table[i]; - - hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { - if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) - __instance_destroy(inst); - } - } - spin_unlock(&instances_lock); - } - return NOTIFY_DONE; -} - -static struct notifier_block nfqnl_rtnl_notifier = { - .notifier_call = nfqnl_rcv_nl_event, -}; - -static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { - [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, - [NFQA_MARK] = { .type = NLA_U32 }, - [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, - [NFQA_CT] = { .type = NLA_UNSPEC }, -}; - -static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { - [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, - [NFQA_MARK] = { .type = NLA_U32 }, -}; - -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) -{ - struct nfqnl_instance *queue; - - queue = instance_lookup(queue_num); - if (!queue) - return ERR_PTR(-ENODEV); - - if (queue->peer_pid != nlpid) - return ERR_PTR(-EPERM); - - return queue; -} - -static struct nfqnl_msg_verdict_hdr* -verdicthdr_get(const struct nlattr * const nfqa[]) -{ - struct nfqnl_msg_verdict_hdr *vhdr; - unsigned int verdict; - - if (!nfqa[NFQA_VERDICT_HDR]) - return NULL; - - vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); - verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; - if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) - return NULL; - return vhdr; -} - -static int nfq_id_after(unsigned int id, unsigned int max) -{ - return (int)(id - max) > 0; -} - -static int -nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); - struct nf_queue_entry *entry, *tmp; - unsigned int verdict, maxid; - struct nfqnl_msg_verdict_hdr *vhdr; - struct nfqnl_instance *queue; - LIST_HEAD(batch_list); - u16 queue_num = ntohs(nfmsg->res_id); - - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); - if (IS_ERR(queue)) - return PTR_ERR(queue); - - vhdr = verdicthdr_get(nfqa); - if (!vhdr) - return -EINVAL; - - verdict = ntohl(vhdr->verdict); - maxid = ntohl(vhdr->id); - - spin_lock_bh(&queue->lock); - - list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { - if (nfq_id_after(entry->id, maxid)) - break; - __dequeue_entry(queue, entry); - list_add_tail(&entry->list, &batch_list); - } - - spin_unlock_bh(&queue->lock); - - if (list_empty(&batch_list)) - return -ENOENT; - - list_for_each_entry_safe(entry, tmp, &batch_list, list) { - if (nfqa[NFQA_MARK]) - entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); - nf_reinject(entry, verdict); - } - return 0; -} - -static int -nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); - u_int16_t queue_num = ntohs(nfmsg->res_id); - - struct nfqnl_msg_verdict_hdr *vhdr; - struct nfqnl_instance *queue; - unsigned int verdict; - struct nf_queue_entry *entry; - struct nfq_ct_hook *nfq_ct; - enum ip_conntrack_info uninitialized_var(ctinfo); - struct nf_conn *ct = NULL; - - queue = instance_lookup(queue_num); - if (!queue) - - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); - if (IS_ERR(queue)) - return PTR_ERR(queue); - - vhdr = verdicthdr_get(nfqa); - if (!vhdr) - return -EINVAL; - - verdict = ntohl(vhdr->verdict); - - entry = find_dequeue_entry(queue, ntohl(vhdr->id)); - if (entry == NULL) - return -ENOENT; - - rcu_read_lock(); - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct != NULL && - (queue->flags & NFQA_CFG_F_CONNTRACK) && nfqa[NFQA_CT]) { - ct = nf_ct_get(entry->skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) - nfq_ct->parse(nfqa[NFQA_CT], ct); - } - - if (nfqa[NFQA_PAYLOAD]) { - u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); - int diff = payload_len - entry->skb->len; - - if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), - payload_len, entry, diff) < 0) - verdict = NF_DROP; - - if (ct && (ct->status & IPS_NAT_MASK) && diff) - nfq_ct->seq_adjust(skb, ct, ctinfo, diff); - } - rcu_read_unlock(); - - if (nfqa[NFQA_MARK]) - entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); - - nf_reinject(entry, verdict); - return 0; -} - -static int -nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - return -ENOTSUPP; -} - -static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { - [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, - [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, -}; - -static const struct nf_queue_handler nfqh = { - .name = "nf_queue", - .outfn = &nfqnl_enqueue_packet, -}; - -static int -nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nfqa[]) -{ - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); - u_int16_t queue_num = ntohs(nfmsg->res_id); - struct nfqnl_instance *queue; - struct nfqnl_msg_config_cmd *cmd = NULL; - int ret = 0; - - if (nfqa[NFQA_CFG_CMD]) { - cmd = nla_data(nfqa[NFQA_CFG_CMD]); - - /* Commands without queue context - might sleep */ - switch (cmd->command) { - case NFQNL_CFG_CMD_PF_BIND: - return nf_register_queue_handler(ntohs(cmd->pf), - &nfqh); - case NFQNL_CFG_CMD_PF_UNBIND: - return nf_unregister_queue_handler(ntohs(cmd->pf), - &nfqh); - } - } - - rcu_read_lock(); - queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { - ret = -EPERM; - goto err_out_unlock; - } - - if (cmd != NULL) { - switch (cmd->command) { - case NFQNL_CFG_CMD_BIND: - if (queue) { - ret = -EBUSY; - goto err_out_unlock; - } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); - if (IS_ERR(queue)) { - ret = PTR_ERR(queue); - goto err_out_unlock; - } - break; - case NFQNL_CFG_CMD_UNBIND: - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - instance_destroy(queue); - break; - case NFQNL_CFG_CMD_PF_BIND: - case NFQNL_CFG_CMD_PF_UNBIND: - break; - default: - ret = -ENOTSUPP; - break; - } - } - - if (nfqa[NFQA_CFG_PARAMS]) { - struct nfqnl_msg_config_params *params; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - params = nla_data(nfqa[NFQA_CFG_PARAMS]); - nfqnl_set_mode(queue, params->copy_mode, - ntohl(params->copy_range)); - } - - if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { - __be32 *queue_maxlen; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); - spin_lock_bh(&queue->lock); - queue->queue_maxlen = ntohl(*queue_maxlen); - spin_unlock_bh(&queue->lock); - } - - if (nfqa[NFQA_CFG_FLAGS]) { - __u32 flags, mask; - - if (!queue) { - ret = -ENODEV; - goto err_out_unlock; - } - - if (!nfqa[NFQA_CFG_MASK]) { - /* A mask is needed to specify which flags are being - * changed. - */ - ret = -EINVAL; - goto err_out_unlock; - } - - flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); - mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); - - spin_lock_bh(&queue->lock); - queue->flags &= ~mask; - queue->flags |= flags & mask; - spin_unlock_bh(&queue->lock); - } - -err_out_unlock: - rcu_read_unlock(); - return ret; -} - -static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { - [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, - .attr_count = NFQA_MAX, }, - [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, - .policy = nfqa_verdict_policy }, - [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, - .policy = nfqa_cfg_policy }, - [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, - .attr_count = NFQA_MAX, - .policy = nfqa_verdict_batch_policy }, -}; - -static const struct nfnetlink_subsystem nfqnl_subsys = { - .name = "nf_queue", - .subsys_id = NFNL_SUBSYS_QUEUE, - .cb_count = NFQNL_MSG_MAX, - .cb = nfqnl_cb, -}; - -#ifdef CONFIG_PROC_FS -struct iter_state { - unsigned int bucket; -}; - -static struct hlist_node *get_first(struct seq_file *seq) -{ - struct iter_state *st = seq->private; - - if (!st) - return NULL; - - for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { - if (!hlist_empty(&instance_table[st->bucket])) - return instance_table[st->bucket].first; - } - return NULL; -} - -static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) -{ - struct iter_state *st = seq->private; - - h = h->next; - while (!h) { - if (++st->bucket >= INSTANCE_BUCKETS) - return NULL; - - h = instance_table[st->bucket].first; - } - return h; -} - -static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_node *head; - head = get_first(seq); - - if (head) - while (pos && (head = get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *seq_start(struct seq_file *seq, loff_t *pos) - __acquires(instances_lock) -{ - spin_lock(&instances_lock); - return get_idx(seq, *pos); -} - -static void *seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return get_next(s, v); -} - -static void seq_stop(struct seq_file *s, void *v) - __releases(instances_lock) -{ - spin_unlock(&instances_lock); -} - -static int seq_show(struct seq_file *s, void *v) -{ - const struct nfqnl_instance *inst = v; - - return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", - inst->queue_num, - inst->peer_pid, inst->queue_total, - inst->copy_mode, inst->copy_range, - inst->queue_dropped, inst->queue_user_dropped, - inst->id_sequence, 1); -} - -static const struct seq_operations nfqnl_seq_ops = { - .start = seq_start, - .next = seq_next, - .stop = seq_stop, - .show = seq_show, -}; - -static int nfqnl_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &nfqnl_seq_ops, - sizeof(struct iter_state)); -} - -static const struct file_operations nfqnl_file_ops = { - .owner = THIS_MODULE, - .open = nfqnl_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -#endif /* PROC_FS */ - -static int __init nfnetlink_queue_init(void) -{ - int i, status = -ENOMEM; - - for (i = 0; i < INSTANCE_BUCKETS; i++) - INIT_HLIST_HEAD(&instance_table[i]); - - netlink_register_notifier(&nfqnl_rtnl_notifier); - status = nfnetlink_subsys_register(&nfqnl_subsys); - if (status < 0) { - printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); - goto cleanup_netlink_notifier; - } - -#ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_queue", 0440, - proc_net_netfilter, &nfqnl_file_ops)) - goto cleanup_subsys; -#endif - - register_netdevice_notifier(&nfqnl_dev_notifier); - return status; - -#ifdef CONFIG_PROC_FS -cleanup_subsys: - nfnetlink_subsys_unregister(&nfqnl_subsys); -#endif -cleanup_netlink_notifier: - netlink_unregister_notifier(&nfqnl_rtnl_notifier); - return status; -} - -static void __exit nfnetlink_queue_fini(void) -{ - nf_unregister_queue_handlers(&nfqh); - unregister_netdevice_notifier(&nfqnl_dev_notifier); -#ifdef CONFIG_PROC_FS - remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -#endif - nfnetlink_subsys_unregister(&nfqnl_subsys); - netlink_unregister_notifier(&nfqnl_rtnl_notifier); - - rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} - -MODULE_DESCRIPTION("netfilter packet queue handler"); -MODULE_AUTHOR("Harald Welte "); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); - -module_init(nfnetlink_queue_init); -module_exit(nfnetlink_queue_fini); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c new file mode 100644 index 00000000000..d36b95ea8ca --- /dev/null +++ b/net/netfilter/nfnetlink_queue_core.c @@ -0,0 +1,1090 @@ +/* + * This is a module which is used for queueing packets and communicating with + * userspace via nfnetlink. + * + * (C) 2005 by Harald Welte + * (C) 2007 by Patrick McHardy + * + * Based on the old ipv4-only ip_queue.c: + * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_BRIDGE_NETFILTER +#include "../bridge/br_private.h" +#endif + +#define NFQNL_QMAX_DEFAULT 1024 + +struct nfqnl_instance { + struct hlist_node hlist; /* global list of queues */ + struct rcu_head rcu; + + int peer_pid; + unsigned int queue_maxlen; + unsigned int copy_range; + unsigned int queue_dropped; + unsigned int queue_user_dropped; + + + u_int16_t queue_num; /* number of this queue */ + u_int8_t copy_mode; + u_int32_t flags; /* Set using NFQA_CFG_FLAGS */ +/* + * Following fields are dirtied for each queued packet, + * keep them in same cache line if possible. + */ + spinlock_t lock; + unsigned int queue_total; + unsigned int id_sequence; /* 'sequence' of pkt ids */ + struct list_head queue_list; /* packets in queue */ +}; + +typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); + +static DEFINE_SPINLOCK(instances_lock); + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; + +static inline u_int8_t instance_hashfn(u_int16_t queue_num) +{ + return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +} + +static struct nfqnl_instance * +instance_lookup(u_int16_t queue_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfqnl_instance *inst; + + head = &instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry_rcu(inst, pos, head, hlist) { + if (inst->queue_num == queue_num) + return inst; + } + return NULL; +} + +static struct nfqnl_instance * +instance_create(u_int16_t queue_num, int pid) +{ + struct nfqnl_instance *inst; + unsigned int h; + int err; + + spin_lock(&instances_lock); + if (instance_lookup(queue_num)) { + err = -EEXIST; + goto out_unlock; + } + + inst = kzalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) { + err = -ENOMEM; + goto out_unlock; + } + + inst->queue_num = queue_num; + inst->peer_pid = pid; + inst->queue_maxlen = NFQNL_QMAX_DEFAULT; + inst->copy_range = 0xfffff; + inst->copy_mode = NFQNL_COPY_NONE; + spin_lock_init(&inst->lock); + INIT_LIST_HEAD(&inst->queue_list); + + if (!try_module_get(THIS_MODULE)) { + err = -EAGAIN; + goto out_free; + } + + h = instance_hashfn(queue_num); + hlist_add_head_rcu(&inst->hlist, &instance_table[h]); + + spin_unlock(&instances_lock); + + return inst; + +out_free: + kfree(inst); +out_unlock: + spin_unlock(&instances_lock); + return ERR_PTR(err); +} + +static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data); + +static void +instance_destroy_rcu(struct rcu_head *head) +{ + struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, + rcu); + + nfqnl_flush(inst, NULL, 0); + kfree(inst); + module_put(THIS_MODULE); +} + +static void +__instance_destroy(struct nfqnl_instance *inst) +{ + hlist_del_rcu(&inst->hlist); + call_rcu(&inst->rcu, instance_destroy_rcu); +} + +static void +instance_destroy(struct nfqnl_instance *inst) +{ + spin_lock(&instances_lock); + __instance_destroy(inst); + spin_unlock(&instances_lock); +} + +static inline void +__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) +{ + list_add_tail(&entry->list, &queue->queue_list); + queue->queue_total++; +} + +static void +__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) +{ + list_del(&entry->list); + queue->queue_total--; +} + +static struct nf_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) +{ + struct nf_queue_entry *entry = NULL, *i; + + spin_lock_bh(&queue->lock); + + list_for_each_entry(i, &queue->queue_list, list) { + if (i->id == id) { + entry = i; + break; + } + } + + if (entry) + __dequeue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + + return entry; +} + +static void +nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nf_queue_entry *entry, *next; + + spin_lock_bh(&queue->lock); + list_for_each_entry_safe(entry, next, &queue->queue_list, list) { + if (!cmpfn || cmpfn(entry, data)) { + list_del(&entry->list); + queue->queue_total--; + nf_reinject(entry, NF_DROP); + } + } + spin_unlock_bh(&queue->lock); +} + +static struct sk_buff * +nfqnl_build_packet_message(struct nfqnl_instance *queue, + struct nf_queue_entry *entry, + __be32 **packet_id_ptr) +{ + sk_buff_data_t old_tail; + size_t size; + size_t data_len = 0; + struct sk_buff *skb; + struct nlattr *nla; + struct nfqnl_msg_packet_hdr *pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct sk_buff *entskb = entry->skb; + struct net_device *indev; + struct net_device *outdev; + struct nf_conn *ct = NULL; + enum ip_conntrack_info uninitialized_var(ctinfo); + + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ +#ifdef CONFIG_BRIDGE_NETFILTER + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + + nla_total_size(sizeof(u_int32_t)) /* ifindex */ +#endif + + nla_total_size(sizeof(u_int32_t)) /* mark */ + + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + + outdev = entry->outdev; + + switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { + case NFQNL_COPY_META: + case NFQNL_COPY_NONE: + break; + + case NFQNL_COPY_PACKET: + if (entskb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(entskb)) + return NULL; + + data_len = ACCESS_ONCE(queue->copy_range); + if (data_len == 0 || data_len > entskb->len) + data_len = entskb->len; + + size += nla_total_size(data_len); + break; + } + + if (queue->flags & NFQA_CFG_F_CONNTRACK) + ct = nfqnl_ct_get(entskb, &size, &ctinfo); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + + old_tail = skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, + NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = entry->pf; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(queue->queue_num); + + nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); + pmsg = nla_data(nla); + pmsg->hw_protocol = entskb->protocol; + pmsg->hook = entry->hook; + *packet_id_ptr = &pmsg->packet_id; + + indev = entry->indev; + if (indev) { +#ifndef CONFIG_BRIDGE_NETFILTER + if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) + goto nla_put_failure; +#else + if (entry->pf == PF_BRIDGE) { + /* Case 1: indev is physical input device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(indev->ifindex)) || + /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ + nla_put_be32(skb, NFQA_IFINDEX_INDEV, + htonl(br_port_get_rcu(indev)->br->dev->ifindex))) + goto nla_put_failure; + } else { + /* Case 2: indev is bridge group, we need to look for + * physical device (when called from ipv4) */ + if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, + htonl(indev->ifindex))) + goto nla_put_failure; + if (entskb->nf_bridge && entskb->nf_bridge->physindev && + nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, + htonl(entskb->nf_bridge->physindev->ifindex))) + goto nla_put_failure; + } +#endif + } + + if (outdev) { +#ifndef CONFIG_BRIDGE_NETFILTER + if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) + goto nla_put_failure; +#else + if (entry->pf == PF_BRIDGE) { + /* Case 1: outdev is physical output device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(outdev->ifindex)) || + /* this is the bridge group "brX" */ + /* rcu_read_lock()ed by __nf_queue */ + nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, + htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) + goto nla_put_failure; + } else { + /* Case 2: outdev is bridge group, we need to look for + * physical output device (when called from ipv4) */ + if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, + htonl(outdev->ifindex))) + goto nla_put_failure; + if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && + nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, + htonl(entskb->nf_bridge->physoutdev->ifindex))) + goto nla_put_failure; + } +#endif + } + + if (entskb->mark && + nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark))) + goto nla_put_failure; + + if (indev && entskb->dev && + entskb->mac_header != entskb->network_header) { + struct nfqnl_msg_packet_hw phw; + int len = dev_parse_header(entskb, phw.hw_addr); + if (len) { + phw.hw_addrlen = htons(len); + if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw)) + goto nla_put_failure; + } + } + + if (entskb->tstamp.tv64) { + struct nfqnl_msg_packet_timestamp ts; + struct timeval tv = ktime_to_timeval(entskb->tstamp); + ts.sec = cpu_to_be64(tv.tv_sec); + ts.usec = cpu_to_be64(tv.tv_usec); + + if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) + goto nla_put_failure; + } + + if (data_len) { + struct nlattr *nla; + int sz = nla_attr_size(data_len); + + if (skb_tailroom(skb) < nla_total_size(data_len)) { + printk(KERN_WARNING "nf_queue: no tailroom!\n"); + goto nlmsg_failure; + } + + nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla->nla_type = NFQA_PAYLOAD; + nla->nla_len = sz; + + if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) + BUG(); + } + + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + goto nla_put_failure; + + nlh->nlmsg_len = skb->tail - old_tail; + return skb; + +nlmsg_failure: +nla_put_failure: + if (skb) + kfree_skb(skb); + net_err_ratelimited("nf_queue: error creating packet message\n"); + return NULL; +} + +static int +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +{ + struct sk_buff *nskb; + struct nfqnl_instance *queue; + int err = -ENOBUFS; + __be32 *packet_id_ptr; + int failopen = 0; + + /* rcu_read_lock()ed by nf_hook_slow() */ + queue = instance_lookup(queuenum); + if (!queue) { + err = -ESRCH; + goto err_out; + } + + if (queue->copy_mode == NFQNL_COPY_NONE) { + err = -EINVAL; + goto err_out; + } + + nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); + if (nskb == NULL) { + err = -ENOMEM; + goto err_out; + } + spin_lock_bh(&queue->lock); + + if (!queue->peer_pid) { + err = -EINVAL; + goto err_out_free_nskb; + } + if (queue->queue_total >= queue->queue_maxlen) { + if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { + failopen = 1; + err = 0; + } else { + queue->queue_dropped++; + net_warn_ratelimited("nf_queue: full at %d entries, dropping packets(s)\n", + queue->queue_total); + } + goto err_out_free_nskb; + } + entry->id = ++queue->id_sequence; + *packet_id_ptr = htonl(entry->id); + + /* nfnetlink_unicast will either free the nskb or add it to a socket */ + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + if (err < 0) { + queue->queue_user_dropped++; + goto err_out_unlock; + } + + __enqueue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + return 0; + +err_out_free_nskb: + kfree_skb(nskb); +err_out_unlock: + spin_unlock_bh(&queue->lock); + if (failopen) + nf_reinject(entry, NF_ACCEPT); +err_out: + return err; +} + +static int +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) +{ + struct sk_buff *nskb; + + if (diff < 0) { + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { + if (data_len > 0xFFFF) + return -EINVAL; + if (diff > skb_tailroom(e->skb)) { + nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), + diff, GFP_ATOMIC); + if (!nskb) { + printk(KERN_WARNING "nf_queue: OOM " + "in mangle, dropping packet\n"); + return -ENOMEM; + } + kfree_skb(e->skb); + e->skb = nskb; + } + skb_put(e->skb, diff); + } + if (!skb_make_writable(e->skb, data_len)) + return -ENOMEM; + skb_copy_to_linear_data(e->skb, data, data_len); + e->skb->ip_summed = CHECKSUM_NONE; + return 0; +} + +static int +nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status = 0; + + spin_lock_bh(&queue->lock); + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nlattr which has 16bit nla_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } + spin_unlock_bh(&queue->lock); + + return status; +} + +static int +dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) +{ + if (entry->indev) + if (entry->indev->ifindex == ifindex) + return 1; + if (entry->outdev) + if (entry->outdev->ifindex == ifindex) + return 1; +#ifdef CONFIG_BRIDGE_NETFILTER + if (entry->skb->nf_bridge) { + if (entry->skb->nf_bridge->physindev && + entry->skb->nf_bridge->physindev->ifindex == ifindex) + return 1; + if (entry->skb->nf_bridge->physoutdev && + entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + return 1; + } +#endif + return 0; +} + +/* drop all packets with either indev or outdev == ifindex from all queue + * instances */ +static void +nfqnl_dev_drop(int ifindex) +{ + int i; + + rcu_read_lock(); + + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_rcu(inst, tmp, head, hlist) + nfqnl_flush(inst, dev_cmp, ifindex); + } + + rcu_read_unlock(); +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static int +nfqnl_rcv_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_dev_notifier = { + .notifier_call = nfqnl_rcv_dev_event, +}; + +static int +nfqnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { + int i; + + /* destroy all instances for this pid */ + spin_lock(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + if ((n->net == &init_net) && + (n->pid == inst->peer_pid)) + __instance_destroy(inst); + } + } + spin_unlock(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_rtnl_notifier = { + .notifier_call = nfqnl_rcv_nl_event, +}; + +static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, + [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, + [NFQA_CT] = { .type = NLA_UNSPEC }, +}; + +static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { + [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, + [NFQA_MARK] = { .type = NLA_U32 }, +}; + +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +{ + struct nfqnl_instance *queue; + + queue = instance_lookup(queue_num); + if (!queue) + return ERR_PTR(-ENODEV); + + if (queue->peer_pid != nlpid) + return ERR_PTR(-EPERM); + + return queue; +} + +static struct nfqnl_msg_verdict_hdr* +verdicthdr_get(const struct nlattr * const nfqa[]) +{ + struct nfqnl_msg_verdict_hdr *vhdr; + unsigned int verdict; + + if (!nfqa[NFQA_VERDICT_HDR]) + return NULL; + + vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); + verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; + if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) + return NULL; + return vhdr; +} + +static int nfq_id_after(unsigned int id, unsigned int max) +{ + return (int)(id - max) > 0; +} + +static int +nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nf_queue_entry *entry, *tmp; + unsigned int verdict, maxid; + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + LIST_HEAD(batch_list); + u16 queue_num = ntohs(nfmsg->res_id); + + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + vhdr = verdicthdr_get(nfqa); + if (!vhdr) + return -EINVAL; + + verdict = ntohl(vhdr->verdict); + maxid = ntohl(vhdr->id); + + spin_lock_bh(&queue->lock); + + list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { + if (nfq_id_after(entry->id, maxid)) + break; + __dequeue_entry(queue, entry); + list_add_tail(&entry->list, &batch_list); + } + + spin_unlock_bh(&queue->lock); + + if (list_empty(&batch_list)) + return -ENOENT; + + list_for_each_entry_safe(entry, tmp, &batch_list, list) { + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + nf_reinject(entry, verdict); + } + return 0; +} + +static int +nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + + struct nfqnl_msg_verdict_hdr *vhdr; + struct nfqnl_instance *queue; + unsigned int verdict; + struct nf_queue_entry *entry; + enum ip_conntrack_info uninitialized_var(ctinfo); + struct nf_conn *ct = NULL; + + queue = instance_lookup(queue_num); + if (!queue) + + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + vhdr = verdicthdr_get(nfqa); + if (!vhdr) + return -EINVAL; + + verdict = ntohl(vhdr->verdict); + + entry = find_dequeue_entry(queue, ntohl(vhdr->id)); + if (entry == NULL) + return -ENOENT; + + rcu_read_lock(); + if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK)) + ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); + + if (nfqa[NFQA_PAYLOAD]) { + u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); + int diff = payload_len - entry->skb->len; + + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), + payload_len, entry, diff) < 0) + verdict = NF_DROP; + + if (ct) + nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff); + } + rcu_read_unlock(); + + if (nfqa[NFQA_MARK]) + entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); + + nf_reinject(entry, verdict); + return 0; +} + +static int +nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + return -ENOTSUPP; +} + +static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { + [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, + [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, +}; + +static const struct nf_queue_handler nfqh = { + .name = "nf_queue", + .outfn = &nfqnl_enqueue_packet, +}; + +static int +nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + struct nfqnl_instance *queue; + struct nfqnl_msg_config_cmd *cmd = NULL; + int ret = 0; + + if (nfqa[NFQA_CFG_CMD]) { + cmd = nla_data(nfqa[NFQA_CFG_CMD]); + + /* Commands without queue context - might sleep */ + switch (cmd->command) { + case NFQNL_CFG_CMD_PF_BIND: + return nf_register_queue_handler(ntohs(cmd->pf), + &nfqh); + case NFQNL_CFG_CMD_PF_UNBIND: + return nf_unregister_queue_handler(ntohs(cmd->pf), + &nfqh); + } + } + + rcu_read_lock(); + queue = instance_lookup(queue_num); + if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto err_out_unlock; + } + + if (cmd != NULL) { + switch (cmd->command) { + case NFQNL_CFG_CMD_BIND: + if (queue) { + ret = -EBUSY; + goto err_out_unlock; + } + queue = instance_create(queue_num, NETLINK_CB(skb).pid); + if (IS_ERR(queue)) { + ret = PTR_ERR(queue); + goto err_out_unlock; + } + break; + case NFQNL_CFG_CMD_UNBIND: + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + instance_destroy(queue); + break; + case NFQNL_CFG_CMD_PF_BIND: + case NFQNL_CFG_CMD_PF_UNBIND: + break; + default: + ret = -ENOTSUPP; + break; + } + } + + if (nfqa[NFQA_CFG_PARAMS]) { + struct nfqnl_msg_config_params *params; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + params = nla_data(nfqa[NFQA_CFG_PARAMS]); + nfqnl_set_mode(queue, params->copy_mode, + ntohl(params->copy_range)); + } + + if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { + __be32 *queue_maxlen; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); + spin_lock_bh(&queue->lock); + queue->queue_maxlen = ntohl(*queue_maxlen); + spin_unlock_bh(&queue->lock); + } + + if (nfqa[NFQA_CFG_FLAGS]) { + __u32 flags, mask; + + if (!queue) { + ret = -ENODEV; + goto err_out_unlock; + } + + if (!nfqa[NFQA_CFG_MASK]) { + /* A mask is needed to specify which flags are being + * changed. + */ + ret = -EINVAL; + goto err_out_unlock; + } + + flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS])); + mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK])); + + spin_lock_bh(&queue->lock); + queue->flags &= ~mask; + queue->flags |= flags & mask; + spin_unlock_bh(&queue->lock); + } + +err_out_unlock: + rcu_read_unlock(); + return ret; +} + +static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { + [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, + .attr_count = NFQA_MAX, }, + [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_policy }, + [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, + .attr_count = NFQA_CFG_MAX, + .policy = nfqa_cfg_policy }, + [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, + .attr_count = NFQA_MAX, + .policy = nfqa_verdict_batch_policy }, +}; + +static const struct nfnetlink_subsystem nfqnl_subsys = { + .name = "nf_queue", + .subsys_id = NFNL_SUBSYS_QUEUE, + .cb_count = NFQNL_MSG_MAX, + .cb = nfqnl_cb, +}; + +#ifdef CONFIG_PROC_FS +struct iter_state { + unsigned int bucket; +}; + +static struct hlist_node *get_first(struct seq_file *seq) +{ + struct iter_state *st = seq->private; + + if (!st) + return NULL; + + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { + if (!hlist_empty(&instance_table[st->bucket])) + return instance_table[st->bucket].first; + } + return NULL; +} + +static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) +{ + struct iter_state *st = seq->private; + + h = h->next; + while (!h) { + if (++st->bucket >= INSTANCE_BUCKETS) + return NULL; + + h = instance_table[st->bucket].first; + } + return h; +} + +static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head; + head = get_first(seq); + + if (head) + while (pos && (head = get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *seq_start(struct seq_file *seq, loff_t *pos) + __acquires(instances_lock) +{ + spin_lock(&instances_lock); + return get_idx(seq, *pos); +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return get_next(s, v); +} + +static void seq_stop(struct seq_file *s, void *v) + __releases(instances_lock) +{ + spin_unlock(&instances_lock); +} + +static int seq_show(struct seq_file *s, void *v) +{ + const struct nfqnl_instance *inst = v; + + return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + inst->queue_num, + inst->peer_pid, inst->queue_total, + inst->copy_mode, inst->copy_range, + inst->queue_dropped, inst->queue_user_dropped, + inst->id_sequence, 1); +} + +static const struct seq_operations nfqnl_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqnl_open(struct inode *inode, struct file *file) +{ + return seq_open_private(file, &nfqnl_seq_ops, + sizeof(struct iter_state)); +} + +static const struct file_operations nfqnl_file_ops = { + .owner = THIS_MODULE, + .open = nfqnl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#endif /* PROC_FS */ + +static int __init nfnetlink_queue_init(void) +{ + int i, status = -ENOMEM; + + for (i = 0; i < INSTANCE_BUCKETS; i++) + INIT_HLIST_HEAD(&instance_table[i]); + + netlink_register_notifier(&nfqnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) { + printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + +#ifdef CONFIG_PROC_FS + if (!proc_create("nfnetlink_queue", 0440, + proc_net_netfilter, &nfqnl_file_ops)) + goto cleanup_subsys; +#endif + + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; + +#ifdef CONFIG_PROC_FS +cleanup_subsys: + nfnetlink_subsys_unregister(&nfqnl_subsys); +#endif +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + return status; +} + +static void __exit nfnetlink_queue_fini(void) +{ + nf_unregister_queue_handlers(&nfqh); + unregister_netdevice_notifier(&nfqnl_dev_notifier); +#ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_queue", proc_net_netfilter); +#endif + nfnetlink_subsys_unregister(&nfqnl_subsys); + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ +} + +MODULE_DESCRIPTION("netfilter packet queue handler"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); + +module_init(nfnetlink_queue_init); +module_exit(nfnetlink_queue_fini); diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c new file mode 100644 index 00000000000..68ef550066f --- /dev/null +++ b/net/netfilter/nfnetlink_queue_ct.c @@ -0,0 +1,97 @@ +/* + * (C) 2012 by Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include + +struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, + enum ip_conntrack_info *ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nf_conn *ct; + + /* rcu_read_lock()ed by __nf_queue already. */ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return NULL; + + ct = nf_ct_get(entskb, ctinfo); + if (ct) { + if (!nf_ct_is_untracked(ct)) + *size += nfq_ct->build_size(ct); + else + ct = NULL; + } + return ct; +} + +struct nf_conn * +nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr, + enum ip_conntrack_info *ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nf_conn *ct; + + /* rcu_read_lock()ed by __nf_queue already. */ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return NULL; + + ct = nf_ct_get(skb, ctinfo); + if (ct && !nf_ct_is_untracked(ct)) + nfq_ct->parse(attr, ct); + + return ct; +} + +int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nlattr *nest_parms; + u_int32_t tmp; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return 0; + + nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + if (nfq_ct->build(skb, ct) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + + tmp = ctinfo; + if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ + struct nfq_ct_hook *nfq_ct; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return; + + if ((ct->status & IPS_NAT_MASK) && diff) + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); +} -- cgit v1.2.3-70-g09d2 From 9345d40c580d0f3dfc040add0e6371b1a629c1cc Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 15 Jun 2012 10:36:42 +0300 Subject: Bluetooth: Use AUTO_OFF constant in jiffies Move AUTO_OFF_TIMEOUT to other constants changing name to HCI_AUTO_OFF_TIMEOUT and convert to jiffies. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci.h | 3 ++- net/bluetooth/hci_core.c | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 7dcd3495edd..ccd723e0f78 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -142,8 +142,9 @@ enum { #define HCI_DISCONN_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ #define HCI_PAIRING_TIMEOUT msecs_to_jiffies(60000) /* 60 seconds */ #define HCI_INIT_TIMEOUT msecs_to_jiffies(10000) /* 10 seconds */ -#define HCI_CMD_TIMEOUT msecs_to_jiffies(1000) /* 1 seconds */ +#define HCI_CMD_TIMEOUT msecs_to_jiffies(1000) /* 1 second */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ +#define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ /* HCI data types */ #define HCI_COMMAND_PKT 0x01 diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a2bdf936ed4..32dcb09cdb5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -33,8 +33,6 @@ #include #include -#define AUTO_OFF_TIMEOUT 2000 - static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); static void hci_tx_work(struct work_struct *work); @@ -1083,8 +1081,7 @@ static void hci_power_on(struct work_struct *work) return; if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) - schedule_delayed_work(&hdev->power_off, - msecs_to_jiffies(AUTO_OFF_TIMEOUT)); + schedule_delayed_work(&hdev->power_off, HCI_AUTO_OFF_TIMEOUT); if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); -- cgit v1.2.3-70-g09d2 From 674147e21195a496164e1c7ff70d0f0b45235f25 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Jun 2012 05:25:46 +0200 Subject: netfilter: fix missing symbols if CONFIG_NETFILTER_NETLINK_QUEUE_CT unset ERROR: "nfqnl_ct_parse" [net/netfilter/nfnetlink_queue.ko] undefined! ERROR: "nfqnl_ct_seq_adjust" [net/netfilter/nfnetlink_queue.ko] undefined! ERROR: "nfqnl_ct_put" [net/netfilter/nfnetlink_queue.ko] undefined! ERROR: "nfqnl_ct_get" [net/netfilter/nfnetlink_queue.ko] undefined! We have to use CONFIG_NETFILTER_NETLINK_QUEUE_CT in include/net/netfilter/nfnetlink_queue.h, not CONFIG_NF_CONNTRACK. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- include/net/netfilter/nfnetlink_queue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h index 9f8095c108e..86267a52951 100644 --- a/include/net/netfilter/nfnetlink_queue.h +++ b/include/net/netfilter/nfnetlink_queue.h @@ -5,7 +5,7 @@ struct nf_conn; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo); struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, -- cgit v1.2.3-70-g09d2 From f9242b6b28d61295f2bf7e8adfb1060b382e5381 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 18:56:21 -0700 Subject: inet: Sanitize inet{,6} protocol demux. Don't pretend that inet_protos[] and inet6_protos[] are hashes, thay are just a straight arrays. Remove all unnecessary hash masking. Document MAX_INET_PROTOS. Use RAW_HTABLE_SIZE when appropriate. Reported-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/protocol.h | 7 +++++-- net/ipv4/af_inet.c | 26 ++++++++++++-------------- net/ipv4/icmp.c | 9 ++++----- net/ipv4/ip_input.c | 5 ++--- net/ipv4/protocol.c | 8 +++----- net/ipv6/icmp.c | 7 ++----- net/ipv6/ip6_input.c | 9 +++------ net/ipv6/protocol.c | 8 +++----- net/ipv6/raw.c | 4 ++-- 9 files changed, 36 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 875f4895b03..a1b1b530c33 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -29,8 +29,11 @@ #include #endif -#define MAX_INET_PROTOS 256 /* Must be a power of 2 */ - +/* This is one larger than the largest protocol value that can be + * found in an ipv4 or ipv6 header. Since in both cases the protocol + * value is presented in a __u8, this is defined to be 256. + */ +#define MAX_INET_PROTOS 256 /* This is used to register protocols. */ struct net_protocol { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e4e8e00a2c9..85a3b176313 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -242,20 +242,18 @@ void build_ehash_secret(void) } EXPORT_SYMBOL(build_ehash_secret); -static inline int inet_netns_ok(struct net *net, int protocol) +static inline int inet_netns_ok(struct net *net, __u8 protocol) { - int hash; const struct net_protocol *ipprot; if (net_eq(net, &init_net)) return 1; - hash = protocol & (MAX_INET_PROTOS - 1); - ipprot = rcu_dereference(inet_protos[hash]); - - if (ipprot == NULL) + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot == NULL) { /* raw IP is OK */ return 1; + } return ipprot->netns_ok; } @@ -1216,8 +1214,8 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); static int inet_gso_send_check(struct sk_buff *skb) { - const struct iphdr *iph; const struct net_protocol *ops; + const struct iphdr *iph; int proto; int ihl; int err = -EINVAL; @@ -1236,7 +1234,7 @@ static int inet_gso_send_check(struct sk_buff *skb) __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; err = -EPROTONOSUPPORT; rcu_read_lock(); @@ -1253,8 +1251,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - struct iphdr *iph; const struct net_protocol *ops; + struct iphdr *iph; int proto; int ihl; int id; @@ -1286,7 +1284,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb_reset_transport_header(skb); iph = ip_hdr(skb); id = ntohs(iph->id); - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; segs = ERR_PTR(-EPROTONOSUPPORT); rcu_read_lock(); @@ -1340,7 +1338,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out; } - proto = iph->protocol & (MAX_INET_PROTOS - 1); + proto = iph->protocol; rcu_read_lock(); ops = rcu_dereference(inet_protos[proto]); @@ -1398,11 +1396,11 @@ out: static int inet_gro_complete(struct sk_buff *skb) { - const struct net_protocol *ops; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); struct iphdr *iph = ip_hdr(skb); - int proto = iph->protocol & (MAX_INET_PROTOS - 1); + const struct net_protocol *ops; + int proto = iph->protocol; int err = -ENOSYS; - __be16 newlen = htons(skb->len - skb_network_offset(skb)); csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e1caa1abe5d..49a74cc79dc 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -637,12 +637,12 @@ EXPORT_SYMBOL(icmp_send); static void icmp_unreach(struct sk_buff *skb) { + const struct net_protocol *ipprot; const struct iphdr *iph; struct icmphdr *icmph; - int hash, protocol; - const struct net_protocol *ipprot; - u32 info = 0; struct net *net; + u32 info = 0; + int protocol; net = dev_net(skb_dst(skb)->dev); @@ -731,9 +731,8 @@ static void icmp_unreach(struct sk_buff *skb) */ raw_icmp_error(skb, protocol, info); - hash = protocol & (MAX_INET_PROTOS - 1); rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[hash]); + ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, info); rcu_read_unlock(); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 8590144ca33..c4fe1d27113 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -198,14 +198,13 @@ static int ip_local_deliver_finish(struct sk_buff *skb) rcu_read_lock(); { int protocol = ip_hdr(skb)->protocol; - int hash, raw; const struct net_protocol *ipprot; + int raw; resubmit: raw = raw_local_deliver(skb, protocol); - hash = protocol & (MAX_INET_PROTOS - 1); - ipprot = rcu_dereference(inet_protos[hash]); + ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot != NULL) { int ret; diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 9ae5c01cd0b..8918eff1426 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -36,9 +36,7 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct net_protocol **)&inet_protos[hash], + return !cmpxchg((const struct net_protocol **)&inet_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet_add_protocol); @@ -49,9 +47,9 @@ EXPORT_SYMBOL(inet_add_protocol); int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct net_protocol **)&inet_protos[hash], + ret = (cmpxchg((const struct net_protocol **)&inet_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5247d5c211f..c7da1422cbd 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -600,9 +600,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; - int hash; - u8 nexthdr; __be16 frag_off; + u8 nexthdr; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) return; @@ -629,10 +628,8 @@ static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) --ANK (980726) */ - hash = nexthdr & (MAX_INET_PROTOS - 1); - rcu_read_lock(); - ipprot = rcu_dereference(inet6_protos[hash]); + ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, NULL, type, code, inner_offset, info); rcu_read_unlock(); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 21a15dfe4a9..5ab923e51af 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -168,13 +168,12 @@ drop: static int ip6_input_finish(struct sk_buff *skb) { + struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; + struct inet6_dev *idev; unsigned int nhoff; int nexthdr; bool raw; - u8 hash; - struct inet6_dev *idev; - struct net *net = dev_net(skb_dst(skb)->dev); /* * Parse extension headers @@ -189,9 +188,7 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - - hash = nexthdr & (MAX_INET_PROTOS - 1); - if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) { + if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 9a7978fdc02..053082dfc93 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -29,9 +29,7 @@ const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS] __read_mostly; int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int hash = protocol & (MAX_INET_PROTOS - 1); - - return !cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + return !cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], NULL, prot) ? 0 : -1; } EXPORT_SYMBOL(inet6_add_protocol); @@ -42,9 +40,9 @@ EXPORT_SYMBOL(inet6_add_protocol); int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { - int ret, hash = protocol & (MAX_INET_PROTOS - 1); + int ret; - ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[hash], + ret = (cmpxchg((const struct inet6_protocol **)&inet6_protos[protocol], prot, NULL) == prot) ? 0 : -1; synchronize_net(); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 43b0042f15f..b5c1dcb2773 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -165,7 +165,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; - hash = nexthdr & (MAX_INET_PROTOS - 1); + hash = nexthdr & (RAW_HTABLE_SIZE - 1); read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); @@ -229,7 +229,7 @@ bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { struct sock *raw_sk; - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (MAX_INET_PROTOS - 1)]); + raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) raw_sk = NULL; -- cgit v1.2.3-70-g09d2 From 41063e9dd11956f2d285e12e4342e1d232ba0ea2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jun 2012 21:22:05 -0700 Subject: ipv4: Early TCP socket demux. Input packet processing for local sockets involves two major demuxes. One for the route and one for the socket. But we can optimize this down to one demux for certain kinds of local sockets. Currently we only do this for established TCP sockets, but it could at least in theory be expanded to other kinds of connections. If a TCP socket is established then it's identity is fully specified. This means that whatever input route was used during the three-way handshake must work equally well for the rest of the connection since the keys will not change. Once we move to established state, we cache the receive packet's input route to use later. Like the existing cached route in sk->sk_dst_cache used for output packets, we have to check for route invalidations using dst->obsolete and dst->ops->check(). Early demux occurs outside of a socket locked section, so when a route invalidation occurs we defer the fixup of sk->sk_rx_dst until we are actually inside of established state packet processing and thus have the socket locked. Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 ++-- include/net/protocol.h | 1 + include/net/sock.h | 2 ++ include/net/tcp.h | 1 + net/core/sock.c | 5 +++++ net/ipv4/af_inet.c | 18 +++++++++-------- net/ipv4/ip_input.c | 39 ++++++++++++++++++++++++------------ net/ipv4/tcp_input.c | 16 ++++++++++++++- net/ipv4/tcp_ipv4.c | 46 +++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_minisocks.c | 2 ++ 10 files changed, 110 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 808fc5f76b0..54be0287eb9 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -379,10 +379,10 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - struct sock *sk; + struct sock *sk = skb_steal_sock(skb); const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) + if (sk) return sk; else return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, diff --git a/include/net/protocol.h b/include/net/protocol.h index a1b1b530c33..967b926cbfb 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,6 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { + int (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 4a452169956..87b424ae750 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -319,6 +319,7 @@ struct sock { unsigned long sk_flags; struct dst_entry *sk_dst_cache; spinlock_t sk_dst_lock; + struct dst_entry *sk_rx_dst; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -1426,6 +1427,7 @@ extern struct sk_buff *sock_rmalloc(struct sock *sk, gfp_t priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); +extern void sock_edemux(struct sk_buff *skb); extern int sock_setsockopt(struct socket *sock, int level, int op, char __user *optval, diff --git a/include/net/tcp.h b/include/net/tcp.h index 9332f342259..6660ffc4963 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,6 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); +extern int tcp_v4_early_demux(struct sk_buff *skb); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 9e5b71fda6e..929bdcc2383 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1465,6 +1465,11 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +void sock_edemux(struct sk_buff *skb) +{ + sock_put(skb->sk); +} +EXPORT_SYMBOL(sock_edemux); int sock_i_uid(struct sock *sk) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 85a3b176313..07a02f6e969 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -157,6 +157,7 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); + dst_release(sk->sk_rx_dst); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); @@ -1518,14 +1519,15 @@ static const struct net_protocol igmp_protocol = { #endif static const struct net_protocol tcp_protocol = { - .handler = tcp_v4_rcv, - .err_handler = tcp_v4_err, - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - .no_policy = 1, - .netns_ok = 1, + .early_demux = tcp_v4_early_demux, + .handler = tcp_v4_rcv, + .err_handler = tcp_v4_err, + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + .no_policy = 1, + .netns_ok = 1, }; static const struct net_protocol udp_protocol = { diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c4fe1d27113..93b092c9a39 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -323,19 +323,32 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (skb_dst(skb) == NULL) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); - if (unlikely(err)) { - if (err == -EHOSTUNREACH) - IP_INC_STATS_BH(dev_net(skb->dev), - IPSTATS_MIB_INADDRERRORS); - else if (err == -ENETUNREACH) - IP_INC_STATS_BH(dev_net(skb->dev), - IPSTATS_MIB_INNOROUTES); - else if (err == -EXDEV) - NET_INC_STATS_BH(dev_net(skb->dev), - LINUX_MIB_IPRPFILTER); - goto drop; + const struct net_protocol *ipprot; + int protocol = iph->protocol; + int err; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + err = -ENOENT; + if (ipprot && ipprot->early_demux) + err = ipprot->early_demux(skb); + rcu_read_unlock(); + + if (err) { + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); + if (unlikely(err)) { + if (err == -EHOSTUNREACH) + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INADDRERRORS); + else if (err == -ENETUNREACH) + IP_INC_STATS_BH(dev_net(skb->dev), + IPSTATS_MIB_INNOROUTES); + else if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); + goto drop; + } } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b224eb8bce8..8416f8a68e6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5518,6 +5518,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); int res; + if (sk->sk_rx_dst) { + struct dst_entry *dst = sk->sk_rx_dst; + if (unlikely(dst->obsolete)) { + if (dst->ops->check(dst, 0) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + } + if (unlikely(sk->sk_rx_dst == NULL)) + sk->sk_rx_dst = dst_clone(skb_dst(skb)); + /* * Header prediction. * The code loosely follows the one in the famous @@ -5729,8 +5741,10 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); - if (skb != NULL) + if (skb != NULL) { + sk->sk_rx_dst = dst_clone(skb_dst(skb)); security_inet_conn_established(sk, skb); + } /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fda2ca17135..13857df1dae 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1671,6 +1671,52 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); +int tcp_v4_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct tcphdr *th; + struct sock *sk; + int err; + + err = -ENOENT; + if (skb->pkt_type != PACKET_HOST) + goto out_err; + + if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) + goto out_err; + + iph = ip_hdr(skb); + th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); + + if (th->doff < sizeof(struct tcphdr) / 4) + goto out_err; + + if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) + goto out_err; + + sk = __inet_lookup_established(net, &tcp_hashinfo, + iph->saddr, th->source, + iph->daddr, th->dest, + skb->dev->ifindex); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + if (dst) + dst = dst_check(dst, 0); + if (dst) { + skb_dst_set_noref(skb, dst); + err = 0; + } + } + } + +out_err: + return err; +} + /* * From tcp_input.c */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index cb015317c9f..72b7c63b1a3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -445,6 +445,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; + newsk->sk_rx_dst = dst_clone(skb_dst(skb)); + /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. -- cgit v1.2.3-70-g09d2 From 66572cfc30a4b764150c83ee5d842a3ce17991c9 Mon Sep 17 00:00:00 2001 From: Victor Goldenshtein Date: Thu, 21 Jun 2012 10:56:46 +0300 Subject: mac80211: add command to get current rssi Get current rssi (in dBm) from the driver/FW. Instead of reporting the signal received in the last rx packet, which might be inaccurate if rx traffic is low and beacon filtering is enabled, get the signal from the driver/FW. Signed-off-by: Victor Goldenshtein Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ net/mac80211/cfg.c | 21 +++++++++++++-------- net/mac80211/driver-ops.h | 15 +++++++++++++++ net/mac80211/driver-trace.h | 26 ++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d152f54064f..f11c2f8b00c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2249,6 +2249,9 @@ enum ieee80211_rate_control_changed { * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. * + * @get_rssi: Get current signal strength in dBm, the function is optional + * and can sleep. + * */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -2388,6 +2391,8 @@ struct ieee80211_ops { void (*get_et_strings)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 sset, u8 *data); + int (*get_rssi)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, s8 *rssi_dbm); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 03aff23c70f..d0c8f78115c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -353,6 +353,7 @@ void sta_set_rate_info_tx(struct sta_info *sta, static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; struct timespec uptime; sinfo->generation = sdata->local->sta_generation; @@ -388,7 +389,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - sinfo->signal = (s8)sta->last_signal; + if (!local->ops->get_rssi || + drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + sinfo->signal = (s8)sta->last_signal; sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } @@ -517,7 +520,7 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, * network device. */ - rcu_read_lock(); + mutex_lock(&local->sta_mtx); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sta = sta_info_get_bss(sdata, sdata->u.mgd.bssid); @@ -546,7 +549,7 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, data[i] = (u8)sinfo.signal_avg; i++; } else { - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { /* Make sure this station belongs to the proper dev */ if (sta->sdata->dev != dev) continue; @@ -603,7 +606,7 @@ do_survey: else data[i++] = -1LL; - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); if (WARN_ON(i != STA_STATS_LEN)) return; @@ -629,10 +632,11 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get_by_idx(sdata, idx); if (sta) { @@ -641,7 +645,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return ret; } @@ -658,10 +662,11 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; - rcu_read_lock(); + mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (sta) { @@ -669,7 +674,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo); } - rcu_read_unlock(); + mutex_unlock(&local->sta_mtx); return ret; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 6d33a0c743a..933026949df 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -845,4 +845,19 @@ drv_allow_buffered_frames(struct ieee80211_local *local, more_data); trace_drv_return_void(local); } + +static inline int drv_get_rssi(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + s8 *rssi_dbm) +{ + int ret; + + might_sleep(); + + ret = local->ops->get_rssi(&local->hw, &sdata->vif, sta, rssi_dbm); + trace_drv_get_rssi(local, sta, *rssi_dbm, ret); + + return ret; +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6de00b2c268..a0f7d357884 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -1218,6 +1218,32 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); +TRACE_EVENT(drv_get_rssi, + TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, + s8 rssi, int ret), + + TP_ARGS(local, sta, rssi, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(s8, rssi) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->rssi = rssi; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " rssi:%d ret:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->rssi, __entry->ret + ) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3-70-g09d2 From 6648bd7e0e62c0c8c03b15e00c9e7015e232feff Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 21 Jun 2012 13:58:31 +0000 Subject: ipv4: Add sysctl knob to control early socket demux This change is meant to add a control for disabling early socket demux. The main motivation behind this patch is to provide an option to disable the feature as it adds an additional cost to routing that reduces overall throughput by up to 5%. For example one of my systems went from 12.1Mpps to 11.6 after the early socket demux was added. It looks like the reason for the regression is that we are now having to perform two lookups, first the one for an established socket, and then the one for the routing table. By adding this patch and toggling the value for ip_early_demux to 0 I am able to get back to the 12.1Mpps I was previously seeing. [ Move local variables in ip_rcv_finish() down into the basic block in which they are actually used. -DaveM ] Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + include/net/ip.h | 3 +++ kernel/sysctl_binary.c | 2 ++ net/ipv4/ip_input.c | 22 +++++++++++++--------- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ 5 files changed, 26 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index c34b4c82b0d..20825e5f433 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -425,6 +425,7 @@ enum NET_TCP_ALLOWED_CONG_CONTROL=123, NET_TCP_MAX_SSTHRESH=124, NET_TCP_FRTO_RESPONSE=125, + NET_IPV4_EARLY_DEMUX=126, }; enum { diff --git a/include/net/ip.h b/include/net/ip.h index 83e0619f59d..50841bd6f10 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -210,6 +210,9 @@ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; +/* From ip_input.c */ +extern int sysctl_ip_early_demux; + /* From ip_output.c */ extern int sysctl_ip_dynaddr; diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index a650694883a..6a3cf8253aa 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -415,6 +415,8 @@ static const struct bin_table bin_net_ipv4_table[] = { { CTL_INT, NET_IPV4_IPFRAG_SECRET_INTERVAL, "ipfrag_secret_interval" }, /* NET_IPV4_IPFRAG_MAX_DIST "ipfrag_max_dist" no longer used */ + { CTL_INT, NET_IPV4_EARLY_DEMUX, "ip_early_demux" }, + { CTL_INT, 2088 /* NET_IPQ_QMAX */, "ip_queue_maxlen" }, /* NET_TCP_DEFAULT_WIN_SCALE unused */ diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93b092c9a39..bca25179cdb 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -313,6 +313,8 @@ drop: return true; } +int sysctl_ip_early_demux __read_mostly = 1; + static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -323,16 +325,18 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (skb_dst(skb) == NULL) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - int err; + int err = -ENOENT; - rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[protocol]); - err = -ENOENT; - if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); - rcu_read_unlock(); + if (sysctl_ip_early_demux) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && ipprot->early_demux) + err = ipprot->early_demux(skb); + rcu_read_unlock(); + } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ef32956ed65..12aa0c5867c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -300,6 +300,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_early_demux", + .data = &sysctl_ip_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { .procname = "ip_dynaddr", .data = &sysctl_ip_dynaddr, -- cgit v1.2.3-70-g09d2 From 7586eceb0abc0ea1c2b023e3e5d4dfd4ff40930a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2012 05:02:19 +0000 Subject: ipv4: tcp: dont cache output dst for syncookies Don't cache output dst for syncookies, as this adds pressure on IP route cache and rcu subsystem for no gain. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/flow.h | 1 + include/net/inet_connection_sock.h | 3 ++- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 8 ++++++-- net/ipv4/route.c | 5 ++++- net/ipv4/tcp_ipv4.c | 12 +++++++----- 6 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 6c469dbdb91..bd524f59856 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -22,6 +22,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 +#define FLOWI_FLAG_RT_NOCACHE 0x08 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e1b7734c456..af3c743a40e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -251,7 +251,8 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum); extern struct dst_entry* inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req); + const struct request_sock *req, + bool nocache); extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk, struct sock *newsk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 07f5579ca75..3eb76b5f221 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -504,7 +504,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req); + dst = inet_csk_route_req(sk, &fl4, req, false); if (dst == NULL) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f9ee7417f6a..034ddbe42ad 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,17 +368,21 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req) + const struct request_sock *req, + bool nocache) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); + int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + if (nocache) + flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS, + flags, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a91f6d33804..8d62d85e68d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1156,7 +1156,7 @@ restart: candp = NULL; now = jiffies; - if (!rt_caching(dev_net(rt->dst.dev))) { + if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) { /* * If we're not caching, just tell the caller we * were successful and don't touch the route. The @@ -2582,6 +2582,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, fl4, res, fi, type, 0); + if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) + rth->dst.flags |= DST_NOCACHE; + return rth; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 21e22a00481..b52934f5334 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -825,7 +825,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct request_values *rvp, - u16 queue_mapping) + u16 queue_mapping, + bool nocache) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) return -1; skb = tcp_make_synack(sk, dst, req, rvp); @@ -855,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v4_send_synack(sk, NULL, req, rvp, 0); + return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); } /* @@ -1388,7 +1389,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && + (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && fl4.daddr == saddr && (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { inet_peer_refcheck(peer); @@ -1424,7 +1425,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (tcp_v4_send_synack(sk, dst, req, (struct request_values *)&tmp_ext, - skb_get_queue_mapping(skb)) || + skb_get_queue_mapping(skb), + want_cookie) || want_cookie) goto drop_and_free; -- cgit v1.2.3-70-g09d2 From bdcbd8e0e3ffdad32b14b6373e67bfcf5fd3f002 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jun 2012 11:29:50 +0200 Subject: mac80211: clean up debugging There are a few things that make the logging and debugging in mac80211 less useful than it should be right now: * a lot of messages should be pr_info, not pr_debug * wholesale use of pr_debug makes it require *both* Kconfig and dynamic configuration * there are still a lot of ifdefs * the style is very inconsistent, sometimes the sdata->name is printed in front Clean up everything, introducing new macros and separating out the station MLME debugging into a new Kconfig symbol. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 24 ----- net/mac80211/Kconfig | 32 +++++-- net/mac80211/Makefile | 2 +- net/mac80211/agg-rx.c | 34 +++---- net/mac80211/agg-tx.c | 72 ++++++++------- net/mac80211/cfg.c | 9 +- net/mac80211/debug.h | 152 +++++++++++++++++++++++++++++++ net/mac80211/debugfs_netdev.c | 5 +- net/mac80211/ht.c | 10 +-- net/mac80211/ibss.c | 89 +++++++++--------- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 13 +-- net/mac80211/key.c | 4 +- net/mac80211/mesh.c | 4 - net/mac80211/mesh_hwmp.c | 42 ++++----- net/mac80211/mesh_pathtbl.c | 30 +++---- net/mac80211/mesh_plink.c | 61 +++++++------ net/mac80211/mesh_sync.c | 47 +++++----- net/mac80211/mlme.c | 203 +++++++++++++++++++----------------------- net/mac80211/rx.c | 40 +++------ net/mac80211/sta_info.c | 44 ++++----- net/mac80211/status.c | 11 +-- net/mac80211/tx.c | 43 ++++----- 23 files changed, 519 insertions(+), 453 deletions(-) create mode 100644 net/mac80211/debug.h (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f11c2f8b00c..510d852d522 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3842,28 +3842,4 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, */ int ieee80211_ave_rssi(struct ieee80211_vif *vif); -/* Extra debugging macros */ - -#ifdef CONFIG_MAC80211_HT_DEBUG -#define ht_vdbg(fmt, ...) \ - pr_debug(fmt, ##__VA_ARGS__) -#else -#define ht_vdbg(fmt, ...) \ -do { \ - if (0) \ - pr_debug(fmt, ##__VA_ARGS__); \ -} while (0) -#endif - -#ifdef CONFIG_MAC80211_IBSS_DEBUG -#define ibss_vdbg(fmt, ...) \ - pr_debug(fmt, ##__VA_ARGS__) -#else -#define ibss_vdbg(fmt, ...) \ -do { \ - if (0) \ - pr_debug(fmt, ##__VA_ARGS__); \ -} while (0) -#endif - #endif /* MAC80211_H */ diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 323aa19a39d..7475e266eb4 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -140,6 +140,26 @@ config MAC80211_VERBOSE_DEBUG Do not select this option. +config MAC80211_MLME_DEBUG + bool "Verbose managed MLME output" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + debugging messages for the managed-mode MLME. It + should not be selected on production systems as some + of the messages are remotely triggerable. + + Do not select this option. + +config MAC80211_STA_DEBUG + bool "Verbose station debugging" + depends on MAC80211_DEBUG_MENU + ---help--- + Selecting this option causes mac80211 to print out + debugging messages for station addition/removal. + + Do not select this option. + config MAC80211_HT_DEBUG bool "Verbose HT debugging" depends on MAC80211_DEBUG_MENU @@ -163,7 +183,7 @@ config MAC80211_IBSS_DEBUG Do not select this option. -config MAC80211_VERBOSE_PS_DEBUG +config MAC80211_PS_DEBUG bool "Verbose powersave mode debugging" depends on MAC80211_DEBUG_MENU ---help--- @@ -175,7 +195,7 @@ config MAC80211_VERBOSE_PS_DEBUG Do not select this option. -config MAC80211_VERBOSE_MPL_DEBUG +config MAC80211_MPL_DEBUG bool "Verbose mesh peer link debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -188,7 +208,7 @@ config MAC80211_VERBOSE_MPL_DEBUG Do not select this option. -config MAC80211_VERBOSE_MPATH_DEBUG +config MAC80211_MPATH_DEBUG bool "Verbose mesh path debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -201,7 +221,7 @@ config MAC80211_VERBOSE_MPATH_DEBUG Do not select this option. -config MAC80211_VERBOSE_MHWMP_DEBUG +config MAC80211_MHWMP_DEBUG bool "Verbose mesh HWMP routing debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -214,7 +234,7 @@ config MAC80211_VERBOSE_MHWMP_DEBUG Do not select this option. -config MAC80211_VERBOSE_MESH_SYNC_DEBUG +config MAC80211_MESH_SYNC_DEBUG bool "Verbose mesh mesh synchronization debugging" depends on MAC80211_DEBUG_MENU depends on MAC80211_MESH @@ -225,7 +245,7 @@ config MAC80211_VERBOSE_MESH_SYNC_DEBUG Do not select this option. -config MAC80211_VERBOSE_TDLS_DEBUG +config MAC80211_TDLS_DEBUG bool "Verbose TDLS debugging" depends on MAC80211_DEBUG_MENU ---help--- diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 2b1470bac17..231ffa02e49 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -58,4 +58,4 @@ mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y) -ccflags-y += -D__CHECK_ENDIAN__ +ccflags-y += -D__CHECK_ENDIAN__ -DDEBUG diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 32ef11d6979..186d9919b04 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -74,15 +74,17 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], NULL); - ht_vdbg("Rx BA session stop requested for %pM tid %u %s reason: %d\n", - sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", - (int)reason); + ht_dbg(sta->sdata, + "Rx BA session stop requested for %pM tid %u %s reason: %d\n", + sta->sta.addr, tid, + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + (int)reason); if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, &sta->sta, tid, NULL, 0)) - pr_debug("HW problem - can not stop rx aggregation for tid %d\n", - tid); + sdata_info(sta->sdata, + "HW problem - can not stop rx aggregation for tid %d\n", + tid); /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) @@ -157,7 +159,7 @@ static void sta_rx_agg_session_timer_expired(unsigned long data) } rcu_read_unlock(); - ht_vdbg("rx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "rx session timer expired on tid %d\n", (u16)*ptid); set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired); ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); @@ -245,7 +247,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_vdbg("Suspend in progress - Denying ADDBA request\n"); + ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request\n"); goto end_no_lock; } @@ -257,10 +259,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { status = WLAN_STATUS_INVALID_QOS_PARAM; -#ifdef CONFIG_MAC80211_HT_DEBUG - net_dbg_ratelimited("AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", - mgmt->sa, tid, ba_policy, buf_size); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_dbg_ratelimited(sta->sdata, + "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", + mgmt->sa, tid, ba_policy, buf_size); goto end_no_lock; } /* determine default buffer size */ @@ -275,10 +276,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, mutex_lock(&sta->ampdu_mlme.mtx); if (sta->ampdu_mlme.tid_rx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - net_dbg_ratelimited("unexpected AddBA Req from %pM on tid %u\n", - mgmt->sa, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_dbg_ratelimited(sta->sdata, + "unexpected AddBA Req from %pM on tid %u\n", + mgmt->sa, tid); /* delete existing Rx BA session on the same tid */ ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, @@ -317,7 +317,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, &sta->sta, tid, &start_seq_num, 0); - ht_vdbg("Rx A-MPDU request on tid %d result %d\n", tid, ret); + ht_dbg(sta->sdata, "Rx A-MPDU request on tid %d result %d\n", tid, ret); if (ret) { kfree(tid_agg_rx->reorder_buf); kfree(tid_agg_rx->reorder_time); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index da07f01cfe4..5cc1bf7d803 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -184,8 +184,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, spin_unlock_bh(&sta->lock); - ht_vdbg("Tx BA session stop requested for %pM tid %u\n", - sta->sta.addr, tid); + ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); del_timer_sync(&tid_tx->addba_resp_timer); del_timer_sync(&tid_tx->session_timer); @@ -251,12 +251,13 @@ static void sta_addba_resp_timer_expired(unsigned long data) if (!tid_tx || test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); - ht_vdbg("timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", - tid); + ht_dbg(sta->sdata, + "timer expired on tid %d but we are not (or no longer) expecting addBA response there\n", + tid); return; } - ht_vdbg("addBA response timer expired on tid %d\n", tid); + ht_dbg(sta->sdata, "addBA response timer expired on tid %d\n", tid); ieee80211_stop_tx_ba_session(&sta->sta, tid); rcu_read_unlock(); @@ -316,8 +317,9 @@ ieee80211_agg_splice_packets(struct ieee80211_sub_if_data *sdata, ieee80211_stop_queue_agg(sdata, tid); - if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates" - " from the pending queue\n", tid)) + if (WARN(!tid_tx, + "TID %d gone but expected when splicing aggregates from the pending queue\n", + tid)) return; if (!skb_queue_empty(&tid_tx->pending)) { @@ -365,7 +367,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, &sta->sta, tid, &start_seq_num, 0); if (ret) { - ht_vdbg("BA request denied - HW unavailable for tid %d\n", tid); + ht_dbg(sdata, + "BA request denied - HW unavailable for tid %d\n", tid); spin_lock_bh(&sta->lock); ieee80211_agg_splice_packets(sdata, tid_tx, tid); ieee80211_assign_tid_tx(sta, tid, NULL); @@ -378,7 +381,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); - ht_vdbg("activated addBA response timer on tid %d\n", tid); + ht_dbg(sdata, "activated addBA response timer on tid %d\n", tid); spin_lock_bh(&sta->lock); sta->ampdu_mlme.last_addba_req_time[tid] = jiffies; @@ -425,7 +428,7 @@ static void sta_tx_agg_session_timer_expired(unsigned long data) rcu_read_unlock(); - ht_vdbg("tx session timer expired on tid %d\n", (u16)*ptid); + ht_dbg(sta->sdata, "tx session timer expired on tid %d\n", (u16)*ptid); ieee80211_stop_tx_ba_session(&sta->sta, *ptid); } @@ -449,8 +452,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, (local->hw.flags & IEEE80211_HW_TX_AMPDU_SETUP_IN_HW)) return -EINVAL; - ht_vdbg("Open BA session requested for %pM tid %u\n", - pubsta->addr, tid); + ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", + pubsta->addr, tid); if (sdata->vif.type != NL80211_IFTYPE_STATION && sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -460,7 +463,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { - ht_vdbg("BA sessions blocked - Denying BA session request\n"); + ht_dbg(sdata, + "BA sessions blocked - Denying BA session request\n"); return -EINVAL; } @@ -478,8 +482,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, */ if (sta->sdata->vif.type == NL80211_IFTYPE_ADHOC && !sta->sta.ht_cap.ht_supported) { - ht_vdbg("BA request denied - IBSS STA %pM does not advertise HT support\n", - pubsta->addr); + ht_dbg(sdata, + "BA request denied - IBSS STA %pM does not advertise HT support\n", + pubsta->addr); return -EINVAL; } @@ -499,8 +504,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_BURST_RETRIES && time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { - ht_vdbg("BA request denied - waiting a grace period after %d failed requests on tid %u\n", - sta->ampdu_mlme.addba_req_num[tid], tid); + ht_dbg(sdata, + "BA request denied - waiting a grace period after %d failed requests on tid %u\n", + sta->ampdu_mlme.addba_req_num[tid], tid); ret = -EBUSY; goto err_unlock_sta; } @@ -508,8 +514,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); /* check if the TID is not in aggregation flow already */ if (tid_tx || sta->ampdu_mlme.tid_start_tx[tid]) { - ht_vdbg("BA request denied - session is not idle on tid %u\n", - tid); + ht_dbg(sdata, + "BA request denied - session is not idle on tid %u\n", + tid); ret = -EAGAIN; goto err_unlock_sta; } @@ -564,7 +571,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, tid_tx = rcu_dereference_protected_tid_tx(sta, tid); - ht_vdbg("Aggregation is on for tid %d\n", tid); + ht_dbg(sta->sdata, "Aggregation is on for tid %d\n", tid); drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, @@ -598,7 +605,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) trace_api_start_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { - ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); + ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); return; } @@ -606,7 +614,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) sta = sta_info_get_bss(sdata, ra); if (!sta) { mutex_unlock(&local->sta_mtx); - ht_vdbg("Could not find station: %pM\n", ra); + ht_dbg(sdata, "Could not find station: %pM\n", ra); return; } @@ -614,7 +622,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (WARN_ON(!tid_tx)) { - ht_vdbg("addBA was not requested!\n"); + ht_dbg(sdata, "addBA was not requested!\n"); goto unlock; } @@ -714,17 +722,18 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) trace_api_stop_tx_ba_cb(sdata, ra, tid); if (tid >= STA_TID_NUM) { - ht_vdbg("Bad TID value: tid = %d (>= %d)\n", tid, STA_TID_NUM); + ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); return; } - ht_vdbg("Stopping Tx BA session for %pM tid %d\n", ra, tid); + ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", ra, tid); mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, ra); if (!sta) { - ht_vdbg("Could not find station: %pM\n", ra); + ht_dbg(sdata, "Could not find station: %pM\n", ra); goto unlock; } @@ -733,7 +742,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid) tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_vdbg("unexpected callback to A-MPDU stop\n"); + ht_dbg(sdata, "unexpected callback to A-MPDU stop\n"); goto unlock_sta; } @@ -809,13 +818,13 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, goto out; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { - ht_vdbg("wrong addBA response token, tid %d\n", tid); + ht_dbg(sta->sdata, "wrong addBA response token, tid %d\n", tid); goto out; } del_timer_sync(&tid_tx->addba_resp_timer); - ht_vdbg("switched off addBA timer for tid %d\n", tid); + ht_dbg(sta->sdata, "switched off addBA timer for tid %d\n", tid); /* * addba_resp_timer may have fired before we got here, and @@ -824,8 +833,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, */ if (test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state) || test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) { - ht_vdbg("got addBA resp for tid %d but we already gave up\n", - tid); + ht_dbg(sta->sdata, + "got addBA resp for tid %d but we already gave up\n", + tid); goto out; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d0c8f78115c..7722a7336a5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2771,9 +2771,8 @@ static int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, !sdata->u.mgd.associated) return -EINVAL; -#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG - pr_debug("TDLS mgmt action %d peer %pM\n", action_code, peer); -#endif + tdls_dbg(sdata, "TDLS mgmt action %d peer %pM\n", + action_code, peer); skb = dev_alloc_skb(local->hw.extra_tx_headroom + max(sizeof(struct ieee80211_mgmt), @@ -2882,9 +2881,7 @@ static int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; -#ifdef CONFIG_MAC80211_VERBOSE_TDLS_DEBUG - pr_debug("TDLS oper %d peer %pM\n", oper, peer); -#endif + tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); switch (oper) { case NL80211_TDLS_ENABLE_LINK: diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h new file mode 100644 index 00000000000..6e6bbb9a9d4 --- /dev/null +++ b/net/mac80211/debug.h @@ -0,0 +1,152 @@ +#ifndef __MAC80211_DEBUG_H +#define __MAC80211_DEBUG_H + +#ifdef CONFIG_MAC80211_IBSS_DEBUG +#define MAC80211_IBSS_DEBUG 1 +#else +#define MAC80211_IBSS_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_PS_DEBUG +#define MAC80211_PS_DEBUG 1 +#else +#define MAC80211_PS_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_HT_DEBUG +#define MAC80211_HT_DEBUG 1 +#else +#define MAC80211_HT_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MPL_DEBUG +#define MAC80211_MPL_DEBUG 1 +#else +#define MAC80211_MPL_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MPATH_DEBUG +#define MAC80211_MPATH_DEBUG 1 +#else +#define MAC80211_MPATH_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MHWMP_DEBUG +#define MAC80211_MHWMP_DEBUG 1 +#else +#define MAC80211_MHWMP_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MESH_SYNC_DEBUG +#define MAC80211_MESH_SYNC_DEBUG 1 +#else +#define MAC80211_MESH_SYNC_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_TDLS_DEBUG +#define MAC80211_TDLS_DEBUG 1 +#else +#define MAC80211_TDLS_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_STA_DEBUG +#define MAC80211_STA_DEBUG 1 +#else +#define MAC80211_STA_DEBUG 0 +#endif + +#ifdef CONFIG_MAC80211_MLME_DEBUG +#define MAC80211_MLME_DEBUG 1 +#else +#define MAC80211_MLME_DEBUG 0 +#endif + +#define _sdata_info(sdata, fmt, ...) \ +do { \ + pr_info("%s: " fmt, \ + (sdata)->name, ##__VA_ARGS__); \ +} while (0) + +#define _sdata_dbg(print, sdata, fmt, ...) \ +do { \ + if (print) \ + pr_debug("%s: " fmt, \ + (sdata)->name, ##__VA_ARGS__); \ +} while (0) + +#define _sdata_err(sdata, fmt, ...) \ +do { \ + pr_err("%s: " fmt, \ + (sdata)->name, ##__VA_ARGS__); \ +} while (0) + +#define _wiphy_dbg(print, wiphy, fmt, ...) \ +do { \ + if (print) \ + wiphy_dbg((wiphy), fmt, ##__VA_ARGS__); \ +} while (0) + +#define sdata_info(sdata, fmt, ...) \ + _sdata_info(sdata, fmt, ##__VA_ARGS__) +#define sdata_err(sdata, fmt, ...) \ + _sdata_err(sdata, fmt, ##__VA_ARGS__) +#define sdata_dbg(sdata, fmt, ...) \ + _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__) + +#define ht_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_HT_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define ht_dbg_ratelimited(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_HT_DEBUG && net_ratelimit(), \ + sdata, fmt, ##__VA_ARGS__) + +#define ibss_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_IBSS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define ps_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_PS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define ps_dbg_hw(hw, fmt, ...) \ + _wiphy_dbg(MAC80211_PS_DEBUG, \ + (hw)->wiphy, fmt, ##__VA_ARGS__) + +#define ps_dbg_ratelimited(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_PS_DEBUG && net_ratelimit(), \ + sdata, fmt, ##__VA_ARGS__) + +#define mpl_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MPL_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mpath_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MPATH_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mhwmp_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MHWMP_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define msync_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MESH_SYNC_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define tdls_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_TDLS_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define sta_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_STA_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mlme_dbg(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MLME_DEBUG, \ + sdata, fmt, ##__VA_ARGS__) + +#define mlme_dbg_ratelimited(sdata, fmt, ...) \ + _sdata_dbg(MAC80211_MLME_DEBUG && net_ratelimit(), \ + sdata, fmt, ##__VA_ARGS__) + +#endif /* __MAC80211_DEBUG_H */ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 512c894893d..6d5aec9418e 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -695,6 +695,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) sprintf(buf, "netdev:%s", sdata->name); if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) - pr_err("mac80211: debugfs: failed to rename debugfs " - "dir to %s\n", buf); + sdata_err(sdata, + "debugfs: failed to rename debugfs dir to %s\n", + buf); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 6f8615c54b2..4b4538d6392 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -305,12 +305,10 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; -#ifdef CONFIG_MAC80211_HT_DEBUG - net_dbg_ratelimited("delba from %pM (%s) tid %d reason code %d\n", - mgmt->sa, initiator ? "initiator" : "recipient", - tid, - le16_to_cpu(mgmt->u.action.u.delba.reason_code)); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n", + mgmt->sa, initiator ? "initiator" : "recipient", + tid, + le16_to_cpu(mgmt->u.action.u.delba.reason_code)); if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 8931110b843..5746d62faba 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -261,11 +261,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, memcpy(addr, sta->sta.addr, ETH_ALEN); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - wiphy_debug(sdata->local->hw.wiphy, - "Adding new IBSS station %pM (dev=%s)\n", - addr, sdata->name); -#endif + ibss_dbg(sdata, "Adding new IBSS station %pM\n", addr); sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -280,8 +276,9 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); if (auth && !sdata->u.ibss.auth_frame_registrations) { - ibss_vdbg("TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, sdata->u.ibss.bssid, addr); + ibss_dbg(sdata, + "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", + sdata->vif.addr, sdata->u.ibss.bssid, addr); ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0, addr, sdata->u.ibss.bssid, NULL, 0, 0); } @@ -304,7 +301,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, * allow new one to be added. */ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - net_dbg_ratelimited("%s: No room for a new IBSS STA entry %pM\n", + net_info_ratelimited("%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); rcu_read_lock(); return NULL; @@ -351,9 +348,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - ibss_vdbg("%s: RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", - sdata->name, mgmt->sa, mgmt->da, mgmt->bssid, - auth_transaction); + ibss_dbg(sdata, + "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n", + mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction); sta_info_destroy_addr(sdata, mgmt->sa); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); rcu_read_unlock(); @@ -416,10 +413,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_mandatory_rates(local, band); if (sta->sta.supp_rates[band] != prev_rates) { - ibss_vdbg("%s: updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", - sdata->name, sta->sta.addr, - prev_rates, - sta->sta.supp_rates[band]); + ibss_dbg(sdata, + "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", + sta->sta.addr, prev_rates, + sta->sta.supp_rates[band]); rates_updated = true; } } else { @@ -534,16 +531,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, rx_timestamp = drv_get_tsf(local, sdata); } - ibss_vdbg("RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, - (unsigned long long)beacon_timestamp, - (unsigned long long)(rx_timestamp - beacon_timestamp), - jiffies); + ibss_dbg(sdata, + "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, + (unsigned long long)rx_timestamp, + (unsigned long long)beacon_timestamp, + (unsigned long long)(rx_timestamp - beacon_timestamp), + jiffies); if (beacon_timestamp > rx_timestamp) { - ibss_vdbg("%s: beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", - sdata->name, mgmt->bssid); + ibss_dbg(sdata, + "beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n", + mgmt->bssid); ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, @@ -569,7 +568,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, * allow new one to be added. */ if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - net_dbg_ratelimited("%s: No room for a new IBSS STA entry %pM\n", + net_info_ratelimited("%s: No room for a new IBSS STA entry %pM\n", sdata->name, addr); return; } @@ -645,8 +644,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) if (ifibss->fixed_channel) return; - pr_debug("%s: No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n", - sdata->name); + sdata_info(sdata, + "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len, NULL); @@ -674,8 +673,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) bssid[0] |= 0x02; } - pr_debug("%s: Creating new IBSS network, BSSID %pM\n", - sdata->name, bssid); + sdata_info(sdata, "Creating new IBSS network, BSSID %pM\n", bssid); capability = WLAN_CAPABILITY_IBSS; @@ -706,8 +704,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&ifibss->mtx); active_ibss = ieee80211_sta_active_ibss(sdata); - ibss_vdbg("%s: sta_find_ibss (active_ibss=%d)\n", - sdata->name, active_ibss); + ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); if (active_ibss) return; @@ -730,23 +727,24 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) struct ieee80211_bss *bss; bss = (void *)cbss->priv; - ibss_vdbg(" sta_find_ibss: selected %pM current %pM\n", - cbss->bssid, ifibss->bssid); - pr_debug("%s: Selected IBSS BSSID %pM based on configured SSID\n", - sdata->name, cbss->bssid); + ibss_dbg(sdata, + "sta_find_ibss: selected %pM current %pM\n", + cbss->bssid, ifibss->bssid); + sdata_info(sdata, + "Selected IBSS BSSID %pM based on configured SSID\n", + cbss->bssid); ieee80211_sta_join_ibss(sdata, bss); ieee80211_rx_bss_put(local, bss); return; } - ibss_vdbg(" did not try to join ibss\n"); + ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n"); /* Selected IBSS not found in current scan results - try to scan */ if (time_after(jiffies, ifibss->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { - pr_debug("%s: Trigger new scan to find an IBSS to join\n", - sdata->name); + sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len, @@ -760,9 +758,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) ieee80211_sta_create_ibss(sdata); return; } - pr_debug("%s: IBSS not allowed on %d MHz\n", - sdata->name, - local->hw.conf.channel->center_freq); + sdata_info(sdata, "IBSS not allowed on %d MHz\n", + local->hw.conf.channel->center_freq); /* No IBSS found - decrease scan interval and continue * scanning. */ @@ -797,9 +794,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, tx_last_beacon = drv_tx_last_beacon(local); - ibss_vdbg("%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", - sdata->name, mgmt->sa, mgmt->da, - mgmt->bssid, tx_last_beacon); + ibss_dbg(sdata, + "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n", + mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon); if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da)) return; @@ -812,8 +809,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, pos = mgmt->u.probe_req.variable; if (pos[0] != WLAN_EID_SSID || pos + 2 + pos[1] > end) { - ibss_vdbg("%s: Invalid SSID IE in ProbeReq from %pM\n", - sdata->name, mgmt->sa); + ibss_dbg(sdata, "Invalid SSID IE in ProbeReq from %pM\n", + mgmt->sa); return; } if (pos[1] != 0 && @@ -830,7 +827,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, resp = (struct ieee80211_mgmt *) skb->data; memcpy(resp->da, mgmt->sa, ETH_ALEN); - ibss_vdbg("%s: Sending ProbeResp to %pM\n", sdata->name, resp->da); + ibss_dbg(sdata, "Sending ProbeResp to %pM\n", resp->da); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 36ce2bb066b..f834a005e1c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -30,6 +30,7 @@ #include #include "key.h" #include "sta_info.h" +#include "debug.h" struct ieee80211_local; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 728d3eac1f5..576880317d0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -57,9 +57,6 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ dev->mtu = new_mtu; return 0; } @@ -1223,7 +1220,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, if (__ffs64(mask) + hweight64(mask) != fls64(mask)) { /* not a contiguous mask ... not handled now! */ - pr_debug("not contiguous\n"); + pr_info("not contiguous\n"); break; } @@ -1414,10 +1411,6 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local, if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE)) return 0; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason); -#endif - local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; return IEEE80211_CONF_CHANGE_IDLE; } @@ -1427,10 +1420,6 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local) if (local->hw.conf.flags & IEEE80211_CONF_IDLE) return 0; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "device now idle\n"); -#endif - drv_flush(local, false); local->hw.conf.flags |= IEEE80211_CONF_IDLE; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 5bb600d93d7..b3b7e526e24 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -139,7 +139,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) } if (ret != -ENOSPC && ret != -EOPNOTSUPP) - wiphy_err(key->local->hw.wiphy, + sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); @@ -186,7 +186,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta ? &sta->sta : NULL, &key->conf); if (ret) - wiphy_err(key->local->hw.wiphy, + sdata_err(sdata, "failed to remove key (%d, %pM) from hardware (%d)\n", key->conf.keyidx, sta ? sta->sta.addr : bcast_addr, ret); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ae40a83675e..764593d65fc 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -523,10 +523,6 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, { bool free_plinks; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: running mesh housekeeping\n", sdata->name); -#endif - ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); mesh_path_expire(sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index aed1821bd6f..fb7b6a11d0b 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -13,13 +13,6 @@ #include "wme.h" #include "mesh.h" -#ifdef CONFIG_MAC80211_VERBOSE_MHWMP_DEBUG -#define mhwmp_dbg(fmt, args...) \ - pr_debug("Mesh HWMP (%s): " fmt "\n", sdata->name, ##args) -#else -#define mhwmp_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - #define TEST_FRAME_LEN 8192 #define MAX_METRIC 0xffffffff #define ARITH_SHIFT 8 @@ -144,19 +137,19 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, switch (action) { case MPATH_PREQ: - mhwmp_dbg("sending PREQ to %pM", target); + mhwmp_dbg(sdata, "sending PREQ to %pM\n", target); ie_len = 37; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREQ; break; case MPATH_PREP: - mhwmp_dbg("sending PREP to %pM", target); + mhwmp_dbg(sdata, "sending PREP to %pM\n", target); ie_len = 31; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PREP; break; case MPATH_RANN: - mhwmp_dbg("sending RANN from %pM", orig_addr); + mhwmp_dbg(sdata, "sending RANN from %pM\n", orig_addr); ie_len = sizeof(struct ieee80211_rann_ie); pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_RANN; @@ -535,10 +528,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, flags = PREQ_IE_FLAGS(preq_elem); root_is_gate = !!(flags & RANN_FLAG_IS_GATE); - mhwmp_dbg("received PREQ from %pM", orig_addr); + mhwmp_dbg(sdata, "received PREQ from %pM\n", orig_addr); if (ether_addr_equal(target_addr, sdata->vif.addr)) { - mhwmp_dbg("PREQ is for us"); + mhwmp_dbg(sdata, "PREQ is for us\n"); forward = false; reply = true; metric = 0; @@ -590,7 +583,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, lifetime = PREQ_IE_LIFETIME(preq_elem); ttl = ifmsh->mshcfg.element_ttl; if (ttl != 0) { - mhwmp_dbg("replying to the PREQ"); + mhwmp_dbg(sdata, "replying to the PREQ\n"); mesh_path_sel_frame_tx(MPATH_PREP, 0, orig_addr, cpu_to_le32(orig_sn), 0, target_addr, cpu_to_le32(target_sn), mgmt->sa, 0, ttl, @@ -611,7 +604,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, ifmsh->mshstats.dropped_frames_ttl++; return; } - mhwmp_dbg("forwarding the PREQ from %pM", orig_addr); + mhwmp_dbg(sdata, "forwarding the PREQ from %pM\n", orig_addr); --ttl; preq_id = PREQ_IE_PREQ_ID(preq_elem); hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1; @@ -658,7 +651,8 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, u8 next_hop[ETH_ALEN]; u32 target_sn, orig_sn, lifetime; - mhwmp_dbg("received PREP from %pM", PREP_IE_ORIG_ADDR(prep_elem)); + mhwmp_dbg(sdata, "received PREP from %pM\n", + PREP_IE_ORIG_ADDR(prep_elem)); orig_addr = PREP_IE_ORIG_ADDR(prep_elem); if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -784,8 +778,9 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(orig_addr, sdata->vif.addr)) return; - mhwmp_dbg("received RANN from %pM via neighbour %pM (is_gate=%d)", - orig_addr, mgmt->sa, root_is_gate); + mhwmp_dbg(sdata, + "received RANN from %pM via neighbour %pM (is_gate=%d)\n", + orig_addr, mgmt->sa, root_is_gate); rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -818,8 +813,9 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata, root_path_confirmation_jiffies(sdata)) || time_before(jiffies, mpath->last_preq_to_root))) && !(mpath->flags & MESH_PATH_FIXED) && (ttl != 0)) { - mhwmp_dbg("%s time to refresh root mpath %pM", sdata->name, - orig_addr); + mhwmp_dbg(sdata, + "time to refresh root mpath %pM\n", + orig_addr); mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); mpath->last_preq_to_root = jiffies; } @@ -926,7 +922,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_ATOMIC); if (!preq_node) { - mhwmp_dbg("could not allocate PREQ node"); + mhwmp_dbg(sdata, "could not allocate PREQ node\n"); return; } @@ -935,7 +931,7 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) - mhwmp_dbg("PREQ node queue full"); + mhwmp_dbg(sdata, "PREQ node queue full\n"); return; } @@ -1183,7 +1179,7 @@ void mesh_path_timer(unsigned long data) if (!mpath->is_gate && mesh_gate_num(sdata) > 0) { ret = mesh_path_send_to_gates(mpath); if (ret) - mhwmp_dbg("no gate was reachable"); + mhwmp_dbg(sdata, "no gate was reachable\n"); } else mesh_path_flush_pending(mpath); } @@ -1221,7 +1217,7 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) 0, cpu_to_le32(ifmsh->preq_id++), sdata); break; default: - mhwmp_dbg("Proactive mechanism not supported"); + mhwmp_dbg(sdata, "Proactive mechanism not supported\n"); return; } } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 572f706fd65..c9ae931dd69 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -18,12 +18,6 @@ #include "ieee80211_i.h" #include "mesh.h" -#ifdef CONFIG_MAC80211_VERBOSE_MPATH_DEBUG -#define mpath_dbg(fmt, args...) pr_debug(fmt, ##args) -#else -#define mpath_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - /* There will be initially 2^INIT_PATHS_SIZE_ORDER buckets */ #define INIT_PATHS_SIZE_ORDER 2 @@ -322,9 +316,8 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags); skb_queue_splice(&gateq, &gate_mpath->frame_queue); - mpath_dbg("Mpath queue for gate %pM has %d frames\n", - gate_mpath->dst, - skb_queue_len(&gate_mpath->frame_queue)); + mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n", + gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue)); spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags); if (!copy) @@ -446,9 +439,9 @@ int mesh_path_add_gate(struct mesh_path *mpath) hlist_add_head_rcu(&new_gate->list, tbl->known_gates); spin_unlock_bh(&tbl->gates_lock); rcu_read_unlock(); - mpath_dbg("Mesh path (%s): Recorded new gate: %pM. %d known gates\n", - mpath->sdata->name, mpath->dst, - mpath->sdata->u.mesh.num_gates); + mpath_dbg(mpath->sdata, + "Mesh path: Recorded new gate: %pM. %d known gates\n", + mpath->dst, mpath->sdata->u.mesh.num_gates); return 0; err_rcu: rcu_read_unlock(); @@ -477,8 +470,8 @@ static int mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) spin_unlock_bh(&tbl->gates_lock); mpath->sdata->u.mesh.num_gates--; mpath->is_gate = false; - mpath_dbg("Mesh path (%s): Deleted gate: %pM. " - "%d known gates\n", mpath->sdata->name, + mpath_dbg(mpath->sdata, + "Mesh path: Deleted gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); break; } @@ -946,19 +939,20 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) continue; if (gate->mpath->flags & MESH_PATH_ACTIVE) { - mpath_dbg("Forwarding to %pM\n", gate->mpath->dst); + mpath_dbg(sdata, "Forwarding to %pM\n", gate->mpath->dst); mesh_path_move_to_queue(gate->mpath, from_mpath, copy); from_mpath = gate->mpath; copy = true; } else { - mpath_dbg("Not forwarding %p\n", gate->mpath); - mpath_dbg("flags %x\n", gate->mpath->flags); + mpath_dbg(sdata, + "Not forwarding %p (flags %#x)\n", + gate->mpath, gate->mpath->flags); } } hlist_for_each_entry_rcu(gate, n, known_gates, list) if (gate->mpath->sdata == sdata) { - mpath_dbg("Sending to %pM\n", gate->mpath->dst); + mpath_dbg(sdata, "Sending to %pM\n", gate->mpath->dst); mesh_path_tx_pending(gate->mpath); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index be4fad128c3..a1dbd154027 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -13,12 +13,6 @@ #include "rate.h" #include "mesh.h" -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG -#define mpl_dbg(fmt, args...) pr_debug(fmt, ##args) -#else -#define mpl_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - #define PLINK_GET_LLID(p) (p + 2) #define PLINK_GET_PLID(p) (p + 4) @@ -134,12 +128,14 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) switch (sta->ch_type) { case NL80211_CHAN_NO_HT: - mpl_dbg("mesh_plink %pM: nonHT sta (%pM) is present", + mpl_dbg(sdata, + "mesh_plink %pM: nonHT sta (%pM) is present\n", sdata->vif.addr, sta->sta.addr); non_ht_sta = true; goto out; case NL80211_CHAN_HT20: - mpl_dbg("mesh_plink %pM: HT20 sta (%pM) is present", + mpl_dbg(sdata, + "mesh_plink %pM: HT20 sta (%pM) is present\n", sdata->vif.addr, sta->sta.addr); ht20_sta = true; default: @@ -160,7 +156,8 @@ out: sdata->vif.bss_conf.ht_operation_mode = ht_opmode; sdata->u.mesh.mshcfg.ht_opmode = ht_opmode; changed = BSS_CHANGED_HT; - mpl_dbg("mesh_plink %pM: protection mode changed to %d", + mpl_dbg(sdata, + "mesh_plink %pM: protection mode changed to %d\n", sdata->vif.addr, ht_opmode); } @@ -437,7 +434,8 @@ static void mesh_plink_timer(unsigned long data) spin_unlock_bh(&sta->lock); return; } - mpl_dbg("Mesh plink timer for %pM fired on state %d\n", + mpl_dbg(sta->sdata, + "Mesh plink timer for %pM fired on state %d\n", sta->sta.addr, sta->plink_state); reason = 0; llid = sta->llid; @@ -450,7 +448,8 @@ static void mesh_plink_timer(unsigned long data) /* retry timer */ if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; - mpl_dbg("Mesh plink for %pM (retry, timeout): %d %d\n", + mpl_dbg(sta->sdata, + "Mesh plink for %pM (retry, timeout): %d %d\n", sta->sta.addr, sta->plink_retries, sta->plink_timeout); get_random_bytes(&rand, sizeof(u32)); @@ -530,7 +529,8 @@ int mesh_plink_open(struct sta_info *sta) sta->plink_state = NL80211_PLINK_OPN_SNT; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mpl_dbg("Mesh plink: starting establishment with %pM\n", + mpl_dbg(sdata, + "Mesh plink: starting establishment with %pM\n", sta->sta.addr); return mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN, @@ -565,7 +565,6 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m u8 *baseaddr; u32 changed = 0; __le16 plid, llid, reason; -#ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG static const char *mplstates[] = { [NL80211_PLINK_LISTEN] = "LISTEN", [NL80211_PLINK_OPN_SNT] = "OPN-SNT", @@ -575,14 +574,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m [NL80211_PLINK_HOLDING] = "HOLDING", [NL80211_PLINK_BLOCKED] = "BLOCKED" }; -#endif /* need action_code, aux */ if (len < IEEE80211_MIN_ACTION_SIZE + 3) return; if (is_multicast_ether_addr(mgmt->da)) { - mpl_dbg("Mesh plink: ignore frame from multicast address"); + mpl_dbg(sdata, + "Mesh plink: ignore frame from multicast address\n"); return; } @@ -595,12 +594,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } ieee802_11_parse_elems(baseaddr, len - baselen, &elems); if (!elems.peering) { - mpl_dbg("Mesh plink: missing necessary peer link ie\n"); + mpl_dbg(sdata, + "Mesh plink: missing necessary peer link ie\n"); return; } if (elems.rsn_len && sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { - mpl_dbg("Mesh plink: can't establish link with secure peer\n"); + mpl_dbg(sdata, + "Mesh plink: can't establish link with secure peer\n"); return; } @@ -610,14 +611,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 && ie_len != 8)) { - mpl_dbg("Mesh plink: incorrect plink ie length %d %d\n", - ftype, ie_len); + mpl_dbg(sdata, + "Mesh plink: incorrect plink ie length %d %d\n", + ftype, ie_len); return; } if (ftype != WLAN_SP_MESH_PEERING_CLOSE && (!elems.mesh_id || !elems.mesh_config)) { - mpl_dbg("Mesh plink: missing necessary ie\n"); + mpl_dbg(sdata, "Mesh plink: missing necessary ie\n"); return; } /* Note the lines below are correct, the llid in the frame is the plid @@ -632,21 +634,21 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m sta = sta_info_get(sdata, mgmt->sa); if (!sta && ftype != WLAN_SP_MESH_PEERING_OPEN) { - mpl_dbg("Mesh plink: cls or cnf from unknown peer\n"); + mpl_dbg(sdata, "Mesh plink: cls or cnf from unknown peer\n"); rcu_read_unlock(); return; } if (ftype == WLAN_SP_MESH_PEERING_OPEN && !rssi_threshold_check(sta, sdata)) { - mpl_dbg("Mesh plink: %pM does not meet rssi threshold\n", + mpl_dbg(sdata, "Mesh plink: %pM does not meet rssi threshold\n", mgmt->sa); rcu_read_unlock(); return; } if (sta && !test_sta_flag(sta, WLAN_STA_AUTH)) { - mpl_dbg("Mesh plink: Action frame from non-authed peer\n"); + mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n"); rcu_read_unlock(); return; } @@ -683,7 +685,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m } else if (!sta) { /* ftype == WLAN_SP_MESH_PEERING_OPEN */ if (!mesh_plink_free_count(sdata)) { - mpl_dbg("Mesh plink error: no more free plinks\n"); + mpl_dbg(sdata, "Mesh plink error: no more free plinks\n"); rcu_read_unlock(); return; } @@ -724,7 +726,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m event = CLS_ACPT; break; default: - mpl_dbg("Mesh plink: unknown frame subtype\n"); + mpl_dbg(sdata, "Mesh plink: unknown frame subtype\n"); rcu_read_unlock(); return; } @@ -734,13 +736,14 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m /* allocate sta entry if necessary and update info */ sta = mesh_peer_init(sdata, mgmt->sa, &elems); if (!sta) { - mpl_dbg("Mesh plink: failed to init peer!\n"); + mpl_dbg(sdata, "Mesh plink: failed to init peer!\n"); rcu_read_unlock(); return; } } - mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", + mpl_dbg(sdata, + "Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", mgmt->sa, mplstates[sta->plink_state], le16_to_cpu(sta->llid), le16_to_cpu(sta->plid), event); @@ -851,7 +854,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); changed |= BSS_CHANGED_BEACON; - mpl_dbg("Mesh plink with %pM ESTABLISHED\n", + mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); break; default: @@ -887,7 +890,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m mesh_plink_inc_estab_count(sdata); changed |= mesh_set_ht_prot_mode(sdata); changed |= BSS_CHANGED_BEACON; - mpl_dbg("Mesh plink with %pM ESTABLISHED\n", + mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n", sta->sta.addr); mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CONFIRM, diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 0ccdad49f98..accfa00ffcd 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -12,13 +12,6 @@ #include "mesh.h" #include "driver-ops.h" -#ifdef CONFIG_MAC80211_VERBOSE_MESH_SYNC_DEBUG -#define msync_dbg(fmt, args...) \ - pr_debug("Mesh sync (%s): " fmt "\n", sdata->name, ##args) -#else -#define msync_dbg(fmt, args...) do { (void)(0); } while (0) -#endif - /* This is not in the standard. It represents a tolerable tbtt drift below * which we do no TSF adjustment. */ @@ -65,14 +58,14 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) spin_lock_bh(&ifmsh->sync_offset_lock); if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) { - msync_dbg("TBTT : max clockdrift=%lld; adjusting", - (long long) ifmsh->sync_offset_clockdrift_max); + msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting\n", + (long long) ifmsh->sync_offset_clockdrift_max); tsfdelta = -ifmsh->sync_offset_clockdrift_max; ifmsh->sync_offset_clockdrift_max = 0; } else { - msync_dbg("TBTT : max clockdrift=%lld; adjusting by %llu", - (long long) ifmsh->sync_offset_clockdrift_max, - (unsigned long long) beacon_int_fraction); + msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting by %llu\n", + (long long) ifmsh->sync_offset_clockdrift_max, + (unsigned long long) beacon_int_fraction); tsfdelta = -beacon_int_fraction; ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction; } @@ -120,7 +113,7 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg("STA %pM : is adjusting TBTT", sta->sta.addr); + msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); goto no_sync; } @@ -169,7 +162,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { s64 t_clockdrift = sta->t_offset_setpoint - sta->t_offset; - msync_dbg("STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld", + msync_dbg(sdata, + "STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld\n", sta->sta.addr, (long long) sta->t_offset, (long long) @@ -178,7 +172,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT || t_clockdrift < -TOFFSET_MAXIMUM_ADJUSTMENT) { - msync_dbg("STA %pM : t_clockdrift=%lld too large, setpoint reset", + msync_dbg(sdata, + "STA %pM : t_clockdrift=%lld too large, setpoint reset\n", sta->sta.addr, (long long) t_clockdrift); clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); @@ -197,8 +192,8 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, } else { sta->t_offset_setpoint = sta->t_offset - TOFFSET_SET_MARGIN; set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); - msync_dbg("STA %pM : offset was invalid, " - " sta->t_offset=%lld", + msync_dbg(sdata, + "STA %pM : offset was invalid, sta->t_offset=%lld\n", sta->sta.addr, (long long) sta->t_offset); rcu_read_unlock(); @@ -226,17 +221,15 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata) * to the driver tsf setter, we punt * the tsf adjustment to the mesh tasklet */ - msync_dbg("TBTT : kicking off TBTT " - "adjustment with " - "clockdrift_max=%lld", - ifmsh->sync_offset_clockdrift_max); + msync_dbg(sdata, + "TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n", + ifmsh->sync_offset_clockdrift_max); set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); } else { - msync_dbg("TBTT : max clockdrift=%lld; " - "too small to adjust", - (long long) - ifmsh->sync_offset_clockdrift_max); + msync_dbg(sdata, + "TBTT : max clockdrift=%lld; too small to adjust\n", + (long long)ifmsh->sync_offset_clockdrift_max); ifmsh->sync_offset_clockdrift_max = 0; } spin_unlock_bh(&ifmsh->sync_offset_lock); @@ -268,7 +261,7 @@ static void mesh_sync_vendor_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, const u8 *oui; WARN_ON(sdata->u.mesh.mesh_sp_id != IEEE80211_SYNC_METHOD_VENDOR); - msync_dbg("called mesh_sync_vendor_rx_bcn_presp"); + msync_dbg(sdata, "called mesh_sync_vendor_rx_bcn_presp\n"); oui = mesh_get_vendor_oui(sdata); /* here you would implement the vendor offset tracking for this oui */ } @@ -278,7 +271,7 @@ static void mesh_sync_vendor_adjust_tbtt(struct ieee80211_sub_if_data *sdata) const u8 *oui; WARN_ON(sdata->u.mesh.mesh_sp_id != IEEE80211_SYNC_METHOD_VENDOR); - msync_dbg("called mesh_sync_vendor_adjust_tbtt"); + msync_dbg(sdata, "called mesh_sync_vendor_adjust_tbtt\n"); oui = mesh_get_vendor_oui(sdata); /* here you would implement the vendor tsf adjustment for this oui */ } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2b450f54199..e11cd0e033e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1186,19 +1186,16 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.txop = get_unaligned_le16(pos + 2); params.uapsd = uapsd; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", - queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, - params.txop, params.uapsd); -#endif + mlme_dbg(sdata, + "WMM queue=%d aci=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d\n", + queue, aci, acm, + params.aifs, params.cw_min, params.cw_max, + params.txop, params.uapsd); sdata->tx_conf[queue] = params; if (drv_conf_tx(local, sdata, queue, ¶ms)) - wiphy_debug(local->hw.wiphy, - "failed to set TX queue parameters for queue %d\n", - queue); + sdata_err(sdata, + "failed to set TX queue parameters for queue %d\n", + queue); } /* enable WMM or activate new settings */ @@ -1565,11 +1562,10 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (beacon) - net_dbg_ratelimited("%s: detected beacon loss from AP - sending probe request\n", - sdata->name); -#endif + mlme_dbg_ratelimited(sdata, + "detected beacon loss from AP - sending probe request\n"); + ieee80211_cqm_rssi_notify(&sdata->vif, NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL); @@ -1654,7 +1650,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - pr_debug("%s: Connection to AP %pM lost\n", sdata->name, bssid); + sdata_info(sdata, "Connection to AP %pM lost\n", bssid); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, @@ -1788,8 +1784,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, return RX_MGMT_NONE; if (status_code != WLAN_STATUS_SUCCESS) { - pr_debug("%s: %pM denied authentication (status %d)\n", - sdata->name, mgmt->sa, status_code); + sdata_info(sdata, "%pM denied authentication (status %d)\n", + mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); return RX_MGMT_CFG80211_RX_AUTH; } @@ -1812,7 +1808,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, return RX_MGMT_NONE; } - pr_debug("%s: authenticated\n", sdata->name); + sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; run_again(ifmgd, ifmgd->auth_data->timeout); @@ -1825,7 +1821,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, goto out_err; } if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) { - pr_debug("%s: failed moving %pM to auth\n", sdata->name, bssid); + sdata_info(sdata, "failed moving %pM to auth\n", bssid); goto out_err; } mutex_unlock(&sdata->local->sta_mtx); @@ -1859,8 +1855,8 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - pr_debug("%s: deauthenticated from %pM (Reason: %u)\n", - sdata->name, bssid, reason_code); + sdata_info(sdata, "deauthenticated from %pM (Reason: %u)\n", + bssid, reason_code); ieee80211_set_disassoc(sdata, 0, 0, false, NULL); @@ -1890,8 +1886,8 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - pr_debug("%s: disassociated from %pM (Reason: %u)\n", - sdata->name, mgmt->sa, reason_code); + sdata_info(sdata, "disassociated from %pM (Reason: %u)\n", + mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, 0, 0, false, NULL); @@ -1983,15 +1979,15 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) - pr_debug("%s: invalid AID value 0x%x; bits 15:14 not set\n", - sdata->name, aid); + sdata_info(sdata, "invalid AID value 0x%x; bits 15:14 not set\n", + aid); aid &= ~(BIT(15) | BIT(14)); ifmgd->broken_ap = false; if (aid == 0 || aid > IEEE80211_MAX_AID) { - pr_debug("%s: invalid AID value %d (out of range), turn off PS\n", - sdata->name, aid); + sdata_info(sdata, "invalid AID value %d (out of range), turn off PS\n", + aid); aid = 0; ifmgd->broken_ap = true; } @@ -2000,8 +1996,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.supp_rates) { - pr_debug("%s: no SuppRates element in AssocResp\n", - sdata->name); + sdata_info(sdata, "no SuppRates element in AssocResp\n"); return false; } @@ -2041,8 +2036,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (!err && !(ifmgd->flags & IEEE80211_STA_CONTROL_PORT)) err = sta_info_move_state(sta, IEEE80211_STA_AUTHORIZED); if (err) { - pr_debug("%s: failed to move station %pM to desired state\n", - sdata->name, sta->sta.addr); + sdata_info(sdata, + "failed to move station %pM to desired state\n", + sta->sta.addr); WARN_ON(__sta_info_destroy(sta)); mutex_unlock(&sdata->local->sta_mtx); return false; @@ -2125,9 +2121,10 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); - pr_debug("%s: RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", - sdata->name, reassoc ? "Rea" : "A", mgmt->sa, - capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + sdata_info(sdata, + "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", + reassoc ? "Rea" : "A", mgmt->sa, + capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); pos = mgmt->u.assoc_resp.variable; ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); @@ -2138,8 +2135,9 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, u32 tu, ms; tu = get_unaligned_le32(elems.timeout_int + 1); ms = tu * 1024 / 1000; - pr_debug("%s: %pM rejected association temporarily; comeback duration %u TU (%u ms)\n", - sdata->name, mgmt->sa, tu, ms); + sdata_info(sdata, + "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", + mgmt->sa, tu, ms); assoc_data->timeout = jiffies + msecs_to_jiffies(ms); if (ms > IEEE80211_ASSOC_TIMEOUT) run_again(ifmgd, assoc_data->timeout); @@ -2149,11 +2147,11 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, *bss = assoc_data->bss; if (status_code != WLAN_STATUS_SUCCESS) { - pr_debug("%s: %pM denied association (code=%d)\n", - sdata->name, mgmt->sa, status_code); + sdata_info(sdata, "%pM denied association (code=%d)\n", + mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - pr_debug("%s: associated\n", sdata->name); + sdata_info(sdata, "associated\n"); if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ @@ -2261,7 +2259,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data && !ifmgd->auth_data->bss->proberesp_ies && ether_addr_equal(mgmt->bssid, ifmgd->auth_data->bss->bssid)) { /* got probe response, continue with auth */ - pr_debug("%s: direct probe responded\n", sdata->name); + sdata_info(sdata, "direct probe responded\n"); ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; run_again(ifmgd, ifmgd->auth_data->timeout); @@ -2397,10 +2395,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - net_dbg_ratelimited("%s: cancelling probereq poll due to a received beacon\n", - sdata->name); -#endif + mlme_dbg_ratelimited(sdata, + "cancelling probereq poll due to a received beacon\n"); mutex_lock(&local->mtx); ifmgd->flags &= ~IEEE80211_STA_BEACON_POLL; ieee80211_run_deferred_scan(local); @@ -2625,8 +2621,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) auth_data->tries++; if (auth_data->tries > IEEE80211_AUTH_MAX_TRIES) { - pr_debug("%s: authentication with %pM timed out\n", - sdata->name, auth_data->bss->bssid); + sdata_info(sdata, "authentication with %pM timed out\n", + auth_data->bss->bssid); /* * Most likely AP is not in the range so remove the @@ -2638,9 +2634,9 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) } if (auth_data->bss->proberesp_ies) { - pr_debug("%s: send auth to %pM (try %d/%d)\n", - sdata->name, auth_data->bss->bssid, auth_data->tries, - IEEE80211_AUTH_MAX_TRIES); + sdata_info(sdata, "send auth to %pM (try %d/%d)\n", + auth_data->bss->bssid, auth_data->tries, + IEEE80211_AUTH_MAX_TRIES); auth_data->expected_transaction = 2; ieee80211_send_auth(sdata, 1, auth_data->algorithm, @@ -2650,9 +2646,9 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) } else { const u8 *ssidie; - pr_debug("%s: direct probe to %pM (try %d/%i)\n", - sdata->name, auth_data->bss->bssid, auth_data->tries, - IEEE80211_AUTH_MAX_TRIES); + sdata_info(sdata, "direct probe to %pM (try %d/%i)\n", + auth_data->bss->bssid, auth_data->tries, + IEEE80211_AUTH_MAX_TRIES); ssidie = ieee80211_bss_get_ie(auth_data->bss, WLAN_EID_SSID); if (!ssidie) @@ -2680,8 +2676,8 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { - pr_debug("%s: association with %pM timed out\n", - sdata->name, assoc_data->bss->bssid); + sdata_info(sdata, "association with %pM timed out\n", + assoc_data->bss->bssid); /* * Most likely AP is not in the range so remove the @@ -2692,9 +2688,9 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) return -ETIMEDOUT; } - pr_debug("%s: associate with %pM (try %d/%d)\n", - sdata->name, assoc_data->bss->bssid, assoc_data->tries, - IEEE80211_ASSOC_MAX_TRIES); + sdata_info(sdata, "associate with %pM (try %d/%d)\n", + assoc_data->bss->bssid, assoc_data->tries, + IEEE80211_ASSOC_MAX_TRIES); ieee80211_send_assoc(sdata); assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; @@ -2767,45 +2763,31 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_reset_ap_probe(sdata); else if (ifmgd->nullfunc_failed) { if (ifmgd->probe_send_count < max_tries) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: No ack for nullfunc frame to" - " AP %pM, try %d/%i\n", - sdata->name, bssid, - ifmgd->probe_send_count, max_tries); -#endif + mlme_dbg(sdata, + "No ack for nullfunc frame to AP %pM, try %d/%i\n", + bssid, ifmgd->probe_send_count, + max_tries); ieee80211_mgd_probe_ap_send(sdata); } else { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: No ack for nullfunc frame to" - " AP %pM, disconnecting.\n", - sdata->name, bssid); -#endif + mlme_dbg(sdata, + "No ack for nullfunc frame to AP %pM, disconnecting.\n", + bssid); ieee80211_sta_connection_lost(sdata, bssid, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) run_again(ifmgd, ifmgd->probe_timeout); else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: Failed to send nullfunc to AP %pM" - " after %dms, disconnecting.\n", - sdata->name, - bssid, probe_wait_ms); -#endif + mlme_dbg(sdata, + "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", + bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY); } else if (ifmgd->probe_send_count < max_tries) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, - "%s: No probe response from AP %pM" - " after %dms, try %d/%i\n", - sdata->name, - bssid, probe_wait_ms, - ifmgd->probe_send_count, max_tries); -#endif + mlme_dbg(sdata, + "No probe response from AP %pM after %dms, try %d/%i\n", + bssid, probe_wait_ms, + ifmgd->probe_send_count, max_tries); ieee80211_mgd_probe_ap_send(sdata); } else { /* @@ -2920,11 +2902,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; mutex_lock(&ifmgd->mtx); if (ifmgd->associated) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(sdata->local->hw.wiphy, - "%s: driver requested disconnect after resume.\n", - sdata->name); -#endif + mlme_dbg(sdata, + "driver requested disconnect after resume\n"); ieee80211_sta_connection_lost(sdata, ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED); @@ -3065,10 +3044,11 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, * since we look at probe response/beacon data here * it should be OK. */ - pr_debug("%s: Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - sdata->name, cbss->channel->center_freq, - ht_cfreq, ht_oper->primary_chan, - cbss->channel->band); + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + cbss->channel->center_freq, + ht_cfreq, ht_oper->primary_chan, + cbss->channel->band); ht_oper = NULL; } } @@ -3092,8 +3072,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, if (!ieee80211_set_channel_type(local, sdata, channel_type)) { /* can only fail due to HT40+/- mismatch */ channel_type = NL80211_CHAN_HT20; - pr_debug("%s: disabling 40 MHz due to multi-vif mismatch\n", - sdata->name); + sdata_info(sdata, + "disabling 40 MHz due to multi-vif mismatch\n"); ifmgd->flags |= IEEE80211_STA_DISABLE_40MHZ; WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); @@ -3122,8 +3102,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, * we can connect -- with a warning. */ if (!basic_rates && min_rate_index >= 0) { - pr_debug("%s: No basic rates, using min rate instead\n", - sdata->name); + sdata_info(sdata, + "No basic rates, using min rate instead\n"); basic_rates = BIT(min_rate_index); } @@ -3149,8 +3129,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, err = sta_info_insert(sta); sta = NULL; if (err) { - pr_debug("%s: failed to insert STA entry for the AP (error %d)\n", - sdata->name, err); + sdata_info(sdata, + "failed to insert STA entry for the AP (error %d)\n", + err); return err; } } else @@ -3228,7 +3209,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->associated) ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - pr_debug("%s: authenticate with %pM\n", sdata->name, req->bss->bssid); + sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); err = ieee80211_prep_connection(sdata, req->bss, false); if (err) @@ -3410,8 +3391,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, * Wait up to one beacon interval ... * should this be more if we miss one? */ - pr_debug("%s: waiting for beacon from %pM\n", - sdata->name, ifmgd->bssid); + sdata_info(sdata, "waiting for beacon from %pM\n", + ifmgd->bssid); assoc_data->timeout = TU_TO_EXP_TIME(req->bss->beacon_interval); } else { assoc_data->have_beacon = true; @@ -3430,8 +3411,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, corrupt_type = "beacon"; } else if (bss->corrupt_data & IEEE80211_BSS_CORRUPT_PROBE_RESP) corrupt_type = "probe response"; - pr_debug("%s: associating with AP with corrupt %s\n", - sdata->name, corrupt_type); + sdata_info(sdata, "associating with AP with corrupt %s\n", + corrupt_type); } err = 0; @@ -3460,8 +3441,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } - pr_debug("%s: deauthenticating from %pM by local choice (reason=%d)\n", - sdata->name, req->bssid, req->reason_code); + sdata_info(sdata, + "deauthenticating from %pM by local choice (reason=%d)\n", + req->bssid, req->reason_code); if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) @@ -3503,8 +3485,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, return -ENOLINK; } - pr_debug("%s: disassociating from %pM by local choice (reason=%d)\n", - sdata->name, req->bss->bssid, req->reason_code); + sdata_info(sdata, + "disassociating from %pM by local choice (reason=%d)\n", + req->bss->bssid, req->reason_code); memcpy(bssid, req->bss->bssid, ETH_ALEN); ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9f1bd692589..ab5185054e6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -632,11 +632,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, HT_RX_REORDER_BUF_TIMEOUT)) goto set_release_timer; -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - wiphy_debug(sdata->local->hw.wiphy, - "release an RX reorder frame due to timeout on earlier frames\n"); -#endif + ht_dbg_ratelimited(sdata, + "release an RX reorder frame due to timeout on earlier frames\n"); ieee80211_release_reorder_frame(sdata, tid_agg_rx, j); /* @@ -1136,24 +1133,18 @@ static void ap_sta_ps_start(struct sta_info *sta) set_sta_flag(sta, WLAN_STA_PS_STA); if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d enters power save mode\n", - sdata->name, sta->sta.addr, sta->sta.aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", + sta->sta.addr, sta->sta.aid); } static void ap_sta_ps_end(struct sta_info *sta) { -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d exits power save mode\n", - sta->sdata->name, sta->sta.addr, sta->sta.aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sta->sdata, "STA %pM aid %d exits power save mode\n", + sta->sta.addr, sta->sta.aid); if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d driver-ps-blocked\n", - sta->sdata->name, sta->sta.addr, sta->sta.aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n", + sta->sta.addr, sta->sta.aid); return; } @@ -1383,17 +1374,8 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) sdata->fragment_next = 0; - if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) entry->skb_list.next->data; - pr_debug("%s: RX reassembly removed oldest fragment entry (idx=%d age=%lu seq=%d last_frag=%d addr1=%pM addr2=%pM\n", - sdata->name, idx, - jiffies - entry->first_frag_time, entry->seq, - entry->last_frag, hdr->addr1, hdr->addr2); -#endif + if (!skb_queue_empty(&entry->skb_list)) __skb_queue_purge(&entry->skb_list); - } __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ *skb = NULL; @@ -1751,7 +1733,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) */ xmit_skb = skb_copy(skb, GFP_ATOMIC); if (!xmit_skb) - net_dbg_ratelimited("%s: failed to clone multicast frame\n", + net_info_ratelimited("%s: failed to clone multicast frame\n", dev->name); } else { dsta = sta_info_get(sdata, skb->data); @@ -1955,7 +1937,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) fwd_skb = skb_copy(skb, GFP_ATOMIC); if (!fwd_skb) { - net_dbg_ratelimited("%s: failed to clone mesh frame\n", + net_info_ratelimited("%s: failed to clone mesh frame\n", sdata->name); goto out; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 77dcf2f89d4..06fa75ceb02 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -169,9 +169,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) if (sta->rate_ctrl) rate_control_free_sta(sta); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(sta); } @@ -278,9 +276,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < NUM_RX_DATA_QUEUES; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); #ifdef CONFIG_MAC80211_MESH sta->plink_state = NL80211_PLINK_LISTEN; @@ -333,8 +329,9 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local, } if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - pr_debug("%s: failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", - sdata->name, sta->sta.addr, state + 1, err); + sdata_info(sdata, + "failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", + sta->sta.addr, state + 1, err); err = 0; } @@ -389,9 +386,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) sinfo.generation = local->sta_generation; cfg80211_new_sta(sdata->dev, sta->sta.addr, &sinfo, GFP_KERNEL); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); /* move reference to rcu-protected */ rcu_read_lock(); @@ -617,9 +612,8 @@ static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, break; local->total_ps_buffered--; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("Buffered frame expired (STA %pM)\n", sta->sta.addr); -#endif + ps_dbg(sta->sdata, "Buffered frame expired (STA %pM)\n", + sta->sta.addr); dev_kfree_skb(skb); } @@ -745,9 +739,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta) mesh_accept_plinks_update(sdata); #endif -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); + cancel_work_sync(&sta->drv_unblock_wk); cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL); @@ -887,8 +880,8 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, continue; if (time_after(jiffies, sta->last_rx + exp_time)) { - ibss_vdbg("%s: expiring inactive STA %pM\n", - sdata->name, sta->sta.addr); + ibss_dbg(sdata, "expiring inactive STA %pM\n", + sta->sta.addr); WARN_ON(__sta_info_destroy(sta)); } } @@ -986,10 +979,9 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) sta_info_recalc_tim(sta); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("%s: STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", - sdata->name, sta->sta.addr, sta->sta.aid, filtered, buffered); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sdata, + "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", + sta->sta.addr, sta->sta.aid, filtered, buffered); } static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, @@ -1379,10 +1371,8 @@ int sta_info_move_state(struct sta_info *sta, return -EINVAL; } -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: moving STA %pM to state %d\n", - sta->sdata->name, sta->sta.addr, new_state); -#endif + sta_dbg(sta->sdata, "moving STA %pM to state %d\n", + sta->sta.addr, new_state); /* * notify the driver before the actual changes so it can diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 51a6d1e6e25..2ed2f27fe8a 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -155,13 +155,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - if (net_ratelimit()) - wiphy_debug(local->hw.wiphy, - "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", - skb_queue_len(&sta->tx_filtered[ac]), - !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); -#endif + ps_dbg_ratelimited(sta->sdata, + "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", + skb_queue_len(&sta->tx_filtered[ac]), + !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); dev_kfree_skb(skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index af25c4e7ec5..37eda7e00e0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -297,9 +297,10 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(!assoc && ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - pr_debug("%s: dropped data frame to not associated station %pM\n", - tx->sdata->name, hdr->addr1); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + sdata_info(tx->sdata, + "dropped data frame to not associated station %pM\n", + hdr->addr1); +#endif I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); return TX_DROP; } @@ -366,10 +367,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) rcu_read_unlock(); local->total_ps_buffered = total; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n", - purged); -#endif + ps_dbg_hw(&local->hw, "PS buffers full - purged %d frames\n", purged); } static ieee80211_tx_result @@ -411,10 +409,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) purge_old_ps_buffers(tx->local); if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= AP_MAX_BC_BUFFER) { -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - net_dbg_ratelimited("%s: BC TX buffer full - dropping the oldest frame\n", - tx->sdata->name); -#endif + ps_dbg(tx->sdata, + "BC TX buffer full - dropping the oldest frame\n"); dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); } else tx->local->total_ps_buffered++; @@ -465,18 +461,15 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) return TX_CONTINUE; } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - pr_debug("STA %pM aid %d: PS buffer for AC %d\n", - sta->sta.addr, sta->sta.aid, ac); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n", + sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - net_dbg_ratelimited("%s: STA %pM TX buffer for AC %d full - dropping oldest frame\n", - tx->sdata->name, sta->sta.addr, ac); -#endif + ps_dbg(tx->sdata, + "STA %pM TX buffer for AC %d full - dropping oldest frame\n", + sta->sta.addr, ac); dev_kfree_skb(old); } else tx->local->total_ps_buffered++; @@ -498,13 +491,11 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta_info_recalc_tim(sta); return TX_QUEUED; + } else if (unlikely(test_sta_flag(sta, WLAN_STA_PS_STA))) { + ps_dbg(tx->sdata, + "STA %pM in PS mode, but polling/in SP -> send frame\n", + sta->sta.addr); } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(test_sta_flag(sta, WLAN_STA_PS_STA))) { - pr_debug("%s: STA %pM in PS mode, but polling/in SP -> send frame\n", - tx->sdata->name, sta->sta.addr); - } -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ return TX_CONTINUE; } @@ -1963,7 +1954,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, (cpu_to_be16(ethertype) != sdata->control_port_protocol || !ether_addr_equal(sdata->vif.addr, skb->data + ETH_ALEN)))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - net_dbg_ratelimited("%s: dropped frame to %pM (unauthorized port)\n", + net_info_ratelimited("%s: dropped frame to %pM (unauthorized port)\n", dev->name, hdr.addr1); #endif -- cgit v1.2.3-70-g09d2 From efc27f8ceebe5eb147fa31d6c995706d327ad855 Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Sun, 24 Jun 2012 13:03:07 +0000 Subject: net: Remove 'unlikely' qualifier in skb_steal_sock() With early demux enabled by default for TCP flows, there is high chance that skb->sk will be non-null. 'unlikely()' was removed from __inet_lookup_skb() but maybe it can be removed from skb_steal_sock() as well. Note: skb_steal_sock() is also called by __inet6_lookup_skb() and __udp4_lib_lookup_skb() but they are protected by their own 'unlikely' calls. Signed-off-by: Vijay Subramanian Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 87b424ae750..21086036e34 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2154,7 +2154,7 @@ static inline void sk_change_net(struct sock *sk, struct net *net) static inline struct sock *skb_steal_sock(struct sk_buff *skb) { - if (unlikely(skb->sk)) { + if (skb->sk) { struct sock *sk = skb->sk; skb->destructor = NULL; -- cgit v1.2.3-70-g09d2 From deaa58542b21d2b395db816952c202034319cbb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Jun 2012 20:22:49 +0000 Subject: net: struct sock cleanups Add missing kernel doc for sk_rx_dst Move sk_rx_dst to avoid two 32bit holes on 64bit arches Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 21086036e34..dcb54a0793e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -198,6 +198,7 @@ struct cg_proto; * @sk_lock: synchronizer * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head + * @sk_rx_dst: receive input route used by early tcp demux * @sk_dst_cache: destination cache * @sk_dst_lock: destination cache lock * @sk_policy: flow policy @@ -317,9 +318,9 @@ struct sock { struct xfrm_policy *sk_policy[2]; #endif unsigned long sk_flags; + struct dst_entry *sk_rx_dst; struct dst_entry *sk_dst_cache; spinlock_t sk_dst_lock; - struct dst_entry *sk_rx_dst; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; -- cgit v1.2.3-70-g09d2 From c41254006377842013922fb1e407391f24c59522 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Mon, 25 Jun 2012 07:49:41 +0000 Subject: caif-hsi: Add rtnl support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RTNL support for managing the caif hsi interface. The HSI HW interface is no longer registering as a device, instead we use symbol_get to get hold of the HSI API. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 226 +++++++++++++++++++++++++++----------------- include/net/caif/caif_hsi.h | 21 +++- 2 files changed, 157 insertions(+), 90 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index d80759e5853..a14f85c0f0e 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -79,7 +79,6 @@ MODULE_PARM_DESC(hsi_low_threshold, "HSI high threshold (FLOW ON)."); #define HIGH_WATER_MARK hsi_high_threshold static LIST_HEAD(cfhsi_list); -static spinlock_t cfhsi_list_lock; static void cfhsi_inactivity_tout(unsigned long arg) { @@ -1148,42 +1147,6 @@ static void cfhsi_setup(struct net_device *dev) cfhsi->ndev = dev; } -int cfhsi_probe(struct platform_device *pdev) -{ - struct cfhsi *cfhsi = NULL; - struct net_device *ndev; - - int res; - - ndev = alloc_netdev(sizeof(struct cfhsi), "cfhsi%d", cfhsi_setup); - if (!ndev) - return -ENODEV; - - cfhsi = netdev_priv(ndev); - cfhsi->ndev = ndev; - cfhsi->pdev = pdev; - - /* Assign the HSI device. */ - cfhsi->dev = pdev->dev.platform_data; - - /* Assign the driver to this HSI device. */ - cfhsi->dev->drv = &cfhsi->drv; - - /* Register network device. */ - res = register_netdev(ndev); - if (res) { - dev_err(&ndev->dev, "%s: Registration error: %d.\n", - __func__, res); - free_netdev(ndev); - } - /* Add CAIF HSI device to list. */ - spin_lock(&cfhsi_list_lock); - list_add_tail(&cfhsi->list, &cfhsi_list); - spin_unlock(&cfhsi_list_lock); - - return res; -} - static int cfhsi_open(struct net_device *ndev) { struct cfhsi *cfhsi = netdev_priv(ndev); @@ -1364,85 +1327,170 @@ static int cfhsi_close(struct net_device *ndev) return 0; } +static void cfhsi_uninit(struct net_device *dev) +{ + struct cfhsi *cfhsi = netdev_priv(dev); + ASSERT_RTNL(); + symbol_put(cfhsi_get_device); + list_del(&cfhsi->list); +} + static const struct net_device_ops cfhsi_ops = { + .ndo_uninit = cfhsi_uninit, .ndo_open = cfhsi_open, .ndo_stop = cfhsi_close, .ndo_start_xmit = cfhsi_xmit }; -int cfhsi_remove(struct platform_device *pdev) +static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) { - struct list_head *list_node; - struct list_head *n; - struct cfhsi *cfhsi = NULL; - struct cfhsi_dev *dev; + int i; - dev = (struct cfhsi_dev *)pdev->dev.platform_data; - spin_lock(&cfhsi_list_lock); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); - /* Find the corresponding device. */ - if (cfhsi->dev == dev) { - /* Remove from list. */ - list_del(list_node); - spin_unlock(&cfhsi_list_lock); - return 0; - } + if (!data) { + pr_debug("no params data found\n"); + return; } - spin_unlock(&cfhsi_list_lock); - return -ENODEV; + + i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; + if (data[i]) + inactivity_timeout = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; + if (data[i]) + aggregation_timeout = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_HEAD_ALIGN; + if (data[i]) + hsi_head_align = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_TAIL_ALIGN; + if (data[i]) + hsi_tail_align = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; + if (data[i]) + hsi_high_threshold = nla_get_u32(data[i]); +} + +static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + cfhsi_netlink_parms(data, netdev_priv(dev)); + netdev_state_change(dev); + return 0; } -struct platform_driver cfhsi_plat_drv = { - .probe = cfhsi_probe, - .remove = cfhsi_remove, - .driver = { - .name = "cfhsi", - .owner = THIS_MODULE, - }, +static const struct nla_policy caif_hsi_policy[__IFLA_CAIF_HSI_MAX + 1] = { + [__IFLA_CAIF_HSI_INACTIVITY_TOUT] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_AGGREGATION_TOUT] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_HEAD_ALIGN] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_TAIL_ALIGN] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_QHIGH_WATERMARK] = { .type = NLA_U32, .len = 4 }, + [__IFLA_CAIF_HSI_QLOW_WATERMARK] = { .type = NLA_U32, .len = 4 }, }; -static void __exit cfhsi_exit_module(void) +static size_t caif_hsi_get_size(const struct net_device *dev) +{ + int i; + size_t s = 0; + for (i = __IFLA_CAIF_HSI_UNSPEC + 1; i < __IFLA_CAIF_HSI_MAX; i++) + s += nla_total_size(caif_hsi_policy[i].len); + return s; +} + +static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, + inactivity_timeout) || + nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, + aggregation_timeout) || + nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, hsi_head_align) || + nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, hsi_tail_align) || + nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, + hsi_high_threshold) || + nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, + hsi_low_threshold)) + return -EMSGSIZE; + + return 0; +} + +static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - struct list_head *list_node; - struct list_head *n; struct cfhsi *cfhsi = NULL; + struct platform_device *(*get_dev)(void); - spin_lock(&cfhsi_list_lock); - list_for_each_safe(list_node, n, &cfhsi_list) { - cfhsi = list_entry(list_node, struct cfhsi, list); + ASSERT_RTNL(); + + cfhsi = netdev_priv(dev); + cfhsi_netlink_parms(data, cfhsi); + dev_net_set(cfhsi->ndev, src_net); + + get_dev = symbol_get(cfhsi_get_device); + if (!get_dev) { + pr_err("%s: failed to get the cfhsi device symbol\n", __func__); + return -ENODEV; + } + + /* Assign the HSI device. */ + cfhsi->pdev = (*get_dev)(); + if (!cfhsi->pdev) { + pr_err("%s: failed to get the cfhsi device\n", __func__); + goto err; + } - /* Remove from list. */ - list_del(list_node); - spin_unlock(&cfhsi_list_lock); + /* Assign the HSI device. */ + cfhsi->dev = cfhsi->pdev->dev.platform_data; + + /* Assign the driver to this HSI device. */ + cfhsi->dev->drv = &cfhsi->drv; - unregister_netdevice(cfhsi->ndev); + if (register_netdevice(dev)) { + pr_warn("%s: device rtml registration failed\n", __func__); + goto err; - spin_lock(&cfhsi_list_lock); } - spin_unlock(&cfhsi_list_lock); + /* Add CAIF HSI device to list. */ + list_add_tail(&cfhsi->list, &cfhsi_list); - /* Unregister platform driver. */ - platform_driver_unregister(&cfhsi_plat_drv); + return 0; +err: + symbol_put(cfhsi_get_device); + return -ENODEV; } -static int __init cfhsi_init_module(void) +static struct rtnl_link_ops caif_hsi_link_ops __read_mostly = { + .kind = "cfhsi", + .priv_size = sizeof(struct cfhsi), + .setup = cfhsi_setup, + .maxtype = __IFLA_CAIF_HSI_MAX, + .policy = caif_hsi_policy, + .newlink = caif_hsi_newlink, + .changelink = caif_hsi_changelink, + .get_size = caif_hsi_get_size, + .fill_info = caif_hsi_fill_info, +}; + +static void __exit cfhsi_exit_module(void) { - int result; + struct list_head *list_node; + struct list_head *n; + struct cfhsi *cfhsi; - /* Initialize spin lock. */ - spin_lock_init(&cfhsi_list_lock); + rtnl_link_unregister(&caif_hsi_link_ops); - /* Register platform driver. */ - result = platform_driver_register(&cfhsi_plat_drv); - if (result) { - printk(KERN_ERR "Could not register platform HSI driver: %d.\n", - result); - goto err_dev_register; + rtnl_lock(); + list_for_each_safe(list_node, n, &cfhsi_list) { + cfhsi = list_entry(list_node, struct cfhsi, list); + unregister_netdev(cfhsi->ndev); } + rtnl_unlock(); +} - err_dev_register: - return result; +static int __init cfhsi_init_module(void) +{ + return rtnl_link_register(&caif_hsi_link_ops); } module_init(cfhsi_init_module); diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index 439dadc8102..a77b2bd7719 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -170,7 +170,26 @@ struct cfhsi { unsigned long bits; }; - extern struct platform_driver cfhsi_driver; +/** + * enum ifla_caif_hsi - CAIF HSI NetlinkRT parameters. + * @IFLA_CAIF_HSI_INACTIVITY_TOUT: Inactivity timeout before + * taking the HSI wakeline down, in milliseconds. + * When using RT Netlink to create, destroy or configure a CAIF HSI interface, + * enum ifla_caif_hsi is used to specify the configuration attributes. + */ +enum ifla_caif_hsi { + __IFLA_CAIF_HSI_UNSPEC, + __IFLA_CAIF_HSI_INACTIVITY_TOUT, + __IFLA_CAIF_HSI_AGGREGATION_TOUT, + __IFLA_CAIF_HSI_HEAD_ALIGN, + __IFLA_CAIF_HSI_TAIL_ALIGN, + __IFLA_CAIF_HSI_QHIGH_WATERMARK, + __IFLA_CAIF_HSI_QLOW_WATERMARK, + __IFLA_CAIF_HSI_MAX +}; + +extern struct platform_device *cfhsi_get_device(void); + #endif /* CAIF_HSI_H_ */ -- cgit v1.2.3-70-g09d2 From 1c385f1fdf6f9c66d982802cd74349c040980b50 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Mon, 25 Jun 2012 07:49:42 +0000 Subject: caif-hsi: Replace platform device with ops structure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove use of struct platform_device, and replace it with struct cfhsi_ops. Updated variable names in the same spirit: cfhsi_get_dev to cfhsi_get_ops, cfhsi->dev to cfhsi->ops and, cfhsi->dev.drv to cfhsi->ops->cb_ops. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 109 +++++++++++++++++++++----------------------- include/net/caif/caif_hsi.h | 38 +++++++-------- 2 files changed, 70 insertions(+), 77 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index a14f85c0f0e..0927c108bd1 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -184,7 +183,7 @@ static int cfhsi_flush_fifo(struct cfhsi *cfhsi) __func__); do { - ret = cfhsi->dev->cfhsi_fifo_occupancy(cfhsi->dev, + ret = cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, &fifo_occupancy); if (ret) { netdev_warn(cfhsi->ndev, @@ -197,8 +196,8 @@ static int cfhsi_flush_fifo(struct cfhsi *cfhsi) fifo_occupancy = min(sizeof(buffer), fifo_occupancy); set_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); - ret = cfhsi->dev->cfhsi_rx(buffer, fifo_occupancy, - cfhsi->dev); + ret = cfhsi->ops->cfhsi_rx(buffer, fifo_occupancy, + cfhsi->ops); if (ret) { clear_bit(CFHSI_FLUSH_FIFO, &cfhsi->bits); netdev_warn(cfhsi->ndev, @@ -371,7 +370,7 @@ static void cfhsi_start_tx(struct cfhsi *cfhsi) } /* Set up new transfer. */ - res = cfhsi->dev->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->dev); + res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); if (WARN_ON(res < 0)) netdev_err(cfhsi->ndev, "%s: TX error %d.\n", __func__, res); @@ -410,11 +409,11 @@ static void cfhsi_tx_done(struct cfhsi *cfhsi) return; } -static void cfhsi_tx_done_cb(struct cfhsi_drv *drv) +static void cfhsi_tx_done_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -476,8 +475,8 @@ static int cfhsi_rx_desc(struct cfhsi_desc *desc, struct cfhsi *cfhsi) skb->dev = cfhsi->ndev; /* - * We are called from a arch specific platform device. - * Unfortunately we don't know what context we're + * We are in a callback handler and + * unfortunately we don't know what context we're * running in. */ if (in_interrupt()) @@ -607,7 +606,7 @@ static int cfhsi_rx_pld(struct cfhsi_desc *desc, struct cfhsi *cfhsi) skb->dev = cfhsi->ndev; /* - * We're called from a platform device, + * We're called in callback from HSI * and don't know the context we're running in. */ if (in_interrupt()) @@ -711,8 +710,8 @@ static void cfhsi_rx_done(struct cfhsi *cfhsi) netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->dev->cfhsi_rx(rx_ptr, rx_len, - cfhsi->dev); + res = cfhsi->ops->cfhsi_rx(rx_ptr, rx_len, + cfhsi->ops); if (WARN_ON(res < 0)) { netdev_err(cfhsi->ndev, "%s: RX error %d.\n", __func__, res); @@ -765,11 +764,11 @@ static void cfhsi_rx_slowpath(unsigned long arg) cfhsi_rx_done(cfhsi); } -static void cfhsi_rx_done_cb(struct cfhsi_drv *drv) +static void cfhsi_rx_done_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -803,7 +802,7 @@ static void cfhsi_wake_up(struct work_struct *work) } /* Activate wake line. */ - cfhsi->dev->cfhsi_wake_up(cfhsi->dev); + cfhsi->ops->cfhsi_wake_up(cfhsi->ops); netdev_dbg(cfhsi->ndev, "%s: Start waiting.\n", __func__); @@ -819,7 +818,7 @@ static void cfhsi_wake_up(struct work_struct *work) __func__, ret); clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->dev->cfhsi_wake_down(cfhsi->dev); + cfhsi->ops->cfhsi_wake_down(cfhsi->ops); return; } else if (!ret) { bool ca_wake = false; @@ -830,14 +829,14 @@ static void cfhsi_wake_up(struct work_struct *work) __func__); /* Check FIFO to check if modem has sent something. */ - WARN_ON(cfhsi->dev->cfhsi_fifo_occupancy(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, &fifo_occupancy)); netdev_dbg(cfhsi->ndev, "%s: Bytes in FIFO: %u.\n", __func__, (unsigned) fifo_occupancy); /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->dev->cfhsi_get_peer_wake(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, &ca_wake)); if (ca_wake) { @@ -852,7 +851,7 @@ static void cfhsi_wake_up(struct work_struct *work) } clear_bit(CFHSI_WAKE_UP, &cfhsi->bits); - cfhsi->dev->cfhsi_wake_down(cfhsi->dev); + cfhsi->ops->cfhsi_wake_down(cfhsi->ops); return; } wake_ack: @@ -865,7 +864,7 @@ wake_ack: /* Resume read operation. */ netdev_dbg(cfhsi->ndev, "%s: Start RX.\n", __func__); - res = cfhsi->dev->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->dev); + res = cfhsi->ops->cfhsi_rx(cfhsi->rx_ptr, cfhsi->rx_len, cfhsi->ops); if (WARN_ON(res < 0)) netdev_err(cfhsi->ndev, "%s: RX err %d.\n", __func__, res); @@ -896,7 +895,7 @@ wake_ack: if (likely(len > 0)) { /* Set up new transfer. */ - res = cfhsi->dev->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->dev); + res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); if (WARN_ON(res < 0)) { netdev_err(cfhsi->ndev, "%s: TX error %d.\n", __func__, res); @@ -923,7 +922,7 @@ static void cfhsi_wake_down(struct work_struct *work) return; /* Deactivate wake line. */ - cfhsi->dev->cfhsi_wake_down(cfhsi->dev); + cfhsi->ops->cfhsi_wake_down(cfhsi->ops); /* Wait for acknowledge. */ ret = CFHSI_WAKE_TOUT; @@ -942,7 +941,7 @@ static void cfhsi_wake_down(struct work_struct *work) netdev_err(cfhsi->ndev, "%s: Timeout.\n", __func__); /* Check if we misssed the interrupt. */ - WARN_ON(cfhsi->dev->cfhsi_get_peer_wake(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_get_peer_wake(cfhsi->ops, &ca_wake)); if (!ca_wake) netdev_err(cfhsi->ndev, "%s: CA Wake missed !.\n", @@ -951,7 +950,7 @@ static void cfhsi_wake_down(struct work_struct *work) /* Check FIFO occupancy. */ while (retry) { - WARN_ON(cfhsi->dev->cfhsi_fifo_occupancy(cfhsi->dev, + WARN_ON(cfhsi->ops->cfhsi_fifo_occupancy(cfhsi->ops, &fifo_occupancy)); if (!fifo_occupancy) @@ -969,8 +968,7 @@ static void cfhsi_wake_down(struct work_struct *work) clear_bit(CFHSI_AWAKE, &cfhsi->bits); /* Cancel pending RX requests. */ - cfhsi->dev->cfhsi_rx_cancel(cfhsi->dev); - + cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); } static void cfhsi_out_of_sync(struct work_struct *work) @@ -984,11 +982,11 @@ static void cfhsi_out_of_sync(struct work_struct *work) rtnl_unlock(); } -static void cfhsi_wake_up_cb(struct cfhsi_drv *drv) +static void cfhsi_wake_up_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi = NULL; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -1003,11 +1001,11 @@ static void cfhsi_wake_up_cb(struct cfhsi_drv *drv) queue_work(cfhsi->wq, &cfhsi->wake_up_work); } -static void cfhsi_wake_down_cb(struct cfhsi_drv *drv) +static void cfhsi_wake_down_cb(struct cfhsi_cb_ops *cb_ops) { struct cfhsi *cfhsi = NULL; - cfhsi = container_of(drv, struct cfhsi, drv); + cfhsi = container_of(cb_ops, struct cfhsi, cb_ops); netdev_dbg(cfhsi->ndev, "%s.\n", __func__); @@ -1110,7 +1108,7 @@ static int cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) WARN_ON(!len); /* Set up new transfer. */ - res = cfhsi->dev->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->dev); + res = cfhsi->ops->cfhsi_tx(cfhsi->tx_buf, len, cfhsi->ops); if (WARN_ON(res < 0)) { netdev_err(cfhsi->ndev, "%s: TX error %d.\n", __func__, res); @@ -1125,19 +1123,19 @@ static int cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static const struct net_device_ops cfhsi_ops; +static const struct net_device_ops cfhsi_netdevops; static void cfhsi_setup(struct net_device *dev) { int i; struct cfhsi *cfhsi = netdev_priv(dev); dev->features = 0; - dev->netdev_ops = &cfhsi_ops; dev->type = ARPHRD_CAIF; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = CFHSI_MAX_CAIF_FRAME_SZ; dev->tx_queue_len = 0; dev->destructor = free_netdev; + dev->netdev_ops = &cfhsi_netdevops; for (i = 0; i < CFHSI_PRIO_LAST; ++i) skb_queue_head_init(&cfhsi->qhead[i]); cfhsi->cfdev.link_select = CAIF_LINK_HIGH_BANDW; @@ -1213,10 +1211,10 @@ static int cfhsi_open(struct net_device *ndev) spin_lock_init(&cfhsi->lock); /* Set up the driver. */ - cfhsi->drv.tx_done_cb = cfhsi_tx_done_cb; - cfhsi->drv.rx_done_cb = cfhsi_rx_done_cb; - cfhsi->drv.wake_up_cb = cfhsi_wake_up_cb; - cfhsi->drv.wake_down_cb = cfhsi_wake_down_cb; + cfhsi->cb_ops.tx_done_cb = cfhsi_tx_done_cb; + cfhsi->cb_ops.rx_done_cb = cfhsi_rx_done_cb; + cfhsi->cb_ops.wake_up_cb = cfhsi_wake_up_cb; + cfhsi->cb_ops.wake_down_cb = cfhsi_wake_down_cb; /* Initialize the work queues. */ INIT_WORK(&cfhsi->wake_up_work, cfhsi_wake_up); @@ -1230,7 +1228,7 @@ static int cfhsi_open(struct net_device *ndev) clear_bit(CFHSI_AWAKE, &cfhsi->bits); /* Create work thread. */ - cfhsi->wq = create_singlethread_workqueue(cfhsi->pdev->name); + cfhsi->wq = create_singlethread_workqueue(cfhsi->ndev->name); if (!cfhsi->wq) { netdev_err(cfhsi->ndev, "%s: Failed to create work queue.\n", __func__); @@ -1257,7 +1255,7 @@ static int cfhsi_open(struct net_device *ndev) cfhsi->aggregation_timer.function = cfhsi_aggregation_tout; /* Activate HSI interface. */ - res = cfhsi->dev->cfhsi_up(cfhsi->dev); + res = cfhsi->ops->cfhsi_up(cfhsi->ops); if (res) { netdev_err(cfhsi->ndev, "%s: can't activate HSI interface: %d.\n", @@ -1275,7 +1273,7 @@ static int cfhsi_open(struct net_device *ndev) return res; err_net_reg: - cfhsi->dev->cfhsi_down(cfhsi->dev); + cfhsi->ops->cfhsi_down(cfhsi->ops); err_activate: destroy_workqueue(cfhsi->wq); err_create_wq: @@ -1305,7 +1303,7 @@ static int cfhsi_close(struct net_device *ndev) del_timer_sync(&cfhsi->aggregation_timer); /* Cancel pending RX request (if any) */ - cfhsi->dev->cfhsi_rx_cancel(cfhsi->dev); + cfhsi->ops->cfhsi_rx_cancel(cfhsi->ops); /* Destroy workqueue */ destroy_workqueue(cfhsi->wq); @@ -1318,7 +1316,7 @@ static int cfhsi_close(struct net_device *ndev) cfhsi_abort_tx(cfhsi); /* Deactivate interface */ - cfhsi->dev->cfhsi_down(cfhsi->dev); + cfhsi->ops->cfhsi_down(cfhsi->ops); /* Free buffers. */ kfree(tx_buf); @@ -1335,7 +1333,7 @@ static void cfhsi_uninit(struct net_device *dev) list_del(&cfhsi->list); } -static const struct net_device_ops cfhsi_ops = { +static const struct net_device_ops cfhsi_netdevops = { .ndo_uninit = cfhsi_uninit, .ndo_open = cfhsi_open, .ndo_stop = cfhsi_close, @@ -1419,7 +1417,7 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct cfhsi *cfhsi = NULL; - struct platform_device *(*get_dev)(void); + struct cfhsi_ops *(*get_ops)(void); ASSERT_RTNL(); @@ -1427,36 +1425,31 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, cfhsi_netlink_parms(data, cfhsi); dev_net_set(cfhsi->ndev, src_net); - get_dev = symbol_get(cfhsi_get_device); - if (!get_dev) { - pr_err("%s: failed to get the cfhsi device symbol\n", __func__); + get_ops = symbol_get(cfhsi_get_ops); + if (!get_ops) { + pr_err("%s: failed to get the cfhsi_ops\n", __func__); return -ENODEV; } /* Assign the HSI device. */ - cfhsi->pdev = (*get_dev)(); - if (!cfhsi->pdev) { - pr_err("%s: failed to get the cfhsi device\n", __func__); + cfhsi->ops = (*get_ops)(); + if (!cfhsi->ops) { + pr_err("%s: failed to get the cfhsi_ops\n", __func__); goto err; } - /* Assign the HSI device. */ - cfhsi->dev = cfhsi->pdev->dev.platform_data; - /* Assign the driver to this HSI device. */ - cfhsi->dev->drv = &cfhsi->drv; - + cfhsi->ops->cb_ops = &cfhsi->cb_ops; if (register_netdevice(dev)) { - pr_warn("%s: device rtml registration failed\n", __func__); + pr_warn("%s: caif_hsi device registration failed\n", __func__); goto err; - } /* Add CAIF HSI device to list. */ list_add_tail(&cfhsi->list, &cfhsi_list); return 0; err: - symbol_put(cfhsi_get_device); + symbol_put(cfhsi_get_ops); return -ENODEV; } diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index a77b2bd7719..6dc7dc2674b 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -93,25 +93,25 @@ struct cfhsi_desc { #endif /* Structure implemented by the CAIF HSI driver. */ -struct cfhsi_drv { - void (*tx_done_cb) (struct cfhsi_drv *drv); - void (*rx_done_cb) (struct cfhsi_drv *drv); - void (*wake_up_cb) (struct cfhsi_drv *drv); - void (*wake_down_cb) (struct cfhsi_drv *drv); +struct cfhsi_cb_ops { + void (*tx_done_cb) (struct cfhsi_cb_ops *drv); + void (*rx_done_cb) (struct cfhsi_cb_ops *drv); + void (*wake_up_cb) (struct cfhsi_cb_ops *drv); + void (*wake_down_cb) (struct cfhsi_cb_ops *drv); }; /* Structure implemented by HSI device. */ -struct cfhsi_dev { - int (*cfhsi_up) (struct cfhsi_dev *dev); - int (*cfhsi_down) (struct cfhsi_dev *dev); - int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_dev *dev); - int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_dev *dev); - int (*cfhsi_wake_up) (struct cfhsi_dev *dev); - int (*cfhsi_wake_down) (struct cfhsi_dev *dev); - int (*cfhsi_get_peer_wake) (struct cfhsi_dev *dev, bool *status); - int (*cfhsi_fifo_occupancy)(struct cfhsi_dev *dev, size_t *occupancy); - int (*cfhsi_rx_cancel)(struct cfhsi_dev *dev); - struct cfhsi_drv *drv; +struct cfhsi_ops { + int (*cfhsi_up) (struct cfhsi_ops *dev); + int (*cfhsi_down) (struct cfhsi_ops *dev); + int (*cfhsi_tx) (u8 *ptr, int len, struct cfhsi_ops *dev); + int (*cfhsi_rx) (u8 *ptr, int len, struct cfhsi_ops *dev); + int (*cfhsi_wake_up) (struct cfhsi_ops *dev); + int (*cfhsi_wake_down) (struct cfhsi_ops *dev); + int (*cfhsi_get_peer_wake) (struct cfhsi_ops *dev, bool *status); + int (*cfhsi_fifo_occupancy) (struct cfhsi_ops *dev, size_t *occupancy); + int (*cfhsi_rx_cancel)(struct cfhsi_ops *dev); + struct cfhsi_cb_ops *cb_ops; }; /* Structure holds status of received CAIF frames processing */ @@ -138,8 +138,8 @@ struct cfhsi { struct net_device *ndev; struct platform_device *pdev; struct sk_buff_head qhead[CFHSI_PRIO_LAST]; - struct cfhsi_drv drv; - struct cfhsi_dev *dev; + struct cfhsi_cb_ops cb_ops; + struct cfhsi_ops *ops; int tx_state; struct cfhsi_rx_state rx_state; unsigned long inactivity_timeout; @@ -190,6 +190,6 @@ enum ifla_caif_hsi { __IFLA_CAIF_HSI_MAX }; -extern struct platform_device *cfhsi_get_device(void); +extern struct cfhsi_ops *cfhsi_get_ops(void); #endif /* CAIF_HSI_H_ */ -- cgit v1.2.3-70-g09d2 From 91fa0cbc0c47930f771bf5425109956cc99c6b0b Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Mon, 25 Jun 2012 07:49:43 +0000 Subject: caif-hsi: Remove use of module parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove use of module parameters on caif hsi device, as rtnl configuration parameters are already supported. All caif hsi configuration data is put in cfhsi_config, and default values in hsi_default_config. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 151 +++++++++++++++++++++----------------------- include/net/caif/caif_hsi.h | 14 ++-- 2 files changed, 82 insertions(+), 83 deletions(-) (limited to 'include/net') diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 0927c108bd1..1c2bd01e159 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -32,51 +32,39 @@ MODULE_DESCRIPTION("CAIF HSI driver"); #define PAD_POW2(x, pow) ((((x)&((pow)-1)) == 0) ? 0 :\ (((pow)-((x)&((pow)-1))))) -static int inactivity_timeout = 1000; -module_param(inactivity_timeout, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(inactivity_timeout, "Inactivity timeout on HSI, ms."); +static const struct cfhsi_config hsi_default_config = { -static int aggregation_timeout = 1; -module_param(aggregation_timeout, int, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(aggregation_timeout, "Aggregation timeout on HSI, ms."); + /* Inactivity timeout on HSI, ms */ + .inactivity_timeout = HZ, -/* - * HSI padding options. - * Warning: must be a base of 2 (& operation used) and can not be zero ! - */ -static int hsi_head_align = 4; -module_param(hsi_head_align, int, S_IRUGO); -MODULE_PARM_DESC(hsi_head_align, "HSI head alignment."); + /* Aggregation timeout (ms) of zero means no aggregation is done*/ + .aggregation_timeout = 1, -static int hsi_tail_align = 4; -module_param(hsi_tail_align, int, S_IRUGO); -MODULE_PARM_DESC(hsi_tail_align, "HSI tail alignment."); - -/* - * HSI link layer flowcontrol thresholds. - * Warning: A high threshold value migth increase throughput but it will at - * the same time prevent channel prioritization and increase the risk of - * flooding the modem. The high threshold should be above the low. - */ -static int hsi_high_threshold = 100; -module_param(hsi_high_threshold, int, S_IRUGO); -MODULE_PARM_DESC(hsi_high_threshold, "HSI high threshold (FLOW OFF)."); + /* + * HSI link layer flow-control thresholds. + * Threshold values for the HSI packet queue. Flow-control will be + * asserted when the number of packets exceeds q_high_mark. It will + * not be de-asserted before the number of packets drops below + * q_low_mark. + * Warning: A high threshold value might increase throughput but it + * will at the same time prevent channel prioritization and increase + * the risk of flooding the modem. The high threshold should be above + * the low. + */ + .q_high_mark = 100, + .q_low_mark = 50, -static int hsi_low_threshold = 50; -module_param(hsi_low_threshold, int, S_IRUGO); -MODULE_PARM_DESC(hsi_low_threshold, "HSI high threshold (FLOW ON)."); + /* + * HSI padding options. + * Warning: must be a base of 2 (& operation used) and can not be zero ! + */ + .head_align = 4, + .tail_align = 4, +}; #define ON 1 #define OFF 0 -/* - * Threshold values for the HSI packet queue. Flowcontrol will be asserted - * when the number of packets exceeds HIGH_WATER_MARK. It will not be - * de-asserted before the number of packets drops below LOW_WATER_MARK. - */ -#define LOW_WATER_MARK hsi_low_threshold -#define HIGH_WATER_MARK hsi_high_threshold - static LIST_HEAD(cfhsi_list); static void cfhsi_inactivity_tout(unsigned long arg) @@ -99,8 +87,8 @@ static void cfhsi_update_aggregation_stats(struct cfhsi *cfhsi, int hpad, tpad, len; info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), hsi_head_align); - tpad = PAD_POW2((skb->len + hpad), hsi_tail_align); + hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); + tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); len = skb->len + hpad + tpad; if (direction > 0) @@ -113,7 +101,7 @@ static bool cfhsi_can_send_aggregate(struct cfhsi *cfhsi) { int i; - if (cfhsi->aggregation_timeout == 0) + if (cfhsi->cfg.aggregation_timeout == 0) return true; for (i = 0; i < CFHSI_PRIO_BEBK; ++i) { @@ -169,7 +157,7 @@ static void cfhsi_abort_tx(struct cfhsi *cfhsi) cfhsi->tx_state = CFHSI_TX_STATE_IDLE; if (!test_bit(CFHSI_SHUTDOWN, &cfhsi->bits)) mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); } @@ -250,8 +238,8 @@ static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) /* Calculate needed head alignment and tail alignment. */ info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), hsi_head_align); - tpad = PAD_POW2((skb->len + hpad), hsi_tail_align); + hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); + tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); /* Check if frame still fits with added alignment. */ if ((skb->len + hpad + tpad) <= CFHSI_MAX_EMB_FRM_SZ) { @@ -292,8 +280,8 @@ static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi) /* Calculate needed head alignment and tail alignment. */ info = (struct caif_payload_info *)&skb->cb; - hpad = 1 + PAD_POW2((info->hdr_len + 1), hsi_head_align); - tpad = PAD_POW2((skb->len + hpad), hsi_tail_align); + hpad = 1 + PAD_POW2((info->hdr_len + 1), cfhsi->cfg.head_align); + tpad = PAD_POW2((skb->len + hpad), cfhsi->cfg.tail_align); /* Fill in CAIF frame length in descriptor. */ desc->cffrm_len[nfrms] = hpad + skb->len + tpad; @@ -364,7 +352,7 @@ static void cfhsi_start_tx(struct cfhsi *cfhsi) cfhsi->tx_state = CFHSI_TX_STATE_IDLE; /* Start inactivity timer. */ mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); break; } @@ -390,7 +378,7 @@ static void cfhsi_tx_done(struct cfhsi *cfhsi) */ spin_lock_bh(&cfhsi->lock); if (cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) <= cfhsi->q_low_mark && + cfhsi_tx_queue_len(cfhsi) <= cfhsi->cfg.q_low_mark && cfhsi->cfdev.flowctrl) { cfhsi->flow_off_sent = 0; @@ -402,7 +390,7 @@ static void cfhsi_tx_done(struct cfhsi *cfhsi) cfhsi_start_tx(cfhsi); } else { mod_timer(&cfhsi->aggregation_timer, - jiffies + cfhsi->aggregation_timeout); + jiffies + cfhsi->cfg.aggregation_timeout); spin_unlock_bh(&cfhsi->lock); } @@ -645,7 +633,7 @@ static void cfhsi_rx_done(struct cfhsi *cfhsi) /* Update inactivity timer if pending. */ spin_lock_bh(&cfhsi->lock); mod_timer_pending(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); if (cfhsi->rx_state.state == CFHSI_RX_STATE_DESC) { @@ -880,7 +868,7 @@ wake_ack: __func__); /* Start inactivity timer. */ mod_timer(&cfhsi->inactivity_timer, - jiffies + cfhsi->inactivity_timeout); + jiffies + cfhsi->cfg.inactivity_timeout); spin_unlock_bh(&cfhsi->lock); return; } @@ -1071,7 +1059,7 @@ static int cfhsi_xmit(struct sk_buff *skb, struct net_device *dev) /* Send flow off if number of packets is above high water mark. */ if (!cfhsi->flow_off_sent && - cfhsi_tx_queue_len(cfhsi) > cfhsi->q_high_mark && + cfhsi_tx_queue_len(cfhsi) > cfhsi->cfg.q_high_mark && cfhsi->cfdev.flowctrl) { cfhsi->flow_off_sent = 1; cfhsi->cfdev.flowctrl(cfhsi->ndev, OFF); @@ -1143,6 +1131,7 @@ static void cfhsi_setup(struct net_device *dev) cfhsi->cfdev.use_stx = false; cfhsi->cfdev.use_fcs = false; cfhsi->ndev = dev; + cfhsi->cfg = hsi_default_config; } static int cfhsi_open(struct net_device *ndev) @@ -1158,9 +1147,6 @@ static int cfhsi_open(struct net_device *ndev) /* Set flow info */ cfhsi->flow_off_sent = 0; - cfhsi->q_low_mark = LOW_WATER_MARK; - cfhsi->q_high_mark = HIGH_WATER_MARK; - /* * Allocate a TX buffer with the size of a HSI packet descriptors @@ -1188,20 +1174,8 @@ static int cfhsi_open(struct net_device *ndev) goto err_alloc_rx_flip; } - /* Pre-calculate inactivity timeout. */ - if (inactivity_timeout != -1) { - cfhsi->inactivity_timeout = - inactivity_timeout * HZ / 1000; - if (!cfhsi->inactivity_timeout) - cfhsi->inactivity_timeout = 1; - else if (cfhsi->inactivity_timeout > NEXT_TIMER_MAX_DELTA) - cfhsi->inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } else { - cfhsi->inactivity_timeout = NEXT_TIMER_MAX_DELTA; - } - /* Initialize aggregation timeout */ - cfhsi->aggregation_timeout = aggregation_timeout; + cfhsi->cfg.aggregation_timeout = hsi_default_config.aggregation_timeout; /* Initialize recieve vaiables. */ cfhsi->rx_ptr = cfhsi->rx_buf; @@ -1350,24 +1324,39 @@ static void cfhsi_netlink_parms(struct nlattr *data[], struct cfhsi *cfhsi) } i = __IFLA_CAIF_HSI_INACTIVITY_TOUT; - if (data[i]) - inactivity_timeout = nla_get_u32(data[i]); + /* + * Inactivity timeout in millisecs. Lowest possible value is 1, + * and highest possible is NEXT_TIMER_MAX_DELTA. + */ + if (data[i]) { + u32 inactivity_timeout = nla_get_u32(data[i]); + /* Pre-calculate inactivity timeout. */ + cfhsi->cfg.inactivity_timeout = inactivity_timeout * HZ / 1000; + if (cfhsi->cfg.inactivity_timeout == 0) + cfhsi->cfg.inactivity_timeout = 1; + else if (cfhsi->cfg.inactivity_timeout > NEXT_TIMER_MAX_DELTA) + cfhsi->cfg.inactivity_timeout = NEXT_TIMER_MAX_DELTA; + } i = __IFLA_CAIF_HSI_AGGREGATION_TOUT; if (data[i]) - aggregation_timeout = nla_get_u32(data[i]); + cfhsi->cfg.aggregation_timeout = nla_get_u32(data[i]); i = __IFLA_CAIF_HSI_HEAD_ALIGN; if (data[i]) - hsi_head_align = nla_get_u32(data[i]); + cfhsi->cfg.head_align = nla_get_u32(data[i]); i = __IFLA_CAIF_HSI_TAIL_ALIGN; if (data[i]) - hsi_tail_align = nla_get_u32(data[i]); + cfhsi->cfg.tail_align = nla_get_u32(data[i]); i = __IFLA_CAIF_HSI_QHIGH_WATERMARK; if (data[i]) - hsi_high_threshold = nla_get_u32(data[i]); + cfhsi->cfg.q_high_mark = nla_get_u32(data[i]); + + i = __IFLA_CAIF_HSI_QLOW_WATERMARK; + if (data[i]) + cfhsi->cfg.q_low_mark = nla_get_u32(data[i]); } static int caif_hsi_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1398,16 +1387,20 @@ static size_t caif_hsi_get_size(const struct net_device *dev) static int caif_hsi_fill_info(struct sk_buff *skb, const struct net_device *dev) { + struct cfhsi *cfhsi = netdev_priv(dev); + if (nla_put_u32(skb, __IFLA_CAIF_HSI_INACTIVITY_TOUT, - inactivity_timeout) || + cfhsi->cfg.inactivity_timeout) || nla_put_u32(skb, __IFLA_CAIF_HSI_AGGREGATION_TOUT, - aggregation_timeout) || - nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, hsi_head_align) || - nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, hsi_tail_align) || + cfhsi->cfg.aggregation_timeout) || + nla_put_u32(skb, __IFLA_CAIF_HSI_HEAD_ALIGN, + cfhsi->cfg.head_align) || + nla_put_u32(skb, __IFLA_CAIF_HSI_TAIL_ALIGN, + cfhsi->cfg.tail_align) || nla_put_u32(skb, __IFLA_CAIF_HSI_QHIGH_WATERMARK, - hsi_high_threshold) || + cfhsi->cfg.q_high_mark) || nla_put_u32(skb, __IFLA_CAIF_HSI_QLOW_WATERMARK, - hsi_low_threshold)) + cfhsi->cfg.q_low_mark)) return -EMSGSIZE; return 0; diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h index 6dc7dc2674b..bcb9cc3ce98 100644 --- a/include/net/caif/caif_hsi.h +++ b/include/net/caif/caif_hsi.h @@ -132,6 +132,15 @@ enum { CFHSI_PRIO_LAST, }; +struct cfhsi_config { + u32 inactivity_timeout; + u32 aggregation_timeout; + u32 head_align; + u32 tail_align; + u32 q_high_mark; + u32 q_low_mark; +}; + /* Structure implemented by CAIF HSI drivers. */ struct cfhsi { struct caif_dev_common cfdev; @@ -142,7 +151,7 @@ struct cfhsi { struct cfhsi_ops *ops; int tx_state; struct cfhsi_rx_state rx_state; - unsigned long inactivity_timeout; + struct cfhsi_config cfg; int rx_len; u8 *rx_ptr; u8 *tx_buf; @@ -150,8 +159,6 @@ struct cfhsi { u8 *rx_flip_buf; spinlock_t lock; int flow_off_sent; - u32 q_low_mark; - u32 q_high_mark; struct list_head list; struct work_struct wake_up_work; struct work_struct wake_down_work; @@ -164,7 +171,6 @@ struct cfhsi { struct timer_list rx_slowpath_timer; /* TX aggregation */ - unsigned long aggregation_timeout; int aggregation_len; struct timer_list aggregation_timer; -- cgit v1.2.3-70-g09d2 From 88e920b4505105b710f8d4a535ec02c4078f8e2e Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 21 Jun 2012 11:09:54 -0700 Subject: nl80211: specify RSSI threshold in scheduled scan Support configuring an RSSI threshold in dBm (s32) when requesting scheduled scan, below which a BSS won't be reported by the cfg80211 driver. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 6 ++++++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 9 ++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b7c3b737ddd..c0fc5d27733 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1542,6 +1542,9 @@ enum nl80211_attrs { #define NL80211_MIN_REMAIN_ON_CHANNEL_TIME 10 +/* default RSSI threshold for scan results if none specified. */ +#define NL80211_SCAN_RSSI_THOLD_OFF -300 + /** * enum nl80211_iftype - (virtual) interface types * @@ -1974,6 +1977,8 @@ enum nl80211_reg_rule_attr { * @__NL80211_SCHED_SCAN_MATCH_ATTR_INVALID: attribute number 0 is reserved * @NL80211_SCHED_SCAN_MATCH_ATTR_SSID: SSID to be used for matching, * only report BSS with matching SSID. + * @NL80211_SCHED_SCAN_MATCH_ATTR_RSSI: RSSI threshold (in dBm) for reporting a + * BSS in scan results. Filtering is turned off if not specified. * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -1982,6 +1987,7 @@ enum nl80211_sched_scan_match_attr { __NL80211_SCHED_SCAN_MATCH_ATTR_INVALID, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + NL80211_SCHED_SCAN_MATCH_ATTR_RSSI, /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f0163a10b8c..061c01957e5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1027,6 +1027,7 @@ struct cfg80211_match_set { * @wiphy: the wiphy this was for * @dev: the interface * @channels: channels to scan + * @rssi_thold: don't report scan results below this threshold (in s32 dBm) */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1037,6 +1038,7 @@ struct cfg80211_sched_scan_request { size_t ie_len; struct cfg80211_match_set *match_sets; int n_match_sets; + s32 rssi_thold; /* internal */ struct wiphy *wiphy; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 888fadc4d63..234ff3bbd10 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -340,6 +340,7 @@ static const struct nla_policy nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, + [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, }; /* ifidx get helper */ @@ -4387,7 +4388,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCHED_SCAN_MATCH], tmp) { - struct nlattr *ssid; + struct nlattr *ssid, *rssi; nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, nla_data(attr), nla_len(attr), @@ -4403,6 +4404,12 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->match_sets[i].ssid.ssid_len = nla_len(ssid); } + rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI]; + if (rssi) + request->rssi_thold = nla_get_u32(rssi); + else + request->rssi_thold = + NL80211_SCAN_RSSI_THOLD_OFF; i++; } } -- cgit v1.2.3-70-g09d2 From 32bad7e30f113a8a5cebe4704bf6519ab4383e1b Mon Sep 17 00:00:00 2001 From: "alex.bluesman.smirnov@gmail.com" Date: Mon, 25 Jun 2012 23:24:48 +0000 Subject: mac802154: add wpan device-class support Every real 802.15.4 transceiver, which works with software MAC layer, can be classified as a wpan device in this stack. So the wpan device implementation provides missing link in datapath between the device drivers and the Linux network queue. According to the IEEE 802.15.4 standard each packet can be one of the following types: - beacon - MAC layer command - ACK - data This patch adds support for the data packet-type only, but this is enough to perform data transmission and receiving over radio. Signed-off-by: Alexander Smirnov Signed-off-by: David S. Miller --- include/linux/nl802154.h | 14 +- include/net/mac802154.h | 8 + net/mac802154/Makefile | 2 +- net/mac802154/ieee802154_dev.c | 4 + net/mac802154/mac802154.h | 4 + net/mac802154/mac_cmd.c | 4 + net/mac802154/rx.c | 1 + net/mac802154/wpan.c | 559 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 583 insertions(+), 13 deletions(-) create mode 100644 net/mac802154/wpan.c (limited to 'include/net') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 5a3db3aa5f1..fd4f2d1cdf6 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -130,18 +130,8 @@ enum { enum { __IEEE802154_DEV_INVALID = -1, - /* TODO: - * Nowadays three device types supported by this stack at linux-zigbee - * project: WPAN = 0, MONITOR = 1 and SMAC = 2. - * - * Since this stack implementation exists many years, it's definitely - * bad idea to change the assigned values due to they are already used - * by third-party userspace software like: iz-tools, wireshark... - * - * Currently only monitor device is added and initialized by '1' for - * compatibility. - */ - IEEE802154_DEV_MONITOR = 1, + IEEE802154_DEV_WPAN, + IEEE802154_DEV_MONITOR, __IEEE802154_DEV_MAX, }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index c9f8ab5cc68..d0d11df9cba 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -21,6 +21,14 @@ #include +/* General MAC frame format: + * 2 bytes: Frame Control + * 1 byte: Sequence Number + * 20 bytes: Addressing fields + * 14 bytes: Auxiliary Security Header + */ +#define MAC802154_FRAME_HARD_HEADER_LEN (2 + 1 + 20 + 14) + /* The following flags are used to indicate changed address settings from * the stack to the hardware. */ diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile index ec1bd3fc127..57cf5d1a2e4 100644 --- a/net/mac802154/Makefile +++ b/net/mac802154/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_MAC802154) += mac802154.o -mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o +mac802154-objs := ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c index e3edfb0661b..e748aed290a 100644 --- a/net/mac802154/ieee802154_dev.c +++ b/net/mac802154/ieee802154_dev.c @@ -140,6 +140,10 @@ mac802154_add_iface(struct wpan_phy *phy, const char *name, int type) dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), name, mac802154_monitor_setup); break; + case IEEE802154_DEV_WPAN: + dev = alloc_netdev(sizeof(struct mac802154_sub_if_data), + name, mac802154_wpan_setup); + break; default: dev = NULL; err = -EINVAL; diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index 789d9c948ae..c0efcf19a17 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -93,6 +93,7 @@ struct mac802154_sub_if_data { #define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */ extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced; +extern struct ieee802154_mlme_ops mac802154_mlme_wpan; int mac802154_slave_open(struct net_device *dev); int mac802154_slave_close(struct net_device *dev); @@ -100,6 +101,9 @@ int mac802154_slave_close(struct net_device *dev); void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb); void mac802154_monitor_setup(struct net_device *dev); +void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb); +void mac802154_wpan_setup(struct net_device *dev); + netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb, u8 page, u8 chan); diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 7a5d0e052cd..db8341957bd 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -43,3 +43,7 @@ struct wpan_phy *mac802154_get_phy(const struct net_device *dev) struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = { .get_phy = mac802154_get_phy, }; + +struct ieee802154_mlme_ops mac802154_mlme_wpan = { + .get_phy = mac802154_get_phy, +}; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 4a7d76d4f8b..38548ec2098 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -77,6 +77,7 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi) } mac802154_monitors_rx(priv, skb); + mac802154_wpans_rx(priv, skb); out: dev_kfree_skb(skb); return; diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c new file mode 100644 index 00000000000..f30f6d4beea --- /dev/null +++ b/net/mac802154/wpan.c @@ -0,0 +1,559 @@ +/* + * Copyright 2007-2012 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Dmitry Eremin-Solenikov + * Sergey Lapin + * Maxim Gorbachyov + * Alexander Smirnov + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "mac802154.h" + +static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val) +{ + if (unlikely(!pskb_may_pull(skb, 1))) + return -EINVAL; + + *val = skb->data[0]; + skb_pull(skb, 1); + + return 0; +} + +static inline int mac802154_fetch_skb_u16(struct sk_buff *skb, u16 *val) +{ + if (unlikely(!pskb_may_pull(skb, 2))) + return -EINVAL; + + *val = skb->data[0] | (skb->data[1] << 8); + skb_pull(skb, 2); + + return 0; +} + +static inline void mac802154_haddr_copy_swap(u8 *dest, const u8 *src) +{ + int i; + for (i = 0; i < IEEE802154_ADDR_LEN; i++) + dest[IEEE802154_ADDR_LEN - i - 1] = src[i]; +} + +static int +mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + struct sockaddr_ieee802154 *sa = + (struct sockaddr_ieee802154 *)&ifr->ifr_addr; + int err = -ENOIOCTLCMD; + + spin_lock_bh(&priv->mib_lock); + + switch (cmd) { + case SIOCGIFADDR: + if (priv->pan_id == IEEE802154_PANID_BROADCAST || + priv->short_addr == IEEE802154_ADDR_BROADCAST) { + err = -EADDRNOTAVAIL; + break; + } + + sa->family = AF_IEEE802154; + sa->addr.addr_type = IEEE802154_ADDR_SHORT; + sa->addr.pan_id = priv->pan_id; + sa->addr.short_addr = priv->short_addr; + + err = 0; + break; + case SIOCSIFADDR: + dev_warn(&dev->dev, + "Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n"); + if (sa->family != AF_IEEE802154 || + sa->addr.addr_type != IEEE802154_ADDR_SHORT || + sa->addr.pan_id == IEEE802154_PANID_BROADCAST || + sa->addr.short_addr == IEEE802154_ADDR_BROADCAST || + sa->addr.short_addr == IEEE802154_ADDR_UNDEF) { + err = -EINVAL; + break; + } + + priv->pan_id = sa->addr.pan_id; + priv->short_addr = sa->addr.short_addr; + + err = 0; + break; + } + + spin_unlock_bh(&priv->mib_lock); + return err; +} + +static int mac802154_wpan_mac_addr(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + + if (netif_running(dev)) + return -EBUSY; + + /* FIXME: validate addr */ + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + mac802154_dev_set_ieee_addr(dev); + return 0; +} + +static int mac802154_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, + const void *_daddr, + const void *_saddr, + unsigned len) +{ + const struct ieee802154_addr *saddr = _saddr; + const struct ieee802154_addr *daddr = _daddr; + struct ieee802154_addr dev_addr; + struct mac802154_sub_if_data *priv = netdev_priv(dev); + int pos = 2; + u8 *head; + u16 fc; + + if (!daddr) + return -EINVAL; + + head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL); + if (head == NULL) + return -ENOMEM; + + head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ + fc = mac_cb_type(skb); + + if (!saddr) { + spin_lock_bh(&priv->mib_lock); + + if (priv->short_addr == IEEE802154_ADDR_BROADCAST || + priv->short_addr == IEEE802154_ADDR_UNDEF || + priv->pan_id == IEEE802154_PANID_BROADCAST) { + dev_addr.addr_type = IEEE802154_ADDR_LONG; + memcpy(dev_addr.hwaddr, dev->dev_addr, + IEEE802154_ADDR_LEN); + } else { + dev_addr.addr_type = IEEE802154_ADDR_SHORT; + dev_addr.short_addr = priv->short_addr; + } + + dev_addr.pan_id = priv->pan_id; + saddr = &dev_addr; + + spin_unlock_bh(&priv->mib_lock); + } + + if (daddr->addr_type != IEEE802154_ADDR_NONE) { + fc |= (daddr->addr_type << IEEE802154_FC_DAMODE_SHIFT); + + head[pos++] = daddr->pan_id & 0xff; + head[pos++] = daddr->pan_id >> 8; + + if (daddr->addr_type == IEEE802154_ADDR_SHORT) { + head[pos++] = daddr->short_addr & 0xff; + head[pos++] = daddr->short_addr >> 8; + } else { + mac802154_haddr_copy_swap(head + pos, daddr->hwaddr); + pos += IEEE802154_ADDR_LEN; + } + } + + if (saddr->addr_type != IEEE802154_ADDR_NONE) { + fc |= (saddr->addr_type << IEEE802154_FC_SAMODE_SHIFT); + + if ((saddr->pan_id == daddr->pan_id) && + (saddr->pan_id != IEEE802154_PANID_BROADCAST)) { + /* PANID compression/intra PAN */ + fc |= IEEE802154_FC_INTRA_PAN; + } else { + head[pos++] = saddr->pan_id & 0xff; + head[pos++] = saddr->pan_id >> 8; + } + + if (saddr->addr_type == IEEE802154_ADDR_SHORT) { + head[pos++] = saddr->short_addr & 0xff; + head[pos++] = saddr->short_addr >> 8; + } else { + mac802154_haddr_copy_swap(head + pos, saddr->hwaddr); + pos += IEEE802154_ADDR_LEN; + } + } + + head[0] = fc; + head[1] = fc >> 8; + + memcpy(skb_push(skb, pos), head, pos); + kfree(head); + + return pos; +} + +static int +mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const u8 *hdr = skb_mac_header(skb); + const u8 *tail = skb_tail_pointer(skb); + struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr; + u16 fc; + int da_type; + + if (hdr + 3 > tail) + goto malformed; + + fc = hdr[0] | (hdr[1] << 8); + + hdr += 3; + + da_type = IEEE802154_FC_DAMODE(fc); + addr->addr_type = IEEE802154_FC_SAMODE(fc); + + switch (da_type) { + case IEEE802154_ADDR_NONE: + if (fc & IEEE802154_FC_INTRA_PAN) + goto malformed; + break; + case IEEE802154_ADDR_LONG: + if (fc & IEEE802154_FC_INTRA_PAN) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + IEEE802154_ADDR_LEN > tail) + goto malformed; + + hdr += IEEE802154_ADDR_LEN; + break; + case IEEE802154_ADDR_SHORT: + if (fc & IEEE802154_FC_INTRA_PAN) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + 2 > tail) + goto malformed; + + hdr += 2; + break; + default: + goto malformed; + + } + + switch (addr->addr_type) { + case IEEE802154_ADDR_NONE: + break; + case IEEE802154_ADDR_LONG: + if (!(fc & IEEE802154_FC_INTRA_PAN)) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + IEEE802154_ADDR_LEN > tail) + goto malformed; + + mac802154_haddr_copy_swap(addr->hwaddr, hdr); + hdr += IEEE802154_ADDR_LEN; + break; + case IEEE802154_ADDR_SHORT: + if (!(fc & IEEE802154_FC_INTRA_PAN)) { + if (hdr + 2 > tail) + goto malformed; + addr->pan_id = hdr[0] | (hdr[1] << 8); + hdr += 2; + } + + if (hdr + 2 > tail) + goto malformed; + + addr->short_addr = hdr[0] | (hdr[1] << 8); + hdr += 2; + break; + default: + goto malformed; + } + + return sizeof(struct ieee802154_addr); + +malformed: + pr_debug("malformed packet\n"); + return 0; +} + +static netdev_tx_t +mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct mac802154_sub_if_data *priv; + u8 chan, page; + + priv = netdev_priv(dev); + + spin_lock_bh(&priv->mib_lock); + chan = priv->chan; + page = priv->page; + spin_unlock_bh(&priv->mib_lock); + + if (chan == MAC802154_CHAN_NONE || + page >= WPAN_NUM_PAGES || + chan >= WPAN_NUM_CHANNELS) + return NETDEV_TX_OK; + + skb->skb_iif = dev->ifindex; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + return mac802154_tx(priv->hw, skb, page, chan); +} + +static struct header_ops mac802154_header_ops = { + .create = mac802154_header_create, + .parse = mac802154_header_parse, +}; + +static const struct net_device_ops mac802154_wpan_ops = { + .ndo_open = mac802154_slave_open, + .ndo_stop = mac802154_slave_close, + .ndo_start_xmit = mac802154_wpan_xmit, + .ndo_do_ioctl = mac802154_wpan_ioctl, + .ndo_set_mac_address = mac802154_wpan_mac_addr, +}; + +void mac802154_wpan_setup(struct net_device *dev) +{ + struct mac802154_sub_if_data *priv; + + dev->addr_len = IEEE802154_ADDR_LEN; + memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); + + dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; + dev->header_ops = &mac802154_header_ops; + dev->needed_tailroom = 2; /* FCS */ + dev->mtu = IEEE802154_MTU; + dev->tx_queue_len = 10; + dev->type = ARPHRD_IEEE802154; + dev->flags = IFF_NOARP | IFF_BROADCAST; + dev->watchdog_timeo = 0; + + dev->destructor = free_netdev; + dev->netdev_ops = &mac802154_wpan_ops; + dev->ml_priv = &mac802154_mlme_wpan; + + priv = netdev_priv(dev); + priv->type = IEEE802154_DEV_WPAN; + + priv->chan = MAC802154_CHAN_NONE; + priv->page = 0; + + spin_lock_init(&priv->mib_lock); + + get_random_bytes(&priv->bsn, 1); + get_random_bytes(&priv->dsn, 1); + + priv->pan_id = IEEE802154_PANID_BROADCAST; + priv->short_addr = IEEE802154_ADDR_BROADCAST; +} + +static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb) +{ + return netif_rx(skb); +} + +static int +mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb) +{ + pr_debug("getting packet via slave interface %s\n", sdata->dev->name); + + spin_lock_bh(&sdata->mib_lock); + + switch (mac_cb(skb)->da.addr_type) { + case IEEE802154_ADDR_NONE: + if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) + /* FIXME: check if we are PAN coordinator */ + skb->pkt_type = PACKET_OTHERHOST; + else + /* ACK comes with both addresses empty */ + skb->pkt_type = PACKET_HOST; + break; + case IEEE802154_ADDR_LONG: + if (mac_cb(skb)->da.pan_id != sdata->pan_id && + mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST) + skb->pkt_type = PACKET_OTHERHOST; + else if (!memcmp(mac_cb(skb)->da.hwaddr, sdata->dev->dev_addr, + IEEE802154_ADDR_LEN)) + skb->pkt_type = PACKET_HOST; + else + skb->pkt_type = PACKET_OTHERHOST; + break; + case IEEE802154_ADDR_SHORT: + if (mac_cb(skb)->da.pan_id != sdata->pan_id && + mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST) + skb->pkt_type = PACKET_OTHERHOST; + else if (mac_cb(skb)->da.short_addr == sdata->short_addr) + skb->pkt_type = PACKET_HOST; + else if (mac_cb(skb)->da.short_addr == + IEEE802154_ADDR_BROADCAST) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_OTHERHOST; + break; + default: + break; + } + + spin_unlock_bh(&sdata->mib_lock); + + skb->dev = sdata->dev; + + sdata->dev->stats.rx_packets++; + sdata->dev->stats.rx_bytes += skb->len; + + switch (mac_cb_type(skb)) { + case IEEE802154_FC_TYPE_DATA: + return mac802154_process_data(sdata->dev, skb); + default: + pr_warning("ieee802154: bad frame received (type = %d)\n", + mac_cb_type(skb)); + kfree_skb(skb); + return NET_RX_DROP; + } +} + +static int mac802154_parse_frame_start(struct sk_buff *skb) +{ + u8 *head = skb->data; + u16 fc; + + if (mac802154_fetch_skb_u16(skb, &fc) || + mac802154_fetch_skb_u8(skb, &(mac_cb(skb)->seq))) + goto err; + + pr_debug("fc: %04x dsn: %02x\n", fc, head[2]); + + mac_cb(skb)->flags = IEEE802154_FC_TYPE(fc); + mac_cb(skb)->sa.addr_type = IEEE802154_FC_SAMODE(fc); + mac_cb(skb)->da.addr_type = IEEE802154_FC_DAMODE(fc); + + if (fc & IEEE802154_FC_INTRA_PAN) + mac_cb(skb)->flags |= MAC_CB_FLAG_INTRAPAN; + + if (mac_cb(skb)->da.addr_type != IEEE802154_ADDR_NONE) { + if (mac802154_fetch_skb_u16(skb, &(mac_cb(skb)->da.pan_id))) + goto err; + + /* source PAN id compression */ + if (mac_cb_is_intrapan(skb)) + mac_cb(skb)->sa.pan_id = mac_cb(skb)->da.pan_id; + + pr_debug("dest PAN addr: %04x\n", mac_cb(skb)->da.pan_id); + + if (mac_cb(skb)->da.addr_type == IEEE802154_ADDR_SHORT) { + u16 *da = &(mac_cb(skb)->da.short_addr); + + if (mac802154_fetch_skb_u16(skb, da)) + goto err; + + pr_debug("destination address is short: %04x\n", + mac_cb(skb)->da.short_addr); + } else { + if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN)) + goto err; + + mac802154_haddr_copy_swap(mac_cb(skb)->da.hwaddr, + skb->data); + skb_pull(skb, IEEE802154_ADDR_LEN); + + pr_debug("destination address is hardware\n"); + } + } + + if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) { + /* non PAN-compression, fetch source address id */ + if (!(mac_cb_is_intrapan(skb))) { + u16 *sa_pan = &(mac_cb(skb)->sa.pan_id); + + if (mac802154_fetch_skb_u16(skb, sa_pan)) + goto err; + } + + pr_debug("source PAN addr: %04x\n", mac_cb(skb)->da.pan_id); + + if (mac_cb(skb)->sa.addr_type == IEEE802154_ADDR_SHORT) { + u16 *sa = &(mac_cb(skb)->sa.short_addr); + + if (mac802154_fetch_skb_u16(skb, sa)) + goto err; + + pr_debug("source address is short: %04x\n", + mac_cb(skb)->sa.short_addr); + } else { + if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN)) + goto err; + + mac802154_haddr_copy_swap(mac_cb(skb)->sa.hwaddr, + skb->data); + skb_pull(skb, IEEE802154_ADDR_LEN); + + pr_debug("source address is hardware\n"); + } + } + + return 0; +err: + return -EINVAL; +} + +void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb) +{ + int ret; + struct sk_buff *sskb; + struct mac802154_sub_if_data *sdata; + + ret = mac802154_parse_frame_start(skb); + if (ret) { + pr_debug("got invalid frame\n"); + return; + } + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &priv->slaves, list) { + if (sdata->type != IEEE802154_DEV_WPAN) + continue; + + sskb = skb_clone(skb, GFP_ATOMIC); + if (sskb) + mac802154_subif_frame(sdata, sskb); + } + rcu_read_unlock(); +} -- cgit v1.2.3-70-g09d2 From dfb89c56add259b72a9c68d6b2846c1cd8c4e4b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jun 2012 09:23:48 +0200 Subject: cfg80211: don't allow WoWLAN support without CONFIG_PM When CONFIG_PM is disabled, no device can possibly support WoWLAN since it can't go to sleep to start with. Due to this, mac80211 had even rejected the hardware registration. By making all the code and data for WoWLAN depend on CONFIG_PM we can promote this runtime error to a compile-time error. Add #ifdef around all WoWLAN code to remove it in systems that don't need it as they never suspend. Cc: Kalle Valo Acked-by: Luciano Coelho Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 2 ++ drivers/net/wireless/ti/wlcore/main.c | 2 ++ include/net/cfg80211.h | 2 ++ net/mac80211/main.c | 7 +++---- net/wireless/core.c | 4 ++++ net/wireless/nl80211.c | 6 ++++++ 6 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index f27e9732951..b8fce0d4d72 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3487,6 +3487,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) wiphy->cipher_suites = cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); +#ifdef CONFIG_PM wiphy->wowlan.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT | WIPHY_WOWLAN_GTK_REKEY_FAILURE | @@ -3496,6 +3497,7 @@ int ath6kl_cfg80211_init(struct ath6kl *ar) wiphy->wowlan.n_patterns = WOW_MAX_FILTERS_PER_LIST; wiphy->wowlan.pattern_min_len = 1; wiphy->wowlan.pattern_max_len = WOW_PATTERN_SIZE; +#endif wiphy->max_sched_scan_ssids = MAX_PROBED_SSID_INDEX; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 1156e3f578c..747a997bc60 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5321,6 +5321,7 @@ int __devinit wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) goto out_free_hw; } +#ifdef CONFIG_PM ret = enable_irq_wake(wl->irq); if (!ret) { wl->irq_wake_enabled = true; @@ -5334,6 +5335,7 @@ int __devinit wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) WL1271_RX_FILTER_MAX_PATTERN_SIZE; } } +#endif disable_irq(wl->irq); ret = wl12xx_get_hw_info(wl); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 061c01957e5..7d3cd3ce9a2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2153,7 +2153,9 @@ struct wiphy { char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; +#ifdef CONFIG_PM struct wiphy_wowlan_support wowlan; +#endif u16 max_remain_on_channel_duration; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0b040fb7367..aded0018f6f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -706,12 +706,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.offchannel_tx_hw_queue >= local->hw.queues)) return -EINVAL; - if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) #ifdef CONFIG_PM - && (!local->ops->suspend || !local->ops->resume) -#endif - ) + if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) && + (!local->ops->suspend || !local->ops->resume)) return -EINVAL; +#endif if ((hw->flags & IEEE80211_HW_SCAN_WHILE_IDLE) && !local->ops->hw_scan) return -EINVAL; diff --git a/net/wireless/core.c b/net/wireless/core.c index 907f62c80e2..ddd32afa5f0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -421,9 +421,11 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; +#ifdef CONFIG_PM if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) return -EINVAL; +#endif if (WARN_ON(wiphy->ap_sme_capa && !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME))) @@ -500,12 +502,14 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } +#ifdef CONFIG_PM if (rdev->wiphy.wowlan.n_patterns) { if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || rdev->wiphy.wowlan.pattern_min_len > rdev->wiphy.wowlan.pattern_max_len)) return -EINVAL; } +#endif /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 234ff3bbd10..067c9fe02a7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1112,6 +1112,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_ifs); } +#ifdef CONFIG_PM if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { struct nlattr *nl_wowlan; @@ -1152,6 +1153,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_wowlan); } +#endif if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, dev->wiphy.software_iftypes)) @@ -6276,6 +6278,7 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) return cfg80211_leave_mesh(rdev, dev); } +#ifdef CONFIG_PM static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6504,6 +6507,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) kfree(new_triggers.patterns); return err; } +#endif static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) { @@ -7158,6 +7162,7 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, +#ifdef CONFIG_PM { .cmd = NL80211_CMD_GET_WOWLAN, .doit = nl80211_get_wowlan, @@ -7174,6 +7179,7 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, +#endif { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, .doit = nl80211_set_rekey_data, -- cgit v1.2.3-70-g09d2 From f1caad274515ffd9841ac57ce9a7b5fc35bbf689 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 21 Jun 2012 04:36:39 +0000 Subject: netfilter: nf_conntrack: prepare l4proto->init_net cleanup l4proto->init contain quite redundant code. We can simplify this by adding a new parameter l3proto. This patch prepares that code simplification. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- net/netfilter/nf_conntrack_proto.c | 5 +++-- net/netfilter/nf_conntrack_proto_dccp.c | 2 +- net/netfilter/nf_conntrack_proto_generic.c | 2 +- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 4 ++-- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 11 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 81c52b5205f..5dd60f2d02a 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -97,7 +97,7 @@ struct nf_conntrack_l4proto { #endif int *net_id; /* Init l4proto pernet data */ - int (*init_net)(struct net *net); + int (*init_net)(struct net *net, u_int16_t proto); /* Protocol name */ const char *name; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 041923cb67a..76f7a2f657f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -337,7 +337,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int icmp_init_net(struct net *net) +static int icmp_init_net(struct net *net, u_int16_t proto) { struct nf_icmp_net *in = icmp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)in; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 63ed0121836..807ae09df0c 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -333,7 +333,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int icmpv6_init_net(struct net *net) +static int icmpv6_init_net(struct net *net, u_int16_t proto) { struct nf_icmp_net *in = icmpv6_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)in; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9bd88aa3c74..6f4b6f3deee 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -461,7 +461,7 @@ int nf_conntrack_l4proto_register(struct net *net, int ret = 0; if (l4proto->init_net) { - ret = l4proto->init_net(net); + ret = l4proto->init_net(net, l4proto->l3proto); if (ret < 0) return ret; } @@ -515,7 +515,8 @@ int nf_conntrack_proto_init(struct net *net) { unsigned int i; int err; - err = nf_conntrack_l4proto_generic.init_net(net); + err = nf_conntrack_l4proto_generic.init_net(net, + nf_conntrack_l4proto_generic.l3proto); if (err < 0) return err; err = nf_ct_l4proto_register_sysctl(net, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index c33f76af913..52da8f0293b 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -815,7 +815,7 @@ static struct ctl_table dccp_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int dccp_init_net(struct net *net) +static int dccp_init_net(struct net *net, u_int16_t proto) { struct dccp_net *dn = dccp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)dn; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index bb0e74fe0fa..d1ed7b44e07 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -135,7 +135,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -static int generic_init_net(struct net *net) +static int generic_init_net(struct net *net, u_int16_t proto) { struct nf_generic_net *gn = generic_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)gn; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 5cac41c2fa0..b09b7af7f6f 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -348,7 +348,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { }; #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ -static int gre_init_net(struct net *net) +static int gre_init_net(struct net *net, u_int16_t proto) { struct netns_proto_gre *net_gre = gre_pernet(net); int i; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 8fb0582ad39..1e7836cead7 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -767,7 +767,7 @@ static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static int sctpv4_init_net(struct net *net) +static int sctpv4_init_net(struct net *net, u_int16_t proto) { int ret; struct sctp_net *sn = sctp_pernet(net); @@ -793,7 +793,7 @@ static int sctpv4_init_net(struct net *net) return ret; } -static int sctpv6_init_net(struct net *net) +static int sctpv6_init_net(struct net *net, u_int16_t proto) { struct sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)sn; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 99caa130447..6db9d3c4482 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1593,7 +1593,7 @@ static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) return 0; } -static int tcpv4_init_net(struct net *net) +static int tcpv4_init_net(struct net *net, u_int16_t proto) { int i; int ret = 0; @@ -1631,7 +1631,7 @@ static int tcpv4_init_net(struct net *net) return ret; } -static int tcpv6_init_net(struct net *net) +static int tcpv6_init_net(struct net *net, u_int16_t proto) { int i; struct nf_tcp_net *tn = tcp_pernet(net); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index a83cf93545c..2b978e6fd1c 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -283,7 +283,7 @@ static void udp_init_net_data(struct nf_udp_net *un) } } -static int udpv4_init_net(struct net *net) +static int udpv4_init_net(struct net *net, u_int16_t proto) { int ret; struct nf_udp_net *un = udp_pernet(net); @@ -307,7 +307,7 @@ static int udpv4_init_net(struct net *net) return ret; } -static int udpv6_init_net(struct net *net) +static int udpv6_init_net(struct net *net, u_int16_t proto) { struct nf_udp_net *un = udp_pernet(net); struct nf_proto_net *pn = (struct nf_proto_net *)un; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index b32e700f8dd..d33e5115803 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -234,7 +234,7 @@ static struct ctl_table udplite_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -static int udplite_init_net(struct net *net) +static int udplite_init_net(struct net *net, u_int16_t proto) { int i; struct udplite_net *un = udplite_pernet(net); -- cgit v1.2.3-70-g09d2 From f28997e27a03abc679f13824a0574b09112eea37 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 21 Jun 2012 04:36:40 +0000 Subject: netfilter: nf_conntrack: add nf_ct_kfree_compat_sysctl_table This patch is a cleanup. It adds nf_ct_kfree_compat_sysctl_table to release l4proto's compat sysctl table and set the compat sysctl table point to NULL. This new function will be used by follow-up patches. Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 8 ++++++++ net/netfilter/nf_conntrack_proto.c | 3 +-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 5dd60f2d02a..08bb571b7ab 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -124,6 +124,14 @@ extern int nf_conntrack_l4proto_register(struct net *net, extern void nf_conntrack_l4proto_unregister(struct net *net, struct nf_conntrack_l4proto *proto); +static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) +{ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; +#endif +} + /* Generic netlink helpers */ extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 6f4b6f3deee..9d6b6ab193a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -361,8 +361,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net, if (err == 0) goto out; - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; + nf_ct_kfree_compat_sysctl_table(pn); nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, &pn->users); -- cgit v1.2.3-70-g09d2 From c074da2810c118b3812f32d6754bd9ead2f169e7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 26 Jun 2012 23:14:15 +0000 Subject: ipv4: tcp: dont cache unconfirmed intput dst DDOS synflood attacks hit badly IP route cache. On typical machines, this cache is allowed to hold up to 8 Millions dst entries, 256 bytes for each, for a total of 2GB of memory. rt_garbage_collect() triggers and tries to cleanup things. Eventually route cache is disabled but machine is under fire and might OOM and crash. This patch exploits the new TCP early demux, to set a nocache boolean in case incoming TCP frame is for a not yet ESTABLISHED or TIMEWAIT socket. This 'nocache' boolean is then used in case dst entry is not found in route cache, to create an unhashed dst entry (DST_NOCACHE) SYN-cookie-ACK sent use a similar mechanism (ipv4: tcp: dont cache output dst for syncookies), so after this patch, a machine is able to absorb a DDOS synflood attack without polluting its IP route cache. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- include/net/route.h | 8 ++++---- include/net/tcp.h | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_input.c | 5 +++-- net/ipv4/route.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 4 +++- net/ipv4/xfrm4_input.c | 2 +- 9 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 967b926cbfb..7cfc8f76914 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); + int (*early_demux)(struct sk_buff *skb, bool *nocache); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/route.h b/include/net/route.h index 47eb25ac1f7..6361f933577 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,18 +201,18 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 } extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool noref); + u8 tos, struct net_device *devin, bool noref, bool nocache); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) { - return ip_route_input_common(skb, dst, src, tos, devin, false); + return ip_route_input_common(skb, dst, src, tos, devin, false, false); } static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) + u8 tos, struct net_device *devin, bool nocache) { - return ip_route_input_common(skb, dst, src, tos, devin, true); + return ip_route_input_common(skb, dst, src, tos, devin, true, nocache); } extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, diff --git a/include/net/tcp.h b/include/net/tcp.h index 6660ffc4963..917ed2e55e8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_v4_early_demux(struct sk_buff *skb); +extern int tcp_v4_early_demux(struct sk_buff *skb, bool *nocache); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2e560f0c757..6a979594436 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev, false) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409..978d55f256e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -259,7 +259,7 @@ static void ip_expire(unsigned long arg) skb_dst_drop(head); iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + iph->tos, head->dev, false); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2a39204de5b..7be54c8dcbe 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -326,6 +326,7 @@ static int ip_rcv_finish(struct sk_buff *skb) */ if (skb_dst(skb) == NULL) { int err = -ENOENT; + bool nocache = false; if (sysctl_ip_early_demux) { const struct net_protocol *ipprot; @@ -334,13 +335,13 @@ static int ip_rcv_finish(struct sk_buff *skb) rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); + err = ipprot->early_demux(skb, &nocache); rcu_read_unlock(); } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + iph->tos, skb->dev, nocache); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 81533e3a23d..fdc7900f9d7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2214,7 +2214,7 @@ static int ip_mkroute_input(struct sk_buff *skb, */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, bool nocache) { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -2353,6 +2353,8 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } + if (nocache) + rth->dst.flags |= DST_NOCACHE; hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; @@ -2395,7 +2397,7 @@ martian_source_keep_err: } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool noref) + u8 tos, struct net_device *dev, bool noref, bool nocache) { struct rtable *rth; unsigned int hash; @@ -2471,7 +2473,7 @@ skip_cache: rcu_read_unlock(); return -EINVAL; } - res = ip_route_input_slow(skb, daddr, saddr, tos, dev); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev, nocache); rcu_read_unlock(); return res; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1781dc650b9..33aabd4fc20 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,7 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb) +int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; @@ -1719,6 +1719,8 @@ int tcp_v4_early_demux(struct sk_buff *skb) } } } + } else { + *no_dst_cache = true; } out_err: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216d..eee636b191b 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -28,7 +28,7 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + iph->tos, skb->dev, false)) goto drop; } return dst_input(skb); -- cgit v1.2.3-70-g09d2 From c10237e077cef50e925f052e49f3b4fead9d71f9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 17:05:06 -0700 Subject: Revert "ipv4: tcp: dont cache unconfirmed intput dst" This reverts commit c074da2810c118b3812f32d6754bd9ead2f169e7. This change has several unwanted side effects: 1) Sockets will cache the DST_NOCACHE route in sk->sk_rx_dst and we'll thus never create a real cached route. 2) All TCP traffic will use DST_NOCACHE and never use the routing cache at all. Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- include/net/route.h | 8 ++++---- include/net/tcp.h | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_input.c | 5 ++--- net/ipv4/route.c | 8 +++----- net/ipv4/tcp_ipv4.c | 4 +--- net/ipv4/xfrm4_input.c | 2 +- 9 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 7cfc8f76914..967b926cbfb 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb, bool *nocache); + int (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/route.h b/include/net/route.h index 6361f933577..47eb25ac1f7 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -201,18 +201,18 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 } extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool noref, bool nocache); + u8 tos, struct net_device *devin, bool noref); static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, u8 tos, struct net_device *devin) { - return ip_route_input_common(skb, dst, src, tos, devin, false, false); + return ip_route_input_common(skb, dst, src, tos, devin, false); } static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool nocache) + u8 tos, struct net_device *devin) { - return ip_route_input_common(skb, dst, src, tos, devin, true, nocache); + return ip_route_input_common(skb, dst, src, tos, devin, true); } extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, diff --git a/include/net/tcp.h b/include/net/tcp.h index 917ed2e55e8..6660ffc4963 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_v4_early_demux(struct sk_buff *skb, bool *nocache); +extern int tcp_v4_early_demux(struct sk_buff *skb); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6a979594436..2e560f0c757 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input_noref(skb, tip, sip, 0, dev, false) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 978d55f256e..8d07c973409 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -259,7 +259,7 @@ static void ip_expire(unsigned long arg) skb_dst_drop(head); iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev, false); + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 7be54c8dcbe..2a39204de5b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -326,7 +326,6 @@ static int ip_rcv_finish(struct sk_buff *skb) */ if (skb_dst(skb) == NULL) { int err = -ENOENT; - bool nocache = false; if (sysctl_ip_early_demux) { const struct net_protocol *ipprot; @@ -335,13 +334,13 @@ static int ip_rcv_finish(struct sk_buff *skb) rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb, &nocache); + err = ipprot->early_demux(skb); rcu_read_unlock(); } if (err) { err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev, nocache); + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fdc7900f9d7..81533e3a23d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2214,7 +2214,7 @@ static int ip_mkroute_input(struct sk_buff *skb, */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool nocache) + u8 tos, struct net_device *dev) { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -2353,8 +2353,6 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (nocache) - rth->dst.flags |= DST_NOCACHE; hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; @@ -2397,7 +2395,7 @@ martian_source_keep_err: } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool noref, bool nocache) + u8 tos, struct net_device *dev, bool noref) { struct rtable *rth; unsigned int hash; @@ -2473,7 +2471,7 @@ skip_cache: rcu_read_unlock(); return -EINVAL; } - res = ip_route_input_slow(skb, daddr, saddr, tos, dev, nocache); + res = ip_route_input_slow(skb, daddr, saddr, tos, dev); rcu_read_unlock(); return res; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 33aabd4fc20..1781dc650b9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,7 +1673,7 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache) +int tcp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; @@ -1719,8 +1719,6 @@ int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache) } } } - } else { - *no_dst_cache = true; } out_err: diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eee636b191b..06814b6216d 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -28,7 +28,7 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev, false)) + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.3-70-g09d2 From 1d1e34ddd48d27def2f324c1e3be16d460b16436 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 21:57:03 -0700 Subject: xfrm_user: Propagate netlink error codes properly. Instead of using a fixed value of "-1" or "-EMSGSIZE", propagate what the nla_*() interfaces actually return. Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +- net/xfrm/xfrm_user.c | 394 ++++++++++++++++++++++++++------------------------- 2 files changed, 208 insertions(+), 196 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e0a55df5bde..17acbc92476 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1682,13 +1682,11 @@ static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m) { - if ((m->m | m->v) && - nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m)) - goto nla_put_failure; - return 0; + int ret = 0; -nla_put_failure: - return -1; + if (m->m | m->v) + ret = nla_put(skb, XFRMA_MARK, sizeof(struct xfrm_mark), m); + return ret; } #endif /* _NET_XFRM_H */ diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 44293b3fd6a..540762726aa 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -754,58 +754,67 @@ static int copy_to_user_state_extra(struct xfrm_state *x, struct xfrm_usersa_info *p, struct sk_buff *skb) { - copy_to_user_state(x, p); - - if (x->coaddr && - nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr)) - goto nla_put_failure; - - if (x->lastused && - nla_put_u64(skb, XFRMA_LASTUSED, x->lastused)) - goto nla_put_failure; - - if (x->aead && - nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead)) - goto nla_put_failure; - - if (x->aalg && - (copy_to_user_auth(x->aalg, skb) || - nla_put(skb, XFRMA_ALG_AUTH_TRUNC, - xfrm_alg_auth_len(x->aalg), x->aalg))) - goto nla_put_failure; - - if (x->ealg && - nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg)) - goto nla_put_failure; - - if (x->calg && - nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg)) - goto nla_put_failure; - - if (x->encap && - nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap)) - goto nla_put_failure; + int ret = 0; - if (x->tfcpad && - nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad)) - goto nla_put_failure; - - if (xfrm_mark_put(skb, &x->mark)) - goto nla_put_failure; - - if (x->replay_esn && - nla_put(skb, XFRMA_REPLAY_ESN_VAL, - xfrm_replay_state_esn_len(x->replay_esn), - x->replay_esn)) - goto nla_put_failure; - - if (x->security && copy_sec_ctx(x->security, skb)) - goto nla_put_failure; - - return 0; + copy_to_user_state(x, p); -nla_put_failure: - return -EMSGSIZE; + if (x->coaddr) { + ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (ret) + goto out; + } + if (x->lastused) { + ret = nla_put_u64(skb, XFRMA_LASTUSED, x->lastused); + if (ret) + goto out; + } + if (x->aead) { + ret = nla_put(skb, XFRMA_ALG_AEAD, aead_len(x->aead), x->aead); + if (ret) + goto out; + } + if (x->aalg) { + ret = copy_to_user_auth(x->aalg, skb); + if (!ret) + ret = nla_put(skb, XFRMA_ALG_AUTH_TRUNC, + xfrm_alg_auth_len(x->aalg), x->aalg); + if (ret) + goto out; + } + if (x->ealg) { + ret = nla_put(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(x->ealg), x->ealg); + if (ret) + goto out; + } + if (x->calg) { + ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + if (ret) + goto out; + } + if (x->encap) { + ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (ret) + goto out; + } + if (x->tfcpad) { + ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad); + if (ret) + goto out; + } + ret = xfrm_mark_put(skb, &x->mark); + if (ret) + goto out; + if (x->replay_esn) { + ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + if (ret) + goto out; + } + if (x->security) + ret = copy_sec_ctx(x->security, skb); +out: + return ret; } static int dump_one_state(struct xfrm_state *x, int count, void *ptr) @@ -825,15 +834,12 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) p = nlmsg_data(nlh); err = copy_to_user_state_extra(x, p, skb); - if (err) - goto nla_put_failure; - + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } nlmsg_end(skb, nlh); return 0; - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return err; } static int xfrm_dump_sa_done(struct netlink_callback *cb) @@ -904,6 +910,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, struct xfrmu_spdinfo spc; struct xfrmu_spdhinfo sph; struct nlmsghdr *nlh; + int err; u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); @@ -922,15 +929,15 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, sph.spdhcnt = si.spdhcnt; sph.spdhmcnt = si.spdhmcnt; - if (nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc) || - nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph)) - goto nla_put_failure; + err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); + if (!err) + err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -965,6 +972,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; struct nlmsghdr *nlh; + int err; u32 *f; nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); @@ -978,15 +986,15 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; - if (nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt) || - nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh)) - goto nla_put_failure; + err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt); + if (!err) + err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1439,9 +1447,8 @@ static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buf static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) { - if (xp->security) { + if (xp->security) return copy_sec_ctx(xp->security, skb); - } return 0; } static inline size_t userpolicy_type_attrsize(void) @@ -1477,6 +1484,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); @@ -1485,22 +1493,19 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; - + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } nlmsg_end(skb, nlh); return 0; - -nla_put_failure: -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_dump_policy_done(struct netlink_callback *cb) @@ -1688,6 +1693,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) @@ -1703,35 +1709,39 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct id->flags = c->data.aevent; if (x->replay_esn) { - if (nla_put(skb, XFRMA_REPLAY_ESN_VAL, - xfrm_replay_state_esn_len(x->replay_esn), - x->replay_esn)) - goto nla_put_failure; + err = nla_put(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); } else { - if (nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), - &x->replay)) - goto nla_put_failure; + err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), + &x->replay); } - if (nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft)) - goto nla_put_failure; - - if ((id->flags & XFRM_AE_RTHR) && - nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff)) - goto nla_put_failure; - - if ((id->flags & XFRM_AE_ETHR) && - nla_put_u32(skb, XFRMA_ETIMER_THRESH, - x->replay_maxage * 10 / HZ)) - goto nla_put_failure; + if (err) + goto out_cancel; + err = nla_put(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); + if (err) + goto out_cancel; - if (xfrm_mark_put(skb, &x->mark)) - goto nla_put_failure; + if (id->flags & XFRM_AE_RTHR) { + err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); + if (err) + goto out_cancel; + } + if (id->flags & XFRM_AE_ETHR) { + err = nla_put_u32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); + if (err) + goto out_cancel; + } + err = xfrm_mark_put(skb, &x->mark); + if (err) + goto out_cancel; return nlmsg_end(skb, nlh); -nla_put_failure: +out_cancel: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2155,7 +2165,7 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, const struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - int i; + int i, err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); if (nlh == NULL) @@ -2167,21 +2177,25 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; - if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) - goto nlmsg_failure; - - if (copy_to_user_policy_type(type, skb) < 0) - goto nlmsg_failure; - + if (k != NULL) { + err = copy_to_user_kmaddress(k, skb); + if (err) + goto out_cancel; + } + err = copy_to_user_policy_type(type, skb); + if (err) + goto out_cancel; for (i = 0, mp = m ; i < num_migrate; i++, mp++) { - if (copy_to_user_migrate(mp, skb) < 0) - goto nlmsg_failure; + err = copy_to_user_migrate(mp, skb); + if (err) + goto out_cancel; } return nlmsg_end(skb, nlh); -nlmsg_failure: + +out_cancel: nlmsg_cancel(skb, nlh); - return -EMSGSIZE; + return err; } static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, @@ -2354,6 +2368,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) @@ -2363,13 +2378,11 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; - if (xfrm_mark_put(skb, &x->mark)) - goto nla_put_failure; + err = xfrm_mark_put(skb, &x->mark); + if (err) + return err; return nlmsg_end(skb, nlh); - -nla_put_failure: - return -EMSGSIZE; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2470,7 +2483,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; int len = xfrm_sa_len(x); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { @@ -2485,8 +2498,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) return -ENOMEM; nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nla_put_failure; + goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { @@ -2499,24 +2513,23 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) id->proto = x->id.proto; attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); + err = -EMSGSIZE; if (attr == NULL) - goto nla_put_failure; + goto out_free_skb; p = nla_data(attr); } - - if (copy_to_user_state_extra(x, p, skb)) - goto nla_put_failure; + err = copy_to_user_state_extra(x, p, skb); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); -nla_put_failure: - /* Somebody screwed up with xfrm_sa_len! */ - WARN_ON(1); +out_free_skb: kfree_skb(skb); - return -1; + return err; } static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2557,9 +2570,10 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { + __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - __u32 seq = xfrm_get_acqseq(); + int err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); if (nlh == NULL) @@ -2575,21 +2589,19 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, ua->calgos = xt->calgos; ua->seq = x->km.seq = seq; - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_state_sec_ctx(x, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_state_sec_ctx(x, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } return nlmsg_end(skb, nlh); - -nla_put_failure: -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2681,8 +2693,9 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_user_polexpire *upe; - struct nlmsghdr *nlh; int hard = c->data.hard; + struct nlmsghdr *nlh; + int err; nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) @@ -2690,22 +2703,20 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_sec_ctx(xp, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_sec_ctx(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } upe->hard = !!hard; return nlmsg_end(skb, nlh); - -nla_put_failure: -nlmsg_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2725,13 +2736,13 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - int headlen; + int headlen, err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { @@ -2747,8 +2758,9 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e return -ENOMEM; nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nlmsg_failure; + goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { @@ -2763,29 +2775,29 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e memcpy(&id->sel, &xp->selector, sizeof(id->sel)); attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + err = -EMSGSIZE; if (attr == NULL) - goto nlmsg_failure; + goto out_free_skb; p = nla_data(attr); } copy_to_user_policy(xp, p, dir); - if (copy_to_user_tmpl(xp, skb) < 0) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp->type, skb) < 0) - goto nlmsg_failure; - - if (xfrm_mark_put(skb, &xp->mark)) - goto nla_put_failure; + err = copy_to_user_tmpl(xp, skb); + if (!err) + err = copy_to_user_policy_type(xp->type, skb); + if (!err) + err = xfrm_mark_put(skb, &xp->mark); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); -nla_put_failure: -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } static int xfrm_notify_policy_flush(const struct km_event *c) @@ -2793,24 +2805,27 @@ static int xfrm_notify_policy_flush(const struct km_event *c) struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; + int err; skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + err = -EMSGSIZE; if (nlh == NULL) - goto nlmsg_failure; - if (copy_to_user_policy_type(c->data.type, skb) < 0) - goto nlmsg_failure; + goto out_free_skb; + err = copy_to_user_policy_type(c->data.type, skb); + if (err) + goto out_free_skb; nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); -nlmsg_failure: +out_free_skb: kfree_skb(skb); - return -1; + return err; } static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2853,15 +2868,14 @@ static int build_report(struct sk_buff *skb, u8 proto, ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); - if (addr && - nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr)) - goto nla_put_failure; - + if (addr) { + int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr); + if (err) { + nlmsg_cancel(skb, nlh); + return err; + } + } return nlmsg_end(skb, nlh); - -nla_put_failure: - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; } static int xfrm_send_report(struct net *net, u8 proto, -- cgit v1.2.3-70-g09d2 From 160eb5a6b14ca2eab5c598bdbbb24c24624bad34 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Jun 2012 22:01:22 -0700 Subject: ipv4: Kill early demux method return value. It's completely unnecessary. Signed-off-by: David S. Miller --- include/net/protocol.h | 2 +- include/net/tcp.h | 2 +- net/ipv4/ip_input.c | 42 +++++++++++++++++++----------------------- net/ipv4/tcp_ipv4.c | 19 ++++++------------- 4 files changed, 27 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/protocol.h b/include/net/protocol.h index 967b926cbfb..057f2d31556 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -37,7 +37,7 @@ /* This is used to register protocols. */ struct net_protocol { - int (*early_demux)(struct sk_buff *skb); + void (*early_demux)(struct sk_buff *skb); int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index 6660ffc4963..53fb7d81417 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); -extern int tcp_v4_early_demux(struct sk_buff *skb); +extern void tcp_v4_early_demux(struct sk_buff *skb); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2a39204de5b..b27d4440f52 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -320,33 +320,29 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + + rcu_read_lock(); + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ - if (skb_dst(skb) == NULL) { - int err = -ENOENT; - - if (sysctl_ip_early_demux) { - const struct net_protocol *ipprot; - int protocol = iph->protocol; - - rcu_read_lock(); - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot && ipprot->early_demux) - err = ipprot->early_demux(skb); - rcu_read_unlock(); - } - - if (err) { - err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); - if (unlikely(err)) { - if (err == -EXDEV) - NET_INC_STATS_BH(dev_net(skb->dev), - LINUX_MIB_IPRPFILTER); - goto drop; - } + if (!skb_dst(skb)) { + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); + if (unlikely(err)) { + if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); + goto drop; } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1781dc650b9..b4ae1c199f3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1673,30 +1673,28 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb) +void tcp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct net_device *dev; struct sock *sk; - int err; - err = -ENOENT; if (skb->pkt_type != PACKET_HOST) - goto out_err; + return; if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) - goto out_err; + return; iph = ip_hdr(skb); th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); if (th->doff < sizeof(struct tcphdr) / 4) - goto out_err; + return; if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) - goto out_err; + return; dev = skb->dev; sk = __inet_lookup_established(net, &tcp_hashinfo, @@ -1713,16 +1711,11 @@ int tcp_v4_early_demux(struct sk_buff *skb) if (dst) { struct rtable *rt = (struct rtable *) dst; - if (rt->rt_iif == dev->ifindex) { + if (rt->rt_iif == dev->ifindex) skb_dst_set_noref(skb, dst); - err = 0; - } } } } - -out_err: - return err; } /* -- cgit v1.2.3-70-g09d2 From fc8a7321d3d68af759a369a9ad3e2426688742d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Jun 2012 10:33:25 +0200 Subject: mac80211: don't expose ieee80211_add_srates_ie() This and ieee80211_add_ext_srates_ie() aren't exported, so can't be used by drivers anyway, but there's also no reason that they should be so make them private to mac80211 and use sdata instead of vif arguments. Acked-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ------ net/mac80211/cfg.c | 12 ++++++------ net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/mesh_plink.c | 4 ++-- net/mac80211/tx.c | 4 ++-- net/mac80211/util.c | 10 ++++------ 6 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 510d852d522..5e67020b170 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3826,12 +3826,6 @@ void ieee80211_enable_rssi_reports(struct ieee80211_vif *vif, void ieee80211_disable_rssi_reports(struct ieee80211_vif *vif); -int ieee80211_add_srates_ie(struct ieee80211_vif *vif, - struct sk_buff *skb, bool need_basic); - -int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, - struct sk_buff *skb, bool need_basic); - /** * ieee80211_ave_rssi - report the average rssi for the specified interface * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7722a7336a5..ebc353ef690 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2665,8 +2665,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_req.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(&sdata->vif, skb, false); - ieee80211_add_ext_srates_ie(&sdata->vif, skb, false); + ieee80211_add_srates_ie(sdata, skb, false); + ieee80211_add_ext_srates_ie(sdata, skb, false); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_RESPONSE: @@ -2679,8 +2679,8 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev, tf->u.setup_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(&sdata->vif, skb, false); - ieee80211_add_ext_srates_ie(&sdata->vif, skb, false); + ieee80211_add_srates_ie(sdata, skb, false); + ieee80211_add_ext_srates_ie(sdata, skb, false); ieee80211_tdls_add_ext_capab(skb); break; case WLAN_TDLS_SETUP_CONFIRM: @@ -2740,8 +2740,8 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, mgmt->u.action.u.tdls_discover_resp.capability = cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata)); - ieee80211_add_srates_ie(&sdata->vif, skb, false); - ieee80211_add_ext_srates_ie(&sdata->vif, skb, false); + ieee80211_add_srates_ie(sdata, skb, false); + ieee80211_add_ext_srates_ie(sdata, skb, false); ieee80211_tdls_add_ext_capab(skb); break; default: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 341d77d472d..6b7157d2050 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1480,6 +1480,10 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, struct ieee80211_channel *channel, enum nl80211_channel_type channel_type, u16 prot_mode); +int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, bool need_basic); +int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, bool need_basic); /* channel management */ enum ieee80211_chan_mode { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index a1dbd154027..425685914d7 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -258,8 +258,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2); memcpy(pos + 2, &plid, 2); } - if (ieee80211_add_srates_ie(&sdata->vif, skb, true) || - ieee80211_add_ext_srates_ie(&sdata->vif, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true) || + ieee80211_add_ext_srates_ie(sdata, skb, true) || mesh_add_rsn_ie(skb, sdata) || mesh_add_meshid_ie(skb, sdata) || mesh_add_meshconf_ie(skb, sdata)) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ec8f5346737..4e753032e48 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2420,9 +2420,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - if (ieee80211_add_srates_ie(&sdata->vif, skb, true) || + if (ieee80211_add_srates_ie(sdata, skb, true) || mesh_add_ds_params_ie(skb, sdata) || - ieee80211_add_ext_srates_ie(&sdata->vif, skb, true) || + ieee80211_add_ext_srates_ie(sdata, skb, true) || mesh_add_rsn_ie(skb, sdata) || mesh_add_ht_cap_ie(skb, sdata) || mesh_add_ht_oper_ie(skb, sdata) || diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 242ecde381f..c4245695afc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1764,15 +1764,14 @@ ieee80211_ht_oper_to_channel_type(struct ieee80211_ht_operation *ht_oper) return channel_type; } -int ieee80211_add_srates_ie(struct ieee80211_vif *vif, +int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; int rate; u8 i, rates, *pos; - u32 basic_rates = vif->bss_conf.basic_rates; + u32 basic_rates = sdata->vif.bss_conf.basic_rates; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; rates = sband->n_bitrates; @@ -1796,15 +1795,14 @@ int ieee80211_add_srates_ie(struct ieee80211_vif *vif, return 0; } -int ieee80211_add_ext_srates_ie(struct ieee80211_vif *vif, +int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool need_basic) { - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; int rate; u8 i, exrates, *pos; - u32 basic_rates = vif->bss_conf.basic_rates; + u32 basic_rates = sdata->vif.bss_conf.basic_rates; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; exrates = sband->n_bitrates; -- cgit v1.2.3-70-g09d2 From 70e7341673a47fb1525cfc7d6651cc98b5348928 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 03:21:41 -0700 Subject: ipv4: Show that ip_send_reply() is purely unicast routine. Rename it to ip_send_unicast_reply() and add explicit 'saddr' argument. This removed one of the few users of rt->rt_spec_dst. Signed-off-by: David S. Miller --- include/net/ip.h | 5 +++-- net/ipv4/ip_output.c | 9 +++++---- net/ipv4/tcp_ipv4.c | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 50841bd6f10..ec5cfde85e9 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -158,8 +158,9 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - const struct ip_reply_arg *arg, unsigned int len); +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, + __be32 saddr, const struct ip_reply_arg *arg, + unsigned int len); struct ipv4_config { int log_martians; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0f3185a662c..2630900e480 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1459,13 +1459,14 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. - * Used to send TCP resets so far. ICMP should use this function too. + * Used to send TCP resets so far. * * Should run single threaded per socket because it uses the sock * structure to pass arguments. */ -void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, - const struct ip_reply_arg *arg, unsigned int len) +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, + __be32 saddr, const struct ip_reply_arg *arg, + unsigned int len) { struct inet_sock *inet = inet_sk(sk); struct ip_options_data replyopts; @@ -1491,7 +1492,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, RT_TOS(arg->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), - daddr, rt->rt_spec_dst, + daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b4ae1c199f3..64568fa21d0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -698,8 +698,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; - ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, - &arg, arg.iov[0].iov_len); + ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); @@ -781,8 +781,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, - &arg, arg.iov[0].iov_len); + ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); } -- cgit v1.2.3-70-g09d2 From 35ebf65e851c6d9731abc6362b189858eb59f4d3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 03:59:11 -0700 Subject: ipv4: Create and use fib_compute_spec_dst() helper. The specific destination is the host we direct unicast replies to. Usually this is the original packet source address, but if we are responding to a multicast or broadcast packet we have to use something different. Specifically we must use the source address we would use if we were to send a packet to the unicast source of the original packet. The routing cache precomputes this value, but we want to remove that precomputation because it creates a hard dependency on the expensive rpfilter source address validation which we'd like to make cheaper. There are only three places where this matters: 1) ICMP replies. 2) pktinfo CMSG 3) IP options Now there will be no real users of rt->rt_spec_dst and we can simply remove it altogether. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 29 +++++++++++++++++++++++++++++ net/ipv4/icmp.c | 6 ++++-- net/ipv4/ip_options.c | 22 +++++++++++----------- net/ipv4/ip_sockglue.c | 7 ++++--- 5 files changed, 49 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4b347c0ca09..1687b3de54f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -230,6 +230,7 @@ extern struct fib_table *fib_get_table(struct net *net, u32 id); /* Exported by fib_frontend.c */ extern const struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); +extern __be32 fib_compute_spec_dst(struct sk_buff *skb); extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, u32 *itag); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3854411fa37..451939b60c5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -180,6 +180,35 @@ unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, } EXPORT_SYMBOL(inet_dev_addr_type); +__be32 fib_compute_spec_dst(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct in_device *in_dev; + struct fib_result res; + struct flowi4 fl4; + struct net *net; + + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return ip_hdr(skb)->daddr; + + in_dev = __in_dev_get_rcu(dev); + BUG_ON(!in_dev); + fl4.flowi4_oif = 0; + fl4.flowi4_iif = 0; + fl4.daddr = ip_hdr(skb)->saddr; + fl4.saddr = ip_hdr(skb)->daddr; + fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; + + net = dev_net(dev); + if (!fib_lookup(net, &fl4, &res)) + return FIB_RES_PREFSRC(net, res); + else + return inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); +} + /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 49a74cc79dc..4bce5a2830a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -95,6 +95,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -333,7 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; - __be32 daddr; + __be32 daddr, saddr; if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -347,6 +348,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = ip_hdr(skb)->saddr; + saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; ipc.tx_flags = 0; if (icmp_param->replyopts.opt.opt.optlen) { @@ -356,7 +358,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) } memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = rt->rt_spec_dst; + fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 708b99494e2..766dfe56885 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to @@ -104,7 +105,7 @@ int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb) sptr = skb_network_header(skb); dptr = dopt->__data; - daddr = skb_rtable(skb)->rt_spec_dst; + daddr = fib_compute_spec_dst(skb); if (sopt->rr) { optlen = sptr[sopt->rr+1]; @@ -250,15 +251,14 @@ void ip_options_fragment(struct sk_buff *skb) int ip_options_compile(struct net *net, struct ip_options *opt, struct sk_buff *skb) { - int l; - unsigned char *iph; - unsigned char *optptr; - int optlen; + __be32 spec_dst = (__force __be32) 0; unsigned char *pp_ptr = NULL; - struct rtable *rt = NULL; + unsigned char *optptr; + unsigned char *iph; + int optlen, l; if (skb != NULL) { - rt = skb_rtable(skb); + spec_dst = fib_compute_spec_dst(skb); optptr = (unsigned char *)&(ip_hdr(skb)[1]); } else optptr = opt->__data; @@ -330,8 +330,8 @@ int ip_options_compile(struct net *net, pp_ptr = optptr + 2; goto error; } - if (rt) { - memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); + if (skb) { + memcpy(&optptr[optptr[2]-1], &spec_dst, 4); opt->is_changed = 1; } optptr[2] += 4; @@ -372,8 +372,8 @@ int ip_options_compile(struct net *net, goto error; } opt->ts = optptr - iph; - if (rt) { - memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); + if (skb) { + memcpy(&optptr[optptr[2]-1], &spec_dst, 4); timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0d11f234d61..de29f46f68b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -40,6 +40,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include #endif +#include #include #include @@ -1019,8 +1020,8 @@ e_inval: * @sk: socket * @skb: buffer * - * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst - * in skb->cb[] before dst drop. + * To support IP_CMSG_PKTINFO option, we store rt_iif and specific + * destination in skb->cb[] before dst drop. * This way, receiver doesnt make cache line misses to read rtable. */ void ipv4_pktinfo_prepare(struct sk_buff *skb) @@ -1030,7 +1031,7 @@ void ipv4_pktinfo_prepare(struct sk_buff *skb) if (rt) { pktinfo->ipi_ifindex = rt->rt_iif; - pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; -- cgit v1.2.3-70-g09d2 From 41347dcdd81988b8e60853257b2875285cc17a4e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 04:05:27 -0700 Subject: ipv4: Kill rt->rt_spec_dst, no longer used. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- include/net/route.h | 1 - net/ipv4/fib_frontend.c | 9 ++------- net/ipv4/route.c | 38 +++++++++----------------------------- net/ipv4/xfrm4_policy.c | 1 - 5 files changed, 12 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1687b3de54f..9e6c26d4ba4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -233,7 +233,7 @@ extern void ip_fib_init(void); extern __be32 fib_compute_spec_dst(struct sk_buff *skb); extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, - __be32 *spec_dst, u32 *itag); + u32 *itag); extern void fib_select_default(struct fib_result *res); /* Exported by fib_semantics.c */ diff --git a/include/net/route.h b/include/net/route.h index 47eb25ac1f7..211e2665139 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -65,7 +65,6 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - __be32 rt_spec_dst; /* RFC1122 specific destination */ u32 rt_peer_genid; unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 451939b60c5..63b11ca54d9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -218,8 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) * called with rcu_read_lock() */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, - int oif, struct net_device *dev, __be32 *spec_dst, - u32 *itag) + int oif, struct net_device *dev, u32 *itag) { struct in_device *in_dev; struct flowi4 fl4; @@ -258,7 +257,6 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, if (res.type != RTN_LOCAL || !accept_local) goto e_inval; } - *spec_dst = FIB_RES_PREFSRC(net, res); fib_combine_itag(itag, &res); dev_match = false; @@ -287,17 +285,14 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, ret = 0; if (fib_lookup(net, &fl4, &res) == 0) { - if (res.type == RTN_UNICAST) { - *spec_dst = FIB_RES_PREFSRC(net, res); + if (res.type == RTN_UNICAST) ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; - } } return ret; last_resort: if (rpf) goto e_rpf; - *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); *itag = 0; return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 81533e3a23d..83d56a01662 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -440,7 +440,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) r->rt_key_tos, -1, HHUptod, - r->rt_spec_dst, &len); + 0, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } @@ -1978,7 +1978,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, { unsigned int hash; struct rtable *rth; - __be32 spec_dst; struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; int err; @@ -1999,10 +1998,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) goto e_inval; - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, - &itag); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); if (err < 0) goto e_err; } @@ -2029,7 +2026,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_oif = 0; rth->rt_mark = skb->mark; rth->rt_gateway = daddr; - rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; @@ -2093,7 +2089,6 @@ static int __mkroute_input(struct sk_buff *skb, int err; struct in_device *out_dev; unsigned int flags = 0; - __be32 spec_dst; u32 itag; /* get a working reference to the output device */ @@ -2105,7 +2100,7 @@ static int __mkroute_input(struct sk_buff *skb, err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &spec_dst, &itag); + in_dev->dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2157,7 +2152,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_oif = 0; rth->rt_mark = skb->mark; rth->rt_gateway = daddr; - rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; @@ -2223,7 +2217,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 itag = 0; struct rtable *rth; unsigned int hash; - __be32 spec_dst; int err = -EINVAL; struct net *net = dev_net(dev); @@ -2281,12 +2274,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &spec_dst, &itag); + dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) flags |= RTCF_DIRECTSRC; - spec_dst = daddr; goto local_input; } @@ -2302,11 +2294,8 @@ brd_input: if (skb->protocol != htons(ETH_P_IP)) goto e_inval; - if (ipv4_is_zeronet(saddr)) - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); - else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst, - &itag); + if (!ipv4_is_zeronet(saddr)) { + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2344,7 +2333,6 @@ local_input: rth->rt_oif = 0; rth->rt_mark = skb->mark; rth->rt_gateway = daddr; - rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; @@ -2362,7 +2350,6 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); - spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); res.type = RTN_UNREACHABLE; if (err == -ESRCH) err = -ENETUNREACH; @@ -2545,7 +2532,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; rth->rt_gateway = fl4->daddr; - rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; rt_init_peer(rth, (res->table ? &res->table->tb_peers : @@ -2554,12 +2540,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, RT_CACHE_STAT_INC(out_slow_tot); - if (flags & RTCF_LOCAL) { + if (flags & RTCF_LOCAL) rth->dst.input = ip_local_deliver; - rth->rt_spec_dst = fl4->daddr; - } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - rth->rt_spec_dst = fl4->saddr; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; @@ -2890,7 +2873,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; - rt->rt_spec_dst = ort->rt_spec_dst; rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) @@ -2965,10 +2947,8 @@ static int rt_fill_info(struct net *net, nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) goto nla_put_failure; #endif - if (rt_is_input_route(rt)) { - if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst)) - goto nla_put_failure; - } else if (rt->rt_src != rt->rt_key_src) { + if (!rt_is_input_route(rt) && + rt->rt_src != rt->rt_key_src) { if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) goto nla_put_failure; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8855d826855..9815ea0bca7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,7 +100,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_src = rt->rt_src; xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; - xdst->u.rt.rt_spec_dst = rt->rt_spec_dst; return 0; } -- cgit v1.2.3-70-g09d2 From bf0c111ec80355ee9fe2e2bdb609a536b54768d8 Mon Sep 17 00:00:00 2001 From: Mahesh Palivela Date: Fri, 22 Jun 2012 07:27:46 +0000 Subject: cfg80211: allow advertising VHT capabilities Allow drivers to advertise their VHT capabilities and export them to userspace via nl80211. Signed-off-by: Mahesh Palivela Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 6 ++++++ include/net/cfg80211.h | 17 +++++++++++++++++ net/wireless/nl80211.c | 9 +++++++++ 3 files changed, 32 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c0fc5d27733..23003272c70 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1813,6 +1813,9 @@ enum nl80211_mpath_info { * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n + * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as + * defined in 802.11ac + * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined * @__NL80211_BAND_ATTR_AFTER_LAST: internal use */ @@ -1826,6 +1829,9 @@ enum nl80211_band_attr { NL80211_BAND_ATTR_HT_AMPDU_FACTOR, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + NL80211_BAND_ATTR_VHT_MCS_SET, + NL80211_BAND_ATTR_VHT_CAPA, + /* keep last */ __NL80211_BAND_ATTR_AFTER_LAST, NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7d3cd3ce9a2..1fc89c4f930 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -210,6 +210,22 @@ struct ieee80211_sta_ht_cap { struct ieee80211_mcs_info mcs; }; +/** + * struct ieee80211_sta_vht_cap - STA's VHT capabilities + * + * This structure describes most essential parameters needed + * to describe 802.11ac VHT capabilities for an STA. + * + * @vht_supported: is VHT supported by the STA + * @cap: VHT capabilities map as described in 802.11ac spec + * @vht_mcs: Supported VHT MCS rates + */ +struct ieee80211_sta_vht_cap { + bool vht_supported; + u32 cap; /* use IEEE80211_VHT_CAP_ */ + struct ieee80211_vht_mcs_info vht_mcs; +}; + /** * struct ieee80211_supported_band - frequency band definition * @@ -233,6 +249,7 @@ struct ieee80211_supported_band { int n_channels; int n_bitrates; struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_vht_cap vht_cap; }; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 067c9fe02a7..5c4a720f044 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -921,6 +921,15 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.bands[band]->ht_cap.ampdu_density))) goto nla_put_failure; + /* add VHT info */ + if (dev->wiphy.bands[band]->vht_cap.vht_supported && + (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, + sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs), + &dev->wiphy.bands[band]->vht_cap.vht_mcs) || + nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, + dev->wiphy.bands[band]->vht_cap.cap))) + goto nla_put_failure; + /* add frequencies */ nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); if (!nl_freqs) -- cgit v1.2.3-70-g09d2 From 3840a06e6046aaee95f33a120499d2dc8c054b9d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 28 Jun 2012 12:34:19 +0000 Subject: tcp: pass fl6 to inet6_csk_route_req() This commit changes inet_csk_route_req() so that it uses a pointer to a struct flowi6, rather than allocating its own on the stack. This brings its behavior in line with its IPv4 cousin, inet_csk_route_req(), and allows a follow-on patch to fix a dst leak. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 1 + net/ipv6/inet6_connection_sock.c | 26 +++++++++++++------------- net/ipv6/tcp_ipv6.c | 9 ++++++--- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 1866a676c81..df2a857e853 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -26,6 +26,7 @@ extern int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax); extern struct dst_entry* inet6_csk_route_req(struct sock *sk, + struct flowi6 *fl6, const struct request_sock *req); extern struct request_sock *inet6_csk_search_req(const struct sock *sk, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e23d35424ca..bceb14450a1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -55,26 +55,26 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); struct dst_entry *inet6_csk_route_req(struct sock *sk, + struct flowi6 *fl6, const struct request_sock *req) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = treq->rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = treq->loc_addr; - fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet_rsk(req)->rmt_port; - fl6.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = IPPROTO_TCP; + fl6->daddr = treq->rmt_addr; + final_p = fl6_update_dst(fl6, np->opt, &final); + fl6->saddr = treq->loc_addr; + fl6->flowi6_oif = treq->iif; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_dport = inet_rsk(req)->rmt_port; + fl6->fl6_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(fl6)); + + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); if (IS_ERR(dst)) return NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fc0b96bf905..4e5fa5f6ec6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -477,7 +477,8 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, +static int tcp_v6_send_synack(struct sock *sk, + struct request_sock *req, struct request_values *rvp, u16 queue_mapping) { @@ -1058,6 +1059,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; + struct flowi6 fl6; bool want_cookie = false; if (skb->protocol == htons(ETH_P_IP)) @@ -1177,7 +1179,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, req)) != NULL && + (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL && (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, &treq->rmt_addr)) { @@ -1247,6 +1249,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; #endif + struct flowi6 fl6; if (skb->protocol == htons(ETH_P_IP)) { /* @@ -1309,7 +1312,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (!dst) { - dst = inet6_csk_route_req(sk, req); + dst = inet6_csk_route_req(sk, &fl6, req); if (!dst) goto out; } -- cgit v1.2.3-70-g09d2 From 58050fce3530939372e6c2f4b4beb76fcb4caa65 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 28 Jun 2012 03:57:45 +0000 Subject: net: Use NLMSG_DEFAULT_SIZE in combination with nlmsg_new() Using NLMSG_GOODSIZE results in multiple pages being used as nlmsg_new() will automatically add the size of the netlink header to the payload thus exceeding the page limit. NLMSG_DEFAULT_SIZE takes this into account. Signed-off-by: Thomas Graf Cc: Jiri Pirko Cc: Dmitry Eremin-Solenikov Cc: Sergey Lapin Cc: Johannes Berg Cc: Lauro Ramos Venancio Cc: Aloisio Almeida Jr Cc: Samuel Ortiz Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 8 ++++---- drivers/net/wireless/mac80211_hwsim.c | 2 +- include/net/genetlink.h | 2 ++ net/ieee802154/netlink.c | 4 ++-- net/ieee802154/nl-mac.c | 2 +- net/ieee802154/nl-phy.c | 2 +- net/l2tp/l2tp_netlink.c | 6 +++--- net/nfc/netlink.c | 18 +++++++++--------- net/wireless/nl80211.c | 26 +++++++++++++------------- 9 files changed, 36 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 5350eeaa22c..89853c31e7f 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1476,7 +1476,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) void *hdr; int err; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -1538,7 +1538,7 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; @@ -1648,7 +1648,7 @@ static int __send_and_alloc_skb(struct sk_buff **pskb, if (err) return err; } - *pskb = genlmsg_new(NLMSG_DEFAULT_SIZE - GENL_HDRLEN, GFP_KERNEL); + *pskb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!*pskb) return -ENOMEM; return 0; @@ -2016,7 +2016,7 @@ static int team_nl_send_event_port_list_get(struct team *team) int err; struct net *net = dev_net(team->dev); - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index a0b7cfd3468..a9ba3f7ea62 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -571,7 +571,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, skb_dequeue(&data->pending); } - skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (skb == NULL) goto nla_put_failure; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ccb68880abf..48905cd3884 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -5,6 +5,8 @@ #include #include +#define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN) + /** * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index c8097ae2482..97351e1d07a 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -44,7 +44,7 @@ struct genl_family nl802154_family = { struct sk_buff *ieee802154_nl_create(int flags, u8 req) { void *hdr; - struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); unsigned long f; if (!msg) @@ -80,7 +80,7 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, int flags, u8 req) { void *hdr; - struct sk_buff *msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return NULL; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index ca92587720f..1e9917124e7 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -530,7 +530,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!dev) return -ENODEV; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index eed291626da..d54be34cca9 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -101,7 +101,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!phy) return -ENODEV; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) goto out_dev; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index ddc553e7667..d71cd9229a4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -72,7 +72,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) void *hdr; int ret = -ENOBUFS; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; @@ -353,7 +353,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; @@ -699,7 +699,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto out; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 03c31db38f1..f4f07f9b61c 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -167,7 +167,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) dev->genl_data.poll_req_pid = 0; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -195,7 +195,7 @@ int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -226,7 +226,7 @@ int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -258,7 +258,7 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -288,7 +288,7 @@ int nfc_genl_device_added(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -321,7 +321,7 @@ int nfc_genl_device_removed(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -434,7 +434,7 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx, pr_debug("DEP link is up\n"); - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -473,7 +473,7 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev) pr_debug("DEP link is down\n"); - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; @@ -514,7 +514,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) if (!dev) return -ENODEV; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; goto out_putdev; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7ae54b82291..cbdc0fd67a1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7210,7 +7210,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, { struct sk_buff *msg; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -7286,7 +7286,7 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, { struct sk_buff *msg; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -7502,7 +7502,7 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -7542,7 +7542,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -7580,7 +7580,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -7842,7 +7842,7 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, { struct sk_buff *msg; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -7863,7 +7863,7 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8026,7 +8026,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct nlattr *pinfoattr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8069,7 +8069,7 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, struct nlattr *rekey_attr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8113,7 +8113,7 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, struct nlattr *attr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8157,7 +8157,7 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8192,7 +8192,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct nlattr *pinfoattr; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -8236,7 +8236,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, void *hdr; int err; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; -- cgit v1.2.3-70-g09d2 From 9e56e3800ea42e78b7c816bdd2d87d047be80541 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Jun 2012 18:54:02 -0700 Subject: ipv4: Adjust in_dev handling in fib_validate_source() Checking for in_dev being NULL is pointless. In fact, all of our callers have in_dev precomputed already, so just pass it in and remove the NULL checking. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 27 ++++++++++----------------- net/ipv4/route.c | 10 ++++++---- 3 files changed, 17 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9e6c26d4ba4..619f68a7185 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -233,7 +233,7 @@ extern void ip_fib_init(void); extern __be32 fib_compute_spec_dst(struct sk_buff *skb); extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, - u32 *itag); + struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); /* Exported by fib_semantics.c */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d13217e01f..c84cff52021 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -226,15 +226,14 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) * called with rcu_read_lock() */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, - int oif, struct net_device *dev, u32 *itag) + int oif, struct net_device *dev, struct in_device *idev, + u32 *itag) { - struct in_device *in_dev; - struct flowi4 fl4; + int ret, no_addr, rpf, accept_local; struct fib_result res; - int no_addr, rpf, accept_local; - bool dev_match; - int ret; + struct flowi4 fl4; struct net *net; + bool dev_match; fl4.flowi4_oif = 0; fl4.flowi4_iif = oif; @@ -244,19 +243,13 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; no_addr = rpf = accept_local = 0; - in_dev = __in_dev_get_rcu(dev); - if (in_dev) { - no_addr = in_dev->ifa_list == NULL; - - /* Ignore rp_filter for packets protected by IPsec. */ - rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev); + no_addr = idev->ifa_list == NULL; - accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); - fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - } + /* Ignore rp_filter for packets protected by IPsec. */ + rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (in_dev == NULL) - goto e_inval; + accept_local = IN_DEV_ACCEPT_LOCAL(idev); + fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); if (fib_lookup(net, &fl4, &res)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 83d56a01662..919d69e60ba 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1999,7 +1999,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (!ipv4_is_local_multicast(daddr)) goto e_inval; } else { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, + in_dev, &itag); if (err < 0) goto e_err; } @@ -2100,7 +2101,7 @@ static int __mkroute_input(struct sk_buff *skb, err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), - in_dev->dev, &itag); + in_dev->dev, in_dev, &itag); if (err < 0) { ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); @@ -2274,7 +2275,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, net->loopback_dev->ifindex, - dev, &itag); + dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) @@ -2295,7 +2296,8 @@ brd_input: goto e_inval; if (!ipv4_is_zeronet(saddr)) { - err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &itag); + err = fib_validate_source(skb, saddr, 0, tos, 0, dev, + in_dev, &itag); if (err < 0) goto martian_source_keep_err; if (err) -- cgit v1.2.3-70-g09d2 From d0087b29f77176480c27c203988b5704847d617c Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Thu, 28 Jun 2012 18:15:52 +0000 Subject: ipv6_tunnel: Allow receiving packets on the fallback tunnel if they pass sanity checks At Facebook, we do Layer-3 DSR via IP-in-IP tunneling. Our load balancers wrap an extra IP header on incoming packets so they can be routed to the backend. In the v4 tunnel driver, when these packets fall on the default tunl0 device, the behavior is to decapsulate them and drop them back on the stack. So our setup is that tunl0 has the VIP and eth0 has (obviously) the backend's real address. In IPv6 we do the same thing, but the v6 tunnel driver didn't have this same behavior - if you didn't have an explicit tunnel setup, it would drop the packet. This patch brings that v4 feature to the v6 driver. The same IPv6 address checks are performed as with any normal tunnel, but as the fallback tunnel endpoint addresses are unspecified, the checks must be performed on a per-packet basis, rather than at tunnel configuration time. [Patch description modified by phil@ipom.com] Signed-off-by: Ville Nuorvala Tested-by: Phil Dibowitz Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 ++ net/ipv6/ip6_tunnel.c | 65 ++++++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index fc73e667b50..358fb86f57e 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -9,6 +9,8 @@ #define IP6_TNL_F_CAP_XMIT 0x10000 /* capable of receiving packets */ #define IP6_TNL_F_CAP_RCV 0x20000 +/* determine capability on a per-packet basis */ +#define IP6_TNL_F_CAP_PER_PACKET 0x40000 /* IPv6 tunnel */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c9015fad8d6..04a3cba2c12 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -684,24 +684,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } +static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) +{ + struct ip6_tnl_parm *p = &t->parms; + int ltype = ipv6_addr_type(laddr); + int rtype = ipv6_addr_type(raddr); + __u32 flags = 0; + + if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) { + flags = IP6_TNL_F_CAP_PER_PACKET; + } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && + !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && + (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { + if (ltype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_XMIT; + if (rtype&IPV6_ADDR_UNICAST) + flags |= IP6_TNL_F_CAP_RCV; + } + return flags; +} + /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t) +static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); - if (p->flags & IP6_TNL_F_CAP_RCV) { + if ((p->flags & IP6_TNL_F_CAP_RCV) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) { struct net_device *ldev = NULL; if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if ((ipv6_addr_is_multicast(&p->laddr) || - likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) && - likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0))) + if ((ipv6_addr_is_multicast(laddr) || + likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(!ipv6_chk_addr(net, raddr, NULL, 0))) ret = 1; - } return ret; } @@ -740,7 +766,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, goto discard; } - if (!ip6_tnl_rcv_ctl(t)) { + if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { t->dev->stats.rx_dropped++; rcu_read_unlock(); goto discard; @@ -1114,25 +1140,6 @@ tx_err: return NETDEV_TX_OK; } -static void ip6_tnl_set_cap(struct ip6_tnl *t) -{ - struct ip6_tnl_parm *p = &t->parms; - int ltype = ipv6_addr_type(&p->laddr); - int rtype = ipv6_addr_type(&p->raddr); - - p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV); - - if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) && - !((ltype|rtype) & IPV6_ADDR_LOOPBACK) && - (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) { - if (ltype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_XMIT; - if (rtype&IPV6_ADDR_UNICAST) - p->flags |= IP6_TNL_F_CAP_RCV; - } -} - static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; @@ -1153,7 +1160,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; - ip6_tnl_set_cap(t); + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV) dev->flags |= IFF_POINTOPOINT; @@ -1438,6 +1446,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) t->parms.proto = IPPROTO_IPV6; dev_hold(dev); + + ip6_tnl_link_config(t); + rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } -- cgit v1.2.3-70-g09d2 From 7a9bc9b81a5bc6e44ebc80ef781332e4385083f2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Jun 2012 01:32:45 -0700 Subject: ipv4: Elide fib_validate_source() completely when possible. If rpfilter is off (or the SKB has an IPSEC path) and there are not tclassid users, we don't have to do anything at all when fib_validate_source() is invoked besides setting the itag to zero. We monitor tclassid uses with a counter (modified only under RTNL and marked __read_mostly) and we protect the fib_validate_source() real work with a test against this counter and whether rpfilter is to be done. Having a way to know whether we need no tclassid processing or not also opens the door for future optimized rpfilter algorithms that do not perform full FIB lookups. Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 + include/net/ip_fib.h | 5 +++++ net/core/fib_rules.c | 4 ++++ net/ipv4/fib_frontend.c | 32 ++++++++++++++++++++++++-------- net/ipv4/fib_rules.c | 16 +++++++++++++++- net/ipv4/fib_semantics.c | 10 ++++++++++ 6 files changed, 59 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 075f1e3a0fe..e361f488242 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -52,6 +52,7 @@ struct fib_rules_ops { struct sk_buff *, struct fib_rule_hdr *, struct nlattr **); + void (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, struct nlattr **); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 619f68a7185..3dc7c96bbea 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -235,6 +235,11 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); +#ifdef CONFIG_IP_ROUTE_CLASSID +extern int fib_num_tclassid_users; +#else +#define fib_num_tclassid_users 0 +#endif /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 72cceb79d0d..ab7db83236c 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -151,6 +151,8 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) { list_del_rcu(&rule->list); + if (ops->delete) + ops->delete(rule); fib_rule_put(rule); } } @@ -499,6 +501,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + if (ops->delete) + ops->delete(rule); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c84cff52021..ae528d1b293 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -217,6 +218,10 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } +#ifdef CONFIG_IP_ROUTE_CLASSID +int fib_num_tclassid_users __read_mostly; +#endif + /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. @@ -225,11 +230,11 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) * - check, that packet arrived from expected physical interface. * called with rcu_read_lock() */ -int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, - int oif, struct net_device *dev, struct in_device *idev, - u32 *itag) +static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + u8 tos, int oif, struct net_device *dev, + int rpf, struct in_device *idev, u32 *itag) { - int ret, no_addr, rpf, accept_local; + int ret, no_addr, accept_local; struct fib_result res; struct flowi4 fl4; struct net *net; @@ -242,12 +247,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; - no_addr = rpf = accept_local = 0; + no_addr = accept_local = 0; no_addr = idev->ifa_list == NULL; - /* Ignore rp_filter for packets protected by IPsec. */ - rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - accept_local = IN_DEV_ACCEPT_LOCAL(idev); fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; @@ -303,6 +305,20 @@ e_rpf: return -EXDEV; } +/* Ignore rp_filter for packets protected by IPsec. */ +int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + u8 tos, int oif, struct net_device *dev, + struct in_device *idev, u32 *itag) +{ + int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); + + if (!r && !fib_num_tclassid_users) { + *itag = 0; + return 0; + } + return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); +} + static inline __be32 sk_extract_addr(struct sockaddr *addr) { return ((struct sockaddr_in *) addr)->sin_addr.s_addr; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 2d043f71ef7..b23fd952c84 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -169,8 +169,11 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dst = nla_get_be32(tb[FRA_DST]); #ifdef CONFIG_IP_ROUTE_CLASSID - if (tb[FRA_FLOW]) + if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); + if (rule4->tclassid) + fib_num_tclassid_users++; + } #endif rule4->src_len = frh->src_len; @@ -184,6 +187,16 @@ errout: return err; } +static void fib4_rule_delete(struct fib_rule *rule) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + + if (rule4->tclassid) + fib_num_tclassid_users--; +#endif +} + static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { @@ -256,6 +269,7 @@ static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { .action = fib4_rule_action, .match = fib4_rule_match, .configure = fib4_rule_configure, + .delete = fib4_rule_delete, .compare = fib4_rule_compare, .fill = fib4_rule_fill, .default_pref = fib_default_rule_pref, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 415f8230fc8..c46c20b6b0b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -163,6 +163,12 @@ void free_fib_info(struct fib_info *fi) return; } fib_info_cnt--; +#ifdef CONFIG_IP_ROUTE_CLASSID + change_nexthops(fi) { + if (nexthop_nh->nh_tclassid) + fib_num_tclassid_users--; + } endfor_nexthops(fi); +#endif call_rcu(&fi->rcu, free_fib_info_rcu); } @@ -421,6 +427,8 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; + if (nexthop_nh->nh_tclassid) + fib_num_tclassid_users++; #endif } @@ -815,6 +823,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) nh->nh_flags = cfg->fc_flags; #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; + if (nh->nh_tclassid) + fib_num_tclassid_users++; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; -- cgit v1.2.3-70-g09d2 From f4489ebeffa436c8427a20e2f05004e783708cde Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:46:58 +0200 Subject: cfg80211: add channel tracking for AP and mesh We need to know which channel is used by a running AP and mesh for channel context accounting and finding matching/active interface combination. STA/IBSS have current_bss already which allows us to check which channel a vif is tuned to. Non-fixed channel IBSS can be handled with additional changes. Monitor mode is going to be handled differently. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ net/wireless/ap.c | 4 +++- net/wireless/mesh.c | 18 ++++++++++++++---- net/wireless/mlme.c | 1 + net/wireless/nl80211.c | 1 + 5 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1fc89c4f930..c62bc7864ad 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2408,6 +2408,9 @@ struct wireless_dev { struct ieee80211_channel *preset_chan; enum nl80211_channel_type preset_chantype; + /* for AP and mesh channel tracking */ + struct ieee80211_channel *channel; + bool ps; int ps_timeout; diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 45199cca63d..fcc60d8dbef 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -24,8 +24,10 @@ static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return -ENOENT; err = rdev->ops->stop_ap(&rdev->wiphy, dev); - if (!err) + if (!err) { wdev->beacon_interval = 0; + wdev->channel = NULL; + } return err; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 3b73b07486c..bab38134472 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -159,6 +159,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!err) { memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); wdev->mesh_id_len = setup->mesh_id_len; + wdev->channel = setup->channel; } return err; @@ -184,6 +185,7 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, enum nl80211_channel_type channel_type) { struct ieee80211_channel *channel; + int err; channel = rdev_freq_to_chan(rdev, freq, channel_type); if (!channel || !cfg80211_can_beacon_sec_chan(&rdev->wiphy, @@ -205,9 +207,14 @@ int cfg80211_set_mesh_freq(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return -ENETDOWN; - return rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, - wdev->netdev, - channel); + + err = rdev->ops->libertas_set_mesh_channel(&rdev->wiphy, + wdev->netdev, + channel); + if (!err) + wdev->channel = channel; + + return err; } if (wdev->mesh_id_len) @@ -249,8 +256,11 @@ static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, return -ENOTCONN; err = rdev->ops->leave_mesh(&rdev->wiphy, dev); - if (!err) + if (!err) { wdev->mesh_id_len = 0; + wdev->channel = NULL; + } + return err; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index da4406f1192..a7882eb8c46 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -947,6 +947,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq, if (WARN_ON(!chan)) goto out; + wdev->channel = chan; nl80211_ch_switch_notify(rdev, dev, freq, type, GFP_KERNEL); out: wdev_unlock(wdev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 20d0fd6d128..12096b4ebf6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2488,6 +2488,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->preset_chan = params.channel; wdev->preset_chantype = params.channel_type; wdev->beacon_interval = params.beacon_interval; + wdev->channel = params.channel; } return err; } -- cgit v1.2.3-70-g09d2 From c30a3d38689bc601e03d5f2ad3c37d8ea13e46ca Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:46:59 +0200 Subject: cfg80211: track ibss fixed channel IBSS may hop between channels. It is necessary to account this special case when considering interface combinations. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/ibss.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c62bc7864ad..e030c6af86d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2411,6 +2411,8 @@ struct wireless_dev { /* for AP and mesh channel tracking */ struct ieee80211_channel *channel; + bool ibss_fixed; + bool ps; int ps_timeout; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 89baa332841..b90fd86b2d1 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -113,6 +113,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree(wdev->connect_keys); wdev->connect_keys = connkeys; + wdev->ibss_fixed = params->channel_fixed; #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.channel = params->channel; #endif -- cgit v1.2.3-70-g09d2 From dbbae26afa81320b3315fb4ad755b20f1ff256b4 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:47:01 +0200 Subject: cfg80211: track monitor interfaces count Implements .set_monitor_enabled(wiphy, enabled). Notifies driver upon change of interface layout. If only monitor interfaces become present it is called with 2nd argument being true. If non-monitor interface appears then 2nd argument is false. Driver is notified only upon change. This makes it more obvious about the fact that cfg80211 supports single monitor channel. Once we implement multi-channel we don't want to allow setting monitor channel while other interface types are running. Otherwise it would be ambiguous once we start considering num_different_channels. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ net/wireless/core.c | 24 ++++++++++++++++++++++++ net/wireless/core.h | 14 ++++++++++++++ net/wireless/util.c | 5 +++++ 4 files changed, 47 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e030c6af86d..f0d213dd8fe 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1499,6 +1499,8 @@ struct cfg80211_gtk_rekey_data { * interfaces are active this callback should reject the configuration. * If no interfaces are active or the device is down, the channel should * be stored for when a monitor interface becomes active. + * @set_monitor_enabled: Notify driver that there are only monitor + * interfaces running. * @get_channel: Get the current operating channel, should return %NULL if * there's no single defined operating channel if for example the * device implements channel hopping for multi-channel virtual interfaces. @@ -1817,6 +1819,8 @@ struct cfg80211_ops { struct ethtool_stats *stats, u64 *data); void (*get_et_strings)(struct wiphy *wiphy, struct net_device *dev, u32 sset, u8 *data); + + void (*set_monitor_enabled)(struct wiphy *wiphy, bool enabled); }; /* diff --git a/net/wireless/core.c b/net/wireless/core.c index c65f59c952c..8412da7d0f2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -717,6 +717,24 @@ static struct device_type wiphy_type = { .name = "wlan", }; +void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, int num) +{ + bool has_monitors_only_old = cfg80211_has_monitors_only(rdev); + bool has_monitors_only_new; + + ASSERT_RDEV_LOCK(rdev); + + rdev->num_running_ifaces += num; + if (iftype == NL80211_IFTYPE_MONITOR) + rdev->num_running_monitor_ifaces += num; + + has_monitors_only_new = cfg80211_has_monitors_only(rdev); + if (has_monitors_only_new != has_monitors_only_old) + rdev->ops->set_monitor_enabled(&rdev->wiphy, + has_monitors_only_new); +} + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ndev) @@ -820,6 +838,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: dev_hold(dev); + cfg80211_lock_rdev(rdev); + cfg80211_update_iface_num(rdev, wdev->iftype, -1); + cfg80211_unlock_rdev(rdev); queue_work(cfg80211_wq, &wdev->cleanup_work); break; case NETDEV_UP: @@ -927,6 +948,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ret = cfg80211_can_add_interface(rdev, wdev->iftype); if (ret) return notifier_from_errno(ret); + cfg80211_lock_rdev(rdev); + cfg80211_update_iface_num(rdev, wdev->iftype, 1); + cfg80211_unlock_rdev(rdev); break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 56f18c2eb91..99acd51343b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -56,6 +56,9 @@ struct cfg80211_registered_device { u32 ap_beacons_nlpid; + int num_running_ifaces; + int num_running_monitor_ifaces; + /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; @@ -197,6 +200,14 @@ static inline void wdev_unlock(struct wireless_dev *wdev) #define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) +static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) +{ + ASSERT_RDEV_LOCK(rdev); + + return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces && + rdev->num_running_ifaces > 0; +} + enum cfg80211_event_type { EVENT_CONNECT_RESULT, EVENT_ROAMED, @@ -444,6 +455,9 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband, int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, u32 beacon_int); +void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, + enum nl80211_iftype iftype, int num); + #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS #define CFG80211_DEV_WARN_ON(cond) WARN_ON(cond) #else diff --git a/net/wireless/util.c b/net/wireless/util.c index fc948d0a53f..9b92ec57d07 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -871,6 +871,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, } } + if (!err && ntype != otype && netif_running(dev)) { + cfg80211_update_iface_num(rdev, ntype, 1); + cfg80211_update_iface_num(rdev, otype, -1); + } + return err; } -- cgit v1.2.3-70-g09d2 From 2e165b818456ecc1024dd0387eeac64745526377 Mon Sep 17 00:00:00 2001 From: Michal Kazior Date: Fri, 29 Jun 2012 12:47:06 +0200 Subject: cfg80211/mac80211: remove .get_channel We do not need it anymore since cfg80211 tracks monitor channel and monitor channel type. Signed-off-by: Michal Kazior Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ------ net/mac80211/cfg.c | 11 ----------- net/wireless/nl80211.c | 15 +++++---------- net/wireless/wext-compat.c | 9 ++------- 4 files changed, 7 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f0d213dd8fe..fa269347355 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1501,9 +1501,6 @@ struct cfg80211_gtk_rekey_data { * be stored for when a monitor interface becomes active. * @set_monitor_enabled: Notify driver that there are only monitor * interfaces running. - * @get_channel: Get the current operating channel, should return %NULL if - * there's no single defined operating channel if for example the - * device implements channel hopping for multi-channel virtual interfaces. * * @scan: Request to do a scan. If returning zero, the scan request is given * the driver, and will be valid until passed to cfg80211_scan_done(). @@ -1810,9 +1807,6 @@ struct cfg80211_ops { struct net_device *dev, u16 noack_map); - struct ieee80211_channel *(*get_channel)(struct wiphy *wiphy, - enum nl80211_channel_type *type); - int (*get_et_sset_count)(struct wiphy *wiphy, struct net_device *dev, int sset); void (*get_et_stats)(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ea4b1ea9105..ccbe2413142 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2980,16 +2980,6 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return 0; } -static struct ieee80211_channel * -ieee80211_wiphy_get_channel(struct wiphy *wiphy, - enum nl80211_channel_type *type) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - - *type = local->_oper_channel_type; - return local->oper_channel; -} - static void ieee80211_set_monitor_enabled(struct wiphy *wiphy, bool enabled) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -3073,7 +3063,6 @@ struct cfg80211_ops mac80211_config_ops = { .tdls_oper = ieee80211_tdls_oper, .tdls_mgmt = ieee80211_tdls_mgmt, .probe_client = ieee80211_probe_client, - .get_channel = ieee80211_wiphy_get_channel, .set_noack_map = ieee80211_set_noack_map, .set_monitor_enabled = ieee80211_set_monitor_enabled, #ifdef CONFIG_PM diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 12096b4ebf6..5d29ed1f7c6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1689,16 +1689,11 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, (cfg80211_rdev_list_generation << 2))) goto nla_put_failure; - if (rdev->ops->get_channel) { - struct ieee80211_channel *chan; - enum nl80211_channel_type channel_type; - - chan = rdev->ops->get_channel(&rdev->wiphy, &channel_type); - if (chan && - (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, - chan->center_freq) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, - channel_type))) + if (rdev->monitor_channel) { + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, + rdev->monitor_channel->center_freq) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + rdev->monitor_channel_type)) goto nla_put_failure; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index bc879833b21..7df42f54187 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -827,8 +827,6 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - struct ieee80211_channel *chan; - enum nl80211_channel_type channel_type; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -836,13 +834,10 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: - if (!rdev->ops->get_channel) + if (!rdev->monitor_channel) return -EINVAL; - chan = rdev->ops->get_channel(wdev->wiphy, &channel_type); - if (!chan) - return -EINVAL; - freq->m = chan->center_freq; + freq->m = rdev->monitor_channel->center_freq; freq->e = 6; return 0; default: -- cgit v1.2.3-70-g09d2 From 38b3fef1730319e2730af3fc9f73698e3a9aeb4a Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 15 Jun 2012 11:50:28 +0300 Subject: Bluetooth: Improve debugging messages for hci_conn Improve debugging of hci_conn objects by: adding print to hci_conn refcounting, adding object spcifier when missing, change conn to hcon since conn is heavily used for l2cap_conn objects and this is misleading. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/hci_core.h | 6 ++++++ net/bluetooth/hci_conn.c | 44 ++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 75766b7f0dc..475b8c04ba5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -587,12 +587,18 @@ void hci_conn_put_device(struct hci_conn *conn); static inline void hci_conn_hold(struct hci_conn *conn) { + BT_DBG("hcon %p refcnt %d -> %d", conn, atomic_read(&conn->refcnt), + atomic_read(&conn->refcnt) + 1); + atomic_inc(&conn->refcnt); cancel_delayed_work(&conn->disc_work); } static inline void hci_conn_put(struct hci_conn *conn) { + BT_DBG("hcon %p refcnt %d -> %d", conn, atomic_read(&conn->refcnt), + atomic_read(&conn->refcnt) - 1); + if (atomic_dec_and_test(&conn->refcnt)) { unsigned long timeo; if (conn->type == ACL_LINK || conn->type == LE_LINK) { diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2fcced377e5..9bbef6e95d2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -107,7 +107,7 @@ static void hci_acl_connect_cancel(struct hci_conn *conn) { struct hci_cp_create_conn_cancel cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) return; @@ -120,7 +120,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_DISCONN; @@ -134,7 +134,7 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) struct hci_dev *hdev = conn->hdev; struct hci_cp_add_sco cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; @@ -152,7 +152,7 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) struct hci_dev *hdev = conn->hdev; struct hci_cp_setup_sync_conn cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); conn->state = BT_CONNECT; conn->out = true; @@ -196,7 +196,7 @@ void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __u8 rand[8], struct hci_dev *hdev = conn->hdev; struct hci_cp_le_start_enc cp; - BT_DBG("%p", conn); + BT_DBG("hcon %p", conn); memset(&cp, 0, sizeof(cp)); @@ -213,11 +213,11 @@ void hci_sco_setup(struct hci_conn *conn, __u8 status) { struct hci_conn *sco = conn->link; - BT_DBG("%p", conn); - if (!sco) return; + BT_DBG("hcon %p", conn); + if (!status) { if (lmp_esco_capable(conn->hdev)) hci_setup_sync(sco, conn->handle); @@ -235,7 +235,7 @@ static void hci_conn_timeout(struct work_struct *work) disc_work.work); __u8 reason; - BT_DBG("conn %p state %s", conn, state_to_string(conn->state)); + BT_DBG("hcon %p state %s", conn, state_to_string(conn->state)); if (atomic_read(&conn->refcnt)) return; @@ -266,7 +266,7 @@ static void hci_conn_enter_sniff_mode(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p mode %d", conn, conn->mode); + BT_DBG("hcon %p mode %d", conn, conn->mode); if (test_bit(HCI_RAW, &hdev->flags)) return; @@ -301,7 +301,7 @@ static void hci_conn_idle(unsigned long arg) { struct hci_conn *conn = (void *) arg; - BT_DBG("conn %p mode %d", conn, conn->mode); + BT_DBG("hcon %p mode %d", conn, conn->mode); hci_conn_enter_sniff_mode(conn); } @@ -382,7 +382,7 @@ int hci_conn_del(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; - BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle); + BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle); del_timer(&conn->idle_timer); @@ -557,7 +557,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, /* Check link security requirement */ int hci_conn_check_link_mode(struct hci_conn *conn) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (hci_conn_ssp_enabled(conn) && !(conn->link_mode & HCI_LM_ENCRYPT)) return 0; @@ -568,7 +568,7 @@ int hci_conn_check_link_mode(struct hci_conn *conn) /* Authenticate remote device */ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (conn->pending_sec_level > sec_level) sec_level = conn->pending_sec_level; @@ -602,7 +602,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) /* Encrypt the the link */ static void hci_conn_encrypt(struct hci_conn *conn) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (!test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { struct hci_cp_set_conn_encrypt cp; @@ -616,7 +616,7 @@ static void hci_conn_encrypt(struct hci_conn *conn) /* Enable security */ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); /* For sdp we don't need the link key. */ if (sec_level == BT_SECURITY_SDP) @@ -669,7 +669,7 @@ EXPORT_SYMBOL(hci_conn_security); /* Check secure link requirement */ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (sec_level != BT_SECURITY_HIGH) return 1; /* Accept if non-secure is required */ @@ -684,7 +684,7 @@ EXPORT_SYMBOL(hci_conn_check_secure); /* Change link key */ int hci_conn_change_link_key(struct hci_conn *conn) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_change_conn_link_key cp; @@ -699,7 +699,7 @@ int hci_conn_change_link_key(struct hci_conn *conn) /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); if (!role && conn->link_mode & HCI_LM_MASTER) return 1; @@ -720,7 +720,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active) { struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p mode %d", conn, conn->mode); + BT_DBG("hcon %p mode %d", conn, conn->mode); if (test_bit(HCI_RAW, &hdev->flags)) return; @@ -894,7 +894,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn) struct hci_dev *hdev = conn->hdev; struct hci_chan *chan; - BT_DBG("%s conn %p", hdev->name, conn); + BT_DBG("%s hcon %p", hdev->name, conn); chan = kzalloc(sizeof(struct hci_chan), GFP_KERNEL); if (!chan) @@ -913,7 +913,7 @@ int hci_chan_del(struct hci_chan *chan) struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; - BT_DBG("%s conn %p chan %p", hdev->name, conn, chan); + BT_DBG("%s hcon %p chan %p", hdev->name, conn, chan); list_del_rcu(&chan->list); @@ -929,7 +929,7 @@ void hci_chan_list_flush(struct hci_conn *conn) { struct hci_chan *chan, *n; - BT_DBG("conn %p", conn); + BT_DBG("hcon %p", conn); list_for_each_entry_safe(chan, n, &conn->chan_list, list) hci_chan_del(chan); -- cgit v1.2.3-70-g09d2 From 4244854d22bf8f782698c5224b9191c8d2d42610 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 30 Jun 2012 03:04:26 +0000 Subject: sctp: be more restrictive in transport selection on bundled sacks It was noticed recently that when we send data on a transport, its possible that we might bundle a sack that arrived on a different transport. While this isn't a major problem, it does go against the SHOULD requirement in section 6.4 of RFC 2960: An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK, etc.) to the same destination transport address from which it received the DATA or control chunk to which it is replying. This rule should also be followed if the endpoint is bundling DATA chunks together with the reply chunk. This patch seeks to correct that. It restricts the bundling of sack operations to only those transports which have moved the ctsn of the association forward since the last sack. By doing this we guarantee that we only bundle outbound saks on a transport that has received a chunk since the last sack. This brings us into stricter compliance with the RFC. Vlad had initially suggested that we strictly allow only sack bundling on the transport that last moved the ctsn forward. While this makes sense, I was concerned that doing so prevented us from bundling in the case where we had received chunks that moved the ctsn on multiple transports. In those cases, the RFC allows us to select any of the transports having received chunks to bundle the sack on. so I've modified the approach to allow for that, by adding a state variable to each transport that tracks weather it has moved the ctsn since the last sack. This I think keeps our behavior (and performance), close enough to our current profile that I think we can do this without a sysctl knob to enable/disable it. Signed-off-by: Neil Horman CC: Vlad Yaseivch CC: David S. Miller CC: linux-sctp@vger.kernel.org Reported-by: Michele Baldessari Reported-by: sorin serban Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++++ include/net/sctp/tsnmap.h | 3 ++- net/sctp/associola.c | 1 + net/sctp/output.c | 5 +++++ net/sctp/sm_make_chunk.c | 16 ++++++++++++++++ net/sctp/sm_sideeffect.c | 2 +- net/sctp/transport.c | 2 ++ net/sctp/tsnmap.c | 6 +++++- net/sctp/ulpevent.c | 3 ++- net/sctp/ulpqueue.c | 2 +- 10 files changed, 39 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e4652fe5895..fecdf31816f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -912,6 +912,9 @@ struct sctp_transport { /* Is this structure kfree()able? */ malloced:1; + /* Has this transport moved the ctsn since we last sacked */ + __u32 sack_generation; + struct flowi fl; /* This is the peer's IP address and port. */ @@ -1584,6 +1587,7 @@ struct sctp_association { */ __u8 sack_needed; /* Do we need to sack the peer? */ __u32 sack_cnt; + __u32 sack_generation; /* These are capabilities which our peer advertised. */ __u8 ecn_capable:1, /* Can peer do ECN? */ diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index e7728bc14cc..2c5d2b4d5d1 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map); int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn); +int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn, + struct sctp_transport *trans); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5bc9ab161b3..b16517ee1aa 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a */ asoc->peer.sack_needed = 1; asoc->peer.sack_cnt = 0; + asoc->peer.sack_generation = 1; /* Assume that the peer will tell us if he recognizes ASCONF * as part of INIT exchange. diff --git a/net/sctp/output.c b/net/sctp/output.c index f1b7d4bb591..6ae47acaaec 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -248,6 +248,11 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, /* If the SACK timer is running, we have a pending SACK */ if (timer_pending(timer)) { struct sctp_chunk *sack; + + if (pkt->transport->sack_generation != + pkt->transport->asoc->peer.sack_generation) + return retval; + asoc->a_rwnd = asoc->rwnd; sack = sctp_make_sack(asoc); if (sack) { diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a85eeeb55dd..b6de71efb14 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -734,8 +734,10 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) int len; __u32 ctsn; __u16 num_gabs, num_dup_tsns; + struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + struct sctp_transport *trans; memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); @@ -805,6 +807,20 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns, sctp_tsnmap_get_dups(map)); + /* Once we have a sack generated, check to see what our sack + * generation is, if its 0, reset the transports to 0, and reset + * the association generation to 1 + * + * The idea is that zero is never used as a valid generation for the + * association so no transport will match after a wrap event like this, + * Until the next sack + */ + if (++aptr->peer.sack_generation == 0) { + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) + trans->sack_generation = 0; + aptr->peer.sack_generation = 1; + } nodata: return retval; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c96d1a81cf4..8716da1a859 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1268,7 +1268,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ error = sctp_tsnmap_mark(&asoc->peer.tsn_map, - cmd->obj.u32); + cmd->obj.u32, NULL); break; case SCTP_CMD_REPORT_FWDTSN: diff --git a/net/sctp/transport.c b/net/sctp/transport.c index b026ba0c699..1dcceb6e0ce 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -68,6 +68,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); memset(&peer->saddr, 0, sizeof(union sctp_addr)); + peer->sack_generation = 0; + /* From 6.3.1 RTO Calculation: * * C1) Until an RTT measurement has been made for a packet sent to the diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f1e40cebc98..b5fb7c40902 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -114,7 +114,8 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) /* Mark this TSN as seen. */ -int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, + struct sctp_transport *trans) { u16 gap; @@ -133,6 +134,9 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) */ map->max_tsn_seen++; map->cumulative_tsn_ack_point++; + if (trans) + trans->sack_generation = + trans->asoc->peer.sack_generation; map->base_tsn++; } else { /* Either we already have a gap, or about to record a gap, so diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 8a84017834c..33d89477619 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -715,7 +715,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * can mark it as received so the tsn_map is updated correctly. */ if (sctp_tsnmap_mark(&asoc->peer.tsn_map, - ntohl(chunk->subh.data_hdr->tsn))) + ntohl(chunk->subh.data_hdr->tsn), + chunk->transport)) goto fail_mark; /* First calculate the padding, so we don't inadvertently diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f2d1de7f2ff..f5a6a4f4faf 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1051,7 +1051,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, if (chunk && (freed >= needed)) { __u32 tsn; tsn = ntohl(chunk->subh.data_hdr->tsn); - sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn); + sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); sctp_ulpq_tail_data(ulpq, chunk, gfp); sctp_ulpq_partial_delivery(ulpq, chunk, gfp); -- cgit v1.2.3-70-g09d2 From 3a0c52a6d82cc41da965284412608c74aece34e4 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Mon, 2 Jul 2012 09:32:32 +0300 Subject: cfg80211: add 802.11ad (60gHz band) support Add enumerations for both cfg80211 and nl80211. This expands wiphy.bands etc. arrays. Extend channel <-> frequency translation to cover 60g band and modify the rate check logic since there are no legacy mandatory rates (only MCS is used.) Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/3945-rs.c | 2 +- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 2 ++ net/mac80211/tx.c | 2 ++ net/wireless/core.c | 10 ++++++++-- net/wireless/util.c | 35 +++++++++++++++++++++++++-------- 6 files changed, 42 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlegacy/3945-rs.c b/drivers/net/wireless/iwlegacy/3945-rs.c index 4b10157d868..d4fd29ad90d 100644 --- a/drivers/net/wireless/iwlegacy/3945-rs.c +++ b/drivers/net/wireless/iwlegacy/3945-rs.c @@ -946,7 +946,7 @@ il3945_rate_scale_init(struct ieee80211_hw *hw, s32 sta_id) case IEEE80211_BAND_5GHZ: rs_sta->expected_tpt = il3945_expected_tpt_a; break; - case IEEE80211_NUM_BANDS: + default: BUG(); break; } diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 23003272c70..74cc55c1bf2 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -2545,10 +2545,12 @@ enum nl80211_tx_rate_attributes { * enum nl80211_band - Frequency band * @NL80211_BAND_2GHZ: 2.4 GHz ISM band * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz) + * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) */ enum nl80211_band { NL80211_BAND_2GHZ, NL80211_BAND_5GHZ, + NL80211_BAND_60GHZ, }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa269347355..0b564e83a24 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -70,11 +70,13 @@ * * @IEEE80211_BAND_2GHZ: 2.4GHz ISM band * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) + * @IEEE80211_BAND_60GHZ: around 60 GHz band (58.32 - 64.80 GHz) * @IEEE80211_NUM_BANDS: number of defined bands */ enum ieee80211_band { IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, + IEEE80211_BAND_60GHZ = NL80211_BAND_60GHZ, /* keep last */ IEEE80211_NUM_BANDS diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4e753032e48..4990f4fb586 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -140,6 +140,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; + case IEEE80211_BAND_60GHZ: + /* TODO, for now fall through */ case IEEE80211_NUM_BANDS: WARN_ON(1); break; diff --git a/net/wireless/core.c b/net/wireless/core.c index ca2b95f2484..e13365f1fa6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -468,8 +468,14 @@ int wiphy_register(struct wiphy *wiphy) continue; sband->band = band; - - if (WARN_ON(!sband->n_channels || !sband->n_bitrates)) + if (WARN_ON(!sband->n_channels)) + return -EINVAL; + /* + * on 60gHz band, there are no legacy rates, so + * n_bitrates is 0 + */ + if (WARN_ON(band != IEEE80211_BAND_60GHZ && + !sband->n_bitrates)) return -EINVAL; /* diff --git a/net/wireless/util.c b/net/wireless/util.c index a9260ac85cf..0228c64e73d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -35,19 +35,29 @@ int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J * there are overlapping channel numbers in 5GHz and 2GHz bands */ - if (band == IEEE80211_BAND_5GHZ) { - if (chan >= 182 && chan <= 196) - return 4000 + chan * 5; - else - return 5000 + chan * 5; - } else { /* IEEE80211_BAND_2GHZ */ + if (chan <= 0) + return 0; /* not supported */ + switch (band) { + case IEEE80211_BAND_2GHZ: if (chan == 14) return 2484; else if (chan < 14) return 2407 + chan * 5; + break; + case IEEE80211_BAND_5GHZ: + if (chan >= 182 && chan <= 196) + return 4000 + chan * 5; else - return 0; /* not supported */ + return 5000 + chan * 5; + break; + case IEEE80211_BAND_60GHZ: + if (chan < 5) + return 56160 + chan * 2160; + break; + default: + ; } + return 0; /* not supported */ } EXPORT_SYMBOL(ieee80211_channel_to_frequency); @@ -60,8 +70,12 @@ int ieee80211_frequency_to_channel(int freq) return (freq - 2407) / 5; else if (freq >= 4910 && freq <= 4980) return (freq - 4000) / 5; - else + else if (freq <= 45000) /* DMG band lower limit */ return (freq - 5000) / 5; + else if (freq >= 58320 && freq <= 64800) + return (freq - 56160) / 2160; + else + return 0; } EXPORT_SYMBOL(ieee80211_frequency_to_channel); @@ -137,6 +151,11 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband, } WARN_ON(want != 0 && want != 3 && want != 6); break; + case IEEE80211_BAND_60GHZ: + /* check for mandatory HT MCS 1..4 */ + WARN_ON(!sband->ht_cap.ht_supported); + WARN_ON((sband->ht_cap.mcs.rx_mask[0] & 0x1e) != 0x1e); + break; case IEEE80211_NUM_BANDS: WARN_ON(1); break; -- cgit v1.2.3-70-g09d2 From cb831b537d50d21f6afb5dffbde4cf6523627461 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 2 Jul 2012 15:40:18 +0200 Subject: mac80211: remove tx_frags driver callback The implementation of tx_frags is buggy due to not handling queue stop, and there's no driver implementing it so remove it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 --------------- net/mac80211/driver-ops.h | 8 -------- net/mac80211/main.c | 2 +- net/mac80211/tx.c | 7 ++----- 4 files changed, 3 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b5da094468f..dc2a97af95e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1896,19 +1896,6 @@ enum ieee80211_rate_control_changed { * The low-level driver should send the frame out based on * configuration in the TX control data. This handler should, * preferably, never fail and stop queues appropriately. - * This must be implemented if @tx_frags is not. - * Must be atomic. - * - * @tx_frags: Called to transmit multiple fragments of a single MSDU. - * This handler must consume all fragments, sending out some of - * them only is useless and it can't ask for some of them to be - * queued again. If the frame is not fragmented the queue has a - * single SKB only. To avoid issues with the networking stack - * when TX status is reported the frames should be removed from - * the skb queue. - * If this is used, the tx_info @vif and @sta pointers will be - * invalid -- you must not use them in that case. - * This must be implemented if @tx isn't. * Must be atomic. * * @start: Called before the first netdevice attached to the hardware @@ -2260,8 +2247,6 @@ enum ieee80211_rate_control_changed { */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); - void (*tx_frags)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, struct sk_buff_head *skbs); int (*start)(struct ieee80211_hw *hw); void (*stop)(struct ieee80211_hw *hw); #ifdef CONFIG_PM diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 44e8c124278..5042151a332 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -27,14 +27,6 @@ static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) local->ops->tx(&local->hw, skb); } -static inline void drv_tx_frags(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct sk_buff_head *skbs) -{ - local->ops->tx_frags(&local->hw, vif, sta, skbs); -} - static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata, u32 sset, u8 *data) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ab32c59be89..c794101f898 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -587,7 +587,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); - BUG_ON(!ops->tx && !ops->tx_frags); + BUG_ON(!ops->tx); BUG_ON(!ops->start); BUG_ON(!ops->stop); BUG_ON(!ops->config); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4990f4fb586..364a1e7b4af 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1295,11 +1295,8 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } - if (local->ops->tx_frags) - drv_tx_frags(local, vif, pubsta, skbs); - else - result = ieee80211_tx_frags(local, vif, pubsta, skbs, - txpending); + result = ieee80211_tx_frags(local, vif, pubsta, skbs, + txpending); ieee80211_tpt_led_trig_tx(local, fc, led_len); ieee80211_led_tx(local, 1); -- cgit v1.2.3-70-g09d2 From e3e1a0bcb3f192fe2f95f86a74bd4e7967341e74 Mon Sep 17 00:00:00 2001 From: Thomas Huehn Date: Mon, 2 Jul 2012 19:46:16 +0200 Subject: mac80211: reduce IEEE80211_TX_MAX_RATES IEEE80211_TX_MAX_RATES can be reduced from 5 to 4 as there is no current hardware supporting a rate chain with 5 multi rate stages (mrr), so 4 mrr stages are sufficient. The memory that is freed within the ieee80211_tx_info struct will be used in the upcoming Transmission Power Control (TPC) implementation. Suggested-by: Felix Fietkau Signed-off-by: Thomas Huehn [reword commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/carl9170/tx.c | 6 +++--- drivers/net/wireless/p54/txrx.c | 6 +++--- include/net/mac80211.h | 8 ++++---- net/mac80211/tx.c | 3 +-- 4 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c index aed305177af..ede0b572ceb 100644 --- a/drivers/net/wireless/ath/carl9170/tx.c +++ b/drivers/net/wireless/ath/carl9170/tx.c @@ -277,11 +277,11 @@ static void carl9170_tx_release(struct kref *ref) return; BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23); + offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); - memset(&txinfo->status.ampdu_ack_len, 0, + memset(&txinfo->status.ack_signal, 0, sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + offsetof(struct ieee80211_tx_info, status.ack_signal)); if (atomic_read(&ar->tx_total_queued)) ar->tx_schedule = true; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 82a1cac920b..f38786e0262 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -422,11 +422,11 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb) * Clear manually, ieee80211_tx_info_clear_status would * clear the counts too and we need them. */ - memset(&info->status.ampdu_ack_len, 0, + memset(&info->status.ack_signal, 0, sizeof(struct ieee80211_tx_info) - - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); + offsetof(struct ieee80211_tx_info, status.ack_signal)); BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, - status.ampdu_ack_len) != 23); + status.ack_signal) != 20); if (entry_hdr->flags & cpu_to_le16(P54_HDR_FLAG_DATA_ALIGN)) pad = entry_data->align[0]; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dc2a97af95e..3f1b58cf9c8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -475,7 +475,7 @@ enum mac80211_rate_control_flags { #define IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE 24 /* maximum number of rate stages */ -#define IEEE80211_TX_MAX_RATES 5 +#define IEEE80211_TX_MAX_RATES 4 /** * struct ieee80211_tx_rate - rate selection/status @@ -563,11 +563,11 @@ struct ieee80211_tx_info { } control; struct { struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; - u8 ampdu_ack_len; int ack_signal; + u8 ampdu_ack_len; u8 ampdu_len; u8 antenna; - /* 14 bytes free */ + /* 21 bytes free */ } status; struct { struct ieee80211_tx_rate driver_rates[ @@ -634,7 +634,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) info->status.rates[i].count = 0; BUILD_BUG_ON( - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23); + offsetof(struct ieee80211_tx_info, status.ack_signal) != 20); memset(&info->status.ampdu_ack_len, 0, sizeof(struct ieee80211_tx_info) - offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 364a1e7b4af..c9d2175d15c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -959,8 +959,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) info->control.rates[1].idx = -1; info->control.rates[2].idx = -1; info->control.rates[3].idx = -1; - info->control.rates[4].idx = -1; - BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 4); info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { hdr->frame_control &= ~morefrags; -- cgit v1.2.3-70-g09d2 From a1845fc7c552977e23fe552ad3f5c6c279e3d550 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jun 2012 13:18:36 +0200 Subject: mac80211: add TX prepare API Some drivers require setup before being able to send management frames in managed mode, in particular in multi-channel cases. Introduce API to allow the drivers to do such setup while being able to sleep waiting for the setup to finish in the device. This isn't possible inside the TX call since that can't sleep. A future patch may also restructure the TX retry to wait for the driver to report the frame status, as suggested by Arik in http://mid.gmane.org/CA+XVXffKSEL6ZQPQ98x-zO-NL2=TNF1uN==mprRyUmAaRn254g@mail.gmail.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 +++++++++++++++ net/mac80211/driver-ops.h | 14 ++++++++++++++ net/mac80211/mlme.c | 8 ++++++++ net/mac80211/trace.h | 7 +++++++ 4 files changed, 44 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1b58cf9c8..e3fa90ce9ec 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2244,6 +2244,18 @@ enum ieee80211_rate_control_changed { * @get_rssi: Get current signal strength in dBm, the function is optional * and can sleep. * + * @mgd_prepare_tx: Prepare for transmitting a management frame for association + * before associated. In multi-channel scenarios, a virtual interface is + * bound to a channel before it is associated, but as it isn't associated + * yet it need not necessarily be given airtime, in particular since any + * transmission to a P2P GO needs to be synchronized against the GO's + * powersave state. mac80211 will call this function before transmitting a + * management frame prior to having successfully associated to allow the + * driver to give it channel time for the transmission, to get a response + * and to be able to synchronize with the GO. + * The callback will be called before each transmission and upon return + * mac80211 will transmit the frame right away. + * The callback is optional and can (should!) sleep. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -2383,6 +2395,9 @@ struct ieee80211_ops { u32 sset, u8 *data); int (*get_rssi)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, s8 *rssi_dbm); + + void (*mgd_prepare_tx)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); }; /** diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 5042151a332..df920319910 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -852,4 +852,18 @@ static inline int drv_get_rssi(struct ieee80211_local *local, return ret; } + +static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + check_sdata_in_driver(sdata); + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + + trace_drv_mgd_prepare_tx(local, sdata); + if (local->ops->mgd_prepare_tx) + local->ops->mgd_prepare_tx(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e9c0d1b68fc..d563f7c5553 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -541,6 +541,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) memcpy(pos, assoc_data->ie + offset, noffset - offset); } + drv_mgd_prepare_tx(local, sdata); + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; ieee80211_tx_skb(sdata, skb); } @@ -580,6 +582,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + + drv_mgd_prepare_tx(local, sdata); + ieee80211_tx_skb(sdata, skb); } } @@ -1756,6 +1761,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, if (!elems.challenge) return; auth_data->expected_transaction = 4; + drv_mgd_prepare_tx(sdata->local, sdata); ieee80211_send_auth(sdata, 3, auth_data->algorithm, elems.challenge - 2, elems.challenge_len + 2, auth_data->bss->bssid, auth_data->bss->bssid, @@ -2641,6 +2647,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) return -ETIMEDOUT; } + drv_mgd_prepare_tx(local, sdata); + if (auth_data->bss->proberesp_ies) { sdata_info(sdata, "send auth to %pM (try %d/%d)\n", auth_data->bss->bssid, auth_data->tries, diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 2e60f4acd02..e1e9d10ec2e 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1244,6 +1244,13 @@ TRACE_EVENT(drv_get_rssi, ) ); +DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3-70-g09d2 From 08911475d1d0921401e37d83292b217e1411d10b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 29 Jun 2012 05:23:24 +0000 Subject: netfilter: nf_conntrack: generalize nf_ct_l4proto_net This patch generalizes nf_ct_l4proto_net by splitting it into chunks and moving the corresponding protocol part to where it really belongs to. To clarify, note that we follow two different approaches to support per-net depending if it's built-in or run-time loadable protocol tracker. Signed-off-by: Pablo Neira Ayuso Acked-by: Gao feng --- include/net/netfilter/nf_conntrack_l4proto.h | 3 +++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 ++++++ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++++++ net/netfilter/nf_conntrack_proto.c | 22 ++++++---------------- net/netfilter/nf_conntrack_proto_generic.c | 6 ++++++ net/netfilter/nf_conntrack_proto_tcp.c | 7 +++++++ net/netfilter/nf_conntrack_proto_udp.c | 7 +++++++ 7 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 08bb571b7ab..c3be4aef6bf 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -99,6 +99,9 @@ struct nf_conntrack_l4proto { /* Init l4proto pernet data */ int (*init_net)(struct net *net, u_int16_t proto); + /* Return the per-net protocol part. */ + struct nf_proto_net *(*get_net_proto)(struct net *net); + /* Protocol name */ const char *name; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 9c2095c5571..5241d997ab7 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -388,6 +388,11 @@ static int icmp_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *icmp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, @@ -418,4 +423,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = icmp_init_net, + .get_net_proto = icmp_get_net_proto, }; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9fc5cf5f3e8..2d54b2061d6 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -358,6 +358,11 @@ static int icmpv6_init_net(struct net *net, u_int16_t proto) return icmpv6_kmemdup_sysctl_table(pn, in); } +static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmpv6.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, @@ -386,4 +391,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = icmpv6_init_net, + .get_net_proto = icmpv6_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 21b850c4b3a..0dc63854390 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -303,22 +303,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { - switch (l4proto->l4proto) { - case IPPROTO_TCP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; - case IPPROTO_UDP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp; - case IPPROTO_ICMP: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp; - case IPPROTO_ICMPV6: - return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmpv6; - case 255: /* l4proto_generic */ - return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; - default: - if (l4proto->net_id) - return net_generic(net, *l4proto->net_id); - else - return NULL; + if (l4proto->get_net_proto) { + /* statically built-in protocols use static per-net */ + return l4proto->get_net_proto(net); + } else if (l4proto->net_id) { + /* ... and loadable protocols use dynamic per-net */ + return net_generic(net, *l4proto->net_id); } return NULL; } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 7c11c544419..d25f2937764 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -186,6 +186,11 @@ static int generic_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *generic_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.generic.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, @@ -207,4 +212,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = generic_init_net, + .get_net_proto = generic_get_net_proto, }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44f0da83015..07e56ea2e9b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1623,6 +1623,11 @@ static int tcp_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *tcp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, @@ -1656,6 +1661,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = tcp_init_net, + .get_net_proto = tcp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); @@ -1692,5 +1698,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = tcp_init_net, + .get_net_proto = tcp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index e7e0434c305..59623cc56e8 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -297,6 +297,11 @@ static int udp_init_net(struct net *net, u_int16_t proto) return ret; } +static struct nf_proto_net *udp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.udp.pn; +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, @@ -325,6 +330,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); @@ -356,5 +362,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ .init_net = udp_init_net, + .get_net_proto = udp_get_net_proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6); -- cgit v1.2.3-70-g09d2 From a263b3093641fb1ec377582c90986a7fd0625184 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 02:02:15 -0700 Subject: ipv4: Make neigh lookups directly in output packet path. Do not use the dst cached neigh, we'll be getting rid of that. Signed-off-by: David S. Miller --- include/net/arp.h | 28 +++++++++++++++++++--------- include/net/neighbour.h | 11 +++++++++-- net/core/neighbour.c | 12 +++++++----- net/ipv4/ip_output.c | 12 ++++++++---- net/ipv4/route.c | 6 +----- 5 files changed, 44 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 4a1f3fb562e..4617d984113 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -15,24 +15,34 @@ static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd return val * hash_rnd; } -static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) +static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) { - struct neigh_hash_table *nht; + struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht); struct neighbour *n; u32 hash_val; - rcu_read_lock_bh(); - nht = rcu_dereference_bh(arp_tbl.nht); + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) + key = 0; + hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; n = rcu_dereference_bh(n->next)) { - if (n->dev == dev && *(u32 *)n->primary_key == key) { - if (!atomic_inc_not_zero(&n->refcnt)) - n = NULL; - break; - } + if (n->dev == dev && *(u32 *)n->primary_key == key) + return n; } + + return NULL; +} + +static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) +{ + struct neighbour *n; + + rcu_read_lock_bh(); + n = __ipv4_neigh_lookup_noref(dev, key); + if (n && !atomic_inc_not_zero(&n->refcnt)) + n = NULL; rcu_read_unlock_bh(); return n; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 6cdfeedb650..e1d18bdeebb 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -202,9 +202,16 @@ extern struct neighbour * neigh_lookup(struct neigh_table *tbl, extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey); -extern struct neighbour * neigh_create(struct neigh_table *tbl, +extern struct neighbour * __neigh_create(struct neigh_table *tbl, + const void *pkey, + struct net_device *dev, + bool want_ref); +static inline struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev); + struct net_device *dev) +{ + return __neigh_create(tbl, pkey, dev, true); +} extern void neigh_destroy(struct neighbour *neigh); extern int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); extern int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d81d026138f..a793af9af15 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev) +struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, bool want_ref) { u32 hash_val; int key_len = tbl->key_len; @@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n1 = rcu_dereference_protected(n1->next, lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { - neigh_hold(n1); + if (want_ref) + neigh_hold(n1); rc = n1; goto out_tbl_unlock; } } n->dead = 0; - neigh_hold(n); + if (want_ref) + neigh_hold(n); rcu_assign_pointer(n->next, rcu_dereference_protected(nht->hash_buckets[hash_val], lockdep_is_held(&tbl->lock))); @@ -558,7 +560,7 @@ out_neigh_release: neigh_release(n); goto out; } -EXPORT_SYMBOL(neigh_create); +EXPORT_SYMBOL(__neigh_create); static u32 pneigh_hash(const void *pkey, int key_len) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2630900e480..6e9a266a053 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -170,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; + u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); @@ -191,15 +192,18 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } - rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rcu_read_lock_bh(); + nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + neigh = __ipv4_neigh_lookup_noref(dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (neigh) { int res = neigh_output(neigh, skb); - rcu_read_unlock(); + rcu_read_unlock_bh(); return res; } - rcu_read_unlock(); + rcu_read_unlock_bh(); net_dbg_ratelimited("%s: No header cache and no neighbour!\n", __func__); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a5afc71555..2f40363e285 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1098,17 +1098,13 @@ static int slow_chain_length(const struct rtable *head) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - static const __be32 inaddr_any = 0; struct net_device *dev = dst->dev; const __be32 *pkey = daddr; const struct rtable *rt; struct neighbour *n; rt = (const struct rtable *) dst; - - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - pkey = &inaddr_any; - else if (rt->rt_gateway) + if (rt->rt_gateway) pkey = (const __be32 *) &rt->rt_gateway; n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); -- cgit v1.2.3-70-g09d2 From 5110effee8fde2edfacac9cd12a9960ab2dc39ea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 02:21:03 -0700 Subject: net: Do delayed neigh confirmation. When a dst_confirm() happens, mark the confirmation as pending in the dst. Then on the next packet out, when we have the neigh in-hand, do the update. This removes the dependency in dst_confirm() of dst's having an attached neigh. While we're here, remove the explicit 'dst' NULL check, all except 2 or 3 call sites ensure it's not NULL. So just fix those cases up. Signed-off-by: David S. Miller --- include/net/dst.h | 29 +++++++++++++++++++++-------- include/net/neighbour.h | 15 --------------- net/core/dst.c | 3 ++- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp_input.c | 19 +++++++++++++------ net/ipv6/ip6_output.c | 2 +- 6 files changed, 38 insertions(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index f0bf3b8d591..84e7a3ff968 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -51,7 +51,7 @@ struct dst_entry { int (*input)(struct sk_buff *); int (*output)(struct sk_buff *); - int flags; + unsigned short flags; #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 @@ -62,6 +62,8 @@ struct dst_entry { #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 + unsigned short pending_confirm; + short error; short obsolete; unsigned short header_len; /* more space at head required */ @@ -371,7 +373,8 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) extern int dst_discard(struct sk_buff *skb); extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags); + int initial_ref, int initial_obsolete, + unsigned short flags); extern void __dst_free(struct dst_entry *dst); extern struct dst_entry *dst_destroy(struct dst_entry *dst); @@ -395,14 +398,24 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - if (dst) { - struct neighbour *n; + dst->pending_confirm = 1; +} - rcu_read_lock(); - n = dst_get_neighbour_noref(dst); - neigh_confirm(n); - rcu_read_unlock(); +static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, + struct sk_buff *skb) +{ + struct hh_cache *hh; + + if (unlikely(dst->pending_confirm)) { + n->confirmed = jiffies; + dst->pending_confirm = 0; } + + hh = &n->hh; + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + return neigh_hh_output(hh, skb); + else + return n->output(n, skb); } static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e1d18bdeebb..344d8988842 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -309,12 +309,6 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) atomic_inc(&(n)->refcnt) -static inline void neigh_confirm(struct neighbour *neigh) -{ - if (neigh) - neigh->confirmed = jiffies; -} - static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; @@ -358,15 +352,6 @@ static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) return dev_queue_xmit(skb); } -static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) -{ - struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) - return neigh_hh_output(hh, skb); - else - return n->output(n, skb); -} - static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat) { diff --git a/net/core/dst.c b/net/core/dst.c index 43d94cedbf7..a6e19a23a74 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags) + int initial_ref, int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -188,6 +188,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; + dst->pending_confirm = 0; dst->next = NULL; if (!(flags & DST_NOCOUNT)) dst_entries_add(ops, 1); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6e9a266a053..cc52679790b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,7 +198,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock_bh(); return res; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8416f8a68e6..ca0d0e7c977 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -740,13 +740,13 @@ void tcp_update_metrics(struct sock *sk) if (sysctl_tcp_nometrics_save) return; - dst_confirm(dst); - if (dst && (dst->flags & DST_HOST)) { const struct inet_connection_sock *icsk = inet_csk(sk); int m; unsigned long rtt; + dst_confirm(dst); + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. @@ -3869,9 +3869,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) tcp_cong_avoid(sk, ack, prior_in_flight); } - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) - dst_confirm(__sk_dst_get(sk)); - + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { + struct dst_entry *dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + } return 1; no_queue: @@ -6140,9 +6142,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, case TCP_FIN_WAIT1: if (tp->snd_una == tp->write_seq) { + struct dst_entry *dst; + tcp_set_state(sk, TCP_FIN_WAIT2); sk->sk_shutdown |= SEND_SHUTDOWN; - dst_confirm(__sk_dst_get(sk)); + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); if (!sock_flag(sk, SOCK_DEAD)) /* Wake up lingering close() */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a233a7ccbc3..c94e4aabe11 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -125,7 +125,7 @@ static int ip6_finish_output2(struct sk_buff *skb) rcu_read_lock(); neigh = dst_get_neighbour_noref(dst); if (neigh) { - int res = neigh_output(neigh, skb); + int res = dst_neigh_output(dst, neigh, skb); rcu_read_unlock(); return res; -- cgit v1.2.3-70-g09d2 From f894cbf847c9bea1955095bf37aca6c050553167 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 21:52:24 -0700 Subject: net: Add optional SKB arg to dst_ops->neigh_lookup(). Causes the handler to use the daddr in the ipv4/ipv6 header when the route gateway is unspecified (local subnet). Signed-off-by: David S. Miller --- include/net/dst.h | 8 +++++++- include/net/dst_ops.h | 4 +++- net/bridge/br_netfilter.c | 4 +++- net/decnet/dn_route.c | 8 ++++++-- net/ipv4/route.c | 14 ++++++++++---- net/ipv6/route.c | 14 ++++++++++---- net/xfrm/xfrm_policy.c | 6 ++++-- 7 files changed, 43 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 84e7a3ff968..295a70547e7 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -420,7 +420,13 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, daddr); + return dst->ops->neigh_lookup(dst, NULL, daddr); +} + +static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, + struct sk_buff *skb) +{ + return dst->ops->neigh_lookup(dst, skb, NULL); } static inline void dst_link_failure(struct sk_buff *skb) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 3682a0a076c..4badc86e45d 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -26,7 +26,9 @@ struct dst_ops { void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, const void *daddr); + struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); struct kmem_cache *kmem_cachep; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 20fa719889e..4378775432b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -120,7 +120,9 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) return NULL; } -static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return NULL; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2493ed5bfec..60e4c6e1bac 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,7 +117,9 @@ static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr); +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); static int dn_route_input(struct sk_buff *); static void dn_run_flush(unsigned long dummy); @@ -828,7 +830,9 @@ static unsigned int dn_dst_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bae36386e72..7453dfcdb43 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -188,7 +188,9 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr); +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, @@ -1088,7 +1090,9 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; } -static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { struct net_device *dev = dst->dev; const __be32 *pkey = daddr; @@ -1098,6 +1102,8 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo rt = (const struct rtable *) dst; if (rt->rt_gateway) pkey = (const __be32 *) &rt->rt_gateway; + else if (skb) + pkey = &ip_hdr(skb)->daddr; n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); if (n) @@ -1107,7 +1113,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo static int rt_bind_neighbour(struct rtable *rt) { - struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + struct neighbour *n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); if (IS_ERR(n)) return PTR_ERR(n); dst_set_neighbour(&rt->dst, n); @@ -1388,7 +1394,7 @@ static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) rt->rt_gateway = peer->redirect_learned.a4; - n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); if (IS_ERR(n)) { rt->rt_gateway = orig_gw; return; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c518e4ec0ce..4b581c675bb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -120,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } -static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +static inline const void *choose_neigh_daddr(struct rt6_info *rt, + struct sk_buff *skb, + const void *daddr) { struct in6_addr *p = &rt->rt6i_gateway; if (!ipv6_addr_any(p)) return (const void *) p; + else if (skb) + return &ipv6_hdr(skb)->daddr; return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, daddr); + daddr = choose_neigh_daddr(rt, skb, daddr); n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); if (n) return n; @@ -1162,7 +1168,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (neigh) neigh_hold(neigh); else { - neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr); + neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); if (IS_ERR(neigh)) { in6_dev_put(idev); dst_free(&rt->dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ccfbd328a69..a28a3f972d5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2404,9 +2404,11 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst) return mtu ? : dst_mtu(dst->path); } -static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) +static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { - return dst_neigh_lookup(dst->path, daddr); + return dst->path->ops->neigh_lookup(dst, skb, daddr); } int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) -- cgit v1.2.3-70-g09d2 From fccd7d5c77ff61d5283e7ce8242791d5f00dcc17 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 22:22:18 -0700 Subject: decnet: Use neighbours privately in dn_route struct. This allows an easy conversion away from dst_get_neighbour*(). Signed-off-by: David S. Miller --- include/net/dn_route.h | 2 ++ net/decnet/dn_neigh.c | 2 +- net/decnet/dn_route.c | 36 +++++++++++++++++++++++++++++------- 3 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_route.h b/include/net/dn_route.h index c507e05d172..4f7d6a18238 100644 --- a/include/net/dn_route.h +++ b/include/net/dn_route.h @@ -67,6 +67,8 @@ extern void dn_rt_cache_flush(int delay); struct dn_route { struct dst_entry dst; + struct neighbour *n; + struct flowidn fld; __le16 rt_saddr; diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 8e9a35b17df..3aede1b459f 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -202,7 +202,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; - struct neighbour *neigh = dst_get_neighbour_noref(dst); + struct neighbour *neigh = rt->n; struct net_device *dev = neigh->dev; char mac_addr[ETH_ALEN]; unsigned int seq; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 60e4c6e1bac..6e74b3f110b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -114,6 +114,7 @@ static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); +static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -140,6 +141,7 @@ static struct dst_ops dn_dst_ops = { .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, + .ifdown = dn_dst_ifdown, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, @@ -148,9 +150,27 @@ static struct dst_ops dn_dst_ops = { static void dn_dst_destroy(struct dst_entry *dst) { + struct dn_route *rt = (struct dn_route *) dst; + + if (rt->n) + neigh_release(rt->n); dst_destroy_metrics_generic(dst); } +static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) +{ + if (how) { + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; + + if (n && n->dev == dev) { + n->dev = dev_net(dev)->loopback_dev; + dev_hold(n->dev); + dev_put(dev); + } + } +} + static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -246,7 +266,8 @@ static int dn_dst_gc(struct dst_ops *ops) */ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) { - struct neighbour *n = dst_get_neighbour_noref(dst); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; u32 min_mtu = 230; struct dn_dev *dn; @@ -715,7 +736,8 @@ out: static int dn_to_neigh_output(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = dst_get_neighbour_noref(dst); + struct dn_route *rt = (struct dn_route *) dst; + struct neighbour *n = rt->n; return n->output(n, skb); } @@ -729,7 +751,7 @@ static int dn_output(struct sk_buff *skb) int err = -EINVAL; - if (dst_get_neighbour_noref(dst) == NULL) + if (rt->n == NULL) goto error; skb->dev = dev; @@ -852,11 +874,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) } rt->rt_type = res->type; - if (dev != NULL && dst_get_neighbour_noref(&rt->dst) == NULL) { + if (dev != NULL && rt->n == NULL) { n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); if (IS_ERR(n)) return PTR_ERR(n); - dst_set_neighbour(&rt->dst, n); + rt->n = n; } if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) @@ -1163,7 +1185,7 @@ make_route: rt->rt_dst_map = fld.daddr; rt->rt_src_map = fld.saddr; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; neigh = NULL; rt->dst.lastuse = jiffies; @@ -1433,7 +1455,7 @@ make_route: rt->fld.flowidn_iif = in_dev->ifindex; rt->fld.flowidn_mark = fld.flowidn_mark; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; rt->dst.lastuse = jiffies; rt->dst.output = dn_rt_bug; switch (res.type) { -- cgit v1.2.3-70-g09d2 From 1d248b1cf4e09dbec8cef5f7fbeda5874248bd09 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 Jul 2012 01:01:51 -0700 Subject: net: Pass neighbours and dest address into NETEVENT_REDIRECT events. Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c | 23 +++++++++------------- include/net/netevent.h | 4 ++++ net/ipv6/route.c | 7 ++++++- 3 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c index 55cf72af69c..633c6029e53 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c @@ -62,7 +62,8 @@ static const unsigned int MAX_ATIDS = 64 * 1024; static const unsigned int ATID_BASE = 0x10000; static void cxgb_neigh_update(struct neighbour *neigh); -static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new); +static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh, + struct dst_entry *new, struct neighbour *new_neigh); static inline int offload_activated(struct t3cdev *tdev) { @@ -968,8 +969,9 @@ static int nb_callback(struct notifier_block *self, unsigned long event, } case (NETEVENT_REDIRECT):{ struct netevent_redirect *nr = ctx; - cxgb_redirect(nr->old, nr->new); - cxgb_neigh_update(dst_get_neighbour_noref(nr->new)); + cxgb_redirect(nr->old, nr->old_neigh, + nr->new, nr->new_neigh); + cxgb_neigh_update(nr->new_neigh); break; } default: @@ -1107,10 +1109,10 @@ static void set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e) tdev->send(tdev, skb); } -static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) +static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh, + struct dst_entry *new, struct neighbour *new_neigh) { struct net_device *olddev, *newdev; - struct neighbour *n; struct tid_info *ti; struct t3cdev *tdev; u32 tid; @@ -1118,15 +1120,8 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new) struct l2t_entry *e; struct t3c_tid_entry *te; - n = dst_get_neighbour_noref(old); - if (!n) - return; - olddev = n->dev; - - n = dst_get_neighbour_noref(new); - if (!n) - return; - newdev = n->dev; + olddev = old_neigh->dev; + newdev = new_neigh->dev; if (!is_offloading(olddev)) return; diff --git a/include/net/netevent.h b/include/net/netevent.h index 086f8a5b59d..3ce4988c9c0 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -12,10 +12,14 @@ */ struct dst_entry; +struct neighbour; struct netevent_redirect { struct dst_entry *old; + struct neighbour *old_neigh; struct dst_entry *new; + struct neighbour *new_neigh; + const void *daddr; }; enum netevent_notif_type { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4b581c675bb..34b29881e22 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1687,6 +1687,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, struct rt6_info *rt, *nrt = NULL; struct netevent_redirect netevent; struct net *net = dev_net(neigh->dev); + struct neighbour *old_neigh; rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1714,7 +1715,8 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour_noref_raw(&rt->dst)) + old_neigh = dst_get_neighbour_noref_raw(&rt->dst); + if (neigh == old_neigh) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1732,7 +1734,10 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, goto out; netevent.old = &rt->dst; + netevent.old_neigh = old_neigh; netevent.new = &nrt->dst; + netevent.new_neigh = neigh; + netevent.daddr = dest; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags & RTF_CACHE) { -- cgit v1.2.3-70-g09d2 From 97cac0821af4474ec4ba3a9e7a36b98ed9b6db88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 22:43:47 -0700 Subject: ipv6: Store route neighbour in rt6_info struct. This makes for a simplified conversion away from dst_get_neighbour*(). All code outside of ipv6 will use neigh lookups via dst_neigh_lookup*(). Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/ip6_output.c | 8 ++++++-- net/ipv6/route.c | 42 ++++++++++++++++++++++++++---------------- net/ipv6/xfrm6_policy.c | 1 + 4 files changed, 35 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a192f780765..0fedbd8d747 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -86,6 +86,8 @@ struct fib6_table; struct rt6_info { struct dst_entry dst; + struct neighbour *n; + /* * Tail elements of dst_entry (__refcnt etc.) * and these elements (rarely used in hot path) are in diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c94e4aabe11..6d9c0abc8c2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -88,6 +88,7 @@ static int ip6_finish_output2(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct neighbour *neigh; + struct rt6_info *rt; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -123,7 +124,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); + rt = (struct rt6_info *) dst; + neigh = rt->n; if (neigh) { int res = dst_neigh_output(dst, neigh, skb); @@ -944,6 +946,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct net *net = sock_net(sk); #ifdef CONFIG_IPV6_OPTIMISTIC_DAD struct neighbour *n; + struct rt6_info *rt; #endif int err; @@ -972,7 +975,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry of the nexthop router */ rcu_read_lock(); - n = dst_get_neighbour_noref(*dst); + rt = (struct rt6_info *) dst; + n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 34b29881e22..ceff71d24f8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -155,7 +155,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) if (IS_ERR(n)) return PTR_ERR(n); } - dst_set_neighbour(&rt->dst, n); + rt->n = n; return 0; } @@ -285,6 +285,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + if (rt->n) + neigh_release(rt->n); + if (!(rt->dst.flags & DST_HOST)) dst_destroy_metrics_generic(dst); @@ -335,12 +338,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev && idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); + if (dev != loopback_dev) { + if (idev && idev->dev == dev) { + struct inet6_dev *loopback_idev = + in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); + } + } + if (rt->n && rt->n->dev == dev) { + rt->n->dev = loopback_dev; + dev_hold(loopback_dev); + dev_put(dev); } } } @@ -430,7 +440,7 @@ static void rt6_probe(struct rt6_info *rt) * to no more than one per minute. */ rcu_read_lock(); - neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL; + neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) goto out; read_lock_bh(&neigh->lock); @@ -477,7 +487,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) int m; rcu_read_lock(); - neigh = dst_get_neighbour_noref(&rt->dst); + neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -824,7 +834,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, if (rt) { rt->rt6i_flags |= RTF_CACHE; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst))); + rt->n = neigh_clone(ort->n); } return rt; } @@ -858,7 +868,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1178,7 +1188,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; - dst_set_neighbour(&rt->dst, neigh); + rt->n = neigh; atomic_set(&rt->dst.__refcnt, 1); rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; @@ -1715,7 +1725,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - old_neigh = dst_get_neighbour_noref_raw(&rt->dst); + old_neigh = rt->n; if (neigh == old_neigh) goto out; @@ -1728,7 +1738,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, nrt->rt6i_flags &= ~RTF_GATEWAY; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); + nrt->n = neigh_clone(neigh); if (ip6_ins_rt(nrt)) goto out; @@ -2442,7 +2452,7 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { rcu_read_unlock(); @@ -2666,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) seq_puts(m, "00000000000000000000000000000000 00 "); #endif rcu_read_lock(); - n = dst_get_neighbour_noref(&rt->dst); + n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d7494845efb..bb02038b822 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -103,6 +103,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ + xdst->u.rt6.n = neigh_clone(rt->n); xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; -- cgit v1.2.3-70-g09d2 From 36bdbcae2fa2a6dfa99344d4190fcea0aa7b7c25 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Jul 2012 22:58:02 -0700 Subject: net: Kill dst->_neighbour, accessors, and final uses. No longer used. Signed-off-by: David S. Miller --- include/net/dst.h | 17 +---------------- net/core/dst.c | 18 ------------------ 2 files changed, 1 insertion(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 295a70547e7..b2634e44661 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -42,7 +42,7 @@ struct dst_entry { struct dst_entry *from; }; struct dst_entry *path; - struct neighbour __rcu *_neighbour; + void *__pad0; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -96,21 +96,6 @@ struct dst_entry { }; }; -static inline struct neighbour *dst_get_neighbour_noref(struct dst_entry *dst) -{ - return rcu_dereference(dst->_neighbour); -} - -static inline struct neighbour *dst_get_neighbour_noref_raw(struct dst_entry *dst) -{ - return rcu_dereference_raw(dst->_neighbour); -} - -static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) -{ - rcu_assign_pointer(dst->_neighbour, neigh); -} - extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[RTAX_MAX]; diff --git a/net/core/dst.c b/net/core/dst.c index a6e19a23a74..07bacff84aa 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -225,19 +224,12 @@ EXPORT_SYMBOL(__dst_free); struct dst_entry *dst_destroy(struct dst_entry * dst) { struct dst_entry *child; - struct neighbour *neigh; smp_rmb(); again: - neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; - if (neigh) { - RCU_INIT_POINTER(dst->_neighbour, NULL); - neigh_release(neigh); - } - if (!(dst->flags & DST_NOCOUNT)) dst_entries_add(dst->ops, -1); @@ -361,19 +353,9 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - struct neighbour *neigh; - dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - rcu_read_lock(); - neigh = dst_get_neighbour_noref(dst); - if (neigh && neigh->dev == dev) { - neigh->dev = dst->dev; - dev_hold(dst->dev); - dev_put(dev); - } - rcu_read_unlock(); } } -- cgit v1.2.3-70-g09d2 From 8eb41c8dfb9e2396d2452ada9023a83d610b9051 Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 5 Jul 2012 14:25:49 +0300 Subject: {nl,cfg}80211: support high bitrates Until now, a u16 value was used to represent bitrate value. With VHT bitrates this becomes too small. Introduce a new 32-bit bitrate attribute. nl80211 will report both the new and the old attribute, unless the bitrate doesn't fit into the old u16 attribute in which case only the new one will be reported. User space tools encouraged to prefer the 32-bit attribute, if available (since it won't be available on older kernels.) Signed-off-by: Vladimir Kondratiev [reword commit message and comments a bit] Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 9 +++++++++ include/net/cfg80211.h | 2 +- net/wireless/nl80211.c | 9 +++++++-- net/wireless/util.c | 2 +- 4 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 74cc55c1bf2..db961a59247 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1638,12 +1638,20 @@ struct nl80211_sta_flag_update { * * These attribute types are used with %NL80211_STA_INFO_TXRATE * when getting information about the bitrate of a station. + * There are 2 attributes for bitrate, a legacy one that represents + * a 16-bit value, and new one that represents a 32-bit value. + * If the rate value fits into 16 bit, both attributes are reported + * with the same value. If the rate is too high to fit into 16 bits + * (>6.5535Gbps) only 32-bit attribute is included. + * User space tools encouraged to use the 32-bit attribute and fall + * back to the 16-bit one for compatibility with older kernels. * * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s) * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ @@ -1653,6 +1661,7 @@ enum nl80211_rate_info { NL80211_RATE_INFO_MCS, NL80211_RATE_INFO_40_MHZ_WIDTH, NL80211_RATE_INFO_SHORT_GI, + NL80211_RATE_INFO_BITRATE32, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0b564e83a24..8837efc368f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3487,7 +3487,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq, * * return 0 if MCS index >= 32 */ -u16 cfg80211_calculate_bitrate(struct rate_info *rate); +u32 cfg80211_calculate_bitrate(struct rate_info *rate); /* Logging, debugging and troubleshooting/diagnostic helpers. */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 77102e66f1e..2a5cdb60bc6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2618,7 +2618,8 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr) { struct nlattr *rate; - u16 bitrate; + u32 bitrate; + u16 bitrate_compat; rate = nla_nest_start(msg, attr); if (!rate) @@ -2626,8 +2627,12 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ bitrate = cfg80211_calculate_bitrate(info); + /* report 16-bit bitrate only if we can */ + bitrate_compat = bitrate < (1UL << 16) ? bitrate : 0; if ((bitrate > 0 && - nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate)) || + nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) || + (bitrate_compat > 0 && + nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) || ((info->flags & RATE_INFO_FLAGS_MCS) && nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) || ((info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) && diff --git a/net/wireless/util.c b/net/wireless/util.c index 0228c64e73d..6e52726f7fe 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -900,7 +900,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return err; } -u16 cfg80211_calculate_bitrate(struct rate_info *rate) +u32 cfg80211_calculate_bitrate(struct rate_info *rate) { int modulation, streams, bitrate; -- cgit v1.2.3-70-g09d2 From 95ddc1fc4519ed48ddc7d622bd5c84dff3eebb0a Mon Sep 17 00:00:00 2001 From: Vladimir Kondratiev Date: Thu, 5 Jul 2012 14:25:50 +0300 Subject: cfg80211: bitrate calculation for 60g 60g band uses different from .11n MCS scheme, so bitrate should be calculated differently Signed-off-by: Vladimir Kondratiev Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/util.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8837efc368f..51f67a9003a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -580,11 +580,13 @@ enum station_info_flags { * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval + * @RATE_INFO_FLAGS_60G: 60gHz MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = 1<<0, RATE_INFO_FLAGS_40_MHZ_WIDTH = 1<<1, RATE_INFO_FLAGS_SHORT_GI = 1<<2, + RATE_INFO_FLAGS_60G = 1<<3, }; /** diff --git a/net/wireless/util.c b/net/wireless/util.c index 6e52726f7fe..e31f1dba79e 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -900,12 +900,61 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return err; } +static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) +{ + static const u32 __mcs2bitrate[] = { + /* control PHY */ + [0] = 275, + /* SC PHY */ + [1] = 3850, + [2] = 7700, + [3] = 9625, + [4] = 11550, + [5] = 12512, /* 1251.25 mbps */ + [6] = 15400, + [7] = 19250, + [8] = 23100, + [9] = 25025, + [10] = 30800, + [11] = 38500, + [12] = 46200, + /* OFDM PHY */ + [13] = 6930, + [14] = 8662, /* 866.25 mbps */ + [15] = 13860, + [16] = 17325, + [17] = 20790, + [18] = 27720, + [19] = 34650, + [20] = 41580, + [21] = 45045, + [22] = 51975, + [23] = 62370, + [24] = 67568, /* 6756.75 mbps */ + /* LP-SC PHY */ + [25] = 6260, + [26] = 8340, + [27] = 11120, + [28] = 12510, + [29] = 16680, + [30] = 22240, + [31] = 25030, + }; + + if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate))) + return 0; + + return __mcs2bitrate[rate->mcs]; +} + u32 cfg80211_calculate_bitrate(struct rate_info *rate) { int modulation, streams, bitrate; if (!(rate->flags & RATE_INFO_FLAGS_MCS)) return rate->legacy; + if (rate->flags & RATE_INFO_FLAGS_60G) + return cfg80211_calculate_bitrate_60g(rate); /* the formula below does only work for MCS values smaller than 32 */ if (WARN_ON_ONCE(rate->mcs >= 32)) -- cgit v1.2.3-70-g09d2 From f4530fa574df4d833506c53697ed1daa0d390bf4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 5 Jul 2012 22:13:13 -0700 Subject: ipv4: Avoid overhead when no custom FIB rules are installed. If the user hasn't actually installed any custom rules, or fiddled with the default ones, don't go through the whole FIB rules layer. It's just pure overhead. Instead do what we do with CONFIG_IP_MULTIPLE_TABLES disabled, check the individual tables by hand, one by one. Also, move fib_num_tclassid_users into the ipv4 network namespace. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 36 ++++++++++++++++++++++++++++++++---- include/net/netns/ipv4.h | 8 ++++++++ net/ipv4/fib_frontend.c | 27 ++++++++++++++++++++++----- net/ipv4/fib_rules.c | 12 ++++++++---- net/ipv4/fib_semantics.c | 6 +++--- 5 files changed, 73 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 3dc7c96bbea..539c6721f81 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -220,11 +220,33 @@ extern void __net_exit fib4_rules_exit(struct net *net); extern u32 fib_rules_tclass(const struct fib_result *res); #endif -extern int fib_lookup(struct net *n, struct flowi4 *flp, struct fib_result *res); - extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); +extern int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res); + +static inline int fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res) +{ + if (!net->ipv4.fib_has_custom_rules) { + if (net->ipv4.fib_local && + !fib_table_lookup(net->ipv4.fib_local, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + if (net->ipv4.fib_main && + !fib_table_lookup(net->ipv4.fib_main, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + if (net->ipv4.fib_default && + !fib_table_lookup(net->ipv4.fib_default, flp, res, + FIB_LOOKUP_NOREF)) + return 0; + return -ENETUNREACH; + } + return __fib_lookup(net, flp, res); +} + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ @@ -236,9 +258,15 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, struct in_device *idev, u32 *itag); extern void fib_select_default(struct fib_result *res); #ifdef CONFIG_IP_ROUTE_CLASSID -extern int fib_num_tclassid_users; +static inline int fib_num_tclassid_users(struct net *net) +{ + return net->ipv4.fib_num_tclassid_users; +} #else -#define fib_num_tclassid_users 0 +static inline int fib_num_tclassid_users(struct net *net) +{ + return 0; +} #endif /* Exported by fib_semantics.c */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 227f0cd9d3f..599e48fa97c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -11,6 +11,7 @@ struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; struct hlist_head; +struct fib_table; struct sock; struct netns_ipv4 { @@ -24,6 +25,13 @@ struct netns_ipv4 { struct ipv4_devconf *devconf_dflt; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; + bool fib_has_custom_rules; + struct fib_table *fib_local; + struct fib_table *fib_main; + struct fib_table *fib_default; +#endif +#ifdef CONFIG_IP_ROUTE_CLASSID + int fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; struct sock *fibnl; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3e11ea225da..81f85716a89 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -86,6 +86,24 @@ struct fib_table *fib_new_table(struct net *net, u32 id) tb = fib_trie_table(id); if (!tb) return NULL; + + switch (id) { + case RT_TABLE_LOCAL: + net->ipv4.fib_local = tb; + break; + + case RT_TABLE_MAIN: + net->ipv4.fib_main = tb; + break; + + case RT_TABLE_DEFAULT: + net->ipv4.fib_default = tb; + break; + + default: + break; + } + h = id & (FIB_TABLE_HASHSZ - 1); hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); return tb; @@ -218,10 +236,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) return inet_select_addr(dev, ip_hdr(skb)->saddr, scope); } -#ifdef CONFIG_IP_ROUTE_CLASSID -int fib_num_tclassid_users __read_mostly; -#endif - /* Given (packet source, input interface) and optional (dst, oif, tos): * - (main) check, that source is valid i.e. not broadcast or our local * address. @@ -312,7 +326,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users) { + if (!r && !fib_num_tclassid_users(dev_net(dev))) { *itag = 0; return 0; } @@ -1134,6 +1148,9 @@ static int __net_init fib_net_init(struct net *net) { int error; +#ifdef CONFIG_IP_ROUTE_CLASSID + net->ipv4.fib_num_tclassid_users = 0; +#endif error = ip_fib_net_init(net); if (error < 0) goto out; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index b23fd952c84..c06da93b0b7 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -54,7 +54,7 @@ u32 fib_rules_tclass(const struct fib_result *res) } #endif -int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, @@ -67,7 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) return err; } -EXPORT_SYMBOL_GPL(fib_lookup); +EXPORT_SYMBOL_GPL(__fib_lookup); static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) @@ -172,7 +172,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); if (rule4->tclassid) - fib_num_tclassid_users++; + net->ipv4.fib_num_tclassid_users++; } #endif @@ -182,6 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->dstmask = inet_make_mask(rule4->dst_len); rule4->tos = frh->tos; + net->ipv4.fib_has_custom_rules = true; err = 0; errout: return err; @@ -189,12 +190,14 @@ errout: static void fib4_rule_delete(struct fib_rule *rule) { + struct net *net = rule->fr_net; #ifdef CONFIG_IP_ROUTE_CLASSID struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (rule4->tclassid) - fib_num_tclassid_users--; + net->ipv4.fib_num_tclassid_users--; #endif + net->ipv4.fib_has_custom_rules = true; } static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, @@ -309,6 +312,7 @@ int __net_init fib4_rules_init(struct net *net) if (err < 0) goto fail; net->ipv4.rules_ops = ops; + net->ipv4.fib_has_custom_rules = false; return 0; fail: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c46c20b6b0b..ae301c897a1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -166,7 +166,7 @@ void free_fib_info(struct fib_info *fi) #ifdef CONFIG_IP_ROUTE_CLASSID change_nexthops(fi) { if (nexthop_nh->nh_tclassid) - fib_num_tclassid_users--; + fi->fib_net->ipv4.fib_num_tclassid_users--; } endfor_nexthops(fi); #endif call_rcu(&fi->rcu, free_fib_info_rcu); @@ -428,7 +428,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; if (nexthop_nh->nh_tclassid) - fib_num_tclassid_users++; + fi->fib_net->ipv4.fib_num_tclassid_users++; #endif } @@ -824,7 +824,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) - fib_num_tclassid_users++; + fi->fib_net->ipv4.fib_num_tclassid_users++; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; -- cgit v1.2.3-70-g09d2 From 6bd0405bb4196b44f1acb7a58f11382cdaf6f7f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Jul 2012 15:42:10 +0200 Subject: netfilter: nf_ct_ecache: fix crash with multiple containers, one shutting down Hans reports that he's still hitting: BUG: unable to handle kernel NULL pointer dereference at 000000000000027c IP: [] netlink_has_listeners+0xb/0x60 PGD 0 Oops: 0000 [#3] PREEMPT SMP CPU 0 It happens when adding a number of containers with do: nfct_query(h, NFCT_Q_CREATE, ct); and most likely one namespace shuts down. this problem was supposed to be fixed by: 70e9942 netfilter: nf_conntrack: make event callback registration per-netns Still, it was missing one rcu_access_pointer to check if the callback is set or not. Reported-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_ecache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index a88fb693938..e1ce1048fe5 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *e; - if (net->ct.nf_conntrack_event_cb == NULL) + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) return; e = nf_ct_ecache_find(ct); -- cgit v1.2.3-70-g09d2 From f72b85b8eb6657fae95ac8f5cb20954b4d87a520 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Jul 2012 19:49:54 +0200 Subject: mac80211: remove ieee80211_key_removed This API call was intended to be used by drivers if they want to optimize key handling by removing one key when another is added. Remove it since no driver is using it. If needed, it can always be added back. Signed-off-by: Johannes Berg --- Documentation/DocBook/80211.tmpl | 1 - include/net/mac80211.h | 16 ---------------- net/mac80211/key.c | 20 -------------------- 3 files changed, 37 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index f3e214f9e25..42e7f030cb1 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -404,7 +404,6 @@ !Finclude/net/mac80211.h ieee80211_get_tkip_p1k !Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv !Finclude/net/mac80211.h ieee80211_get_tkip_p2k -!Finclude/net/mac80211.h ieee80211_key_removed diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e3fa90ce9ec..c5dbb46debb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3591,22 +3591,6 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); void ieee80211_request_smps(struct ieee80211_vif *vif, enum ieee80211_smps_mode smps_mode); -/** - * ieee80211_key_removed - disable hw acceleration for key - * @key_conf: The key hw acceleration should be disabled for - * - * This allows drivers to indicate that the given key has been - * removed from hardware acceleration, due to a new key that - * was added. Don't use this if the key can continue to be used - * for TX, if the key restriction is on RX only it is permitted - * to keep the key for TX only and not call this function. - * - * Due to locking constraints, it may only be called during - * @set_key. This function must be allowed to sleep, and the - * key it tries to disable may still be used until it returns. - */ -void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); - /** * ieee80211_ready_on_channel - notification of remain-on-channel start * @hw: pointer as obtained from ieee80211_alloc_hw() diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b3b7e526e24..7ae678ba5d6 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -194,26 +194,6 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; } -void ieee80211_key_removed(struct ieee80211_key_conf *key_conf) -{ - struct ieee80211_key *key; - - key = container_of(key_conf, struct ieee80211_key, conf); - - might_sleep(); - assert_key_lock(key->local); - - key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - - /* - * Flush TX path to avoid attempts to use this key - * after this function returns. Until then, drivers - * must be prepared to handle the key. - */ - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(ieee80211_key_removed); - static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, bool uni, bool multi) { -- cgit v1.2.3-70-g09d2 From 89a54e48b9cbb44aed1bf6cd712e087b96b6ae65 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jun 2012 14:33:17 +0200 Subject: nl80211: prepare for non-netdev wireless devs In order to support a P2P device abstraction and Bluetooth high-speed AMPs, we need to have a way to identify virtual interfaces that don't have a netdev associated. Do this by adding a NL80211_ATTR_WDEV attribute to identify a wdev which may or may not also be a netdev. To simplify things, use a 64-bit value with the high 32 bits being the wiphy index for this new wdev identifier in the nl80211 API. Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 5 ++ include/net/cfg80211.h | 22 +++++--- net/wireless/core.c | 13 ++--- net/wireless/core.h | 6 +-- net/wireless/nl80211.c | 135 ++++++++++++++++++++++++++++++++++++++---------- net/wireless/sme.c | 4 +- net/wireless/util.c | 6 +-- 7 files changed, 144 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index db961a59247..e791487ead3 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -771,6 +771,9 @@ enum nl80211_commands { * @NL80211_ATTR_IFNAME: network interface name * @NL80211_ATTR_IFTYPE: type of virtual interface, see &enum nl80211_iftype * + * @NL80211_ATTR_WDEV: wireless device identifier, used for pseudo-devices + * that don't have a netdev (u64) + * * @NL80211_ATTR_MAC: MAC address (various uses) * * @NL80211_ATTR_KEY_DATA: (temporal) key data; for TKIP this consists of @@ -1493,6 +1496,8 @@ enum nl80211_attrs { NL80211_ATTR_BG_SCAN_PERIOD, + NL80211_ATTR_WDEV, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 51f67a9003a..a14e6a40668 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2341,17 +2341,25 @@ struct cfg80211_internal_bss; struct cfg80211_cached_keys; /** - * struct wireless_dev - wireless per-netdev state + * struct wireless_dev - wireless device state * - * This structure must be allocated by the driver/stack - * that uses the ieee80211_ptr field in struct net_device - * (this is intentional so it can be allocated along with - * the netdev.) + * For netdevs, this structure must be allocated by the driver + * that uses the ieee80211_ptr field in struct net_device (this + * is intentional so it can be allocated along with the netdev.) + * It need not be registered then as netdev registration will + * be intercepted by cfg80211 to see the new wireless device. + * + * For non-netdev uses, it must also be allocated by the driver + * in response to the cfg80211 callbacks that require it, as + * there's no netdev registration in that case it may not be + * allocated outside of callback operations that return it. * * @wiphy: pointer to hardware description * @iftype: interface type * @list: (private) Used to collect the interfaces - * @netdev: (private) Used to reference back to the netdev + * @netdev: (private) Used to reference back to the netdev, may be %NULL + * @identifier: (private) Identifier used in nl80211 to identify this + * wireless device if it has no netdev * @current_bss: (private) Used by the internal configuration code * @channel: (private) Used by the internal configuration code to track * the user-set AP, monitor and WDS channel @@ -2383,6 +2391,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; + u32 identifier; + struct list_head mgmt_registrations; spinlock_t mgmt_registrations_lock; diff --git a/net/wireless/core.c b/net/wireless/core.c index e42a97b5b97..b110a8a242d 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -176,7 +176,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, if (!(rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK)) return -EOPNOTSUPP; - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); if (err) @@ -188,7 +188,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, /* failed -- clean up to old netns */ net = wiphy_net(&rdev->wiphy); - list_for_each_entry_continue_reverse(wdev, &rdev->netdev_list, + list_for_each_entry_continue_reverse(wdev, &rdev->wdev_list, list) { wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; err = dev_change_net_namespace(wdev->netdev, net, @@ -226,7 +226,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) + list_for_each_entry(wdev, &rdev->wdev_list, list) dev_close(wdev->netdev); mutex_unlock(&rdev->devlist_mtx); @@ -304,7 +304,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) mutex_init(&rdev->mtx); mutex_init(&rdev->devlist_mtx); mutex_init(&rdev->sched_scan_mtx); - INIT_LIST_HEAD(&rdev->netdev_list); + INIT_LIST_HEAD(&rdev->wdev_list); spin_lock_init(&rdev->bss_lock); INIT_LIST_HEAD(&rdev->bss_list); INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done); @@ -622,7 +622,7 @@ void wiphy_unregister(struct wiphy *wiphy) __count == 0; })); mutex_lock(&rdev->devlist_mtx); - BUG_ON(!list_empty(&rdev->netdev_list)); + BUG_ON(!list_empty(&rdev->wdev_list)); mutex_unlock(&rdev->devlist_mtx); /* @@ -821,7 +821,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, spin_lock_init(&wdev->mgmt_registrations_lock); mutex_lock(&rdev->devlist_mtx); - list_add_rcu(&wdev->list, &rdev->netdev_list); + wdev->identifier = ++rdev->wdev_id; + list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/net/wireless/core.h b/net/wireless/core.h index 377dc394f48..6b0170a5f05 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -47,11 +47,11 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associate netdev list */ + /* associated wireless interfaces */ struct mutex devlist_mtx; /* protected by devlist_mtx or RCU */ - struct list_head netdev_list; - int devlist_generation; + struct list_head wdev_list; + int devlist_generation, wdev_id; int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2a5cdb60bc6..35a9b15289f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -46,28 +46,60 @@ static struct genl_family nl80211_fam = { .post_doit = nl80211_post_doit, }; -/* internal helper: get rdev and dev */ -static int get_rdev_dev_by_ifindex(struct net *netns, struct nlattr **attrs, - struct cfg80211_registered_device **rdev, - struct net_device **dev) +/* returns ERR_PTR values */ +static struct wireless_dev * +__cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) { - int ifindex; + struct cfg80211_registered_device *rdev; + struct wireless_dev *result = NULL; + bool have_ifidx = attrs[NL80211_ATTR_IFINDEX]; + bool have_wdev_id = attrs[NL80211_ATTR_WDEV]; + u64 wdev_id; + int wiphy_idx = -1; + int ifidx = -1; - if (!attrs[NL80211_ATTR_IFINDEX]) - return -EINVAL; + assert_cfg80211_lock(); - ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); - *dev = dev_get_by_index(netns, ifindex); - if (!*dev) - return -ENODEV; + if (!have_ifidx && !have_wdev_id) + return ERR_PTR(-EINVAL); - *rdev = cfg80211_get_dev_from_ifindex(netns, ifindex); - if (IS_ERR(*rdev)) { - dev_put(*dev); - return PTR_ERR(*rdev); + if (have_ifidx) + ifidx = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); + if (have_wdev_id) { + wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); + wiphy_idx = wdev_id >> 32; } - return 0; + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + struct wireless_dev *wdev; + + if (wiphy_net(&rdev->wiphy) != netns) + continue; + + if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) + continue; + + mutex_lock(&rdev->devlist_mtx); + list_for_each_entry(wdev, &rdev->wdev_list, list) { + if (have_ifidx && wdev->netdev && + wdev->netdev->ifindex == ifidx) { + result = wdev; + break; + } + if (have_wdev_id && wdev->identifier == (u32)wdev_id) { + result = wdev; + break; + } + } + mutex_unlock(&rdev->devlist_mtx); + + if (result) + break; + } + + if (result) + return result; + return ERR_PTR(-ENODEV); } static struct cfg80211_registered_device * @@ -79,13 +111,40 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) assert_cfg80211_lock(); if (!attrs[NL80211_ATTR_WIPHY] && - !attrs[NL80211_ATTR_IFINDEX]) + !attrs[NL80211_ATTR_IFINDEX] && + !attrs[NL80211_ATTR_WDEV]) return ERR_PTR(-EINVAL); if (attrs[NL80211_ATTR_WIPHY]) rdev = cfg80211_rdev_by_wiphy_idx( nla_get_u32(attrs[NL80211_ATTR_WIPHY])); + if (attrs[NL80211_ATTR_WDEV]) { + u64 wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); + struct wireless_dev *wdev; + bool found = false; + + tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); + if (tmp) { + /* make sure wdev exists */ + mutex_lock(&tmp->devlist_mtx); + list_for_each_entry(wdev, &tmp->wdev_list, list) { + if (wdev->identifier != (u32)wdev_id) + continue; + found = true; + break; + } + mutex_unlock(&tmp->devlist_mtx); + + if (!found) + tmp = NULL; + + if (rdev && tmp != rdev) + return ERR_PTR(-EINVAL); + rdev = tmp; + } + } + if (attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(netns, ifindex); @@ -294,6 +353,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 }, [NL80211_ATTR_INACTIVITY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, + [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, }; /* policy for the key attributes */ @@ -1674,6 +1734,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev) { void *hdr; + u64 wdev_id = (u64)dev->ieee80211_ptr->identifier | + ((u64)rdev->wiphy_idx << 32); hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) @@ -1684,6 +1746,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2))) @@ -1724,7 +1787,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { if (if_idx < if_start) { if_idx++; continue; @@ -2350,7 +2413,7 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) continue; @@ -6660,8 +6723,8 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; struct net_device *dev; - int err; bool rtnl = ops->internal_flags & NL80211_FLAG_NEED_RTNL; if (rtnl) @@ -6676,21 +6739,39 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, } info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { - err = get_rdev_dev_by_ifindex(genl_info_net(info), info->attrs, - &rdev, &dev); - if (err) { + mutex_lock(&cfg80211_mutex); + wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), + info->attrs); + if (IS_ERR(wdev)) { + mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); - return err; + return PTR_ERR(wdev); } + + if (!wdev->netdev) { + mutex_unlock(&cfg80211_mutex); + if (rtnl) + rtnl_unlock(); + return -EINVAL; + } + + dev = wdev->netdev; + rdev = wiphy_to_dev(wdev->wiphy); + if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { - cfg80211_unlock_rdev(rdev); - dev_put(dev); + mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; } + + dev_hold(dev); + cfg80211_lock_rdev(rdev); + + mutex_unlock(&cfg80211_mutex); + info->user_ptr[0] = rdev; info->user_ptr[1] = dev; } @@ -8483,7 +8564,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { - list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) + list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) cfg80211_mlme_unregister_socket(wdev, notify->pid); if (rdev->ap_beacons_nlpid == notify->pid) rdev->ap_beacons_nlpid = 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f7e937ff897..dec97981e68 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -51,7 +51,7 @@ static bool cfg80211_is_all_idle(void) */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { cfg80211_lock_rdev(rdev); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) is_all_idle = false; @@ -221,7 +221,7 @@ void cfg80211_conn_work(struct work_struct *work) cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { wdev_lock(wdev); if (!netif_running(wdev->netdev)) { wdev_unlock(wdev); diff --git a/net/wireless/util.c b/net/wireless/util.c index e31f1dba79e..f7a0647bde9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -793,7 +793,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) + list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); mutex_unlock(&rdev->devlist_mtx); @@ -994,7 +994,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->netdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->beacon_interval) continue; if (wdev->beacon_interval != beacon_int) { @@ -1050,7 +1050,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, break; } - list_for_each_entry(wdev_iter, &rdev->netdev_list, list) { + list_for_each_entry(wdev_iter, &rdev->wdev_list, list) { if (wdev_iter == wdev) continue; if (!netif_running(wdev_iter->netdev)) -- cgit v1.2.3-70-g09d2 From 71bbc9943883cffaf5d7a7728a4e4c50b3ac44d3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jun 2012 15:30:18 +0200 Subject: cfg80211: use wdev in mgmt-tx/ROC APIs The management frame and remain-on-channel APIs will be needed in the P2P device abstraction, so move them over to the new wdev-based APIs. Userspace can still use both the interface index and wdev identifier for them so it's backward compatible, but for the P2P Device wdev it will be able to use the wdev identifier only. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 22 +++--- drivers/net/wireless/ath/ath6kl/core.h | 5 ++ drivers/net/wireless/ath/ath6kl/wmi.c | 10 +-- include/net/cfg80211.h | 27 ++++--- net/mac80211/cfg.c | 21 +++-- net/mac80211/ieee80211_i.h | 6 ++ net/mac80211/offchannel.c | 6 +- net/mac80211/rx.c | 2 +- net/mac80211/status.c | 9 ++- net/wireless/core.h | 2 +- net/wireless/mlme.c | 34 ++++----- net/wireless/nl80211.c | 119 +++++++++++++++++------------ net/wireless/nl80211.h | 10 +-- 13 files changed, 152 insertions(+), 121 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index aca1d2689e9..5f0c66bb6bd 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2975,14 +2975,14 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, } static int ath6kl_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { - struct ath6kl *ar = ath6kl_priv(dev); - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); u32 id; /* TODO: if already pending or ongoing remain-on-channel, @@ -2999,11 +2999,11 @@ static int ath6kl_remain_on_channel(struct wiphy *wiphy, } static int ath6kl_cancel_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie) { - struct ath6kl *ar = ath6kl_priv(dev); - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); if (cookie != vif->last_roc_id) return -ENOENT; @@ -3134,15 +3134,15 @@ static bool ath6kl_is_p2p_go_ssid(const u8 *buf, size_t len) return false; } -static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, +static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct ath6kl *ar = ath6kl_priv(dev); - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); u32 id; const struct ieee80211_mgmt *mgmt; bool more_data, queued; @@ -3187,10 +3187,10 @@ static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, } static void ath6kl_mgmt_frame_register(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u16 frame_type, bool reg) { - struct ath6kl_vif *vif = netdev_priv(dev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: frame_type=0x%x reg=%d\n", __func__, frame_type, reg); diff --git a/drivers/net/wireless/ath/ath6kl/core.h b/drivers/net/wireless/ath/ath6kl/core.h index d38a31de344..cec49a31029 100644 --- a/drivers/net/wireless/ath/ath6kl/core.h +++ b/drivers/net/wireless/ath/ath6kl/core.h @@ -589,6 +589,11 @@ struct ath6kl_vif { struct list_head mc_filter; }; +static inline struct ath6kl_vif *ath6kl_vif_from_wdev(struct wireless_dev *wdev) +{ + return container_of(wdev, struct ath6kl_vif, wdev); +} + #define WOW_LIST_ID 0 #define WOW_HOST_REQ_DELAY 500 /* ms */ diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index a6caa673e8a..c30ab4b11d6 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -474,7 +474,7 @@ static int ath6kl_wmi_remain_on_chnl_event_rx(struct wmi *wmi, u8 *datap, return -EINVAL; } id = vif->last_roc_id; - cfg80211_ready_on_channel(vif->ndev, id, chan, NL80211_CHAN_NO_HT, + cfg80211_ready_on_channel(&vif->wdev, id, chan, NL80211_CHAN_NO_HT, dur, GFP_ATOMIC); return 0; @@ -513,7 +513,7 @@ static int ath6kl_wmi_cancel_remain_on_chnl_event_rx(struct wmi *wmi, else id = vif->last_roc_id; /* timeout on uncanceled r-o-c */ vif->last_cancel_roc_id = 0; - cfg80211_remain_on_channel_expired(vif->ndev, id, chan, + cfg80211_remain_on_channel_expired(&vif->wdev, id, chan, NL80211_CHAN_NO_HT, GFP_ATOMIC); return 0; @@ -533,7 +533,7 @@ static int ath6kl_wmi_tx_status_event_rx(struct wmi *wmi, u8 *datap, int len, ath6kl_dbg(ATH6KL_DBG_WMI, "tx_status: id=%x ack_status=%u\n", id, ev->ack_status); if (wmi->last_mgmt_tx_frame) { - cfg80211_mgmt_tx_status(vif->ndev, id, + cfg80211_mgmt_tx_status(&vif->wdev, id, wmi->last_mgmt_tx_frame, wmi->last_mgmt_tx_frame_len, !!ev->ack_status, GFP_ATOMIC); @@ -568,7 +568,7 @@ static int ath6kl_wmi_rx_probe_req_event_rx(struct wmi *wmi, u8 *datap, int len, dlen, freq, vif->probe_req_report); if (vif->probe_req_report || vif->nw_type == AP_NETWORK) - cfg80211_rx_mgmt(vif->ndev, freq, 0, + cfg80211_rx_mgmt(&vif->wdev, freq, 0, ev->data, dlen, GFP_ATOMIC); return 0; @@ -608,7 +608,7 @@ static int ath6kl_wmi_rx_action_event_rx(struct wmi *wmi, u8 *datap, int len, return -EINVAL; } ath6kl_dbg(ATH6KL_DBG_WMI, "rx_action: len=%u freq=%u\n", dlen, freq); - cfg80211_rx_mgmt(vif->ndev, freq, 0, + cfg80211_rx_mgmt(&vif->wdev, freq, 0, ev->data, dlen, GFP_ATOMIC); return 0; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a14e6a40668..7eaaee7bb07 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1753,23 +1753,23 @@ struct cfg80211_ops { int (*flush_pmksa)(struct wiphy *wiphy, struct net_device *netdev); int (*remain_on_channel)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie); int (*cancel_remain_on_channel)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie); - int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, @@ -1780,7 +1780,7 @@ struct cfg80211_ops { s32 rssi_thold, u32 rssi_hyst); void (*mgmt_frame_register)(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u16 frame_type, bool reg); int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); @@ -3279,7 +3279,7 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, /** * cfg80211_ready_on_channel - notification of remain_on_channel start - * @dev: network device + * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) * @channel_type: Channel type @@ -3287,21 +3287,20 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, * channel * @gfp: allocation flags */ -void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); /** * cfg80211_remain_on_channel_expired - remain_on_channel duration expired - * @dev: network device + * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) * @channel_type: Channel type * @gfp: allocation flags */ -void cfg80211_remain_on_channel_expired(struct net_device *dev, - u64 cookie, +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp); @@ -3329,7 +3328,7 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); /** * cfg80211_rx_mgmt - notification of received, unprocessed management frame - * @dev: network device + * @wdev: wireless device receiving the frame * @freq: Frequency on which the frame was received in MHz * @sig_dbm: signal strength in mBm, or 0 if unknown * @buf: Management frame (header + body) @@ -3344,12 +3343,12 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. */ -bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_dbm, +bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp); /** * cfg80211_mgmt_tx_status - notification of TX status for management frame - * @dev: network device + * @wdev: wireless device receiving the frame * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() * @buf: Management frame (header + body) * @len: length of the frame data @@ -3360,7 +3359,7 @@ bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_dbm, * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6a171e299b5..7d9abea37b1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2299,13 +2299,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, } static int ieee80211_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; int ret; @@ -2392,23 +2392,23 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; return ieee80211_cancel_roc(local, cookie, false); } -static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct sta_info *sta; @@ -2513,21 +2513,20 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u64 cookie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local = wiphy_priv(wiphy); return ieee80211_cancel_roc(local, cookie, true); } static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, u16 frame_type, bool reg) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); switch (frame_type) { case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e0423f8c0ce..8f8535ee599 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1091,6 +1091,12 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) return netdev_priv(dev); } +static inline struct ieee80211_sub_if_data * +IEEE80211_WDEV_TO_SUB_IF(struct wireless_dev *wdev) +{ + return container_of(wdev, struct ieee80211_sub_if_data, wdev); +} + /* this struct represents 802.11n's RA/TID combination */ struct ieee80211_ra_tid { u8 ra[ETH_ALEN]; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b0fb6a2b89a..8c047fc8b32 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -191,7 +191,7 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) roc->frame = NULL; } } else { - cfg80211_ready_on_channel(roc->sdata->dev, (unsigned long)roc, + cfg80211_ready_on_channel(&roc->sdata->wdev, (unsigned long)roc, roc->chan, roc->chan_type, roc->req_duration, GFP_KERNEL); } @@ -299,7 +299,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) /* was never transmitted */ if (roc->frame) { - cfg80211_mgmt_tx_status(roc->sdata->dev, + cfg80211_mgmt_tx_status(&roc->sdata->wdev, (unsigned long)roc->frame, roc->frame->data, roc->frame->len, false, GFP_KERNEL); @@ -307,7 +307,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) } if (!roc->mgmt_tx_cookie) - cfg80211_remain_on_channel_expired(roc->sdata->dev, + cfg80211_remain_on_channel_expired(&roc->sdata->wdev, (unsigned long)roc, roc->chan, roc->chan_type, GFP_KERNEL); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ab5185054e6..f8cf9e7477a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2404,7 +2404,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) if (rx->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) sig = status->signal; - if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, sig, + if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, rx->skb->data, rx->skb->len, GFP_ATOMIC)) { if (rx->sta) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 2ed2f27fe8a..8cd72914cda 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -519,14 +519,19 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) u64 cookie = (unsigned long)skb; acked = info->flags & IEEE80211_TX_STAT_ACK; + /* + * TODO: When we have non-netdev frame TX, + * we cannot use skb->dev->ieee80211_ptr + */ + if (ieee80211_is_nullfunc(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control)) cfg80211_probe_status(skb->dev, hdr->addr1, cookie, acked, GFP_ATOMIC); else cfg80211_mgmt_tx_status( - skb->dev, cookie, skb->data, skb->len, - acked, GFP_ATOMIC); + skb->dev->ieee80211_ptr, cookie, skb->data, + skb->len, acked, GFP_ATOMIC); } if (unlikely(info->ack_frame_id)) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 6b0170a5f05..eae5a25a169 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -372,7 +372,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d4fece3bb18..abe9f82d5a8 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -567,29 +567,28 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } } -void cfg80211_ready_on_channel(struct net_device *dev, u64 cookie, +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_remain_on_channel(rdev, dev, cookie, chan, channel_type, + nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, channel_type, duration, gfp); } EXPORT_SYMBOL(cfg80211_ready_on_channel); -void cfg80211_remain_on_channel_expired(struct net_device *dev, - u64 cookie, +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_remain_on_channel_cancel(rdev, dev, cookie, chan, + nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, channel_type, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); @@ -678,8 +677,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, list_add(&nreg->list, &wdev->mgmt_registrations); if (rdev->ops->mgmt_frame_register) - rdev->ops->mgmt_frame_register(wiphy, wdev->netdev, - frame_type, true); + rdev->ops->mgmt_frame_register(wiphy, wdev, frame_type, true); out: spin_unlock_bh(&wdev->mgmt_registrations_lock); @@ -702,7 +700,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) if (rdev->ops->mgmt_frame_register) { u16 frame_type = le16_to_cpu(reg->frame_type); - rdev->ops->mgmt_frame_register(wiphy, wdev->netdev, + rdev->ops->mgmt_frame_register(wiphy, wdev, frame_type, false); } @@ -731,14 +729,14 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) } int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, bool no_cck, bool dont_wait_for_ack, u64 *cookie) { - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct net_device *dev = wdev->netdev; const struct ieee80211_mgmt *mgmt; u16 stype; @@ -825,16 +823,15 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, + return rdev->ops->mgmt_tx(&rdev->wiphy, wdev, chan, offchan, channel_type, channel_type_valid, wait, buf, len, no_cck, dont_wait_for_ack, cookie); } -bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_mbm, +bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, const u8 *buf, size_t len, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct cfg80211_mgmt_registration *reg; @@ -871,7 +868,7 @@ bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, dev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, freq, sig_mbm, buf, len, gfp)) continue; @@ -886,15 +883,14 @@ bool cfg80211_rx_mgmt(struct net_device *dev, int freq, int sig_mbm, } EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); /* Indicate TX status of the Action frame to user space */ - nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp); + nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp); } EXPORT_SYMBOL(cfg80211_mgmt_tx_status); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5800c49d694..0dc3356eea4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1728,6 +1728,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) return result; } +static inline u64 wdev_id(struct wireless_dev *wdev) +{ + return (u64)wdev->identifier | + ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); +} static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -1735,8 +1740,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, { struct net_device *dev = wdev->netdev; void *hdr; - u64 wdev_id = (u64)wdev->identifier | - ((u64)rdev->wiphy_idx << 32); hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) @@ -1750,7 +1753,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || - nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2))) @@ -5752,7 +5755,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct ieee80211_channel *chan; struct sk_buff *msg; void *hdr; @@ -5800,7 +5803,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, goto free_msg; } - err = rdev->ops->remain_on_channel(&rdev->wiphy, dev, chan, + err = rdev->ops->remain_on_channel(&rdev->wiphy, wdev, chan, channel_type, duration, &cookie); if (err) @@ -5824,7 +5827,7 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -5835,7 +5838,7 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); - return rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie); + return rdev->ops->cancel_remain_on_channel(&rdev->wiphy, wdev, cookie); } static u32 rateset_to_mask(struct ieee80211_supported_band *sband, @@ -5984,7 +5987,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) @@ -5993,21 +5996,24 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_FRAME_TYPE]) frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + break; + default: return -EOPNOTSUPP; + } /* not much point in registering if we can't reply */ if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid, - frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6015,7 +6021,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct ieee80211_channel *chan; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; bool channel_type_valid = false; @@ -6036,14 +6042,18 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_GO: + break; + default: return -EOPNOTSUPP; + } if (info->attrs[NL80211_ATTR_DURATION]) { if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX)) @@ -6092,7 +6102,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, + err = cfg80211_mlme_mgmt_tx(rdev, wdev, chan, offchan, channel_type, channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), @@ -6120,7 +6130,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) @@ -6129,17 +6139,21 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in if (!rdev->ops->mgmt_tx_cancel_wait) return -EOPNOTSUPP; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_GO: + break; + default: return -EOPNOTSUPP; + } cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); - return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie); + return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, wdev, cookie); } static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) @@ -7172,7 +7186,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_remain_on_channel, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7180,7 +7194,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_cancel_remain_on_channel, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7196,7 +7210,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_register_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { @@ -7204,7 +7218,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_tx_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7212,7 +7226,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_tx_mgmt_cancel_wait, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -8040,7 +8054,7 @@ nla_put_failure: static void nl80211_send_remain_on_chan_event( int cmd, struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) @@ -8059,7 +8073,9 @@ static void nl80211_send_remain_on_chan_event( } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u32(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) @@ -8081,23 +8097,24 @@ static void nl80211_send_remain_on_chan_event( } void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, - rdev, netdev, cookie, chan, + rdev, wdev, cookie, chan, channel_type, duration, gfp); } void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, struct net_device *netdev, + struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, - rdev, netdev, cookie, chan, + rdev, wdev, cookie, chan, channel_type, 0, gfp); } @@ -8211,10 +8228,11 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { + struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -8229,7 +8247,8 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + netdev->ifindex)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) || @@ -8247,10 +8266,11 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, } void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp) { + struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; @@ -8265,7 +8285,8 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + netdev->ifindex)) || nla_put(msg, NL80211_ATTR_FRAME, len, buf) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie) || (ack && nla_put_flag(msg, NL80211_ATTR_ACK))) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 01a1122c3b3..0469303b5c3 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,13 +74,13 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, gfp_t gfp); void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - u64 cookie, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, struct net_device *netdev, + struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, gfp_t gfp); @@ -92,11 +92,11 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, gfp_t gfp); int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, + struct wireless_dev *wdev, u64 cookie, const u8 *buf, size_t len, bool ack, gfp_t gfp); -- cgit v1.2.3-70-g09d2 From a9a741a7e2e6337ae5c030e78827c233c08902a7 Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 30 Apr 2012 18:21:51 +0200 Subject: NFC: Prepare asynchronous error management for driver and shdlc Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 ++ net/nfc/hci/core.c | 8 ++++++++ net/nfc/hci/shdlc.c | 19 +++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index e30e6a86971..d25dd9b9987 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -112,6 +112,8 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev); void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata); void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); +void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err); + /* Host IDs */ #define NFC_HCI_HOST_CONTROLLER_ID 0x00 #define NFC_HCI_TERMINAL_HOST_ID 0x01 diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index a8b0b71e8f8..1dc6485343b 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -717,6 +717,14 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev) } EXPORT_SYMBOL(nfc_hci_get_clientdata); +void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err) +{ + /* TODO: lower layer has permanent failure. + * complete potential HCI command or send an empty tag discovered event + */ +} +EXPORT_SYMBOL(nfc_hci_driver_failure); + void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb) { struct hcp_packet *packet; diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c index 6b836e6242b..d7c74d152a7 100644 --- a/net/nfc/hci/shdlc.c +++ b/net/nfc/hci/shdlc.c @@ -523,10 +523,6 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc) r = shdlc->ops->xmit(shdlc, skb); if (r < 0) { - /* - * TODO: Cannot send, shdlc machine is dead, we - * must propagate the information up to HCI. - */ shdlc->hard_fault = r; break; } @@ -590,6 +586,11 @@ static void nfc_shdlc_sm_work(struct work_struct *work) skb_queue_purge(&shdlc->ack_pending_q); break; case SHDLC_CONNECTING: + if (shdlc->hard_fault) { + nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + break; + } + if (shdlc->connect_tries++ < 5) r = nfc_shdlc_connect_initiate(shdlc); else @@ -610,6 +611,11 @@ static void nfc_shdlc_sm_work(struct work_struct *work) } nfc_shdlc_handle_rcv_queue(shdlc); + + if (shdlc->hard_fault) { + nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault); + break; + } break; case SHDLC_CONNECTED: nfc_shdlc_handle_rcv_queue(shdlc); @@ -637,10 +643,7 @@ static void nfc_shdlc_sm_work(struct work_struct *work) } if (shdlc->hard_fault) { - /* - * TODO: Handle hard_fault that occured during - * this invocation of the shdlc worker - */ + nfc_hci_driver_failure(shdlc->hdev, shdlc->hard_fault); } break; default: -- cgit v1.2.3-70-g09d2 From 456411ca812860d7ba06d3e4013ce1d8b9dbc7cd Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Mon, 11 Jun 2012 13:49:51 +0200 Subject: NFC: Driver failure API This API should be used by drivers, HCI, SHDLC or NCI stacks to report an unrecoverable error. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 2 ++ net/nfc/core.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 180964b954a..6431f5e3902 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -204,4 +204,6 @@ int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, int nfc_tm_deactivated(struct nfc_dev *dev); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); +void nfc_driver_failure(struct nfc_dev *dev, int err); + #endif /* __NET_NFC_H */ diff --git a/net/nfc/core.c b/net/nfc/core.c index 4177bb5104b..32f28326b62 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -651,6 +651,16 @@ int nfc_target_lost(struct nfc_dev *dev, u32 target_idx) } EXPORT_SYMBOL(nfc_target_lost); +void nfc_driver_failure(struct nfc_dev *dev, int err) +{ + /* + * TODO: if polling is active, send empty target_found + * or else do whatever makes sense to let user space + * know this device needs to be closed and reinitialized. + */ +} +EXPORT_SYMBOL(nfc_driver_failure); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); -- cgit v1.2.3-70-g09d2 From a10d595b1074d04446f77161eea165e5809e163c Mon Sep 17 00:00:00 2001 From: Eric Lapuyade Date: Tue, 5 Jun 2012 14:42:11 +0200 Subject: NFC: Allow HCI driver to pre-open pipes to some gates Some NFC chips will statically create and open pipes for both standard and proprietary gates. The driver can now pass this information to HCI such that HCI will not attempt to create and open them, but will instead directly use the passed pipe ids. Signed-off-by: Eric Lapuyade Signed-off-by: Samuel Ortiz --- drivers/nfc/pn544_hci.c | 31 ++++++++++++++++++------------- include/net/nfc/hci.h | 17 ++++++++++++++--- net/nfc/hci/command.c | 8 ++++++-- net/nfc/hci/core.c | 23 +++++++++-------------- net/nfc/hci/hci.h | 5 ----- 5 files changed, 47 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c index 69df6fecb84..c67b55e224e 100644 --- a/drivers/nfc/pn544_hci.c +++ b/drivers/nfc/pn544_hci.c @@ -108,16 +108,22 @@ enum pn544_state { #define PN544_NFC_WI_MGMT_GATE 0xA1 -static u8 pn544_custom_gates[] = { - PN544_SYS_MGMT_GATE, - PN544_SWP_MGMT_GATE, - PN544_POLLING_LOOP_MGMT_GATE, - PN544_NFC_WI_MGMT_GATE, - PN544_RF_READER_F_GATE, - PN544_RF_READER_JEWEL_GATE, - PN544_RF_READER_ISO15693_GATE, - PN544_RF_READER_NFCIP1_INITIATOR_GATE, - PN544_RF_READER_NFCIP1_TARGET_GATE +static struct nfc_hci_gate pn544_gates[] = { + {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE}, + {NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_SYS_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_SWP_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_POLLING_LOOP_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_NFC_WI_MGMT_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_JEWEL_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_ISO15693_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_NFCIP1_INITIATOR_GATE, NFC_HCI_INVALID_PIPE}, + {PN544_RF_READER_NFCIP1_TARGET_GATE, NFC_HCI_INVALID_PIPE} }; /* Largest headroom needed for outgoing custom commands */ @@ -849,10 +855,9 @@ static int __devinit pn544_hci_probe(struct i2c_client *client, goto err_rti; } - init_data.gate_count = ARRAY_SIZE(pn544_custom_gates); + init_data.gate_count = ARRAY_SIZE(pn544_gates); - memcpy(init_data.gates, pn544_custom_gates, - ARRAY_SIZE(pn544_custom_gates)); + memcpy(init_data.gates, pn544_gates, sizeof(pn544_gates)); /* * TODO: Session id must include the driver name + some bus addr diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index d25dd9b9987..f5169b04f08 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -44,10 +44,20 @@ struct nfc_hci_ops { struct nfc_target *target); }; -#define NFC_HCI_MAX_CUSTOM_GATES 15 +/* Pipes */ +#define NFC_HCI_INVALID_PIPE 0x80 +#define NFC_HCI_LINK_MGMT_PIPE 0x00 +#define NFC_HCI_ADMIN_PIPE 0x01 + +struct nfc_hci_gate { + u8 gate; + u8 pipe; +}; + +#define NFC_HCI_MAX_CUSTOM_GATES 50 struct nfc_hci_init_data { u8 gate_count; - u8 gates[NFC_HCI_MAX_CUSTOM_GATES]; + struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; char session_id[9]; }; @@ -182,7 +192,8 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb); /* connecting to gates and sending hci instructions */ -int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate); +int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, + u8 pipe); int nfc_hci_disconnect_gate(struct nfc_hci_dev *hdev, u8 gate); int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev); int nfc_hci_get_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx, diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 12cd6f3f77e..46362ef979d 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -299,9 +299,9 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) } EXPORT_SYMBOL(nfc_hci_disconnect_all_gates); -int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate) +int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, + u8 pipe) { - u8 pipe = NFC_HCI_INVALID_PIPE; bool pipe_created = false; int r; @@ -310,6 +310,9 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate) if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) return -EADDRINUSE; + if (pipe != NFC_HCI_INVALID_PIPE) + goto pipe_is_open; + switch (dest_gate) { case NFC_HCI_LINK_MGMT_GATE: pipe = NFC_HCI_LINK_MGMT_PIPE; @@ -335,6 +338,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate) return r; } +pipe_is_open: hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index e6b2df3981b..4ccc518f56e 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -315,15 +315,15 @@ static void nfc_hci_cmd_timeout(unsigned long data) } static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count, - u8 gates[]) + struct nfc_hci_gate *gates) { int r; - u8 *p = gates; while (gate_count--) { - r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, *p); + r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, + gates->gate, gates->pipe); if (r < 0) return r; - p++; + gates++; } return 0; @@ -333,14 +333,13 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) { struct sk_buff *skb = NULL; int r; - u8 hci_gates[] = { /* NFC_HCI_ADMIN_GATE MUST be first */ - NFC_HCI_ADMIN_GATE, NFC_HCI_LOOPBACK_GATE, - NFC_HCI_ID_MGMT_GATE, NFC_HCI_LINK_MGMT_GATE, - NFC_HCI_RF_READER_B_GATE, NFC_HCI_RF_READER_A_GATE - }; + + if (hdev->init_data.gates[0].gate != NFC_HCI_ADMIN_GATE) + return -EPROTO; r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, - NFC_HCI_ADMIN_GATE); + hdev->init_data.gates[0].gate, + hdev->init_data.gates[0].pipe); if (r < 0) goto exit; @@ -368,10 +367,6 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev) if (r < 0) goto exit; - r = hci_dev_connect_gates(hdev, sizeof(hci_gates), hci_gates); - if (r < 0) - goto disconnect_all; - r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count, hdev->init_data.gates); if (r < 0) diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h index d3cde075ba6..fa9a21e9223 100644 --- a/net/nfc/hci/hci.h +++ b/net/nfc/hci/hci.h @@ -132,9 +132,4 @@ void nfc_hci_hcp_message_rx(struct nfc_hci_dev *hdev, u8 pipe, u8 type, #define NFC_HCI_ANY_E_REG_ACCESS_DENIED 0x0a #define NFC_HCI_ANY_E_PIPE_ACCESS_DENIED 0x0b -/* Pipes */ -#define NFC_HCI_INVALID_PIPE 0x80 -#define NFC_HCI_LINK_MGMT_PIPE 0x00 -#define NFC_HCI_ADMIN_PIPE 0x01 - #endif /* __LOCAL_HCI_H */ -- cgit v1.2.3-70-g09d2 From e044a651b9b7b1b33d8b7fdb2bb27e443f392083 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 18:05:28 -0700 Subject: ipv4: Fix crashes in fib_rules_tclass(). All paths assume, when CONFIG_IP_MULTIPLE_TABLES is enabled, that any successful call to fib_lookup() will initialize the fib_result->r value to something. We violated that expectation in the new fib_lookup() fast path. Reported-by: Or Gerlitz Tested-by: Eric Dumazet Tested-by: Greg Rose Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 539c6721f81..000c4674e18 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -230,6 +230,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { if (!net->ipv4.fib_has_custom_rules) { + res->r = NULL; if (net->ipv4.fib_local && !fib_table_lookup(net->ipv4.fib_local, flp, res, FIB_LOOKUP_NOREF)) -- cgit v1.2.3-70-g09d2 From 4aabd8ef8c43677cfee3e1e36c5a79edddb41942 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jul 2012 16:07:30 -0700 Subject: tcp: Move dynamnic metrics handling into seperate file. Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++ net/ipv4/Makefile | 2 +- net/ipv4/tcp_input.c | 188 +----------------------------------------------- net/ipv4/tcp_metrics.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 199 insertions(+), 187 deletions(-) create mode 100644 net/ipv4/tcp_metrics.c (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 53fb7d81417..98ca797001a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -388,6 +388,8 @@ extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); +extern void tcp_init_metrics(struct sock *sk); +extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); extern unsigned int tcp_poll(struct file * file, struct socket *sock, @@ -556,6 +558,8 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return (tp->srtt >> 3) + tp->rttvar; } +extern void tcp_set_rto(struct sock *sk); + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ff75d3bbcd6..5a23e8b3710 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ - tcp_minisocks.o tcp_cong.o \ + tcp_minisocks.o tcp_cong.o tcp_metrics.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ca0d0e7c977..055ac49b8b4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,7 +93,6 @@ int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; -int sysctl_tcp_nometrics_save __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; @@ -701,7 +700,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static inline void tcp_set_rto(struct sock *sk) +void tcp_set_rto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) @@ -728,109 +727,6 @@ static inline void tcp_set_rto(struct sock *sk) tcp_bound_rto(sk); } -/* Save metrics learned by this TCP session. - This function is called only, when TCP finishes successfully - i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE. - */ -void tcp_update_metrics(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - - if (sysctl_tcp_nometrics_save) - return; - - if (dst && (dst->flags & DST_HOST)) { - const struct inet_connection_sock *icsk = inet_csk(sk); - int m; - unsigned long rtt; - - dst_confirm(dst); - - if (icsk->icsk_backoff || !tp->srtt) { - /* This session failed to estimate rtt. Why? - * Probably, no packets returned in time. - * Reset our results. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) - dst_metric_set(dst, RTAX_RTT, 0); - return; - } - - rtt = dst_metric_rtt(dst, RTAX_RTT); - m = rtt - tp->srtt; - - /* If newly calculated rtt larger than stored one, - * store new one. Otherwise, use EWMA. Remember, - * rtt overestimation is always better than underestimation. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) { - if (m <= 0) - set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); - else - set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); - } - - if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { - unsigned long var; - if (m < 0) - m = -m; - - /* Scale deviation to rttvar fixed point */ - m >>= 1; - if (m < tp->mdev) - m = tp->mdev; - - var = dst_metric_rtt(dst, RTAX_RTTVAR); - if (m >= var) - var = m; - else - var -= (var - m) >> 2; - - set_dst_metric_rtt(dst, RTAX_RTTVAR, var); - } - - if (tcp_in_initial_slowstart(tp)) { - /* Slow start still did not finish. */ - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); - if (!dst_metric_locked(dst, RTAX_CWND) && - tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); - } else if (tp->snd_cwnd > tp->snd_ssthresh && - icsk->icsk_ca_state == TCP_CA_Open) { - /* Cong. avoidance phase, cwnd is reliable. */ - if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, - max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_cwnd) >> 1); - } else { - /* Else slow start did not finish, cwnd is non-sense, - ssthresh may be also invalid. - */ - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_ssthresh) >> 1); - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); - } - - if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && - tp->reordering != sysctl_tcp_reordering) - dst_metric_set(dst, RTAX_REORDERING, tp->reordering); - } - } -} - __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -867,7 +763,7 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) * Packet counting of FACK is based on in-order assumptions, therefore TCP * disables it when reordering is detected */ -static void tcp_disable_fack(struct tcp_sock *tp) +void tcp_disable_fack(struct tcp_sock *tp) { /* RFC3517 uses different metric in lost marker => reset on change */ if (tcp_is_fack(tp)) @@ -881,86 +777,6 @@ static void tcp_dsack_seen(struct tcp_sock *tp) tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; } -/* Initialize metrics on socket. */ - -static void tcp_init_metrics(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); - - if (dst == NULL) - goto reset; - - dst_confirm(dst); - - if (dst_metric_locked(dst, RTAX_CWND)) - tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); - if (dst_metric(dst, RTAX_SSTHRESH)) { - tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); - if (tp->snd_ssthresh > tp->snd_cwnd_clamp) - tp->snd_ssthresh = tp->snd_cwnd_clamp; - } else { - /* ssthresh may have been reduced unnecessarily during. - * 3WHS. Restore it back to its initial default. - */ - tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; - } - if (dst_metric(dst, RTAX_REORDERING) && - tp->reordering != dst_metric(dst, RTAX_REORDERING)) { - tcp_disable_fack(tp); - tcp_disable_early_retrans(tp); - tp->reordering = dst_metric(dst, RTAX_REORDERING); - } - - if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) - goto reset; - - /* Initial rtt is determined from SYN,SYN-ACK. - * The segment is small and rtt may appear much - * less than real one. Use per-dst memory - * to make it more realistic. - * - * A bit of theory. RTT is time passed after "normal" sized packet - * is sent until it is ACKed. In normal circumstances sending small - * packets force peer to delay ACKs and calculation is correct too. - * The algorithm is adaptive and, provided we follow specs, it - * NEVER underestimate RTT. BUT! If peer tries to make some clever - * tricks sort of "quick acks" for time long enough to decrease RTT - * to low value, and then abruptly stops to do it and starts to delay - * ACKs, wait for troubles. - */ - if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { - tp->srtt = dst_metric_rtt(dst, RTAX_RTT); - tp->rtt_seq = tp->snd_nxt; - } - if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { - tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); - tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); - } - tcp_set_rto(sk); -reset: - if (tp->srtt == 0) { - /* RFC6298: 5.7 We've failed to get a valid RTT sample from - * 3WHS. This is most likely due to retransmission, - * including spurious one. Reset the RTO back to 3secs - * from the more aggressive 1sec to avoid more spurious - * retransmission. - */ - tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; - } - /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been - * retransmitted. In light of RFC6298 more aggressive 1sec - * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK - * retransmission has occurred. - */ - if (tp->total_retrans > 1) - tp->snd_cwnd = 1; - else - tp->snd_cwnd = tcp_init_cwnd(tp, dst); - tp->snd_cwnd_stamp = tcp_time_stamp; -} - static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c new file mode 100644 index 00000000000..2793ecf928d --- /dev/null +++ b/net/ipv4/tcp_metrics.c @@ -0,0 +1,192 @@ +#include +#include + +#include +#include +#include +#include + +int sysctl_tcp_nometrics_save __read_mostly; + +/* Save metrics learned by this TCP session. This function is called + * only, when TCP finishes successfully i.e. when it enters TIME-WAIT + * or goes from LAST-ACK to CLOSE. + */ +void tcp_update_metrics(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); + + if (sysctl_tcp_nometrics_save) + return; + + if (dst && (dst->flags & DST_HOST)) { + const struct inet_connection_sock *icsk = inet_csk(sk); + int m; + unsigned long rtt; + + dst_confirm(dst); + + if (icsk->icsk_backoff || !tp->srtt) { + /* This session failed to estimate rtt. Why? + * Probably, no packets returned in time. + * Reset our results. + */ + if (!(dst_metric_locked(dst, RTAX_RTT))) + dst_metric_set(dst, RTAX_RTT, 0); + return; + } + + rtt = dst_metric_rtt(dst, RTAX_RTT); + m = rtt - tp->srtt; + + /* If newly calculated rtt larger than stored one, + * store new one. Otherwise, use EWMA. Remember, + * rtt overestimation is always better than underestimation. + */ + if (!(dst_metric_locked(dst, RTAX_RTT))) { + if (m <= 0) + set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); + else + set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); + } + + if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { + unsigned long var; + if (m < 0) + m = -m; + + /* Scale deviation to rttvar fixed point */ + m >>= 1; + if (m < tp->mdev) + m = tp->mdev; + + var = dst_metric_rtt(dst, RTAX_RTTVAR); + if (m >= var) + var = m; + else + var -= (var - m) >> 2; + + set_dst_metric_rtt(dst, RTAX_RTTVAR, var); + } + + if (tcp_in_initial_slowstart(tp)) { + /* Slow start still did not finish. */ + if (dst_metric(dst, RTAX_SSTHRESH) && + !dst_metric_locked(dst, RTAX_SSTHRESH) && + (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); + if (!dst_metric_locked(dst, RTAX_CWND) && + tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) + dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); + } else if (tp->snd_cwnd > tp->snd_ssthresh && + icsk->icsk_ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ + if (!dst_metric_locked(dst, RTAX_SSTHRESH)) + dst_metric_set(dst, RTAX_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); + if (!dst_metric_locked(dst, RTAX_CWND)) + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_cwnd) >> 1); + } else { + /* Else slow start did not finish, cwnd is non-sense, + ssthresh may be also invalid. + */ + if (!dst_metric_locked(dst, RTAX_CWND)) + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_ssthresh) >> 1); + if (dst_metric(dst, RTAX_SSTHRESH) && + !dst_metric_locked(dst, RTAX_SSTHRESH) && + tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); + } + + if (!dst_metric_locked(dst, RTAX_REORDERING)) { + if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && + tp->reordering != sysctl_tcp_reordering) + dst_metric_set(dst, RTAX_REORDERING, tp->reordering); + } + } +} + +/* Initialize metrics on socket. */ + +void tcp_init_metrics(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct dst_entry *dst = __sk_dst_get(sk); + + if (dst == NULL) + goto reset; + + dst_confirm(dst); + + if (dst_metric_locked(dst, RTAX_CWND)) + tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); + if (dst_metric(dst, RTAX_SSTHRESH)) { + tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); + if (tp->snd_ssthresh > tp->snd_cwnd_clamp) + tp->snd_ssthresh = tp->snd_cwnd_clamp; + } else { + /* ssthresh may have been reduced unnecessarily during. + * 3WHS. Restore it back to its initial default. + */ + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + } + if (dst_metric(dst, RTAX_REORDERING) && + tp->reordering != dst_metric(dst, RTAX_REORDERING)) { + tcp_disable_fack(tp); + tcp_disable_early_retrans(tp); + tp->reordering = dst_metric(dst, RTAX_REORDERING); + } + + if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) + goto reset; + + /* Initial rtt is determined from SYN,SYN-ACK. + * The segment is small and rtt may appear much + * less than real one. Use per-dst memory + * to make it more realistic. + * + * A bit of theory. RTT is time passed after "normal" sized packet + * is sent until it is ACKed. In normal circumstances sending small + * packets force peer to delay ACKs and calculation is correct too. + * The algorithm is adaptive and, provided we follow specs, it + * NEVER underestimate RTT. BUT! If peer tries to make some clever + * tricks sort of "quick acks" for time long enough to decrease RTT + * to low value, and then abruptly stops to do it and starts to delay + * ACKs, wait for troubles. + */ + if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { + tp->srtt = dst_metric_rtt(dst, RTAX_RTT); + tp->rtt_seq = tp->snd_nxt; + } + if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { + tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); + tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); + } + tcp_set_rto(sk); +reset: + if (tp->srtt == 0) { + /* RFC6298: 5.7 We've failed to get a valid RTT sample from + * 3WHS. This is most likely due to retransmission, + * including spurious one. Reset the RTO back to 3secs + * from the more aggressive 1sec to avoid more spurious + * retransmission. + */ + tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; + } + /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been + * retransmitted. In light of RFC6298 more aggressive 1sec + * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK + * retransmission has occurred. + */ + if (tp->total_retrans > 1) + tp->snd_cwnd = 1; + else + tp->snd_cwnd = tcp_init_cwnd(tp, dst); + tp->snd_cwnd_stamp = tcp_time_stamp; +} -- cgit v1.2.3-70-g09d2 From ab92bb2f679d66c7e12a6b1c0cdd76fe308f6546 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 Jul 2012 16:19:30 -0700 Subject: tcp: Abstract back handling peer aliveness test into helper function. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_metrics.c | 10 ++++++++++ net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 98ca797001a..5478356ea8c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -389,6 +389,7 @@ extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); extern void tcp_init_metrics(struct sock *sk); +extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 64568fa21d0..e9312a8f33a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1405,7 +1405,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst_metric(dst, RTAX_RTT))) { + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 2793ecf928d..9afe703c85c 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1,7 +1,9 @@ +#include #include #include #include +#include #include #include #include @@ -190,3 +192,11 @@ reset: tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; } + +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) +{ + if (!dst) + return false; + return dst_metric(dst, RTAX_RTT) ? true : false; +} +EXPORT_SYMBOL_GPL(tcp_peer_is_proven); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6cc67ed6c2e..75d179555c2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1177,7 +1177,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && - (!dst || !dst_metric(dst, RTAX_RTT))) { + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. -- cgit v1.2.3-70-g09d2 From 51c5d0c4b169bf762f09e0d5b283a7f0b2a45739 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 00:49:14 -0700 Subject: tcp: Maintain dynamic metrics in local cache. Maintain a local hash table of TCP dynamic metrics blobs. Computed TCP metrics are no longer maintained in the route metrics. The table uses RCU and an extremely simple hash so that it has low latency and low overhead. A simple hash is legitimate because we only make metrics blobs for fully established connections. Some tweaking of the default hash table sizes, metric timeouts, and the hash chain length limit certainly could use some tweaking. But the basic design seems sound. With help from Eric Dumazet and Joe Perches. Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 + include/net/tcp.h | 1 + net/ipv4/tcp.c | 2 + net/ipv4/tcp_metrics.c | 555 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 468 insertions(+), 93 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 599e48fa97c..2e089a99d60 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -7,6 +7,7 @@ #include +struct tcpm_hash_bucket; struct ctl_table_header; struct ipv4_devconf; struct fib_rules_ops; @@ -39,6 +40,8 @@ struct netns_ipv4 { struct sock **icmp_sk; struct sock *tcp_sock; struct inet_peer_base *peers; + struct tcpm_hash_bucket *tcp_metrics_hash; + unsigned int tcp_metrics_hash_mask; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/tcp.h b/include/net/tcp.h index 5478356ea8c..0900d63d162 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -389,6 +389,7 @@ extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); extern void tcp_init_metrics(struct sock *sk); +extern void tcp_metrics_init(void); extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3ba605f60e4..29aa0c800cd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3563,6 +3563,8 @@ void __init tcp_init(void) pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); + tcp_metrics_init(); + tcp_register_congestion_control(&tcp_reno); memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 9afe703c85c..56223bab251 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1,134 +1,431 @@ +#include +#include +#include +#include #include #include +#include +#include #include #include +#include #include +#include #include +#include #include #include int sysctl_tcp_nometrics_save __read_mostly; +enum tcp_metric_index { + TCP_METRIC_RTT, + TCP_METRIC_RTTVAR, + TCP_METRIC_SSTHRESH, + TCP_METRIC_CWND, + TCP_METRIC_REORDERING, + + /* Always last. */ + TCP_METRIC_MAX, +}; + +struct tcp_metrics_block { + struct tcp_metrics_block __rcu *tcpm_next; + struct inetpeer_addr tcpm_addr; + unsigned long tcpm_stamp; + u32 tcpm_lock; + u32 tcpm_vals[TCP_METRIC_MAX]; +}; + +static bool tcp_metric_locked(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) +{ + return tm->tcpm_lock & (1 << idx); +} + +static u32 tcp_metric_get(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) +{ + return tm->tcpm_vals[idx]; +} + +static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) +{ + return msecs_to_jiffies(tm->tcpm_vals[idx]); +} + +static void tcp_metric_set(struct tcp_metrics_block *tm, + enum tcp_metric_index idx, + u32 val) +{ + tm->tcpm_vals[idx] = val; +} + +static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, + enum tcp_metric_index idx, + u32 val) +{ + tm->tcpm_vals[idx] = jiffies_to_msecs(val); +} + +static bool addr_same(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) +{ + const struct in6_addr *a6, *b6; + + if (a->family != b->family) + return false; + if (a->family == AF_INET) + return a->addr.a4 == b->addr.a4; + + a6 = (const struct in6_addr *) &a->addr.a6[0]; + b6 = (const struct in6_addr *) &b->addr.a6[0]; + + return ipv6_addr_equal(a6, b6); +} + +struct tcpm_hash_bucket { + struct tcp_metrics_block __rcu *chain; +}; + +static DEFINE_SPINLOCK(tcp_metrics_lock); + +static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) +{ + u32 val; + + val = 0; + if (dst_metric_locked(dst, RTAX_RTT)) + val |= 1 << TCP_METRIC_RTT; + if (dst_metric_locked(dst, RTAX_RTTVAR)) + val |= 1 << TCP_METRIC_RTTVAR; + if (dst_metric_locked(dst, RTAX_SSTHRESH)) + val |= 1 << TCP_METRIC_SSTHRESH; + if (dst_metric_locked(dst, RTAX_CWND)) + val |= 1 << TCP_METRIC_CWND; + if (dst_metric_locked(dst, RTAX_REORDERING)) + val |= 1 << TCP_METRIC_REORDERING; + tm->tcpm_lock = val; + + tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); + tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); + tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); + tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); +} + +static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + struct inetpeer_addr *addr, + unsigned int hash, + bool reclaim) +{ + struct tcp_metrics_block *tm; + struct net *net; + + spin_lock_bh(&tcp_metrics_lock); + net = dev_net(dst->dev); + if (unlikely(reclaim)) { + struct tcp_metrics_block *oldest; + + oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); + for (tm = rcu_dereference(oldest->tcpm_next); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) + oldest = tm; + } + tm = oldest; + } else { + tm = kmalloc(sizeof(*tm), GFP_ATOMIC); + if (!tm) + goto out_unlock; + } + tm->tcpm_addr = *addr; + tm->tcpm_stamp = jiffies; + + tcpm_suck_dst(tm, dst); + + if (likely(!reclaim)) { + tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; + rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm); + } + +out_unlock: + spin_unlock_bh(&tcp_metrics_lock); + return tm; +} + +#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) + +static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +{ + if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) + tcpm_suck_dst(tm, dst); +} + +#define TCP_METRICS_RECLAIM_DEPTH 5 +#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL + +static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) +{ + if (tm) + return tm; + if (depth > TCP_METRICS_RECLAIM_DEPTH) + return TCP_METRICS_RECLAIM_PTR; + return NULL; +} + +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, + struct net *net, unsigned int hash) +{ + struct tcp_metrics_block *tm; + int depth = 0; + + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, addr)) + break; + depth++; + } + return tcp_get_encode(tm, depth); +} + +static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, + struct dst_entry *dst) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; + + addr.family = req->rsk_ops->family; + switch (addr.family) { + case AF_INET: + addr.addr.a4 = inet_rsk(req)->rmt_addr; + hash = (__force unsigned int) addr.addr.a4; + break; + case AF_INET6: + *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; + hash = ((__force unsigned int) addr.addr.a6[0] ^ + (__force unsigned int) addr.addr.a6[1] ^ + (__force unsigned int) addr.addr.a6[2] ^ + (__force unsigned int) addr.addr.a6[3]); + break; + default: + return NULL; + } + + hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); + + net = dev_net(dst->dev); + hash &= net->ipv4.tcp_metrics_hash_mask; + + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) + break; + } + tcpm_check_stamp(tm, dst); + return tm; +} + +static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, + struct dst_entry *dst, + bool create) +{ + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; + bool reclaim; + + addr.family = sk->sk_family; + switch (addr.family) { + case AF_INET: + addr.addr.a4 = inet_sk(sk)->inet_daddr; + hash = (__force unsigned int) addr.addr.a4; + break; + case AF_INET6: + *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; + hash = ((__force unsigned int) addr.addr.a6[0] ^ + (__force unsigned int) addr.addr.a6[1] ^ + (__force unsigned int) addr.addr.a6[2] ^ + (__force unsigned int) addr.addr.a6[3]); + break; + default: + return NULL; + } + + hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); + + net = dev_net(dst->dev); + hash &= net->ipv4.tcp_metrics_hash_mask; + + tm = __tcp_get_metrics(&addr, net, hash); + reclaim = false; + if (tm == TCP_METRICS_RECLAIM_PTR) { + reclaim = true; + tm = NULL; + } + if (!tm && create) + tm = tcpm_new(dst, &addr, hash, reclaim); + else + tcpm_check_stamp(tm, dst); + + return tm; +} + /* Save metrics learned by this TCP session. This function is called * only, when TCP finishes successfully i.e. when it enters TIME-WAIT * or goes from LAST-ACK to CLOSE. */ void tcp_update_metrics(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_metrics_block *tm; + unsigned long rtt; + u32 val; + int m; - if (sysctl_tcp_nometrics_save) + if (sysctl_tcp_nometrics_save || !dst) return; - if (dst && (dst->flags & DST_HOST)) { - const struct inet_connection_sock *icsk = inet_csk(sk); - int m; - unsigned long rtt; - + if (dst->flags & DST_HOST) dst_confirm(dst); - if (icsk->icsk_backoff || !tp->srtt) { - /* This session failed to estimate rtt. Why? - * Probably, no packets returned in time. - * Reset our results. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) - dst_metric_set(dst, RTAX_RTT, 0); - return; - } + rcu_read_lock(); + if (icsk->icsk_backoff || !tp->srtt) { + /* This session failed to estimate rtt. Why? + * Probably, no packets returned in time. Reset our + * results. + */ + tm = tcp_get_metrics(sk, dst, false); + if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT)) + tcp_metric_set(tm, TCP_METRIC_RTT, 0); + goto out_unlock; + } else + tm = tcp_get_metrics(sk, dst, true); - rtt = dst_metric_rtt(dst, RTAX_RTT); - m = rtt - tp->srtt; + if (!tm) + goto out_unlock; - /* If newly calculated rtt larger than stored one, - * store new one. Otherwise, use EWMA. Remember, - * rtt overestimation is always better than underestimation. - */ - if (!(dst_metric_locked(dst, RTAX_RTT))) { - if (m <= 0) - set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt); - else - set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3)); - } + rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); + m = rtt - tp->srtt; - if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { - unsigned long var; - if (m < 0) - m = -m; + /* If newly calculated rtt larger than stored one, store new + * one. Otherwise, use EWMA. Remember, rtt overestimation is + * always better than underestimation. + */ + if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { + if (m <= 0) + rtt = tp->srtt; + else + rtt -= (m >> 3); + tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); + } - /* Scale deviation to rttvar fixed point */ - m >>= 1; - if (m < tp->mdev) - m = tp->mdev; + if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { + unsigned long var; - var = dst_metric_rtt(dst, RTAX_RTTVAR); - if (m >= var) - var = m; - else - var -= (var - m) >> 2; + if (m < 0) + m = -m; - set_dst_metric_rtt(dst, RTAX_RTTVAR, var); - } + /* Scale deviation to rttvar fixed point */ + m >>= 1; + if (m < tp->mdev) + m = tp->mdev; - if (tcp_in_initial_slowstart(tp)) { - /* Slow start still did not finish. */ - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); - if (!dst_metric_locked(dst, RTAX_CWND) && - tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); - } else if (tp->snd_cwnd > tp->snd_ssthresh && - icsk->icsk_ca_state == TCP_CA_Open) { - /* Cong. avoidance phase, cwnd is reliable. */ - if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, - max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_cwnd) >> 1); - } else { - /* Else slow start did not finish, cwnd is non-sense, - ssthresh may be also invalid. - */ - if (!dst_metric_locked(dst, RTAX_CWND)) - dst_metric_set(dst, RTAX_CWND, - (dst_metric(dst, RTAX_CWND) + - tp->snd_ssthresh) >> 1); - if (dst_metric(dst, RTAX_SSTHRESH) && - !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); - } + var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + if (m >= var) + var = m; + else + var -= (var - m) >> 2; - if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && + tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); + } + + if (tcp_in_initial_slowstart(tp)) { + /* Slow start still did not finish. */ + if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && (tp->snd_cwnd >> 1) > val) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + tp->snd_cwnd >> 1); + } + if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { + val = tcp_metric_get(tm, TCP_METRIC_CWND); + if (tp->snd_cwnd > val) + tcp_metric_set(tm, TCP_METRIC_CWND, + tp->snd_cwnd); + } + } else if (tp->snd_cwnd > tp->snd_ssthresh && + icsk->icsk_ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ + if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); + if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { + val = tcp_metric_get(tm, TCP_METRIC_CWND); + tcp_metric_set(tm, RTAX_CWND, (val + tp->snd_cwnd) >> 1); + } + } else { + /* Else slow start did not finish, cwnd is non-sense, + * ssthresh may be also invalid. + */ + if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) { + val = tcp_metric_get(tm, TCP_METRIC_CWND); + tcp_metric_set(tm, TCP_METRIC_CWND, + (val + tp->snd_ssthresh) >> 1); + } + if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && tp->snd_ssthresh > val) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + tp->snd_ssthresh); + } + if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) { + val = tcp_metric_get(tm, TCP_METRIC_REORDERING); + if (val < tp->reordering && tp->reordering != sysctl_tcp_reordering) - dst_metric_set(dst, RTAX_REORDERING, tp->reordering); + tcp_metric_set(tm, TCP_METRIC_REORDERING, + tp->reordering); } } + tm->tcpm_stamp = jiffies; +out_unlock: + rcu_read_unlock(); } /* Initialize metrics on socket. */ void tcp_init_metrics(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_metrics_block *tm; + u32 val; if (dst == NULL) goto reset; dst_confirm(dst); - if (dst_metric_locked(dst, RTAX_CWND)) - tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND); - if (dst_metric(dst, RTAX_SSTHRESH)) { - tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); + rcu_read_lock(); + tm = tcp_get_metrics(sk, dst, true); + if (!tm) { + rcu_read_unlock(); + goto reset; + } + + if (tcp_metric_locked(tm, TCP_METRIC_CWND)) + tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); + + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val) { + tp->snd_ssthresh = val; if (tp->snd_ssthresh > tp->snd_cwnd_clamp) tp->snd_ssthresh = tp->snd_cwnd_clamp; } else { @@ -137,16 +434,18 @@ void tcp_init_metrics(struct sock *sk) */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } - if (dst_metric(dst, RTAX_REORDERING) && - tp->reordering != dst_metric(dst, RTAX_REORDERING)) { + val = tcp_metric_get(tm, TCP_METRIC_REORDERING); + if (val && tp->reordering != val) { tcp_disable_fack(tp); tcp_disable_early_retrans(tp); - tp->reordering = dst_metric(dst, RTAX_REORDERING); + tp->reordering = val; } - if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) + val = tcp_metric_get(tm, TCP_METRIC_RTT); + if (val == 0 || tp->srtt == 0) { + rcu_read_unlock(); goto reset; - + } /* Initial rtt is determined from SYN,SYN-ACK. * The segment is small and rtt may appear much * less than real one. Use per-dst memory @@ -161,14 +460,18 @@ void tcp_init_metrics(struct sock *sk) * to low value, and then abruptly stops to do it and starts to delay * ACKs, wait for troubles. */ - if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) { - tp->srtt = dst_metric_rtt(dst, RTAX_RTT); + val = msecs_to_jiffies(val); + if (val > tp->srtt) { + tp->srtt = val; tp->rtt_seq = tp->snd_nxt; } - if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) { - tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR); + val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); + if (val > tp->mdev) { + tp->mdev = val; tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); } + rcu_read_unlock(); + tcp_set_rto(sk); reset: if (tp->srtt == 0) { @@ -195,8 +498,74 @@ reset: bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) { + struct tcp_metrics_block *tm; + bool ret; + if (!dst) return false; - return dst_metric(dst, RTAX_RTT) ? true : false; + + rcu_read_lock(); + tm = __tcp_get_metrics_req(req, dst); + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) + ret = true; + else + ret = false; + rcu_read_unlock(); + + return ret; } EXPORT_SYMBOL_GPL(tcp_peer_is_proven); + +static unsigned long tcpmhash_entries; +static int __init set_tcpmhash_entries(char *str) +{ + ssize_t ret; + + if (!str) + return 0; + + ret = kstrtoul(str, 0, &tcpmhash_entries); + if (ret) + return 0; + + return 1; +} +__setup("tcpmhash_entries=", set_tcpmhash_entries); + +static int __net_init tcp_net_metrics_init(struct net *net) +{ + int slots, size; + + slots = tcpmhash_entries; + if (!slots) { + if (totalram_pages >= 128 * 1024) + slots = 16 * 1024; + else + slots = 8 * 1024; + } + + size = slots * sizeof(struct tcpm_hash_bucket); + + net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); + if (!net->ipv4.tcp_metrics_hash) + return -ENOMEM; + + net->ipv4.tcp_metrics_hash_mask = (slots - 1); + + return 0; +} + +static void __net_exit tcp_net_metrics_exit(struct net *net) +{ + kfree(net->ipv4.tcp_metrics_hash); +} + +static __net_initdata struct pernet_operations tcp_net_metrics_ops = { + .init = tcp_net_metrics_init, + .exit = tcp_net_metrics_exit, +}; + +void __init tcp_metrics_init(void) +{ + register_pernet_subsys(&tcp_net_metrics_ops); +} -- cgit v1.2.3-70-g09d2 From 94334d5ed4b64ebcd2c4b421e133b921f8ccf75d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 00:53:48 -0700 Subject: net: Kill set_dst_metric_rtt(). No longer used. Signed-off-by: David S. Miller --- include/net/dst.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index b2634e44661..51610468c63 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -209,12 +209,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, - unsigned long rtt) -{ - dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); -} - static inline u32 dst_allfrag(const struct dst_entry *dst) { -- cgit v1.2.3-70-g09d2 From 81166dd6fa8eb780b2132d32fbc77eb6ac04e44e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 03:14:24 -0700 Subject: tcp: Move timestamps from inetpeer to metrics cache. With help from Lin Ming. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 4 +- include/net/tcp.h | 5 +- net/ipv4/inetpeer.c | 1 - net/ipv4/route.c | 8 +-- net/ipv4/tcp_ipv4.c | 30 ++--------- net/ipv4/tcp_metrics.c | 136 +++++++++++++++++++++++++++++++++++++++++++++-- net/ipv4/tcp_minisocks.c | 46 ---------------- net/ipv6/route.c | 13 +---- net/ipv6/tcp_ipv6.c | 33 ++---------- 9 files changed, 149 insertions(+), 127 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index c27c8f10ebd..1119f6f6cdb 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -46,15 +46,13 @@ struct inet_peer { }; /* * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp + * are not available: rid, ip_id_count * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ atomic_t ip_id_count; /* IP ID for the next packet */ - __u32 tcp_ts; - __u32 tcp_ts_stamp; }; struct rcu_head rcu; struct inet_peer *gc_next; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0900d63d162..3618fefae04 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -390,7 +390,10 @@ extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_update_metrics(struct sock *sk); extern void tcp_init_metrics(struct sock *sk); extern void tcp_metrics_init(void); -extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); +extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); +extern bool tcp_remember_stamp(struct sock *sk); +extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); +extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); extern void tcp_init_sock(struct sock *sk); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da90a8cab61..f457bcb4135 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -508,7 +508,6 @@ relookup: (daddr->family == AF_INET) ? secure_ip_id(daddr->addr.a4) : secure_ipv6_id(daddr->addr.a6)); - p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d02c91177d3..78d81543766 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2846,7 +2846,7 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - u32 id = 0, ts = 0, tsage = 0, error; + u32 id = 0, error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) @@ -2903,10 +2903,6 @@ static int rt_fill_info(struct net *net, const struct inet_peer *peer = rt_peer_ptr(rt); inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; - if (peer->tcp_ts_stamp) { - ts = peer->tcp_ts; - tsage = get_seconds() - peer->tcp_ts_stamp; - } expires = ACCESS_ONCE(peer->pmtu_expires); if (expires) { if (time_before(jiffies, expires)) @@ -2942,7 +2938,7 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; } - if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, id, 0, 0, expires, error) < 0) goto nla_put_failure; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9312a8f33a..d406bf7f37d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -209,22 +209,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (tcp_death_row.sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) { - struct inet_peer *peer = rt_get_peer(rt, fl4->daddr); - /* - * VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state - * TIME-WAIT * and initialize rx_opt.ts_recent from it, - * when trying new connection. - */ - if (peer) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; - } - } - } + !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) + tcp_fetch_timewait_stamp(sk, &rt->dst); inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; @@ -1375,7 +1361,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = cookie_v4_init_sequence(sk, skb, &req->mss); req->cookie_ts = tmp_opt.tstamp_ok; } else if (!isn) { - struct inet_peer *peer = NULL; struct flowi4 fl4; /* VJ's idea. We save last timestamp seen @@ -1390,12 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && - fl4.daddr == saddr && - (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > - TCP_PAWS_WINDOW) { + fl4.daddr == saddr) { + if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1404,8 +1385,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) else if (!sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && - (!peer || !peer->tcp_ts_stamp) && - !tcp_peer_is_proven(req, dst)) { + !tcp_peer_is_proven(req, dst, false)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 56223bab251..1fd83d3118f 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -34,6 +34,8 @@ struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_addr; unsigned long tcpm_stamp; + u32 tcpm_ts; + u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX]; }; @@ -114,6 +116,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); + tm->tcpm_ts = 0; + tm->tcpm_ts_stamp = 0; } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, @@ -230,6 +234,45 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return tm; } +static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) +{ + struct inet6_timewait_sock *tw6; + struct tcp_metrics_block *tm; + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; + + addr.family = tw->tw_family; + switch (addr.family) { + case AF_INET: + addr.addr.a4 = tw->tw_daddr; + hash = (__force unsigned int) addr.addr.a4; + break; + case AF_INET6: + tw6 = inet6_twsk((struct sock *)tw); + *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; + hash = ((__force unsigned int) addr.addr.a6[0] ^ + (__force unsigned int) addr.addr.a6[1] ^ + (__force unsigned int) addr.addr.a6[2] ^ + (__force unsigned int) addr.addr.a6[3]); + break; + default: + return NULL; + } + + hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); + + net = twsk_net(tw); + hash &= net->ipv4.tcp_metrics_hash_mask; + + for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; + tm = rcu_dereference(tm->tcpm_next)) { + if (addr_same(&tm->tcpm_addr, &addr)) + break; + } + return tm; +} + static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct dst_entry *dst, bool create) @@ -496,7 +539,7 @@ reset: tp->snd_cwnd_stamp = tcp_time_stamp; } -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check) { struct tcp_metrics_block *tm; bool ret; @@ -506,16 +549,99 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) rcu_read_lock(); tm = __tcp_get_metrics_req(req, dst); - if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) - ret = true; - else - ret = false; + if (paws_check) { + if (tm && + (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && + (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW) + ret = false; + else + ret = true; + } else { + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp) + ret = true; + else + ret = false; + } rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(tcp_peer_is_proven); +void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) +{ + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, dst, true); + if (tm) { + struct tcp_sock *tp = tcp_sk(sk); + + if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) { + tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp; + tp->rx_opt.ts_recent = tm->tcpm_ts; + } + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp); + +/* VJ's idea. Save last timestamp seen from this destination and hold + * it at least for normal timewait interval to use for duplicate + * segment detection in subsequent connections, before they enter + * synchronized state. + */ +bool tcp_remember_stamp(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + bool ret = false; + + if (dst) { + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, dst, true); + if (tm) { + struct tcp_sock *tp = tcp_sk(sk); + + if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 || + ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && + tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; + tm->tcpm_ts = tp->rx_opt.ts_recent; + } + ret = true; + } + rcu_read_unlock(); + } + return ret; +} + +bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) +{ + struct tcp_metrics_block *tm; + bool ret = false; + + rcu_read_lock(); + tm = __tcp_get_metrics_tw(tw); + if (tw) { + const struct tcp_timewait_sock *tcptw; + struct sock *sk = (struct sock *) tw; + + tcptw = tcp_twsk(sk); + if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 || + ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && + tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { + tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; + tm->tcpm_ts = tcptw->tw_ts_recent; + } + ret = true; + } + rcu_read_unlock(); + + return ret; +} + static unsigned long tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 72b7c63b1a3..a51aa534dab 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -49,52 +49,6 @@ struct inet_timewait_death_row tcp_death_row = { }; EXPORT_SYMBOL_GPL(tcp_death_row); -/* VJ's idea. Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. - */ - -static bool tcp_remember_stamp(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct inet_peer *peer; - - peer = icsk->icsk_af_ops->get_peer(sk); - if (peer) { - if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - peer->tcp_ts = tp->rx_opt.ts_recent; - } - return true; - } - - return false; -} - -static bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) -{ - const struct tcp_timewait_sock *tcptw; - struct sock *sk = (struct sock *) tw; - struct inet_peer *peer; - - tcptw = tcp_twsk(sk); - peer = tcptw->tw_peer; - if (peer) { - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - peer->tcp_ts = tcptw->tw_ts_recent; - } - return true; - } - return false; -} - static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6cc6c881f54..0c068475378 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2348,13 +2348,11 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 pid, u32 seq, int prefix, int nowait, unsigned int flags) { - const struct inet_peer *peer; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; u32 table; struct neighbour *n; - u32 ts, tsage; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2473,16 +2471,7 @@ static int rt6_fill_node(struct net *net, else expires = INT_MAX; - peer = NULL; - if (rt6_has_peer(rt)) - peer = rt6_peer_ptr(rt); - ts = tsage = 0; - if (peer && peer->tcp_ts_stamp) { - ts = peer->tcp_ts; - tsage = get_seconds() - peer->tcp_ts_stamp; - } - - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage, + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, rt->dst.error) < 0) goto nla_put_failure; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 75d179555c2..9e96b5f21d2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -277,22 +277,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) { - struct inet_peer *peer = rt6_get_peer(rt); - /* - * VJ's idea. We save last timestamp seen from - * the destination in peer table, when entering state - * TIME-WAIT * and initialize rx_opt.ts_recent from it, - * when trying new connection. - */ - if (peer) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; - tp->rx_opt.ts_recent = peer->tcp_ts; - } - } - } + ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) + tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -1134,8 +1120,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq->iif = inet6_iif(skb); if (!isn) { - struct inet_peer *peer = NULL; - if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { @@ -1160,14 +1144,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL && - (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && - ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, - &treq->rmt_addr)) { - inet_peer_refcheck(peer); - if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && - (s32)(peer->tcp_ts - req->ts_recent) > - TCP_PAWS_WINDOW) { + (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) { + if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -1176,8 +1154,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) else if (!sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && - (!peer || !peer->tcp_ts_stamp) && - !tcp_peer_is_proven(req, dst)) { + !tcp_peer_is_proven(req, dst, false)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. -- cgit v1.2.3-70-g09d2 From 16d1839907e695387654901995f9286b65fbbc6a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 03:32:59 -0700 Subject: inet: Remove ->get_peer() method. No longer used. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 1 - net/ipv4/tcp_ipv4.c | 16 ---------------- net/ipv6/tcp_ipv6.c | 16 ---------------- 3 files changed, 33 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index af3c743a40e..291e7cee14e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -43,7 +43,6 @@ struct inet_connection_sock_af_ops { struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); - struct inet_peer *(*get_peer)(struct sock *sk); u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d406bf7f37d..ddefd39ac0c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1847,21 +1847,6 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v4_get_peer(struct sock *sk) -{ - struct rtable *rt = (struct rtable *) __sk_dst_get(sk); - struct inet_sock *inet = inet_sk(sk); - - /* If we don't have a valid cached route, or we're doing IP - * options which make the IPv4 header destination address - * different from our peer's, do not bother with this. - */ - if (!rt || inet->cork.fl.u.ip4.daddr != inet->inet_daddr) - return NULL; - return rt_get_peer_create(rt, inet->inet_daddr); -} -EXPORT_SYMBOL(tcp_v4_get_peer); - static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1874,7 +1859,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, - .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9e96b5f21d2..61175cb2478 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1689,20 +1689,6 @@ do_time_wait: goto discard_it; } -static struct inet_peer *tcp_v6_get_peer(struct sock *sk) -{ - struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - - /* If we don't have a valid cached route, or we're doing IP - * options which make the IPv6 header destination address - * different from our peer's, do not bother with this. - */ - if (!rt || !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) - return NULL; - return rt6_get_peer_create(rt); -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1715,7 +1701,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), .net_frag_header_len = sizeof(struct frag_hdr), .setsockopt = ipv6_setsockopt, @@ -1747,7 +1732,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, -- cgit v1.2.3-70-g09d2 From 3e12939a2a67fbb4cbd962c3b9bc398c73319766 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 04:01:57 -0700 Subject: inet: Kill FLOWI_FLAG_PRECOW_METRICS. No longer needed. TCP writes metrics, but now in it's own special cache that does not dirty the route metrics. Therefore there is no longer any reason to pre-cow metrics in this way. Signed-off-by: David S. Miller --- include/net/flow.h | 5 ++--- include/net/inet_sock.h | 2 -- include/net/route.h | 2 -- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/route.c | 11 ++--------- net/ipv6/route.c | 2 +- 6 files changed, 6 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index bd524f59856..ce9cb7656b4 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -20,9 +20,8 @@ struct flowi_common { __u8 flowic_proto; __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_PRECOW_METRICS 0x02 -#define FLOWI_FLAG_CAN_SLEEP 0x04 -#define FLOWI_FLAG_RT_NOCACHE 0x08 +#define FLOWI_FLAG_CAN_SLEEP 0x02 +#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ae17e1352d7..924d7b98ab6 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -245,8 +245,6 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; - if (sk->sk_protocol == IPPROTO_TCP) - flags |= FLOWI_FLAG_PRECOW_METRICS; return flags; } diff --git a/include/net/route.h b/include/net/route.h index 211e2665139..635d7a99d19 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -278,8 +278,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (protocol == IPPROTO_TCP) - flow_flags |= FLOWI_FLAG_PRECOW_METRICS; if (can_sleep) flow_flags |= FLOWI_FLAG_CAN_SLEEP; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 034ddbe42ad..76825be3b64 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -375,7 +375,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - int flags = inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS; + int flags = inet_sk_flowi_flags(sk); if (nocache) flags |= FLOWI_FLAG_RT_NOCACHE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e376354dcb6..d4834e2914a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1658,7 +1658,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct rtable *rt; flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { @@ -1836,18 +1836,11 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, { struct inet_peer_base *base; struct inet_peer *peer; - int create = 0; - - /* If a peer entry exists for this destination, we must hook - * it up in order to get at cached metrics. - */ - if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) - create = 1; base = inetpeer_base_ptr(rt->_peer); BUG_ON(!base); - peer = inet_getpeer_v4(base, rt->rt_dst, create); + peer = inet_getpeer_v4(base, rt->rt_dst, 0); if (peer) { __rt_set_peer(rt, peer); rt->rt_peer_genid = rt_peer_genid(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0c068475378..b7eb51e1a0e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1093,7 +1093,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; -- cgit v1.2.3-70-g09d2 From 5943634fc5592037db0693b261f7f4bea6bb9457 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 06:58:42 -0700 Subject: ipv4: Maintain redirect and PMTU info in struct rtable again. Maintaining this in the inetpeer entries was not the right way to do this at all. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 4 -- include/net/route.h | 2 +- net/ipv4/inetpeer.c | 3 - net/ipv4/route.c | 185 ++++++++++-------------------------------------- net/ipv4/xfrm4_policy.c | 1 + 5 files changed, 41 insertions(+), 154 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 1119f6f6cdb..53f464d7cdd 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -36,10 +36,6 @@ struct inet_peer { u32 metrics[RTAX_MAX]; u32 rate_tokens; /* rate limiting for ICMP */ unsigned long rate_last; - unsigned long pmtu_expires; - u32 pmtu_orig; - u32 pmtu_learned; - struct inetpeer_addr_base redirect_learned; union { struct list_head gc_list; struct rcu_head gc_rcu; diff --git a/include/net/route.h b/include/net/route.h index 635d7a99d19..c27449466d1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -65,7 +65,7 @@ struct rtable { __be32 rt_gateway; /* Miscellaneous cached information */ - u32 rt_peer_genid; + u32 rt_pmtu; unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ }; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f457bcb4135..e1e0a4e8fd3 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -511,9 +511,6 @@ relookup: p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; - p->pmtu_expires = 0; - p->pmtu_orig = 0; - memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 67b08745daf..677d65253e4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -669,7 +669,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); + rth->dst.expires; } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -1242,13 +1242,6 @@ skip_hashing: return rt; } -static atomic_t __rt_peer_genid = ATOMIC_INIT(0); - -static u32 rt_peer_genid(void) -{ - return atomic_read(&__rt_peer_genid); -} - void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) { struct inet_peer_base *base; @@ -1262,8 +1255,6 @@ void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) if (peer) { if (!rt_set_peer(rt, peer)) inet_putpeer(peer); - else - rt->rt_peer_genid = rt_peer_genid(); } } @@ -1323,30 +1314,6 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) -{ - struct rtable *rt = (struct rtable *) dst; - __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; - - dst_confirm(&rt->dst); - - rt->rt_gateway = peer->redirect_learned.a4; - - n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); - if (!n) { - rt->rt_gateway = orig_gw; - return; - } - if (!(n->nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - } else { - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - neigh_release(n); -} - /* called in rcu_read_lock() section */ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) @@ -1355,7 +1322,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, struct in_device *in_dev = __in_dev_get_rcu(dev); __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; - struct inet_peer *peer; struct net *net; if (!in_dev) @@ -1388,6 +1354,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rthp = &rt_hash_table[hash].chain; while ((rt = rcu_dereference(*rthp)) != NULL) { + struct neighbour *n; + rthp = &rt->dst.rt_next; if (rt->rt_key_dst != daddr || @@ -1401,13 +1369,16 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->rt_gateway != old_gw) continue; - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - if (peer->redirect_learned.a4 != new_gw) { - peer->redirect_learned.a4 = new_gw; - atomic_inc(&__rt_peer_genid); + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); + if (n) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + } else { + rt->rt_gateway = new_gw; + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } - check_peer_redir(&rt->dst, peer); + neigh_release(n); } } } @@ -1425,23 +1396,6 @@ reject_redirect: ; } -static bool peer_pmtu_expired(struct inet_peer *peer) -{ - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); - - return orig && - time_after_eq(jiffies, orig) && - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; -} - -static bool peer_pmtu_cleaned(struct inet_peer *peer) -{ - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); - - return orig && - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; -} - static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1451,16 +1405,13 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; - } else if (rt->rt_flags & RTCF_REDIRECTED) { + } else if ((rt->rt_flags & RTCF_REDIRECTED) || + rt->dst.expires) { unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, rt->rt_oif, rt_genid(dev_net(dst->dev))); rt_del(hash, rt); ret = NULL; - } else if (rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - if (peer_pmtu_expired(peer)) - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); } } return ret; @@ -1604,50 +1555,17 @@ out: kfree_skb(skb); return 0; } -static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) -{ - unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); - - if (!expires) - return; - if (time_before(jiffies, expires)) { - u32 orig_dst_mtu = dst_mtu(dst); - if (peer->pmtu_learned < orig_dst_mtu) { - if (!peer->pmtu_orig) - peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); - dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); - } - } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); -} - static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rtable *rt = (struct rtable *) dst; - struct inet_peer *peer; dst_confirm(dst); - peer = rt_get_peer_create(rt, rt->rt_dst); - if (peer) { - unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); - - if (mtu < ip_rt_min_pmtu) - mtu = ip_rt_min_pmtu; - if (!pmtu_expires || mtu < peer->pmtu_learned) { - - pmtu_expires = jiffies + ip_rt_mtu_expires; - if (!pmtu_expires) - pmtu_expires = 1UL; - - peer->pmtu_learned = mtu; - peer->pmtu_expires = pmtu_expires; + if (mtu < ip_rt_min_pmtu) + mtu = ip_rt_min_pmtu; - atomic_inc(&__rt_peer_genid); - rt->rt_peer_genid = rt_peer_genid(); - } - check_peer_pmtu(dst, peer); - } + rt->rt_pmtu = mtu; + dst_set_expires(&rt->dst, ip_rt_mtu_expires); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -1679,30 +1597,12 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); -static void ipv4_validate_peer(struct rtable *rt) -{ - if (rt->rt_peer_genid != rt_peer_genid()) { - struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); - - if (peer) { - check_peer_pmtu(&rt->dst, peer); - - if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) - check_peer_redir(&rt->dst, peer); - } - - rt->rt_peer_genid = rt_peer_genid(); - } -} - static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; if (rt_is_expired(rt)) return NULL; - ipv4_validate_peer(rt); return dst; } @@ -1728,11 +1628,8 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt && rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - if (peer_pmtu_cleaned(peer)) - dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); - } + if (rt) + dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct sk_buff *skb) @@ -1812,7 +1709,13 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *) dst; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + unsigned int mtu = rt->rt_pmtu; + + if (mtu && time_after_eq(jiffies, rt->dst.expires)) + mtu = 0; + + if (!mtu) + mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu && rt_is_output_route(rt)) return mtu; @@ -1843,19 +1746,10 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, peer = inet_getpeer_v4(base, rt->rt_dst, 0); if (peer) { __rt_set_peer(rt, peer); - rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); - - check_peer_pmtu(&rt->dst, peer); - - if (peer->redirect_learned.a4 && - peer->redirect_learned.a4 != rt->rt_gateway) { - rt->rt_gateway = peer->redirect_learned.a4; - rt->rt_flags |= RTCF_REDIRECTED; - } } else { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; @@ -1955,8 +1849,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { @@ -2081,8 +1975,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; @@ -2260,8 +2154,8 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { @@ -2337,7 +2231,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - ipv4_validate_peer(rth); if (noref) { dst_use_noref(&rth->dst, jiffies); skb_dst_set_noref(skb, &rth->dst); @@ -2459,8 +2352,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; + rth->rt_pmtu = 0; rth->rt_gateway = fl4->daddr; - rth->rt_peer_genid = 0; rt_init_peer(rth, (res->table ? &res->table->tb_peers : dev_net(dev_out)->ipv4.peers)); @@ -2717,7 +2610,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { - ipv4_validate_peer(rth); dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2794,6 +2686,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; + rt->rt_pmtu = ort->rt_pmtu; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -2896,13 +2789,13 @@ static int rt_fill_info(struct net *net, const struct inet_peer *peer = rt_peer_ptr(rt); inet_peer_refcheck(peer); id = atomic_read(&peer->ip_id_count) & 0xffff; - expires = ACCESS_ONCE(peer->pmtu_expires); - if (expires) { - if (time_before(jiffies, expires)) - expires -= jiffies; - else - expires = 0; - } + } + expires = rt->dst.expires; + if (expires) { + if (time_before(jiffies, expires)) + expires -= jiffies; + else + expires = 0; } if (rt_is_input_route(rt)) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9815ea0bca7..951bcf35b21 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,6 +100,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_src = rt->rt_src; xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; + xdst->u.rt.rt_pmtu = rt->rt_pmtu; return 0; } -- cgit v1.2.3-70-g09d2 From f185071ddf799e194ba015d040d3d49cdbfa7e48 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 07:26:01 -0700 Subject: ipv4: Remove inetpeer from routes. No longer used. Signed-off-by: David S. Miller --- include/net/route.h | 57 ---------------------------------------------- net/ipv4/route.c | 60 +++++-------------------------------------------- net/ipv4/xfrm4_policy.c | 7 ------ 3 files changed, 6 insertions(+), 118 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index c27449466d1..52362368af0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -40,7 +40,6 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) struct fib_nh; -struct inet_peer; struct fib_info; struct rtable { struct dst_entry dst; @@ -66,44 +65,9 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; - unsigned long _peer; /* long-living peer info */ struct fib_info *fi; /* for client ref to shared metrics */ }; -static inline struct inet_peer *rt_peer_ptr(struct rtable *rt) -{ - return inetpeer_ptr(rt->_peer); -} - -static inline bool rt_has_peer(struct rtable *rt) -{ - return inetpeer_ptr_is_peer(rt->_peer); -} - -static inline void __rt_set_peer(struct rtable *rt, struct inet_peer *peer) -{ - __inetpeer_ptr_set_peer(&rt->_peer, peer); -} - -static inline bool rt_set_peer(struct rtable *rt, struct inet_peer *peer) -{ - return inetpeer_ptr_set_peer(&rt->_peer, peer); -} - -static inline void rt_init_peer(struct rtable *rt, struct inet_peer_base *base) -{ - inetpeer_init_ptr(&rt->_peer, base); -} - -static inline void rt_transfer_peer(struct rtable *rt, struct rtable *ort) -{ - rt->_peer = ort->_peer; - if (rt_has_peer(ort)) { - struct inet_peer *peer = rt_peer_ptr(ort); - atomic_inc(&peer->refcnt); - } -} - static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_route_iif != 0; @@ -326,27 +290,6 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable return rt; } -extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create); - -static inline struct inet_peer *__rt_get_peer(struct rtable *rt, __be32 daddr, int create) -{ - if (rt_has_peer(rt)) - return rt_peer_ptr(rt); - - rt_bind_peer(rt, daddr, create); - return (rt_has_peer(rt) ? rt_peer_ptr(rt) : NULL); -} - -static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr) -{ - return __rt_get_peer(rt, daddr, 0); -} - -static inline struct inet_peer *rt_get_peer_create(struct rtable *rt, __be32 daddr) -{ - return __rt_get_peer(rt, daddr, 1); -} - static inline int inet_iif(const struct sk_buff *skb) { return skb_rtable(skb)->rt_iif; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9cc00f8a6ee..95bfa1ba5b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -889,7 +889,6 @@ static void rt_cache_invalidate(struct net *net) get_random_bytes(&shuffle, sizeof(shuffle)); atomic_add(shuffle + 1U, &net->ipv4.rt_genid); - inetpeer_invalidate_family(AF_INET); } /* @@ -1216,22 +1215,6 @@ skip_hashing: return rt; } -void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_peer); - if (!base) - return; - - peer = inet_getpeer_v4(base, daddr, create); - if (peer) { - if (!rt_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - /* * Peer allocation may fail only in serious out-of-memory conditions. However * we still can generate some output. @@ -1588,10 +1571,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst) fib_info_put(rt->fi); rt->fi = NULL; } - if (rt_has_peer(rt)) { - struct inet_peer *peer = rt_peer_ptr(rt); - inet_putpeer(peer); - } } @@ -1711,26 +1690,11 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, struct fib_info *fi) { - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_peer); - BUG_ON(!base); - - peer = inet_getpeer_v4(base, rt->rt_dst, 0); - if (peer) { - __rt_set_peer(rt, peer); - if (inet_metrics_new(peer)) - memcpy(peer->metrics, fi->fib_metrics, - sizeof(u32) * RTAX_MAX); - dst_init_metrics(&rt->dst, peer->metrics, false); - } else { - if (fi->fib_metrics != (u32 *) dst_default_metrics) { - rt->fi = fi; - atomic_inc(&fi->fib_clntref); - } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); + if (fi->fib_metrics != (u32 *) dst_default_metrics) { + rt->fi = fi; + atomic_inc(&fi->fib_clntref); } + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, @@ -1820,7 +1784,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rt_init_peer(rth, dev_net(dev)->ipv4.peers); rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -1946,7 +1909,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rt_init_peer(rth, &res->table->tb_peers); rth->fi = NULL; rth->dst.input = ip_forward; @@ -2125,7 +2087,6 @@ local_input: rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; - rt_init_peer(rth, net->ipv4.peers); rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -2323,9 +2284,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_mark = fl4->flowi4_mark; rth->rt_pmtu = 0; rth->rt_gateway = fl4->daddr; - rt_init_peer(rth, (res->table ? - &res->table->tb_peers : - dev_net(dev_out)->ipv4.peers)); rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2662,7 +2620,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_dst = ort->rt_dst; rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; - rt_transfer_peer(rt, ort); rt->fi = ort->fi; if (rt->fi) atomic_inc(&rt->fi->fib_clntref); @@ -2700,7 +2657,7 @@ static int rt_fill_info(struct net *net, struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; - u32 id = 0, error; + u32 error; nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); if (nlh == NULL) @@ -2753,11 +2710,6 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; error = rt->dst.error; - if (rt_has_peer(rt)) { - const struct inet_peer *peer = rt_peer_ptr(rt); - inet_peer_refcheck(peer); - id = atomic_read(&peer->ip_id_count) & 0xffff; - } expires = rt->dst.expires; if (expires) { if (time_before(jiffies, expires)) @@ -2792,7 +2744,7 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; } - if (rtnl_put_cacheinfo(skb, &rt->dst, id, expires, error) < 0) + if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; return nlmsg_end(skb, nlh); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 951bcf35b21..87d3fcc302d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -90,8 +90,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.dst.dev = dev; dev_hold(dev); - rt_transfer_peer(&xdst->u.rt, rt); - /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | @@ -210,11 +208,6 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); - if (rt_has_peer(&xdst->u.rt)) { - struct inet_peer *peer = rt_peer_ptr(&xdst->u.rt); - inet_putpeer(peer); - } - xfrm_dst_destroy(xdst); } -- cgit v1.2.3-70-g09d2 From 1a203cb33a7dc791b6c0aedf701e70ac00c50cdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jul 2012 19:05:57 +0000 Subject: ipv6: optimize ipv6 addresses compares On 64 bit arches having efficient unaligned accesses (eg x86_64) we can use long words to reduce number of instructions for free. Joe Perches suggested to change ipv6_masked_addr_cmp() to return a bool instead of 'int', to make sure ipv6_masked_addr_cmp() cannot be used in a sorting function. Signed-off-by: Eric Dumazet Cc: Joe Perches Signed-off-by: David S. Miller --- include/net/ipv6.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index aecf88436ab..d4261d4d6c4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -298,14 +298,23 @@ static inline int ipv6_addr_cmp(const struct in6_addr *a1, const struct in6_addr return memcmp(a1, a2, sizeof(struct in6_addr)); } -static inline int +static inline bool ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ulm = (const unsigned long *)m; + const unsigned long *ul2 = (const unsigned long *)a2; + + return !!(((ul1[0] ^ ul2[0]) & ulm[0]) | + ((ul1[1] ^ ul2[1]) & ulm[1])); +#else return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); +#endif } static inline void ipv6_addr_prefix(struct in6_addr *pfx, @@ -335,10 +344,17 @@ static inline void ipv6_addr_set(struct in6_addr *addr, static inline bool ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul1 = (const unsigned long *)a1; + const unsigned long *ul2 = (const unsigned long *)a2; + + return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; +#else return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +#endif } static inline bool __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, @@ -391,8 +407,14 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a); static inline bool ipv6_addr_any(const struct in6_addr *a) { +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul = (const unsigned long *)a; + + return (ul[0] | ul[1]) == 0UL; +#else return (a->s6_addr32[0] | a->s6_addr32[1] | a->s6_addr32[2] | a->s6_addr32[3]) == 0; +#endif } static inline bool ipv6_addr_loopback(const struct in6_addr *a) -- cgit v1.2.3-70-g09d2 From 4b10b274e22ca6df1cda2fccf3870b8586feec15 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 11 Jul 2012 14:43:34 +0300 Subject: Bluetooth: debug: Print l2cap_chan refcount Improve debug output. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d80e3f0691b..e5164ff55f2 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -672,11 +672,15 @@ enum { static inline void l2cap_chan_hold(struct l2cap_chan *c) { + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + atomic_inc(&c->refcnt); } static inline void l2cap_chan_put(struct l2cap_chan *c) { + BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt)); + if (atomic_dec_and_test(&c->refcnt)) kfree(c); } -- cgit v1.2.3-70-g09d2 From 46d3ceabd8d98ed0ad10f20c595ca784e34786c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jul 2012 05:50:31 +0000 Subject: tcp: TCP Small Queues This introduce TSQ (TCP Small Queues) TSQ goal is to reduce number of TCP packets in xmit queues (qdisc & device queues), to reduce RTT and cwnd bias, part of the bufferbloat problem. sk->sk_wmem_alloc not allowed to grow above a given limit, allowing no more than ~128KB [1] per tcp socket in qdisc/dev layers at a given time. TSO packets are sized/capped to half the limit, so that we have two TSO packets in flight, allowing better bandwidth use. As a side effect, setting the limit to 40000 automatically reduces the standard gso max limit (65536) to 40000/2 : It can help to reduce latencies of high prio packets, having smaller TSO packets. This means we divert sock_wfree() to a tcp_wfree() handler, to queue/send following frames when skb_orphan() [2] is called for the already queued skbs. Results on my dev machines (tg3/ixgbe nics) are really impressive, using standard pfifo_fast, and with or without TSO/GSO. Without reduction of nominal bandwidth, we have reduction of buffering per bulk sender : < 1ms on Gbit (instead of 50ms with TSO) < 8ms on 100Mbit (instead of 132 ms) I no longer have 4 MBytes backlogged in qdisc by a single netperf session, and both side socket autotuning no longer use 4 Mbytes. As skb destructor cannot restart xmit itself ( as qdisc lock might be taken at this point ), we delegate the work to a tasklet. We use one tasklest per cpu for performance reasons. If tasklet finds a socket owned by the user, it sets TSQ_OWNED flag. This flag is tested in a new protocol method called from release_sock(), to eventually send new segments. [1] New /proc/sys/net/ipv4/tcp_limit_output_bytes tunable [2] skb_orphan() is usually called at TX completion time, but some drivers call it in their start_xmit() handler. These drivers should at least use BQL, or else a single TCP session can still fill the whole NIC TX ring, since TSQ will have no effect. Signed-off-by: Eric Dumazet Cc: Dave Taht Cc: Tom Herbert Cc: Matt Mathis Cc: Yuchung Cheng Cc: Nandita Dukkipati Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 14 +++ include/linux/tcp.h | 9 ++ include/net/sock.h | 2 + include/net/tcp.h | 4 + net/core/sock.c | 4 + net/ipv4/sysctl_net_ipv4.c | 7 ++ net/ipv4/tcp.c | 6 ++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 154 ++++++++++++++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 1 + 11 files changed, 202 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 47b6c79e9b0..e20c17a7d34 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -551,6 +551,20 @@ tcp_thin_dupack - BOOLEAN Documentation/networking/tcp-thin.txt Default: 0 +tcp_limit_output_bytes - INTEGER + Controls TCP Small Queue limit per tcp socket. + TCP bulk sender tends to increase packets in flight until it + gets losses notifications. With SNDBUF autotuning, this can + result in a large amount of packets queued in qdisc/device + on the local machine, hurting latency of other flows, for + typical pfifo_fast qdiscs. + tcp_limit_output_bytes limits the number of bytes on qdisc + or device to reduce artificial RTT/cwnd and reduce bufferbloat. + Note: For GSO/TSO enabled flows, we try to have at least two + packets in flight. Reducing tcp_limit_output_bytes might also + reduce the size of individual GSO packet (64KB being the max) + Default: 131072 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2de9cf46f9f..1888169e07c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -339,6 +339,9 @@ struct tcp_sock { u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ + unsigned long tsq_flags; + /* Data for direct copy to user */ struct { struct sk_buff_head prequeue; @@ -494,6 +497,12 @@ struct tcp_sock { struct tcp_cookie_values *cookie_values; }; +enum tsq_flags { + TSQ_THROTTLED, + TSQ_QUEUED, + TSQ_OWNED, /* tcp_tasklet_func() found socket was locked */ +}; + static inline struct tcp_sock *tcp_sk(const struct sock *sk) { return (struct tcp_sock *)sk; diff --git a/include/net/sock.h b/include/net/sock.h index dcb54a0793e..88de092df50 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -858,6 +858,8 @@ struct proto { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); + void (*release_cb)(struct sock *sk); + /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index 3618fefae04..439984b9af4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -253,6 +253,7 @@ extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; +extern int sysctl_tcp_limit_output_bytes; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -321,6 +322,8 @@ extern struct proto tcp_prot; extern void tcp_init_mem(struct net *net); +extern void tcp_tasklet_init(void); + extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); @@ -334,6 +337,7 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +extern void tcp_release_cb(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len); diff --git a/net/core/sock.c b/net/core/sock.c index 929bdcc2383..24039ac1242 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2159,6 +2159,10 @@ void release_sock(struct sock *sk) spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); + + if (sk->sk_prot->release_cb) + sk->sk_prot->release_cb(sk); + sk->sk_lock.owned = 0; if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 12aa0c5867c..70730f7aeaf 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -598,6 +598,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_limit_output_bytes", + .data = &sysctl_tcp_limit_output_bytes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_NET_DMA { .procname = "tcp_dma_copybreak", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d902da96d15..4252cd8f39f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -376,6 +376,7 @@ void tcp_init_sock(struct sock *sk) skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); + INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; @@ -796,6 +797,10 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, inet_csk(sk)->icsk_ext_hdr_len - tp->tcp_header_len); + /* TSQ : try to have two TSO segments in flight */ + xmit_size_goal = min_t(u32, xmit_size_goal, + sysctl_tcp_limit_output_bytes >> 1); + xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); /* We try hard to avoid divides here */ @@ -3574,4 +3579,5 @@ void __init tcp_init(void) tcp_secret_primary = &tcp_secret_one; tcp_secret_retiring = &tcp_secret_two; tcp_secret_secondary = &tcp_secret_two; + tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ddefd39ac0c..01545a3fc0f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2588,6 +2588,7 @@ struct proto tcp_prot = { .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, + .release_cb = tcp_release_cb, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 65608863fde..c66f2ede160 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -424,6 +424,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, treq->snt_isn + 1 + tcp_s_data_size(oldtp); tcp_prequeue_init(newtp); + INIT_LIST_HEAD(&newtp->tsq_node); tcp_init_wl(newtp, treq->rcv_isn); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c465d3e51e2..03854abfd9d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -50,6 +50,9 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1; */ int sysctl_tcp_workaround_signed_windows __read_mostly = 0; +/* Default TSQ limit of two TSO segments */ +int sysctl_tcp_limit_output_bytes __read_mostly = 131072; + /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. @@ -65,6 +68,8 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); +static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, + int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) @@ -783,6 +788,140 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb return size; } + +/* TCP SMALL QUEUES (TSQ) + * + * TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev) + * to reduce RTT and bufferbloat. + * We do this using a special skb destructor (tcp_wfree). + * + * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb + * needs to be reallocated in a driver. + * The invariant being skb->truesize substracted from sk->sk_wmem_alloc + * + * Since transmit from skb destructor is forbidden, we use a tasklet + * to process all sockets that eventually need to send more skbs. + * We use one tasklet per cpu, with its own queue of sockets. + */ +struct tsq_tasklet { + struct tasklet_struct tasklet; + struct list_head head; /* queue of tcp sockets */ +}; +static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); + +/* + * One tasklest per cpu tries to send more skbs. + * We run in tasklet context but need to disable irqs when + * transfering tsq->head because tcp_wfree() might + * interrupt us (non NAPI drivers) + */ +static void tcp_tasklet_func(unsigned long data) +{ + struct tsq_tasklet *tsq = (struct tsq_tasklet *)data; + LIST_HEAD(list); + unsigned long flags; + struct list_head *q, *n; + struct tcp_sock *tp; + struct sock *sk; + + local_irq_save(flags); + list_splice_init(&tsq->head, &list); + local_irq_restore(flags); + + list_for_each_safe(q, n, &list) { + tp = list_entry(q, struct tcp_sock, tsq_node); + list_del(&tp->tsq_node); + + sk = (struct sock *)tp; + bh_lock_sock(sk); + + if (!sock_owned_by_user(sk)) { + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | + TCPF_CLOSING | TCPF_CLOSE_WAIT)) + tcp_write_xmit(sk, + tcp_current_mss(sk), + 0, 0, + GFP_ATOMIC); + } else { + /* defer the work to tcp_release_cb() */ + set_bit(TSQ_OWNED, &tp->tsq_flags); + } + bh_unlock_sock(sk); + + clear_bit(TSQ_QUEUED, &tp->tsq_flags); + sk_free(sk); + } +} + +/** + * tcp_release_cb - tcp release_sock() callback + * @sk: socket + * + * called from release_sock() to perform protocol dependent + * actions before socket release. + */ +void tcp_release_cb(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) { + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | + TCPF_CLOSING | TCPF_CLOSE_WAIT)) + tcp_write_xmit(sk, + tcp_current_mss(sk), + 0, 0, + GFP_ATOMIC); + } +} +EXPORT_SYMBOL(tcp_release_cb); + +void __init tcp_tasklet_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i); + + INIT_LIST_HEAD(&tsq->head); + tasklet_init(&tsq->tasklet, + tcp_tasklet_func, + (unsigned long)tsq); + } +} + +/* + * Write buffer destructor automatically called from kfree_skb. + * We cant xmit new skbs from this context, as we might already + * hold qdisc lock. + */ +void tcp_wfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct tcp_sock *tp = tcp_sk(sk); + + if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && + !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { + unsigned long flags; + struct tsq_tasklet *tsq; + + /* Keep a ref on socket. + * This last ref will be released in tcp_tasklet_func() + */ + atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); + + /* queue this socket to tasklet queue */ + local_irq_save(flags); + tsq = &__get_cpu_var(tsq_tasklet); + list_add(&tp->tsq_node, &tsq->head); + tasklet_schedule(&tsq->tasklet); + local_irq_restore(flags); + } else { + sock_wfree(skb); + } +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -844,7 +983,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); - skb_set_owner_w(skb, sk); + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? + tcp_wfree : sock_wfree; + atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ th = tcp_hdr(skb); @@ -1780,6 +1924,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1800,6 +1945,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } + /* TSQ : sk_wmem_alloc accounts skb truesize, + * including skb overhead. But thats OK. + */ + if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + set_bit(TSQ_THROTTLED, &tp->tsq_flags); + break; + } limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 61175cb2478..70458a9cd83 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1970,6 +1970,7 @@ struct proto tcpv6_prot = { .sendmsg = tcp_sendmsg, .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, + .release_cb = tcp_release_cb, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, -- cgit v1.2.3-70-g09d2 From 94206125c4aac32e43c25bfe1b827e7ab993b7dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 20:38:08 -0700 Subject: ipv4: Rearrange arguments to ip_rt_redirect() Pass in the SKB rather than just the IP addresses, so that policy and other aspects can reside in ip_rt_redirect() rather then icmp_redirect(). Signed-off-by: David S. Miller --- include/net/route.h | 3 +-- net/ipv4/icmp.c | 36 ++++++------------------------------ net/ipv4/route.c | 23 +++++++++++++++++++---- 3 files changed, 26 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 52362368af0..b1402787aec 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -108,8 +108,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); -extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, - __be32 src, struct net_device *dev); +extern void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw); extern void rt_cache_flush(struct net *net, int how); extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 588514627aa..70a793559bb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -755,40 +755,16 @@ out_err: static void icmp_redirect(struct sk_buff *skb) { - const struct iphdr *iph; - - if (skb->len < sizeof(struct iphdr)) - goto out_err; + if (skb->len < sizeof(struct iphdr)) { + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); + return; + } - /* - * Get the copied header of the packet that caused the redirect - */ if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - - iph = (const struct iphdr *)skb->data; - - switch (icmp_hdr(skb)->code & 7) { - case ICMP_REDIR_NET: - case ICMP_REDIR_NETTOS: - /* - * As per RFC recommendations now handle it as a host redirect. - */ - case ICMP_REDIR_HOST: - case ICMP_REDIR_HOSTTOS: - ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr, - icmp_hdr(skb)->un.gateway, - iph->saddr, skb->dev); - break; - } + return; + ip_rt_redirect(skb, icmp_hdr(skb)->un.gateway); icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); - -out: - return; -out_err: - ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); - goto out; } /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a4de87f44c3..f8921b448c3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1292,18 +1292,33 @@ static void ip_do_redirect(struct rtable *rt, __be32 old_gw, __be32 new_gw) } /* called in rcu_read_lock() section */ -void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, - __be32 saddr, struct net_device *dev) +void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) { - int s, i; + const struct iphdr *iph = (const struct iphdr *) skb->data; + __be32 old_gw = ip_hdr(skb)->saddr; + __be32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); - __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + __be32 skeys[2] = { saddr, 0 }; struct net *net; + int s, i; if (!in_dev) return; + switch (icmp_hdr(skb)->code & 7) { + case ICMP_REDIR_NET: + case ICMP_REDIR_NETTOS: + case ICMP_REDIR_HOST: + case ICMP_REDIR_HOSTTOS: + break; + + default: + return; + } + net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || -- cgit v1.2.3-70-g09d2 From e47a185b31dd2acd424fac7dc0efb96fc5b31a33 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 20:55:47 -0700 Subject: ipv4: Generalize ip_do_redirect() and hook into new dst_ops->redirect. All of the redirect acceptance policy is now contained within. Signed-off-by: David S. Miller --- include/net/dst_ops.h | 1 + net/ipv4/route.c | 94 +++++++++++++++++++++++++++++---------------------- 2 files changed, 55 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 4badc86e45d..085931fa7ce 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -25,6 +25,7 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); + void (*redirect)(struct dst_entry *dst, struct sk_buff *skb); int (*local_out)(struct sk_buff *skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f8921b448c3..f3d25656ddb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -149,6 +149,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -179,6 +180,7 @@ static struct dst_ops ipv4_dst_ops = { .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, + .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, }; @@ -1271,42 +1273,18 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct rtable *rt, __be32 old_gw, __be32 new_gw) -{ - struct neighbour *n; - - if (rt->rt_gateway != old_gw) - return; - - n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); - if (n) { - if (!(n->nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - } else { - rt->rt_gateway = new_gw; - rt->rt_flags |= RTCF_REDIRECTED; - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); - } - neigh_release(n); - } -} - -/* called in rcu_read_lock() section */ -void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) +static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) { const struct iphdr *iph = (const struct iphdr *) skb->data; + __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; + struct net_device *dev = skb->dev; __be32 daddr = iph->daddr; __be32 saddr = iph->saddr; - struct net_device *dev = skb->dev; - struct in_device *in_dev = __in_dev_get_rcu(dev); - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { saddr, 0 }; + struct in_device *in_dev; + struct neighbour *n; + struct rtable *rt; struct net *net; - int s, i; - - if (!in_dev) - return; switch (icmp_hdr(skb)->code & 7) { case ICMP_REDIR_NET: @@ -1319,6 +1297,14 @@ void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) return; } + rt = (struct rtable *) dst; + if (rt->rt_gateway != old_gw) + return; + + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + return; + net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || @@ -1335,6 +1321,43 @@ void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) goto reject_redirect; } + n = ipv4_neigh_lookup(dst, NULL, &new_gw); + if (n) { + if (!(n->nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + } else { + rt->rt_gateway = new_gw; + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } + neigh_release(n); + } + return; + +reject_redirect: +#ifdef CONFIG_IP_ROUTE_VERBOSE + if (IN_DEV_LOG_MARTIANS(in_dev)) + net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" + " Advised path = %pI4 -> %pI4\n", + &old_gw, dev->name, &new_gw, + &saddr, &daddr); +#endif + ; +} + +/* called in rcu_read_lock() section */ +void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + __be32 daddr = iph->daddr; + __be32 saddr = iph->saddr; + struct net_device *dev = skb->dev; + int ikeys[2] = { dev->ifindex, 0 }; + __be32 skeys[2] = { saddr, 0 }; + struct net *net; + int s, i; + + net = dev_net(dev); for (s = 0; s < 2; s++) { for (i = 0; i < 2; i++) { unsigned int hash; @@ -1358,21 +1381,12 @@ void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) rt->dst.dev != dev) continue; - ip_do_redirect(rt, old_gw, new_gw); + ip_do_redirect(&rt->dst, skb); } } } return; -reject_redirect: -#ifdef CONFIG_IP_ROUTE_VERBOSE - if (IN_DEV_LOG_MARTIANS(in_dev)) - net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" - " Advised path = %pI4 -> %pI4\n", - &old_gw, dev->name, &new_gw, - &saddr, &daddr); -#endif - ; } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) -- cgit v1.2.3-70-g09d2 From b42597e2f36e2043756aa7462faddf630962f061 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 21:25:45 -0700 Subject: ipv4: Add ipv4_redirect() and ipv4_sk_redirect() helper functions. Signed-off-by: David S. Miller --- include/net/route.h | 3 +++ net/ipv4/route.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index b1402787aec..6ab93eeb866 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -180,6 +180,9 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); +extern void ipv4_redirect(struct sk_buff *skb, struct net *net, + int oif, u32 mark, u8 protocol, int flow_flags); +extern void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f3d25656ddb..aabece6b729 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1590,6 +1590,34 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); +void ipv4_redirect(struct sk_buff *skb, struct net *net, + int oif, u32 mark, u8 protocol, int flow_flags) +{ + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct flowi4 fl4; + struct rtable *rt; + + flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, + protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); + rt = __ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) { + ip_do_redirect(&rt->dst, skb); + ip_rt_put(rt); + } +} +EXPORT_SYMBOL_GPL(ipv4_redirect); + +void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + + return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, + sk->sk_mark, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk)); +} +EXPORT_SYMBOL_GPL(ipv4_sk_redirect); + static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; -- cgit v1.2.3-70-g09d2 From 1f42539d257af671d56d4bdbcf13aef31abff6ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 21:30:08 -0700 Subject: ipv4: Kill ip_rt_redirect(). No longer needed, as the protocol handlers now all properly propagate the redirect back into the routing code. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/icmp.c | 1 - net/ipv4/route.c | 44 -------------------------------------------- 3 files changed, 46 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6ab93eeb866..ace3cb44251 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -108,7 +108,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); -extern void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw); extern void rt_cache_flush(struct net *net, int how); extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 70a793559bb..d01aeb4d492 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -763,7 +763,6 @@ static void icmp_redirect(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct iphdr))) return; - ip_rt_redirect(skb, icmp_hdr(skb)->un.gateway); icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aabece6b729..e98207dcd08 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1345,50 +1345,6 @@ reject_redirect: ; } -/* called in rcu_read_lock() section */ -void ip_rt_redirect(struct sk_buff *skb, __be32 new_gw) -{ - const struct iphdr *iph = (const struct iphdr *) skb->data; - __be32 daddr = iph->daddr; - __be32 saddr = iph->saddr; - struct net_device *dev = skb->dev; - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { saddr, 0 }; - struct net *net; - int s, i; - - net = dev_net(dev); - for (s = 0; s < 2; s++) { - for (i = 0; i < 2; i++) { - unsigned int hash; - struct rtable __rcu **rthp; - struct rtable *rt; - - hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); - - rthp = &rt_hash_table[hash].chain; - - while ((rt = rcu_dereference(*rthp)) != NULL) { - rthp = &rt->dst.rt_next; - - if (rt->rt_key_dst != daddr || - rt->rt_key_src != skeys[s] || - rt->rt_oif != ikeys[i] || - rt_is_input_route(rt) || - rt_is_expired(rt) || - !net_eq(dev_net(rt->dst.dev), net) || - rt->dst.error || - rt->dst.dev != dev) - continue; - - ip_do_redirect(&rt->dst, skb); - } - } - } - return; - -} - static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; -- cgit v1.2.3-70-g09d2 From 30f2a5f379d0b4b4e733df138a49e054ebf75ff8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 23:26:46 -0700 Subject: ipv6: Export ndisc option parsing from ndisc.c This is going to be used internally by the rt6 redirect code. Signed-off-by: David S. Miller --- include/net/ndisc.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ndisc.c | 47 ++--------------------------------------------- 2 files changed, 52 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index c02b6ad3f6c..96a3b5c03e3 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -47,6 +47,8 @@ enum { #include #include #include +#include +#include #include @@ -80,6 +82,54 @@ struct nd_opt_hdr { __u8 nd_opt_len; } __packed; +/* ND options */ +struct ndisc_options { + struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX]; +#ifdef CONFIG_IPV6_ROUTE_INFO + struct nd_opt_hdr *nd_opts_ri; + struct nd_opt_hdr *nd_opts_ri_end; +#endif + struct nd_opt_hdr *nd_useropts; + struct nd_opt_hdr *nd_useropts_end; +}; + +#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] +#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] +#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] +#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] +#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] +#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] + +#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) + +extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, + struct ndisc_options *ndopts); + +/* + * Return the padding between the option length and the start of the + * link addr. Currently only IP-over-InfiniBand needs this, although + * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may + * also need a pad of 2. + */ +static int ndisc_addr_option_pad(unsigned short type) +{ + switch (type) { + case ARPHRD_INFINIBAND: return 2; + default: return 0; + } +} + +static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, + struct net_device *dev) +{ + u8 *lladdr = (u8 *)(p + 1); + int lladdrlen = p->nd_opt_len << 3; + int prepad = ndisc_addr_option_pad(dev->type); + if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) + return NULL; + return lladdr + prepad; +} + static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { const u32 *p32 = pkey; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0fddd571400..a3189baa9f4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,40 +143,8 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -/* ND options */ -struct ndisc_options { - struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX]; -#ifdef CONFIG_IPV6_ROUTE_INFO - struct nd_opt_hdr *nd_opts_ri; - struct nd_opt_hdr *nd_opts_ri_end; -#endif - struct nd_opt_hdr *nd_useropts; - struct nd_opt_hdr *nd_useropts_end; -}; - -#define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR] -#define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR] -#define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO] -#define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END] -#define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR] -#define nd_opts_mtu nd_opt_array[ND_OPT_MTU] - #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -/* - * Return the padding between the option length and the start of the - * link addr. Currently only IP-over-InfiniBand needs this, although - * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may - * also need a pad of 2. - */ -static int ndisc_addr_option_pad(unsigned short type) -{ - switch (type) { - case ARPHRD_INFINIBAND: return 2; - default: return 0; - } -} - static inline int ndisc_opt_addr_space(struct net_device *dev) { return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); @@ -233,8 +201,8 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } -static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, - struct ndisc_options *ndopts) +struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, + struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; @@ -297,17 +265,6 @@ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, return ndopts; } -static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, - struct net_device *dev) -{ - u8 *lladdr = (u8 *)(p + 1); - int lladdrlen = p->nd_opt_len << 3; - int prepad = ndisc_addr_option_pad(dev->type); - if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) - return NULL; - return lladdr + prepad; -} - int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { -- cgit v1.2.3-70-g09d2 From e8599ff4b1d6b0d61e1074ae4ba9fca8dd0c41d0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jul 2012 23:43:53 -0700 Subject: ipv6: Move bulk of redirect handling into rt6_redirect(). This sets things up so that we can have the protocol error handlers call down into the ipv6 route code for redirects just as ipv4 already does. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 7 +---- net/ipv6/ndisc.c | 72 +---------------------------------------------- net/ipv6/route.c | 75 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 72 insertions(+), 82 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 58cb3fc3487..5cedbd7688c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,12 +133,7 @@ extern int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); -extern void rt6_redirect(const struct in6_addr *dest, - const struct in6_addr *src, - const struct in6_addr *saddr, - struct neighbour *neigh, - u8 *lladdr, - int on_link); +extern void rt6_redirect(struct sk_buff *skb); extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a3189baa9f4..b8d53e186a7 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -143,8 +143,6 @@ struct neigh_table nd_tbl = { .gc_thresh3 = 1024, }; -#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) - static inline int ndisc_opt_addr_space(struct net_device *dev) { return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type)); @@ -1336,16 +1334,6 @@ out: static void ndisc_redirect_rcv(struct sk_buff *skb) { - struct inet6_dev *in6_dev; - struct icmp6hdr *icmph; - const struct in6_addr *dest; - const struct in6_addr *target; /* new first hop to destination */ - struct neighbour *neigh; - int on_link = 0; - struct ndisc_options ndopts; - int optlen; - u8 *lladdr = NULL; - #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: @@ -1362,65 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - optlen = skb->tail - skb->transport_header; - optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); - - if (optlen < 0) { - ND_PRINTK(2, warn, "Redirect: packet too short\n"); - return; - } - - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; - - if (ipv6_addr_is_multicast(dest)) { - ND_PRINTK(2, warn, - "Redirect: destination address is multicast\n"); - return; - } - - if (ipv6_addr_equal(dest, target)) { - on_link = 1; - } else if (ipv6_addr_type(target) != - (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: target address is not link-local unicast\n"); - return; - } - - in6_dev = __in6_dev_get(skb->dev); - if (!in6_dev) - return; - if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) - return; - - /* RFC2461 8.1: - * The IP source address of the Redirect MUST be the same as the current - * first-hop router for the specified ICMP Destination Address. - */ - - if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { - ND_PRINTK(2, warn, "Redirect: invalid ND options\n"); - return; - } - if (ndopts.nd_opts_tgt_lladdr) { - lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, - skb->dev); - if (!lladdr) { - ND_PRINTK(2, warn, - "Redirect: invalid link-layer address length\n"); - return; - } - } - - neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); - if (neigh) { - rt6_redirect(dest, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, neigh, lladdr, - on_link); - neigh_release(neigh); - } + rt6_redirect(skb); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 563f12c1c99..73cf3f78aaa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1690,14 +1690,78 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, flags, __ip6_route_redirect); } -void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, - const struct in6_addr *saddr, - struct neighbour *neigh, u8 *lladdr, int on_link) +void rt6_redirect(struct sk_buff *skb) { - struct rt6_info *rt, *nrt = NULL; + struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; - struct net *net = dev_net(neigh->dev); + struct rt6_info *rt, *nrt = NULL; + const struct in6_addr *target; struct neighbour *old_neigh; + const struct in6_addr *dest; + const struct in6_addr *src; + const struct in6_addr *saddr; + struct ndisc_options ndopts; + struct inet6_dev *in6_dev; + struct neighbour *neigh; + struct icmp6hdr *icmph; + int on_link, optlen; + u8 *lladdr = NULL; + + optlen = skb->tail - skb->transport_header; + optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); + + if (optlen < 0) { + net_dbg_ratelimited("rt6_redirect: packet too short\n"); + return; + } + + icmph = icmp6_hdr(skb); + target = (const struct in6_addr *) (icmph + 1); + dest = target + 1; + + if (ipv6_addr_is_multicast(dest)) { + net_dbg_ratelimited("rt6_redirect: destination address is multicast\n"); + return; + } + + if (ipv6_addr_equal(dest, target)) { + on_link = 1; + } else if (ipv6_addr_type(target) != + (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { + net_dbg_ratelimited("rt6_redirect: target address is not link-local unicast\n"); + return; + } + + in6_dev = __in6_dev_get(skb->dev); + if (!in6_dev) + return; + if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) + return; + + /* RFC2461 8.1: + * The IP source address of the Redirect MUST be the same as the current + * first-hop router for the specified ICMP Destination Address. + */ + + if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { + net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); + return; + } + if (ndopts.nd_opts_tgt_lladdr) { + lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, + skb->dev); + if (!lladdr) { + net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); + return; + } + } + + neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); + if (!neigh) + return; + + src = &ipv6_hdr(skb)->daddr; + saddr = &ipv6_hdr(skb)->saddr; rt = ip6_route_redirect(dest, src, saddr, neigh->dev); @@ -1756,6 +1820,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, } out: + neigh_release(neigh); dst_release(&rt->dst); } -- cgit v1.2.3-70-g09d2 From 3a5ad2ee5e2c5030d8a303d06f9148a2f893a369 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:08:07 -0700 Subject: ipv6: Add ip6_redirect() and ip6_sk_redirect() helper functions. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 ++ net/ipv6/route.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5cedbd7688c..693940565d8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -139,6 +139,8 @@ extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu); +extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark); +extern void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk); struct netlink_callback; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 545b1526c14..f52cf83bb1e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1114,6 +1114,33 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) } EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); +void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = 0; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + rt6_do_redirect(dst, skb); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_redirect); + +void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) +{ + ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_redirect); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; -- cgit v1.2.3-70-g09d2 From ec18d9a2691d69cd14b48f9b919fddcef28b7f5c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:25:15 -0700 Subject: ipv6: Add redirect support to all protocol icmp error handlers. Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 ++ net/dccp/ipv6.c | 7 +++++++ net/ipv6/ah6.c | 10 ++++++---- net/ipv6/esp6.c | 11 +++++++---- net/ipv6/ip6_tunnel.c | 7 +++++++ net/ipv6/ipcomp6.c | 11 +++++++---- net/ipv6/raw.c | 2 ++ net/ipv6/sit.c | 8 ++++++++ net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 2 ++ net/ipv6/xfrm6_policy.c | 9 +++++++++ net/sctp/input.c | 4 ++-- net/sctp/ipv6.c | 3 +++ 13 files changed, 69 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a2ef81466b0..1f2735dba75 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -162,6 +162,8 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *, void sctp_err_finish(struct sock *, struct sctp_association *); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); +void sctp_icmp_redirect(struct sock *, struct sctp_transport *, + struct sk_buff *); void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 02162cfa504..b4d7d28ce6d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -130,6 +130,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst, skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 49d4d26bda8..7e6139508ee 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -613,16 +613,18 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", - ntohl(ah->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 89a615ba84f..6dc7fd353ef 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -434,16 +434,19 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct xfrm_state *x; if (type != ICMPV6_DEST_UNREACH && - type != ICMPV6_PKT_TOOBIG) + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", - ntohl(esph->spi), &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6af3fcfdcbb..0b5b60ec6f4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -550,6 +550,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; + case NDISC_REDIRECT: + rel_type = ICMP_REDIRECT; + rel_code = ICMP_REDIR_HOST; default: return 0; } @@ -608,6 +611,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); } + if (rel_type == ICMP_REDIRECT) { + if (skb_dst(skb2)->ops->redirect) + skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + } icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 92832385a8e..7af5aee75d9 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,7 +64,9 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; - if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG) + if (type != ICMPV6_DEST_UNREACH && + type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) return; spi = htonl(ntohs(ipcomph->cpi)); @@ -73,9 +75,10 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", - spi, &iph->daddr); - ip6_update_pmtu(skb, net, info, 0, 0); + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, 0, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index b5c1dcb2773..ef0579d5bca 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -332,6 +332,8 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 49aea94c9be..fbf1622fdee 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -539,6 +539,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (code != ICMP_EXC_TTL) return 0; break; + case ICMP_REDIRECT: + break; } err = -ENOENT; @@ -557,6 +559,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = 0; goto out; } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + IPPROTO_IPV6, 0); + err = 0; + goto out; + } if (t->parms.iph.daddr == 0) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 70458a9cd83..7249e4bb9b8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -363,6 +363,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst && dst->ops->redirect) + dst->ops->redirect(dst,skb); + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1ecd1024948..99d0077b56b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -483,6 +483,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); + if (type == NDISC_REDIRECT) + ip6_sk_redirect(skb, sk); np = inet6_sk(sk); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index bb02038b822..f5a9cb8257b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -215,6 +215,14 @@ static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) path->ops->update_pmtu(path, mtu); } +static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct dst_entry *path = xdst->route; + + path->ops->redirect(path, skb); +} + static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -261,6 +269,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, diff --git a/net/sctp/input.c b/net/sctp/input.c index 9fb4247f9a9..5943b7d77dd 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -423,8 +423,8 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD); } -static void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, - struct sk_buff *skb) +void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, + struct sk_buff *skb) { struct dst_entry *dst; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 91f479121c5..ed7139ea797 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -185,6 +185,9 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out_unlock; } break; + case NDISC_REDIRECT: + sctp_icmp_redirect(sk, transport, skb); + break; default: break; } -- cgit v1.2.3-70-g09d2 From b94f1c0904da9b8bf031667afc48080ba7c3e8c9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 00:33:37 -0700 Subject: ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect(). And delete rt6_redirect(), since it is no longer used. Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 - include/net/ipv6.h | 2 + net/ipv6/icmp.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/route.c | 107 ------------------------------------------------ 5 files changed, 4 insertions(+), 111 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 693940565d8..b6b6f7d6f3c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,8 +133,6 @@ extern int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr); -extern void rt6_redirect(struct sk_buff *skb); - extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark); extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d4261d4d6c4..f695f39e892 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -251,6 +251,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl) atomic_dec(&fl->users); } +extern void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info); + extern int ip6_ra_control(struct sock *sk, int sel); extern int ipv6_parse_hopopts(struct sk_buff *skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a113f7d7e93..24d69dbca4d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -598,7 +598,7 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { const struct inet6_protocol *ipprot; int inner_offset; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b8d53e186a7..ff36194a71a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1350,7 +1350,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } - rt6_redirect(skb); + icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f52cf83bb1e..7296af144d6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1633,92 +1633,6 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -/* - * Handle redirects - */ -struct ip6rd_flowi { - struct flowi6 fl6; - struct in6_addr gateway; -}; - -static struct rt6_info *__ip6_route_redirect(struct net *net, - struct fib6_table *table, - struct flowi6 *fl6, - int flags) -{ - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt; - struct fib6_node *fn; - - /* - * Get the "current" route for this destination and - * check if the redirect has come from approriate router. - * - * RFC 2461 specifies that redirects should only be - * accepted if they come from the nexthop to the target. - * Due to the way the routes are chosen, this notion - * is a bit fuzzy and one might need to check all possible - * routes. - */ - - read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); -restart: - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - /* - * Current route is on-link; redirect is always invalid. - * - * Seems, previous statement is not true. It could - * be node, which looks for us as on-link (f.e. proxy ndisc) - * But then router serving it might decide, that we should - * know truth 8)8) --ANK (980726). - */ - if (rt6_check_expired(rt)) - continue; - if (!(rt->rt6i_flags & RTF_GATEWAY)) - continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) - continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) - continue; - break; - } - - if (!rt) - rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl6->saddr); -out: - dst_hold(&rt->dst); - - read_unlock_bh(&table->tb6_lock); - - return rt; -}; - -static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, - const struct in6_addr *src, - const struct in6_addr *gateway, - struct net_device *dev) -{ - int flags = RT6_LOOKUP_F_HAS_SADDR; - struct net *net = dev_net(dev); - struct ip6rd_flowi rdfl = { - .fl6 = { - .flowi6_oif = dev->ifindex, - .daddr = *dest, - .saddr = *src, - }, - }; - - rdfl.gateway = *gateway; - - if (rt6_need_strict(dest)) - flags |= RT6_LOOKUP_F_IFACE; - - return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, - flags, __ip6_route_redirect); -} - static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -1848,27 +1762,6 @@ out: neigh_release(neigh); } -void rt6_redirect(struct sk_buff *skb) -{ - const struct in6_addr *target; - const struct in6_addr *dest; - const struct in6_addr *src; - const struct in6_addr *saddr; - struct icmp6hdr *icmph; - struct rt6_info *rt; - - icmph = icmp6_hdr(skb); - target = (const struct in6_addr *) (icmph + 1); - dest = target + 1; - - src = &ipv6_hdr(skb)->daddr; - saddr = &ipv6_hdr(skb)->saddr; - - rt = ip6_route_redirect(dest, src, saddr, skb->dev); - rt6_do_redirect(&rt->dst, skb); - dst_release(&rt->dst); -} - /* * Misc support functions */ -- cgit v1.2.3-70-g09d2 From 84efbb84cf76238faf26facf481c8675859bfaeb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 16 Jun 2012 00:00:26 +0200 Subject: cfg80211: use wireless_dev for interface management In order to be able to create P2P Device wdevs, move the virtual interface management over to wireless_dev structures. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 30 +++++++++++++++--------------- drivers/net/wireless/ath/ath6kl/cfg80211.h | 6 +++--- drivers/net/wireless/ath/ath6kl/core.c | 8 ++++---- drivers/net/wireless/mwifiex/cfg80211.c | 24 ++++++++++++------------ drivers/net/wireless/mwifiex/main.c | 4 ++-- drivers/net/wireless/mwifiex/main.h | 10 ++++++---- include/net/cfg80211.h | 19 ++++++++++--------- net/mac80211/cfg.c | 20 ++++++++++---------- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 6 +++--- net/wireless/nl80211.c | 26 +++++++++++++++++--------- 11 files changed, 83 insertions(+), 72 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 5f0c66bb6bd..88bed02f752 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1487,14 +1487,14 @@ static int ath6kl_cfg80211_set_power_mgmt(struct wiphy *wiphy, return 0; } -static struct net_device *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, - char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params) +static struct wireless_dev *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct ath6kl *ar = wiphy_priv(wiphy); - struct net_device *ndev; + struct wireless_dev *wdev; u8 if_idx, nw_type; if (ar->num_vif == ar->vif_max) { @@ -1507,20 +1507,20 @@ static struct net_device *ath6kl_cfg80211_add_iface(struct wiphy *wiphy, return ERR_PTR(-EINVAL); } - ndev = ath6kl_interface_add(ar, name, type, if_idx, nw_type); - if (!ndev) + wdev = ath6kl_interface_add(ar, name, type, if_idx, nw_type); + if (!wdev) return ERR_PTR(-ENOMEM); ar->num_vif++; - return ndev; + return wdev; } static int ath6kl_cfg80211_del_iface(struct wiphy *wiphy, - struct net_device *ndev) + struct wireless_dev *wdev) { struct ath6kl *ar = wiphy_priv(wiphy); - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl_vif *vif = netdev_priv(wdev->netdev); spin_lock_bh(&ar->list_lock); list_del(&vif->list); @@ -3477,9 +3477,9 @@ void ath6kl_cfg80211_vif_cleanup(struct ath6kl_vif *vif) ar->num_vif--; } -struct net_device *ath6kl_interface_add(struct ath6kl *ar, char *name, - enum nl80211_iftype type, u8 fw_vif_idx, - u8 nw_type) +struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, char *name, + enum nl80211_iftype type, + u8 fw_vif_idx, u8 nw_type) { struct net_device *ndev; struct ath6kl_vif *vif; @@ -3533,7 +3533,7 @@ struct net_device *ath6kl_interface_add(struct ath6kl *ar, char *name, list_add_tail(&vif->list, &ar->vif_list); spin_unlock_bh(&ar->list_lock); - return ndev; + return &vif->wdev; err: aggr_module_destroy(vif->aggr_cntxt); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.h b/drivers/net/wireless/ath/ath6kl/cfg80211.h index b992046a1b0..56b1ebe7981 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.h +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.h @@ -25,9 +25,9 @@ enum ath6kl_cfg_suspend_mode { ATH6KL_CFG_SUSPEND_SCHED_SCAN, }; -struct net_device *ath6kl_interface_add(struct ath6kl *ar, char *name, - enum nl80211_iftype type, - u8 fw_vif_idx, u8 nw_type); +struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, char *name, + enum nl80211_iftype type, + u8 fw_vif_idx, u8 nw_type); void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, enum wmi_phy_mode mode); void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted); diff --git a/drivers/net/wireless/ath/ath6kl/core.c b/drivers/net/wireless/ath/ath6kl/core.c index fdb3b1decc7..82c4dd2a960 100644 --- a/drivers/net/wireless/ath/ath6kl/core.c +++ b/drivers/net/wireless/ath/ath6kl/core.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(ath6kl_core_rx_complete); int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) { struct ath6kl_bmi_target_info targ_info; - struct net_device *ndev; + struct wireless_dev *wdev; int ret = 0, i; switch (htc_type) { @@ -187,12 +187,12 @@ int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) rtnl_lock(); /* Add an initial station interface */ - ndev = ath6kl_interface_add(ar, "wlan%d", NL80211_IFTYPE_STATION, 0, + wdev = ath6kl_interface_add(ar, "wlan%d", NL80211_IFTYPE_STATION, 0, INFRA_NETWORK); rtnl_unlock(); - if (!ndev) { + if (!wdev) { ath6kl_err("Failed to instantiate a network device\n"); ret = -ENOMEM; wiphy_unregister(ar->wiphy); @@ -200,7 +200,7 @@ int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) } ath6kl_dbg(ATH6KL_DBG_TRC, "%s: name=%s dev=0x%p, ar=0x%p\n", - __func__, ndev->name, ndev, ar); + __func__, wdev->netdev->name, wdev->netdev, ar); return ret; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 9c2e08e4b09..1e8024ea691 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1512,11 +1512,11 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, /* * create a new virtual interface with the given name */ -struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, - char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params) +struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct mwifiex_adapter *adapter = mwifiex_cfg80211_get_adapter(wiphy); struct mwifiex_private *priv; @@ -1634,7 +1634,7 @@ struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, #ifdef CONFIG_DEBUG_FS mwifiex_dev_debugfs_init(priv); #endif - return dev; + return wdev; error: if (dev && (dev->reg_state == NETREG_UNREGISTERED)) free_netdev(dev); @@ -1647,9 +1647,9 @@ EXPORT_SYMBOL_GPL(mwifiex_add_virtual_intf); /* * del_virtual_intf: remove the virtual interface determined by dev */ -int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct net_device *dev) +int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); #ifdef CONFIG_DEBUG_FS mwifiex_dev_debugfs_remove(priv); @@ -1661,11 +1661,11 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct net_device *dev) if (netif_carrier_ok(priv->netdev)) netif_carrier_off(priv->netdev); - if (dev->reg_state == NETREG_REGISTERED) - unregister_netdevice(dev); + if (wdev->netdev->reg_state == NETREG_REGISTERED) + unregister_netdevice(wdev->netdev); - if (dev->reg_state == NETREG_UNREGISTERED) - free_netdev(dev); + if (wdev->netdev->reg_state == NETREG_UNREGISTERED) + free_netdev(wdev->netdev); /* Clear the priv in adapter */ priv->netdev = NULL; diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index f0219efc895..46803621d01 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -377,7 +377,7 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; err_add_intf: - mwifiex_del_virtual_intf(adapter->wiphy, priv->netdev); + mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev); rtnl_unlock(); err_init_fw: pr_debug("info: %s: unregister device\n", __func__); @@ -844,7 +844,7 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter, struct semaphore *sem) rtnl_lock(); if (priv->wdev && priv->netdev) - mwifiex_del_virtual_intf(adapter->wiphy, priv->netdev); + mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev); rtnl_unlock(); } diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index 0b3b5aa9830..434a90e4693 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -1005,10 +1005,12 @@ int mwifiex_update_bss_desc_with_ie(struct mwifiex_adapter *adapter, int mwifiex_check_network_compatibility(struct mwifiex_private *priv, struct mwifiex_bssdescriptor *bss_desc); -struct net_device *mwifiex_add_virtual_intf(struct wiphy *wiphy, - char *name, enum nl80211_iftype type, - u32 *flags, struct vif_params *params); -int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct net_device *dev); +struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params); +int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev); void mwifiex_set_sys_config_invalid_data(struct mwifiex_uap_bss_param *config); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7eaaee7bb07..aaaa3a255ed 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1435,10 +1435,10 @@ struct cfg80211_gtk_rekey_data { * * @add_virtual_intf: create a new virtual interface with the given name, * must set the struct wireless_dev's iftype. Beware: You must create - * the new netdev in the wiphy's network namespace! Returns the netdev, - * or an ERR_PTR. + * the new netdev in the wiphy's network namespace! Returns the struct + * wireless_dev, or an ERR_PTR. * - * @del_virtual_intf: remove the virtual interface determined by ifindex. + * @del_virtual_intf: remove the virtual interface * * @change_virtual_intf: change type/configuration of virtual interface, * keep the struct wireless_dev's iftype updated. @@ -1617,12 +1617,13 @@ struct cfg80211_ops { int (*resume)(struct wiphy *wiphy); void (*set_wakeup)(struct wiphy *wiphy, bool enabled); - struct net_device * (*add_virtual_intf)(struct wiphy *wiphy, - char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params); - int (*del_virtual_intf)(struct wiphy *wiphy, struct net_device *dev); + struct wireless_dev * (*add_virtual_intf)(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params); + int (*del_virtual_intf)(struct wiphy *wiphy, + struct wireless_dev *wdev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, u32 *flags, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7d9abea37b1..a752c7341d6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -20,31 +20,31 @@ #include "rate.h" #include "mesh.h" -static struct net_device *ieee80211_add_iface(struct wiphy *wiphy, char *name, - enum nl80211_iftype type, - u32 *flags, - struct vif_params *params) +static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct net_device *dev; + struct wireless_dev *wdev; struct ieee80211_sub_if_data *sdata; int err; - err = ieee80211_if_add(local, name, &dev, type, params); + err = ieee80211_if_add(local, name, &wdev, type, params); if (err) return ERR_PTR(err); if (type == NL80211_IFTYPE_MONITOR && flags) { - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); sdata->u.mntr_flags = *flags; } - return dev; + return wdev; } -static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev) +static int ieee80211_del_iface(struct wiphy *wiphy, struct wireless_dev *wdev) { - ieee80211_if_remove(IEEE80211_DEV_TO_SUB_IF(dev)); + ieee80211_if_remove(IEEE80211_WDEV_TO_SUB_IF(wdev)); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8f8535ee599..c3241c3ec6d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1284,7 +1284,7 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); int ieee80211_iface_init(void); void ieee80211_iface_exit(void); int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum nl80211_iftype type, + struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params); int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fbef7a1ada7..b1edf60fbba 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1373,7 +1373,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, } int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum nl80211_iftype type, + struct wireless_dev **new_wdev, enum nl80211_iftype type, struct vif_params *params) { struct net_device *ndev; @@ -1463,8 +1463,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); - if (new_dev) - *new_dev = ndev; + if (new_wdev) + *new_wdev = &sdata->wdev; return 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0dc3356eea4..789d0c7b287 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1971,7 +1971,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; - struct net_device *dev; + struct wireless_dev *wdev; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -2001,16 +2001,14 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - dev = rdev->ops->add_virtual_intf(&rdev->wiphy, + wdev = rdev->ops->add_virtual_intf(&rdev->wiphy, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); - if (IS_ERR(dev)) - return PTR_ERR(dev); + if (IS_ERR(wdev)) + return PTR_ERR(wdev); if (type == NL80211_IFTYPE_MESH_POINT && info->attrs[NL80211_ATTR_MESH_ID]) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -2027,12 +2025,22 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; - return rdev->ops->del_virtual_intf(&rdev->wiphy, dev); + /* + * If we remove a wireless device without a netdev then clear + * user_ptr[1] so that nl80211_post_doit won't dereference it + * to check if it needs to do dev_put(). Otherwise it crashes + * since the wdev has been freed, unlike with a netdev where + * we need the dev_put() for the netdev to really be freed. + */ + if (!wdev->netdev) + info->user_ptr[1] = NULL; + + return rdev->ops->del_virtual_intf(&rdev->wiphy, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) @@ -6874,7 +6882,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_del_interface, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL, }, { -- cgit v1.2.3-70-g09d2 From fd0142844efa85d89017c89227a0f03de1eee327 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Jun 2012 19:17:03 +0200 Subject: nl80211: move scan API to wdev The new P2P Device will have to be able to scan for P2P search, so move scanning to use struct wireless_dev instead of struct net_device. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 6 ++--- .../net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 3 ++- drivers/net/wireless/iwmc3200wifi/cfg80211.c | 3 ++- drivers/net/wireless/libertas/cfg.c | 1 - drivers/net/wireless/mwifiex/cfg80211.c | 3 ++- drivers/net/wireless/orinoco/cfg.c | 2 +- drivers/net/wireless/rndis_wlan.c | 5 ++-- include/net/cfg80211.h | 7 ++--- net/mac80211/cfg.c | 5 ++-- net/wireless/core.c | 2 +- net/wireless/nl80211.c | 31 ++++++++++++---------- net/wireless/nl80211.h | 6 ++--- net/wireless/scan.c | 24 +++++++++-------- net/wireless/sme.c | 6 ++--- 14 files changed, 57 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 88bed02f752..86aeef4b9d7 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -966,11 +966,11 @@ static int ath6kl_set_probed_ssids(struct ath6kl *ar, return 0; } -static int ath6kl_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, +static int ath6kl_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { - struct ath6kl *ar = ath6kl_priv(ndev); - struct ath6kl_vif *vif = netdev_priv(ndev); + struct ath6kl_vif *vif = ath6kl_vif_from_wdev(request->wdev); + struct ath6kl *ar = ath6kl_priv(vif->ndev); s8 n_channels = 0; u16 *channels = NULL; int ret = 0; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index d13ae9c299f..c6a10caec79 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -691,9 +691,10 @@ scan_out: } static s32 -brcmf_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, +brcmf_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *ndev = request->wdev->netdev; s32 err = 0; WL_TRACE("Enter\n"); diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index 48e8218fd23..523dd646f05 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -353,9 +353,10 @@ static int iwm_cfg80211_change_iface(struct wiphy *wiphy, return 0; } -static int iwm_cfg80211_scan(struct wiphy *wiphy, struct net_device *ndev, +static int iwm_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *ndev = request->wdev->netdev; struct iwm_priv *iwm = ndev_to_iwm(ndev); int ret; diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index f4a203049fb..70678131619 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -805,7 +805,6 @@ void lbs_scan_done(struct lbs_private *priv) } static int lbs_cfg_scan(struct wiphy *wiphy, - struct net_device *dev, struct cfg80211_scan_request *request) { struct lbs_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 1e8024ea691..6ca571a1b8e 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1376,9 +1376,10 @@ mwifiex_cfg80211_leave_ibss(struct wiphy *wiphy, struct net_device *dev) * it also informs the results. */ static int -mwifiex_cfg80211_scan(struct wiphy *wiphy, struct net_device *dev, +mwifiex_cfg80211_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *dev = request->wdev->netdev; struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); int i; struct ieee80211_channel *chan; diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c index e15675585fb..7b751fba7e1 100644 --- a/drivers/net/wireless/orinoco/cfg.c +++ b/drivers/net/wireless/orinoco/cfg.c @@ -138,7 +138,7 @@ static int orinoco_change_vif(struct wiphy *wiphy, struct net_device *dev, return err; } -static int orinoco_scan(struct wiphy *wiphy, struct net_device *dev, +static int orinoco_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { struct orinoco_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index dfcd02ab6ca..241162e8111 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -484,7 +484,7 @@ static int rndis_change_virtual_intf(struct wiphy *wiphy, enum nl80211_iftype type, u32 *flags, struct vif_params *params); -static int rndis_scan(struct wiphy *wiphy, struct net_device *dev, +static int rndis_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request); static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed); @@ -1941,9 +1941,10 @@ static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm) } #define SCAN_DELAY_JIFFIES (6 * HZ) -static int rndis_scan(struct wiphy *wiphy, struct net_device *dev, +static int rndis_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) { + struct net_device *dev = request->wdev->netdev; struct usbnet *usbdev = netdev_priv(dev); struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev); int ret; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aaaa3a255ed..5a67165f3b1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -999,7 +999,7 @@ struct cfg80211_ssid { * @ie_len: length of ie in octets * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for - * @dev: the interface + * @wdev: the wireless device to scan for * @aborted: (internal) scan request was notified as aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band */ @@ -1012,9 +1012,10 @@ struct cfg80211_scan_request { u32 rates[IEEE80211_NUM_BANDS]; + struct wireless_dev *wdev; + /* internal */ struct wiphy *wiphy; - struct net_device *dev; bool aborted; bool no_cck; @@ -1700,7 +1701,7 @@ struct cfg80211_ops { struct ieee80211_channel *chan, enum nl80211_channel_type channel_type); - int (*scan)(struct wiphy *wiphy, struct net_device *dev, + int (*scan)(struct wiphy *wiphy, struct cfg80211_scan_request *request); int (*auth)(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a752c7341d6..cfdc03f59e2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1763,10 +1763,11 @@ static int ieee80211_resume(struct wiphy *wiphy) #endif static int ieee80211_scan(struct wiphy *wiphy, - struct net_device *dev, struct cfg80211_scan_request *req) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_WDEV_TO_SUB_IF(req->wdev); switch (ieee80211_vif_type_p2p(&sdata->vif)) { case NL80211_IFTYPE_STATION: diff --git a/net/wireless/core.c b/net/wireless/core.c index 2781a411cec..0557bb15902 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -708,7 +708,7 @@ static void wdev_cleanup_work(struct work_struct *work) cfg80211_lock_rdev(rdev); - if (WARN_ON(rdev->scan_req && rdev->scan_req->dev == wdev->netdev)) { + if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { rdev->scan_req->aborted = true; ___cfg80211_scan_done(rdev, true); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6a9a1d7f51d..6472c7f928d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4130,7 +4130,7 @@ static int validate_scan_freqs(struct nlattr *freqs) static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_scan_request *request; struct nlattr *attr; struct wiphy *wiphy; @@ -4290,15 +4290,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - request->dev = dev; + request->wdev = wdev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; - err = rdev->ops->scan(&rdev->wiphy, dev, request); + err = rdev->ops->scan(&rdev->wiphy, request); if (!err) { - nl80211_send_scan_start(rdev, dev); - dev_hold(dev); + nl80211_send_scan_start(rdev, wdev); + if (wdev->netdev) + dev_hold(wdev->netdev); } else { out_free: rdev->scan_req = NULL; @@ -7066,7 +7067,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_trigger_scan, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -7458,7 +7459,7 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, - struct net_device *netdev, + struct wireless_dev *wdev, u32 pid, u32 seq, int flags, u32 cmd) { @@ -7469,7 +7470,9 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, return -1; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) goto nla_put_failure; /* ignore errors and send incomplete event anyway */ @@ -7506,7 +7509,7 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, } void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, - struct net_device *netdev) + struct wireless_dev *wdev) { struct sk_buff *msg; @@ -7514,7 +7517,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, netdev, 0, 0, 0, + if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_TRIGGER_SCAN) < 0) { nlmsg_free(msg); return; @@ -7525,7 +7528,7 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, } void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct net_device *netdev) + struct wireless_dev *wdev) { struct sk_buff *msg; @@ -7533,7 +7536,7 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, netdev, 0, 0, 0, + if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_NEW_SCAN_RESULTS) < 0) { nlmsg_free(msg); return; @@ -7544,7 +7547,7 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, } void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, - struct net_device *netdev) + struct wireless_dev *wdev) { struct sk_buff *msg; @@ -7552,7 +7555,7 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, if (!msg) return; - if (nl80211_send_scan_msg(msg, rdev, netdev, 0, 0, 0, + if (nl80211_send_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_SCAN_ABORTED) < 0) { nlmsg_free(msg); return; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 0469303b5c3..89ce99675e6 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,11 +7,11 @@ int nl80211_init(void); void nl80211_exit(void); void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, - struct net_device *netdev); + struct wireless_dev *wdev); void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct net_device *netdev); + struct wireless_dev *wdev); void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, - struct net_device *netdev); + struct wireless_dev *wdev); void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index af2b1caa37f..848523a2b22 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -23,7 +23,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { struct cfg80211_scan_request *request; - struct net_device *dev; + struct wireless_dev *wdev; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif @@ -35,29 +35,31 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) if (!request) return; - dev = request->dev; + wdev = request->wdev; /* * This must be before sending the other events! * Otherwise, wpa_supplicant gets completely confused with * wext events. */ - cfg80211_sme_scan_done(dev); + if (wdev->netdev) + cfg80211_sme_scan_done(wdev->netdev); if (request->aborted) - nl80211_send_scan_aborted(rdev, dev); + nl80211_send_scan_aborted(rdev, wdev); else - nl80211_send_scan_done(rdev, dev); + nl80211_send_scan_done(rdev, wdev); #ifdef CONFIG_CFG80211_WEXT - if (!request->aborted) { + if (wdev->netdev && !request->aborted) { memset(&wrqu, 0, sizeof(wrqu)); - wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); + wireless_send_event(wdev->netdev, SIOCGIWSCAN, &wrqu, NULL); } #endif - dev_put(dev); + if (wdev->netdev) + dev_put(wdev->netdev); rdev->scan_req = NULL; @@ -955,7 +957,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, } creq->wiphy = wiphy; - creq->dev = dev; + creq->wdev = dev->ieee80211_ptr; /* SSIDs come after channels */ creq->ssids = (void *)&creq->channels[n_channels]; creq->n_channels = n_channels; @@ -1024,12 +1026,12 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; rdev->scan_req = creq; - err = rdev->ops->scan(wiphy, dev, creq); + err = rdev->ops->scan(wiphy, creq); if (err) { rdev->scan_req = NULL; /* creq will be freed below */ } else { - nl80211_send_scan_start(rdev, dev); + nl80211_send_scan_start(rdev, dev->ieee80211_ptr); /* creq now owned by driver */ creq = NULL; dev_hold(dev); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index dec97981e68..6f39cb80830 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -136,15 +136,15 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) wdev->conn->params.ssid_len); request->ssids[0].ssid_len = wdev->conn->params.ssid_len; - request->dev = wdev->netdev; + request->wdev = wdev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; - err = rdev->ops->scan(wdev->wiphy, wdev->netdev, request); + err = rdev->ops->scan(wdev->wiphy, request); if (!err) { wdev->conn->state = CFG80211_CONN_SCANNING; - nl80211_send_scan_start(rdev, wdev->netdev); + nl80211_send_scan_start(rdev, wdev); dev_hold(wdev->netdev); } else { rdev->scan_req = NULL; -- cgit v1.2.3-70-g09d2 From 30f422925c39edf61cbcf6d35140d726402d4b04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Jul 2012 13:14:18 +0200 Subject: mac80211: optimize ieee80211_rx_status struct layout We waste a lot of space in this struct because it uses int values where smaller ones would be sufficient. The upcoming A-MPDU information needs some space, optimize the struct now. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c5dbb46debb..7300cb51dd5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -709,13 +709,13 @@ enum mac80211_rx_flags { */ struct ieee80211_rx_status { u64 mactime; - enum ieee80211_band band; - int freq; - int signal; - int antenna; - int rate_idx; - int flag; - unsigned int rx_flags; + u16 flag; + u16 freq; + u8 rate_idx; + u8 rx_flags; + u8 band; + u8 antenna; + s8 signal; }; /** -- cgit v1.2.3-70-g09d2 From 8c358bcd097fa1f63e57fb82525ba52f4a537bfa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 22 May 2012 22:13:05 +0200 Subject: mac80211: add time synchronisation with BSS for assoc Some drivers (iwlegacy, iwlwifi and rt2x00) today use the bss_conf.last_tsf value. By itself though that value is completely worthless since it may be ancient. What really is needed is synchronisation between some device time and the TSF. To clarify this, rename bss_conf.last_tsf to sync_tsf and add sync_device_ts which is obtained from rx_status which gets a new field device_timestamp for this purpose. This is intentionally not using the mactime field since that is used for other things and in IBSS is expected to sync with the IBSS's TSF which isn't necessarily true for the device timestamp. Also, since we have the information and it's useful even before the connection has been established, give all the timing details to the driver before authenticating. Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlegacy/common.c | 2 +- drivers/net/wireless/iwlwifi/dvm/rxon.c | 2 +- drivers/net/wireless/rt2x00/rt2x00config.c | 2 +- include/net/mac80211.h | 10 ++++++++-- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 15 ++++++++------- net/mac80211/scan.c | 3 ++- net/mac80211/trace.h | 6 ++++-- 8 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c index cbf2dc18341..9cbed0b15b0 100644 --- a/drivers/net/wireless/iwlegacy/common.c +++ b/drivers/net/wireless/iwlegacy/common.c @@ -5360,7 +5360,7 @@ il_mac_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (changes & BSS_CHANGED_ASSOC) { D_MAC80211("ASSOC %d\n", bss_conf->assoc); if (bss_conf->assoc) { - il->timestamp = bss_conf->last_tsf; + il->timestamp = bss_conf->sync_tsf; if (!il_is_rfkill(il)) il->ops->post_associate(il); diff --git a/drivers/net/wireless/iwlwifi/dvm/rxon.c b/drivers/net/wireless/iwlwifi/dvm/rxon.c index 6ee940f497f..10896393e5a 100644 --- a/drivers/net/wireless/iwlwifi/dvm/rxon.c +++ b/drivers/net/wireless/iwlwifi/dvm/rxon.c @@ -1447,7 +1447,7 @@ void iwlagn_bss_info_changed(struct ieee80211_hw *hw, if (changes & BSS_CHANGED_ASSOC) { if (bss_conf->assoc) { - priv->timestamp = bss_conf->last_tsf; + priv->timestamp = bss_conf->sync_tsf; ctx->staging.filter_flags |= RXON_FILTER_ASSOC_MSK; } else { /* diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index e7361d913e8..49a63e97393 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -102,7 +102,7 @@ void rt2x00lib_config_erp(struct rt2x00_dev *rt2x00dev, /* Update the AID, this is needed for dynamic PS support */ rt2x00dev->aid = bss_conf->assoc ? bss_conf->aid : 0; - rt2x00dev->last_beacon = bss_conf->last_tsf; + rt2x00dev->last_beacon = bss_conf->sync_tsf; /* Update global beacon interval time, this is needed for PS support */ rt2x00dev->beacon_int = bss_conf->beacon_int; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7300cb51dd5..bb86aa6f98d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -233,8 +233,10 @@ enum ieee80211_rssi_event { * valid in station mode only while @assoc is true and if also * requested by %IEEE80211_HW_NEED_DTIM_PERIOD (cf. also hw conf * @ps_dtim_period) - * @last_tsf: last beacon's/probe response's TSF timestamp (could be old + * @sync_tsf: last beacon's/probe response's TSF timestamp (could be old * as it may have been received during scanning long ago) + * @sync_device_ts: the device timestamp corresponding to the sync_tsf, + * the driver/device can use this to calculate synchronisation * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp * @basic_rates: bitmap of basic rates, each bit stands for an @@ -281,7 +283,8 @@ struct ieee80211_bss_conf { u8 dtim_period; u16 beacon_int; u16 assoc_capability; - u64 last_tsf; + u64 sync_tsf; + u32 sync_device_ts; u32 basic_rates; int mcast_rate[IEEE80211_NUM_BANDS]; u16 ht_operation_mode; @@ -696,6 +699,8 @@ enum mac80211_rx_flags { * * @mactime: value in microseconds of the 64-bit Time Synchronization Function * (TSF) timer when the first data symbol (MPDU) arrived at the hardware. + * @device_timestamp: arbitrary timestamp for the device, mac80211 doesn't use + * it but can store it and pass it back to the driver for synchronisation * @band: the active band when this frame was received * @freq: frequency the radio was tuned to when receiving this frame, in MHz * @signal: signal strength when receiving this frame, either in dBm, in dB or @@ -709,6 +714,7 @@ enum mac80211_rx_flags { */ struct ieee80211_rx_status { u64 mactime; + u32 device_timestamp; u16 flag; u16 freq; u8 rate_idx; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2a97d668d2d..7998513ec83 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -85,6 +85,8 @@ struct ieee80211_bss { size_t ssid_len; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u32 device_ts; + u8 dtim_period; bool wmm_used; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4b503ce893d..4efcbf89a72 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1269,11 +1269,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; bss_info_changed |= BSS_CHANGED_ASSOC; - /* set timing information */ - bss_conf->beacon_int = cbss->beacon_interval; - bss_conf->last_tsf = cbss->tsf; - - bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value); @@ -3135,9 +3130,15 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, memcpy(ifmgd->bssid, cbss->bssid, ETH_ALEN); - /* tell driver about BSSID and basic rates */ + /* set timing information */ + sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; + sdata->vif.bss_conf.sync_tsf = cbss->tsf; + sdata->vif.bss_conf.sync_device_ts = bss->device_ts; + + /* tell driver about BSSID, basic rates and timing */ ieee80211_bss_info_change_notify(sdata, - BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES); + BSS_CHANGED_BSSID | BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT); if (assoc) sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 1a893f3637c..e80a8b644aa 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -83,13 +83,14 @@ ieee80211_bss_info_update(struct ieee80211_local *local, cbss = cfg80211_inform_bss_frame(local->hw.wiphy, channel, mgmt, len, signal, GFP_ATOMIC); - if (!cbss) return NULL; cbss->free_priv = ieee80211_rx_bss_free; bss = (void *)cbss->priv; + bss->device_ts = rx_status->device_timestamp; + if (elems->parse_error) { if (beacon) bss->corrupt_data |= IEEE80211_BSS_CORRUPT_BEACON; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e1e9d10ec2e..c6d33b55b2d 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -306,7 +306,8 @@ TRACE_EVENT(drv_bss_info_changed, __field(u8, dtimper) __field(u16, bcnint) __field(u16, assoc_cap) - __field(u64, timestamp) + __field(u64, sync_tsf) + __field(u32, sync_device_ts) __field(u32, basic_rates) __field(u32, changed) __field(bool, enable_beacon) @@ -325,7 +326,8 @@ TRACE_EVENT(drv_bss_info_changed, __entry->dtimper = info->dtim_period; __entry->bcnint = info->beacon_int; __entry->assoc_cap = info->assoc_capability; - __entry->timestamp = info->last_tsf; + __entry->sync_tsf = info->sync_tsf; + __entry->sync_device_ts = info->sync_device_ts; __entry->basic_rates = info->basic_rates; __entry->enable_beacon = info->enable_beacon; __entry->ht_operation_mode = info->ht_operation_mode; -- cgit v1.2.3-70-g09d2 From 391e5c22f5f4e55817f8ba18a08ea717ed2d4a1f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 09:39:28 -0700 Subject: ipv4: Remove tb_peers from fib_table. No longer used. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 - net/ipv4/fib_trie.c | 3 --- 2 files changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 000c4674e18..e91fedd22db 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -162,7 +162,6 @@ struct fib_table { u32 tb_id; int tb_default; int tb_num_default; - struct inet_peer_base tb_peers; unsigned long tb_data[0]; }; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 9b0f25930fb..18cbc15b20d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1843,8 +1843,6 @@ int fib_table_flush(struct fib_table *tb) if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll); - inetpeer_invalidate_tree(&tb->tb_peers); - pr_debug("trie_flush found=%d\n", found); return found; } @@ -1993,7 +1991,6 @@ struct fib_table *fib_trie_table(u32 id) tb->tb_id = id; tb->tb_default = -1; tb->tb_num_default = 0; - inet_peer_base_init(&tb->tb_peers); t = (struct trie *) tb->tb_data; memset(t, 0, sizeof(*t)); -- cgit v1.2.3-70-g09d2 From 5b7ccaf3fc7446e42b83a77fd7aa7ad92850acdd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Jul 2012 19:45:08 +0200 Subject: cfg80211/mac80211: re-add get_channel operation This essentially reverts commit 2e165b818456 but introduces the get_channel operation with a new wireless_dev argument so that you can retrieve the channel per interface. This is necessary as even though we can track all interface channels (except monitor) we can't track the channel type used. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++++++ net/mac80211/cfg.c | 11 +++++++++++ net/wireless/nl80211.c | 16 +++++++++++----- net/wireless/wext-compat.c | 9 +++++++-- 4 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5a67165f3b1..8115d68eb60 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1612,6 +1612,10 @@ struct cfg80211_gtk_rekey_data { * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. * See @ethtool_ops.get_strings + * + * @get_channel: Get the current operating channel for the virtual interface. + * For monitor interfaces, it should return %NULL unless there's a single + * current monitoring channel. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -1821,6 +1825,11 @@ struct cfg80211_ops { u32 sset, u8 *data); void (*set_monitor_enabled)(struct wiphy *wiphy, bool enabled); + + struct ieee80211_channel * + (*get_channel)(struct wiphy *wiphy, + struct wireless_dev *wdev, + enum nl80211_channel_type *type); }; /* diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e95f24eef87..10dd9631e4d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2982,6 +2982,16 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return 0; } +static struct ieee80211_channel * +ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_channel_type *type) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + *type = local->_oper_channel_type; + return local->oper_channel; +} + #ifdef CONFIG_PM static void ieee80211_set_wakeup(struct wiphy *wiphy, bool enabled) { @@ -3062,4 +3072,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_et_sset_count = ieee80211_get_et_sset_count, .get_et_stats = ieee80211_get_et_stats, .get_et_strings = ieee80211_get_et_strings, + .get_channel = ieee80211_cfg_get_channel, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 079fc49e397..6b001e44571 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1759,11 +1759,17 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, (cfg80211_rdev_list_generation << 2))) goto nla_put_failure; - if (rdev->monitor_channel) { - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, - rdev->monitor_channel->center_freq) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, - rdev->monitor_channel_type)) + if (rdev->ops->get_channel) { + struct ieee80211_channel *chan; + enum nl80211_channel_type channel_type; + + chan = rdev->ops->get_channel(&rdev->wiphy, wdev, + &channel_type); + if (chan && + (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, + chan->center_freq) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + channel_type))) goto nla_put_failure; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7df42f54187..494379eb464 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -827,6 +827,8 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_channel *chan; + enum nl80211_channel_type channel_type; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -834,10 +836,13 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: - if (!rdev->monitor_channel) + if (!rdev->ops->get_channel) return -EINVAL; - freq->m = rdev->monitor_channel->center_freq; + chan = rdev->ops->get_channel(wdev->wiphy, wdev, &channel_type); + if (!chan) + return -EINVAL; + freq->m = chan->center_freq; freq->e = 6; return 0; default: -- cgit v1.2.3-70-g09d2 From 4290cb4bf212112e3d6f860e25f000ca8a1ca6a4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Jul 2012 22:19:48 +0200 Subject: cfg80211: reduce monitor interface tracking Revert commit b78e8ceac23655e1e06b30aa95ab11742d1ac7c0 ("cfg80211: track monitor channel") and remove the set_monitor_enabled() callback. Due to the tracking happening in NETDEV_PRE_UP, it had introduced bugs because the monitor interface callback would be called before the device was started. It looks like there's no way to fix this, and using NETDEV_PRE_UP is broken anyway (since there's no NETDEV_UP_FAIL), so remove all that code, track interfaces in NETDEV_UP and also stop tracking the monitor channel in cfg80211. This mostly reverts to before the tracking, except that we keep the interface count tracking so that setting the monitor channel can be rejected properly. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ---- net/wireless/chan.c | 9 +-------- net/wireless/core.c | 48 +----------------------------------------------- net/wireless/core.h | 3 --- 4 files changed, 2 insertions(+), 62 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8115d68eb60..0245208c297 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1504,8 +1504,6 @@ struct cfg80211_gtk_rekey_data { * interfaces are active this callback should reject the configuration. * If no interfaces are active or the device is down, the channel should * be stored for when a monitor interface becomes active. - * @set_monitor_enabled: Notify driver that there are only monitor - * interfaces running. * * @scan: Request to do a scan. If returning zero, the scan request is given * the driver, and will be valid until passed to cfg80211_scan_done(). @@ -1824,8 +1822,6 @@ struct cfg80211_ops { void (*get_et_strings)(struct wiphy *wiphy, struct net_device *dev, u32 sset, u8 *data); - void (*set_monitor_enabled)(struct wiphy *wiphy, bool enabled); - struct ieee80211_channel * (*get_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, diff --git a/net/wireless/chan.c b/net/wireless/chan.c index a16cdffb24a..d355f67d0cd 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -82,7 +82,6 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, int freq, enum nl80211_channel_type chantype) { struct ieee80211_channel *chan; - int err; if (!rdev->ops->set_monitor_channel) return -EOPNOTSUPP; @@ -93,13 +92,7 @@ int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, if (!chan) return -EINVAL; - err = rdev->ops->set_monitor_channel(&rdev->wiphy, chan, chantype); - if (!err) { - rdev->monitor_channel = chan; - rdev->monitor_channel_type = chantype; - } - - return err; + return rdev->ops->set_monitor_channel(&rdev->wiphy, chan, chantype); } void diff --git a/net/wireless/core.c b/net/wireless/core.c index 0557bb15902..71b684b5a67 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -736,60 +736,14 @@ static struct device_type wiphy_type = { .name = "wlan", }; -static struct ieee80211_channel * -cfg80211_get_any_chan(struct cfg80211_registered_device *rdev) -{ - struct ieee80211_supported_band *sband; - int i; - - for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - sband = rdev->wiphy.bands[i]; - if (sband && sband->n_channels > 0) - return &sband->channels[0]; - } - - return NULL; -} - -static void cfg80211_init_mon_chan(struct cfg80211_registered_device *rdev) -{ - struct ieee80211_channel *chan; - - chan = cfg80211_get_any_chan(rdev); - if (WARN_ON(!chan)) - return; - - mutex_lock(&rdev->devlist_mtx); - WARN_ON(cfg80211_set_monitor_channel(rdev, chan->center_freq, - NL80211_CHAN_NO_HT)); - mutex_unlock(&rdev->devlist_mtx); -} - void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num) { - bool has_monitors_only_old = cfg80211_has_monitors_only(rdev); - bool has_monitors_only_new; - ASSERT_RTNL(); rdev->num_running_ifaces += num; if (iftype == NL80211_IFTYPE_MONITOR) rdev->num_running_monitor_ifaces += num; - - has_monitors_only_new = cfg80211_has_monitors_only(rdev); - if (has_monitors_only_new != has_monitors_only_old) { - if (rdev->ops->set_monitor_enabled) - rdev->ops->set_monitor_enabled(&rdev->wiphy, - has_monitors_only_new); - - if (!has_monitors_only_new) { - rdev->monitor_channel = NULL; - rdev->monitor_channel_type = NL80211_CHAN_NO_HT; - } else { - cfg80211_init_mon_chan(rdev); - } - } } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, @@ -912,6 +866,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, mutex_unlock(&rdev->devlist_mtx); dev_put(dev); } + cfg80211_update_iface_num(rdev, wdev->iftype, 1); cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); @@ -1006,7 +961,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, mutex_unlock(&rdev->devlist_mtx); if (ret) return notifier_from_errno(ret); - cfg80211_update_iface_num(rdev, wdev->iftype, 1); break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index bac97da751d..5206c6844fd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -61,9 +61,6 @@ struct cfg80211_registered_device { int num_running_ifaces; int num_running_monitor_ifaces; - struct ieee80211_channel *monitor_channel; - enum nl80211_channel_type monitor_channel_type; - /* BSSes/scanning */ spinlock_t bss_lock; struct list_head bss_list; -- cgit v1.2.3-70-g09d2 From 85b91b0339e764f7e56ff5968fa10d85451378b4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2012 08:21:29 -0700 Subject: ipv4: Don't store a rule pointer in fib_result. We only use it to fetch the rule's tclassid, so just store the tclassid there instead. This also decreases the size of fib_result by a full 8 bytes on 64-bit. On 32-bits it's a wash. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 12 +++--------- net/ipv4/fib_frontend.c | 8 -------- net/ipv4/fib_rules.c | 15 ++++++--------- net/ipv4/route.c | 6 ++---- 4 files changed, 11 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e91fedd22db..5697acefeba 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -106,12 +106,10 @@ struct fib_result { unsigned char nh_sel; unsigned char type; unsigned char scope; + u32 tclassid; struct fib_info *fi; struct fib_table *table; struct list_head *fa_head; -#ifdef CONFIG_IP_MULTIPLE_TABLES - struct fib_rule *r; -#endif }; struct fib_result_nl { @@ -215,10 +213,6 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, extern int __net_init fib4_rules_init(struct net *net); extern void __net_exit fib4_rules_exit(struct net *net); -#ifdef CONFIG_IP_ROUTE_CLASSID -extern u32 fib_rules_tclass(const struct fib_result *res); -#endif - extern struct fib_table *fib_new_table(struct net *net, u32 id); extern struct fib_table *fib_get_table(struct net *net, u32 id); @@ -229,7 +223,7 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { if (!net->ipv4.fib_has_custom_rules) { - res->r = NULL; + res->tclassid = 0; if (net->ipv4.fib_local && !fib_table_lookup(net->ipv4.fib_local, flp, res, FIB_LOOKUP_NOREF)) @@ -289,7 +283,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) #endif *itag = FIB_RES_NH(*res).nh_tclassid<<16; #ifdef CONFIG_IP_MULTIPLE_TABLES - rtag = fib_rules_tclass(res); + rtag = res->tclassid; if (*itag == 0) *itag = (rtag<<16); *itag |= (rtag>>16); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 81f85716a89..7a31194ec63 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -169,10 +169,6 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; @@ -934,10 +930,6 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_scope = frn->fl_scope, }; -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - frn->err = -ENOENT; if (tb) { local_bh_disable(); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index c06da93b0b7..a83d74e498d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,13 +47,6 @@ struct fib4_rule { #endif }; -#ifdef CONFIG_IP_ROUTE_CLASSID -u32 fib_rules_tclass(const struct fib_result *res) -{ - return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; -} -#endif - int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { @@ -63,8 +56,12 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) int err; err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); - res->r = arg.rule; - +#ifdef CONFIG_IP_ROUTE_CLASSID + if (arg.rule) + res->tclassid = ((struct fib4_rule *)arg.rule)->tclassid; + else + res->tclassid = 0; +#endif return err; } EXPORT_SYMBOL_GPL(__fib_lookup); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9319bf1f835..aad21819316 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1735,7 +1735,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES - set_class_tag(rt, fib_rules_tclass(res)); + set_class_tag(rt, res->tclassid); #endif set_class_tag(rt, itag); #endif @@ -2353,11 +2353,9 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) __be32 orig_saddr; int orig_oif; + res.tclassid = 0; res.fi = NULL; res.table = NULL; -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif orig_daddr = fl4->daddr; orig_saddr = fl4->saddr; -- cgit v1.2.3-70-g09d2 From c20f8e35ca8b0583323d310ec63a0f0d17cfdcf5 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 10 Jul 2012 05:47:07 -0700 Subject: Bluetooth: Use tx window from config response for ack timing This change addresses an L2CAP ERTM throughput problem when a remote device does not fully utilize the available transmit window. The L2CAP ERTM transmit window size determines the maximum number of unacked frames that may be outstanding at any time. It is configured separately for each direction of an ERTM connection. Each side sends a configuration request with a tx_win field indicating how many unacked frames it is capable of receiving before sending an ack. The configuration response's tx_win field shows how many frames the transmitter will actually send before waiting for an ack. It's important to trace both the actual transmit window (to check for validity of incoming frames) and the number of frames that the transmitter will send before waiting (to send acks at the appropriate time). Now there are separate tx_win and ack_win values. ack_win is updated based on configuration responses, and is used to determine when acks are sent. Signed-off-by: Mat Martineau Signed-off-by: Gustavo Padovan --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 59 +++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index e5164ff55f2..a7679f8913d 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -464,6 +464,7 @@ struct l2cap_chan { __u16 tx_win; __u16 tx_win_max; + __u16 ack_win; __u8 max_tx; __u16 retrans_timeout; __u16 monitor_timeout; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9fd05993f5b..a8964db04bf 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -431,6 +431,7 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan) chan->max_tx = L2CAP_DEFAULT_MAX_TX; chan->tx_win = L2CAP_DEFAULT_TX_WINDOW; chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; + chan->ack_win = L2CAP_DEFAULT_TX_WINDOW; chan->sec_level = BT_SECURITY_LOW; set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1877,10 +1878,10 @@ static void l2cap_send_ack(struct l2cap_chan *chan) frames_to_ack = 0; } - /* Ack now if the tx window is 3/4ths full. + /* Ack now if the window is 3/4ths full. * Calculate without mul or div */ - threshold = chan->tx_win; + threshold = chan->ack_win; threshold += threshold << 1; threshold >>= 2; @@ -2786,6 +2787,7 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan) L2CAP_DEFAULT_TX_WINDOW); chan->tx_win_max = L2CAP_DEFAULT_TX_WINDOW; } + chan->ack_win = chan->tx_win; } static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) @@ -3175,10 +3177,9 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi break; case L2CAP_CONF_EWS: - chan->tx_win = min_t(u16, val, - L2CAP_DEFAULT_EXT_WINDOW); + chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, - chan->tx_win); + chan->tx_win); break; case L2CAP_CONF_EFS: @@ -3207,6 +3208,9 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); chan->mps = le16_to_cpu(rfc.max_pdu_size); + if (!test_bit(FLAG_EXT_CTRL, &chan->flags)) + chan->ack_win = min_t(u16, chan->ack_win, + rfc.txwin_size); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->local_msdu = le16_to_cpu(efs.msdu); @@ -3268,7 +3272,17 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) { int type, olen; unsigned long val; - struct l2cap_conf_rfc rfc; + /* Use sane default values in case a misbehaving remote device + * did not send an RFC or extended window size option. + */ + u16 txwin_ext = chan->ack_win; + struct l2cap_conf_rfc rfc = { + .mode = chan->mode, + .retrans_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO), + .monitor_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO), + .max_pdu_size = cpu_to_le16(chan->imtu), + .txwin_size = min_t(u16, chan->ack_win, L2CAP_DEFAULT_TX_WINDOW), + }; BT_DBG("chan %p, rsp %p, len %d", chan, rsp, len); @@ -3278,32 +3292,27 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); - if (type != L2CAP_CONF_RFC) - continue; - - if (olen != sizeof(rfc)) + switch (type) { + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *)val, olen); break; - - memcpy(&rfc, (void *)val, olen); - goto done; + case L2CAP_CONF_EWS: + txwin_ext = val; + break; + } } - /* Use sane default values in case a misbehaving remote device - * did not send an RFC option. - */ - rfc.mode = chan->mode; - rfc.retrans_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); - rfc.monitor_timeout = __constant_cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); - rfc.max_pdu_size = cpu_to_le16(chan->imtu); - - BT_ERR("Expected RFC option was not found, using defaults"); - -done: switch (rfc.mode) { case L2CAP_MODE_ERTM: chan->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); chan->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); - chan->mps = le16_to_cpu(rfc.max_pdu_size); + chan->mps = le16_to_cpu(rfc.max_pdu_size); + if (test_bit(FLAG_EXT_CTRL, &chan->flags)) + chan->ack_win = min_t(u16, chan->ack_win, txwin_ext); + else + chan->ack_win = min_t(u16, chan->ack_win, + rfc.txwin_size); break; case L2CAP_MODE_STREAMING: chan->mps = le16_to_cpu(rfc.max_pdu_size); -- cgit v1.2.3-70-g09d2 From 80d0a69fc57715dc9080c0567df1ed911b78abea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:28:06 -0700 Subject: ipv4: Add helper inet_csk_update_pmtu(). This abstracts away the call to dst_ops->update_pmtu() so that we can transparently handle the fact that, in the future, the dst itself can be invalidated by the PMTU update (when we have non-host routes cached in sockets). So we try to rebuild the socket cached route after the method invocation if necessary. This isn't used by SCTP because it needs to cache dsts per-transport, and thus will need it's own local version of this helper. Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 ++ net/dccp/ipv4.c | 11 ++------- net/ipv4/inet_connection_sock.c | 46 ++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 11 ++------- 4 files changed, 52 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 291e7cee14e..2cf44b4ed2e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); + +extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 129ed8f7413..683902fcc8e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -161,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 76825be3b64..200d2180937 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -803,3 +803,49 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); #endif + +static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + struct flowi4 *fl4; + struct rtable *rt; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + fl4 = &fl->u.ip4; + rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_protocol, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); + if (IS_ERR(rt)) + rt = NULL; + if (rt) + sk_setup_caps(sk, &rt->dst); + rcu_read_unlock(); + + return &rt->dst; +} + +struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + struct inet_sock *inet = inet_sk(sk); + + if (!dst) { + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); + if (!dst) + goto out; + } + dst->ops->update_pmtu(dst, mtu); + + dst = __sk_dst_check(sk, 0); + if (!dst) + dst = inet_csk_rebuild_route(sk, &inet->cork.fl); +out: + return dst; +} +EXPORT_SYMBOL_GPL(inet_csk_update_pmtu); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7a0062cb4ed..b8e7e059540 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -289,17 +289,10 @@ static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) if (sk->sk_state == TCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ -- cgit v1.2.3-70-g09d2 From 35ad9b9cf7d8a2e6259a0d24022e910adb6f3489 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:44:56 -0700 Subject: ipv6: Add helper inet6_csk_update_pmtu(). This is the ipv6 version of inet_csk_update_pmtu(). Signed-off-by: David S. Miller --- include/net/inet6_connection_sock.h | 2 ++ net/dccp/ipv6.c | 35 +++----------------------- net/ipv6/inet6_connection_sock.c | 49 ++++++++++++++++++++++++++----------- net/ipv6/tcp_ipv6.c | 37 +++------------------------- 4 files changed, 45 insertions(+), 78 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index df2a857e853..04642c92043 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -43,4 +43,6 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk, extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); extern int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl); + +extern struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); #endif /* _INET6_CONNECTION_SOCK_H */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 090c0800ce0..3ee0342e1ce 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -145,39 +145,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi6 fl6; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - goto out; - } - } else - dst_hold(dst); - - dst->ops->update_pmtu(dst, ntohl(info)); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); goto out; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bceb14450a1..62539a4b2dc 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -203,15 +203,13 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +static struct dst_entry *inet6_csk_route_socket(struct sock *sk) { - struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; - struct dst_entry *dst; struct in6_addr *final_p, final; - int res; + struct dst_entry *dst; + struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; @@ -228,18 +226,29 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) final_p = fl6_update_dst(&fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { + if (!dst) { dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - sk->sk_route_caps = 0; - kfree_skb(skb); - return PTR_ERR(dst); - } + if (!IS_ERR(dst)) + __inet6_csk_dst_store(sk, dst, NULL, NULL); + } + return dst; +} - __inet6_csk_dst_store(sk, dst, NULL, NULL); +int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +{ + struct sock *sk = skb->sk; + struct ipv6_pinfo *np = inet6_sk(sk); + struct flowi6 fl6; + struct dst_entry *dst; + int res; + + dst = inet6_csk_route_socket(sk); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); + sk->sk_route_caps = 0; + kfree_skb(skb); + return PTR_ERR(dst); } rcu_read_lock(); @@ -253,3 +262,15 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); + +struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct dst_entry *dst = inet6_csk_route_socket(sk); + + if (IS_ERR(dst)) + return NULL; + dst->ops->update_pmtu(dst, mtu); + + return inet6_csk_route_socket(sk); +} +EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3071f377145..ecdf241cad0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -378,43 +378,14 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi6 fl6; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_dport = inet->inet_dport; - fl6.fl6_sport = inet->inet_sport; - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); - if (IS_ERR(dst)) { - sk->sk_err_soft = -PTR_ERR(dst); - goto out; - } - - } else - dst_hold(dst); - - dst->ops->update_pmtu(dst, ntohl(info)); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); + } goto out; } -- cgit v1.2.3-70-g09d2 From 02f3d4ce9e81434a365f4643020b0402f6fe3332 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 16 Jul 2012 03:57:14 -0700 Subject: sctp: Adjust PMTU updates to accomodate route invalidation. This adjusts the call to dst_ops->update_pmtu() so that we can transparently handle the fact that, in the future, the dst itself can be invalidated by the PMTU update (when we have non-host routes cached in sockets). Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ++-- include/net/sctp/structs.h | 4 ++-- net/sctp/associola.c | 4 ++-- net/sctp/input.c | 4 ++-- net/sctp/output.c | 2 +- net/sctp/socket.c | 6 +++--- net/sctp/transport.c | 12 ++++++++++-- 7 files changed, 22 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1f2735dba75..ff499640528 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -519,10 +519,10 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) return frag; } -static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) +static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc) { - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sk, asoc); asoc->pmtu_pending = 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index fecdf31816f..536e439ddf1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1091,7 +1091,7 @@ void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); void sctp_transport_reset(struct sctp_transport *); -void sctp_transport_update_pmtu(struct sctp_transport *, u32); +void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); void sctp_transport_immediate_rtx(struct sctp_transport *); @@ -2003,7 +2003,7 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); -void sctp_assoc_sync_pmtu(struct sctp_association *); +void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b16517ee1aa..8cf348e62e7 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1360,7 +1360,7 @@ struct sctp_transport *sctp_assoc_choose_alter_transport( /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1372,7 +1372,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(t, dst_mtu(t->dst)); + sctp_transport_update_pmtu(sk, t, dst_mtu(t->dst)); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index f050d45faa9..a67bc31f49f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -408,10 +408,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(t, pmtu); + sctp_transport_update_pmtu(sk, t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sk, asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index 539f35d07f4..838e18b4d7e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -410,7 +410,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) { - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sk, asoc); } } dst = dst_clone(tp->dst); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b3b8a8d813e..74bd3c47350 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1853,7 +1853,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(asoc); + sctp_assoc_pending_pmtu(sk, asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -2365,7 +2365,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; sctp_frag_point(asoc, params->spp_pathmtu); @@ -2382,7 +2382,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(asoc); + sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); } } else if (asoc) { asoc->param_flags = diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 1dcceb6e0ce..e69e1a2175a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -228,7 +228,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst; @@ -245,8 +245,16 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) } dst = sctp_transport_dst_check(t); - if (dst) + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); + + if (dst) { dst->ops->update_pmtu(dst, pmtu); + + dst = sctp_transport_dst_check(t); + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); + } } /* Caches the dst entry and source address for a transport's destination -- cgit v1.2.3-70-g09d2 From 51d7cccf07238f5236c5b9269231a30dd5f8e714 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 16 Jul 2012 04:28:49 +0000 Subject: net: make sock diag per-namespace Before this patch sock_diag works for init_net only and dumps information about sockets from all namespaces. This patch expands sock_diag for all name-spaces. It creates a netlink kernel socket for each netns and filters data during dumping. v2: filter accoding with netns in all places remove an unused variable. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pavel Emelyanov CC: Eric Dumazet Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Andrew Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 1 - include/net/net_namespace.h | 1 + net/core/sock_diag.c | 27 ++++++++++++++++++++------- net/ipv4/inet_diag.c | 21 ++++++++++++++++----- net/ipv4/udp_diag.c | 10 +++++++--- net/unix/diag.c | 9 +++++++-- 6 files changed, 51 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 6793fac5eab..e3e395acc2f 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -44,6 +44,5 @@ void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -extern struct sock *sock_diag_nlsk; #endif /* KERNEL */ #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ac9195e6a06..ae1cd6c9ba5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -101,6 +101,7 @@ struct net { struct netns_xfrm xfrm; #endif struct netns_ipvs *ipvs; + struct sock *diag_nlsk; }; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 07a29eb34a4..9d8755e4a7a 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -166,23 +166,36 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } -struct sock *sock_diag_nlsk; -EXPORT_SYMBOL_GPL(sock_diag_nlsk); - -static int __init sock_diag_init(void) +static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = sock_diag_rcv, }; - sock_diag_nlsk = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, + net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, THIS_MODULE, &cfg); - return sock_diag_nlsk == NULL ? -ENOMEM : 0; + return net->diag_nlsk == NULL ? -ENOMEM : 0; +} + +static void __net_exit diag_net_exit(struct net *net) +{ + netlink_kernel_release(net->diag_nlsk); + net->diag_nlsk = NULL; +} + +static struct pernet_operations diag_net_ops = { + .init = diag_net_init, + .exit = diag_net_exit, +}; + +static int __init sock_diag_init(void) +{ + return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { - netlink_kernel_release(sock_diag_nlsk); + unregister_pernet_subsys(&diag_net_ops); } module_init(sock_diag_init); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 38064a285cc..570e61f9611 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -272,16 +272,17 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s int err; struct sock *sk; struct sk_buff *rep; + struct net *net = sock_net(in_skb->sk); err = -EINVAL; if (req->sdiag_family == AF_INET) { - sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], + sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { - sk = inet6_lookup(&init_net, hashinfo, + sk = inet6_lookup(net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, @@ -317,7 +318,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s nlmsg_free(rep); goto out; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -724,6 +725,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, { int i, num; int s_i, s_num; + struct net *net = sock_net(skb->sk); s_i = cb->args[1]; s_num = num = cb->args[2]; @@ -743,6 +745,9 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, sk_nulls_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { num++; continue; @@ -813,6 +818,8 @@ skip_listen_ht: sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) @@ -839,6 +846,8 @@ next_normal: inet_twsk_for_each(tw, node, &head->twchain) { + if (!net_eq(twsk_net(tw), net)) + continue; if (num < s_num) goto next_dying; @@ -943,6 +952,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb, static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) { int hdrlen = sizeof(struct inet_diag_req); + struct net *net = sock_net(skb->sk); if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) @@ -963,7 +973,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) struct netlink_dump_control c = { .dump = inet_diag_dump_compat, }; - return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c); + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } } @@ -973,6 +983,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct inet_diag_req_v2); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -991,7 +1002,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = inet_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index a7f86a3cd50..16d0960062b 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -34,15 +34,16 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, int err = -EINVAL; struct sock *sk; struct sk_buff *rep; + struct net *net = sock_net(in_skb->sk); if (req->sdiag_family == AF_INET) - sk = __udp4_lib_lookup(&init_net, + sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_if, tbl); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) - sk = __udp6_lib_lookup(&init_net, + sk = __udp6_lib_lookup(net, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, @@ -75,7 +76,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, kfree_skb(rep); goto out; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -90,6 +91,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin struct inet_diag_req_v2 *r, struct nlattr *bc) { int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -106,6 +108,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlin sk_nulls_for_each(sk, node, &hslot->head) { struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(r->idiag_states & (1 << sk->sk_state))) diff --git a/net/unix/diag.c b/net/unix/diag.c index a74864eedfc..750b1340844 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -177,6 +177,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct unix_diag_req *req; int num, s_num, slot, s_slot; + struct net *net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); @@ -192,6 +193,8 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = 0; sk_for_each(sk, node, &unix_socket_table[slot]) { + if (!net_eq(sock_net(sk), net)) + continue; if (num < s_num) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) @@ -243,6 +246,7 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, struct sock *sk; struct sk_buff *rep; unsigned int extra_len; + struct net *net = sock_net(in_skb->sk); if (req->udiag_ino == 0) goto out_nosk; @@ -273,7 +277,7 @@ again: goto again; } - err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -287,6 +291,7 @@ out_nosk: static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { int hdrlen = sizeof(struct unix_diag_req); + struct net *net = sock_net(skb->sk); if (nlmsg_len(h) < hdrlen) return -EINVAL; @@ -295,7 +300,7 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) struct netlink_dump_control c = { .dump = unix_diag_dump, }; - return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else return unix_diag_get_exact(skb, h, nlmsg_data(h)); } -- cgit v1.2.3-70-g09d2 From 282f23c6ee343126156dd41218b22ece96d747e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Jul 2012 10:13:05 +0200 Subject: tcp: implement RFC 5961 3.2 Implement the RFC 5691 mitigation against Blind Reset attack using RST bit. Idea is to validate incoming RST sequence, to match RCV.NXT value, instead of previouly accepted window : (RCV.NXT <= SEG.SEQ < RCV.NXT+RCV.WND) If sequence is in window but not an exact match, send a "challenge ACK", so that the other part can resend an RST with the appropriate sequence. Add a new sysctl, tcp_challenge_ack_limit, to limit number of challenge ACK sent per second. Add a new SNMP counter to count number of challenge acks sent. (netstat -s | grep TCPChallengeACK) Signed-off-by: Eric Dumazet Cc: Kiran Kumar Kella Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 5 +++++ include/linux/snmp.h | 1 + include/net/tcp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_input.c | 31 ++++++++++++++++++++++++++++++- 6 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e20c17a7d34..e1e021594cf 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -565,6 +565,11 @@ tcp_limit_output_bytes - INTEGER reduce the size of individual GSO packet (64KB being the max) Default: 131072 +tcp_challenge_ack_limit - INTEGER + Limits number of Challenge ACK sent per second, as recommended + in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks) + Default: 100 + UDP variables: udp_mem - vector of 3 INTEGERs: min, pressure, max diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 6e4c5112382..673e0e928b2 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -237,6 +237,7 @@ enum LINUX_MIB_TCPOFOQUEUE, /* TCPOFOQueue */ LINUX_MIB_TCPOFODROP, /* TCPOFODrop */ LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ + LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ __LINUX_MIB_MAX }; diff --git a/include/net/tcp.h b/include/net/tcp.h index 439984b9af4..85c5090bfe2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -254,6 +254,7 @@ extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; +extern int sysctl_tcp_challenge_ack_limit; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index dae25e7622c..3e8e78f12a3 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -261,6 +261,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPOFOQueue", LINUX_MIB_TCPOFOQUEUE), SNMP_MIB_ITEM("TCPOFODrop", LINUX_MIB_TCPOFODROP), SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), + SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 70730f7aeaf..3f6a1e762e9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -605,6 +605,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_challenge_ack_limit", + .data = &sysctl_tcp_challenge_ack_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, #ifdef CONFIG_NET_DMA { .procname = "tcp_dma_copybreak", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc4e12f1f2f..c841a899037 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -88,6 +88,9 @@ int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); +/* rfc5961 challenge ack rate limiting */ +int sysctl_tcp_challenge_ack_limit = 100; + int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; @@ -5247,6 +5250,23 @@ out: } #endif /* CONFIG_NET_DMA */ +static void tcp_send_challenge_ack(struct sock *sk) +{ + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; + u32 now = jiffies / HZ; + + if (now != challenge_timestamp) { + challenge_timestamp = now; + challenge_count = 0; + } + if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } +} + /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5283,7 +5303,16 @@ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, /* Step 2: check RST bit */ if (th->rst) { - tcp_reset(sk); + /* RFC 5961 3.2 : + * If sequence number exactly matches RCV.NXT, then + * RESET the connection + * else + * Send a challenge ACK + */ + if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) + tcp_reset(sk); + else + tcp_send_challenge_ack(sk); goto discard; } -- cgit v1.2.3-70-g09d2 From 84f10708f73254878246772cead70a2eb6a123f2 Mon Sep 17 00:00:00 2001 From: Thomas Pedersen Date: Thu, 12 Jul 2012 16:17:33 -0700 Subject: cfg80211: support TX error rate CQM Let the user configure serveral TX error conection quality monitoring parameters: % error rate, survey interval, and # of attempted packets. On exceeding the TX failure rate over the given interval, the driver will send a CQM notify event with the actual TX failure rate and packets attempted. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 16 ++++++++++ include/net/cfg80211.h | 21 ++++++++++++ net/wireless/mlme.c | 13 ++++++++ net/wireless/nl80211.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 5 +++ 5 files changed, 140 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e791487ead3..d6cfacc3ce4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1550,6 +1550,8 @@ enum nl80211_attrs { /* default RSSI threshold for scan results if none specified. */ #define NL80211_SCAN_RSSI_THOLD_OFF -300 +#define NL80211_CQM_TXE_MAX_INTVL 1800 + /** * enum nl80211_iftype - (virtual) interface types * @@ -2589,6 +2591,17 @@ enum nl80211_ps_state { * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many * consecutive packets were not acknowledged by the peer + * @NL80211_ATTR_CQM_TXE_RATE: TX error rate in %. Minimum % of TX failures + * during the given %NL80211_ATTR_CQM_TXE_INTVL before an + * %NL80211_CMD_NOTIFY_CQM with reported %NL80211_ATTR_CQM_TXE_RATE and + * %NL80211_ATTR_CQM_TXE_PKTS is generated. + * @NL80211_ATTR_CQM_TXE_PKTS: number of attempted packets in a given + * %NL80211_ATTR_CQM_TXE_INTVL before %NL80211_ATTR_CQM_TXE_RATE is + * checked. + * @NL80211_ATTR_CQM_TXE_INTVL: interval in seconds. Specifies the periodic + * interval in which %NL80211_ATTR_CQM_TXE_PKTS and + * %NL80211_ATTR_CQM_TXE_RATE must be satisfied before generating an + * %NL80211_CMD_NOTIFY_CQM. Set to 0 to turn off TX error reporting. * @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -2598,6 +2611,9 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_RSSI_HYST, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, NL80211_ATTR_CQM_PKT_LOSS_EVENT, + NL80211_ATTR_CQM_TXE_RATE, + NL80211_ATTR_CQM_TXE_PKTS, + NL80211_ATTR_CQM_TXE_INTVL, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0245208c297..493fa0c7900 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1573,6 +1573,8 @@ struct cfg80211_gtk_rekey_data { * @set_power_mgmt: Configure WLAN power management. A timeout value of -1 * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. + * @set_cqm_txe_config: Configure connection quality monitor TX error + * thresholds. * @sched_scan_start: Tell the driver to start a scheduled scan. * @sched_scan_stop: Tell the driver to stop an ongoing scheduled * scan. The driver_initiated flag specifies whether the driver @@ -1783,6 +1785,10 @@ struct cfg80211_ops { struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + int (*set_cqm_txe_config)(struct wiphy *wiphy, + struct net_device *dev, + u32 rate, u32 pkts, u32 intvl); + void (*mgmt_frame_register)(struct wiphy *wiphy, struct wireless_dev *wdev, u16 frame_type, bool reg); @@ -3395,6 +3401,21 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, void cfg80211_cqm_pktloss_notify(struct net_device *dev, const u8 *peer, u32 num_packets, gfp_t gfp); +/** + * cfg80211_cqm_txe_notify - TX error rate event + * @dev: network device + * @peer: peer's MAC address + * @num_packets: how many packets were lost + * @rate: % of packets which failed transmission + * @intvl: interval (in s) over which the TX failure threshold was breached. + * @gfp: context flags + * + * Notify userspace when configured % TX failures over number of packets in a + * given interval is exceeded. + */ +void cfg80211_cqm_txe_notify(struct net_device *dev, const u8 *peer, + u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); + /** * cfg80211_gtk_rekey_notify - notify userspace about driver rekeying * @dev: network device diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index abe9f82d5a8..1cdb1d5e6b0 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -919,6 +919,19 @@ void cfg80211_cqm_pktloss_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); +void cfg80211_cqm_txe_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, + u32 rate, u32 intvl, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets, + rate, intvl, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_txe_notify); + void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index be8750f91d7..9216e45e53a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6267,8 +6267,35 @@ nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = { [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 }, [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_TXE_PKTS] = { .type = NLA_U32 }, + [NL80211_ATTR_CQM_TXE_INTVL] = { .type = NLA_U32 }, }; +static int nl80211_set_cqm_txe(struct genl_info *info, + u32 rate, u32 pkts, u32 intvl) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev; + struct net_device *dev = info->user_ptr[1]; + + if ((rate < 0 || rate > 100) || + (intvl < 0 || intvl > NL80211_CQM_TXE_MAX_INTVL)) + return -EINVAL; + + wdev = dev->ieee80211_ptr; + + if (!rdev->ops->set_cqm_txe_config) + return -EOPNOTSUPP; + + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + return rdev->ops->set_cqm_txe_config(wdev->wiphy, dev, + rate, pkts, intvl); +} + static int nl80211_set_cqm_rssi(struct genl_info *info, s32 threshold, u32 hysteresis) { @@ -6316,6 +6343,14 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info) threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]); hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]); err = nl80211_set_cqm_rssi(info, threshold, hysteresis); + } else if (attrs[NL80211_ATTR_CQM_TXE_RATE] && + attrs[NL80211_ATTR_CQM_TXE_PKTS] && + attrs[NL80211_ATTR_CQM_TXE_INTVL]) { + u32 rate, pkts, intvl; + rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]); + pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]); + intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]); + err = nl80211_set_cqm_txe(info, rate, pkts, intvl); } else err = -EINVAL; @@ -8495,6 +8530,56 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) + goto nla_put_failure; + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_PKTS, num_packets)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_RATE, rate)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_INTVL, intvl)) + goto nla_put_failure; + + nla_nest_end(msg, pinfoattr); + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 89ce99675e6..9f2616fffb4 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -110,6 +110,11 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer, u32 num_packets, gfp_t gfp); +void +nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); + void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp); -- cgit v1.2.3-70-g09d2 From 9e33ce453f8ac8452649802bee1f410319408f4b Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 7 Jul 2012 18:26:10 +0800 Subject: ipvs: fix oops on NAT reply in br_nf context IPVS should not reset skb->nf_bridge in FORWARD hook by calling nf_reset for NAT replies. It triggers oops in br_nf_forward_finish. [ 579.781508] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 579.781669] IP: [] br_nf_forward_finish+0x58/0x112 [ 579.781792] PGD 218f9067 PUD 0 [ 579.781865] Oops: 0000 [#1] SMP [ 579.781945] CPU 0 [ 579.781983] Modules linked in: [ 579.782047] [ 579.782080] [ 579.782114] Pid: 4644, comm: qemu Tainted: G W 3.5.0-rc5-00006-g95e69f9 #282 Hewlett-Packard /30E8 [ 579.782300] RIP: 0010:[] [] br_nf_forward_finish+0x58/0x112 [ 579.782455] RSP: 0018:ffff88007b003a98 EFLAGS: 00010287 [ 579.782541] RAX: 0000000000000008 RBX: ffff8800762ead00 RCX: 000000000001670a [ 579.782653] RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff8800762ead00 [ 579.782845] RBP: ffff88007b003ac8 R08: 0000000000016630 R09: ffff88007b003a90 [ 579.782957] R10: ffff88007b0038e8 R11: ffff88002da37540 R12: ffff88002da01a02 [ 579.783066] R13: ffff88002da01a80 R14: ffff88002d83c000 R15: ffff88002d82a000 [ 579.783177] FS: 0000000000000000(0000) GS:ffff88007b000000(0063) knlGS:00000000f62d1b70 [ 579.783306] CS: 0010 DS: 002b ES: 002b CR0: 000000008005003b [ 579.783395] CR2: 0000000000000004 CR3: 00000000218fe000 CR4: 00000000000027f0 [ 579.783505] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 579.783684] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 579.783795] Process qemu (pid: 4644, threadinfo ffff880021b20000, task ffff880021aba760) [ 579.783919] Stack: [ 579.783959] ffff88007693cedc ffff8800762ead00 ffff88002da01a02 ffff8800762ead00 [ 579.784110] ffff88002da01a02 ffff88002da01a80 ffff88007b003b18 ffffffff817b26c7 [ 579.784260] ffff880080000000 ffffffff81ef59f0 ffff8800762ead00 ffffffff81ef58b0 [ 579.784477] Call Trace: [ 579.784523] [ 579.784562] [ 579.784603] [] br_nf_forward_ip+0x275/0x2c8 [ 579.784707] [] nf_iterate+0x47/0x7d [ 579.784797] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.784906] [] nf_hook_slow+0x6d/0x102 [ 579.784995] [] ? br_dev_queue_push_xmit+0xae/0xae [ 579.785175] [] ? _raw_write_unlock_bh+0x19/0x1b [ 579.785179] [] __br_forward+0x97/0xa2 [ 579.785179] [] br_handle_frame_finish+0x1a6/0x257 [ 579.785179] [] br_nf_pre_routing_finish+0x26d/0x2cb [ 579.785179] [] br_nf_pre_routing+0x55d/0x5c1 [ 579.785179] [] nf_iterate+0x47/0x7d [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] nf_hook_slow+0x6d/0x102 [ 579.785179] [] ? br_handle_local_finish+0x44/0x44 [ 579.785179] [] ? sky2_poll+0xb35/0xb54 [ 579.785179] [] br_handle_frame+0x213/0x229 [ 579.785179] [] ? br_handle_frame_finish+0x257/0x257 [ 579.785179] [] __netif_receive_skb+0x2b4/0x3f1 [ 579.785179] [] process_backlog+0x99/0x1e2 [ 579.785179] [] net_rx_action+0xdf/0x242 [ 579.785179] [] __do_softirq+0xc1/0x1e0 [ 579.785179] [] ? trace_hardirqs_off_thunk+0x3a/0x6c [ 579.785179] [] call_softirq+0x1c/0x30 The steps to reproduce as follow, 1. On Host1, setup brige br0(192.168.1.106) 2. Boot a kvm guest(192.168.1.105) on Host1 and start httpd 3. Start IPVS service on Host1 ipvsadm -A -t 192.168.1.106:80 -s rr ipvsadm -a -t 192.168.1.106:80 -r 192.168.1.105:80 -m 4. Run apache benchmark on Host2(192.168.1.101) ab -n 1000 http://192.168.1.106/ ip_vs_reply4 ip_vs_out handle_response ip_vs_notrack nf_reset() { skb->nf_bridge = NULL; } Actually, IPVS wants in this case just to replace nfct with untracked version. So replace the nf_reset(skb) call in ip_vs_notrack() with a nf_conntrack_put(skb->nfct) call. Signed-off-by: Lin Ming Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d6146b4811c..95374d1696a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb) struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (!ct || !nf_ct_is_untracked(ct)) { - nf_reset(skb); + nf_conntrack_put(skb->nfct); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); -- cgit v1.2.3-70-g09d2 From 57b5ce072e7361218a8e2ea1d62960cbb71d9cff Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 12 Jul 2012 11:49:18 -0700 Subject: cfg80211: add cellular base station regulatory hint support Cellular base stations can provide hints to cfg80211 about where they think we are. This can be done for example on a cell phone. To enable these hints we simply allow them through as user regulatory hints but we allow userspace to clasify the hint as either coming directly from the user or coming from a cellular base station. This option is only available when you enable CONFIG_CFG80211_CERTIFICATION_ONUS. The base station hints themselves will not be processed by the core unless at least one device on the system supports this feature. Signed-off-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/linux/nl80211.h | 32 ++++++++++++++ include/net/regulatory.h | 5 +++ net/wireless/core.c | 1 + net/wireless/nl80211.c | 23 +++++++++- net/wireless/reg.c | 113 ++++++++++++++++++++++++++++++++++++++++++++--- net/wireless/reg.h | 5 ++- 6 files changed, 171 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d6cfacc3ce4..2f387880640 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1245,6 +1245,12 @@ enum nl80211_commands { * @NL80211_ATTR_BG_SCAN_PERIOD: Background scan period in seconds * or 0 to disable background scan. * + * @NL80211_ATTR_USER_REG_HINT_TYPE: type of regulatory hint passed from + * userspace. If unset it is assumed the hint comes directly from + * a user. If set code could specify exactly what type of source + * was used to provide the hint. For the different types of + * allowed user regulatory hints see nl80211_user_reg_hint_type. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1498,6 +1504,8 @@ enum nl80211_attrs { NL80211_ATTR_WDEV, + NL80211_ATTR_USER_REG_HINT_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2060,6 +2068,26 @@ enum nl80211_dfs_regions { NL80211_DFS_JP = 3, }; +/** + * enum nl80211_user_reg_hint_type - type of user regulatory hint + * + * @NL80211_USER_REG_HINT_USER: a user sent the hint. This is always + * assumed if the attribute is not set. + * @NL80211_USER_REG_HINT_CELL_BASE: the hint comes from a cellular + * base station. Device drivers that have been tested to work + * properly to support this type of hint can enable these hints + * by setting the NL80211_FEATURE_CELL_BASE_REG_HINTS feature + * capability on the struct wiphy. The wireless core will + * ignore all cell base station hints until at least one device + * present has been registered with the wireless core that + * has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a + * supported feature. + */ +enum nl80211_user_reg_hint_type { + NL80211_USER_REG_HINT_USER = 0, + NL80211_USER_REG_HINT_CELL_BASE = 1, +}; + /** * enum nl80211_survey_info - survey information * @@ -2963,11 +2991,15 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates. * @NL80211_FEATURE_INACTIVITY_TIMER: This driver takes care of freeing up * the connected inactive stations in AP mode. + * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested + * to work properly to suppport receiving regulatory hints from + * cellular base stations. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, NL80211_FEATURE_HT_IBSS = 1 << 1, NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, + NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, }; /** diff --git a/include/net/regulatory.h b/include/net/regulatory.h index a5f79933e21..7dcaa2794fd 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -52,6 +52,10 @@ enum environment_cap { * DFS master operation on a known DFS region (NL80211_DFS_*), * dfs_region represents that region. Drivers can use this and the * @alpha2 to adjust their device's DFS parameters as required. + * @user_reg_hint_type: if the @initiator was of type + * %NL80211_REGDOM_SET_BY_USER, this classifies the type + * of hint passed. This could be any of the %NL80211_USER_REG_HINT_* + * types. * @intersect: indicates whether the wireless core should intersect * the requested regulatory domain with the presently set regulatory * domain. @@ -70,6 +74,7 @@ enum environment_cap { struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; + enum nl80211_user_reg_hint_type user_reg_hint_type; char alpha2[2]; u8 dfs_region; bool intersect; diff --git a/net/wireless/core.c b/net/wireless/core.c index 71b684b5a67..c0307b05986 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -542,6 +542,7 @@ int wiphy_register(struct wiphy *wiphy) } /* set up regulatory info */ + wiphy_regulatory_register(wiphy); regulatory_update(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add_rcu(&rdev->list, &cfg80211_rdev_list); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9216e45e53a..50b1a0e84f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -354,6 +354,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_INACTIVITY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, + [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3582,6 +3583,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) { int r; char *data = NULL; + enum nl80211_user_reg_hint_type user_reg_hint_type; /* * You should only get this when cfg80211 hasn't yet initialized @@ -3601,7 +3603,21 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); - r = regulatory_hint_user(data); + if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]) + user_reg_hint_type = + nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]); + else + user_reg_hint_type = NL80211_USER_REG_HINT_USER; + + switch (user_reg_hint_type) { + case NL80211_USER_REG_HINT_USER: + case NL80211_USER_REG_HINT_CELL_BASE: + break; + default: + return -EINVAL; + } + + r = regulatory_hint_user(data, user_reg_hint_type); return r; } @@ -3971,6 +3987,11 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) cfg80211_regdomain->dfs_region))) goto nla_put_failure; + if (reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) goto nla_put_failure; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ad6f9029c56..83583a9c15d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -97,9 +97,16 @@ const struct ieee80211_regdomain *cfg80211_regdomain; * - cfg80211_world_regdom * - cfg80211_regdom * - last_request + * - reg_num_devs_support_basehint */ static DEFINE_MUTEX(reg_mutex); +/* + * Number of devices that registered to the core + * that support cellular base station regulatory hints + */ +static int reg_num_devs_support_basehint; + static inline void assert_reg_lock(void) { lockdep_assert_held(®_mutex); @@ -911,6 +918,59 @@ static void handle_band(struct wiphy *wiphy, handle_channel(wiphy, initiator, band, i); } +static bool reg_request_cell_base(struct regulatory_request *request) +{ + if (request->initiator != NL80211_REGDOM_SET_BY_USER) + return false; + if (request->user_reg_hint_type != NL80211_USER_REG_HINT_CELL_BASE) + return false; + return true; +} + +bool reg_last_request_cell_base(void) +{ + assert_cfg80211_lock(); + + mutex_lock(®_mutex); + return reg_request_cell_base(last_request); + mutex_unlock(®_mutex); +} + +#ifdef CONFIG_CFG80211_CERTIFICATION_ONUS + +/* Core specific check */ +static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +{ + if (!reg_num_devs_support_basehint) + return -EOPNOTSUPP; + + if (reg_request_cell_base(last_request)) { + if (!regdom_changes(pending_request->alpha2)) + return -EALREADY; + return 0; + } + return 0; +} + +/* Device specific check */ +static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) +{ + if (!(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS)) + return true; + return false; +} +#else +static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +{ + return -EOPNOTSUPP; +} +static int reg_dev_ignore_cell_hint(struct wiphy *wiphy) +{ + return true; +} +#endif + + static bool ignore_reg_update(struct wiphy *wiphy, enum nl80211_reg_initiator initiator) { @@ -944,6 +1004,9 @@ static bool ignore_reg_update(struct wiphy *wiphy, return true; } + if (reg_request_cell_base(last_request)) + return reg_dev_ignore_cell_hint(wiphy); + return false; } @@ -1307,6 +1370,13 @@ static int ignore_request(struct wiphy *wiphy, return 0; case NL80211_REGDOM_SET_BY_COUNTRY_IE: + if (reg_request_cell_base(last_request)) { + /* Trust a Cell base station over the AP's country IE */ + if (regdom_changes(pending_request->alpha2)) + return -EOPNOTSUPP; + return -EALREADY; + } + last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) @@ -1351,6 +1421,12 @@ static int ignore_request(struct wiphy *wiphy, return REG_INTERSECT; case NL80211_REGDOM_SET_BY_USER: + if (reg_request_cell_base(pending_request)) + return reg_ignore_cell_hint(pending_request); + + if (reg_request_cell_base(last_request)) + return -EOPNOTSUPP; + if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) return REG_INTERSECT; /* @@ -1640,7 +1716,8 @@ static int regulatory_hint_core(const char *alpha2) } /* User hints */ -int regulatory_hint_user(const char *alpha2) +int regulatory_hint_user(const char *alpha2, + enum nl80211_user_reg_hint_type user_reg_hint_type) { struct regulatory_request *request; @@ -1654,6 +1731,7 @@ int regulatory_hint_user(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_USER; + request->user_reg_hint_type = user_reg_hint_type; queue_regulatory_request(request); @@ -1906,7 +1984,7 @@ static void restore_regulatory_settings(bool reset_user) * settings, user regulatory settings takes precedence. */ if (is_an_alpha2(alpha2)) - regulatory_hint_user(user_alpha2); + regulatory_hint_user(user_alpha2, NL80211_USER_REG_HINT_USER); if (list_empty(&tmp_reg_req_list)) return; @@ -2081,9 +2159,16 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) else { if (is_unknown_alpha2(rd->alpha2)) pr_info("Regulatory domain changed to driver built-in settings (unknown country)\n"); - else - pr_info("Regulatory domain changed to country: %c%c\n", - rd->alpha2[0], rd->alpha2[1]); + else { + if (reg_request_cell_base(last_request)) + pr_info("Regulatory domain changed " + "to country: %c%c by Cell Station\n", + rd->alpha2[0], rd->alpha2[1]); + else + pr_info("Regulatory domain changed " + "to country: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + } } print_dfs_region(rd->dfs_region); print_rd_rules(rd); @@ -2293,6 +2378,18 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) } #endif /* CONFIG_HOTPLUG */ +void wiphy_regulatory_register(struct wiphy *wiphy) +{ + assert_cfg80211_lock(); + + mutex_lock(®_mutex); + + if (!reg_dev_ignore_cell_hint(wiphy)) + reg_num_devs_support_basehint++; + + mutex_unlock(®_mutex); +} + /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { @@ -2302,6 +2399,9 @@ void reg_device_remove(struct wiphy *wiphy) mutex_lock(®_mutex); + if (!reg_dev_ignore_cell_hint(wiphy)) + reg_num_devs_support_basehint--; + kfree(wiphy->regd); if (last_request) @@ -2367,7 +2467,8 @@ int __init regulatory_init(void) * as a user hint. */ if (!is_world_regdom(ieee80211_regdom)) - regulatory_hint_user(ieee80211_regdom); + regulatory_hint_user(ieee80211_regdom, + NL80211_USER_REG_HINT_USER); return 0; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e2aaaf525a2..519492fdda3 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -22,9 +22,11 @@ bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); bool reg_supported_dfs_region(u8 dfs_region); -int regulatory_hint_user(const char *alpha2); +int regulatory_hint_user(const char *alpha2, + enum nl80211_user_reg_hint_type user_reg_hint_type); int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env); +void wiphy_regulatory_register(struct wiphy *wiphy); void reg_device_remove(struct wiphy *wiphy); int __init regulatory_init(void); @@ -33,6 +35,7 @@ void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); void regulatory_update(struct wiphy *wiphy, enum nl80211_reg_initiator setby); +bool reg_last_request_cell_base(void); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel -- cgit v1.2.3-70-g09d2 From 6700c2709c08d74ae2c3c29b84a30da012dbc7f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 03:29:28 -0700 Subject: net: Pass optional SKB and SK arguments to dst_ops->{update_pmtu,redirect}() This will be used so that we can compose a full flow key. Even though we have a route in this context, we need more. In the future the routes will be without destination address, source address, etc. keying. One ipv4 route will cover entire subnets, etc. In this environment we have to have a way to possess persistent storage for redirects and PMTU information. This persistent storage will exist in the FIB tables, and that's why we'll need to be able to rebuild a full lookup flow key here. Using that flow key will do a fib_lookup() and create/update the persistent entry. Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- include/net/dst_ops.h | 6 ++++-- net/bridge/br_netfilter.c | 6 ++++-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/decnet/dn_route.c | 12 ++++++++---- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/route.c | 21 +++++++++++++-------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/xfrm4_policy.c | 10 ++++++---- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 6 +++--- net/ipv6/route.c | 21 +++++++++++++-------- net/ipv6/sit.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/xfrm6_policy.c | 10 ++++++---- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- net/sctp/input.c | 2 +- net/sctp/transport.c | 2 +- 21 files changed, 71 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 014504d8e43..1ca732201f3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, int e = skb_queue_empty(&priv->cm.skb_queue); if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 085931fa7ce..d079fc61c12 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -24,8 +24,10 @@ struct dst_ops { struct net_device *dev, int how); struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); - void (*update_pmtu)(struct dst_entry *dst, u32 mtu); - void (*redirect)(struct dst_entry *dst, struct sk_buff *skb); + void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); + void (*redirect)(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); int (*local_out)(struct sk_buff *skb); struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, struct sk_buff *skb, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 81f76c402cf..68e8f364bbf 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,11 +111,13 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void fake_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 683902fcc8e..ab4f44c9bb2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -193,7 +193,7 @@ static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 3ee0342e1ce..56840b249f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -134,7 +134,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e9c4e2e864c..47de90d8fe9 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -117,8 +117,10 @@ static void dn_dst_destroy(struct dst_entry *); static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb , u32 mtu); +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); @@ -266,7 +268,8 @@ static int dn_dst_gc(struct dst_ops *ops) * We update both the mtu and the advertised mss (i.e. the segment size we * advertise to the other end). */ -static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) +static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; @@ -294,7 +297,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static void dn_dst_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 200d2180937..3ea465286a3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -840,7 +840,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0c3123566d7..42c44b1403c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -833,7 +833,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { df |= (old_iph->frag_off&htons(IP_DF)); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c2d0e6d8baa..2c2c35bace7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -519,7 +519,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if ((old_iph->frag_off & htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index aad21819316..b35d3bfc66c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -148,8 +148,10 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1273,7 +1275,7 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; @@ -1506,7 +1508,8 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rtable *rt = (struct rtable *) dst; @@ -1531,7 +1534,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu); ip_rt_put(rt); } } @@ -1559,7 +1562,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, skb); + ip_do_redirect(&rt->dst, NULL, skb); ip_rt_put(rt); } } @@ -2587,11 +2590,13 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b8e7e059540..d9caf5c07aa 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -319,7 +319,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) struct dst_entry *dst = __sk_dst_check(sk, 0); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 737131cef37..fcf7678bc00 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -194,20 +194,22 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } -static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm4_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm4_dst_destroy(struct dst_entry *dst) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 62539a4b2dc..4a0c4d2d8b0 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -269,7 +269,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu); return inet6_csk_route_socket(sk); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 61d10659729..db328466796 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -609,10 +609,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); } if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), skb2); + skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -952,7 +952,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { *pmtu = mtu; err = -EMSGSIZE; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2a4c8d48977..31af1ed6c1d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,8 +78,10 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu); +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -187,11 +189,13 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) return mtu ? : dst->dev->mtu; } -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { } -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { } @@ -1071,7 +1075,8 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; @@ -1108,7 +1113,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, ntohl(mtu)); + ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1136,7 +1141,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - rt6_do_redirect(dst, skb); + rt6_do_redirect(dst, NULL, skb); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_redirect); @@ -1639,7 +1644,7 @@ static int ip6_route_del(struct fib6_config *cfg) return err; } -static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fbf1622fdee..3bd1bfc01f8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -807,7 +807,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr && skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ecdf241cad0..c9dabdd832d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -367,7 +367,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) - dst->ops->redirect(dst,skb); + dst->ops->redirect(dst, sk, skb); } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f5a9cb8257b..ef39812107b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -207,20 +207,22 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } -static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, mtu); + path->ops->update_pmtu(path, sk, skb, mtu); } -static void xfrm6_redirect(struct dst_entry *dst, struct sk_buff *skb) +static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->redirect(path, skb); + path->ops->redirect(path, sk, skb); } static void xfrm6_dst_destroy(struct dst_entry *dst) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 71d6ecb6592..65b616ae171 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -797,7 +797,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); df |= (old_iph->frag_off & htons(IP_DF)); @@ -913,7 +913,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; } if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && !skb_is_gso(skb)) { diff --git a/net/sctp/input.c b/net/sctp/input.c index a67bc31f49f..c201b26879a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -432,7 +432,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, return; dst = sctp_transport_dst_check(t); if (dst) - dst->ops->redirect(dst, skb); + dst->ops->redirect(dst, sk, skb); } /* diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e69e1a2175a..a6b7ee9ce28 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -249,7 +249,7 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); if (dst) { - dst->ops->update_pmtu(dst, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); dst = sctp_transport_dst_check(t); if (!dst) -- cgit v1.2.3-70-g09d2 From 4895c771c7f006b4b90f9d6b1d2210939ba57b38 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 04:19:00 -0700 Subject: ipv4: Add FIB nexthop exceptions. In a regime where we have subnetted route entries, we need a way to store persistent storage about destination specific learned values such as redirects and PMTU values. This is implemented here via nexthop exceptions. The initial implementation is a 2048 entry hash table with relaiming starting at chain length 5. A more sophisticated scheme can be devised if that proves necessary. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 18 ++++ net/ipv4/fib_semantics.c | 23 +++++ net/ipv4/route.c | 256 +++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 266 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 5697acefeba..e9ee1ca0708 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -46,6 +47,22 @@ struct fib_config { struct fib_info; +struct fib_nh_exception { + struct fib_nh_exception __rcu *fnhe_next; + __be32 fnhe_daddr; + u32 fnhe_pmtu; + u32 fnhe_gw; + unsigned long fnhe_expires; + unsigned long fnhe_stamp; +}; + +struct fnhe_hash_bucket { + struct fib_nh_exception __rcu *chain; +}; + +#define FNHE_HASH_SIZE 2048 +#define FNHE_RECLAIM_DEPTH 5 + struct fib_nh { struct net_device *nh_dev; struct hlist_node nh_hash; @@ -63,6 +80,7 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; + struct fnhe_hash_bucket *nh_exceptions; }; /* diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d71bfbdc0bf..1e09852df51 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,27 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void free_nh_exceptions(struct fib_nh *nh) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + int i; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + fnhe = rcu_dereference(hash[i].chain); + while (fnhe) { + struct fib_nh_exception *next; + + next = rcu_dereference(fnhe->fnhe_next); + kfree(fnhe); + + fnhe = next; + } + } + kfree(hash); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -148,6 +169,8 @@ static void free_fib_info_rcu(struct rcu_head *head) change_nexthops(fi) { if (nexthop_nh->nh_dev) dev_put(nexthop_nh->nh_dev); + if (nexthop_nh->nh_exceptions) + free_nh_exceptions(nexthop_nh); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b35d3bfc66c..a5bd0b4acc6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1275,14 +1275,130 @@ static void rt_del(unsigned int hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) +static void __build_flow_key(struct flowi4 *fl4, struct sock *sk, + const struct iphdr *iph, + int oif, u8 tos, + u8 prot, u32 mark, int flow_flags) +{ + if (sk) { + const struct inet_sock *inet = inet_sk(sk); + + oif = sk->sk_bound_dev_if; + mark = sk->sk_mark; + tos = RT_CONN_FLAGS(sk); + prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; + } + flowi4_init_output(fl4, oif, mark, tos, + RT_SCOPE_UNIVERSE, prot, + flow_flags, + iph->daddr, iph->saddr, 0, 0); +} + +static void build_skb_flow_key(struct flowi4 *fl4, struct sk_buff *skb, struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + int oif = skb->dev->ifindex; + u8 tos = RT_TOS(iph->tos); + u8 prot = iph->protocol; + u32 mark = skb->mark; + + __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0); +} + +static void build_sk_flow_key(struct flowi4 *fl4, struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + struct ip_options_rcu *inet_opt; + __be32 daddr = inet->inet_daddr; + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk), + daddr, inet->inet_saddr, 0, 0); + rcu_read_unlock(); +} + +static void ip_rt_build_flow_key(struct flowi4 *fl4, struct sock *sk, + struct sk_buff *skb) +{ + if (skb) + build_skb_flow_key(fl4, skb, sk); + else + build_sk_flow_key(fl4, sk); +} + +static DEFINE_SPINLOCK(fnhe_lock); + +static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) +{ + struct fib_nh_exception *fnhe, *oldest; + + oldest = rcu_dereference(hash->chain); + for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) + oldest = fnhe; + } + return oldest; +} + +static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fib_nh_exception *fnhe; + int depth; + u32 hval; + + if (!hash) { + hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), + GFP_ATOMIC); + if (!hash) + return NULL; + } + + hval = (__force u32) daddr; + hval ^= (hval >> 11) ^ (hval >> 22); + hash += hval; + + depth = 0; + for (fnhe = rcu_dereference(hash->chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (fnhe->fnhe_daddr == daddr) + goto out; + depth++; + } + + if (depth > FNHE_RECLAIM_DEPTH) { + fnhe = fnhe_oldest(hash + hval, daddr); + goto out_daddr; + } + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + return NULL; + + fnhe->fnhe_next = hash->chain; + rcu_assign_pointer(hash->chain, fnhe); + +out_daddr: + fnhe->fnhe_daddr = daddr; +out: + fnhe->fnhe_stamp = jiffies; + return fnhe; +} + +static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; struct net_device *dev = skb->dev; struct in_device *in_dev; + struct fib_result res; struct neighbour *n; - struct rtable *rt; struct net *net; switch (icmp_hdr(skb)->code & 7) { @@ -1296,7 +1412,6 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf return; } - rt = (struct rtable *) dst; if (rt->rt_gateway != old_gw) return; @@ -1320,11 +1435,21 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf goto reject_redirect; } - n = ipv4_neigh_lookup(dst, NULL, &new_gw); + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); if (n) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { + if (fib_lookup(net, fl4, &res) == 0) { + struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_exception *fnhe; + + spin_lock_bh(&fnhe_lock); + fnhe = find_or_create_fnhe(nh, fl4->daddr); + if (fnhe) + fnhe->fnhe_gw = new_gw; + spin_unlock_bh(&fnhe_lock); + } rt->rt_gateway = new_gw; rt->rt_flags |= RTCF_REDIRECTED; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); @@ -1349,6 +1474,17 @@ reject_redirect: ; } +static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi4 fl4; + + rt = (struct rtable *) dst; + + ip_rt_build_flow_key(&fl4, sk, skb); + __ip_do_redirect(rt, skb, &fl4); +} + static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) { struct rtable *rt = (struct rtable *)dst; @@ -1508,33 +1644,51 @@ out: kfree_skb(skb); return 0; } -static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { - struct rtable *rt = (struct rtable *) dst; - - dst_confirm(dst); + struct fib_result res; if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_exception *fnhe; + + spin_lock_bh(&fnhe_lock); + fnhe = find_or_create_fnhe(nh, fl4->daddr); + if (fnhe) { + fnhe->fnhe_pmtu = mtu; + fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires; + } + spin_unlock_bh(&fnhe_lock); + } rt->rt_pmtu = mtu; dst_set_expires(&rt->dst, ip_rt_mtu_expires); } +static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + struct rtable *rt = (struct rtable *) dst; + struct flowi4 fl4; + + ip_rt_build_flow_key(&fl4, sk, skb); + __ip_rt_update_pmtu(rt, &fl4, mtu); +} + void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags) { - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags, - iph->daddr, iph->saddr, 0, 0); + __build_flow_key(&fl4, NULL, iph, oif, + RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_rt_update_pmtu(&rt->dst, NULL, skb, mtu); + __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } @@ -1542,27 +1696,31 @@ EXPORT_SYMBOL_GPL(ipv4_update_pmtu); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { - const struct inet_sock *inet = inet_sk(sk); + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; - return ipv4_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(sock_net(sk), &fl4); + if (!IS_ERR(rt)) { + __ip_rt_update_pmtu(rt, &fl4, mtu); + ip_rt_put(rt); + } } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, u8 protocol, int flow_flags) { - const struct iphdr *iph = (const struct iphdr *)skb->data; + const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, - protocol, flow_flags, iph->daddr, iph->saddr, 0, 0); + __build_flow_key(&fl4, NULL, iph, oif, + RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - ip_do_redirect(&rt->dst, NULL, skb); + __ip_do_redirect(rt, skb, &fl4); ip_rt_put(rt); } } @@ -1570,12 +1728,16 @@ EXPORT_SYMBOL_GPL(ipv4_redirect); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) { - const struct inet_sock *inet = inet_sk(sk); + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct flowi4 fl4; + struct rtable *rt; - return ipv4_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, - sk->sk_mark, - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk)); + __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); + rt = __ip_route_output_key(sock_net(sk), &fl4); + if (!IS_ERR(rt)) { + __ip_do_redirect(rt, skb, &fl4); + ip_rt_put(rt); + } } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); @@ -1722,14 +1884,46 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, fi->fib_metrics, true); } +static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fib_nh_exception *fnhe; + u32 hval; + + hval = (__force u32) daddr; + hval ^= (hval >> 11) ^ (hval >> 22); + + for (fnhe = rcu_dereference(hash[hval].chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { + if (fnhe->fnhe_daddr == daddr) { + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = jiffies - expires; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } + } + if (fnhe->fnhe_gw) + rt->rt_gateway = fnhe->fnhe_gw; + fnhe->fnhe_stamp = jiffies; + break; + } + } +} + static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { if (fi) { - if (FIB_RES_GW(*res) && - FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - rt->rt_gateway = FIB_RES_GW(*res); + struct fib_nh *nh = &FIB_RES_NH(*res); + + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + rt->rt_gateway = nh->nh_gw; + if (unlikely(nh->nh_exceptions)) + rt_bind_exception(rt, nh, fl4->daddr); rt_init_metrics(rt, fl4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; -- cgit v1.2.3-70-g09d2 From d3818c92afabecfe6b8e5d2e3734c8753522987c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Jul 2012 21:38:04 +0000 Subject: ipv6: fix inet6_csk_xmit() We should provide to inet6_csk_route_socket a struct flowi6 pointer, so that net6_csk_xmit() works correctly instead of sending garbage. Also add some consts Signed-off-by: Eric Dumazet Reported-by: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- include/net/ip6_route.h | 3 ++- net/ipv6/inet6_connection_sock.c | 40 +++++++++++++++++++++------------------- 3 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bc6c8fd8ed0..379e433e15e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -299,9 +299,9 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_pktinfo sticky_pktinfo; - struct in6_addr *daddr_cache; + const struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES - struct in6_addr *saddr_cache; + const struct in6_addr *saddr_cache; #endif __be32 flow_label; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index b6b6f7d6f3c..5fa2af00634 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -158,7 +158,8 @@ extern void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); * Store a destination cache entry in a socket */ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 4a0c4d2d8b0..0251a6005be 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -171,7 +171,8 @@ EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + const struct in6_addr *daddr, + const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); @@ -203,31 +204,31 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) return dst; } -static struct dst_entry *inet6_csk_route_socket(struct sock *sk) +static struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi6 fl6; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = sk->sk_protocol; - fl6.daddr = np->daddr; - fl6.saddr = np->saddr; - fl6.flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl6.flowlabel); - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; - fl6.fl6_sport = inet->inet_sport; - fl6.fl6_dport = inet->inet_dport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + memset(fl6, 0, sizeof(*fl6)); + fl6->flowi6_proto = sk->sk_protocol; + fl6->daddr = np->daddr; + fl6->saddr = np->saddr; + fl6->flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6->flowlabel); + fl6->flowi6_oif = sk->sk_bound_dev_if; + fl6->flowi6_mark = sk->sk_mark; + fl6->fl6_sport = inet->inet_sport; + fl6->fl6_dport = inet->inet_dport; + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); if (!IS_ERR(dst)) __inet6_csk_dst_store(sk, dst, NULL, NULL); @@ -243,7 +244,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) struct dst_entry *dst; int res; - dst = inet6_csk_route_socket(sk); + dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); sk->sk_route_caps = 0; @@ -265,12 +266,13 @@ EXPORT_SYMBOL_GPL(inet6_csk_xmit); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) { - struct dst_entry *dst = inet6_csk_route_socket(sk); + struct flowi6 fl6; + struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) return NULL; dst->ops->update_pmtu(dst, sk, NULL, mtu); - return inet6_csk_route_socket(sk); + return inet6_csk_route_socket(sk, &fl6); } EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu); -- cgit v1.2.3-70-g09d2 From eb8637cd4a0d651cf4fcc1559231facee829a0ac Mon Sep 17 00:00:00 2001 From: Saurabh Date: Tue, 17 Jul 2012 09:44:49 +0000 Subject: net/ipv4: VTI support rx-path hook in xfrm4_mode_tunnel. Incorporated David and Steffen's comments. Add hook for rx-path xfmr4_mode_tunnel for VTI tunnel module. Signed-off-by: Saurabh Mohan Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_mode_tunnel.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 17acbc92476..d9509eb29b8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1475,6 +1475,8 @@ extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); +extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler); +extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler); extern int xfrm6_extract_header(struct sk_buff *skb); extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index ed4bf11ef9f..ddee0a099a2 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -15,6 +15,65 @@ #include #include +/* Informational hook. The decap is still done here. */ +static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly; +static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex); + +int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler) +{ + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; + int ret = -EEXIST; + int priority = handler->priority; + + mutex_lock(&xfrm4_mode_tunnel_input_mutex); + + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t->priority > priority) + break; + if (t->priority == priority) + goto err; + + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm4_mode_tunnel_input_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register); + +int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler) +{ + struct xfrm_tunnel __rcu **pprev; + struct xfrm_tunnel *t; + int ret = -ENOENT; + + mutex_lock(&xfrm4_mode_tunnel_input_mutex); + for (pprev = &rcv_notify_handlers; + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + mutex_unlock(&xfrm4_mode_tunnel_input_mutex); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_deregister); + static inline void ipip_ecn_decapsulate(struct sk_buff *skb) { struct iphdr *inner_iph = ipip_hdr(skb); @@ -64,8 +123,14 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#define for_each_input_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) + static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_tunnel *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) @@ -74,6 +139,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; + for_each_input_rcu(rcv_notify_handlers, handler) + handler->handler(skb); + if (skb_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; -- cgit v1.2.3-70-g09d2 From ddbe503203855939946430e39bae58de11b70b69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jul 2012 08:11:12 +0000 Subject: ipv6: add ipv6_addr_hash() helper Introduce ipv6_addr_hash() helper doing a XOR on all bits of an IPv6 address, with an optimized x86_64 version. Use it in flow dissector, as suggested by Andrew McGregor, to reduce hash collision probabilities in fq_codel (and other users of flow dissector) Use it in ip6_tunnel.c and use more bit shuffling, as suggested by David Laight, as existing hash was ignoring most of them. Use it in sunrpc and use more bit shuffling, using hash_32(). Use it in net/ipv6/addrconf.c, using hash_32() as well. As a cleanup, use it in net/ipv4/tcp_metrics.c Signed-off-by: Eric Dumazet Reported-by: Andrew McGregor Cc: Dave Taht Cc: Tom Herbert Cc: David Laight Cc: Joe Perches Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 ++- include/net/ipv6.h | 13 +++++++++++++ net/core/flow_dissector.c | 5 +++-- net/ipv4/tcp_metrics.c | 15 +++------------ net/ipv6/addrconf.c | 21 ++++++++------------- net/ipv6/ip6_tunnel.c | 20 ++++++++++++-------- net/sunrpc/svcauth_unix.c | 22 ++++------------------ 7 files changed, 45 insertions(+), 54 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f2b801c4b55..089a09d001d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -46,7 +46,8 @@ struct prefix_info { #include #include -#define IN6_ADDR_HSIZE 16 +#define IN6_ADDR_HSIZE_SHIFT 4 +#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) extern int addrconf_init(void); extern void addrconf_cleanup(void); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f695f39e892..01c34b363a3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -419,6 +419,19 @@ static inline bool ipv6_addr_any(const struct in6_addr *a) #endif } +static inline u32 ipv6_addr_hash(const struct in6_addr *a) +{ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + const unsigned long *ul = (const unsigned long *)a; + unsigned long x = ul[0] ^ ul[1]; + + return (u32)(x ^ (x >> 32)); +#else + return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ + a->s6_addr32[2] ^ a->s6_addr32[3]); +#endif +} + static inline bool ipv6_addr_loopback(const struct in6_addr *a) { return (a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a225089df5b..466820b6e34 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -55,8 +56,8 @@ ipv6: return false; ip_proto = iph->nexthdr; - flow->src = iph->saddr.s6_addr32[3]; - flow->dst = iph->daddr.s6_addr32[3]; + flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); break; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 5a38a2d5a95..1a115b66579 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -211,10 +211,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, break; case AF_INET6: *(struct in6_addr *)addr.addr.a6 = inet6_rsk(req)->rmt_addr; - hash = ((__force unsigned int) addr.addr.a6[0] ^ - (__force unsigned int) addr.addr.a6[1] ^ - (__force unsigned int) addr.addr.a6[2] ^ - (__force unsigned int) addr.addr.a6[3]); + hash = ipv6_addr_hash(&inet6_rsk(req)->rmt_addr); break; default: return NULL; @@ -251,10 +248,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock case AF_INET6: tw6 = inet6_twsk((struct sock *)tw); *(struct in6_addr *)addr.addr.a6 = tw6->tw_v6_daddr; - hash = ((__force unsigned int) addr.addr.a6[0] ^ - (__force unsigned int) addr.addr.a6[1] ^ - (__force unsigned int) addr.addr.a6[2] ^ - (__force unsigned int) addr.addr.a6[3]); + hash = ipv6_addr_hash(&tw6->tw_v6_daddr); break; default: return NULL; @@ -291,10 +285,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, break; case AF_INET6: *(struct in6_addr *)addr.addr.a6 = inet6_sk(sk)->daddr; - hash = ((__force unsigned int) addr.addr.a6[0] ^ - (__force unsigned int) addr.addr.a6[1] ^ - (__force unsigned int) addr.addr.a6[2] ^ - (__force unsigned int) addr.addr.a6[3]); + hash = ipv6_addr_hash(&inet6_sk(sk)->daddr); break; default: return NULL; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8f6411c9718..79181819a24 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -579,15 +580,9 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) list_add_tail(&ifp->if_list, p); } -static u32 ipv6_addr_hash(const struct in6_addr *addr) +static u32 inet6_addr_hash(const struct in6_addr *addr) { - /* - * We perform the hash function over the last 64 bits of the address - * This will include the IEEE address token on links that support it. - */ - return jhash_2words((__force u32)addr->s6_addr32[2], - (__force u32)addr->s6_addr32[3], 0) - & (IN6_ADDR_HSIZE - 1); + return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); } /* On success it returns ifp with increased reference count */ @@ -662,7 +657,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, in6_ifa_hold(ifa); /* Add to big hash table */ - hash = ipv6_addr_hash(addr); + hash = inet6_addr_hash(addr); hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); spin_unlock(&addrconf_hash_lock); @@ -1270,7 +1265,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; struct hlist_node *node; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) { @@ -1293,7 +1288,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) { - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); struct inet6_ifaddr *ifp; struct hlist_node *node; @@ -1336,7 +1331,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add struct net_device *dev, int strict) { struct inet6_ifaddr *ifp, *result = NULL; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); struct hlist_node *node; rcu_read_lock_bh(); @@ -3223,7 +3218,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) int ret = 0; struct inet6_ifaddr *ifp = NULL; struct hlist_node *n; - unsigned int hash = ipv6_addr_hash(addr); + unsigned int hash = inet6_addr_hash(addr); rcu_read_lock_bh(); hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index db328466796..9a1d5fe6aef 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -70,11 +71,15 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 -#define HASH_SIZE 32 +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) -#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \ - (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ - (HASH_SIZE - 1)) +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ + u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + + return hash_32(hash, HASH_SIZE_SHIFT); +} static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); @@ -166,12 +171,11 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) static struct ip6_tnl * ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) { - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); + unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[h0 ^ h1]) { + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr) && (t->dev->flags & IFF_UP)) @@ -205,7 +209,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; - h = HASH(remote) ^ HASH(local); + h = HASH(remote, local); } return &ip6n->tnls[prio][h]; } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2777fa89664..4d012920373 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -104,23 +104,9 @@ static void ip_map_put(struct kref *kref) kfree(im); } -#if IP_HASHBITS == 8 -/* hash_long on a 64 bit machine is currently REALLY BAD for - * IP addresses in reverse-endian (i.e. on a little-endian machine). - * So use a trivial but reliable hash instead - */ -static inline int hash_ip(__be32 ip) -{ - int hash = (__force u32)ip ^ ((__force u32)ip>>16); - return (hash ^ (hash>>8)) & 0xff; -} -#endif -static inline int hash_ip6(struct in6_addr ip) +static inline int hash_ip6(const struct in6_addr *ip) { - return (hash_ip(ip.s6_addr32[0]) ^ - hash_ip(ip.s6_addr32[1]) ^ - hash_ip(ip.s6_addr32[2]) ^ - hash_ip(ip.s6_addr32[3])); + return hash_32(ipv6_addr_hash(ip), IP_HASHBITS); } static int ip_map_match(struct cache_head *corig, struct cache_head *cnew) { @@ -301,7 +287,7 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, ip.m_addr = *addr; ch = sunrpc_cache_lookup(cd, &ip.h, hash_str(class, IP_HASHBITS) ^ - hash_ip6(*addr)); + hash_ip6(addr)); if (ch) return container_of(ch, struct ip_map, h); @@ -331,7 +317,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ - hash_ip6(ipm->m_addr)); + hash_ip6(&ipm->m_addr)); if (!ch) return -ENOMEM; cache_put(ch, cd); -- cgit v1.2.3-70-g09d2 From aee06da6726d4981c51928c2d6d1e2cabeec7a10 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 18 Jul 2012 10:15:35 +0000 Subject: ipv4: use seqlock for nh_exceptions Use global seqlock for the nh_exceptions. Call fnhe_oldest with the right hash chain. Correct the diff value for dst_set_expires. v2: after suggestions from Eric Dumazet: * get rid of spin lock fnhe_lock, rearrange update_or_create_fnhe * continue daddr search in rt_bind_exception v3: * remove the daddr check before seqlock in rt_bind_exception * restart lookup in rt_bind_exception on detected seqlock change, as suggested by David Miller Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/route.c | 118 +++++++++++++++++++++++++++++---------------------- 2 files changed, 69 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e9ee1ca0708..2daf096dfc6 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -51,7 +51,7 @@ struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; __be32 fnhe_daddr; u32 fnhe_pmtu; - u32 fnhe_gw; + __be32 fnhe_gw; unsigned long fnhe_expires; unsigned long fnhe_stamp; }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2c25581bf25..89e39dc5336 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1333,9 +1333,9 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static DEFINE_SPINLOCK(fnhe_lock); +static DEFINE_SEQLOCK(fnhe_seqlock); -static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash, __be32 daddr) +static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; @@ -1358,47 +1358,63 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } -static struct fib_nh_exception *find_or_create_fnhe(struct fib_nh *nh, __be32 daddr) +static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, + u32 pmtu, unsigned long expires) { - struct fnhe_hash_bucket *hash = nh->nh_exceptions; + struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; int depth; - u32 hval; + u32 hval = fnhe_hashfun(daddr); + + write_seqlock_bh(&fnhe_seqlock); + hash = nh->nh_exceptions; if (!hash) { - hash = nh->nh_exceptions = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), - GFP_ATOMIC); + hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC); if (!hash) - return NULL; + goto out_unlock; + nh->nh_exceptions = hash; } - hval = fnhe_hashfun(daddr); hash += hval; depth = 0; for (fnhe = rcu_dereference(hash->chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { if (fnhe->fnhe_daddr == daddr) - goto out; + break; depth++; } - if (depth > FNHE_RECLAIM_DEPTH) { - fnhe = fnhe_oldest(hash + hval, daddr); - goto out_daddr; + if (fnhe) { + if (gw) + fnhe->fnhe_gw = gw; + if (pmtu) { + fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_expires = expires; + } + } else { + if (depth > FNHE_RECLAIM_DEPTH) + fnhe = fnhe_oldest(hash); + else { + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + goto out_unlock; + + fnhe->fnhe_next = hash->chain; + rcu_assign_pointer(hash->chain, fnhe); + } + fnhe->fnhe_daddr = daddr; + fnhe->fnhe_gw = gw; + fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_expires = expires; } - fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); - if (!fnhe) - return NULL; - - fnhe->fnhe_next = hash->chain; - rcu_assign_pointer(hash->chain, fnhe); -out_daddr: - fnhe->fnhe_daddr = daddr; -out: fnhe->fnhe_stamp = jiffies; - return fnhe; + +out_unlock: + write_sequnlock_bh(&fnhe_seqlock); + return; } static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) @@ -1452,13 +1468,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow } else { if (fib_lookup(net, fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - struct fib_nh_exception *fnhe; - spin_lock_bh(&fnhe_lock); - fnhe = find_or_create_fnhe(nh, fl4->daddr); - if (fnhe) - fnhe->fnhe_gw = new_gw; - spin_unlock_bh(&fnhe_lock); + update_or_create_fnhe(nh, fl4->daddr, new_gw, + 0, 0); } rt->rt_gateway = new_gw; rt->rt_flags |= RTCF_REDIRECTED; @@ -1663,15 +1675,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); - struct fib_nh_exception *fnhe; - spin_lock_bh(&fnhe_lock); - fnhe = find_or_create_fnhe(nh, fl4->daddr); - if (fnhe) { - fnhe->fnhe_pmtu = mtu; - fnhe->fnhe_expires = jiffies + ip_rt_mtu_expires; - } - spin_unlock_bh(&fnhe_lock); + update_or_create_fnhe(nh, fl4->daddr, 0, mtu, + jiffies + ip_rt_mtu_expires); } rt->rt_pmtu = mtu; dst_set_expires(&rt->dst, ip_rt_mtu_expires); @@ -1902,23 +1908,35 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr hval = fnhe_hashfun(daddr); +restart: for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { - if (fnhe->fnhe_daddr == daddr) { - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } + __be32 fnhe_daddr, gw; + unsigned long expires; + unsigned int seq; + u32 pmtu; + + seq = read_seqbegin(&fnhe_seqlock); + fnhe_daddr = fnhe->fnhe_daddr; + gw = fnhe->fnhe_gw; + pmtu = fnhe->fnhe_pmtu; + expires = fnhe->fnhe_expires; + if (read_seqretry(&fnhe_seqlock, seq)) + goto restart; + if (daddr != fnhe_daddr) + continue; + if (pmtu) { + unsigned long diff = jiffies - expires; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = pmtu; + dst_set_expires(&rt->dst, diff); } - if (fnhe->fnhe_gw) - rt->rt_gateway = fnhe->fnhe_gw; - fnhe->fnhe_stamp = jiffies; - break; } + if (gw) + rt->rt_gateway = gw; + fnhe->fnhe_stamp = jiffies; + break; } } -- cgit v1.2.3-70-g09d2 From be9f4a44e7d41cee50ddb5f038fc2391cbbb4046 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jul 2012 07:34:03 +0000 Subject: ipv4: tcp: remove per net tcp_sock tcp_v4_send_reset() and tcp_v4_send_ack() use a single socket per network namespace. This leads to bad behavior on multiqueue NICS, because many cpus contend for the socket lock and once socket lock is acquired, extra false sharing on various socket fields slow down the operations. To better resist to attacks, we use a percpu socket. Each cpu can run without contention, using appropriate memory (local node) Additional features : 1) We also mirror the queue_mapping of the incoming skb, so that answers use the same queue if possible. 2) Setting SOCK_USE_WRITE_QUEUE socket flag speedup sock_wfree() 3) We now limit the number of in-flight RST/ACK [1] packets per cpu, instead of per namespace, and we honor the sysctl_wmem_default limit dynamically. (Prior to this patch, sysctl_wmem_default value was copied at boot time, so any further change would not affect tcp_sock limit) [1] These packets are only generated when no socket was matched for the incoming packet. Reported-by: Bill Sommerfeld Signed-off-by: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/netns/ipv4.h | 1 - net/ipv4/ip_output.c | 50 +++++++++++++++++++++++++++++++----------------- net/ipv4/tcp_ipv4.c | 8 +++----- 4 files changed, 36 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index ec5cfde85e9..bd5e444a19c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -158,7 +158,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, +void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2e089a99d60..d909c7fc3da 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -38,7 +38,6 @@ struct netns_ipv4 { struct sock *fibnl; struct sock **icmp_sk; - struct sock *tcp_sock; struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_mask; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cc52679790b..c528f841ca4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1463,20 +1463,33 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. - * Used to send TCP resets so far. + * Used to send some TCP resets/acks so far. * - * Should run single threaded per socket because it uses the sock - * structure to pass arguments. + * Use a fake percpu inet socket to avoid false sharing and contention. */ -void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, +static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { + .sk = { + .__sk_common = { + .skc_refcnt = ATOMIC_INIT(1), + }, + .sk_wmem_alloc = ATOMIC_INIT(1), + .sk_allocation = GFP_ATOMIC, + .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), + }, + .pmtudisc = IP_PMTUDISC_WANT, +}; + +void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, unsigned int len) { - struct inet_sock *inet = inet_sk(sk); struct ip_options_data replyopts; struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + struct sk_buff *nskb; + struct sock *sk; + struct inet_sock *inet; if (ip_options_echo(&replyopts.opt.opt, skb)) return; @@ -1494,38 +1507,39 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, flowi4_init_output(&fl4, arg->bound_dev_if, 0, RT_TOS(arg->tos), - RT_SCOPE_UNIVERSE, sk->sk_protocol, + RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, ip_reply_arg_flowi_flags(arg), daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return; - /* And let IP do all the hard work. + inet = &get_cpu_var(unicast_sock); - This chunk is not reenterable, hence spinlock. - Note that it uses the fact, that this function is called - with locally disabled BH and that sk cannot be already spinlocked. - */ - bh_lock_sock(sk); inet->tos = arg->tos; + sk = &inet->sk; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; + sock_net_set(sk, net); + __skb_queue_head_init(&sk->sk_write_queue); + sk->sk_sndbuf = sysctl_wmem_default; ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); - if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { + nskb = skb_peek(&sk->sk_write_queue); + if (nskb) { if (arg->csumoffset >= 0) - *((__sum16 *)skb_transport_header(skb) + - arg->csumoffset) = csum_fold(csum_add(skb->csum, + *((__sum16 *)skb_transport_header(nskb) + + arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); - skb->ip_summed = CHECKSUM_NONE; + nskb->ip_summed = CHECKSUM_NONE; + skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } - bh_unlock_sock(sk); + put_cpu_var(unicast_sock); ip_rt_put(rt); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9caf5c07aa..d7d2fa50f07 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -688,7 +688,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; - ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); @@ -771,7 +771,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_unicast_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, + ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); @@ -2624,13 +2624,11 @@ EXPORT_SYMBOL(tcp_prot); static int __net_init tcp_sk_init(struct net *net) { - return inet_ctl_sock_create(&net->ipv4.tcp_sock, - PF_INET, SOCK_RAW, IPPROTO_TCP, net); + return 0; } static void __net_exit tcp_sk_exit(struct net *net) { - inet_ctl_sock_destroy(net->ipv4.tcp_sock); } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) -- cgit v1.2.3-70-g09d2 From d8f1641b5829629d3af8e52750ba3b542f8f56c8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Jul 2012 10:43:03 -0700 Subject: net: Fix warnings in dst_ops.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include/net/dst_ops.h:28:20: warning: ‘struct sock’ declared inside parameter list Signed-off-by: David S. Miller --- include/net/dst_ops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d079fc61c12..2f26dfb8450 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -8,6 +8,7 @@ struct dst_entry; struct kmem_cachep; struct net_device; struct sk_buff; +struct sock; struct dst_ops { unsigned short family; -- cgit v1.2.3-70-g09d2 From 2100c8d2d9db23c0a09901a782bb4e3b21bee298 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:05 +0000 Subject: net-tcp: Fast Open base This patch impelements the common code for both the client and server. 1. TCP Fast Open option processing. Since Fast Open does not have an option number assigned by IANA yet, it shares the experiment option code 254 by implementing draft-ietf-tcpm-experimental-options with a 16 bits magic number 0xF989. This enables global experiments without clashing the scarce(2) experimental options available for TCP. When the draft status becomes standard (maybe), the client should switch to the new option number assigned while the server supports both numbers for transistion. 2. The new sysctl tcp_fastopen 3. A place holder init function Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 10 ++++++++++ include/net/tcp.h | 9 ++++++++- net/ipv4/Makefile | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_fastopen.c | 11 +++++++++++ net/ipv4/tcp_input.c | 26 ++++++++++++++++++++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 4 ++-- net/ipv4/tcp_output.c | 25 +++++++++++++++++++++---- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 12 files changed, 86 insertions(+), 16 deletions(-) create mode 100644 net/ipv4/tcp_fastopen.c (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1888169e07c..12948f54383 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -243,6 +243,16 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb) return (tcp_hdr(skb)->doff - 5) * 4; } +/* TCP Fast Open */ +#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */ +#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */ + +/* TCP Fast Open Cookie as stored in memory */ +struct tcp_fastopen_cookie { + s8 len; + u8 val[TCP_FASTOPEN_COOKIE_MAX]; +}; + /* This defines a selective acknowledgement block. */ struct tcp_sack_block_wire { __be32 start_seq; diff --git a/include/net/tcp.h b/include/net/tcp.h index 85c5090bfe2..5aed3718fde 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -170,6 +170,11 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ #define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ +#define TCPOPT_EXP 254 /* Experimental */ +/* Magic number to be after the option value for sharing TCP + * experimental options. See draft-ietf-tcpm-experimental-options-00.txt + */ +#define TCPOPT_FASTOPEN_MAGIC 0xF989 /* * TCP option lengths @@ -180,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 +#define TCPOLEN_EXP_FASTOPEN_BASE 4 #define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ #define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ #define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) @@ -222,6 +228,7 @@ extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_syncookies; +extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; @@ -418,7 +425,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); extern void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, const u8 **hvpp, - int estab); + int estab, struct tcp_fastopen_cookie *foc); extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a677d804e53..ae2ccf2890e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ - tcp_minisocks.o tcp_cong.o tcp_metrics.o \ + tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index eab2a7fb15d..650e1528e1e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -293,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3f6a1e762e9..5840c325572 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -366,6 +366,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, #endif + { + .procname = "tcp_fastopen", + .data = &sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "tcp_tw_recycle", .data = &tcp_death_row.sysctl_tw_recycle, diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c new file mode 100644 index 00000000000..a7f729c409d --- /dev/null +++ b/net/ipv4/tcp_fastopen.c @@ -0,0 +1,11 @@ +#include +#include + +int sysctl_tcp_fastopen; + +static int __init tcp_fastopen_init(void) +{ + return 0; +} + +late_initcall(tcp_fastopen_init); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd49f1b7a5..a06bb8959e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3732,7 +3732,8 @@ old_ack: * the fast version below fails. */ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab) + const u8 **hvpp, int estab, + struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); @@ -3839,8 +3840,25 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o break; } break; - } + case TCPOPT_EXP: + /* Fast Open option shares code 254 using a + * 16 bits magic number. It's valid only in + * SYN or SYN-ACK with an even size. + */ + if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || + get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || + foc == NULL || !th->syn || (opsize & 1)) + break; + foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; + if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && + foc->len <= TCP_FASTOPEN_COOKIE_MAX) + memcpy(foc->val, ptr + 2, foc->len); + else if (foc->len != 0) + foc->len = -1; + break; + + } ptr += opsize-2; length -= opsize; } @@ -3882,7 +3900,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, if (tcp_parse_aligned_timestamp(tp, th)) return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1); + tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); return true; } @@ -5637,7 +5655,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_cookie_values *cvp = tp->cookie_values; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0); + tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, NULL); if (th->ack) { /* rfc793: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d7d2fa50f07..01aa77a9702 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1307,7 +1307,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c66f2ede160..5912ac3fd24 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -97,7 +97,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; @@ -534,7 +534,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 15a7c7bc3e5..4849be76ccd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -385,15 +385,17 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) #define OPTION_COOKIE_EXTENSION (1 << 4) +#define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { - u8 options; /* bit field of OPTION_* */ + u16 options; /* bit field of OPTION_* */ + u16 mss; /* 0 to disable */ u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ u8 hash_size; /* bytes in hash_location */ - u16 mss; /* 0 to disable */ - __u32 tsval, tsecr; /* need to include OPTION_TS */ __u8 *hash_location; /* temporary pointer, overloaded */ + __u32 tsval, tsecr; /* need to include OPTION_TS */ + struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; /* The sysctl int routines are generic, so check consistency here. @@ -442,7 +444,7 @@ static u8 tcp_cookie_size_check(u8 desired) static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, struct tcp_out_options *opts) { - u8 options = opts->options; /* mungable copy */ + u16 options = opts->options; /* mungable copy */ /* Having both authentication and cookies for security is redundant, * and there's certainly not enough room. Instead, the cookie-less @@ -564,6 +566,21 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, tp->rx_opt.dsack = 0; } + + if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { + struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; + + *ptr++ = htonl((TCPOPT_EXP << 24) | + ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | + TCPOPT_FASTOPEN_MAGIC); + + memcpy(ptr, foc->val, foc->len); + if ((foc->len & 3) == 2) { + u8 *align = ((u8 *)ptr) + foc->len; + align[0] = align[1] = TCPOPT_NOP; + } + ptr += (foc->len + 3) >> 2; + } } /* Compute TCP options for SYN packets. This is not the final diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7bf3cc427c2..bb46061c813 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -177,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0); + tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c9dabdd832d..0302ec3fecf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1033,7 +1033,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0); + tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); if (tmp_opt.cookie_plus > 0 && tmp_opt.saw_tstamp && -- cgit v1.2.3-70-g09d2 From 1fe4c481ba637660793217769695c146a037bd54 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:06 +0000 Subject: net-tcp: Fast Open client - cookie cache With help from Eric Dumazet, add Fast Open metrics in tcp metrics cache. The basic ones are MSS and the cookies. Later patch will cache more to handle unfriendly middleboxes. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_metrics.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5aed3718fde..e601da19736 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -405,6 +405,10 @@ extern void tcp_metrics_init(void); extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); extern bool tcp_remember_stamp(struct sock *sk); extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); +extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie); +extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie); extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 1a115b66579..d02ff377778 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -30,6 +30,11 @@ enum tcp_metric_index { TCP_METRIC_MAX, }; +struct tcp_fastopen_metrics { + u16 mss; + struct tcp_fastopen_cookie cookie; +}; + struct tcp_metrics_block { struct tcp_metrics_block __rcu *tcpm_next; struct inetpeer_addr tcpm_addr; @@ -38,6 +43,7 @@ struct tcp_metrics_block { u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX]; + struct tcp_fastopen_metrics tcpm_fastopen; }; static bool tcp_metric_locked(struct tcp_metrics_block *tm, @@ -118,6 +124,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); tm->tcpm_ts = 0; tm->tcpm_ts_stamp = 0; + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.cookie.len = 0; } static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, @@ -633,6 +641,49 @@ bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) return ret; } +static DEFINE_SEQLOCK(fastopen_seqlock); + +void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) +{ + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, __sk_dst_get(sk), false); + if (tm) { + struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; + unsigned int seq; + + do { + seq = read_seqbegin(&fastopen_seqlock); + if (tfom->mss) + *mss = tfom->mss; + *cookie = tfom->cookie; + } while (read_seqretry(&fastopen_seqlock, seq)); + } + rcu_read_unlock(); +} + + +void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie) +{ + struct tcp_metrics_block *tm; + + rcu_read_lock(); + tm = tcp_get_metrics(sk, __sk_dst_get(sk), true); + if (tm) { + struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; + + write_seqlock_bh(&fastopen_seqlock); + tfom->mss = mss; + if (cookie->len > 0) + tfom->cookie = *cookie; + write_sequnlock_bh(&fastopen_seqlock); + } + rcu_read_unlock(); +} + static unsigned long tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { -- cgit v1.2.3-70-g09d2 From 783237e8daf13481ee234997cbbbb823872ac388 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:07 +0000 Subject: net-tcp: Fast Open client - sending SYN-data This patch implements sending SYN-data in tcp_connect(). The data is from tcp_sendmsg() with flag MSG_FASTOPEN (implemented in a later patch). The length of the cookie in tcp_fastopen_req, init'd to 0, controls the type of the SYN. If the cookie is not cached (len==0), the host sends data-less SYN with Fast Open cookie request option to solicit a cookie from the remote. If cookie is not available (len > 0), the host sends a SYN-data with Fast Open cookie option. If cookie length is negative, the SYN will not include any Fast Open option (for fall back operations). To deal with middleboxes that may drop SYN with data or experimental TCP option, the SYN-data is only sent once. SYN retransmits do not include data or Fast Open options. The connection will fall back to regular TCP handshake. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/snmp.h | 1 + include/linux/tcp.h | 6 ++- include/net/tcp.h | 9 ++++ net/ipv4/af_inet.c | 10 ++++- net/ipv4/proc.c | 1 + net/ipv4/tcp_output.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 130 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index e5fcbd079e4..00bc189cb39 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -238,6 +238,7 @@ enum LINUX_MIB_TCPOFOMERGE, /* TCPOFOMerge */ LINUX_MIB_TCPCHALLENGEACK, /* TCPChallengeACK */ LINUX_MIB_TCPSYNCHALLENGE, /* TCPSYNChallenge */ + LINUX_MIB_TCPFASTOPENACTIVE, /* TCPFastOpenActive */ __LINUX_MIB_MAX }; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 12948f54383..1edf96afab4 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -386,7 +386,8 @@ struct tcp_sock { unused : 1; u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ - early_retrans_delayed:1; /* Delayed ER timer installed */ + early_retrans_delayed:1, /* Delayed ER timer installed */ + syn_fastopen:1; /* SYN includes Fast Open option */ /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ @@ -500,6 +501,9 @@ struct tcp_sock { struct tcp_md5sig_info __rcu *md5sig_info; #endif +/* TCP fastopen related information */ + struct tcp_fastopen_request *fastopen_req; + /* When the cookie options are generated and exchanged, then this * object holds a reference to them (cookie_values->kref). Also * contains related tcp_cookie_transactions fields. diff --git a/include/net/tcp.h b/include/net/tcp.h index e601da19736..867557b4244 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1289,6 +1289,15 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key); +struct tcp_fastopen_request { + /* Fast Open cookie. Size 0 means a cookie request */ + struct tcp_fastopen_cookie cookie; + struct msghdr *data; /* data in MSG_FASTOPEN */ + u16 copied; /* queued in tcp_connect() */ +}; + +void tcp_free_fastopen_req(struct tcp_sock *tp); + /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 07a02f6e969..edc414625be 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -556,11 +556,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, } EXPORT_SYMBOL(inet_dgram_connect); -static long inet_wait_for_connect(struct sock *sk, long timeo) +static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) { DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + sk->sk_write_pending += writebias; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -576,6 +577,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } finish_wait(sk_sleep(sk), &wait); + sk->sk_write_pending -= writebias; return timeo; } @@ -634,8 +636,12 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int writebias = (sk->sk_protocol == IPPROTO_TCP) && + tcp_sk(sk)->fastopen_req && + tcp_sk(sk)->fastopen_req->data ? 1 : 0; + /* Error code is set above */ - if (!timeo || !inet_wait_for_connect(sk, timeo)) + if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) goto out; err = sock_intr_errno(timeo); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5240b2ea6..957acd12250 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -262,6 +262,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPOFOMerge", LINUX_MIB_TCPOFOMERGE), SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK), SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE), + SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4849be76ccd..88693281da4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -596,6 +596,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? tcp_cookie_size_check(cvp->cookie_desired) : 0; + struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); @@ -636,6 +637,16 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, remaining -= TCPOLEN_SACKPERM_ALIGNED; } + if (fastopen && fastopen->cookie.len >= 0) { + u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; + need = (need + 3) & ~3U; /* Align to 32 bits */ + if (remaining >= need) { + opts->options |= OPTION_FAST_OPEN_COOKIE; + opts->fastopen_cookie = &fastopen->cookie; + remaining -= need; + tp->syn_fastopen = 1; + } + } /* Note that timestamps are required by the specification. * * Odd numbers of bytes are prohibited by the specification, ensuring @@ -2824,6 +2835,96 @@ void tcp_connect_init(struct sock *sk) tcp_clear_retrans(tp); } +static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + tcb->end_seq += skb->len; + skb_header_release(skb); + __tcp_add_write_queue_tail(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); + tp->write_seq = tcb->end_seq; + tp->packets_out += tcp_skb_pcount(skb); +} + +/* Build and send a SYN with data and (cached) Fast Open cookie. However, + * queue a data-only packet after the regular SYN, such that regular SYNs + * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges + * only the SYN sequence, the data are retransmitted in the first ACK. + * If cookie is not cached or other error occurs, falls back to send a + * regular SYN with Fast Open cookie request option. + */ +static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_fastopen_request *fo = tp->fastopen_req; + int space, i, err = 0, iovlen = fo->data->msg_iovlen; + struct sk_buff *syn_data = NULL, *data; + + tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie); + if (fo->cookie.len <= 0) + goto fallback; + + /* MSS for SYN-data is based on cached MSS and bounded by PMTU and + * user-MSS. Reserve maximum option space for middleboxes that add + * private TCP options. The cost is reduced data space in SYN :( + */ + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp) + tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; + space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - + MAX_TCP_OPTION_SPACE; + + syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + sk->sk_allocation); + if (syn_data == NULL) + goto fallback; + + for (i = 0; i < iovlen && syn_data->len < space; ++i) { + struct iovec *iov = &fo->data->msg_iov[i]; + unsigned char __user *from = iov->iov_base; + int len = iov->iov_len; + + if (syn_data->len + len > space) + len = space - syn_data->len; + else if (i + 1 == iovlen) + /* No more data pending in inet_wait_for_connect() */ + fo->data = NULL; + + if (skb_add_data(syn_data, from, len)) + goto fallback; + } + + /* Queue a data-only packet after the regular SYN for retransmission */ + data = pskb_copy(syn_data, sk->sk_allocation); + if (data == NULL) + goto fallback; + TCP_SKB_CB(data)->seq++; + TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; + TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); + tcp_connect_queue_skb(sk, data); + fo->copied = data->len; + + if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + goto done; + } + syn_data = NULL; + +fallback: + /* Send a regular SYN with Fast Open cookie request option */ + if (fo->cookie.len > 0) + fo->cookie.len = 0; + err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); + if (err) + tp->syn_fastopen = 0; + kfree_skb(syn_data); +done: + fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ + return err; +} + /* Build a SYN and send it off. */ int tcp_connect(struct sock *sk) { @@ -2841,17 +2942,13 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); + tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; + tcp_connect_queue_skb(sk, buff); TCP_ECN_send_syn(sk, buff); - /* Send it off. */ - TCP_SKB_CB(buff)->when = tcp_time_stamp; - tp->retrans_stamp = TCP_SKB_CB(buff)->when; - skb_header_release(buff); - __tcp_add_write_queue_tail(sk, buff); - sk->sk_wmem_queued += buff->truesize; - sk_mem_charge(sk, buff->truesize); - tp->packets_out += tcp_skb_pcount(buff); - err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); + /* Send off SYN; include data in Fast Open. */ + err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : + tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); if (err == -ECONNREFUSED) return err; -- cgit v1.2.3-70-g09d2 From cf60af03ca4e71134206809ea892e49b92a88896 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:09 +0000 Subject: net-tcp: Fast Open client - sendmsg(MSG_FASTOPEN) sendmsg() (or sendto()) with MSG_FASTOPEN is a combo of connect(2) and write(2). The application should replace connect() with it to send data in the opening SYN packet. For blocking socket, sendmsg() blocks until all the data are buffered locally and the handshake is completed like connect() call. It returns similar errno like connect() if the TCP handshake fails. For non-blocking socket, it returns the number of bytes queued (and transmitted in the SYN-data packet) if cookie is available. If cookie is not available, it transmits a data-less SYN packet with Fast Open cookie request option and returns -EINPROGRESS like connect(). Using MSG_FASTOPEN on connecting or connected socket will result in simlar errno like repeating connect() calls. Therefore the application should only use this flag on new sockets. The buffer size of sendmsg() is independent of the MSS of the connection. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 11 ++++++ include/linux/socket.h | 1 + include/net/inet_common.h | 6 ++-- include/net/tcp.h | 3 ++ net/ipv4/af_inet.c | 19 ++++++++--- net/ipv4/tcp.c | 61 +++++++++++++++++++++++++++++++--- net/ipv4/tcp_ipv4.c | 3 ++ 7 files changed, 92 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index e1e021594cf..03964e08818 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -468,6 +468,17 @@ tcp_syncookies - BOOLEAN SYN flood warnings in logs not being really flooded, your server is seriously misconfigured. +tcp_fastopen - INTEGER + Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data + in the opening SYN packet. To use this feature, the client application + must not use connect(). Instead, it should use sendmsg() or sendto() + with MSG_FASTOPEN flag which performs a TCP handshake automatically. + + The values (bitmap) are: + 1: Enables sending data in the opening SYN on the client + + Default: 0 + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 255. Default value diff --git a/include/linux/socket.h b/include/linux/socket.h index 25d6322fb63..ba7b2e817cf 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -268,6 +268,7 @@ struct ucred { #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ #define MSG_EOF MSG_FIN +#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file descriptor received through SCM_RIGHTS */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 22fac9892b1..234008782c8 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -14,9 +14,11 @@ struct sockaddr; struct socket; extern int inet_release(struct socket *sock); -extern int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr, +extern int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, +extern int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); +extern int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); extern int inet_accept(struct socket *sock, struct socket *newsock, int flags); extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock, diff --git a/include/net/tcp.h b/include/net/tcp.h index 867557b4244..c0258100d70 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -212,6 +212,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */ #define TCP_INIT_CWND 10 +/* Bit Flags for sysctl_tcp_fastopen */ +#define TFO_CLIENT_ENABLE 1 + extern struct inet_timewait_death_row tcp_death_row; /* sysctl variables for tcp */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index edc414625be..fe4582ca969 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -585,8 +585,8 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) * Connect to a remote host. There is regrettably still a little * TCP 'magic' in here. */ -int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) +int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) { struct sock *sk = sock->sk; int err; @@ -595,8 +595,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; - lock_sock(sk); - if (uaddr->sa_family == AF_UNSPEC) { err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; @@ -663,7 +661,6 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; err = 0; out: - release_sock(sk); return err; sock_error: @@ -673,6 +670,18 @@ sock_error: sock->state = SS_DISCONNECTING; goto out; } +EXPORT_SYMBOL(__inet_stream_connect); + +int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + int err; + + lock_sock(sock->sk); + err = __inet_stream_connect(sock, uaddr, addr_len, flags); + release_sock(sock->sk); + return err; +} EXPORT_SYMBOL(inet_stream_connect); /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4252cd8f39f..581ecf02c6b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -270,6 +270,7 @@ #include #include +#include #include #include #include @@ -982,26 +983,67 @@ static inline int select_size(const struct sock *sk, bool sg) return tmp; } +void tcp_free_fastopen_req(struct tcp_sock *tp) +{ + if (tp->fastopen_req != NULL) { + kfree(tp->fastopen_req); + tp->fastopen_req = NULL; + } +} + +static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) +{ + struct tcp_sock *tp = tcp_sk(sk); + int err, flags; + + if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) + return -EOPNOTSUPP; + if (tp->fastopen_req != NULL) + return -EALREADY; /* Another Fast Open is in progress */ + + tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), + sk->sk_allocation); + if (unlikely(tp->fastopen_req == NULL)) + return -ENOBUFS; + tp->fastopen_req->data = msg; + + flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; + err = __inet_stream_connect(sk->sk_socket, msg->msg_name, + msg->msg_namelen, flags); + *size = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + return err; +} + int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied; - int mss_now = 0, size_goal; + int iovlen, flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0, offset = 0; bool sg; long timeo; lock_sock(sk); flags = msg->msg_flags; + if (flags & MSG_FASTOPEN) { + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); + if (err == -EINPROGRESS && copied_syn > 0) + goto out; + else if (err) + goto out_err; + offset = copied_syn; + } + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); /* Wait for a connection to finish. */ if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) - goto out_err; + goto do_error; if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { @@ -1037,6 +1079,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, unsigned char __user *from = iov->iov_base; iov++; + if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ + if (offset >= seglen) { + offset -= seglen; + continue; + } + seglen -= offset; + from += offset; + offset = 0; + } while (seglen > 0) { int copy = 0; @@ -1199,7 +1250,7 @@ out: if (copied && likely(!tp->repair)) tcp_push(sk, flags, mss_now, tp->nonagle); release_sock(sk); - return copied; + return copied + copied_syn; do_fault: if (!skb->len) { @@ -1212,7 +1263,7 @@ do_fault: } do_error: - if (copied) + if (copied + copied_syn) goto out; out_err: err = sk_stream_error(sk, flags, err); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 01aa77a9702..1d8b75a5898 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1952,6 +1952,9 @@ void tcp_v4_destroy_sock(struct sock *sk) tp->cookie_values = NULL; } + /* If socket is aborted during connect operation */ + tcp_free_fastopen_req(tp); + sk_sockets_allocated_dec(sk); sock_release_memcg(sk); } -- cgit v1.2.3-70-g09d2 From aab4874355679c70f93993cf3b3fd74643b9ac33 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:10 +0000 Subject: net-tcp: Fast Open client - detecting SYN-data drops On paths with firewalls dropping SYN with data or experimental TCP options, Fast Open connections will have experience SYN timeout and bad performance. The solution is to track such incidents in the cookie cache and disables Fast Open temporarily. Since only the original SYN includes data and/or Fast Open option, the SYN-ACK has some tell-tale sign (tcp_rcv_fastopen_synack()) to detect such drops. If a path has recurring Fast Open SYN drops, Fast Open is disabled for 2^(recurring_losses) minutes starting from four minutes up to roughly one and half day. sendmsg with MSG_FASTOPEN flag will succeed but it behaves as connect() then write(). Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++-- net/ipv4/tcp_input.c | 10 +++++++++- net/ipv4/tcp_metrics.c | 16 +++++++++++++--- net/ipv4/tcp_output.c | 13 +++++++++++-- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index c0258100d70..e07878d246a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -409,9 +409,11 @@ extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, extern bool tcp_remember_stamp(struct sock *sk); extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie); + struct tcp_fastopen_cookie *cookie, + int *syn_loss, unsigned long *last_syn_loss); extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie); + struct tcp_fastopen_cookie *cookie, + bool syn_lost); extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); extern void tcp_disable_fack(struct tcp_sock *tp); extern void tcp_close(struct sock *sk, long timeout); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 38b6a811edf..c49a4fc175b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5652,6 +5652,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *data = tcp_write_queue_head(sk); u16 mss = tp->rx_opt.mss_clamp; + bool syn_drop; if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; @@ -5664,7 +5665,14 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, mss = opt.mss_clamp; } - tcp_fastopen_cache_set(sk, mss, cookie); + /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably + * the remote receives only the retransmitted (regular) SYNs: either + * the original SYN-data or the corresponding SYN-ACK is lost. + */ + syn_drop = (cookie->len <= 0 && data && + inet_csk(sk)->icsk_retransmits); + + tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); if (data) { /* Retransmit unacked data in SYN */ tcp_retransmit_skb(sk, data); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d02ff377778..99779ae44f6 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -32,6 +32,8 @@ enum tcp_metric_index { struct tcp_fastopen_metrics { u16 mss; + u16 syn_loss:10; /* Recurring Fast Open SYN losses */ + unsigned long last_syn_loss; /* Last Fast Open SYN loss */ struct tcp_fastopen_cookie cookie; }; @@ -125,6 +127,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) tm->tcpm_ts = 0; tm->tcpm_ts_stamp = 0; tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; tm->tcpm_fastopen.cookie.len = 0; } @@ -644,7 +647,8 @@ bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) static DEFINE_SEQLOCK(fastopen_seqlock); void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie) + struct tcp_fastopen_cookie *cookie, + int *syn_loss, unsigned long *last_syn_loss) { struct tcp_metrics_block *tm; @@ -659,14 +663,15 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, if (tfom->mss) *mss = tfom->mss; *cookie = tfom->cookie; + *syn_loss = tfom->syn_loss; + *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; } while (read_seqretry(&fastopen_seqlock, seq)); } rcu_read_unlock(); } - void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie) + struct tcp_fastopen_cookie *cookie, bool syn_lost) { struct tcp_metrics_block *tm; @@ -679,6 +684,11 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, tfom->mss = mss; if (cookie->len > 0) tfom->cookie = *cookie; + if (syn_lost) { + ++tfom->syn_loss; + tfom->last_syn_loss = jiffies; + } else + tfom->syn_loss = 0; write_sequnlock_bh(&fastopen_seqlock); } rcu_read_unlock(); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 88693281da4..c5cfd5ec318 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2860,10 +2860,19 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int space, i, err = 0, iovlen = fo->data->msg_iovlen; + int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; struct sk_buff *syn_data = NULL, *data; + unsigned long last_syn_loss = 0; + + tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, + &syn_loss, &last_syn_loss); + /* Recurring FO SYN losses: revert to regular handshake temporarily */ + if (syn_loss > 1 && + time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { + fo->cookie.len = -1; + goto fallback; + } - tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie); if (fo->cookie.len <= 0) goto fallback; -- cgit v1.2.3-70-g09d2 From 67da22d23fa6f3324e03bcd0580b914b2e4afbf3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jul 2012 06:43:11 +0000 Subject: net-tcp: Fast Open client - cookie-less mode In trusted networks, e.g., intranet, data-center, the client does not need to use Fast Open cookie to mitigate DoS attacks. In cookie-less mode, sendmsg() with MSG_FASTOPEN flag will send SYN-data regardless of cookie availability. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 ++ include/linux/tcp.h | 1 + include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 8 ++++++-- net/ipv4/tcp_output.c | 6 +++++- 5 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 03964e08818..5f3ef7f7fce 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -476,6 +476,8 @@ tcp_fastopen - INTEGER The values (bitmap) are: 1: Enables sending data in the opening SYN on the client + 5: Enables sending data in the opening SYN on the client regardless + of cookie availability. Default: 0 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1edf96afab4..9febfb685c3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -387,6 +387,7 @@ struct tcp_sock { u8 repair_queue; u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ early_retrans_delayed:1, /* Delayed ER timer installed */ + syn_data:1, /* SYN includes data */ syn_fastopen:1; /* SYN includes Fast Open option */ /* RTT measurement */ diff --git a/include/net/tcp.h b/include/net/tcp.h index e07878d246a..bc7c134ec05 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -214,6 +214,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* Bit Flags for sysctl_tcp_fastopen */ #define TFO_CLIENT_ENABLE 1 +#define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ extern struct inet_timewait_death_row tcp_death_row; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c49a4fc175b..e67d685a6c0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5650,7 +5650,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_fastopen_cookie *cookie) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *data = tcp_write_queue_head(sk); + struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; u16 mss = tp->rx_opt.mss_clamp; bool syn_drop; @@ -5665,6 +5665,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, mss = opt.mss_clamp; } + if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */ + cookie->len = -1; + /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably * the remote receives only the retransmitted (regular) SYNs: either * the original SYN-data or the corresponding SYN-ACK is lost. @@ -5816,7 +5819,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_finish_connect(sk, skb); - if (tp->syn_fastopen && tcp_rcv_fastopen_synack(sk, skb, &foc)) + if ((tp->syn_fastopen || tp->syn_data) && + tcp_rcv_fastopen_synack(sk, skb, &foc)) return -1; if (sk->sk_write_pending || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c5cfd5ec318..27a32acfdb6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2864,6 +2864,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) struct sk_buff *syn_data = NULL, *data; unsigned long last_syn_loss = 0; + tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, &syn_loss, &last_syn_loss); /* Recurring FO SYN losses: revert to regular handshake temporarily */ @@ -2873,7 +2874,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; } - if (fo->cookie.len <= 0) + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) + fo->cookie.len = -1; + else if (fo->cookie.len <= 0) goto fallback; /* MSS for SYN-data is based on cached MSS and bounded by PMTU and @@ -2916,6 +2919,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) fo->copied = data->len; if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { + tp->syn_data = (fo->copied > 0); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); goto done; } -- cgit v1.2.3-70-g09d2 From 5815d5e7aae3cc9c5e85af83094d4d6498c3f4fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jul 2012 23:02:34 +0000 Subject: tcp: use hash_32() in tcp_metrics Fix a missing roundup_pow_of_two(), since tcpmhash_entries is not guaranteed to be a power of two. Uses hash_32() instead of custom hash. tcpmhash_entries should be an unsigned int. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- net/ipv4/tcp_metrics.c | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d909c7fc3da..0ffb8e31f3c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,7 +40,7 @@ struct netns_ipv4 { struct sock **icmp_sk; struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; - unsigned int tcp_metrics_hash_mask; + unsigned int tcp_metrics_hash_log; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 99779ae44f6..992f1bff4fc 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -228,10 +229,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = dev_net(dst->dev); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { @@ -265,10 +264,8 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = twsk_net(tw); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm; tm = rcu_dereference(tm->tcpm_next)) { @@ -302,10 +299,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, return NULL; } - hash ^= (hash >> 24) ^ (hash >> 16) ^ (hash >> 8); - net = dev_net(dst->dev); - hash &= net->ipv4.tcp_metrics_hash_mask; + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); tm = __tcp_get_metrics(&addr, net, hash); reclaim = false; @@ -694,7 +689,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static unsigned long tcpmhash_entries; +static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { ssize_t ret; @@ -702,7 +697,7 @@ static int __init set_tcpmhash_entries(char *str) if (!str) return 0; - ret = kstrtoul(str, 0, &tcpmhash_entries); + ret = kstrtouint(str, 0, &tcpmhash_entries); if (ret) return 0; @@ -712,7 +707,8 @@ __setup("tcpmhash_entries=", set_tcpmhash_entries); static int __net_init tcp_net_metrics_init(struct net *net) { - int slots, size; + size_t size; + unsigned int slots; slots = tcpmhash_entries; if (!slots) { @@ -722,14 +718,13 @@ static int __net_init tcp_net_metrics_init(struct net *net) slots = 8 * 1024; } - size = slots * sizeof(struct tcpm_hash_bucket); + net->ipv4.tcp_metrics_hash_log = order_base_2(slots); + size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log; net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL); if (!net->ipv4.tcp_metrics_hash) return -ENOMEM; - net->ipv4.tcp_metrics_hash_mask = (slots - 1); - return 0; } -- cgit v1.2.3-70-g09d2 From 6f458dfb409272082c9bfa412f77ff2fc21c626f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jul 2012 05:45:50 +0000 Subject: tcp: improve latencies of timer triggered events Modern TCP stack highly depends on tcp_write_timer() having a small latency, but current implementation doesn't exactly meet the expectations. When a timer fires but finds the socket is owned by the user, it rearms itself for an additional delay hoping next run will be more successful. tcp_write_timer() for example uses a 50ms delay for next try, and it defeats many attempts to get predictable TCP behavior in term of latencies. Use the recently introduced tcp_release_cb(), so that the user owning the socket will call various handlers right before socket release. This will permit us to post a followup patch to address the tcp_tso_should_defer() syndrome (some deferred packets have to wait RTO timer to be transmitted, while cwnd should allow us to send them sooner) Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Nandita Dukkipati Cc: H.K. Jerry Chu Cc: John Heffner Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++- include/net/tcp.h | 2 ++ net/ipv4/tcp_output.c | 46 ++++++++++++++++++++------------- net/ipv4/tcp_timer.c | 70 +++++++++++++++++++++++++++------------------------ 4 files changed, 71 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9febfb685c3..2761856987b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -515,7 +515,9 @@ struct tcp_sock { enum tsq_flags { TSQ_THROTTLED, TSQ_QUEUED, - TSQ_OWNED, /* tcp_tasklet_func() found socket was locked */ + TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ + TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ + TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index bc7c134ec05..e19124b84cd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -350,6 +350,8 @@ extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern void tcp_release_cb(struct sock *sk); +extern void tcp_write_timer_handler(struct sock *sk); +extern void tcp_delack_timer_handler(struct sock *sk); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 27a32acfdb6..950aebfd996 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -837,6 +837,13 @@ struct tsq_tasklet { }; static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet); +static void tcp_tsq_handler(struct sock *sk) +{ + if ((1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) + tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC); +} /* * One tasklest per cpu tries to send more skbs. * We run in tasklet context but need to disable irqs when @@ -864,16 +871,10 @@ static void tcp_tasklet_func(unsigned long data) bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | - TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, - tcp_current_mss(sk), - 0, 0, - GFP_ATOMIC); + tcp_tsq_handler(sk); } else { /* defer the work to tcp_release_cb() */ - set_bit(TSQ_OWNED, &tp->tsq_flags); + set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags); } bh_unlock_sock(sk); @@ -882,6 +883,9 @@ static void tcp_tasklet_func(unsigned long data) } } +#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ + (1UL << TCP_WRITE_TIMER_DEFERRED) | \ + (1UL << TCP_DELACK_TIMER_DEFERRED)) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -892,16 +896,24 @@ static void tcp_tasklet_func(unsigned long data) void tcp_release_cb(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + unsigned long flags, nflags; - if (test_and_clear_bit(TSQ_OWNED, &tp->tsq_flags)) { - if ((1 << sk->sk_state) & - (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | - TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, - tcp_current_mss(sk), - 0, 0, - GFP_ATOMIC); - } + /* perform an atomic operation only if at least one flag is set */ + do { + flags = tp->tsq_flags; + if (!(flags & TCP_DEFERRED_ALL)) + return; + nflags = flags & ~TCP_DEFERRED_ALL; + } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags); + + if (flags & (1UL << TCP_TSQ_DEFERRED)) + tcp_tsq_handler(sk); + + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) + tcp_write_timer_handler(sk); + + if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) + tcp_delack_timer_handler(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e911e6c523e..6df36ad55a3 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; int sysctl_tcp_thin_linear_timeouts __read_mostly; -static void tcp_write_timer(unsigned long); -static void tcp_delack_timer(unsigned long); -static void tcp_keepalive_timer (unsigned long data); - -void tcp_init_xmit_timers(struct sock *sk) -{ - inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, - &tcp_keepalive_timer); -} -EXPORT_SYMBOL(tcp_init_xmit_timers); - static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; @@ -205,21 +194,11 @@ static int tcp_write_timeout(struct sock *sk) return 0; } -static void tcp_delack_timer(unsigned long data) +void tcp_delack_timer_handler(struct sock *sk) { - struct sock *sk = (struct sock *)data; struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later. */ - icsk->icsk_ack.blocked = 1; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); - goto out_unlock; - } - sk_mem_reclaim_partial(sk); if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) @@ -260,7 +239,21 @@ static void tcp_delack_timer(unsigned long data) out: if (sk_under_memory_pressure(sk)) sk_mem_reclaim(sk); -out_unlock: +} + +static void tcp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_delack_timer_handler(sk); + } else { + inet_csk(sk)->icsk_ack.blocked = 1; + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); + /* deleguate our work to tcp_release_cb() */ + set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + } bh_unlock_sock(sk); sock_put(sk); } @@ -450,19 +443,11 @@ out_reset_timer: out:; } -static void tcp_write_timer(unsigned long data) +void tcp_write_timer_handler(struct sock *sk) { - struct sock *sk = (struct sock *)data; struct inet_connection_sock *icsk = inet_csk(sk); int event; - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - /* Try again later */ - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); - goto out_unlock; - } - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) goto out; @@ -485,7 +470,19 @@ static void tcp_write_timer(unsigned long data) out: sk_mem_reclaim(sk); -out_unlock: +} + +static void tcp_write_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + bh_lock_sock(sk); + if (!sock_owned_by_user(sk)) { + tcp_write_timer_handler(sk); + } else { + /* deleguate our work to tcp_release_cb() */ + set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); + } bh_unlock_sock(sk); sock_put(sk); } @@ -602,3 +599,10 @@ out: bh_unlock_sock(sk); sock_put(sk); } + +void tcp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, + &tcp_keepalive_timer); +} +EXPORT_SYMBOL(tcp_init_xmit_timers); -- cgit v1.2.3-70-g09d2 From d40156aa5ecbd51fed932ed4813df82b56e5ff4d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:47 +0000 Subject: rtnl: allow to specify different num for rx and tx queue count Also cut out unused function parameters and possible err in return value. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 14 ++++++++------ include/net/rtnetlink.h | 10 ++++++---- net/core/rtnetlink.c | 16 ++++++++-------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 3960b1b2617..f41ddc2d48b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4845,17 +4845,19 @@ static int bond_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int bond_get_tx_queues(struct net *net, struct nlattr *tb[]) +static unsigned int bond_get_num_tx_queues(void) { return tx_queues; } static struct rtnl_link_ops bond_link_ops __read_mostly = { - .kind = "bond", - .priv_size = sizeof(struct bonding), - .setup = bond_setup, - .validate = bond_validate, - .get_tx_queues = bond_get_tx_queues, + .kind = "bond", + .priv_size = sizeof(struct bonding), + .setup = bond_setup, + .validate = bond_validate, + .get_num_tx_queues = bond_get_num_tx_queues, + .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number + as for TX queues */ }; /* Create a new bond based on the specified name and bonding parameters. diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bbcfd099343..6b00c4fc429 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -44,8 +44,10 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @get_xstats_size: Function to calculate required room for dumping device * specific statistics * @fill_xstats: Function to dump device specific statistics - * @get_tx_queues: Function to determine number of transmit queues to create when - * creating a new device. + * @get_num_tx_queues: Function to determine number of transmit queues + * to create when creating a new device. + * @get_num_rx_queues: Function to determine number of receive queues + * to create when creating a new device. */ struct rtnl_link_ops { struct list_head list; @@ -77,8 +79,8 @@ struct rtnl_link_ops { size_t (*get_xstats_size)(const struct net_device *dev); int (*fill_xstats)(struct sk_buff *skb, const struct net_device *dev); - int (*get_tx_queues)(struct net *net, - struct nlattr *tb[]); + unsigned int (*get_num_tx_queues)(void); + unsigned int (*get_num_rx_queues)(void); }; extern int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 045db8ad87c..db5a8ad8a79 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1624,17 +1624,17 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, { int err; struct net_device *dev; - unsigned int num_queues = 1; + unsigned int num_tx_queues = 1; + unsigned int num_rx_queues = 1; - if (ops->get_tx_queues) { - err = ops->get_tx_queues(src_net, tb); - if (err < 0) - goto err; - num_queues = err; - } + if (ops->get_num_tx_queues) + num_tx_queues = ops->get_num_tx_queues(); + if (ops->get_num_rx_queues) + num_rx_queues = ops->get_num_rx_queues(); err = -ENOMEM; - dev = alloc_netdev_mq(ops->priv_size, ifname, ops->setup, num_queues); + dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup, + num_tx_queues, num_rx_queues); if (!dev) goto err; -- cgit v1.2.3-70-g09d2 From df4ab5b3c295050da09153fa9760042e4de3ffff Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 20 Jul 2012 02:28:49 +0000 Subject: net: rename bond_queue_mapping to slave_dev_queue_mapping As this is going to be used not only by bonding. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 +++--- include/net/sch_generic.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f41ddc2d48b..6fae5f3ec7f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -395,8 +395,8 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, skb->dev = slave_dev; BUILD_BUG_ON(sizeof(skb->queue_mapping) != - sizeof(qdisc_skb_cb(skb)->bond_queue_mapping)); - skb->queue_mapping = qdisc_skb_cb(skb)->bond_queue_mapping; + sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); + skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; if (unlikely(netpoll_tx_running(slave_dev))) bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb); @@ -4184,7 +4184,7 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb) /* * Save the original txq to restore before passing to the driver */ - qdisc_skb_cb(skb)->bond_queue_mapping = skb->queue_mapping; + qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; if (unlikely(txq >= dev->real_num_tx_queues)) { do { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9d7d54a00e6..d9611e03241 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -220,7 +220,7 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; - u16 bond_queue_mapping; + u16 slave_dev_queue_mapping; u16 _pad; unsigned char data[20]; }; -- cgit v1.2.3-70-g09d2 From 89aef8921bfbac22f00e04f8450f6e447db13e42 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 11:00:09 -0700 Subject: ipv4: Delete routing cache. The ipv4 routing cache is non-deterministic, performance wise, and is subject to reasonably easy to launch denial of service attacks. The routing cache works great for well behaved traffic, and the world was a much friendlier place when the tradeoffs that led to the routing cache's design were considered. What it boils down to is that the performance of the routing cache is a product of the traffic patterns seen by a system rather than being a product of the contents of the routing tables. The former of which is controllable by external entitites. Even for "well behaved" legitimate traffic, high volume sites can see hit rates in the routing cache of only ~%10. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/fib_frontend.c | 5 - net/ipv4/route.c | 940 +----------------------------------------------- 3 files changed, 13 insertions(+), 933 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index ace3cb44251..5dcfeb621e0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -109,7 +109,6 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); extern void rt_cache_flush(struct net *net, int how); -extern void rt_cache_flush_batch(struct net *net); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b83203658ee..f277cf0e632 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1072,11 +1072,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), 0); break; case NETDEV_UNREGISTER_BATCH: - /* The batch unregister is only called on the first - * device in the list of devices being unregistered. - * Therefore we should not pass dev_net(dev) in here. - */ - rt_cache_flush_batch(NULL); break; } return NOTIFY_DONE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d547f6fae20..6d6146d31f2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,10 +133,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; -static int rt_chain_length_max __read_mostly = 20; - -static struct delayed_work expires_work; -static unsigned long expires_ljiffies; /* * Interface to generic destination cache. @@ -152,7 +148,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static int rt_garbage_collect(struct dst_ops *ops); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -172,7 +167,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), - .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, @@ -209,184 +203,30 @@ const __u8 ip_tos2prio[16] = { }; EXPORT_SYMBOL(ip_tos2prio); -/* - * Route cache. - */ - -/* The locking scheme is rather straight forward: - * - * 1) Read-Copy Update protects the buckets of the central route hash. - * 2) Only writers remove entries, and they hold the lock - * as they look at rtable reference counts. - * 3) Only readers acquire references to rtable entries, - * they do so with atomic increments and with the - * lock held. - */ - -struct rt_hash_bucket { - struct rtable __rcu *chain; -}; - -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ - defined(CONFIG_PROVE_LOCKING) -/* - * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks - * The size of this table is a power of two and depends on the number of CPUS. - * (on lockdep we have a quite big spinlock_t, so keep the size down there) - */ -#ifdef CONFIG_LOCKDEP -# define RT_HASH_LOCK_SZ 256 -#else -# if NR_CPUS >= 32 -# define RT_HASH_LOCK_SZ 4096 -# elif NR_CPUS >= 16 -# define RT_HASH_LOCK_SZ 2048 -# elif NR_CPUS >= 8 -# define RT_HASH_LOCK_SZ 1024 -# elif NR_CPUS >= 4 -# define RT_HASH_LOCK_SZ 512 -# else -# define RT_HASH_LOCK_SZ 256 -# endif -#endif - -static spinlock_t *rt_hash_locks; -# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)] - -static __init void rt_hash_lock_init(void) -{ - int i; - - rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, - GFP_KERNEL); - if (!rt_hash_locks) - panic("IP: failed to allocate rt_hash_locks\n"); - - for (i = 0; i < RT_HASH_LOCK_SZ; i++) - spin_lock_init(&rt_hash_locks[i]); -} -#else -# define rt_hash_lock_addr(slot) NULL - -static inline void rt_hash_lock_init(void) -{ -} -#endif - -static struct rt_hash_bucket *rt_hash_table __read_mostly; -static unsigned int rt_hash_mask __read_mostly; -static unsigned int rt_hash_log __read_mostly; - static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, - int genid) -{ - return jhash_3words((__force u32)daddr, (__force u32)saddr, - idx, genid) - & rt_hash_mask; -} - static inline int rt_genid(struct net *net) { return atomic_read(&net->ipv4.rt_genid); } #ifdef CONFIG_PROC_FS -struct rt_cache_iter_state { - struct seq_net_private p; - int bucket; - int genid; -}; - -static struct rtable *rt_cache_get_first(struct seq_file *seq) -{ - struct rt_cache_iter_state *st = seq->private; - struct rtable *r = NULL; - - for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) - continue; - rcu_read_lock_bh(); - r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); - while (r) { - if (dev_net(r->dst.dev) == seq_file_net(seq) && - r->rt_genid == st->genid) - return r; - r = rcu_dereference_bh(r->dst.rt_next); - } - rcu_read_unlock_bh(); - } - return r; -} - -static struct rtable *__rt_cache_get_next(struct seq_file *seq, - struct rtable *r) -{ - struct rt_cache_iter_state *st = seq->private; - - r = rcu_dereference_bh(r->dst.rt_next); - while (!r) { - rcu_read_unlock_bh(); - do { - if (--st->bucket < 0) - return NULL; - } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); - rcu_read_lock_bh(); - r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); - } - return r; -} - -static struct rtable *rt_cache_get_next(struct seq_file *seq, - struct rtable *r) -{ - struct rt_cache_iter_state *st = seq->private; - while ((r = __rt_cache_get_next(seq, r)) != NULL) { - if (dev_net(r->dst.dev) != seq_file_net(seq)) - continue; - if (r->rt_genid == st->genid) - break; - } - return r; -} - -static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos) -{ - struct rtable *r = rt_cache_get_first(seq); - - if (r) - while (pos && (r = rt_cache_get_next(seq, r))) - --pos; - return pos ? NULL : r; -} - static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { - struct rt_cache_iter_state *st = seq->private; if (*pos) - return rt_cache_get_idx(seq, *pos - 1); - st->genid = rt_genid(seq_file_net(seq)); + return NULL; return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct rtable *r; - - if (v == SEQ_START_TOKEN) - r = rt_cache_get_first(seq); - else - r = rt_cache_get_next(seq, v); ++*pos; - return r; + return NULL; } static void rt_cache_seq_stop(struct seq_file *seq, void *v) { - if (v && v != SEQ_START_TOKEN) - rcu_read_unlock_bh(); } static int rt_cache_seq_show(struct seq_file *seq, void *v) @@ -396,24 +236,6 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" "HHUptod\tSpecDst"); - else { - struct rtable *r = v; - int len; - - seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" - "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", - r->dst.dev ? r->dst.dev->name : "*", - (__force u32)r->rt_dst, - (__force u32)r->rt_gateway, - r->rt_flags, atomic_read(&r->dst.__refcnt), - r->dst.__use, 0, (__force u32)r->rt_src, - dst_metric_advmss(&r->dst) + 40, - dst_metric(&r->dst, RTAX_WINDOW), 0, - r->rt_key_tos, - -1, 0, 0, &len); - - seq_printf(seq, "%*s\n", 127 - len, ""); - } return 0; } @@ -426,8 +248,7 @@ static const struct seq_operations rt_cache_seq_ops = { static int rt_cache_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &rt_cache_seq_ops, - sizeof(struct rt_cache_iter_state)); + return seq_open(file, &rt_cache_seq_ops); } static const struct file_operations rt_cache_seq_fops = { @@ -435,7 +256,7 @@ static const struct file_operations rt_cache_seq_fops = { .open = rt_cache_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = seq_release, }; @@ -625,262 +446,11 @@ static inline int ip_rt_proc_init(void) } #endif /* CONFIG_PROC_FS */ -static inline void rt_free(struct rtable *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - -static inline void rt_drop(struct rtable *rt) -{ - ip_rt_put(rt); - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - -static inline int rt_fast_clean(struct rtable *rth) -{ - /* Kill broadcast/multicast entries very aggresively, if they - collide in hash table with more useful entries */ - return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && - rt_is_input_route(rth) && rth->dst.rt_next; -} - -static inline int rt_valuable(struct rtable *rth) -{ - return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->dst.expires; -} - -static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) -{ - unsigned long age; - int ret = 0; - - if (atomic_read(&rth->dst.__refcnt)) - goto out; - - age = jiffies - rth->dst.lastuse; - if ((age <= tmo1 && !rt_fast_clean(rth)) || - (age <= tmo2 && rt_valuable(rth))) - goto out; - ret = 1; -out: return ret; -} - -/* Bits of score are: - * 31: very valuable - * 30: not quite useless - * 29..0: usage counter - */ -static inline u32 rt_score(struct rtable *rt) -{ - u32 score = jiffies - rt->dst.lastuse; - - score = ~score & ~(3<<30); - - if (rt_valuable(rt)) - score |= (1<<31); - - if (rt_is_output_route(rt) || - !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) - score |= (1<<30); - - return score; -} - -static inline bool rt_caching(const struct net *net) -{ - return net->ipv4.current_rt_cache_rebuild_count <= - net->ipv4.sysctl_rt_cache_rebuild_count; -} - -static inline bool compare_hash_inputs(const struct rtable *rt1, - const struct rtable *rt2) -{ - return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | - ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0); -} - -static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) -{ - return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | - ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | - (rt1->rt_mark ^ rt2->rt_mark) | - (rt1->rt_key_tos ^ rt2->rt_key_tos) | - (rt1->rt_route_iif ^ rt2->rt_route_iif) | - (rt1->rt_oif ^ rt2->rt_oif)) == 0; -} - -static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) -{ - return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); -} - static inline int rt_is_expired(struct rtable *rth) { return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); } -/* - * Perform a full scan of hash table and free all entries. - * Can be called by a softirq or a process. - * In the later case, we want to be reschedule if necessary - */ -static void rt_do_flush(struct net *net, int process_context) -{ - unsigned int i; - struct rtable *rth, *next; - - for (i = 0; i <= rt_hash_mask; i++) { - struct rtable __rcu **pprev; - struct rtable *list; - - if (process_context && need_resched()) - cond_resched(); - rth = rcu_access_pointer(rt_hash_table[i].chain); - if (!rth) - continue; - - spin_lock_bh(rt_hash_lock_addr(i)); - - list = NULL; - pprev = &rt_hash_table[i].chain; - rth = rcu_dereference_protected(*pprev, - lockdep_is_held(rt_hash_lock_addr(i))); - - while (rth) { - next = rcu_dereference_protected(rth->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i))); - - if (!net || - net_eq(dev_net(rth->dst.dev), net)) { - rcu_assign_pointer(*pprev, next); - rcu_assign_pointer(rth->dst.rt_next, list); - list = rth; - } else { - pprev = &rth->dst.rt_next; - } - rth = next; - } - - spin_unlock_bh(rt_hash_lock_addr(i)); - - for (; list; list = next) { - next = rcu_dereference_protected(list->dst.rt_next, 1); - rt_free(list); - } - } -} - -/* - * While freeing expired entries, we compute average chain length - * and standard deviation, using fixed-point arithmetic. - * This to have an estimation of rt_chain_length_max - * rt_chain_length_max = max(elasticity, AVG + 4*SD) - * We use 3 bits for frational part, and 29 (or 61) for magnitude. - */ - -#define FRACT_BITS 3 -#define ONE (1UL << FRACT_BITS) - -/* - * Given a hash chain and an item in this hash chain, - * find if a previous entry has the same hash_inputs - * (but differs on tos, mark or oif) - * Returns 0 if an alias is found. - * Returns ONE if rth has no alias before itself. - */ -static int has_noalias(const struct rtable *head, const struct rtable *rth) -{ - const struct rtable *aux = head; - - while (aux != rth) { - if (compare_hash_inputs(aux, rth)) - return 0; - aux = rcu_dereference_protected(aux->dst.rt_next, 1); - } - return ONE; -} - -static void rt_check_expire(void) -{ - static unsigned int rover; - unsigned int i = rover, goal; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long samples = 0; - unsigned long sum = 0, sum2 = 0; - unsigned long delta; - u64 mult; - - delta = jiffies - expires_ljiffies; - expires_ljiffies = jiffies; - mult = ((u64)delta) << rt_hash_log; - if (ip_rt_gc_timeout > 1) - do_div(mult, ip_rt_gc_timeout); - goal = (unsigned int)mult; - if (goal > rt_hash_mask) - goal = rt_hash_mask + 1; - for (; goal > 0; goal--) { - unsigned long tmo = ip_rt_gc_timeout; - unsigned long length; - - i = (i + 1) & rt_hash_mask; - rthp = &rt_hash_table[i].chain; - - if (need_resched()) - cond_resched(); - - samples++; - - if (rcu_dereference_raw(*rthp) == NULL) - continue; - length = 0; - spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { - prefetch(rth->dst.rt_next); - if (rt_is_expired(rth) || - rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - - /* We only count entries on a chain with equal - * hash inputs once so that entries for - * different QOS levels, and other non-hash - * input attributes don't unfairly skew the - * length computation - */ - tmo >>= 1; - rthp = &rth->dst.rt_next; - length += has_noalias(rt_hash_table[i].chain, rth); - } - spin_unlock_bh(rt_hash_lock_addr(i)); - sum += length; - sum2 += length*length; - } - if (samples) { - unsigned long avg = sum / samples; - unsigned long sd = int_sqrt(sum2 / samples - avg*avg); - rt_chain_length_max = max_t(unsigned long, - ip_rt_gc_elasticity, - (avg + 4*sd) >> FRACT_BITS); - } - rover = i; -} - -/* - * rt_worker_func() is run in process context. - * we call rt_check_expire() to scan part of the hash table - */ -static void rt_worker_func(struct work_struct *work) -{ - rt_check_expire(); - schedule_delayed_work(&expires_work, ip_rt_gc_interval); -} - /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -902,167 +472,6 @@ static void rt_cache_invalidate(struct net *net) void rt_cache_flush(struct net *net, int delay) { rt_cache_invalidate(net); - if (delay >= 0) - rt_do_flush(net, !in_softirq()); -} - -/* Flush previous cache invalidated entries from the cache */ -void rt_cache_flush_batch(struct net *net) -{ - rt_do_flush(net, !in_softirq()); -} - -static void rt_emergency_hash_rebuild(struct net *net) -{ - net_warn_ratelimited("Route hash chain too long!\n"); - rt_cache_invalidate(net); -} - -/* - Short description of GC goals. - - We want to build algorithm, which will keep routing cache - at some equilibrium point, when number of aged off entries - is kept approximately equal to newly generated ones. - - Current expiration strength is variable "expire". - We try to adjust it dynamically, so that if networking - is idle expires is large enough to keep enough of warm entries, - and when load increases it reduces to limit cache size. - */ - -static int rt_garbage_collect(struct dst_ops *ops) -{ - static unsigned long expire = RT_GC_TIMEOUT; - static unsigned long last_gc; - static int rover; - static int equilibrium; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long now = jiffies; - int goal; - int entries = dst_entries_get_fast(&ipv4_dst_ops); - - /* - * Garbage collection is pretty expensive, - * do not make it too frequently. - */ - - RT_CACHE_STAT_INC(gc_total); - - if (now - last_gc < ip_rt_gc_min_interval && - entries < ip_rt_max_size) { - RT_CACHE_STAT_INC(gc_ignored); - goto out; - } - - entries = dst_entries_get_slow(&ipv4_dst_ops); - /* Calculate number of entries, which we want to expire now. */ - goal = entries - (ip_rt_gc_elasticity << rt_hash_log); - if (goal <= 0) { - if (equilibrium < ipv4_dst_ops.gc_thresh) - equilibrium = ipv4_dst_ops.gc_thresh; - goal = entries - equilibrium; - if (goal > 0) { - equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); - goal = entries - equilibrium; - } - } else { - /* We are in dangerous area. Try to reduce cache really - * aggressively. - */ - goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); - equilibrium = entries - goal; - } - - if (now - last_gc >= ip_rt_gc_min_interval) - last_gc = now; - - if (goal <= 0) { - equilibrium += goal; - goto work_done; - } - - do { - int i, k; - - for (i = rt_hash_mask, k = rover; i >= 0; i--) { - unsigned long tmo = expire; - - k = (k + 1) & rt_hash_mask; - rthp = &rt_hash_table[k].chain; - spin_lock_bh(rt_hash_lock_addr(k)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) { - if (!rt_is_expired(rth) && - !rt_may_expire(rth, tmo, expire)) { - tmo >>= 1; - rthp = &rth->dst.rt_next; - continue; - } - *rthp = rth->dst.rt_next; - rt_free(rth); - goal--; - } - spin_unlock_bh(rt_hash_lock_addr(k)); - if (goal <= 0) - break; - } - rover = k; - - if (goal <= 0) - goto work_done; - - /* Goal is not achieved. We stop process if: - - - if expire reduced to zero. Otherwise, expire is halfed. - - if table is not full. - - if we are called from interrupt. - - jiffies check is just fallback/debug loop breaker. - We will not spin here for long time in any case. - */ - - RT_CACHE_STAT_INC(gc_goal_miss); - - if (expire == 0) - break; - - expire >>= 1; - - if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - } while (!in_softirq() && time_before_eq(jiffies, now)); - - if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) - goto out; - net_warn_ratelimited("dst cache overflow\n"); - RT_CACHE_STAT_INC(gc_dst_overflow); - return 1; - -work_done: - expire += ip_rt_gc_min_interval; - if (expire > ip_rt_gc_timeout || - dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || - dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) - expire = ip_rt_gc_timeout; -out: return 0; -} - -/* - * Returns number of entries in a hash chain that have different hash_inputs - */ -static int slow_chain_length(const struct rtable *head) -{ - int length = 0; - const struct rtable *rth = head; - - while (rth) { - length += has_noalias(head, rth); - rth = rcu_dereference_protected(rth->dst.rt_next, 1); - } - return length >> FRACT_BITS; } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -1086,139 +495,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt, - struct sk_buff *skb, int ifindex) -{ - struct rtable *rth, *cand; - struct rtable __rcu **rthp, **candp; - unsigned long now; - u32 min_score; - int chain_length; - -restart: - chain_length = 0; - min_score = ~(u32)0; - cand = NULL; - candp = NULL; - now = jiffies; - - if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) { - /* - * If we're not caching, just tell the caller we - * were successful and don't touch the route. The - * caller hold the sole reference to the cache entry, and - * it will be released when the caller is done with it. - * If we drop it here, the callers have no way to resolve routes - * when we're not caching. Instead, just point *rp at rt, so - * the caller gets a single use out of the route - * Note that we do rt_free on this new route entry, so that - * once its refcount hits zero, we are still able to reap it - * (Thanks Alexey) - * Note: To avoid expensive rcu stuff for this uncached dst, - * we set DST_NOCACHE so that dst_release() can free dst without - * waiting a grace period. - */ - - rt->dst.flags |= DST_NOCACHE; - goto skip_hashing; - } - - rthp = &rt_hash_table[hash].chain; - - spin_lock_bh(rt_hash_lock_addr(hash)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { - if (rt_is_expired(rth)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - if (compare_keys(rth, rt) && compare_netns(rth, rt)) { - /* Put it first */ - *rthp = rth->dst.rt_next; - /* - * Since lookup is lockfree, the deletion - * must be visible to another weakly ordered CPU before - * the insertion at the start of the hash chain. - */ - rcu_assign_pointer(rth->dst.rt_next, - rt_hash_table[hash].chain); - /* - * Since lookup is lockfree, the update writes - * must be ordered for consistency on SMP. - */ - rcu_assign_pointer(rt_hash_table[hash].chain, rth); - - dst_use(&rth->dst, now); - spin_unlock_bh(rt_hash_lock_addr(hash)); - - rt_drop(rt); - if (skb) - skb_dst_set(skb, &rth->dst); - return rth; - } - - if (!atomic_read(&rth->dst.__refcnt)) { - u32 score = rt_score(rth); - - if (score <= min_score) { - cand = rth; - candp = rthp; - min_score = score; - } - } - - chain_length++; - - rthp = &rth->dst.rt_next; - } - - if (cand) { - /* ip_rt_gc_elasticity used to be average length of chain - * length, when exceeded gc becomes really aggressive. - * - * The second limit is less certain. At the moment it allows - * only 2 entries per bucket. We will see. - */ - if (chain_length > ip_rt_gc_elasticity) { - *candp = cand->dst.rt_next; - rt_free(cand); - } - } else { - if (chain_length > rt_chain_length_max && - slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { - struct net *net = dev_net(rt->dst.dev); - int num = ++net->ipv4.current_rt_cache_rebuild_count; - if (!rt_caching(net)) { - pr_warn("%s: %d rebuilds is over limit, route caching disabled\n", - rt->dst.dev->name, num); - } - rt_emergency_hash_rebuild(net); - spin_unlock_bh(rt_hash_lock_addr(hash)); - - hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, - ifindex, rt_genid(net)); - goto restart; - } - } - - rt->dst.rt_next = rt_hash_table[hash].chain; - - /* - * Since lookup is lockfree, we must make sure - * previous writes to rt are committed to memory - * before making rt visible to other CPUS. - */ - rcu_assign_pointer(rt_hash_table[hash].chain, rt); - - spin_unlock_bh(rt_hash_lock_addr(hash)); - -skip_hashing: - if (skb) - skb_dst_set(skb, &rt->dst); - return rt; -} - /* * Peer allocation may fail only in serious out-of-memory conditions. However * we still can generate some output. @@ -1255,26 +531,6 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) } EXPORT_SYMBOL(__ip_select_ident); -static void rt_del(unsigned int hash, struct rtable *rt) -{ - struct rtable __rcu **rthp; - struct rtable *aux; - - rthp = &rt_hash_table[hash].chain; - spin_lock_bh(rt_hash_lock_addr(hash)); - ip_rt_put(rt); - while ((aux = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) { - if (aux == rt || rt_is_expired(aux)) { - *rthp = aux->dst.rt_next; - rt_free(aux); - continue; - } - rthp = &aux->dst.rt_next; - } - spin_unlock_bh(rt_hash_lock_addr(hash)); -} - static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, u8 tos, @@ -1518,10 +774,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ret = NULL; } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->dst.expires) { - unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, - rt->rt_oif, - rt_genid(dev_net(dst->dev))); - rt_del(hash, rt); + ip_rt_put(rt); ret = NULL; } } @@ -1969,7 +1222,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm) { return dst_alloc(&ipv4_dst_ops, dev, 1, -1, - DST_HOST | + DST_HOST | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1978,7 +1231,6 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { - unsigned int hash; struct rtable *rth; struct in_device *in_dev = __in_dev_get_rcu(dev); u32 itag = 0; @@ -2042,9 +1294,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif RT_CACHE_STAT_INC(in_slow_mc); - hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - rth = rt_intern_hash(hash, rth, skb, dev->ifindex); - return IS_ERR(rth) ? PTR_ERR(rth) : 0; + skb_dst_set(skb, &rth->dst); + return 0; e_nobufs: return -ENOBUFS; @@ -2176,7 +1427,6 @@ static int ip_mkroute_input(struct sk_buff *skb, { struct rtable *rth = NULL; int err; - unsigned int hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) @@ -2188,12 +1438,7 @@ static int ip_mkroute_input(struct sk_buff *skb, if (err) return err; - /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl4->flowi4_iif, - rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); - if (IS_ERR(rth)) - return PTR_ERR(rth); + skb_dst_set(skb, &rth->dst); return 0; } @@ -2217,7 +1462,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned int flags = 0; u32 itag = 0; struct rtable *rth; - unsigned int hash; int err = -EINVAL; struct net *net = dev_net(dev); @@ -2339,11 +1583,8 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); + skb_dst_set(skb, &rth->dst); err = 0; - if (IS_ERR(rth)) - err = PTR_ERR(rth); goto out; no_route: @@ -2382,46 +1623,10 @@ martian_source_keep_err: int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, bool noref) { - struct rtable *rth; - unsigned int hash; - int iif = dev->ifindex; - struct net *net; int res; - net = dev_net(dev); - rcu_read_lock(); - if (!rt_caching(net)) - goto skip_cache; - - tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif, rt_genid(net)); - - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->dst.rt_next)) { - if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | - ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | - (rth->rt_route_iif ^ iif) | - (rth->rt_key_tos ^ tos)) == 0 && - rth->rt_mark == skb->mark && - net_eq(dev_net(rth->dst.dev), net) && - !rt_is_expired(rth)) { - if (noref) { - dst_use_noref(&rth->dst, jiffies); - skb_dst_set_noref(skb, &rth->dst); - } else { - dst_use(&rth->dst, jiffies); - skb_dst_set(skb, &rth->dst); - } - RT_CACHE_STAT_INC(in_hit); - rcu_read_unlock(); - return 0; - } - RT_CACHE_STAT_INC(in_hlist_search); - } - -skip_cache: /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2563,10 +1768,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, /* * Major route resolver routine. - * called with rcu_read_lock(); */ -static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2746,57 +1950,11 @@ static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) make_route: rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, tos, dev_out, flags); - if (!IS_ERR(rth)) { - unsigned int hash; - - hash = rt_hash(orig_daddr, orig_saddr, orig_oif, - rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, orig_oif); - } out: rcu_read_unlock(); return rth; } - -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) -{ - struct rtable *rth; - unsigned int hash; - - if (!rt_caching(net)) - goto slow_output; - - hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); - - rcu_read_lock_bh(); - for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; - rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->rt_key_dst == flp4->daddr && - rth->rt_key_src == flp4->saddr && - rt_is_output_route(rth) && - rth->rt_oif == flp4->flowi4_oif && - rth->rt_mark == flp4->flowi4_mark && - !((rth->rt_key_tos ^ flp4->flowi4_tos) & - (IPTOS_RT_MASK | RTO_ONLINK)) && - net_eq(dev_net(rth->dst.dev), net) && - !rt_is_expired(rth)) { - dst_use(&rth->dst, jiffies); - RT_CACHE_STAT_INC(out_hit); - rcu_read_unlock_bh(); - if (!flp4->saddr) - flp4->saddr = rth->rt_src; - if (!flp4->daddr) - flp4->daddr = rth->rt_dst; - return rth; - } - RT_CACHE_STAT_INC(out_hlist_search); - } - rcu_read_unlock_bh(); - -slow_output: - return ip_route_output_slow(net, flp4); -} EXPORT_SYMBOL_GPL(__ip_route_output_key); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) @@ -3106,43 +2264,6 @@ errout_free: int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct rtable *rt; - int h, s_h; - int idx, s_idx; - struct net *net; - - net = sock_net(skb->sk); - - s_h = cb->args[0]; - if (s_h < 0) - s_h = 0; - s_idx = idx = cb->args[1]; - for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { - if (!rt_hash_table[h].chain) - continue; - rcu_read_lock_bh(); - for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; - rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { - if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) - continue; - if (rt_is_expired(rt)) - continue; - skb_dst_set_noref(skb, &rt->dst); - if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE, - 1, NLM_F_MULTI) <= 0) { - skb_dst_drop(skb); - rcu_read_unlock_bh(); - goto done; - } - skb_dst_drop(skb); - } - rcu_read_unlock_bh(); - } - -done: - cb->args[0] = h; - cb->args[1] = idx; return skb->len; } @@ -3376,22 +2497,6 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ -static __initdata unsigned long rhash_entries; -static int __init set_rhash_entries(char *str) -{ - ssize_t ret; - - if (!str) - return 0; - - ret = kstrtoul(str, 0, &rhash_entries); - if (ret) - return 0; - - return 1; -} -__setup("rhash_entries=", set_rhash_entries); - int __init ip_rt_init(void) { int rc = 0; @@ -3414,31 +2519,12 @@ int __init ip_rt_init(void) if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); - rt_hash_table = (struct rt_hash_bucket *) - alloc_large_system_hash("IP route cache", - sizeof(struct rt_hash_bucket), - rhash_entries, - (totalram_pages >= 128 * 1024) ? - 15 : 17, - 0, - &rt_hash_log, - &rt_hash_mask, - 0, - rhash_entries ? 0 : 512 * 1024); - memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); - rt_hash_lock_init(); - - ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); - ip_rt_max_size = (rt_hash_mask + 1) * 16; + ipv4_dst_ops.gc_thresh = ~0; + ip_rt_max_size = INT_MAX; devinet_init(); ip_fib_init(); - INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); - expires_ljiffies = jiffies; - schedule_delayed_work(&expires_work, - net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (ip_rt_proc_init()) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM -- cgit v1.2.3-70-g09d2 From 38a424e4657462fe9f8b76f01a0e879abde99ab4 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:02:53 +0000 Subject: ipv4: Kill ip_route_input_noref(). The "noref" argument to ip_route_input_common() is now always ignored because we do not cache routes, and in that case we must always grab a reference to the resulting 'dst'. Signed-off-by: David S. Miller --- include/net/route.h | 16 ++-------------- net/ipv4/arp.c | 2 +- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 6 +++--- net/ipv4/xfrm4_input.c | 4 ++-- 6 files changed, 12 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 5dcfeb621e0..5c86c4773b2 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -160,20 +160,8 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin, bool noref); - -static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) -{ - return ip_route_input_common(skb, dst, src, tos, devin, false); -} - -static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin) -{ - return ip_route_input_common(skb, dst, src, tos, devin, true); -} +extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2e560f0c757..c38293f3816 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { + ip_route_input(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 8d07c973409..7ad88e5e711 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) /* skb dst is stale, drop it, and perform route lookup again */ skb_dst_drop(head); iph = ip_hdr(head); - err = ip_route_input_noref(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input(head, iph->daddr, iph->saddr, + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b27d4440f52..4ebc6feee25 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -336,8 +336,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + int err = ip_route_input(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6d6146d31f2..55eb4634ed6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1620,8 +1620,8 @@ martian_source_keep_err: goto out; } -int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, bool noref) +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) { int res; @@ -1664,7 +1664,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_unlock(); return res; } -EXPORT_SYMBOL(ip_route_input_common); +EXPORT_SYMBOL(ip_route_input); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 06814b6216d..58d23a57250 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + if (ip_route_input(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.3-70-g09d2 From 1a00fee4ffb22312a0ac40045ecd6f222b55eb3d Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:02:56 +0000 Subject: ipv4: Remove rt_key_{src,dst,tos} from struct rtable. They are always used in contexts where they can be reconstituted, or where the finally resolved rt->rt_{src,dst} is semantically equivalent. Signed-off-by: David S. Miller --- include/net/route.h | 5 ----- net/ipv4/route.c | 39 +++++++++------------------------------ net/ipv4/xfrm4_policy.c | 3 --- 3 files changed, 9 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 5c86c4773b2..935fa59630b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -44,14 +44,9 @@ struct fib_info; struct rtable { struct dst_entry dst; - /* Lookup key. */ - __be32 rt_key_dst; - __be32 rt_key_src; - int rt_genid; unsigned int rt_flags; __u16 rt_type; - __u8 rt_key_tos; __be32 rt_dst; /* Path destination */ __be32 rt_src; /* Path source */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 55eb4634ed6..c89d690acdf 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1268,12 +1268,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, #endif rth->dst.output = ip_rt_bug; - rth->rt_key_dst = daddr; - rth->rt_key_src = saddr; rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; - rth->rt_key_tos = tos; rth->rt_dst = daddr; rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; @@ -1392,12 +1389,9 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - rth->rt_key_dst = daddr; - rth->rt_key_src = saddr; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; - rth->rt_key_tos = tos; rth->rt_dst = daddr; rth->rt_src = saddr; rth->rt_route_iif = in_dev->dev->ifindex; @@ -1563,12 +1557,9 @@ local_input: rth->dst.tclassid = itag; #endif - rth->rt_key_dst = daddr; - rth->rt_key_src = saddr; rth->rt_genid = rt_genid(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; - rth->rt_key_tos = tos; rth->rt_dst = daddr; rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; @@ -1668,9 +1659,7 @@ EXPORT_SYMBOL(ip_route_input); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, - const struct flowi4 *fl4, - __be32 orig_daddr, __be32 orig_saddr, - int orig_oif, __u8 orig_rtos, + const struct flowi4 *fl4, int orig_oif, struct net_device *dev_out, unsigned int flags) { @@ -1721,12 +1710,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->dst.output = ip_output; - rth->rt_key_dst = orig_daddr; - rth->rt_key_src = orig_saddr; rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_key_tos = orig_rtos; rth->rt_dst = fl4->daddr; rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; @@ -1777,16 +1763,12 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) unsigned int flags = 0; struct fib_result res; struct rtable *rth; - __be32 orig_daddr; - __be32 orig_saddr; int orig_oif; res.tclassid = 0; res.fi = NULL; res.table = NULL; - orig_daddr = fl4->daddr; - orig_saddr = fl4->saddr; orig_oif = fl4->flowi4_oif; fl4->flowi4_iif = net->loopback_dev->ifindex; @@ -1948,8 +1930,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) make_route: - rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, - tos, dev_out, flags); + rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags); out: rcu_read_unlock(); @@ -2014,9 +1995,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or if (new->dev) dev_hold(new->dev); - rt->rt_key_dst = ort->rt_key_dst; - rt->rt_key_src = ort->rt_key_src; - rt->rt_key_tos = ort->rt_key_tos; rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; @@ -2058,7 +2036,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, +static int rt_fill_info(struct net *net, __be32 src, u8 tos, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { @@ -2077,7 +2055,7 @@ static int rt_fill_info(struct net *net, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = rt->rt_key_tos; + r->rtm_tos = tos; r->rtm_table = RT_TABLE_MAIN; if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) goto nla_put_failure; @@ -2090,9 +2068,9 @@ static int rt_fill_info(struct net *net, if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) goto nla_put_failure; - if (rt->rt_key_src) { + if (src) { r->rtm_src_len = 32; - if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src)) + if (nla_put_be32(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && @@ -2104,7 +2082,7 @@ static int rt_fill_info(struct net *net, goto nla_put_failure; #endif if (!rt_is_input_route(rt) && - rt->rt_src != rt->rt_key_src) { + rt->rt_src != src) { if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) goto nla_put_failure; } @@ -2248,7 +2226,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + err = rt_fill_info(net, src, rtm->rtm_tos, skb, + NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index fcf7678bc00..2a8d5cfc340 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,9 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - xdst->u.rt.rt_key_dst = fl4->daddr; - xdst->u.rt.rt_key_src = fl4->saddr; - xdst->u.rt.rt_key_tos = fl4->flowi4_tos; xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; -- cgit v1.2.3-70-g09d2 From d6c0a4f609847d6e65658913f9ccbcb1c137cff3 Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:02:59 +0000 Subject: ipv4: Kill 'rt_src' from 'struct rtable' Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/route.c | 34 +++++++++++++++------------------- net/ipv4/xfrm4_policy.c | 1 - 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 935fa59630b..85d1093e42d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -49,7 +49,6 @@ struct rtable { __u16 rt_type; __be32 rt_dst; /* Path destination */ - __be32 rt_src; /* Path source */ int rt_route_iif; int rt_iif; int rt_oif; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c89d690acdf..fc1199dc23e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1272,7 +1272,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; rth->rt_dst = daddr; - rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1393,7 +1392,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_flags = flags; rth->rt_type = res->type; rth->rt_dst = daddr; - rth->rt_src = saddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; @@ -1561,7 +1559,6 @@ local_input: rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; rth->rt_dst = daddr; - rth->rt_src = saddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1714,7 +1711,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_flags = flags; rth->rt_type = type; rth->rt_dst = fl4->daddr; - rth->rt_src = fl4->saddr; rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; @@ -2005,7 +2001,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; - rt->rt_src = ort->rt_src; rt->rt_gateway = ort->rt_gateway; rt->fi = ort->fi; if (rt->fi) @@ -2036,7 +2031,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, __be32 src, u8 tos, +static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { @@ -2055,7 +2050,7 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = tos; + r->rtm_tos = fl4->flowi4_tos; r->rtm_table = RT_TABLE_MAIN; if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) goto nla_put_failure; @@ -2082,11 +2077,11 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos, goto nla_put_failure; #endif if (!rt_is_input_route(rt) && - rt->rt_src != src) { - if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src)) + fl4->saddr != src) { + if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_dst != rt->rt_gateway && + if (fl4->daddr != rt->rt_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; @@ -2116,7 +2111,7 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos, if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, - rt->rt_src, rt->rt_dst, + fl4->saddr, fl4->daddr, r, nowait); if (err <= 0) { if (!nowait) { @@ -2151,6 +2146,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; + struct flowi4 fl4; __be32 dst = 0; __be32 src = 0; u32 iif; @@ -2185,6 +2181,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = dst; + fl4.saddr = src; + fl4.flowi4_tos = rtm->rtm_tos; + fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; + fl4.flowi4_mark = mark; + if (iif) { struct net_device *dev; @@ -2205,13 +2208,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - struct flowi4 fl4 = { - .daddr = dst, - .saddr = src, - .flowi4_tos = rtm->rtm_tos, - .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .flowi4_mark = mark, - }; rt = ip_route_output_key(net, &fl4); err = 0; @@ -2226,7 +2222,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, src, rtm->rtm_tos, skb, + err = rt_fill_info(net, src, &fl4, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2a8d5cfc340..00d49e41511 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,7 +92,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; - xdst->u.rt.rt_src = rt->rt_src; xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; -- cgit v1.2.3-70-g09d2 From b48698895de86e07b685f8e4b8db0f1cd5a97e9a Mon Sep 17 00:00:00 2001 From: David Miller Date: Sun, 1 Jul 2012 02:03:01 +0000 Subject: ipv4: Remove 'rt_mark' from 'struct rtable' Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/ipmr.c | 2 +- net/ipv4/route.c | 9 ++------- net/ipv4/xfrm4_policy.c | 1 - 4 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 85d1093e42d..757fe40b6ce 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -52,7 +52,6 @@ struct rtable { int rt_route_iif; int rt_iif; int rt_oif; - __u32 rt_mark; /* Info on neighbour */ __be32 rt_gateway; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5716c6b808d..eee3bf6676f 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1797,7 +1797,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .flowi4_tos = RT_TOS(iph->tos), .flowi4_oif = rt->rt_oif, .flowi4_iif = rt->rt_iif, - .flowi4_mark = rt->rt_mark, + .flowi4_mark = skb->mark, }; struct mr_table *mrt; int err; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fc1199dc23e..264617c98c2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1275,7 +1275,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; - rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; rth->fi = NULL; @@ -1395,7 +1394,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; - rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; rth->fi = NULL; @@ -1562,7 +1560,6 @@ local_input: rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; - rth->rt_mark = skb->mark; rth->rt_pmtu = 0; rth->rt_gateway = daddr; rth->fi = NULL; @@ -1714,7 +1711,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; - rth->rt_mark = fl4->flowi4_mark; rth->rt_pmtu = 0; rth->rt_gateway = fl4->daddr; rth->fi = NULL; @@ -1994,7 +1990,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; - rt->rt_mark = ort->rt_mark; rt->rt_pmtu = ort->rt_pmtu; rt->rt_genid = rt_genid(net); @@ -2091,8 +2086,8 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; - if (rt->rt_mark && - nla_put_be32(skb, RTA_MARK, rt->rt_mark)) + if (fl4->flowi4_mark && + nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; error = rt->dst.error; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 00d49e41511..f73ba8210bd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -82,7 +82,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; - xdst->u.rt.rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); -- cgit v1.2.3-70-g09d2 From f1ce3062c53809d862d8a04e7a0566c3cc4e0bda Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 Jul 2012 10:10:17 -0700 Subject: ipv4: Remove 'rt_dst' from 'struct rtable' Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/route.c | 45 +++++++++------------------------------------ net/ipv4/xfrm4_policy.c | 1 - 3 files changed, 9 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 757fe40b6ce..6d111bceb16 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -48,7 +48,6 @@ struct rtable { unsigned int rt_flags; __u16 rt_type; - __be32 rt_dst; /* Path destination */ int rt_route_iif; int rt_iif; int rt_oif; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 264617c98c2..85d103fee88 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -850,7 +850,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, rt->rt_iif, - &rt->rt_dst, &rt->rt_gateway); + &ip_hdr(skb)->daddr, &rt->rt_gateway); #endif } out_put_peer: @@ -1132,8 +1132,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - - if (rt->rt_gateway != rt->rt_dst && mtu > 576) + if (rt->rt_gateway != 0 && mtu > 576) mtu = 576; } @@ -1271,7 +1270,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; - rth->rt_dst = daddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1390,7 +1388,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; - rth->rt_dst = daddr; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; @@ -1556,7 +1553,6 @@ local_input: rth->rt_genid = rt_genid(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; - rth->rt_dst = daddr; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; @@ -1707,7 +1703,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_dst = fl4->daddr; rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; @@ -1995,7 +1990,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; - rt->rt_dst = ort->rt_dst; rt->rt_gateway = ort->rt_gateway; rt->fi = ort->fi; if (rt->fi) @@ -2026,9 +2020,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, - struct sk_buff *skb, u32 pid, u32 seq, int event, - int nowait, unsigned int flags) +static int rt_fill_info(struct net *net, __be32 dst, __be32 src, + struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); struct rtmsg *r; @@ -2056,7 +2050,7 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - if (nla_put_be32(skb, RTA_DST, rt->rt_dst)) + if (nla_put_be32(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; @@ -2100,29 +2094,8 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4, } if (rt_is_input_route(rt)) { -#ifdef CONFIG_IP_MROUTE - __be32 dst = rt->rt_dst; - - if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && - IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { - int err = ipmr_get_route(net, skb, - fl4->saddr, fl4->daddr, - r, nowait); - if (err <= 0) { - if (!nowait) { - if (err == 0) - return 0; - goto nla_put_failure; - } else { - if (err == -EMSGSIZE) - goto nla_put_failure; - error = err; - } - } - } else -#endif - if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) - goto nla_put_failure; + if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + goto nla_put_failure; } if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) @@ -2217,7 +2190,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, src, &fl4, skb, + err = rt_fill_info(net, dst, src, &fl4, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f73ba8210bd..6074b694f11 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,7 +91,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; - xdst->u.rt.rt_dst = rt->rt_dst; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; -- cgit v1.2.3-70-g09d2 From f8126f1d5136be1ca1a3536d43ad7a710b5620f8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Jul 2012 05:03:45 -0700 Subject: ipv4: Adjust semantics of rt->rt_gateway. In order to allow prefixed routes, we have to adjust how rt_gateway is set and interpreted. The new interpretation is: 1) rt_gateway == 0, destination is on-link, nexthop is iph->daddr 2) rt_gateway != 0, destination requires a nexthop gateway Abstract the fetching of the proper nexthop value using a new inline helper, rt_nexthop(), as suggested by Joe Perches. Signed-off-by: David S. Miller Tested-by: Vijay Subramanian --- include/net/route.h | 7 +++++++ net/ipv4/arp.c | 3 +-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 5 +++-- net/ipv4/route.c | 17 +++++++++-------- 8 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 6d111bceb16..3c1eeab9749 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -70,6 +70,13 @@ static inline bool rt_is_output_route(const struct rtable *rt) return rt->rt_route_iif == 0; } +static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) +{ + if (rt->rt_gateway) + return rt->rt_gateway; + return daddr; +} + struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c38293f3816..a0124eb7dbe 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -475,8 +475,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) return 1; } - paddr = skb_rtable(skb)->rt_gateway; - + paddr = rt_nexthop(skb_rtable(skb), ip_hdr(skb)->daddr); if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) return 0; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c7a4de05ca0..0a290d719bc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -389,7 +389,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; return &rt->dst; @@ -422,7 +422,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_gateway) goto route_err; return &rt->dst; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 42c44b1403c..b062a98574f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -766,7 +766,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); - dst = rt->rt_gateway; + dst = rt_nexthop(rt, old_iph->daddr); } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c528f841ca4..4494015f7e3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2c2c35bace7..99af1f0cc65 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -487,7 +487,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_fifo_errors++; goto tx_error; } - dst = rt->rt_gateway; + dst = rt_nexthop(rt, old_iph->daddr); } rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 2f210c79dc8..cbb6a1a6f6f 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_nat_ipv4_range newrange; const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; - __be32 newsrc; + __be32 newsrc, nh; NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); @@ -70,7 +70,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) mr = par->targinfo; rt = skb_rtable(skb); - newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + nh = rt_nexthop(rt, ip_hdr(skb)->daddr); + newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE); if (!newsrc) { pr_info("%s ate my IP address\n", par->out->name); return NF_DROP; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85d103fee88..d1d57963809 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,9 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else - src = inet_select_addr(rt->dst.dev, rt->rt_gateway, - RT_SCOPE_UNIVERSE); + src = inet_select_addr(rt->dst.dev, + rt_nexthop(rt, iph->daddr), + RT_SCOPE_UNIVERSE); rcu_read_unlock(); } memcpy(addr, &src, 4); @@ -1132,7 +1133,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway != 0 && mtu > 576) + if (rt->rt_gateway && mtu > 576) mtu = 576; } @@ -1274,7 +1275,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; @@ -1392,7 +1393,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; rth->dst.input = ip_forward; @@ -1557,7 +1558,7 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_pmtu = 0; - rth->rt_gateway = daddr; + rth->rt_gateway = 0; rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1707,7 +1708,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_pmtu = 0; - rth->rt_gateway = fl4->daddr; + rth->rt_gateway = 0; rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2070,7 +2071,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (fl4->daddr != rt->rt_gateway && + if (rt->rt_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; -- cgit v1.2.3-70-g09d2 From f5b0a8743601a4477419171f5046bd07d1c080a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 19 Jul 2012 12:31:33 -0700 Subject: net: Document dst->obsolete better. Add a big comment explaining how the field works, and use defines instead of magic constants for the values assigned to it. Suggested by Joe Perches. Signed-off-by: David S. Miller --- include/net/dst.h | 14 +++++++++++++- net/core/dst.c | 4 ++-- net/decnet/dn_route.c | 4 ++-- net/ipv4/route.c | 5 +++-- net/ipv6/route.c | 4 ++-- net/sctp/transport.c | 2 +- net/xfrm/xfrm_policy.c | 23 ++++++++++++----------- 7 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 51610468c63..0df661a0c29 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -65,7 +65,19 @@ struct dst_entry { unsigned short pending_confirm; short error; + + /* A non-zero value of dst->obsolete forces by-hand validation + * of the route entry. Positive values are set by the generic + * dst layer to indicate that the entry has been forcefully + * destroyed. + * + * Negative values are used by the implementation layer code to + * force invocation of the dst_ops->check() method. + */ short obsolete; +#define DST_OBSOLETE_NONE 0 +#define DST_OBSOLETE_DEAD 2 +#define DST_OBSOLETE_FORCE_CHK -1 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ #ifdef CONFIG_IP_ROUTE_CLASSID @@ -359,7 +371,7 @@ extern struct dst_entry *dst_destroy(struct dst_entry *dst); static inline void dst_free(struct dst_entry *dst) { - if (dst->obsolete > 1) + if (dst->obsolete > 0) return; if (!atomic_read(&dst->__refcnt)) { dst = dst_destroy(dst); diff --git a/net/core/dst.c b/net/core/dst.c index 07bacff84aa..069d51d2941 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -94,7 +94,7 @@ loop: * But we do not have state "obsoleted, but * referenced by parent", so it is right. */ - if (dst->obsolete > 1) + if (dst->obsolete > 0) continue; ___dst_free(dst); @@ -202,7 +202,7 @@ static void ___dst_free(struct dst_entry *dst) */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) dst->input = dst->output = dst_discard; - dst->obsolete = 2; + dst->obsolete = DST_OBSOLETE_DEAD; } void __dst_free(struct dst_entry *dst) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 47de90d8fe9..23cc11dd4e4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1176,7 +1176,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops, dev_out, 1, 0, DST_HOST); + rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; @@ -1444,7 +1444,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops, out_dev, 0, 0, DST_HOST); + rt = dst_alloc(&dn_dst_ops, out_dev, 0, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d1d57963809..50d2498c928 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1221,7 +1221,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm) { - return dst_alloc(&ipv4_dst_ops, dev, 1, -1, + return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, DST_HOST | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); @@ -1969,9 +1969,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = { struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, 0, 0); struct rtable *ort = (struct rtable *) dst_orig; + struct rtable *rt; + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); if (rt) { struct dst_entry *new = &rt->dst; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 84f6564dd37..cf02cb97bbd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -281,7 +281,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, 0, flags); + 0, DST_OBSOLETE_NONE, flags); if (rt) { struct dst_entry *dst = &rt->dst; @@ -985,7 +985,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); + rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); if (rt) { new = &rt->dst; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a6b7ee9ce28..ec3a12b9b80 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -216,7 +216,7 @@ void sctp_transport_set_owner(struct sctp_transport *transport, void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) { /* If we don't have a fresh route, look one up */ - if (!transport->dst || transport->dst->obsolete > 1) { + if (!transport->dst || transport->dst->obsolete) { dst_release(transport->dst); transport->af_specific->get_dst(transport, &transport->saddr, &transport->fl, sk); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 65bd1ca5151..c5a5165a592 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1350,7 +1350,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 0, 0, 0); + xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { struct dst_entry *dst = &xdst->u.dst; @@ -1477,7 +1477,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->xfrm = xfrm[i]; xdst->xfrm_genid = xfrm[i]->genid; - dst1->obsolete = -1; + dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->flags |= DST_HOST; dst1->lastuse = now; @@ -2219,12 +2219,13 @@ EXPORT_SYMBOL(__xfrm_route_forward); static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete - * to "-1" to force all XFRM destinations to get validated by - * dst_ops->check on every use. We do this because when a - * normal route referenced by an XFRM dst is obsoleted we do - * not go looking around for all parent referencing XFRM dsts - * so that we can invalidate them. It is just too much work. - * Instead we make the checks here on every use. For example: + * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to + * get validated by dst_ops->check on every use. We do this + * because when a normal route referenced by an XFRM dst is + * obsoleted we do not go looking around for all parent + * referencing XFRM dsts so that we can invalidate them. It + * is just too much work. Instead we make the checks here on + * every use. For example: * * XFRM dst A --> IPv4 dst X * @@ -2234,9 +2235,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * stale_bundle() check. * * When a policy's bundle is pruned, we dst_free() the XFRM - * dst which causes it's ->obsolete field to be set to a - * positive non-zero integer. If an XFRM dst has been pruned - * like this, we want to force a new route lookup. + * dst which causes it's ->obsolete field to be set to + * DST_OBSOLETE_DEAD. If an XFRM dst has been pruned like + * this, we want to force a new route lookup. */ if (dst->obsolete < 0 && !stale_bundle(dst)) return dst; -- cgit v1.2.3-70-g09d2 From ceb3320610d6f15ff20dd4c042b36473d77de76f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 11:31:28 -0700 Subject: ipv4: Kill routes during PMTU/redirect updates. Mark them obsolete so there will be a re-lookup to fetch the FIB nexthop exception info. Signed-off-by: David S. Miller --- include/net/dst.h | 1 + net/ipv4/route.c | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 0df661a0c29..baf59789006 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -78,6 +78,7 @@ struct dst_entry { #define DST_OBSOLETE_NONE 0 #define DST_OBSOLETE_DEAD 2 #define DST_OBSOLETE_FORCE_CHK -1 +#define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 50d2498c928..d52f7699c2f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -673,7 +673,8 @@ out_unlock: return; } -static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4) +static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, + bool kill_route) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; @@ -728,8 +729,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow update_or_create_fnhe(nh, fl4->daddr, new_gw, 0, 0); } - rt->rt_gateway = new_gw; - rt->rt_flags |= RTCF_REDIRECTED; + if (kill_route) + rt->dst.obsolete = DST_OBSOLETE_KILL; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } neigh_release(n); @@ -760,7 +761,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf rt = (struct rtable *) dst; ip_rt_build_flow_key(&fl4, sk, skb); - __ip_do_redirect(rt, skb, &fl4); + __ip_do_redirect(rt, skb, &fl4, true); } static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) @@ -919,7 +920,7 @@ out: kfree_skb(skb); return 0; } -static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) +static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct fib_result res; @@ -932,8 +933,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } - rt->rt_pmtu = mtu; - dst_set_expires(&rt->dst, ip_rt_mtu_expires); + return mtu; } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -943,7 +943,14 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - __ip_rt_update_pmtu(rt, &fl4, mtu); + mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); + + if (!rt->rt_pmtu) { + dst->obsolete = DST_OBSOLETE_KILL; + } else { + rt->rt_pmtu = mtu; + dst_set_expires(&rt->dst, ip_rt_mtu_expires); + } } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -989,7 +996,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, RT_TOS(iph->tos), protocol, mark, flow_flags); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { - __ip_do_redirect(rt, skb, &fl4); + __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } @@ -1004,7 +1011,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); rt = __ip_route_output_key(sock_net(sk), &fl4); if (!IS_ERR(rt)) { - __ip_do_redirect(rt, skb, &fl4); + __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } @@ -1014,7 +1021,15 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; - if (rt_is_expired(rt)) + /* All IPV4 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + * + * When a PMTU/redirect information update invalidates a + * route, this is indicated by setting obsolete to + * DST_OBSOLETE_KILL. + */ + if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) return NULL; return dst; } @@ -1186,8 +1201,10 @@ restart: dst_set_expires(&rt->dst, diff); } } - if (gw) + if (gw) { + rt->rt_flags |= RTCF_REDIRECTED; rt->rt_gateway = gw; + } fnhe->fnhe_stamp = jiffies; break; } -- cgit v1.2.3-70-g09d2 From f2bb4bedf35d5167a073dcdddf16543f351ef3ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 12:20:47 -0700 Subject: ipv4: Cache output routes in fib_info nexthops. If we have an output route that lacks nexthop exceptions, we can cache it in the FIB info nexthop. Such routes will have DST_HOST cleared because such routes refer to a family of destinations, rather than just one. The sequence of the handling of exceptions during route lookup is adjusted to make the logic work properly. Before we allocate the route, we lookup the exception. Then we know if we will cache this route or not, and therefore whether DST_HOST should be set on the allocated route. Then we use DST_HOST to key off whether we should store the resulting route, during rt_set_nexthop(), in the FIB nexthop cache. With help from Eric Dumazet. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 + net/ipv4/fib_semantics.c | 2 + net/ipv4/route.c | 140 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 101 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2daf096dfc6..fb62c590360 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -46,6 +46,7 @@ struct fib_config { }; struct fib_info; +struct rtable; struct fib_nh_exception { struct fib_nh_exception __rcu *fnhe_next; @@ -80,6 +81,7 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; + struct rtable *nh_rth_output; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2b57d768240..83d0f42b619 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -171,6 +171,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); + if (nexthop_nh->nh_rth_output) + dst_release(&nexthop_nh->nh_rth_output->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d52f7699c2f..8a0260010ea 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1158,8 +1158,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, - struct fib_info *fi) +static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; @@ -1168,50 +1167,83 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4, dst_init_metrics(&rt->dst, fi->fib_metrics, true); } -static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; struct fib_nh_exception *fnhe; u32 hval; + if (!hash) + return NULL; + hval = fnhe_hashfun(daddr); -restart: for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { - __be32 fnhe_daddr, gw; - unsigned long expires; - unsigned int seq; - u32 pmtu; - - seq = read_seqbegin(&fnhe_seqlock); - fnhe_daddr = fnhe->fnhe_daddr; - gw = fnhe->fnhe_gw; - pmtu = fnhe->fnhe_pmtu; - expires = fnhe->fnhe_expires; - if (read_seqretry(&fnhe_seqlock, seq)) - goto restart; - if (daddr != fnhe_daddr) - continue; - if (pmtu) { - unsigned long diff = expires - jiffies; + if (fnhe->fnhe_daddr == daddr) + return fnhe; + } + return NULL; +} - if (time_before(jiffies, expires)) { - rt->rt_pmtu = pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = gw; +static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, + __be32 daddr) +{ + __be32 fnhe_daddr, gw; + unsigned long expires; + unsigned int seq; + u32 pmtu; + +restart: + seq = read_seqbegin(&fnhe_seqlock); + fnhe_daddr = fnhe->fnhe_daddr; + gw = fnhe->fnhe_gw; + pmtu = fnhe->fnhe_pmtu; + expires = fnhe->fnhe_expires; + if (read_seqretry(&fnhe_seqlock, seq)) + goto restart; + + if (daddr != fnhe_daddr) + return; + + if (pmtu) { + unsigned long diff = expires - jiffies; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = pmtu; + dst_set_expires(&rt->dst, diff); } - fnhe->fnhe_stamp = jiffies; - break; + } + if (gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = gw; + } + fnhe->fnhe_stamp = jiffies; +} + +static inline void rt_release_rcu(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + dst_release(dst); +} + +static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +{ + struct rtable *orig, *prev, **p = &nh->nh_rth_output; + + orig = *p; + + prev = cmpxchg(p, orig, rt); + if (prev == orig) { + dst_clone(&rt->dst); + if (orig) + call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); } } -static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, +static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, + struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { if (fi) { @@ -1219,12 +1251,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) rt->rt_gateway = nh->nh_gw; - if (unlikely(nh->nh_exceptions)) - rt_bind_exception(rt, nh, fl4->daddr); - rt_init_metrics(rt, fl4, fi); + if (unlikely(fnhe)) + rt_bind_exception(rt, fnhe, daddr); + rt_init_metrics(rt, fi); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + rt->dst.tclassid = nh->nh_tclassid; #endif + if (!(rt->dst.flags & DST_HOST) && + rt_is_output_route(rt)) + rt_cache_route(nh, rt); } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1236,10 +1271,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, } static struct rtable *rt_dst_alloc(struct net_device *dev, - bool nopolicy, bool noxfrm) + bool nopolicy, bool noxfrm, bool will_cache) { return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - DST_HOST | DST_NOCACHE | + (will_cache ? 0 : DST_HOST) | DST_NOCACHE | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1276,7 +1311,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto e_err; } rth = rt_dst_alloc(dev_net(dev)->loopback_dev, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) goto e_nobufs; @@ -1349,6 +1384,7 @@ static int __mkroute_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { + struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; @@ -1395,9 +1431,13 @@ static int __mkroute_input(struct sk_buff *skb, } } + fnhe = NULL; + if (res->fi) + fnhe = find_exception(&FIB_RES_NH(*res), daddr); + rth = rt_dst_alloc(out_dev->dev, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM)); + IN_DEV_CONF_GET(out_dev, NOXFRM), false); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -1416,7 +1456,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); *result = rth; err = 0; @@ -1558,7 +1598,7 @@ brd_input: local_input: rth = rt_dst_alloc(net->loopback_dev, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) goto e_nobufs; @@ -1672,6 +1712,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, unsigned int flags) { struct fib_info *fi = res->fi; + struct fib_nh_exception *fnhe; struct in_device *in_dev; u16 type = res->type; struct rtable *rth; @@ -1710,9 +1751,22 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } + fnhe = NULL; + if (fi) { + fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + if (!fnhe) { + rth = FIB_RES_NH(*res).nh_rth_output; + if (rth && + rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) { + dst_use(&rth->dst, jiffies); + return rth; + } + } + } rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(in_dev, NOXFRM)); + IN_DEV_CONF_GET(in_dev, NOXFRM), + fi && !fnhe); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1749,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #endif } - rt_set_nexthop(rth, fl4, res, fi, type, 0); + rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) rth->dst.flags |= DST_NOCACHE; -- cgit v1.2.3-70-g09d2 From d2d68ba9fe8b38eb03124b3176a013bb8aa2b5e5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 12:58:50 -0700 Subject: ipv4: Cache input routes in fib_info nexthops. Caching input routes is slightly simpler than output routes, since we don't need to be concerned with nexthop exceptions. (locally destined, and routed packets, never trigger PMTU events or redirects that will be processed by us). However, we have to elide caching for the DIRECTSRC and non-zero itag cases. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_semantics.c | 2 ++ net/ipv4/route.c | 55 +++++++++++++++++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index fb62c590360..e69c3a47153 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -82,6 +82,7 @@ struct fib_nh { __be32 nh_saddr; int nh_saddr_genid; struct rtable *nh_rth_output; + struct rtable *nh_rth_input; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 83d0f42b619..e55171f184f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -173,6 +173,8 @@ static void free_fib_info_rcu(struct rcu_head *head) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) dst_release(&nexthop_nh->nh_rth_output->dst); + if (nexthop_nh->nh_rth_input) + dst_release(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8a0260010ea..97cca8a03d9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1231,6 +1231,9 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p = &nh->nh_rth_output; + if (rt_is_input_route(rt)) + p = &nh->nh_rth_input; + orig = *p; prev = cmpxchg(p, orig, rt); @@ -1241,6 +1244,11 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) } } +static bool rt_cache_valid(struct rtable *rt) +{ + return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK); +} + static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, struct fib_nh_exception *fnhe, @@ -1257,8 +1265,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_HOST) && - rt_is_output_route(rt)) + if (!(rt->dst.flags & DST_HOST)) rt_cache_route(nh, rt); } @@ -1384,11 +1391,11 @@ static int __mkroute_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { - struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; unsigned int flags = 0; + bool do_cache; u32 itag; /* get a working reference to the output device */ @@ -1431,13 +1438,21 @@ static int __mkroute_input(struct sk_buff *skb, } } - fnhe = NULL; - if (res->fi) - fnhe = find_exception(&FIB_RES_NH(*res), daddr); + do_cache = false; + if (res->fi) { + if (!(flags & RTCF_DIRECTSRC) && !itag) { + rth = FIB_RES_NH(*res).nh_rth_input; + if (rt_cache_valid(rth)) { + dst_use(&rth->dst, jiffies); + goto out; + } + do_cache = true; + } + } rth = rt_dst_alloc(out_dev->dev, IN_DEV_CONF_GET(in_dev, NOPOLICY), - IN_DEV_CONF_GET(out_dev, NOXFRM), false); + IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); if (!rth) { err = -ENOBUFS; goto cleanup; @@ -1456,8 +1471,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); - + rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); +out: *result = rth; err = 0; cleanup: @@ -1509,6 +1524,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct rtable *rth; int err = -EINVAL; struct net *net = dev_net(dev); + bool do_cache; /* IP on this device is disabled. */ @@ -1522,6 +1538,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) goto martian_source; + res.fi = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; @@ -1597,8 +1614,20 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: + do_cache = false; + if (res.fi) { + if (!(flags & RTCF_DIRECTSRC) && !itag) { + rth = FIB_RES_NH(res).nh_rth_input; + if (rt_cache_valid(rth)) { + dst_use(&rth->dst, jiffies); + goto set_and_out; + } + do_cache = true; + } + } + rth = rt_dst_alloc(net->loopback_dev, - IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); + IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; @@ -1622,6 +1651,9 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } + if (do_cache) + rt_cache_route(&FIB_RES_NH(res), rth); +set_and_out: skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -1756,8 +1788,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); if (!fnhe) { rth = FIB_RES_NH(*res).nh_rth_output; - if (rth && - rth->dst.obsolete == DST_OBSOLETE_FORCE_CHK) { + if (rt_cache_valid(rth)) { dst_use(&rth->dst, jiffies); return rth; } -- cgit v1.2.3-70-g09d2 From ba3f7f04ef2b19aace38f855aedd17fe43035d50 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:02:46 -0700 Subject: ipv4: Kill FLOWI_FLAG_RT_NOCACHE and associated code. Signed-off-by: David S. Miller --- include/net/flow.h | 1 - include/net/inet_connection_sock.h | 3 +-- net/dccp/ipv4.c | 2 +- net/ipv4/inet_connection_sock.c | 5 +---- net/ipv4/route.c | 3 --- net/ipv4/tcp_ipv4.c | 4 ++-- 6 files changed, 5 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index ce9cb7656b4..e1dd5082ec7 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -21,7 +21,6 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_CAN_SLEEP 0x02 -#define FLOWI_FLAG_RT_NOCACHE 0x04 __u32 flowic_secid; }; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2cf44b4ed2e..5ee66f517b4 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum); extern struct dst_entry* inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache); + const struct request_sock *req); extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk, struct sock *newsk, const struct request_sock *req); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ab4f44c9bb2..25428d0c50c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; struct flowi4 fl4; - dst = inet_csk_route_req(sk, &fl4, req, false); + dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0a290d719bc..db0cf17c00f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); struct dst_entry *inet_csk_route_req(struct sock *sk, struct flowi4 *fl4, - const struct request_sock *req, - bool nocache) + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); int flags = inet_sk_flowi_flags(sk); - if (nocache) - flags |= FLOWI_FLAG_RT_NOCACHE; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97cca8a03d9..7e1c0ed0ef7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1836,9 +1836,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0); - if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE) - rth->dst.flags |= DST_NOCACHE; - return rth; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d8b75a5898..59110caeb07 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff * skb; /* First, grab a route. */ - if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; skb = tcp_make_synack(sk, dst, req, rvp); @@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle && - (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL && + (dst = inet_csk_route_req(sk, &fl4, req)) != NULL && fl4.daddr == saddr) { if (!tcp_peer_is_proven(req, dst, true)) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); -- cgit v1.2.3-70-g09d2 From 4fd551d7bed93af60af61c5a324b8f5dff37953a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:39:44 -0700 Subject: ipv4: Kill rt->rt_oif Never actually used. It was being set on output routes to the original OIF specified in the flow key used for the lookup. Adjust the only user, ipmr_rt_fib_lookup(), for greater correctness of the flowi4_oif and flowi4_iif values, thanks to feedback from Julian Anastasov. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/ipmr.c | 7 +++++-- net/ipv4/route.c | 5 ----- net/ipv4/xfrm4_policy.c | 1 - 4 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 3c1eeab9749..e789a92fd60 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -50,7 +50,6 @@ struct rtable { int rt_route_iif; int rt_iif; - int rt_oif; /* Info on neighbour */ __be32 rt_gateway; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index eee3bf6676f..8eec8f4a053 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1795,8 +1795,11 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, .flowi4_tos = RT_TOS(iph->tos), - .flowi4_oif = rt->rt_oif, - .flowi4_iif = rt->rt_iif, + .flowi4_oif = (rt_is_output_route(rt) ? + skb->dev->ifindex : 0), + .flowi4_iif = (rt_is_output_route(rt) ? + net->loopback_dev->ifindex : + skb->dev->ifindex), .flowi4_mark = skb->mark, }; struct mr_table *mrt; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b8707779b85..a280b6ac8eb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1332,7 +1332,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_type = RTN_MULTICAST; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->rt_oif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -1463,7 +1462,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_type = res->type; rth->rt_route_iif = in_dev->dev->ifindex; rth->rt_iif = in_dev->dev->ifindex; - rth->rt_oif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -1642,7 +1640,6 @@ local_input: rth->rt_type = res.type; rth->rt_route_iif = dev->ifindex; rth->rt_iif = dev->ifindex; - rth->rt_oif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -1808,7 +1805,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_type = type; rth->rt_route_iif = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; - rth->rt_oif = orig_oif; rth->rt_pmtu = 0; rth->rt_gateway = 0; rth->fi = NULL; @@ -2085,7 +2081,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_route_iif = ort->rt_route_iif; rt->rt_iif = ort->rt_iif; - rt->rt_oif = ort->rt_oif; rt->rt_pmtu = ort->rt_pmtu; rt->rt_genid = rt_genid(net); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6074b694f11..3c99b4cdd29 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -81,7 +81,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; - xdst->u.rt.rt_oif = fl4->flowi4_oif; xdst->u.dst.dev = dev; dev_hold(dev); -- cgit v1.2.3-70-g09d2 From 9917e1e8762745191eba5a3bf2040278cbddbee1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:44:26 -0700 Subject: ipv4: Turn rt->rt_route_iif into rt->rt_is_input. That is this value's only use, as a boolean to indicate whether a route is an input route or not. So implement it that way, using a u16 gap present in the struct already. Signed-off-by: David S. Miller --- include/net/route.h | 6 +++--- net/ipv4/route.c | 10 +++++----- net/ipv4/xfrm4_policy.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index e789a92fd60..4bafe0bfe82 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -47,8 +47,8 @@ struct rtable { int rt_genid; unsigned int rt_flags; __u16 rt_type; + __u16 rt_is_input; - int rt_route_iif; int rt_iif; /* Info on neighbour */ @@ -61,12 +61,12 @@ struct rtable { static inline bool rt_is_input_route(const struct rtable *rt) { - return rt->rt_route_iif != 0; + return rt->rt_is_input != 0; } static inline bool rt_is_output_route(const struct rtable *rt) { - return rt->rt_route_iif == 0; + return rt->rt_is_input == 0; } static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a280b6ac8eb..fac4c4acdba 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1330,7 +1330,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; - rth->rt_route_iif = dev->ifindex; + rth->rt_is_input= 1; rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1460,7 +1460,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); rth->rt_flags = flags; rth->rt_type = res->type; - rth->rt_route_iif = in_dev->dev->ifindex; + rth->rt_is_input = 1; rth->rt_iif = in_dev->dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1638,7 +1638,7 @@ local_input: rth->rt_genid = rt_genid(net); rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; - rth->rt_route_iif = dev->ifindex; + rth->rt_is_input = 1; rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1803,7 +1803,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_genid = rt_genid(dev_net(dev_out)); rth->rt_flags = flags; rth->rt_type = type; - rth->rt_route_iif = 0; + rth->rt_is_input = 0; rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -2079,7 +2079,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or if (new->dev) dev_hold(new->dev); - rt->rt_route_iif = ort->rt_route_iif; + rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 3c99b4cdd29..c6281847f16 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -79,7 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, struct rtable *rt = (struct rtable *)xdst->route; const struct flowi4 *fl4 = &fl->u.ip4; - xdst->u.rt.rt_route_iif = fl4->flowi4_iif; xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.dst.dev = dev; @@ -87,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ + xdst->u.rt.rt_is_input = rt->rt_is_input; xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; -- cgit v1.2.3-70-g09d2 From 2860583fe840d972573363dfa190b2149a604534 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2012 14:55:59 -0700 Subject: ipv4: Kill rt->fi It's not really needed. We only grabbed a reference to the fib_info for the sake of fib_info local metrics. However, fib_info objects are freed using RCU, as are therefore their private metrics (if any). We would have triggered a route cache flush if we eliminated a reference to a fib_info object in the routing tables. Therefore, any existing cached routes will first check and see that they have been invalidated before an errant reference to these metric values would occur. Signed-off-by: David S. Miller --- include/net/route.h | 1 - net/ipv4/route.c | 32 +------------------------------- 2 files changed, 1 insertion(+), 32 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 4bafe0bfe82..60d611dc5ce 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -56,7 +56,6 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; - struct fib_info *fi; /* for client ref to shared metrics */ }; static inline bool rt_is_input_route(const struct rtable *rt) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fac4c4acdba..9add08869c7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -141,7 +141,6 @@ static int ip_rt_min_advmss __read_mostly = 256; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); static unsigned int ipv4_mtu(const struct dst_entry *dst); -static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -171,7 +170,6 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, - .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -1034,17 +1032,6 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } -static void ipv4_dst_destroy(struct dst_entry *dst) -{ - struct rtable *rt = (struct rtable *) dst; - - if (rt->fi) { - fib_info_put(rt->fi); - rt->fi = NULL; - } -} - - static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; @@ -1158,15 +1145,6 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) -{ - if (fi->fib_metrics != (u32 *) dst_default_metrics) { - rt->fi = fi; - atomic_inc(&fi->fib_clntref); - } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); -} - static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; @@ -1261,7 +1239,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->rt_gateway = nh->nh_gw; if (unlikely(fnhe)) rt_bind_exception(rt, fnhe, daddr); - rt_init_metrics(rt, fi); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif @@ -1334,7 +1312,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1464,7 +1441,6 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1642,7 +1618,6 @@ local_input: rth->rt_iif = dev->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1807,7 +1782,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_pmtu = 0; rth->rt_gateway = 0; - rth->fi = NULL; RT_CACHE_STAT_INC(out_slow_tot); @@ -2052,7 +2026,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), - .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, @@ -2087,9 +2060,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; - rt->fi = ort->fi; - if (rt->fi) - atomic_inc(&rt->fi->fib_clntref); dst_free(new); } -- cgit v1.2.3-70-g09d2 From 0bb4087cbec0ef74fd416789d6aad67957063057 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 20 Jul 2012 16:00:53 -0700 Subject: ipv4: Fix neigh lookup keying over loopback/point-to-point devices. We were using a special key "0" for all loopback and point-to-point device neigh lookups under ipv4, but we wouldn't use that special key for the neigh creation. So basically we'd make a new neigh at each and every lookup :-) This special case to use only one neigh for these device types is of dubious value, so just remove it entirely. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/arp.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 4617d984113..7f7df93f37c 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -21,9 +21,6 @@ static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev struct neighbour *n; u32 hash_val; - if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) - key = 0; - hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; -- cgit v1.2.3-70-g09d2 From 5aa93bcf66f4af094d6f11096e81d5501a0b4ba5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 21 Jul 2012 07:56:07 +0000 Subject: sctp: Implement quick failover draft from tsvwg I've seen several attempts recently made to do quick failover of sctp transports by reducing various retransmit timers and counters. While its possible to implement a faster failover on multihomed sctp associations, its not particularly robust, in that it can lead to unneeded retransmits, as well as false connection failures due to intermittent latency on a network. Instead, lets implement the new ietf quick failover draft found here: http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 This will let the sctp stack identify transports that have had a small number of errors, and avoid using them quickly until their reliability can be re-established. I've tested this out on two virt guests connected via multiple isolated virt networks and believe its in compliance with the above draft and works well. Signed-off-by: Neil Horman CC: Vlad Yasevich CC: Sridhar Samudrala CC: "David S. Miller" CC: linux-sctp@vger.kernel.org CC: joe@perches.com Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 14 +++++ include/net/sctp/constants.h | 1 + include/net/sctp/structs.h | 20 ++++++- include/net/sctp/user.h | 11 ++++ net/sctp/associola.c | 37 +++++++++--- net/sctp/outqueue.c | 6 +- net/sctp/sm_sideeffect.c | 33 +++++++++-- net/sctp/socket.c | 101 +++++++++++++++++++++++++++++++++ net/sctp/sysctl.c | 9 +++ net/sctp/transport.c | 4 +- 10 files changed, 221 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5f3ef7f7fce..406a5226220 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1440,6 +1440,20 @@ path_max_retrans - INTEGER Default: 5 +pf_retrans - INTEGER + The number of retransmissions that will be attempted on a given path + before traffic is redirected to an alternate transport (should one + exist). Note this is distinct from path_max_retrans, as a path that + passes the pf_retrans threshold can still be used. Its only + deprioritized when a transmission path is selected by the stack. This + setting is primarily used to enable fast failover mechanisms without + having to reduce path_max_retrans to a very low value. See: + http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt + for details. Note also that a value of pf_retrans > path_max_retrans + disables this feature + + Default: 0 + rto_initial - INTEGER The initial round trip timeout value in milliseconds that will be used in calculating round trip times. This is the initial time interval diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 942b864f613..d053d2e9987 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -334,6 +334,7 @@ typedef enum { typedef enum { SCTP_TRANSPORT_UP, SCTP_TRANSPORT_DOWN, + SCTP_TRANSPORT_PF, } sctp_transport_cmd_t; /* These are the address scopes defined mainly for IPv4 addresses diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 536e439ddf1..fc5e60016e3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -161,6 +161,12 @@ extern struct sctp_globals { int max_retrans_path; int max_retrans_init; + /* Potentially-Failed.Max.Retrans sysctl value + * taken from: + * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 + */ + int pf_retrans; + /* * Policy for preforming sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf @@ -258,6 +264,7 @@ extern struct sctp_globals { #define sctp_sndbuf_policy (sctp_globals.sndbuf_policy) #define sctp_rcvbuf_policy (sctp_globals.rcvbuf_policy) #define sctp_max_retrans_path (sctp_globals.max_retrans_path) +#define sctp_pf_retrans (sctp_globals.pf_retrans) #define sctp_max_retrans_init (sctp_globals.max_retrans_init) #define sctp_sack_timeout (sctp_globals.sack_timeout) #define sctp_hb_interval (sctp_globals.hb_interval) @@ -990,10 +997,15 @@ struct sctp_transport { /* This is the max_retrans value for the transport and will * be initialized from the assocs value. This can be changed - * using SCTP_SET_PEER_ADDR_PARAMS socket option. + * using the SCTP_SET_PEER_ADDR_PARAMS socket option. */ __u16 pathmaxrxt; + /* This is the partially failed retrans value for the transport + * and will be initialized from the assocs value. This can be changed + * using the SCTP_PEER_ADDR_THLDS socket option + */ + int pf_retrans; /* PMTU : The current known path MTU. */ __u32 pathmtu; @@ -1664,6 +1676,12 @@ struct sctp_association { */ int max_retrans; + /* This is the partially failed retrans value for the transport + * and will be initialized from the assocs value. This can be + * changed using the SCTP_PEER_ADDR_THLDS socket option + */ + int pf_retrans; + /* Maximum number of times the endpoint will retransmit INIT */ __u16 max_init_attempts; diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 0842ef00b2f..1b02d7ad453 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -93,6 +93,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_GET_ASSOC_NUMBER 28 /* Read only */ #define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */ #define SCTP_AUTO_ASCONF 30 +#define SCTP_PEER_ADDR_THLDS 31 /* Internal Socket Options. Some of the sctp library functions are * implemented using these socket options. @@ -649,6 +650,7 @@ struct sctp_paddrinfo { */ enum sctp_spinfo_state { SCTP_INACTIVE, + SCTP_PF, SCTP_ACTIVE, SCTP_UNCONFIRMED, SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ @@ -741,4 +743,13 @@ typedef struct { int sd; } sctp_peeloff_arg_t; +/* + * Peer Address Thresholds socket option + */ +struct sctp_paddrthlds { + sctp_assoc_t spt_assoc_id; + struct sockaddr_storage spt_address; + __u16 spt_pathmaxrxt; + __u16 spt_pathpfthld; +}; #endif /* __net_sctp_user_h__ */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 8cf348e62e7..ebaef3ed606 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -124,6 +124,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; + asoc->pf_retrans = sctp_pf_retrans; + asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); asoc->rto_min = msecs_to_jiffies(sp->rtoinfo.srto_min); @@ -686,6 +688,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Set the path max_retrans. */ peer->pathmaxrxt = asoc->pathmaxrxt; + /* And the partial failure retrnas threshold */ + peer->pf_retrans = asoc->pf_retrans; + /* Initialize the peer's SACK delay timeout based on the * association configured value. */ @@ -841,6 +846,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, struct sctp_ulpevent *event; struct sockaddr_storage addr; int spc_state = 0; + bool ulp_notify = true; /* Record the transition on the transport. */ switch (command) { @@ -854,6 +860,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_CONFIRMED; else spc_state = SCTP_ADDR_AVAILABLE; + /* Don't inform ULP about transition from PF to + * active state and set cwnd to 1, see SCTP + * Quick failover draft section 5.1, point 5 + */ + if (transport->state == SCTP_PF) { + ulp_notify = false; + transport->cwnd = 1; + } transport->state = SCTP_ACTIVE; break; @@ -872,6 +886,11 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, spc_state = SCTP_ADDR_UNREACHABLE; break; + case SCTP_TRANSPORT_PF: + transport->state = SCTP_PF; + ulp_notify = false; + break; + default: return; } @@ -879,12 +898,15 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, /* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the * user. */ - memset(&addr, 0, sizeof(struct sockaddr_storage)); - memcpy(&addr, &transport->ipaddr, transport->af_specific->sockaddr_len); - event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, - 0, spc_state, error, GFP_ATOMIC); - if (event) - sctp_ulpq_tail_event(&asoc->ulpq, event); + if (ulp_notify) { + memset(&addr, 0, sizeof(struct sockaddr_storage)); + memcpy(&addr, &transport->ipaddr, + transport->af_specific->sockaddr_len); + event = sctp_ulpevent_make_peer_addr_change(asoc, &addr, + 0, spc_state, error, GFP_ATOMIC); + if (event) + sctp_ulpq_tail_event(&asoc->ulpq, event); + } /* Select new active and retran paths. */ @@ -900,7 +922,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, transports) { if ((t->state == SCTP_INACTIVE) || - (t->state == SCTP_UNCONFIRMED)) + (t->state == SCTP_UNCONFIRMED) || + (t->state == SCTP_PF)) continue; if (!first || t->last_time_heard > first->last_time_heard) { second = first; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a0fa19f5650..e7aa177c952 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -792,7 +792,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (!new_transport) new_transport = asoc->peer.active_path; } else if ((new_transport->state == SCTP_INACTIVE) || - (new_transport->state == SCTP_UNCONFIRMED)) { + (new_transport->state == SCTP_UNCONFIRMED) || + (new_transport->state == SCTP_PF)) { /* If the chunk is Heartbeat or Heartbeat Ack, * send it to chunk->transport, even if it's * inactive. @@ -987,7 +988,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) new_transport = chunk->transport; if (!new_transport || ((new_transport->state == SCTP_INACTIVE) || - (new_transport->state == SCTP_UNCONFIRMED))) + (new_transport->state == SCTP_UNCONFIRMED) || + (new_transport->state == SCTP_PF))) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) continue; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8716da1a859..fe99628e125 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -76,6 +76,8 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, sctp_cmd_seq_t *commands, gfp_t gfp); +static void sctp_cmd_hb_timer_update(sctp_cmd_seq_t *cmds, + struct sctp_transport *t); /******************************************************************** * Helper functions ********************************************************************/ @@ -470,7 +472,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { * notification SHOULD be sent to the upper layer. * */ -static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, +static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, + struct sctp_association *asoc, struct sctp_transport *transport, int is_hb) { @@ -495,6 +498,23 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, transport->error_count++; } + /* If the transport error count is greater than the pf_retrans + * threshold, and less than pathmaxrtx, then mark this transport + * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1, + * point 1 + */ + if ((transport->state != SCTP_PF) && + (asoc->pf_retrans < transport->pathmaxrxt) && + (transport->error_count > asoc->pf_retrans)) { + + sctp_assoc_control_transport(asoc, transport, + SCTP_TRANSPORT_PF, + 0); + + /* Update the hb timer to resend a heartbeat every rto */ + sctp_cmd_hb_timer_update(commands, transport); + } + if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", @@ -699,6 +719,10 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, SCTP_HEARTBEAT_SUCCESS); } + if (t->state == SCTP_PF) + sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, + SCTP_HEARTBEAT_SUCCESS); + /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. @@ -1565,8 +1589,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_STRIKE: /* Mark one strike against a transport. */ - sctp_do_8_2_transport_strike(asoc, cmd->obj.transport, - 0); + sctp_do_8_2_transport_strike(commands, asoc, + cmd->obj.transport, 0); break; case SCTP_CMD_TRANSPORT_IDLE: @@ -1576,7 +1600,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_TRANSPORT_HB_SENT: t = cmd->obj.transport; - sctp_do_8_2_transport_strike(asoc, t, 1); + sctp_do_8_2_transport_strike(commands, asoc, + t, 1); t->hb_sent = 1; break; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5d488cdcf67..5e259817a7f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3478,6 +3478,56 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, } +/* + * SCTP_PEER_ADDR_THLDS + * + * This option allows us to alter the partially failed threshold for one or all + * transports in an association. See Section 6.1 of: + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt + */ +static int sctp_setsockopt_paddr_thresholds(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_paddrthlds val; + struct sctp_transport *trans; + struct sctp_association *asoc; + + if (optlen < sizeof(struct sctp_paddrthlds)) + return -EINVAL; + if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, + sizeof(struct sctp_paddrthlds))) + return -EFAULT; + + + if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc) + return -ENOENT; + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + } + + if (val.spt_pathmaxrxt) + asoc->pathmaxrxt = val.spt_pathmaxrxt; + asoc->pf_retrans = val.spt_pathpfthld; + } else { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) + return -ENOENT; + + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + } + + return 0; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -3627,6 +3677,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_AUTO_ASCONF: retval = sctp_setsockopt_auto_asconf(sk, optval, optlen); break; + case SCTP_PEER_ADDR_THLDS: + retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -5498,6 +5551,51 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, return 0; } +/* + * SCTP_PEER_ADDR_THLDS + * + * This option allows us to fetch the partially failed threshold for one or all + * transports in an association. See Section 6.1 of: + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt + */ +static int sctp_getsockopt_paddr_thresholds(struct sock *sk, + char __user *optval, + int len, + int __user *optlen) +{ + struct sctp_paddrthlds val; + struct sctp_transport *trans; + struct sctp_association *asoc; + + if (len < sizeof(struct sctp_paddrthlds)) + return -EINVAL; + len = sizeof(struct sctp_paddrthlds); + if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) + return -EFAULT; + + if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc) + return -ENOENT; + + val.spt_pathpfthld = asoc->pf_retrans; + val.spt_pathmaxrxt = asoc->pathmaxrxt; + } else { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) + return -ENOENT; + + val.spt_pathmaxrxt = trans->pathmaxrxt; + val.spt_pathpfthld = trans->pf_retrans; + } + + if (put_user(len, optlen) || copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; +} + SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -5636,6 +5734,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_AUTO_ASCONF: retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen); break; + case SCTP_PEER_ADDR_THLDS: + retval = sctp_getsockopt_paddr_thresholds(sk, optval, len, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index e5fe639c89e..2b2bfe933ff 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -140,6 +140,15 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &int_max }, + { + .procname = "pf_retrans", + .data = &sctp_pf_retrans, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max + }, { .procname = "max_init_retransmits", .data = &sctp_max_retrans_init, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a6b7ee9ce28..d1c652ed2f3 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -87,6 +87,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Initialize the default path max_retrans. */ peer->pathmaxrxt = sctp_max_retrans_path; + peer->pf_retrans = sctp_pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -595,7 +596,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; timeout = t->rto + sctp_jitter(t->rto); - if (t->state != SCTP_UNCONFIRMED) + if ((t->state != SCTP_UNCONFIRMED) && + (t->state != SCTP_PF)) timeout += t->hbinterval; timeout += jiffies; return timeout; -- cgit v1.2.3-70-g09d2 From 406a3c638ce8b17d9704052c07955490f732c2b8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 20 Jul 2012 10:39:25 +0000 Subject: net: netprio_cgroup: rework update socket logic Instead of updating the sk_cgrp_prioidx struct field on every send this only updates the field when a task is moved via cgroup infrastructure. This allows sockets that may be used by a kernel worker thread to be managed. For example in the iscsi case today a user can put iscsid in a netprio cgroup and control traffic will be sent with the correct sk_cgrp_prioidx value set but as soon as data is sent the kernel worker thread isssues a send and sk_cgrp_prioidx is updated with the kernel worker threads value which is the default case. It seems more correct to only update the field when the user explicitly sets it via control group infrastructure. This allows the users to manage sockets that may be used with other threads. Signed-off-by: John Fastabend Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/net.h | 1 + include/net/netprio_cgroup.h | 4 ++-- net/core/netprio_cgroup.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ net/core/sock.c | 6 ++--- net/socket.c | 5 ++--- 5 files changed, 61 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/net.h b/include/linux/net.h index dc95700de5d..99276c3dc89 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -248,6 +248,7 @@ extern int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); +extern struct socket *sock_from_file(struct file *file, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index d58fdec4759..2719dec6b5a 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -35,7 +35,7 @@ struct cgroup_netprio_state { extern int net_prio_subsys_id; #endif -extern void sock_update_netprioidx(struct sock *sk); +extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) @@ -82,7 +82,7 @@ static inline u32 task_netprioidx(struct task_struct *p) #endif /* CONFIG_NETPRIO_CGROUP */ #else -#define sock_update_netprioidx(sk) +#define sock_update_netprioidx(sk, task) #endif #endif /* _NET_CLS_CGROUP_H */ diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index b2e9caa1ad1..63d15e8f80e 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -25,6 +25,8 @@ #include #include +#include + #define PRIOIDX_SZ 128 static unsigned long prioidx_map[PRIOIDX_SZ]; @@ -272,6 +274,56 @@ out_free_devname: return ret; } +void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + struct task_struct *p; + char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); + + if (!tmp) { + pr_warn("Unable to attach cgrp due to alloc failure!\n"); + return; + } + + cgroup_taskset_for_each(p, cgrp, tset) { + unsigned int fd; + struct fdtable *fdt; + struct files_struct *files; + + task_lock(p); + files = p->files; + if (!files) { + task_unlock(p); + continue; + } + + rcu_read_lock(); + fdt = files_fdtable(files); + for (fd = 0; fd < fdt->max_fds; fd++) { + char *path; + struct file *file; + struct socket *sock; + unsigned long s; + int rv, err = 0; + + file = fcheck_files(files, fd); + if (!file) + continue; + + path = d_path(&file->f_path, tmp, PAGE_SIZE); + rv = sscanf(path, "socket:[%lu]", &s); + if (rv <= 0) + continue; + + sock = sock_from_file(file, &err); + if (!err) + sock_update_netprioidx(sock->sk, p); + } + rcu_read_unlock(); + task_unlock(p); + } + kfree(tmp); +} + static struct cftype ss_files[] = { { .name = "prioidx", @@ -289,6 +341,7 @@ struct cgroup_subsys net_prio_subsys = { .name = "net_prio", .create = cgrp_create, .destroy = cgrp_destroy, + .attach = net_prio_attach, #ifdef CONFIG_NETPRIO_CGROUP .subsys_id = net_prio_subsys_id, #endif diff --git a/net/core/sock.c b/net/core/sock.c index 24039ac1242..2676a88f533 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1180,12 +1180,12 @@ void sock_update_classid(struct sock *sk) } EXPORT_SYMBOL(sock_update_classid); -void sock_update_netprioidx(struct sock *sk) +void sock_update_netprioidx(struct sock *sk, struct task_struct *task) { if (in_interrupt()) return; - sk->sk_cgrp_prioidx = task_netprioidx(current); + sk->sk_cgrp_prioidx = task_netprioidx(task); } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif @@ -1215,7 +1215,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, atomic_set(&sk->sk_wmem_alloc, 1); sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_netprioidx(sk, current); } return sk; diff --git a/net/socket.c b/net/socket.c index 0452dca4cd2..dfe5b66c97e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -398,7 +398,7 @@ int sock_map_fd(struct socket *sock, int flags) } EXPORT_SYMBOL(sock_map_fd); -static struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ @@ -406,6 +406,7 @@ static struct socket *sock_from_file(struct file *file, int *err) *err = -ENOTSOCK; return NULL; } +EXPORT_SYMBOL(sock_from_file); /** * sockfd_lookup - Go from a file number to its socket slot @@ -554,8 +555,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, sock_update_classid(sock->sk); - sock_update_netprioidx(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; -- cgit v1.2.3-70-g09d2 From 6120d3dbb1220792ebea88cd475e1ec8f8620a93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 10:03:05 +0400 Subject: get rid of ->scm_work_list recursion in __scm_destroy() will be cut by delaying final fput() Signed-off-by: Al Viro --- include/linux/sched.h | 1 - include/net/scm.h | 1 - net/core/scm.c | 22 +++------------------- 3 files changed, 3 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/sched.h b/include/linux/sched.h index af3555cc760..598ba2da786 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1546,7 +1546,6 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; - struct list_head *scm_work_list; #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; diff --git a/include/net/scm.h b/include/net/scm.h index d456f4c71a3..079d7887dac 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -13,7 +13,6 @@ #define SCM_MAX_FD 253 struct scm_fp_list { - struct list_head list; short count; short max; struct file *fp[SCM_MAX_FD]; diff --git a/net/core/scm.c b/net/core/scm.c index 611c5efd4cb..8f6ccfd68ef 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -109,25 +109,9 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - if (current->scm_work_list) { - list_add_tail(&fpl->list, current->scm_work_list); - } else { - LIST_HEAD(work_list); - - current->scm_work_list = &work_list; - - list_add(&fpl->list, &work_list); - while (!list_empty(&work_list)) { - fpl = list_first_entry(&work_list, struct scm_fp_list, list); - - list_del(&fpl->list); - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); - } - - current->scm_work_list = NULL; - } + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); } } EXPORT_SYMBOL(__scm_destroy); -- cgit v1.2.3-70-g09d2 From 563d34d05786263893ba4a1042eb9b9374127cf5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jul 2012 09:48:52 +0200 Subject: tcp: dont drop MTU reduction indications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ICMP messages generated in output path if frame length is bigger than mtu are actually lost because socket is owned by user (doing the xmit) One example is the ipgre_tunnel_xmit() calling icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); We had a similar case fixed in commit a34a101e1e6 (ipv6: disable GSO on sockets hitting dst_allfrag). Problem of such fix is that it relied on retransmit timers, so short tcp sessions paid a too big latency increase price. This patch uses the tcp_release_cb() infrastructure so that MTU reduction messages (ICMP messages) are not lost, and no extra delay is added in TCP transmits. Reported-by: Maciej Å»enczykowski Diagnosed-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Nandita Dukkipati Cc: Tom Herbert Cc: Tore Anderson Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/sock.h | 1 + net/ipv4/tcp_ipv4.c | 19 +++++++++++++++---- net/ipv4/tcp_output.c | 6 +++++- net/ipv6/tcp_ipv6.c | 40 ++++++++++++++++++++++++---------------- 5 files changed, 51 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2761856987b..eb125a4c30b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -493,6 +493,9 @@ struct tcp_sock { u32 probe_seq_start; u32 probe_seq_end; } mtu_probe; + u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG + * while socket was owned by user. + */ #ifdef CONFIG_TCP_MD5SIG /* TCP AF-Specific parts; only used by MD5 Signature support so far */ @@ -518,6 +521,9 @@ enum tsq_flags { TCP_TSQ_DEFERRED, /* tcp_tasklet_func() found socket was owned */ TCP_WRITE_TIMER_DEFERRED, /* tcp_write_timer() found socket was owned */ TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */ + TCP_MTU_REDUCED_DEFERRED, /* tcp_v{4|6}_err() could not call + * tcp_v{4|6}_mtu_reduced() + */ }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/net/sock.h b/include/net/sock.h index 88de092df50..e067f8c18f8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -859,6 +859,7 @@ struct proto { struct sk_buff *skb); void (*release_cb)(struct sock *sk); + void (*mtu_reduced)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59110caeb07..bc5432e3c77 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -275,12 +275,15 @@ failure: EXPORT_SYMBOL(tcp_v4_connect); /* - * This routine does path mtu discovery as defined in RFC1191. + * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. + * It can be called through tcp_release_cb() if socket was owned by user + * at the time tcp_v4_err() was called to handle ICMP message. */ -static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu) +static void tcp_v4_mtu_reduced(struct sock *sk) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); + u32 mtu = tcp_sk(sk)->mtu_info; /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs * send out by Linux are always <576bytes so they should go through @@ -373,8 +376,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. + * We do take care of PMTU discovery (RFC1191) special case : + * we can receive locally generated ICMP messages while socket is held. */ - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk) && + type != ICMP_DEST_UNREACH && + code != ICMP_FRAG_NEEDED) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) @@ -409,8 +416,11 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + tp->mtu_info = info; if (!sock_owned_by_user(sk)) - do_pmtu_discovery(sk, iph, info); + tcp_v4_mtu_reduced(sk); + else + set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); goto out; } @@ -2596,6 +2606,7 @@ struct proto tcp_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .release_cb = tcp_release_cb, + .mtu_reduced = tcp_v4_mtu_reduced, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 950aebfd996..33cd065cfbd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -885,7 +885,8 @@ static void tcp_tasklet_func(unsigned long data) #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \ (1UL << TCP_WRITE_TIMER_DEFERRED) | \ - (1UL << TCP_DELACK_TIMER_DEFERRED)) + (1UL << TCP_DELACK_TIMER_DEFERRED) | \ + (1UL << TCP_MTU_REDUCED_DEFERRED)) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -914,6 +915,9 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) tcp_delack_timer_handler(sk); + + if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) + sk->sk_prot->mtu_reduced(sk); } EXPORT_SYMBOL(tcp_release_cb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0302ec3fecf..f49476e2d88 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -315,6 +315,23 @@ failure: return err; } +static void tcp_v6_mtu_reduced(struct sock *sk) +{ + struct dst_entry *dst; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + + dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info); + if (!dst) + return; + + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + tcp_sync_mss(sk, dst_mtu(dst)); + tcp_simple_retransmit(sk); + } +} + static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { @@ -342,7 +359,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } bh_lock_sock(sk); - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) @@ -371,21 +388,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (type == ICMPV6_PKT_TOOBIG) { - struct dst_entry *dst; - - if (sock_owned_by_user(sk)) - goto out; - if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) - goto out; - - dst = inet6_csk_update_pmtu(sk, ntohl(info)); - if (!dst) - goto out; - - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { - tcp_sync_mss(sk, dst_mtu(dst)); - tcp_simple_retransmit(sk); - } + tp->mtu_info = ntohl(info); + if (!sock_owned_by_user(sk)) + tcp_v6_mtu_reduced(sk); + else + set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); goto out; } @@ -1949,6 +1956,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, + .mtu_reduced = tcp_v6_mtu_reduced, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, -- cgit v1.2.3-70-g09d2 From 92101b3b2e3178087127709a556b091dae314e9e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Jul 2012 16:29:00 -0700 Subject: ipv4: Prepare for change of rt->rt_iif encoding. Use inet_iif() consistently, and for TCP record the input interface of cached RX dst in inet sock. rt->rt_iif is going to be encoded differently, so that we can legitimately cache input routes in the FIB info more aggressively. When the input interface is "use SKB device index" the rt->rt_iif will be set to zero. This forces us to move the TCP RX dst cache installation into the ipv4 specific code, and as well it should since doing the route caching for ipv6 is pointless at the moment since it is not inspected in the ipv6 input paths yet. Also, remove the unlikely on dst->obsolete, all ipv4 dsts have obsolete set to a non-zero value to force invocation of the check callback. Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + net/dccp/ipv4.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/ip_sockglue.c | 5 ++--- net/ipv4/route.c | 2 +- net/ipv4/tcp_input.c | 12 ------------ net/ipv4/tcp_ipv4.c | 24 ++++++++++++++++++------ net/sched/cls_route.c | 2 +- net/sched/em_meta.c | 2 +- net/sctp/protocol.c | 2 +- 10 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 924d7b98ab6..613cfa40167 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -172,6 +172,7 @@ struct inet_sock { int uc_index; int mc_index; __be32 mc_addr; + int rx_dst_ifindex; struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; }; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 25428d0c50c..176ecdba4a2 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -481,7 +481,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct rtable *rt; const struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { - .flowi4_oif = skb_rtable(skb)->rt_iif, + .flowi4_oif = inet_iif(skb), .daddr = iph->saddr, .saddr = iph->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f2a06beffbd..f2eccd53174 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -571,7 +571,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) rcu_read_lock(); if (rt_is_input_route(rt) && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index_rcu(net, rt->rt_iif); + dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index de29f46f68b..5eea4a81104 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1027,10 +1027,9 @@ e_inval: void ipv4_pktinfo_prepare(struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - const struct rtable *rt = skb_rtable(skb); - if (rt) { - pktinfo->ipi_ifindex = rt->rt_iif; + if (skb_rtable(skb)) { + pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 34017be87c8..f6be7811939 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -848,7 +848,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (log_martians && peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", - &ip_hdr(skb)->saddr, rt->rt_iif, + &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &rt->rt_gateway); #endif } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 21d7f8f3a7a..3e07a64ca44 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5391,18 +5391,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; - if (unlikely(dst->obsolete)) { - if (dst->ops->check(dst, 0) == NULL) { - dst_release(dst); - sk->sk_rx_dst = NULL; - } - } - } - if (unlikely(sk->sk_rx_dst == NULL)) - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - /* * Header prediction. * The code loosely follows the one in the famous diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bc5432e3c77..3e30548ac32 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1618,6 +1618,20 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb); + if (sk->sk_rx_dst) { + struct dst_entry *dst = sk->sk_rx_dst; + if (dst->ops->check(dst, 0) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (unlikely(sk->sk_rx_dst == NULL)) { + struct inet_sock *icsk = inet_sk(sk); + struct rtable *rt = skb_rtable(skb); + + sk->sk_rx_dst = dst_clone(&rt->dst); + icsk->rx_dst_ifindex = inet_iif(skb); + } if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1700,14 +1714,12 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); if (dst) dst = dst_check(dst, 0); - if (dst) { - struct rtable *rt = (struct rtable *) dst; - - if (rt->rt_iif == dev->ifindex) - skb_dst_set_noref(skb, dst); - } + if (dst && + icsk->rx_dst_ifindex == dev->ifindex) + skb_dst_set_noref(skb, dst); } } } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 36fec422740..44f405cb9aa 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (head == NULL) goto old_method; - iif = ((struct rtable *)dst)->rt_iif; + iif = inet_iif(skb); h = route4_fastmap_hash(id, iif); if (id == head->fastmap[h].id && diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4790c696cbc..4ab6e332557 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif) if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb_rtable(skb)->rt_iif; + dst->value = inet_iif(skb); } /************************************************************************** diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9c90811d113..1f89c4e6964 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -568,7 +568,7 @@ static void sctp_v4_get_saddr(struct sctp_sock *sk, /* What interface did this skb arrive on? */ static int sctp_v4_skb_iif(const struct sk_buff *skb) { - return skb_rtable(skb)->rt_iif; + return inet_iif(skb); } /* Was this packet marked by Explicit Congestion Notification? */ -- cgit v1.2.3-70-g09d2 From 13378cad02afc2adc6c0e07fca03903c7ada0b37 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Jul 2012 13:57:45 -0700 Subject: ipv4: Change rt->rt_iif encoding. On input packet processing, rt->rt_iif will be zero if we should use skb->dev->ifindex. Since we access rt->rt_iif consistently via inet_iif(), that is the only spot whose interpretation have to adjust. Signed-off-by: David S. Miller --- include/net/route.h | 6 +++++- net/ipv4/route.c | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 60d611dc5ce..c29ef2733f2 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -277,7 +277,11 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable static inline int inet_iif(const struct sk_buff *skb) { - return skb_rtable(skb)->rt_iif; + int iif = skb_rtable(skb)->rt_iif; + + if (iif) + return iif; + return skb->skb_iif; } extern int sysctl_ip_default_ttl; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6be7811939..6bcb8fc71cb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1309,7 +1309,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; rth->rt_is_input= 1; - rth->rt_iif = dev->ifindex; + rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; if (our) { @@ -1435,7 +1435,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_flags = flags; rth->rt_type = res->type; rth->rt_is_input = 1; - rth->rt_iif = in_dev->dev->ifindex; + rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; @@ -1608,7 +1608,7 @@ local_input: rth->rt_flags = flags|RTCF_LOCAL; rth->rt_type = res.type; rth->rt_is_input = 1; - rth->rt_iif = dev->ifindex; + rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; if (res.type == RTN_UNREACHABLE) { @@ -1772,7 +1772,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_flags = flags; rth->rt_type = type; rth->rt_is_input = 0; - rth->rt_iif = orig_oif ? : dev_out->ifindex; + rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; -- cgit v1.2.3-70-g09d2 From c6cffba4ffa26a8ffacd0bb9f3144e34f20da7de Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jul 2012 11:14:38 +0000 Subject: ipv4: Fix input route performance regression. With the routing cache removal we lost the "noref" code paths on input, and this can kill some routing workloads. Reinstate the noref path when we hit a cached route in the FIB nexthops. With help from Eric Dumazet. Reported-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/route.h | 19 +++++++++++++++++-- net/ipv4/arp.c | 2 +- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/ip_input.c | 4 ++-- net/ipv4/route.c | 48 ++++++++++++++++++++++-------------------------- net/ipv4/xfrm4_input.c | 4 ++-- 7 files changed, 48 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index c29ef2733f2..8c52bc6f1c9 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -157,8 +158,22 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 return ip_route_output_key(net, fl4); } -extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, - u8 tos, struct net_device *devin); +extern int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin); + +static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, + u8 tos, struct net_device *devin) +{ + int err; + + rcu_read_lock(); + err = ip_route_input_noref(skb, dst, src, tos, devin); + if (!err) + skb_dst_force(skb); + rcu_read_unlock(); + + return err; +} extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u32 mark, u8 protocol, int flow_flags); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a0124eb7dbe..77e87aff419 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb) } if (arp->ar_op == htons(ARPOP_REQUEST) && - ip_route_input(skb, tip, sip, 0, dev) == 0) { + ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { rt = skb_rtable(skb); addr_type = rt->rt_type; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f..da0cc2e6b25 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -172,9 +172,9 @@ static void free_fib_info_rcu(struct rcu_head *head) if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) - dst_release(&nexthop_nh->nh_rth_output->dst); + dst_free(&nexthop_nh->nh_rth_output->dst); if (nexthop_nh->nh_rth_input) - dst_release(&nexthop_nh->nh_rth_input->dst); + dst_free(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7ad88e5e711..8d07c973409 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) /* skb dst is stale, drop it, and perform route lookup again */ skb_dst_drop(head); iph = ip_hdr(head); - err = ip_route_input(head, iph->daddr, iph->saddr, - iph->tos, head->dev); + err = ip_route_input_noref(head, iph->daddr, iph->saddr, + iph->tos, head->dev); if (err) goto out_rcu_unlock; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 93134b0eab0..bda8cac2ae9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -339,8 +339,8 @@ static int ip_rcv_finish(struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_dst(skb)) { - int err = ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) NET_INC_STATS_BH(dev_net(skb->dev), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f7bb7185c5..fc1a81ca79a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,10 +1199,9 @@ restart: fnhe->fnhe_stamp = jiffies; } -static inline void rt_release_rcu(struct rcu_head *head) +static inline void rt_free(struct rtable *rt) { - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_release(dst); + call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) @@ -1216,9 +1215,15 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) prev = cmpxchg(p, orig, rt); if (prev == orig) { - dst_clone(&rt->dst); if (orig) - call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); + rt_free(orig); + } else { + /* Routes we intend to cache in the FIB nexthop have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } } @@ -1245,7 +1250,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_HOST)) + if (!(rt->dst.flags & DST_NOCACHE)) rt_cache_route(nh, rt); } @@ -1261,7 +1266,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, bool nopolicy, bool noxfrm, bool will_cache) { return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : DST_HOST) | DST_NOCACHE | + (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? DST_NOXFRM : 0)); } @@ -1366,8 +1371,7 @@ static void ip_handle_martian_source(struct net_device *dev, static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos, - struct rtable **result) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable *rth; int err; @@ -1418,7 +1422,7 @@ static int __mkroute_input(struct sk_buff *skb, if (!itag) { rth = FIB_RES_NH(*res).nh_rth_input; if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); + skb_dst_set_noref(skb, &rth->dst); goto out; } do_cache = true; @@ -1445,8 +1449,8 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + skb_dst_set(skb, &rth->dst); out: - *result = rth; err = 0; cleanup: return err; @@ -1458,21 +1462,13 @@ static int ip_mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { - struct rtable *rth = NULL; - int err; - #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) fib_select_multipath(res); #endif /* create a routing cache entry */ - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); - if (err) - return err; - - skb_dst_set(skb, &rth->dst); - return 0; + return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); } /* @@ -1588,8 +1584,9 @@ local_input: if (!itag) { rth = FIB_RES_NH(res).nh_rth_input; if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - goto set_and_out; + skb_dst_set_noref(skb, &rth->dst); + err = 0; + goto out; } do_cache = true; } @@ -1620,7 +1617,6 @@ local_input: } if (do_cache) rt_cache_route(&FIB_RES_NH(res), rth); -set_and_out: skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -1658,8 +1654,8 @@ martian_source_keep_err: goto out; } -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev) +int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) { int res; @@ -1702,7 +1698,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_unlock(); return res; } -EXPORT_SYMBOL(ip_route_input); +EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 58d23a57250..06814b6216d 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) if (skb_dst(skb) == NULL) { const struct iphdr *iph = ip_hdr(skb); - if (ip_route_input(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev)) + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) goto drop; } return dst_input(skb); -- cgit v1.2.3-70-g09d2 From c7109986db3c945f50ceed884a30e0fd8af3b89b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jul 2012 12:18:11 +0000 Subject: ipv6: Early TCP socket demux This is the IPv6 missing bits for infrastructure added in commit 41063e9dd1195 (ipv4: Early TCP socket demux.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 13 +++++++------ include/net/protocol.h | 2 ++ net/ipv4/ip_input.c | 1 + net/ipv6/ip6_input.c | 13 +++++++++++-- net/ipv6/tcp_ipv6.c | 38 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 00cbb4384c7..9e34c877a77 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -96,14 +96,15 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, const __be16 sport, const __be16 dport) { - struct sock *sk; + struct sock *sk = skb_steal_sock(skb); - if (unlikely(sk = skb_steal_sock(skb))) + if (sk) return sk; - else return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, - &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), - inet6_iif(skb)); + + return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, + &ipv6_hdr(skb)->saddr, sport, + &ipv6_hdr(skb)->daddr, ntohs(dport), + inet6_iif(skb)); } extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, diff --git a/include/net/protocol.h b/include/net/protocol.h index 057f2d31556..929528c73fe 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -52,6 +52,8 @@ struct net_protocol { #if IS_ENABLED(CONFIG_IPV6) struct inet6_protocol { + void (*early_demux)(struct sk_buff *skb); + int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index bda8cac2ae9..981ff1eef28 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,6 +314,7 @@ drop: } int sysctl_ip_early_demux __read_mostly = 1; +EXPORT_SYMBOL(sysctl_ip_early_demux); static int ip_rcv_finish(struct sk_buff *skb) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af..47975e363fc 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,9 +47,18 @@ -inline int ip6_rcv_finish( struct sk_buff *skb) +int ip6_rcv_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct inet6_protocol *ipprot; + + rcu_read_lock(); + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + if (!skb_dst(skb)) ip6_route_input(skb); return dst_input(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d88..221224e7250 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1674,6 +1674,43 @@ do_time_wait: goto discard_it; } +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); + if (dst) + dst = dst_check(dst, 0); + if (dst && + icsk->rx_dst_ifindex == inet6_iif(skb)) + skb_dst_set_noref(skb, dst); + } + } +} + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1984,6 +2021,7 @@ struct proto tcpv6_prot = { }; static const struct inet6_protocol tcpv6_protocol = { + .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check, -- cgit v1.2.3-70-g09d2 From 404e0a8b6a55d5e1cd138c6deb1bca9abdf75d8c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 23:20:37 +0000 Subject: net: ipv4: fix RCU races on dst refcounts commit c6cffba4ffa2 (ipv4: Fix input route performance regression.) added various fatal races with dst refcounts. crashes happen on tcp workloads if routes are added/deleted at the same time. The dst_free() calls from free_fib_info_rcu() are clearly racy. We need instead regular dst refcounting (dst_release()) and make sure dst_release() is aware of RCU grace periods : Add DST_RCU_FREE flag so that dst_release() respects an RCU grace period before dst destruction for cached dst Introduce a new inet_sk_rx_dst_set() helper, using atomic_inc_not_zero() to make sure we dont increase a zero refcount (On a dst currently waiting an rcu grace period before destruction) rt_cache_route() must take a reference on the new cached route, and release it if was not able to install it. With this patch, my machines survive various benchmarks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 7 +------ include/net/inet_sock.h | 13 +++++++++++++ net/core/dst.c | 26 +++++++++++++++++++++----- net/decnet/dn_route.c | 6 ++++++ net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/route.c | 16 ++++------------ net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv4/tcp_minisocks.c | 3 +-- 9 files changed, 55 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index baf59789006..31a9fd39edb 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,6 +61,7 @@ struct dst_entry { #define DST_NOPEER 0x0040 #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 +#define DST_RCU_FREE 0x0200 unsigned short pending_confirm; @@ -382,12 +383,6 @@ static inline void dst_free(struct dst_entry *dst) __dst_free(dst); } -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} - static inline void dst_confirm(struct dst_entry *dst) { dst->pending_confirm = 1; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 613cfa40167..e3fd34c83ac 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,4 +249,17 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } +static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + if (atomic_inc_not_zero(&dst->__refcnt)) { + if (!(dst->flags & DST_RCU_FREE)) + dst->flags |= DST_RCU_FREE; + + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + } +} + #endif /* _INET_SOCK_H */ diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d2941..d9e33ebe170 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -258,6 +258,15 @@ again: } EXPORT_SYMBOL(dst_destroy); +static void dst_rcu_destroy(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -265,10 +274,14 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); + if (unlikely(dst->flags & (DST_NOCACHE | DST_RCU_FREE)) && !newrefcnt) { + if (dst->flags & DST_RCU_FREE) { + call_rcu_bh(&dst->rcu_head, dst_rcu_destroy); + } else { + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); + } } } } @@ -320,11 +333,14 @@ EXPORT_SYMBOL(__dst_destroy_metrics_generic); */ void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { + bool hold; + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - if (unlikely(dst->flags & DST_NOCACHE)) { + hold = (dst->flags & (DST_NOCACHE | DST_RCU_FREE)) == DST_NOCACHE; + if (unlikely(hold)) { dst_hold(dst); skb_dst_set(skb, dst); } else { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 85a3604c87c..26719779ad8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -184,6 +184,12 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) return dn_rt_hash_mask & (unsigned int)tmp; } +static inline void dst_rcu_free(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + dst_free(dst); +} + static inline void dnrt_free(struct dn_route *rt) { call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da0cc2e6b25..e55171f184f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -172,9 +172,9 @@ static void free_fib_info_rcu(struct rcu_head *head) if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); if (nexthop_nh->nh_rth_output) - dst_free(&nexthop_nh->nh_rth_output->dst); + dst_release(&nexthop_nh->nh_rth_output->dst); if (nexthop_nh->nh_rth_input) - dst_free(&nexthop_nh->nh_rth_input->dst); + dst_release(&nexthop_nh->nh_rth_input->dst); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fc1a81ca79a..d6eabcfe8a9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,11 +1199,6 @@ restart: fnhe->fnhe_stamp = jiffies; } -static inline void rt_free(struct rtable *rt) -{ - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p = &nh->nh_rth_output; @@ -1213,17 +1208,14 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) orig = *p; + rt->dst.flags |= DST_RCU_FREE; + dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) - rt_free(orig); + dst_release(&orig->dst); } else { - /* Routes we intend to cache in the FIB nexthop have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; + dst_release(&rt->dst); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a356e1fecf9..9be30b039ae 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5604,8 +5604,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + inet_sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2fbd9921253..7f91e5ac827 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1617,19 +1617,19 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); - if (sk->sk_rx_dst) { - struct dst_entry *dst = sk->sk_rx_dst; + if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, 0) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) { - sk->sk_rx_dst = dst_clone(skb_dst(skb)); - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - } + if (unlikely(sk->sk_rx_dst == NULL)) + inet_sk_rx_dst_set(sk, skb); + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 3f1cc2028ed..232a90c3ec8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newsk->sk_rx_dst = dst_clone(skb_dst(skb)); - inet_sk(newsk)->rx_dst_ifindex = skb->skb_iif; + inet_sk_rx_dst_set(newsk, skb); /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to -- cgit v1.2.3-70-g09d2 From 0c7462a2351b4cc502f326aad7fedd04909928be Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jul 2012 07:14:29 +0000 Subject: ipv4: remove rt_cache_rebuild_count After IP route cache removal, rt_cache_rebuild_count is no longer used. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ------ include/net/netns/ipv4.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 11 ----------- 3 files changed, 19 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 406a5226220..ca447b35b83 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -48,12 +48,6 @@ min_adv_mss - INTEGER The advertised MSS depends on the first hop route MTU, but will never be lower than this setting. -rt_cache_rebuild_count - INTEGER - The per net-namespace route cache emergency rebuild threshold. - Any net-namespace having its route cache rebuilt due to - a hash bucket chain being too long more than this many times - will have its route caching disabled - IP Fragmentation: ipfrag_high_thresh - INTEGER diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 0ffb8e31f3c..1474dd65c66 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -61,8 +61,6 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; - int sysctl_rt_cache_rebuild_count; - int current_rt_cache_rebuild_count; unsigned int sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c325572..4b6487a6827 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -783,13 +783,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "rt_cache_rebuild_count", - .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "ping_group_range", .data = &init_net.ipv4.sysctl_ping_group_range, @@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[5].data = &net->ipv4.sysctl_icmp_ratemask; table[6].data = - &net->ipv4.sysctl_rt_cache_rebuild_count; - table[7].data = &net->ipv4.sysctl_ping_group_range; } @@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = 1; net->ipv4.sysctl_ping_group_range[1] = 0; - net->ipv4.sysctl_rt_cache_rebuild_count = 4; - tcp_init_mem(net); net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); -- cgit v1.2.3-70-g09d2 From 54764bb647b2e847c512acf8d443df965da35000 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jul 2012 01:08:23 +0000 Subject: ipv4: Restore old dst_free() behavior. commit 404e0a8b6a55 (net: ipv4: fix RCU races on dst refcounts) tried to solve a race but added a problem at device/fib dismantle time : We really want to call dst_free() as soon as possible, even if sockets still have dst in their cache. dst_release() calls in free_fib_info_rcu() are not welcomed. Root of the problem was that now we also cache output routes (in nh_rth_output), we must use call_rcu() instead of call_rcu_bh() in rt_free(), because output route lookups are done in process context. Based on feedback and initial patch from David Miller (adding another call_rcu_bh() call in fib, but it appears it was not the right fix) I left the inet_sk_rx_dst_set() helper and added __rcu attributes to nh_rth_output and nh_rth_input to better document what is going on in this code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 7 ++++++- include/net/inet_sock.h | 10 +++------- include/net/ip_fib.h | 4 ++-- net/core/dst.c | 26 +++++--------------------- net/decnet/dn_route.c | 6 ------ net/ipv4/fib_semantics.c | 21 +++++++++++++++++---- net/ipv4/route.c | 26 +++++++++++++++++--------- 7 files changed, 50 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 31a9fd39edb..baf59789006 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,7 +61,6 @@ struct dst_entry { #define DST_NOPEER 0x0040 #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 -#define DST_RCU_FREE 0x0200 unsigned short pending_confirm; @@ -383,6 +382,12 @@ static inline void dst_free(struct dst_entry *dst) __dst_free(dst); } +static inline void dst_rcu_free(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + dst_free(dst); +} + static inline void dst_confirm(struct dst_entry *dst) { dst->pending_confirm = 1; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e3fd34c83ac..83b567fe194 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -253,13 +253,9 @@ static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb { struct dst_entry *dst = skb_dst(skb); - if (atomic_inc_not_zero(&dst->__refcnt)) { - if (!(dst->flags & DST_RCU_FREE)) - dst->flags |= DST_RCU_FREE; - - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - } + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } #endif /* _INET_SOCK_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e69c3a47153..e521a03515b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -81,8 +81,8 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; - struct rtable *nh_rth_output; - struct rtable *nh_rth_input; + struct rtable __rcu *nh_rth_output; + struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/core/dst.c b/net/core/dst.c index d9e33ebe170..069d51d2941 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -258,15 +258,6 @@ again: } EXPORT_SYMBOL(dst_destroy); -static void dst_rcu_destroy(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); -} - void dst_release(struct dst_entry *dst) { if (dst) { @@ -274,14 +265,10 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & (DST_NOCACHE | DST_RCU_FREE)) && !newrefcnt) { - if (dst->flags & DST_RCU_FREE) { - call_rcu_bh(&dst->rcu_head, dst_rcu_destroy); - } else { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); - } + if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); } } } @@ -333,14 +320,11 @@ EXPORT_SYMBOL(__dst_destroy_metrics_generic); */ void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - bool hold; - WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); /* If dst not in cache, we must take a reference, because * dst_release() will destroy dst as soon as its refcount becomes zero */ - hold = (dst->flags & (DST_NOCACHE | DST_RCU_FREE)) == DST_NOCACHE; - if (unlikely(hold)) { + if (unlikely(dst->flags & DST_NOCACHE)) { dst_hold(dst); skb_dst_set(skb, dst); } else { diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 26719779ad8..85a3604c87c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -184,12 +184,6 @@ static __inline__ unsigned int dn_hash(__le16 src, __le16 dst) return dn_rt_hash_mask & (unsigned int)tmp; } -static inline void dst_rcu_free(struct rcu_head *head) -{ - struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); - dst_free(dst); -} - static inline void dnrt_free(struct dn_route *rt) { call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f..625cf185c48 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -161,6 +161,21 @@ static void free_nh_exceptions(struct fib_nh *nh) kfree(hash); } +static void rt_nexthop_free(struct rtable __rcu **rtp) +{ + struct rtable *rt = rcu_dereference_protected(*rtp, 1); + + if (!rt) + return; + + /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); + * because we waited an RCU grace period before calling + * free_fib_info_rcu() + */ + + dst_free(&rt->dst); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -171,10 +186,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - if (nexthop_nh->nh_rth_output) - dst_release(&nexthop_nh->nh_rth_output->dst); - if (nexthop_nh->nh_rth_input) - dst_release(&nexthop_nh->nh_rth_input->dst); + rt_nexthop_free(&nexthop_nh->nh_rth_output); + rt_nexthop_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d6eabcfe8a9..2bd10747746 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1199,23 +1199,31 @@ restart: fnhe->fnhe_stamp = jiffies; } +static inline void rt_free(struct rtable *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { - struct rtable *orig, *prev, **p = &nh->nh_rth_output; + struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output; if (rt_is_input_route(rt)) - p = &nh->nh_rth_input; + p = (struct rtable **)&nh->nh_rth_input; orig = *p; - rt->dst.flags |= DST_RCU_FREE; - dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) - dst_release(&orig->dst); + rt_free(orig); } else { - dst_release(&rt->dst); + /* Routes we intend to cache in the FIB nexthop have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; } } @@ -1412,7 +1420,7 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = false; if (res->fi) { if (!itag) { - rth = FIB_RES_NH(*res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1574,7 +1582,7 @@ local_input: do_cache = false; if (res.fi) { if (!itag) { - rth = FIB_RES_NH(res).nh_rth_input; + rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -1742,7 +1750,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (fi) { fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); if (!fnhe) { - rth = FIB_RES_NH(*res).nh_rth_output; + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; -- cgit v1.2.3-70-g09d2 From d26b3a7c4b3b26319f18bb645de93eba8f4bdcd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jul 2012 05:45:30 +0000 Subject: ipv4: percpu nh_rth_output cache Input path is mostly run under RCU and doesnt touch dst refcnt But output path on forwarding or UDP workloads hits badly dst refcount, and we have lot of false sharing, for example in ipv4_mtu() when reading rt->rt_pmtu Using a percpu cache for nh_rth_output gives a nice performance increase at a small cost. 24 udpflood test on my 24 cpu machine (dummy0 output device) (each process sends 1.000.000 udp frames, 24 processes are started) before : 5.24 s after : 2.06 s For reference, time on linux-3.5 : 6.60 s Signed-off-by: Eric Dumazet Tested-by: Alexander Duyck Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_semantics.c | 20 +++++++++++++++++++- net/ipv4/route.c | 18 +++++++++++++----- 3 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e521a03515b..e331746029b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -21,6 +21,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -81,7 +82,7 @@ struct fib_nh { __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; - struct rtable __rcu *nh_rth_output; + struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket *nh_exceptions; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 625cf185c48..fe2ca02a197 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -176,6 +176,23 @@ static void rt_nexthop_free(struct rtable __rcu **rtp) dst_free(&rt->dst); } +static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp) +{ + int cpu; + + if (!rtp) + return; + + for_each_possible_cpu(cpu) { + struct rtable *rt; + + rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); + if (rt) + dst_free(&rt->dst); + } + free_percpu(rtp); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { @@ -186,7 +203,7 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - rt_nexthop_free(&nexthop_nh->nh_rth_output); + rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_nexthop_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); @@ -817,6 +834,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; + nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2bd10747746..4f6276ce0af 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1206,11 +1206,15 @@ static inline void rt_free(struct rtable *rt) static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) { - struct rtable *orig, *prev, **p = (struct rtable **)&nh->nh_rth_output; + struct rtable *orig, *prev, **p; - if (rt_is_input_route(rt)) + if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; - + } else { + if (!nh->nh_pcpu_rth_output) + goto nocache; + p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); + } orig = *p; prev = cmpxchg(p, orig, rt); @@ -1223,6 +1227,7 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) * unsuccessful at storing this route into the cache * we really need to set it. */ +nocache: rt->dst.flags |= DST_NOCACHE; } } @@ -1749,8 +1754,11 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; if (fi) { fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); - if (!fnhe) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_output); + if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) { + struct rtable __rcu **prth; + + prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; -- cgit v1.2.3-70-g09d2 From c5038a8327b980a5b279fa193163c468011de009 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2012 15:02:02 -0700 Subject: ipv4: Cache routes in nexthop exception entries. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_semantics.c | 39 +++++++++--------- net/ipv4/route.c | 103 ++++++++++++++++++++++++++--------------------- 3 files changed, 79 insertions(+), 64 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e331746029b..926142ed8d7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -55,6 +55,7 @@ struct fib_nh_exception { u32 fnhe_pmtu; __be32 fnhe_gw; unsigned long fnhe_expires; + struct rtable __rcu *fnhe_rth; unsigned long fnhe_stamp; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index fe2ca02a197..da80dc14cc7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { }, }; +static void rt_fibinfo_free(struct rtable __rcu **rtp) +{ + struct rtable *rt = rcu_dereference_protected(*rtp, 1); + + if (!rt) + return; + + /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); + * because we waited an RCU grace period before calling + * free_fib_info_rcu() + */ + + dst_free(&rt->dst); +} + static void free_nh_exceptions(struct fib_nh *nh) { struct fnhe_hash_bucket *hash = nh->nh_exceptions; @@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) struct fib_nh_exception *next; next = rcu_dereference_protected(fnhe->fnhe_next, 1); + + rt_fibinfo_free(&fnhe->fnhe_rth); + kfree(fnhe); fnhe = next; @@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh) kfree(hash); } -static void rt_nexthop_free(struct rtable __rcu **rtp) -{ - struct rtable *rt = rcu_dereference_protected(*rtp, 1); - - if (!rt) - return; - - /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); - * because we waited an RCU grace period before calling - * free_fib_info_rcu() - */ - - dst_free(&rt->dst); -} - -static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp) +static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) { int cpu; @@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) dev_put(nexthop_nh->nh_dev); if (nexthop_nh->nh_exceptions) free_nh_exceptions(nexthop_nh); - rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); - rt_nexthop_free(&nexthop_nh->nh_rth_input); + rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); + rt_fibinfo_free(&nexthop_nh->nh_rth_input); } endfor_nexthops(fi); release_net(fi->fib_net); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4f6276ce0af..b102eeb16e3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, build_sk_flow_key(fl4, sk); } -static DEFINE_SEQLOCK(fnhe_seqlock); +static inline void rt_free(struct rtable *rt) +{ + call_rcu(&rt->dst.rcu_head, dst_rcu_free); +} + +static DEFINE_SPINLOCK(fnhe_lock); static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; + struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } + orig = rcu_dereference(oldest->fnhe_rth); + if (orig) { + RCU_INIT_POINTER(oldest->fnhe_rth, NULL); + rt_free(orig); + } return oldest; } @@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, int depth; u32 hval = fnhe_hashfun(daddr); - write_seqlock_bh(&fnhe_seqlock); + spin_lock_bh(&fnhe_lock); hash = nh->nh_exceptions; if (!hash) { @@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_stamp = jiffies; out_unlock: - write_sequnlock_bh(&fnhe_seqlock); + spin_unlock_bh(&fnhe_lock); return; } @@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { - __be32 fnhe_daddr, gw; - unsigned long expires; - unsigned int seq; - u32 pmtu; - -restart: - seq = read_seqbegin(&fnhe_seqlock); - fnhe_daddr = fnhe->fnhe_daddr; - gw = fnhe->fnhe_gw; - pmtu = fnhe->fnhe_pmtu; - expires = fnhe->fnhe_expires; - if (read_seqretry(&fnhe_seqlock, seq)) - goto restart; - - if (daddr != fnhe_daddr) - return; + spin_lock_bh(&fnhe_lock); - if (pmtu) { - unsigned long diff = expires - jiffies; + if (daddr == fnhe->fnhe_daddr) { + struct rtable *orig; - if (time_before(jiffies, expires)) { - rt->rt_pmtu = pmtu; - dst_set_expires(&rt->dst, diff); + if (fnhe->fnhe_pmtu) { + unsigned long expires = fnhe->fnhe_expires; + unsigned long diff = expires - jiffies; + + if (time_before(jiffies, expires)) { + rt->rt_pmtu = fnhe->fnhe_pmtu; + dst_set_expires(&rt->dst, diff); + } + } + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; } - } - if (gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = gw; - } - fnhe->fnhe_stamp = jiffies; -} -static inline void rt_free(struct rtable *rt) -{ - call_rcu(&rt->dst.rcu_head, dst_rcu_free); + orig = rcu_dereference(fnhe->fnhe_rth); + rcu_assign_pointer(fnhe->fnhe_rth, rt); + if (orig) + rt_free(orig); + + fnhe->fnhe_stamp = jiffies; + } else { + /* Routes we intend to cache in nexthop exception have + * the DST_NOCACHE bit clear. However, if we are + * unsuccessful at storing this route into the cache + * we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; + } + spin_unlock_bh(&fnhe_lock); } static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) @@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) rt->rt_gateway = nh->nh_gw; - if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - if (!(rt->dst.flags & DST_NOCACHE)) + if (unlikely(fnhe)) + rt_bind_exception(rt, fnhe, daddr); + else if (!(rt->dst.flags & DST_NOCACHE)) rt_cache_route(nh, rt); } @@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; if (fi) { - fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); - if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) { - struct rtable __rcu **prth; + struct rtable __rcu **prth; + fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + if (fnhe) + prth = &fnhe->fnhe_rth; + else prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); - rth = rcu_dereference(*prth); - if (rt_cache_valid(rth)) { - dst_hold(&rth->dst); - return rth; - } + rth = rcu_dereference(*prth); + if (rt_cache_valid(rth)) { + dst_hold(&rth->dst); + return rth; } } rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi && !fnhe); + fi); if (!rth) return ERR_PTR(-ENOBUFS); -- cgit v1.2.3-70-g09d2 From caacf05e5ad1abf0a2864863da4e33024bc68ec6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jul 2012 15:06:50 -0700 Subject: ipv4: Properly purge netdev references on uncached routes. When a device is unregistered, we have to purge all of the references to it that may exist in the entire system. If a route is uncached, we currently have no way of accomplishing this. So create a global list that is scanned when a network device goes down. This mirrors the logic in net/core/dst.c's dst_ifdown(). Signed-off-by: David S. Miller --- include/net/route.h | 3 +++ net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++--- net/ipv4/xfrm4_policy.c | 1 + 4 files changed, 69 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 8c52bc6f1c9..776a27f1ab7 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -57,6 +57,8 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; + + struct list_head rt_uncached; }; static inline bool rt_is_input_route(const struct rtable *rt) @@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); extern void rt_cache_flush(struct net *net, int how); +extern void rt_flush_dev(struct net_device *dev); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8732cc7920e..c43ae3fba79 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); + rt_flush_dev(dev); return NOTIFY_DONE; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b102eeb16e3..c035251beb0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void ipv4_dst_destroy(struct dst_entry *dst); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, + .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -1175,9 +1177,11 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) return NULL; } -static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, +static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { + bool ret = false; + spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { @@ -1203,6 +1207,7 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, rt_free(orig); fnhe->fnhe_stamp = jiffies; + ret = true; } else { /* Routes we intend to cache in nexthop exception have * the DST_NOCACHE bit clear. However, if we are @@ -1212,11 +1217,14 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, rt->dst.flags |= DST_NOCACHE; } spin_unlock_bh(&fnhe_lock); + + return ret; } -static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p; + bool ret = true; if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; @@ -1239,6 +1247,48 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) */ nocache: rt->dst.flags |= DST_NOCACHE; + ret = false; + } + + return ret; +} + +static DEFINE_SPINLOCK(rt_uncached_lock); +static LIST_HEAD(rt_uncached_list); + +static void rt_add_uncached_list(struct rtable *rt) +{ + spin_lock_bh(&rt_uncached_lock); + list_add_tail(&rt->rt_uncached, &rt_uncached_list); + spin_unlock_bh(&rt_uncached_lock); +} + +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct rtable *rt = (struct rtable *) dst; + + if (dst->flags & DST_NOCACHE) { + spin_lock_bh(&rt_uncached_lock); + list_del(&rt->rt_uncached); + spin_unlock_bh(&rt_uncached_lock); + } +} + +void rt_flush_dev(struct net_device *dev) +{ + if (!list_empty(&rt_uncached_list)) { + struct net *net = dev_net(dev); + struct rtable *rt; + + spin_lock_bh(&rt_uncached_lock); + list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + if (rt->dst.dev != dev) + continue; + rt->dst.dev = net->loopback_dev; + dev_hold(rt->dst.dev); + dev_put(dev); + } + spin_unlock_bh(&rt_uncached_lock); } } @@ -1254,6 +1304,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { + bool cached = false; + if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); @@ -1264,10 +1316,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->dst.tclassid = nh->nh_tclassid; #endif if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); + cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) - rt_cache_route(nh, rt); + cached = rt_cache_route(nh, rt); } + if (unlikely(!cached)) + rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1334,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1459,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1625,6 +1681,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1792,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2071,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + INIT_LIST_HEAD(&rt->rt_uncached); + dst_free(new); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c6281847f16..681ea2f413e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; } -- cgit v1.2.3-70-g09d2 From c255a458055e459f65eb7b7f51dc5dbdd0caf1d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Jul 2012 16:43:02 -0700 Subject: memcg: rename config variables Sanity: CONFIG_CGROUP_MEM_RES_CTLR -> CONFIG_MEMCG CONFIG_CGROUP_MEM_RES_CTLR_SWAP -> CONFIG_MEMCG_SWAP CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED -> CONFIG_MEMCG_SWAP_ENABLED CONFIG_CGROUP_MEM_RES_CTLR_KMEM -> CONFIG_MEMCG_KMEM [mhocko@suse.cz: fix missed bits] Cc: Glauber Costa Acked-by: Michal Hocko Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Tejun Heo Cc: Aneesh Kumar K.V Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memory.txt | 10 +++++----- arch/powerpc/configs/chroma_defconfig | 4 ++-- arch/s390/defconfig | 2 +- arch/sh/configs/apsh4ad0a_defconfig | 2 +- arch/sh/configs/sdk7786_defconfig | 4 ++-- arch/sh/configs/se7206_defconfig | 2 +- arch/sh/configs/shx3_defconfig | 2 +- arch/sh/configs/urquell_defconfig | 4 ++-- arch/tile/configs/tilegx_defconfig | 4 ++-- arch/tile/configs/tilepro_defconfig | 4 ++-- arch/um/defconfig | 8 ++++---- include/linux/cgroup_subsys.h | 2 +- include/linux/memcontrol.h | 14 +++++++------- include/linux/mmzone.h | 8 ++++---- include/linux/page_cgroup.h | 10 +++++----- include/linux/sched.h | 2 +- include/linux/swap.h | 6 +++--- include/net/sock.h | 4 ++-- init/Kconfig | 14 +++++++------- kernel/fork.c | 2 +- mm/Makefile | 2 +- mm/hwpoison-inject.c | 2 +- mm/memcontrol.c | 20 ++++++++++---------- mm/memory-failure.c | 2 +- mm/mmzone.c | 2 +- mm/oom_kill.c | 2 +- mm/page_cgroup.c | 2 +- mm/vmscan.c | 4 ++-- net/core/sock.c | 2 +- net/ipv4/Makefile | 2 +- net/ipv4/sysctl_net_ipv4.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 33 files changed, 78 insertions(+), 78 deletions(-) (limited to 'include/net') diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index dd88540bb99..672676ac961 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -187,12 +187,12 @@ the cgroup that brought it in -- this will happen on memory pressure). But see section 8.2: when moving a task to another cgroup, its pages may be recharged to the new cgroup, if move_charge_at_immigrate has been chosen. -Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. +Exception: If CONFIG_CGROUP_CGROUP_MEMCG_SWAP is not used. When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. -2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) +2.4 Swap Extension (CONFIG_MEMCG_SWAP) Swap Extension allows you to record charge for swap. A swapped-in page is charged back to original page allocator if possible. @@ -259,7 +259,7 @@ When oom event notifier is registered, event will be delivered. per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by zone->lru_lock, it has no lock of its own. -2.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM) With the Kernel memory extension, the Memory Controller is able to limit the amount of kernel memory used by the system. Kernel memory is fundamentally @@ -286,8 +286,8 @@ per cgroup, instead of globally. a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_RES_CTLR -d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) +c. Enable CONFIG_MEMCG +d. Enable CONFIG_MEMCG_SWAP (to use swap extension) 1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) # mount -t tmpfs none /sys/fs/cgroup diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index b1f9597fe31..29bb11ec6c6 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -21,8 +21,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_NAMESPACES=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 37d2bf26796..de57702a3f4 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -13,7 +13,7 @@ CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index e7583484cc0..95ae23fcfdd 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -11,7 +11,7 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 8a7dd7b59c5..76a76a295d7 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -18,8 +18,8 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 72c3fad7383..6bc30ab9fd1 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -11,7 +11,7 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 6bb41303689..cd6c519f8fa 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -13,7 +13,7 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 8bfa4d056d7..d7f89be9f47 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -15,8 +15,8 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index b8d99aca543..0270620a169 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -18,8 +18,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index 2b1fd31894f..c11de27a9bc 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -17,8 +17,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/um/defconfig b/arch/um/defconfig index 7823ab12e6a..fec0d5d2746 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -155,10 +155,10 @@ CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y -# CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED is not set -# CONFIG_CGROUP_MEM_RES_CTLR_KMEM is not set +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y +# CONFIG_CGROUP_MEMCG_SWAP_ENABLED is not set +# CONFIG_CGROUP_MEMCG_KMEM is not set CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_CFS_BANDWIDTH is not set diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 5b41ce07902..dfae957398c 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -31,7 +31,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG SUBSYS(mem_cgroup) #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 83e7ba90d6e..17007622243 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -38,7 +38,7 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG /* * All "charge" functions with gfp_mask should use GFP_KERNEL or * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't @@ -124,7 +124,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; #endif @@ -193,7 +193,7 @@ void mem_cgroup_split_huge_fixup(struct page *head); bool mem_cgroup_bad_page_check(struct page *page); void mem_cgroup_print_bad_page(struct page *page); #endif -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */ struct mem_cgroup; static inline int mem_cgroup_newpage_charge(struct page *page, @@ -384,9 +384,9 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ -#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) +#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM) static inline bool mem_cgroup_bad_page_check(struct page *page) { @@ -406,7 +406,7 @@ enum { }; struct sock; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); #else @@ -416,6 +416,6 @@ static inline void sock_update_memcg(struct sock *sk) static inline void sock_release_memcg(struct sock *sk) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 458988bd55a..3bdfa15b201 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -201,7 +201,7 @@ struct zone_reclaim_stat { struct lruvec { struct list_head lists[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG struct zone *zone; #endif }; @@ -671,7 +671,7 @@ typedef struct pglist_data { int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG struct page_cgroup *node_page_cgroup; #endif #endif @@ -736,7 +736,7 @@ extern void lruvec_init(struct lruvec *lruvec, struct zone *zone); static inline struct zone *lruvec_zone(struct lruvec *lruvec) { -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG return lruvec->zone; #else return container_of(lruvec, struct zone, lruvec); @@ -1052,7 +1052,7 @@ struct mem_section { /* See declaration of similar field in struct zone */ unsigned long *pageblock_flags; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG /* * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use * section. (see memcontrol.h/page_cgroup.h about this.) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index a88cdba2780..777a524716d 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -12,7 +12,7 @@ enum { #ifndef __GENERATING_BOUNDS_H #include -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG #include /* @@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) bit_spin_unlock(PCG_LOCK, &pc->flags); } -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */ struct page_cgroup; static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) @@ -102,11 +102,11 @@ static inline void __init page_cgroup_init_flatmem(void) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ #include -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); @@ -138,7 +138,7 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ +#endif /* CONFIG_MEMCG_SWAP */ #endif /* !__GENERATING_BOUNDS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 68dcffaa62a..865725adb9d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1584,7 +1584,7 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ +#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ struct memcg_batch_info { int do_batch; /* incremented when batch uncharge started */ struct mem_cgroup *memcg; /* target memcg of uncharge */ diff --git a/include/linux/swap.h b/include/linux/swap.h index c84ec68eaec..9a16bb1cefd 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -301,7 +301,7 @@ static inline void scan_unevictable_unregister_node(struct node *node) extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG extern int mem_cgroup_swappiness(struct mem_cgroup *mem); #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) @@ -309,7 +309,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) return vm_swappiness; } #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_uncharge_swap(swp_entry_t ent); #else static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) @@ -360,7 +360,7 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); #else diff --git a/include/net/sock.h b/include/net/sock.h index e067f8c18f8..cee528c119c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -913,7 +913,7 @@ struct proto { #ifdef SOCK_REFCNT_DEBUG atomic_t socks; #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM /* * cgroup specific init/deinit functions. Called once for all * protocols that implement it, from cgroups populate function. @@ -994,7 +994,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) diff --git a/init/Kconfig b/init/Kconfig index 72437760e90..af6c7f8ba01 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -686,7 +686,7 @@ config RESOURCE_COUNTERS This option enables controller independent resource accounting infrastructure that works with cgroups. -config CGROUP_MEM_RES_CTLR +config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS select MM_OWNER @@ -709,9 +709,9 @@ config CGROUP_MEM_RES_CTLR This config option also selects MM_OWNER config option, which could in turn add some fork/exit overhead. -config CGROUP_MEM_RES_CTLR_SWAP +config MEMCG_SWAP bool "Memory Resource Controller Swap Extension" - depends on CGROUP_MEM_RES_CTLR && SWAP + depends on MEMCG && SWAP help Add swap management feature to memory resource controller. When you enable this, you can limit mem+swap usage per cgroup. In other words, @@ -726,9 +726,9 @@ config CGROUP_MEM_RES_CTLR_SWAP if boot option "swapaccount=0" is set, swap will not be accounted. Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. -config CGROUP_MEM_RES_CTLR_SWAP_ENABLED +config MEMCG_SWAP_ENABLED bool "Memory Resource Controller Swap Extension enabled by default" - depends on CGROUP_MEM_RES_CTLR_SWAP + depends on MEMCG_SWAP default y help Memory Resource Controller Swap Extension comes with its price in @@ -739,9 +739,9 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED For those who want to have the feature enabled by default should select this option (if, for some reason, they need to disable it then swapaccount=0 does the trick). -config CGROUP_MEM_RES_CTLR_KMEM +config MEMCG_KMEM bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)" - depends on CGROUP_MEM_RES_CTLR && EXPERIMENTAL + depends on MEMCG && EXPERIMENTAL default n help The Kernel Memory extension for Memory Resource Controller can limit diff --git a/kernel/fork.c b/kernel/fork.c index aaa8813c45d..3bd2280d79f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1306,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif diff --git a/mm/Makefile b/mm/Makefile index fd6fc1c1966..290bbfe3369 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index cc448bb983b..3a61efc518d 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -123,7 +123,7 @@ static int pfn_inject_init(void) if (!dentry) goto fail; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP dentry = debugfs_create_u64("corrupt-filter-memcg", 0600, hwpoison_dir, &hwpoison_filter_memcg); if (!dentry) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2677e0a638..55a85e1a342 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -61,12 +61,12 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 static struct mem_cgroup *root_mem_cgroup __read_mostly; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ int do_swap_account __read_mostly; /* for remember boot option*/ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED +#ifdef CONFIG_MEMCG_SWAP_ENABLED static int really_do_swap_account __initdata = 1; #else static int really_do_swap_account __initdata = 0; @@ -407,7 +407,7 @@ static void mem_cgroup_get(struct mem_cgroup *memcg); static void mem_cgroup_put(struct mem_cgroup *memcg); /* Writing them here to avoid exposing memcg's inner layout */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM #include #include @@ -466,9 +466,9 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(tcp_proto_cgroup); #endif /* CONFIG_INET */ -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ -#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) static void disarm_sock_keys(struct mem_cgroup *memcg) { if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto)) @@ -3085,7 +3085,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) } #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP /* * called from swap_entry_free(). remove record in swap_cgroup and * uncharge "memsw" account. @@ -4518,7 +4518,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, return 0; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return mem_cgroup_sockets_init(memcg, ss); @@ -4608,7 +4608,7 @@ static struct cftype mem_cgroup_files[] = { .read_seq_string = mem_control_numa_stat_show, }, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), @@ -4795,7 +4795,7 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(parent_mem_cgroup); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static void __init enable_swap_cgroup(void) { if (!mem_cgroup_disabled() && really_do_swap_account) @@ -5526,7 +5526,7 @@ struct cgroup_subsys mem_cgroup_subsys = { .__DEPRECATED_clear_css_refs = true, }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static int __init enable_swap_account(char *s) { /* consider enabled if no parameter or 1 is given */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b04ff2d6f73..a6e2141a661 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -128,7 +128,7 @@ static int hwpoison_filter_flags(struct page *p) * can only guarantee that the page either belongs to the memcg tasks, or is * a freed page. */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP u64 hwpoison_filter_memcg; EXPORT_SYMBOL_GPL(hwpoison_filter_memcg); static int hwpoison_filter_task(struct page *p) diff --git a/mm/mmzone.c b/mm/mmzone.c index 6830eab5bf0..3cef80f6ac7 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -96,7 +96,7 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG lruvec->zone = zone; #endif } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c82ede69bf3..e6c10640e56 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -541,7 +541,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index eb750f85139..5ddad0c6daa 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -317,7 +317,7 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static DEFINE_MUTEX(swap_cgroup_mutex); struct swap_cgroup_ctrl { diff --git a/mm/vmscan.c b/mm/vmscan.c index 347b3ff2a47..6b1f89a9121 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -133,7 +133,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; @@ -2142,7 +2142,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, return nr_reclaimed; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, diff --git a/net/core/sock.c b/net/core/sock.c index 2676a88f533..a67b06280e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { struct proto *proto; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ae2ccf2890e..15ca63ec604 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o +obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c325572..ed7db3f1b6f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, int ret; unsigned long vec[3]; struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, if (ret) return ret; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2fbd9921253..4bc8f6769b5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2633,7 +2633,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .init_cgroup = tcp_init_cgroup, .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 221224e7250..61c7b6d8317 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2015,7 +2015,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .proto_cgroup = tcp_proto_cgroup, #endif }; -- cgit v1.2.3-70-g09d2 From 99a1dec70d5acbd8c6b3928cdebb4a2d1da676c8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:14 -0700 Subject: net: introduce sk_gfp_atomic() to allow addition of GFP flags depending on the individual socket Introduce sk_gfp_atomic(), this function allows to inject sock specific flags to each sock related allocation. It is only used on allocation paths that may be required for writing pages back to network storage. [davem@davemloft.net: Use sk_gfp_atomic only when necessary] Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 +++++ net/ipv4/tcp_output.c | 12 +++++++----- net/ipv6/tcp_ipv6.c | 8 +++++--- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index cee528c119c..11ccde65c4c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -658,6 +658,11 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) return test_bit(flag, &sk->sk_flags); } +static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) +{ + return GFP_ATOMIC; +} + static inline void sk_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 33cd065cfbd..3f1bcff0b10 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2045,7 +2045,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, + sk_gfp_atomic(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -2666,7 +2667,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, + sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -3064,7 +3066,7 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; @@ -3079,7 +3081,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; - tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); + tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } /* This routine sends a packet with an out of date sequence @@ -3099,7 +3101,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (skb == NULL) return -1; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 61c7b6d8317..c66b90f71c9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1299,7 +1299,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone(treq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(treq->pktopts); treq->pktopts = NULL; if (newnp->pktoptions) @@ -1349,7 +1350,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, - AF_INET6, key->key, key->keylen, GFP_ATOMIC); + AF_INET6, key->key, key->keylen, + sk_gfp_atomic(sk, GFP_ATOMIC)); } #endif @@ -1442,7 +1444,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb); -- cgit v1.2.3-70-g09d2 From 7cb0240492caea2f6467f827313478f41877e6ef Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:16 -0700 Subject: netvm: allow the use of __GFP_MEMALLOC by specific sockets Allow specific sockets to be tagged SOCK_MEMALLOC and use __GFP_MEMALLOC for their allocations. These sockets will be able to go below watermarks and allocate from the emergency reserve. Such sockets are to be used to service the VM (iow. to swap over). They must be handled kernel side, exposing such a socket to user-space is a bug. There is a risk that the reserves be depleted so for now, the administrator is responsible for increasing min_free_kbytes as necessary to prevent deadlock for their workloads. [a.p.zijlstra@chello.nl: Original patches] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 ++++- net/core/sock.c | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 11ccde65c4c..bfa7d20e664 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -621,6 +621,7 @@ enum sock_flags { SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + SOCK_MEMALLOC, /* VM depends on this socket for swapping */ SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */ SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */ SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */ @@ -660,7 +661,7 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) { - return GFP_ATOMIC; + return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); } static inline void sk_acceptq_removed(struct sock *sk) @@ -803,6 +804,8 @@ extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p); extern void sk_stream_wait_close(struct sock *sk, long timeo_p); extern int sk_stream_error(struct sock *sk, int flags, int err); extern void sk_stream_kill_queues(struct sock *sk); +extern void sk_set_memalloc(struct sock *sk); +extern void sk_clear_memalloc(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); diff --git a/net/core/sock.c b/net/core/sock.c index a67b06280e4..3617f652f6b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -271,6 +271,28 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +/** + * sk_set_memalloc - sets %SOCK_MEMALLOC + * @sk: socket to set it on + * + * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. + * It's the responsibility of the admin to adjust min_free_kbytes + * to meet the requirements + */ +void sk_set_memalloc(struct sock *sk) +{ + sock_set_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation |= __GFP_MEMALLOC; +} +EXPORT_SYMBOL_GPL(sk_set_memalloc); + +void sk_clear_memalloc(struct sock *sk) +{ + sock_reset_flag(sk, SOCK_MEMALLOC); + sk->sk_allocation &= ~__GFP_MEMALLOC; +} +EXPORT_SYMBOL_GPL(sk_clear_memalloc); + #if defined(CONFIG_CGROUPS) #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; -- cgit v1.2.3-70-g09d2 From c93bdd0e03e848555d144eb44a1f275b871a8dd5 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:19 -0700 Subject: netvm: allow skb allocation to use PFMEMALLOC reserves Change the skb allocation API to indicate RX usage and use this to fall back to the PFMEMALLOC reserve when needed. SKBs allocated from the reserve are tagged in skb->pfmemalloc. If an SKB is allocated from the reserve and the socket is later found to be unrelated to page reclaim, the packet is dropped so that the memory remains available for page reclaim. Network protocols are expected to recover from this packet loss. [a.p.zijlstra@chello.nl: Ideas taken from various patches] [davem@davemloft.net: Use static branches, coding style corrections] [sebastian@breakpoint.cc: Avoid unnecessary cast, fix !CONFIG_NET build] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 ++ include/linux/skbuff.h | 14 +++++- include/net/sock.h | 15 ++++++ mm/internal.h | 3 -- net/core/filter.c | 8 ++++ net/core/skbuff.c | 124 +++++++++++++++++++++++++++++++++++++++---------- net/core/sock.c | 5 ++ 7 files changed, 142 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cbd7400e586..4883f393f50 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -385,6 +385,9 @@ void drain_local_pages(void *dummy); */ extern gfp_t gfp_allowed_mask; +/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ +bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); + extern void pm_restrict_gfp_mask(void); extern void pm_restore_gfp_mask(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d205c4be7f5..0336f02e366 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -462,6 +462,7 @@ struct sk_buff { #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif + __u8 pfmemalloc:1; __u8 ooo_okay:1; __u8 l4_rxhash:1; __u8 wifi_acked_valid:1; @@ -502,6 +503,15 @@ struct sk_buff { #include +#define SKB_ALLOC_FCLONE 0x01 +#define SKB_ALLOC_RX 0x02 + +/* Returns true if the skb was allocated from PFMEMALLOC reserves */ +static inline bool skb_pfmemalloc(const struct sk_buff *skb) +{ + return unlikely(skb->pfmemalloc); +} + /* * skb might have a dst pointer attached, refcounted or not. * _skb_refdst low order bit is set if refcount was _not_ taken @@ -565,7 +575,7 @@ extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone, int node); + gfp_t priority, int flags, int node); extern struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) @@ -576,7 +586,7 @@ static inline struct sk_buff *alloc_skb(unsigned int size, static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1, NUMA_NO_NODE); + return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } extern void skb_recycle(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index bfa7d20e664..81198632ac2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -659,6 +659,21 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) return test_bit(flag, &sk->sk_flags); } +#ifdef CONFIG_NET +extern struct static_key memalloc_socks; +static inline int sk_memalloc_socks(void) +{ + return static_key_false(&memalloc_socks); +} +#else + +static inline int sk_memalloc_socks(void) +{ + return 0; +} + +#endif + static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) { return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); diff --git a/mm/internal.h b/mm/internal.h index eb76b67890d..3314f79d775 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -279,9 +279,6 @@ static inline struct page *mem_map_next(struct page *iter, #define __paginginit __init #endif -/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ -bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); - /* Memory initialisation debug and verification */ enum mminit_level { MMINIT_WARNING, diff --git a/net/core/filter.c b/net/core/filter.c index d4ce2dc712e..907efd27ec7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) int err; struct sk_filter *filter; + /* + * If the skb was allocated from pfmemalloc reserves, only + * allow SOCK_MEMALLOC sockets to use it as this socket is + * helping free memory + */ + if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) + return -ENOMEM; + err = security_sock_rcv_skb(sk, skb); if (err) return err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 368f65c15e4..fe00d120816 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) BUG(); } + +/* + * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells + * the caller if emergency pfmemalloc reserves are being used. If it is and + * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves + * may be used. Otherwise, the packet data may be discarded until enough + * memory is free + */ +#define kmalloc_reserve(size, gfp, node, pfmemalloc) \ + __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) +void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, + bool *pfmemalloc) +{ + void *obj; + bool ret_pfmemalloc = false; + + /* + * Try a regular allocation, when that fails and we're not entitled + * to the reserves, fail. + */ + obj = kmalloc_node_track_caller(size, + flags | __GFP_NOMEMALLOC | __GFP_NOWARN, + node); + if (obj || !(gfp_pfmemalloc_allowed(flags))) + goto out; + + /* Try again but now we are using pfmemalloc reserves */ + ret_pfmemalloc = true; + obj = kmalloc_node_track_caller(size, flags, node); + +out: + if (pfmemalloc) + *pfmemalloc = ret_pfmemalloc; + + return obj; +} + /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. @@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask - * @fclone: allocate from fclone cache instead of head cache - * and allocate a cloned (child) skb + * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache + * instead of head cache and allocate a cloned (child) skb. + * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for + * allocations in case the data is required for writeback * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a @@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone, int node) + int flags, int node) { struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; + bool pfmemalloc; - cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; + cache = (flags & SKB_ALLOC_FCLONE) + ? skbuff_fclone_cache : skbuff_head_cache; + + if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) + gfp_mask |= __GFP_MEMALLOC; /* Get the HEAD */ skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); @@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, */ size = SKB_DATA_ALIGN(size); size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - data = kmalloc_node_track_caller(size, gfp_mask, node); + data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); if (!data) goto nodata; /* kmalloc(size) might give us more room than requested. @@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, memset(skb, 0, offsetof(struct sk_buff, tail)); /* Account for allocated memory : skb + skb->head */ skb->truesize = SKB_TRUESIZE(size); + skb->pfmemalloc = pfmemalloc; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(&shinfo->dataref, 1); kmemcheck_annotate_variable(shinfo->destructor_arg); - if (fclone) { + if (flags & SKB_ALLOC_FCLONE) { struct sk_buff *child = skb + 1; atomic_t *fclone_ref = (atomic_t *) (child + 1); @@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, atomic_set(fclone_ref, 1); child->fclone = SKB_FCLONE_UNAVAILABLE; + child->pfmemalloc = pfmemalloc; } out: return skb; @@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) -/** - * netdev_alloc_frag - allocate a page fragment - * @fragsz: fragment size - * - * Allocates a frag from a page for receive buffer. - * Uses GFP_ATOMIC allocations. - */ -void *netdev_alloc_frag(unsigned int fragsz) +static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { struct netdev_alloc_cache *nc; void *data = NULL; @@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz) nc = &__get_cpu_var(netdev_alloc_cache); if (unlikely(!nc->page)) { refill: - nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); + nc->page = alloc_page(gfp_mask); if (unlikely(!nc->page)) goto end; recycle: @@ -343,6 +382,18 @@ end: local_irq_restore(flags); return data; } + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +void *netdev_alloc_frag(unsigned int fragsz) +{ + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); +} EXPORT_SYMBOL(netdev_alloc_frag); /** @@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { - void *data = netdev_alloc_frag(fragsz); + void *data; + + if (sk_memalloc_socks()) + gfp_mask |= __GFP_MEMALLOC; + + data = __netdev_alloc_frag(fragsz, gfp_mask); if (likely(data)) { skb = build_skb(data, fragsz); @@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, put_page(virt_to_head_page(data)); } } else { - skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, + SKB_ALLOC_RX, NUMA_NO_NODE); } if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); @@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #if IS_ENABLED(CONFIG_IP_VS) new->ipvs_property = old->ipvs_property; #endif + new->pfmemalloc = old->pfmemalloc; new->protocol = old->protocol; new->mark = old->mark; new->skb_iif = old->skb_iif; @@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_CLONE; atomic_inc(fclone_ref); } else { + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); if (!n) return NULL; @@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } +static inline int skb_alloc_rx_flag(const struct sk_buff *skb) +{ + if (skb_pfmemalloc(skb)) + return SKB_ALLOC_RX; + return 0; +} + /** * skb_copy - create private copy of an sk_buff * @skb: buffer to copy @@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { int headerlen = skb_headroom(skb); unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy); struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { unsigned int size = skb_headlen(skb) + headroom; - struct sk_buff *n = alloc_skb(size, gfp_mask); + struct sk_buff *n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) goto out; @@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, size = SKB_DATA_ALIGN(size); - data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), - gfp_mask); + if (skb_pfmemalloc(skb)) + gfp_mask |= __GFP_MEMALLOC; + data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), + gfp_mask, NUMA_NO_NODE, NULL); if (!data) goto nodata; size = SKB_WITH_OVERHEAD(ksize(data)); @@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); + struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; int off; @@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { - nskb = alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC); + nskb = __alloc_skb(hsize + doffset + headroom, + GFP_ATOMIC, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (unlikely(!nskb)) goto err; diff --git a/net/core/sock.c b/net/core/sock.c index 3617f652f6b..c8c5816289f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -271,6 +271,9 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; +EXPORT_SYMBOL_GPL(memalloc_socks); + /** * sk_set_memalloc - sets %SOCK_MEMALLOC * @sk: socket to set it on @@ -283,6 +286,7 @@ void sk_set_memalloc(struct sock *sk) { sock_set_flag(sk, SOCK_MEMALLOC); sk->sk_allocation |= __GFP_MEMALLOC; + static_key_slow_inc(&memalloc_socks); } EXPORT_SYMBOL_GPL(sk_set_memalloc); @@ -290,6 +294,7 @@ void sk_clear_memalloc(struct sock *sk) { sock_reset_flag(sk, SOCK_MEMALLOC); sk->sk_allocation &= ~__GFP_MEMALLOC; + static_key_slow_dec(&memalloc_socks); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); -- cgit v1.2.3-70-g09d2 From b4b9e3558508980fc0cd161a545ffb55a1f13ee9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:26 -0700 Subject: netvm: set PF_MEMALLOC as appropriate during SKB processing In order to make sure pfmemalloc packets receive all memory needed to proceed, ensure processing of pfmemalloc SKBs happens under PF_MEMALLOC. This is limited to a subset of protocols that are expected to be used for writing to swap. Taps are not allowed to use PF_MEMALLOC as these are expected to communicate with userspace processes which could be paged out. [a.p.zijlstra@chello.nl: Ideas taken from various patches] [jslaby@suse.cz: Lock imbalance fix] Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Peter Zijlstra Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 +++++ net/core/dev.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------ net/core/sock.c | 16 ++++++++++++++++ 3 files changed, 68 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 81198632ac2..43a470d40d7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -754,8 +754,13 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s return 0; } +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + return __sk_backlog_rcv(sk, skb); + return sk->sk_backlog_rcv(sk, skb); } diff --git a/net/core/dev.c b/net/core/dev.c index 0ebaea16632..ce132443d5d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3155,6 +3155,23 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + return true; + default: + return false; + } +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3164,14 +3181,27 @@ static int __netif_receive_skb(struct sk_buff *skb) bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; orig_dev = skb->dev; @@ -3191,7 +3221,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { skb = vlan_untag(skb); if (unlikely(!skb)) - goto out; + goto unlock; } #ifdef CONFIG_NET_CLS_ACT @@ -3201,6 +3231,9 @@ another_round: } #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -3209,13 +3242,18 @@ another_round: } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb) + && !skb_pfmemalloc_protocol(skb)) + goto drop; + rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { @@ -3225,7 +3263,7 @@ ncls: if (vlan_do_receive(&skb, !rx_handler)) goto another_round; else if (unlikely(!skb)) - goto out; + goto unlock; } if (rx_handler) { @@ -3235,7 +3273,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: - goto out; + goto unlock; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3268,6 +3306,7 @@ ncls: else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -3276,8 +3315,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } diff --git a/net/core/sock.c b/net/core/sock.c index c8c5816289f..32fdcd2d6e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -298,6 +298,22 @@ void sk_clear_memalloc(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_clear_memalloc); +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); + #if defined(CONFIG_CGROUPS) #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; -- cgit v1.2.3-70-g09d2 From c76562b6709fee5eff8a6a779be41c0bce661fd7 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:41 -0700 Subject: netvm: prevent a stream-specific deadlock This patch series is based on top of "Swap-over-NBD without deadlocking v15" as it depends on the same reservation of PF_MEMALLOC reserves logic. When a user or administrator requires swap for their application, they create a swap partition and file, format it with mkswap and activate it with swapon. In diskless systems this is not an option so if swap if required then swapping over the network is considered. The two likely scenarios are when blade servers are used as part of a cluster where the form factor or maintenance costs do not allow the use of disks and thin clients. The Linux Terminal Server Project recommends the use of the Network Block Device (NBD) for swap but this is not always an option. There is no guarantee that the network attached storage (NAS) device is running Linux or supports NBD. However, it is likely that it supports NFS so there are users that want support for swapping over NFS despite any performance concern. Some distributions currently carry patches that support swapping over NFS but it would be preferable to support it in the mainline kernel. Patch 1 avoids a stream-specific deadlock that potentially affects TCP. Patch 2 is a small modification to SELinux to avoid using PFMEMALLOC reserves. Patch 3 adds three helpers for filesystems to handle swap cache pages. For example, page_file_mapping() returns page->mapping for file-backed pages and the address_space of the underlying swap file for swap cache pages. Patch 4 adds two address_space_operations to allow a filesystem to pin all metadata relevant to a swapfile in memory. Upon successful activation, the swapfile is marked SWP_FILE and the address space operation ->direct_IO is used for writing and ->readpage for reading in swap pages. Patch 5 notes that patch 3 is bolting filesystem-specific-swapfile-support onto the side and that the default handlers have different information to what is available to the filesystem. This patch refactors the code so that there are generic handlers for each of the new address_space operations. Patch 6 adds an API to allow a vector of kernel addresses to be translated to struct pages and pinned for IO. Patch 7 adds support for using highmem pages for swap by kmapping the pages before calling the direct_IO handler. Patch 8 updates NFS to use the helpers from patch 3 where necessary. Patch 9 avoids setting PF_private on PG_swapcache pages within NFS. Patch 10 implements the new swapfile-related address_space operations for NFS and teaches the direct IO handler how to manage kernel addresses. Patch 11 prevents page allocator recursions in NFS by using GFP_NOIO where appropriate. Patch 12 fixes a NULL pointer dereference that occurs when using swap-over-NFS. With the patches applied, it is possible to mount a swapfile that is on an NFS filesystem. Swap performance is not great with a swap stress test taking roughly twice as long to complete than if the swap device was backed by NBD. This patch: netvm: prevent a stream-specific deadlock It could happen that all !SOCK_MEMALLOC sockets have buffered so much data that we're over the global rmem limit. This will prevent SOCK_MEMALLOC buffers from receiving data, which will prevent userspace from running, which is needed to reduce the buffered data. Fix this by exempting the SOCK_MEMALLOC sockets from the rmem limit. Once this change it applied, it is important that sockets that set SOCK_MEMALLOC do not clear the flag until the socket is being torn down. If this happens, a warning is generated and the tokens reclaimed to avoid accounting errors until the bug is fixed. [davem@davemloft.net: Warning about clearing SOCK_MEMALLOC] Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: David S. Miller Acked-by: Rik van Riel Cc: Trond Myklebust Cc: Neil Brown Cc: Christoph Hellwig Cc: Mike Christie Cc: Eric B Munson Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 8 +++++--- net/caif/caif_socket.c | 2 +- net/core/sock.c | 14 +++++++++++++- net/ipv4/tcp_input.c | 21 +++++++++++---------- net/sctp/ulpevent.c | 3 ++- 5 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 43a470d40d7..b3730239bf1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1329,12 +1329,14 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size) __sk_mem_schedule(sk, size, SK_MEM_SEND); } -static inline bool sk_rmem_schedule(struct sock *sk, int size) +static inline bool +sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) { if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_RECV); + return size<= sk->sk_forward_alloc || + __sk_mem_schedule(sk, size, SK_MEM_RECV) || + skb_pfmemalloc(skb); } static inline void sk_mem_reclaim(struct sock *sk) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 78f1cdad5b3..095259f8390 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) err = sk_filter(sk, skb); if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); diff --git a/net/core/sock.c b/net/core/sock.c index 32fdcd2d6e8..6b654b3ddfd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -295,6 +295,18 @@ void sk_clear_memalloc(struct sock *sk) sock_reset_flag(sk, SOCK_MEMALLOC); sk->sk_allocation &= ~__GFP_MEMALLOC; static_key_slow_dec(&memalloc_socks); + + /* + * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward + * progress of swapping. However, if SOCK_MEMALLOC is cleared while + * it has rmem allocations there is a risk that the user of the + * socket cannot make forward progress due to exceeding the rmem + * limits. By rights, sk_clear_memalloc() should only be called + * on sockets being torn down but warn and reset the accounting if + * that assumption breaks. + */ + if (WARN_ON(sk->sk_forward_alloc)) + sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); @@ -396,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) return err; - if (!sk_rmem_schedule(sk, skb->truesize)) { + if (!sk_rmem_schedule(sk, skb, skb->truesize)) { atomic_inc(&sk->sk_drops); return -ENOBUFS; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a356e1fecf9..00b91b4b866 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4351,19 +4351,20 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk); static int tcp_prune_queue(struct sock *sk); -static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) +static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, + unsigned int size) { if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_rmem_schedule(sk, size)) { + !sk_rmem_schedule(sk, skb, size)) { if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, size)) { + if (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - if (!sk_rmem_schedule(sk, size)) + if (!sk_rmem_schedule(sk, skb, size)) return -1; } } @@ -4418,7 +4419,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) TCP_ECN_check_ce(tp, skb); - if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) { + if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); __kfree_skb(skb); return; @@ -4552,17 +4553,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct tcphdr *th; bool fragstolen; - if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) - goto err; - skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); if (!skb) goto err; + if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) + goto err_free; + th = (struct tcphdr *)skb_put(skb, sizeof(*th)); skb_reset_transport_header(skb); memset(th, 0, sizeof(*th)); @@ -4633,7 +4634,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (eaten <= 0) { queue_and_out: if (eaten < 0 && - tcp_try_rmem_schedule(sk, skb->truesize)) + tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto drop; eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 33d89477619..10c018a5b9f 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (rx_count >= asoc->base.sk->sk_rcvbuf) { if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || - (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) + (!sk_rmem_schedule(asoc->base.sk, chunk->skb, + chunk->skb->truesize))) goto fail; } -- cgit v1.2.3-70-g09d2 From 1485348d2424e1131ea42efc033cbd9366462b01 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 30 Jul 2012 16:11:42 +0000 Subject: tcp: Apply device TSO segment limit earlier Cache the device gso_max_segs in sock::sk_gso_max_segs and use it to limit the size of TSO skbs. This avoids the need to fall back to software GSO for local TCP senders. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ net/core/sock.c | 1 + net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_cong.c | 3 ++- net/ipv4/tcp_output.c | 21 ++++++++++++--------- 5 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index b3730239bf1..72132aef53f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -218,6 +218,7 @@ struct cg_proto; * @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK) * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) * @sk_gso_max_size: Maximum GSO segment size to build + * @sk_gso_max_segs: Maximum number of GSO segments * @sk_lingertime: %SO_LINGER l_linger setting * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct @@ -338,6 +339,7 @@ struct sock { netdev_features_t sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; + u16 sk_gso_max_segs; int sk_rcvlowat; unsigned long sk_lingertime; struct sk_buff_head sk_error_queue; diff --git a/net/core/sock.c b/net/core/sock.c index 6b654b3ddfd..8f67ced8d6a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1458,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; sk->sk_gso_max_size = dst->dev->gso_max_size; + sk->sk_gso_max_segs = dst->dev->gso_max_segs; } } } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e7e6eeae49c..2109ff4a1da 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, old_size_goal + mss_now > xmit_size_goal)) { xmit_size_goal = old_size_goal; } else { - tp->xmit_size_goal_segs = xmit_size_goal / mss_now; + tp->xmit_size_goal_segs = + min_t(u16, xmit_size_goal / mss_now, + sk->sk_gso_max_segs); xmit_size_goal = tp->xmit_size_goal_segs * mss_now; } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4d4db16e336..1432cdb0644 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size) + left * tp->mss_cache < sk->sk_gso_max_size && + left < sk->sk_gso_max_segs) return true; return left <= tcp_max_tso_deferred_mss(tp); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3f1bcff0b10..a7b3ec9b6c3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1522,21 +1522,21 @@ static void tcp_cwnd_validate(struct sock *sk) * when we would be allowed to send the split-due-to-Nagle skb fully. */ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, - unsigned int mss_now, unsigned int cwnd) + unsigned int mss_now, unsigned int max_segs) { const struct tcp_sock *tp = tcp_sk(sk); - u32 needed, window, cwnd_len; + u32 needed, window, max_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; - cwnd_len = mss_now * cwnd; + max_len = mss_now * max_segs; - if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) - return cwnd_len; + if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) + return max_len; needed = min(skb->len, window); - if (cwnd_len <= needed) - return cwnd_len; + if (max_len <= needed) + return max_len; return needed - needed % mss_now; } @@ -1765,7 +1765,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) limit = min(send_win, cong_win); /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= sk->sk_gso_max_size) + if (limit >= min_t(unsigned int, sk->sk_gso_max_size, + sk->sk_gso_max_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1999,7 +2000,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, - cwnd_quota); + min_t(unsigned int, + cwnd_quota, + sk->sk_gso_max_segs)); if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) -- cgit v1.2.3-70-g09d2 From e3c0d04750751389d5116267f8cf4687444d9a50 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Mon, 30 Jul 2012 21:43:54 +0000 Subject: Fix unexpected SA hard expiration after changing date After SA is setup, one timer is armed to detect soft/hard expiration, however the timer handler uses xtime to do the math. This makes hard expiration occurs first before soft expiration after setting new date with big interval. As a result new child SA is deleted before rekeying the new one. Signed-off-by: Fan Du Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++++ net/xfrm/xfrm_state.c | 21 +++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9509eb29b8..62b619e82a9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -213,6 +213,9 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct tasklet_hrtimer mtimer; + /* used to fix curlft->add_time when changing date */ + long saved_tmo; + /* Last used time */ unsigned long lastused; @@ -238,6 +241,7 @@ static inline struct net *xs_net(struct xfrm_state *x) /* xflags - make enum if more show up */ #define XFRM_TIME_DEFER 1 +#define XFRM_SOFT_EXPIRE 2 enum { XFRM_STATE_VOID, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b..87cd0e4d428 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.hard_add_expires_seconds) { long tmo = x->lft.hard_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) - goto expired; + if (tmo <= 0) { + if (x->xflags & XFRM_SOFT_EXPIRE) { + /* enter hard expire without soft expire first?! + * setting a new date could trigger this. + * workarbound: fix x->curflt.add_time by below: + */ + x->curlft.add_time = now - x->saved_tmo - 1; + tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; + } else + goto expired; + } if (tmo < next) next = tmo; } @@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) if (x->lft.soft_add_expires_seconds) { long tmo = x->lft.soft_add_expires_seconds + x->curlft.add_time - now; - if (tmo <= 0) + if (tmo <= 0) { warn = 1; - else if (tmo < next) + x->xflags &= ~XFRM_SOFT_EXPIRE; + } else if (tmo < next) { next = tmo; + x->xflags |= XFRM_SOFT_EXPIRE; + x->saved_tmo = tmo; + } } if (x->lft.soft_use_expires_seconds) { long tmo = x->lft.soft_use_expires_seconds + -- cgit v1.2.3-70-g09d2 From 03f6b0843ad6512f27bc2e48f04c21065311e03e Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Wed, 1 Aug 2012 15:58:42 -0500 Subject: cfg80211: add channel flag to prohibit OFDM operation Currently the only way for wireless drivers to tell whether or not OFDM is allowed on the current channel is to check the regulatory information. However, this requires hodling cfg80211_mutex, which is not visible to the drivers. Other regulatory restrictions are provided as flags in the channel definition, so let's do similarly with OFDM. This patch adds a new flag, IEEE80211_CHAN_NO_OFDM, to tell drivers that OFDM on a channel is not allowed. This flag is set on any channels for which regulatory indicates that OFDM is prohibited. Signed-off-by: Seth Forshee Tested-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/reg.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 493fa0c7900..3d254e10ff3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -96,6 +96,7 @@ enum ieee80211_band { * is not permitted. * @IEEE80211_CHAN_NO_HT40MINUS: extension channel below this channel * is not permitted. + * @IEEE80211_CHAN_NO_OFDM: OFDM is not allowed on this channel. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -104,6 +105,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_RADAR = 1<<3, IEEE80211_CHAN_NO_HT40PLUS = 1<<4, IEEE80211_CHAN_NO_HT40MINUS = 1<<5, + IEEE80211_CHAN_NO_OFDM = 1<<6, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index a9175fedeb5..cbf30de79c6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -680,6 +680,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_IBSS; if (rd_flags & NL80211_RRF_DFS) channel_flags |= IEEE80211_CHAN_RADAR; + if (rd_flags & NL80211_RRF_NO_OFDM) + channel_flags |= IEEE80211_CHAN_NO_OFDM; return channel_flags; } -- cgit v1.2.3-70-g09d2 From 5d299f3d3c8a2fbc732b1bf03af36333ccec3130 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Aug 2012 05:09:33 +0000 Subject: net: ipv6: fix TCP early demux IPv6 needs a cookie in dst_check() call. We need to add rx_dst_cookie and provide a family independent sk_rx_dst_set(sk, skb) method to properly support IPv6 TCP early demux. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/net/inet_connection_sock.h | 1 + include/net/inet_sock.h | 9 --------- net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_ipv4.c | 13 ++++++++++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 27 +++++++++++++++++++++++++-- 7 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 379e433e15e..879db26ec40 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -369,6 +369,7 @@ struct ipv6_pinfo { __u8 rcv_tclass; __u32 dst_cookie; + __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5ee66f517b4..ba1d3615acb 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -39,6 +39,7 @@ struct inet_connection_sock_af_ops { int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); + void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); int (*conn_request)(struct sock *sk, struct sk_buff *skb); struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 83b567fe194..613cfa40167 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,13 +249,4 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } -static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; -} - #endif /* _INET_SOCK_H */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2fd2bc9e3c6..85308b90df8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5392,6 +5392,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); + if (unlikely(sk->sk_rx_dst == NULL)) + inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. * The code loosely follows the one in the famous @@ -5605,7 +5607,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - inet_sk_rx_dst_set(sk, skb); + icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a7309..272241f16fc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1627,9 +1627,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) - inet_sk_rx_dst_set(sk, skb); - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1872,10 +1869,20 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; +} + const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .net_header_len = sizeof(struct iphdr), diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 232a90c3ec8..d9c9dcef2de 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,7 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - inet_sk_rx_dst_set(newsk, skb); + newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9..5a439e9a4c0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1447,7 +1447,17 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1705,9 +1715,9 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; struct inet_sock *icsk = inet_sk(sk); if (dst) - dst = dst_check(dst, 0); + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == inet6_iif(skb)) + icsk->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1719,10 +1729,23 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, + .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), -- cgit v1.2.3-70-g09d2 From a37e6e344910a43b9ebc2bbf29a029f5ea942598 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Aug 2012 10:55:45 +0000 Subject: net: force dst_default_metrics to const section While investigating on network performance problems, I found this little gem : $ nm -v vmlinux | grep -1 dst_default_metrics ffffffff82736540 b busy.46605 ffffffff82736560 B dst_default_metrics ffffffff82736598 b dst_busy_list Apparently, declaring a const array without initializer put it in (writeable) bss section, in middle of possibly often dirtied cache lines. Since we really want dst_default_metrics be const to avoid any possible false sharing and catch any buggy writes, I force a null initializer. ffffffff818a4c20 R dst_default_metrics Signed-off-by: Eric Dumazet Cc: Ben Hutchings Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- net/core/dst.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index baf59789006..621e3513ef5 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,7 +110,7 @@ struct dst_entry { }; extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); -extern const u32 dst_default_metrics[RTAX_MAX]; +extern const u32 dst_default_metrics[]; #define DST_METRICS_READ_ONLY 0x1UL #define __DST_METRICS_PTR(Y) \ diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d2941..56d63612e1e 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX + 1] = { + /* This initializer is needed to force linker to place this variable + * into const section. Otherwise it might end into bss section. + * We really want to avoid false sharing on this variable, and catch + * any writes on it. + */ + [RTAX_MAX] = 0xdeadbeef, +}; + void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, unsigned short flags) -- cgit v1.2.3-70-g09d2 From 63d02d157ec4124990258d66517b6c11fd6df0cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 14:11:00 +0000 Subject: net: tcp: ipv6_mapped needs sk_rx_dst_set method commit 5d299f3d3c8a2fb (net: ipv6: fix TCP early demux) added a regression for ipv6_mapped case. [ 67.422369] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 67.449678] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 92.631060] BUG: unable to handle kernel NULL pointer dereference at (null) [ 92.631435] IP: [< (null)>] (null) [ 92.631645] PGD 0 [ 92.631846] Oops: 0010 [#1] SMP [ 92.632095] Modules linked in: autofs4 sunrpc ipv6 dm_mirror dm_region_hash dm_log dm_multipath dm_mod video sbs sbshc battery ac lp parport sg snd_hda_intel snd_hda_codec snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device pcspkr snd_pcm_oss snd_mixer_oss snd_pcm snd_timer serio_raw button floppy snd i2c_i801 i2c_core soundcore snd_page_alloc shpchp ide_cd_mod cdrom microcode ehci_hcd ohci_hcd uhci_hcd [ 92.634294] CPU 0 [ 92.634294] Pid: 4469, comm: sendmail Not tainted 3.6.0-rc1 #3 [ 92.634294] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 92.634294] RSP: 0018:ffff880245fc7cb0 EFLAGS: 00010282 [ 92.634294] RAX: ffffffffa01985f0 RBX: ffff88024827ad00 RCX: 0000000000000000 [ 92.634294] RDX: 0000000000000218 RSI: ffff880254735380 RDI: ffff88024827ad00 [ 92.634294] RBP: ffff880245fc7cc8 R08: 0000000000000001 R09: 0000000000000000 [ 92.634294] R10: 0000000000000000 R11: ffff880245fc7bf8 R12: ffff880254735380 [ 92.634294] R13: ffff880254735380 R14: 0000000000000000 R15: 7fffffffffff0218 [ 92.634294] FS: 00007f4516ccd6f0(0000) GS:ffff880256600000(0000) knlGS:0000000000000000 [ 92.634294] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 92.634294] CR2: 0000000000000000 CR3: 0000000245ed1000 CR4: 00000000000007f0 [ 92.634294] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 92.634294] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 92.634294] Process sendmail (pid: 4469, threadinfo ffff880245fc6000, task ffff880254b8cac0) [ 92.634294] Stack: [ 92.634294] ffffffff813837a7 ffff88024827ad00 ffff880254b6b0e8 ffff880245fc7d68 [ 92.634294] ffffffff81385083 00000000001d2680 ffff8802547353a8 ffff880245fc7d18 [ 92.634294] ffffffff8105903a ffff88024827ad60 0000000000000002 00000000000000ff [ 92.634294] Call Trace: [ 92.634294] [] ? tcp_finish_connect+0x2c/0xfa [ 92.634294] [] tcp_rcv_state_process+0x2b6/0x9c6 [ 92.634294] [] ? sched_clock_cpu+0xc3/0xd1 [ 92.634294] [] ? local_clock+0x2b/0x3c [ 92.634294] [] tcp_v4_do_rcv+0x63a/0x670 [ 92.634294] [] release_sock+0x128/0x1bd [ 92.634294] [] __inet_stream_connect+0x1b1/0x352 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? wake_up_bit+0x25/0x25 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? inet_stream_connect+0x22/0x4b [ 92.634294] [] inet_stream_connect+0x33/0x4b [ 92.634294] [] sys_connect+0x78/0x9e [ 92.634294] [] ? sysret_check+0x1b/0x56 [ 92.634294] [] ? __audit_syscall_entry+0x195/0x1c8 [ 92.634294] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 92.634294] [] system_call_fastpath+0x16/0x1b [ 92.634294] Code: Bad RIP value. [ 92.634294] RIP [< (null)>] (null) [ 92.634294] RSP [ 92.634294] CR2: 0000000000000000 [ 92.648982] ---[ end trace 24e2bed94314c8d9 ]--- [ 92.649146] Kernel panic - not syncing: Fatal exception in interrupt Fix this using inet_sk_rx_dst_set(), and export this function in case IPv6 is modular. Reported-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e19124b84cd..1f000ffe707 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -464,6 +464,7 @@ extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 272241f16fc..76782376401 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1869,7 +1869,7 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -1877,6 +1877,7 @@ static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } +EXPORT_SYMBOL(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a439e9a4c0..bb9ce2b2f37 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1777,6 +1777,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), -- cgit v1.2.3-70-g09d2 From b5ec8eeac46a99004c26791f70b15d001e970acf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Aug 2012 02:22:47 +0000 Subject: ipv4: fix ip_send_skb() ip_send_skb() can send orphaned skb, so we must pass the net pointer to avoid possible NULL dereference in error path. Bug added by commit 3a7c384ffd57 (ipv4: tcp: unicast_sock should not land outside of TCP stack) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_output.c | 5 ++--- net/ipv4/udp.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index bd5e444a19c..5a5d84d3d2c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -120,7 +120,7 @@ extern struct sk_buff *__ip_make_skb(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork); -extern int ip_send_skb(struct sk_buff *skb); +extern int ip_send_skb(struct net *net, struct sk_buff *skb); extern int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4); extern void ip_flush_pending_frames(struct sock *sk); extern struct sk_buff *ip_make_skb(struct sock *sk, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ec410e08b4b..147ccc3e93d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1366,9 +1366,8 @@ out: return skb; } -int ip_send_skb(struct sk_buff *skb) +int ip_send_skb(struct net *net, struct sk_buff *skb) { - struct net *net = sock_net(skb->sk); int err; err = ip_local_out(skb); @@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) return 0; /* Netfilter gets whole the not fragmented skb. */ - return ip_send_skb(skb); + return ip_send_skb(sock_net(sk), skb); } /* diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991..6f6d1aca3c3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: - err = ip_send_skb(skb); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), -- cgit v1.2.3-70-g09d2 From 2359a47671fc4fb0fe5e9945f76c2cb10792c0f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 20:52:21 +0000 Subject: codel: refine one condition to avoid a nul rec_inv_sqrt One condition before codel_Newton_step() was not good if we never left the dropping state for a flow. As a result rec_inv_sqrt was 0, instead of the ~0 initial value. codel control law was then set to a very aggressive mode, dropping many packets before reaching 'target' and recovering from this problem. To keep codel_vars_init() as efficient as possible, refine the condition to make sure rec_inv_sqrt initial value is correct Many thanks to Anton Mich for discovering the issue and suggesting a fix. Reported-by: Anton Mich Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/codel.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/codel.h b/include/net/codel.h index 550debfc240..389cf621161 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -305,6 +305,8 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, } } } else if (drop) { + u32 delta; + if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; } else { @@ -320,9 +322,11 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, * assume that the drop rate that controlled the queue on the * last cycle is a good starting point to control it now. */ - if (codel_time_before(now - vars->drop_next, + delta = vars->count - vars->lastcount; + if (delta > 1 && + codel_time_before(now - vars->drop_next, 16 * params->interval)) { - vars->count = (vars->count - vars->lastcount) | 1; + vars->count = delta; /* we dont care if rec_inv_sqrt approximation * is not very precise : * Next Newton steps will correct it quadratically. -- cgit v1.2.3-70-g09d2