aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/filter.txt13
-rw-r--r--drivers/net/ethernet/emulex/benet/be.h12
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.c7
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c92
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c31
-rw-r--r--drivers/net/ethernet/neterion/s2io.c9
-rw-r--r--include/linux/ethtool.h13
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/netlink.h3
-rw-r--r--include/uapi/linux/audit.h8
-rw-r--r--include/uapi/linux/capability.h7
-rw-r--r--include/uapi/linux/ethtool.h32
-rw-r--r--include/uapi/linux/filter.h3
-rw-r--r--kernel/audit.c64
-rw-r--r--net/8021q/vlan_dev.c58
-rw-r--r--net/core/ethtool.c221
-rw-r--r--net/core/filter.c12
-rw-r--r--net/ipv4/tcp_output.c34
-rw-r--r--net/ipv6/addrconf.c20
-rw-r--r--net/netfilter/nfnetlink.c10
-rw-r--r--net/netlink/af_netlink.c70
-rw-r--r--net/netlink/af_netlink.h6
-rw-r--r--net/tipc/bcast.c37
-rw-r--r--net/tipc/bcast.h5
-rw-r--r--net/tipc/bearer.c117
-rw-r--r--net/tipc/bearer.h12
-rw-r--r--net/tipc/config.c6
-rw-r--r--net/tipc/core.h2
-rw-r--r--net/tipc/discover.c70
-rw-r--r--net/tipc/discover.h1
-rw-r--r--net/tipc/link.c89
-rw-r--r--net/tipc/link.h6
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/net.c61
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/node.c10
-rw-r--r--security/selinux/include/classmap.h2
-rw-r--r--tools/net/bpf_exp.l1
-rw-r--r--tools/net/bpf_exp.y11
40 files changed, 824 insertions, 338 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 81f940f4e88..82e1cb0b3da 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table:
cpu raw_smp_processor_id()
vlan_tci vlan_tx_tag_get(skb)
vlan_pr vlan_tx_tag_present(skb)
+ rand prandom_u32()
These extensions can also be prefixed with '#'.
Examples for low-level BPF:
@@ -308,6 +309,18 @@ Examples for low-level BPF:
ret #-1
drop: ret #0
+** icmp random packet sampling, 1 in 4
+ ldh [12]
+ jne #0x800, drop
+ ldb [23]
+ jneq #1, drop
+ # get a random uint32 number
+ ld rand
+ mod #4
+ jneq #1, drop
+ ret #-1
+ drop: ret #0
+
** SECCOMP filter example:
ld [4] /* offsetof(struct seccomp_data, arch) */
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 97db5a7179d..2f67c7c8d41 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -120,6 +120,9 @@ static inline char *nic_name(struct pci_dev *pdev)
#define MAX_VFS 30 /* Max VFs supported by BE3 FW */
#define FW_VER_LEN 32
+#define RSS_INDIR_TABLE_LEN 128
+#define RSS_HASH_KEY_LEN 40
+
struct be_dma_mem {
void *va;
dma_addr_t dma;
@@ -409,6 +412,13 @@ struct be_resources {
u32 if_cap_flags;
};
+struct rss_info {
+ u64 rss_flags;
+ u8 rsstable[RSS_INDIR_TABLE_LEN];
+ u8 rss_queue[RSS_INDIR_TABLE_LEN];
+ u8 rss_hkey[RSS_HASH_KEY_LEN];
+};
+
struct be_adapter {
struct pci_dev *pdev;
struct net_device *netdev;
@@ -507,7 +517,7 @@ struct be_adapter {
u32 msg_enable;
int be_get_temp_freq;
u8 pf_number;
- u64 rss_flags;
+ struct rss_info rss_info;
};
#define be_physfn(adapter) (!adapter->virtfn)
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index d1ec15af0d2..07e78e89a34 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2020,13 +2020,10 @@ int be_cmd_reset_function(struct be_adapter *adapter)
}
int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
- u32 rss_hash_opts, u16 table_size)
+ u32 rss_hash_opts, u16 table_size, u8 *rss_hkey)
{
struct be_mcc_wrb *wrb;
struct be_cmd_req_rss_config *req;
- u32 myhash[10] = {0x15d43fa5, 0x2534685a, 0x5f87693a, 0x5668494e,
- 0x33cf6a53, 0x383334c6, 0x76ac4257, 0x59b242b2,
- 0x3ea83c02, 0x4a110304};
int status;
if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
@@ -2049,7 +2046,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
req->hdr.version = 1;
memcpy(req->cpu_table, rsstable, table_size);
- memcpy(req->hash, myhash, sizeof(myhash));
+ memcpy(req->hash, rss_hkey, RSS_HASH_KEY_LEN);
be_dws_cpu_to_le(req->hash, sizeof(req->hash));
status = be_mbox_notify_wait(adapter);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index b60e4d53c1c..4ea79b9c67e 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2068,7 +2068,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
u32 *function_mode, u32 *function_caps, u16 *asic_rev);
int be_cmd_reset_function(struct be_adapter *adapter);
int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
- u32 rss_hash_opts, u16 table_size);
+ u32 rss_hash_opts, u16 table_size, u8 *rss_hkey);
int be_process_mcc(struct be_adapter *adapter);
int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
u8 status, u8 state);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 15ba96cba65..6f3494e4151 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -933,27 +933,27 @@ static u64 be_get_rss_hash_opts(struct be_adapter *adapter, u64 flow_type)
switch (flow_type) {
case TCP_V4_FLOW:
- if (adapter->rss_flags & RSS_ENABLE_IPV4)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV4)
data |= RXH_IP_DST | RXH_IP_SRC;
- if (adapter->rss_flags & RSS_ENABLE_TCP_IPV4)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_TCP_IPV4)
data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
break;
case UDP_V4_FLOW:
- if (adapter->rss_flags & RSS_ENABLE_IPV4)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV4)
data |= RXH_IP_DST | RXH_IP_SRC;
- if (adapter->rss_flags & RSS_ENABLE_UDP_IPV4)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_UDP_IPV4)
data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
break;
case TCP_V6_FLOW:
- if (adapter->rss_flags & RSS_ENABLE_IPV6)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV6)
data |= RXH_IP_DST | RXH_IP_SRC;
- if (adapter->rss_flags & RSS_ENABLE_TCP_IPV6)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_TCP_IPV6)
data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
break;
case UDP_V6_FLOW:
- if (adapter->rss_flags & RSS_ENABLE_IPV6)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_IPV6)
data |= RXH_IP_DST | RXH_IP_SRC;
- if (adapter->rss_flags & RSS_ENABLE_UDP_IPV6)
+ if (adapter->rss_info.rss_flags & RSS_ENABLE_UDP_IPV6)
data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
break;
}
@@ -992,7 +992,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter,
struct be_rx_obj *rxo;
int status = 0, i, j;
u8 rsstable[128];
- u32 rss_flags = adapter->rss_flags;
+ u32 rss_flags = adapter->rss_info.rss_flags;
if (cmd->data != L3_RSS_FLAGS &&
cmd->data != (L3_RSS_FLAGS | L4_RSS_FLAGS))
@@ -1039,7 +1039,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter,
return -EINVAL;
}
- if (rss_flags == adapter->rss_flags)
+ if (rss_flags == adapter->rss_info.rss_flags)
return status;
if (be_multi_rxq(adapter)) {
@@ -1051,9 +1051,11 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter,
}
}
}
- status = be_cmd_rss_config(adapter, rsstable, rss_flags, 128);
+
+ status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable,
+ rss_flags, 128, adapter->rss_info.rss_hkey);
if (!status)
- adapter->rss_flags = rss_flags;
+ adapter->rss_info.rss_flags = rss_flags;
return status;
}
@@ -1103,6 +1105,68 @@ static int be_set_channels(struct net_device *netdev,
return be_update_queues(adapter);
}
+static u32 be_get_rxfh_indir_size(struct net_device *netdev)
+{
+ return RSS_INDIR_TABLE_LEN;
+}
+
+static u32 be_get_rxfh_key_size(struct net_device *netdev)
+{
+ return RSS_HASH_KEY_LEN;
+}
+
+static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
+{
+ struct be_adapter *adapter = netdev_priv(netdev);
+ int i;
+ struct rss_info *rss = &adapter->rss_info;
+
+ if (indir) {
+ for (i = 0; i < RSS_INDIR_TABLE_LEN; i++)
+ indir[i] = rss->rss_queue[i];
+ }
+
+ if (hkey)
+ memcpy(hkey, rss->rss_hkey, RSS_HASH_KEY_LEN);
+
+ return 0;
+}
+
+static int be_set_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
+{
+ int rc = 0, i, j;
+ struct be_adapter *adapter = netdev_priv(netdev);
+ u8 rsstable[RSS_INDIR_TABLE_LEN];
+
+ if (indir) {
+ struct be_rx_obj *rxo;
+ for (i = 0; i < RSS_INDIR_TABLE_LEN; i++) {
+ j = indir[i];
+ rxo = &adapter->rx_obj[j];
+ rsstable[i] = rxo->rss_id;
+ adapter->rss_info.rss_queue[i] = j;
+ }
+ } else {
+ memcpy(rsstable, adapter->rss_info.rsstable,
+ RSS_INDIR_TABLE_LEN);
+ }
+
+ if (!hkey)
+ hkey = adapter->rss_info.rss_hkey;
+
+ rc = be_cmd_rss_config(adapter, rsstable,
+ adapter->rss_info.rss_flags,
+ RSS_INDIR_TABLE_LEN, hkey);
+ if (rc) {
+ adapter->rss_info.rss_flags = RSS_ENABLE_NONE;
+ return -EIO;
+ }
+ memcpy(adapter->rss_info.rss_hkey, hkey, RSS_HASH_KEY_LEN);
+ memcpy(adapter->rss_info.rsstable, rsstable,
+ RSS_INDIR_TABLE_LEN);
+ return 0;
+}
+
const struct ethtool_ops be_ethtool_ops = {
.get_settings = be_get_settings,
.get_drvinfo = be_get_drvinfo,
@@ -1129,6 +1193,10 @@ const struct ethtool_ops be_ethtool_ops = {
.self_test = be_self_test,
.get_rxnfc = be_get_rxnfc,
.set_rxnfc = be_set_rxnfc,
+ .get_rxfh_indir_size = be_get_rxfh_indir_size,
+ .get_rxfh_key_size = be_get_rxfh_key_size,
+ .get_rxfh = be_get_rxfh,
+ .set_rxfh = be_set_rxfh,
.get_channels = be_get_channels,
.set_channels = be_set_channels
};
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index a18645407d2..a3c6a27d13f 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2774,7 +2774,8 @@ static int be_rx_qs_create(struct be_adapter *adapter)
{
struct be_rx_obj *rxo;
int rc, i, j;
- u8 rsstable[128];
+ u8 rss_hkey[RSS_HASH_KEY_LEN];
+ struct rss_info *rss = &adapter->rss_info;
for_all_rx_queues(adapter, rxo, i) {
rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
@@ -2799,31 +2800,37 @@ static int be_rx_qs_create(struct be_adapter *adapter)
}
if (be_multi_rxq(adapter)) {
- for (j = 0; j < 128; j += adapter->num_rx_qs - 1) {
+ for (j = 0; j < RSS_INDIR_TABLE_LEN;
+ j += adapter->num_rx_qs - 1) {
for_all_rss_queues(adapter, rxo, i) {
- if ((j + i) >= 128)
+ if ((j + i) >= RSS_INDIR_TABLE_LEN)
break;
- rsstable[j + i] = rxo->rss_id;
+ rss->rsstable[j + i] = rxo->rss_id;
+ rss->rss_queue[j + i] = i;
}
}
- adapter->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
- RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
+ rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
+ RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
if (!BEx_chip(adapter))
- adapter->rss_flags |= RSS_ENABLE_UDP_IPV4 |
- RSS_ENABLE_UDP_IPV6;
+ rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
+ RSS_ENABLE_UDP_IPV6;
} else {
/* Disable RSS, if only default RX Q is created */
- adapter->rss_flags = RSS_ENABLE_NONE;
+ rss->rss_flags = RSS_ENABLE_NONE;
}
- rc = be_cmd_rss_config(adapter, rsstable, adapter->rss_flags,
- 128);
+ get_random_bytes(rss_hkey, RSS_HASH_KEY_LEN);
+ rc = be_cmd_rss_config(adapter, rss->rsstable,
+ rss->rss_flags,
+ 128, rss_hkey);
if (rc) {
- adapter->rss_flags = RSS_ENABLE_NONE;
+ rss->rss_flags = RSS_ENABLE_NONE;
return rc;
}
+ memcpy(rss->rss_hkey, rss_hkey, RSS_HASH_KEY_LEN);
+
/* First time posting */
for_all_rx_queues(adapter, rxo, i)
be_post_rx_frags(rxo, GFP_KERNEL);
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index a2844ff322c..e900c1abdef 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -534,15 +534,6 @@ static inline void s2io_start_all_tx_queue(struct s2io_nic *sp)
netif_tx_start_all_queues(sp->dev);
}
-static inline void s2io_start_tx_queue(struct s2io_nic *sp, int fifo_no)
-{
- if (!sp->config.multiq)
- sp->mac_control.fifos[fifo_no].queue_state =
- FIFO_QUEUE_START;
-
- netif_tx_start_all_queues(sp->dev);
-}
-
static inline void s2io_wake_all_tx_queue(struct s2io_nic *sp)
{
if (!sp->config.multiq) {
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0a114d05f68..212f537fc68 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -154,13 +154,23 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
* @reset: Reset (part of) the device, as specified by a bitmask of
* flags from &enum ethtool_reset_flags. Returns a negative
* error code or zero.
+ * @get_rxfh_key_size: Get the size of the RX flow hash key.
+ * Returns zero if not supported for this specific device.
* @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
* Returns zero if not supported for this specific device.
* @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
* Will not be called if @get_rxfh_indir_size returns zero.
+ * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
+ * key.
+ * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
+ * returns zero.
* Returns a negative error code or zero.
* @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
* Will not be called if @get_rxfh_indir_size returns zero.
+ * @set_rxfh: Set the contents of the RX flow hash indirection table and
+ * hash key.
+ * Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
+ * returns zero.
* Returns a negative error code or zero.
* @get_channels: Get number of channels.
* @set_channels: Set number of channels. Returns a negative error code or
@@ -232,7 +242,10 @@ struct ethtool_ops {
int (*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
int (*flash_device)(struct net_device *, struct ethtool_flash *);
int (*reset)(struct net_device *, u32 *);
+ u32 (*get_rxfh_key_size)(struct net_device *);
u32 (*get_rxfh_indir_size)(struct net_device *);
+ int (*get_rxfh)(struct net_device *, u32 *, u8 *);
+ int (*set_rxfh)(struct net_device *, u32 *, u8 *);
int (*get_rxfh_indir)(struct net_device *, u32 *);
int (*set_rxfh_indir)(struct net_device *, const u32 *);
void (*get_channels)(struct net_device *, struct ethtool_channels *);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 024fd03e5d1..759abf78dd6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -223,6 +223,7 @@ enum {
BPF_S_ANC_VLAN_TAG,
BPF_S_ANC_VLAN_TAG_PRESENT,
BPF_S_ANC_PAY_OFFSET,
+ BPF_S_ANC_RANDOM,
};
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index aad8eeaf416..5146ce06649 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -45,7 +45,8 @@ struct netlink_kernel_cfg {
unsigned int flags;
void (*input)(struct sk_buff *skb);
struct mutex *cb_mutex;
- void (*bind)(int group);
+ int (*bind)(int group);
+ void (*unbind)(int group);
bool (*compare)(struct net *net, struct sock *sk);
};
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 11917f747cb..dfa4c860cce 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -373,6 +373,14 @@ enum {
*/
#define AUDIT_MESSAGE_TEXT_MAX 8560
+/* Multicast Netlink socket groups (default up to 32) */
+enum audit_nlgrps {
+ AUDIT_NLGRP_NONE, /* Group 0 not used */
+ AUDIT_NLGRP_READLOG, /* "best effort" read only socket */
+ __AUDIT_NLGRP_MAX
+};
+#define AUDIT_NLGRP_MAX (__AUDIT_NLGRP_MAX - 1)
+
struct audit_status {
__u32 mask; /* Bit mask for valid entries */
__u32 enabled; /* 1 = enabled, 0 = disabled */
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index 154dd6d3c8f..12c37a197d2 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -347,7 +347,12 @@ struct vfs_cap_data {
#define CAP_BLOCK_SUSPEND 36
-#define CAP_LAST_CAP CAP_BLOCK_SUSPEND
+/* Allow reading the audit log via multicast netlink socket */
+
+#define CAP_AUDIT_READ 37
+
+
+#define CAP_LAST_CAP CAP_AUDIT_READ
#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fd161e91b6d..d47d31d6fa0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -847,6 +847,35 @@ struct ethtool_rxfh_indir {
};
/**
+ * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
+ * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
+ * @rss_context: RSS context identifier.
+ * @indir_size: On entry, the array size of the user buffer, which may be zero.
+ * On return from %ETHTOOL_GRSSH, the array size of the hardware
+ * indirection table.
+ * @key_size: On entry, the array size of the user buffer in bytes,
+ * which may be zero.
+ * On return from %ETHTOOL_GRSSH, the size of the RSS hash key.
+ * @rsvd: Reserved for future extensions.
+ * @rss_config: RX ring/queue index for each hash value i.e., indirection table
+ * of size @indir_size followed by hash key of size @key_size.
+ *
+ * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the
+ * size should be returned. For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF
+ * means that indir table setting is not requested and a @indir_size of zero
+ * means the indir table should be reset to default values. This last feature
+ * is not supported by the original implementations.
+ */
+struct ethtool_rxfh {
+ __u32 cmd;
+ __u32 rss_context;
+ __u32 indir_size;
+ __u32 key_size;
+ __u32 rsvd[2];
+ __u32 rss_config[0];
+};
+
+/**
* struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
* @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
* @h_u: Flow field values to match (dependent on @flow_type)
@@ -1118,6 +1147,9 @@ enum ethtool_sfeatures_retval_bits {
#define ETHTOOL_GEEE 0x00000044 /* Get EEE settings */
#define ETHTOOL_SEEE 0x00000045 /* Set EEE settings */
+#define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */
+#define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */
+
/* compatibility with older code */
#define SPARC_ETH_GSET ETHTOOL_GSET
#define SPARC_ETH_SSET ETHTOOL_SSET
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 8eb9ccaa5b4..253b4d42cf2 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -130,7 +130,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
#define SKF_AD_VLAN_TAG 44
#define SKF_AD_VLAN_TAG_PRESENT 48
#define SKF_AD_PAY_OFFSET 52
-#define SKF_AD_MAX 56
+#define SKF_AD_RANDOM 56
+#define SKF_AD_MAX 60
#define SKF_NET_OFF (-0x100000)
#define SKF_LL_OFF (-0x200000)
diff --git a/kernel/audit.c b/kernel/audit.c
index 7c2893602d0..33531d72e4a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -424,6 +424,38 @@ static void kauditd_send_skb(struct sk_buff *skb)
}
/*
+ * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ *
+ * This function doesn't consume an skb as might be expected since it has to
+ * copy it anyways.
+ */
+static void kauditd_send_multicast_skb(struct sk_buff *skb)
+{
+ struct sk_buff *copy;
+ struct audit_net *aunet = net_generic(&init_net, audit_net_id);
+ struct sock *sock = aunet->nlsk;
+
+ if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
+ return;
+
+ /*
+ * The seemingly wasteful skb_copy() rather than bumping the refcount
+ * using skb_get() is necessary because non-standard mods are made to
+ * the skb by the original kaudit unicast socket send routine. The
+ * existing auditd daemon assumes this breakage. Fixing this would
+ * require co-ordinating a change in the established protocol between
+ * the kaudit kernel subsystem and the auditd userspace code. There is
+ * no reason for new multicast clients to continue with this
+ * non-compliance.
+ */
+ copy = skb_copy(skb, GFP_KERNEL);
+ if (!copy)
+ return;
+
+ nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
+}
+
+/*
* flush_hold_queue - empty the hold queue if auditd appears
*
* If auditd just started, drain the queue of messages already
@@ -1076,10 +1108,22 @@ static void audit_receive(struct sk_buff *skb)
mutex_unlock(&audit_cmd_mutex);
}
+/* Run custom bind function on netlink socket group connect or bind requests. */
+static int audit_bind(int group)
+{
+ if (!capable(CAP_AUDIT_READ))
+ return -EPERM;
+
+ return 0;
+}
+
static int __net_init audit_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = audit_receive,
+ .bind = audit_bind,
+ .flags = NL_CFG_F_NONROOT_RECV,
+ .groups = AUDIT_NLGRP_MAX,
};
struct audit_net *aunet = net_generic(net, audit_net_id);
@@ -1901,10 +1945,10 @@ out:
* audit_log_end - end one audit record
* @ab: the audit_buffer
*
- * The netlink_* functions cannot be called inside an irq context, so
- * the audit buffer is placed on a queue and a tasklet is scheduled to
- * remove them from the queue outside the irq context. May be called in
- * any context.
+ * netlink_unicast() cannot be called inside an irq context because it blocks
+ * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
+ * on a queue and a tasklet is scheduled to remove them from the queue outside
+ * the irq context. May be called in any context.
*/
void audit_log_end(struct audit_buffer *ab)
{
@@ -1914,6 +1958,18 @@ void audit_log_end(struct audit_buffer *ab)
audit_log_lost("rate limit exceeded");
} else {
struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+
+ kauditd_send_multicast_skb(ab->skb);
+
+ /*
+ * The original kaudit unicast socket sends up messages with
+ * nlmsg_len set to the payload length rather than the entire
+ * message length. This breaks the standard set by netlink.
+ * The existing auditd daemon assumes this breakage. Fixing
+ * this would require co-ordinating a change in the established
+ * protocol between the kaudit kernel subsystem and the auditd
+ * userspace code.
+ */
nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN;
if (audit_pid) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 733ec283ed1..8f025afa29f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -706,38 +706,36 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
+ struct vlan_pcpu_stats *p;
+ u32 rx_errors = 0, tx_dropped = 0;
+ int i;
- if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
- struct vlan_pcpu_stats *p;
- u32 rx_errors = 0, tx_dropped = 0;
- int i;
-
- for_each_possible_cpu(i) {
- u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
- unsigned int start;
-
- p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
- do {
- start = u64_stats_fetch_begin_irq(&p->syncp);
- rxpackets = p->rx_packets;
- rxbytes = p->rx_bytes;
- rxmulticast = p->rx_multicast;
- txpackets = p->tx_packets;
- txbytes = p->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&p->syncp, start));
-
- stats->rx_packets += rxpackets;
- stats->rx_bytes += rxbytes;
- stats->multicast += rxmulticast;
- stats->tx_packets += txpackets;
- stats->tx_bytes += txbytes;
- /* rx_errors & tx_dropped are u32 */
- rx_errors += p->rx_errors;
- tx_dropped += p->tx_dropped;
- }
- stats->rx_errors = rx_errors;
- stats->tx_dropped = tx_dropped;
+ for_each_possible_cpu(i) {
+ u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
+ unsigned int start;
+
+ p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&p->syncp);
+ rxpackets = p->rx_packets;
+ rxbytes = p->rx_bytes;
+ rxmulticast = p->rx_multicast;
+ txpackets = p->tx_packets;
+ txbytes = p->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&p->syncp, start));
+
+ stats->rx_packets += rxpackets;
+ stats->rx_bytes += rxbytes;
+ stats->multicast += rxmulticast;
+ stats->tx_packets += txpackets;
+ stats->tx_bytes += txbytes;
+ /* rx_errors & tx_dropped are u32 */
+ rx_errors += p->rx_errors;
+ tx_dropped += p->tx_dropped;
}
+ stats->rx_errors = rx_errors;
+ stats->tx_dropped = tx_dropped;
+
return stats;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 640ba0e5831..1d72786ef86 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -557,6 +557,23 @@ err_out:
return ret;
}
+static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
+ struct ethtool_rxnfc *rx_rings,
+ u32 size)
+{
+ int ret = 0, i;
+
+ if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+ ret = -EFAULT;
+
+ /* Validate ring indices */
+ for (i = 0; i < size; i++) {
+ if (indir[i] >= rx_rings->data)
+ ret = -EINVAL;
+ }
+ return ret;
+}
+
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
@@ -613,6 +630,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
u32 *indir;
const struct ethtool_ops *ops = dev->ethtool_ops;
int ret;
+ u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
!ops->get_rxnfc)
@@ -643,28 +661,196 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
for (i = 0; i < dev_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- if (copy_from_user(indir,
- useraddr +
- offsetof(struct ethtool_rxfh_indir,
- ring_index[0]),
- dev_size * sizeof(indir[0]))) {
+ ret = ethtool_copy_validate_indir(indir,
+ useraddr + ringidx_offset,
+ &rx_rings,
+ dev_size);
+ if (ret)
+ goto out;
+ }
+
+ ret = ops->set_rxfh_indir(dev, indir);
+
+out:
+ kfree(indir);
+ return ret;
+}
+
+static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ u32 user_indir_size = 0, user_key_size = 0;
+ u32 dev_indir_size = 0, dev_key_size = 0;
+ u32 total_size;
+ u32 indir_offset, indir_bytes;
+ u32 key_offset;
+ u32 *indir = NULL;
+ u8 *hkey = NULL;
+ u8 *rss_config;
+
+ if (!(dev->ethtool_ops->get_rxfh_indir_size ||
+ dev->ethtool_ops->get_rxfh_key_size) ||
+ !dev->ethtool_ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ if (ops->get_rxfh_indir_size)
+ dev_indir_size = ops->get_rxfh_indir_size(dev);
+
+ indir_offset = offsetof(struct ethtool_rxfh, indir_size);
+
+ if (copy_from_user(&user_indir_size,
+ useraddr + indir_offset,
+ sizeof(user_indir_size)))
+ return -EFAULT;
+
+ if (copy_to_user(useraddr + indir_offset,
+ &dev_indir_size, sizeof(dev_indir_size)))
+ return -EFAULT;
+
+ if (ops->get_rxfh_key_size)
+ dev_key_size = ops->get_rxfh_key_size(dev);
+
+ if ((dev_key_size + dev_indir_size) == 0)
+ return -EOPNOTSUPP;
+
+ key_offset = offsetof(struct ethtool_rxfh, key_size);
+
+ if (copy_from_user(&user_key_size,
+ useraddr + key_offset,
+ sizeof(user_key_size)))
+ return -EFAULT;
+
+ if (copy_to_user(useraddr + key_offset,
+ &dev_key_size, sizeof(dev_key_size)))
+ return -EFAULT;
+
+ /* If the user buffer size is 0, this is just a query for the
+ * device table size and key size. Otherwise, if the User size is
+ * not equal to device table size or key size it's an error.
+ */
+ if (!user_indir_size && !user_key_size)
+ return 0;
+
+ if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
+ (user_key_size && (user_key_size != dev_key_size)))
+ return -EINVAL;
+
+ indir_bytes = user_indir_size * sizeof(indir[0]);
+ total_size = indir_bytes + user_key_size;
+ rss_config = kzalloc(total_size, GFP_USER);
+ if (!rss_config)
+ return -ENOMEM;
+
+ if (user_indir_size)
+ indir = (u32 *)rss_config;
+
+ if (user_key_size)
+ hkey = rss_config + indir_bytes;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
+ if (!ret) {
+ if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, rss_config[0]),
+ rss_config, total_size))
ret = -EFAULT;
+ }
+
+ kfree(rss_config);
+
+ return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
+ void __user *useraddr)
+{
+ int ret;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxnfc rx_rings;
+ u32 user_indir_size = 0, dev_indir_size = 0, i;
+ u32 user_key_size = 0, dev_key_size = 0;
+ u32 *indir = NULL, indir_bytes = 0;
+ u8 *hkey = NULL;
+ u8 *rss_config;
+ u32 indir_offset, key_offset;
+ u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+
+ if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
+ !ops->get_rxnfc || !ops->set_rxfh)
+ return -EOPNOTSUPP;
+
+ if (ops->get_rxfh_indir_size)
+ dev_indir_size = ops->get_rxfh_indir_size(dev);
+
+ indir_offset = offsetof(struct ethtool_rxfh, indir_size);
+ if (copy_from_user(&user_indir_size,
+ useraddr + indir_offset,
+ sizeof(user_indir_size)))
+ return -EFAULT;
+
+ if (ops->get_rxfh_key_size)
+ dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+
+ if ((dev_key_size + dev_indir_size) == 0)
+ return -EOPNOTSUPP;
+
+ key_offset = offsetof(struct ethtool_rxfh, key_size);
+ if (copy_from_user(&user_key_size,
+ useraddr + key_offset,
+ sizeof(user_key_size)))
+ return -EFAULT;
+
+ /* If either indir or hash key is valid, proceed further.
+ */
+ if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) &&
+ user_indir_size != dev_indir_size)) ||
+ (user_key_size && (user_key_size != dev_key_size)))
+ return -EINVAL;
+
+ if (user_indir_size != 0xDEADBEEF)
+ indir_bytes = dev_indir_size * sizeof(indir[0]);
+
+ rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER);
+ if (!rss_config)
+ return -ENOMEM;
+
+ rx_rings.cmd = ETHTOOL_GRXRINGS;
+ ret = ops->get_rxnfc(dev, &rx_rings, NULL);
+ if (ret)
+ goto out;
+
+ /* user_indir_size == 0 means reset the indir table to default.
+ * user_indir_size == 0xDEADBEEF means indir setting is not requested.
+ */
+ if (user_indir_size && user_indir_size != 0xDEADBEEF) {
+ indir = (u32 *)rss_config;
+ ret = ethtool_copy_validate_indir(indir,
+ useraddr + rss_cfg_offset,
+ &rx_rings,
+ user_indir_size);
+ if (ret)
goto out;
- }
+ } else if (user_indir_size == 0) {
+ indir = (u32 *)rss_config;
+ for (i = 0; i < dev_indir_size; i++)
+ indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+ }
- /* Validate ring indices */
- for (i = 0; i < dev_size; i++) {
- if (indir[i] >= rx_rings.data) {
- ret = -EINVAL;
- goto out;
- }
+ if (user_key_size) {
+ hkey = rss_config + indir_bytes;
+ if (copy_from_user(hkey,
+ useraddr + rss_cfg_offset + indir_bytes,
+ user_key_size)) {
+ ret = -EFAULT;
+ goto out;
}
}
- ret = ops->set_rxfh_indir(dev, indir);
+ ret = ops->set_rxfh(dev, indir, hkey);
out:
- kfree(indir);
+ kfree(rss_config);
return ret;
}
@@ -1491,6 +1677,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
case ETHTOOL_GRXFHINDIR:
+ case ETHTOOL_GRSSH:
case ETHTOOL_GFEATURES:
case ETHTOOL_GCHANNELS:
case ETHTOOL_GET_TS_INFO:
@@ -1628,6 +1815,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXFHINDIR:
rc = ethtool_set_rxfh_indir(dev, useraddr);
break;
+ case ETHTOOL_GRSSH:
+ rc = ethtool_get_rxfh(dev, useraddr);
+ break;
+ case ETHTOOL_SRSSH:
+ rc = ethtool_set_rxfh(dev, useraddr);
+ break;
case ETHTOOL_GFEATURES:
rc = ethtool_get_features(dev, useraddr);
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index cd58614660c..78a636e60a0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -643,6 +643,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
return raw_smp_processor_id();
}
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+{
+ return (u64)prandom_u32();
+}
+
/* Register mappings for user programs. */
#define A_REG 0
#define X_REG 7
@@ -779,6 +785,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_NLATTR:
case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
case SKF_AD_OFF + SKF_AD_CPU:
+ case SKF_AD_OFF + SKF_AD_RANDOM:
/* arg1 = ctx */
insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
insn->a_reg = ARG1_REG;
@@ -812,6 +819,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_CPU:
insn->imm = __get_raw_cpu_id - __bpf_call_base;
break;
+ case SKF_AD_OFF + SKF_AD_RANDOM:
+ insn->imm = __get_random_u32 - __bpf_call_base;
+ break;
}
break;
@@ -1362,6 +1372,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
ANCILLARY(VLAN_TAG);
ANCILLARY(VLAN_TAG_PRESENT);
ANCILLARY(PAY_OFFSET);
+ ANCILLARY(RANDOM);
}
/* ancillary operation unknown or unsupported */
@@ -1746,6 +1757,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
[BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
+ [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS,
[BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
[BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
[BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 29dde97c3c4..20847de991e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
if (clone_it) {
- const struct sk_buff *fclone = skb + 1;
-
skb_mstamp_get(&skb->skb_mstamp);
- if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
- fclone->fclone == SKB_FCLONE_CLONE))
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
-
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
else
@@ -2061,6 +2054,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
return true;
}
+/* Thanks to skb fast clones, we can detect if a prior transmit of
+ * a packet is still in a qdisc or driver queue.
+ * In this case, there is very little point doing a retransmit !
+ * Note: This is called from BH context only.
+ */
+static bool skb_still_in_host_queue(const struct sock *sk,
+ const struct sk_buff *skb)
+{
+ const struct sk_buff *fclone = skb + 1;
+
+ if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+ fclone->fclone == SKB_FCLONE_CLONE)) {
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ return true;
+ }
+ return false;
+}
+
/* When probe timeout (PTO) fires, send a new segment if one exists, else
* retransmit the last segment.
*/
@@ -2086,6 +2098,9 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb))
goto rearm_timer;
+ if (skb_still_in_host_queue(sk, skb))
+ goto rearm_timer;
+
pcount = tcp_skb_pcount(skb);
if (WARN_ON(!pcount))
goto rearm_timer;
@@ -2407,6 +2422,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
return -EAGAIN;
+ if (skb_still_in_host_queue(sk, skb))
+ return -EBUSY;
+
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
@@ -2500,7 +2518,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
* see tcp_input.c tcp_sacktag_write_queue().
*/
TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
- } else {
+ } else if (err != -EBUSY) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
return err;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6c7fa0853fc..fc203db2211 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2504,8 +2504,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
return PTR_ERR(ifp);
}
-static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx,
- unsigned int plen)
+static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
+ const struct in6_addr *pfx, unsigned int plen)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2528,7 +2528,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifa_flags & IFA_F_MANAGETEMPADDR))
+ manage_tempaddrs(idev, ifp, 0, 0, false,
+ jiffies);
ipv6_del_addr(ifp);
+ addrconf_verify_rtnl();
return 0;
}
}
@@ -2568,7 +2573,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
return -EFAULT;
rtnl_lock();
- err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr,
+ err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen);
rtnl_unlock();
return err;
@@ -3743,6 +3748,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *pfx, *peer_pfx;
+ u32 ifa_flags;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
@@ -3754,7 +3760,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
if (pfx == NULL)
return -EINVAL;
- return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+
+ /* We ignore other flags so far. */
+ ifa_flags &= IFA_F_MANAGETEMPADDR;
+
+ return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen);
}
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e8138da4c14..6e42dcfad40 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -400,19 +400,17 @@ static void nfnetlink_rcv(struct sk_buff *skb)
}
#ifdef CONFIG_MODULES
-static void nfnetlink_bind(int group)
+static int nfnetlink_bind(int group)
{
const struct nfnetlink_subsystem *ss;
int type = nfnl_group2type[group];
rcu_read_lock();
ss = nfnetlink_get_subsys(type);
- if (!ss) {
- rcu_read_unlock();
- request_module("nfnetlink-subsys-%d", type);
- return;
- }
rcu_read_unlock();
+ if (!ss)
+ request_module("nfnetlink-subsys-%d", type);
+ return 0;
}
#endif
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 894cda0206b..92f4b6915e8 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
struct module *module = NULL;
struct mutex *cb_mutex;
struct netlink_sock *nlk;
- void (*bind)(int group);
+ int (*bind)(int group);
+ void (*unbind)(int group);
int err = 0;
sock->state = SS_UNCONNECTED;
@@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
err = -EPROTONOSUPPORT;
cb_mutex = nl_table[protocol].cb_mutex;
bind = nl_table[protocol].bind;
+ unbind = nl_table[protocol].unbind;
netlink_unlock_table();
if (err < 0)
@@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
nlk = nlk_sk(sock->sk);
nlk->module = module;
nlk->netlink_bind = bind;
+ nlk->netlink_unbind = unbind;
out:
return err;
@@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock)
kfree_rcu(old, rcu);
nl_table[sk->sk_protocol].module = NULL;
nl_table[sk->sk_protocol].bind = NULL;
+ nl_table[sk->sk_protocol].unbind = NULL;
nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
@@ -1411,6 +1415,19 @@ static int netlink_realloc_groups(struct sock *sk)
return err;
}
+static void netlink_unbind(int group, long unsigned int groups,
+ struct netlink_sock *nlk)
+{
+ int undo;
+
+ if (!nlk->netlink_unbind)
+ return;
+
+ for (undo = 0; undo < group; undo++)
+ if (test_bit(group, &groups))
+ nlk->netlink_unbind(undo);
+}
+
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
{
@@ -1419,6 +1436,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err;
+ long unsigned int groups = nladdr->nl_groups;
if (addr_len < sizeof(struct sockaddr_nl))
return -EINVAL;
@@ -1427,7 +1445,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
/* Only superuser is allowed to listen multicasts */
- if (nladdr->nl_groups) {
+ if (groups) {
if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
@@ -1435,37 +1453,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return err;
}
- if (nlk->portid) {
+ if (nlk->portid)
if (nladdr->nl_pid != nlk->portid)
return -EINVAL;
- } else {
+
+ if (nlk->netlink_bind && groups) {
+ int group;
+
+ for (group = 0; group < nlk->ngroups; group++) {
+ if (!test_bit(group, &groups))
+ continue;
+ err = nlk->netlink_bind(group);
+ if (!err)
+ continue;
+ netlink_unbind(group, groups, nlk);
+ return err;
+ }
+ }
+
+ if (!nlk->portid) {
err = nladdr->nl_pid ?
netlink_insert(sk, net, nladdr->nl_pid) :
netlink_autobind(sock);
- if (err)
+ if (err) {
+ netlink_unbind(nlk->ngroups - 1, groups, nlk);
return err;
+ }
}
- if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
+ if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
return 0;
netlink_table_grab();
netlink_update_subscriptions(sk, nlk->subscriptions +
- hweight32(nladdr->nl_groups) -
+ hweight32(groups) -
hweight32(nlk->groups[0]));
- nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
+ nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
netlink_update_listeners(sk);
netlink_table_ungrab();
- if (nlk->netlink_bind && nlk->groups[0]) {
- int i;
-
- for (i = 0; i < nlk->ngroups; i++) {
- if (test_bit(i, nlk->groups))
- nlk->netlink_bind(i);
- }
- }
-
return 0;
}
@@ -2103,13 +2129,17 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return err;
if (!val || val - 1 >= nlk->ngroups)
return -EINVAL;
+ if (optname == NETLINK_ADD_MEMBERSHIP && nlk->netlink_bind) {
+ err = nlk->netlink_bind(val);
+ if (err)
+ return err;
+ }
netlink_table_grab();
netlink_update_socket_mc(nlk, val,
optname == NETLINK_ADD_MEMBERSHIP);
netlink_table_ungrab();
-
- if (nlk->netlink_bind)
- nlk->netlink_bind(val);
+ if (optname == NETLINK_DROP_MEMBERSHIP && nlk->netlink_unbind)
+ nlk->netlink_unbind(val);
err = 0;
break;
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index ed13a790b00..0b59d441f5b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -38,7 +38,8 @@ struct netlink_sock {
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
- void (*netlink_bind)(int group);
+ int (*netlink_bind)(int group);
+ void (*netlink_unbind)(int group);
struct module *module;
#ifdef CONFIG_NETLINK_MMAP
struct mutex pg_vec_lock;
@@ -74,7 +75,8 @@ struct netlink_table {
unsigned int groups;
struct mutex *cb_mutex;
struct module *module;
- void (*bind)(int group);
+ int (*bind)(int group);
+ void (*unbind)(int group);
bool (*compare)(struct net *net, struct sock *sock);
int registered;
};
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 95ab5ef9292..119a59b4bec 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -112,6 +112,8 @@ const char tipc_bclink_name[] = "broadcast-link";
static void tipc_nmap_diff(struct tipc_node_map *nm_a,
struct tipc_node_map *nm_b,
struct tipc_node_map *nm_diff);
+static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
+static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
static u32 bcbuf_acks(struct sk_buff *buf)
{
@@ -273,7 +275,7 @@ exit:
/**
* tipc_bclink_update_link_state - update broadcast link state
*
- * tipc_net_lock and node lock set
+ * RCU and node lock set
*/
void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
{
@@ -321,7 +323,7 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
: n_ptr->bclink.last_sent);
spin_lock_bh(&bc_lock);
- tipc_bearer_send(&bcbearer->bearer, buf, NULL);
+ tipc_bearer_send(MAX_BEARERS, buf, NULL);
bcl->stats.sent_nacks++;
spin_unlock_bh(&bc_lock);
kfree_skb(buf);
@@ -335,8 +337,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent)
*
* Delay any upcoming NACK by this node if another node has already
* requested the first message this node is going to ask for.
- *
- * Only tipc_net_lock set.
*/
static void bclink_peek_nack(struct tipc_msg *msg)
{
@@ -408,7 +408,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
/**
* tipc_bclink_rcv - receive a broadcast packet, and deliver upwards
*
- * tipc_net_lock is read_locked, no other locks set
+ * RCU is locked, no other locks set
*/
void tipc_bclink_rcv(struct sk_buff *buf)
{
@@ -627,13 +627,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
if (bp_index == 0) {
/* Use original buffer for first bearer */
- tipc_bearer_send(b, buf, &b->bcast_addr);
+ tipc_bearer_send(b->identity, buf, &b->bcast_addr);
} else {
/* Avoid concurrent buffer access */
tbuf = pskb_copy(buf, GFP_ATOMIC);
if (!tbuf)
break;
- tipc_bearer_send(b, tbuf, &b->bcast_addr);
+ tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
kfree_skb(tbuf); /* Bearer keeps a clone */
}
@@ -655,20 +655,27 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
/**
* tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer
*/
-void tipc_bcbearer_sort(void)
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action)
{
struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp;
struct tipc_bcbearer_pair *bp_curr;
+ struct tipc_bearer *b;
int b_index;
int pri;
spin_lock_bh(&bc_lock);
+ if (action)
+ tipc_nmap_add(nm_ptr, node);
+ else
+ tipc_nmap_remove(nm_ptr, node);
+
/* Group bearers by priority (can assume max of two per priority) */
memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp));
+ rcu_read_lock();
for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
- struct tipc_bearer *b = bearer_list[b_index];
+ b = rcu_dereference_rtnl(bearer_list[b_index]);
if (!b || !b->nodes.count)
continue;
@@ -677,6 +684,7 @@ void tipc_bcbearer_sort(void)
else
bp_temp[b->priority].secondary = b;
}
+ rcu_read_unlock();
/* Create array of bearer pairs for broadcasting */
bp_curr = bcbearer->bpairs;
@@ -783,8 +791,8 @@ void tipc_bclink_init(void)
bcl->owner = &bclink->node;
bcl->max_pkt = MAX_PKT_DEFAULT_MCAST;
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
- bcl->b_ptr = &bcbearer->bearer;
- bearer_list[BCBEARER] = &bcbearer->bearer;
+ bcl->bearer_id = MAX_BEARERS;
+ rcu_assign_pointer(bearer_list[MAX_BEARERS], &bcbearer->bearer);
bcl->state = WORKING_WORKING;
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
}
@@ -795,16 +803,15 @@ void tipc_bclink_stop(void)
tipc_link_purge_queues(bcl);
spin_unlock_bh(&bc_lock);
- bearer_list[BCBEARER] = NULL;
+ RCU_INIT_POINTER(bearer_list[BCBEARER], NULL);
memset(bclink, 0, sizeof(*bclink));
memset(bcbearer, 0, sizeof(*bcbearer));
}
-
/**
* tipc_nmap_add - add a node to a node map
*/
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
+static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
{
int n = tipc_node(node);
int w = n / WSIZE;
@@ -819,7 +826,7 @@ void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
/**
* tipc_nmap_remove - remove a node from a node map
*/
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
+static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
{
int n = tipc_node(node);
int w = n / WSIZE;
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index a80ef54b818..7c1ef1b3d7b 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -69,9 +69,6 @@ struct tipc_node;
extern const char tipc_bclink_name[];
-void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
-void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
-
/**
* tipc_nmap_equal - test for equality of node maps
*/
@@ -98,6 +95,6 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent);
int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
int tipc_bclink_reset_stats(void);
int tipc_bclink_set_queue_limits(u32 limit);
-void tipc_bcbearer_sort(void);
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action);
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 3fef7eb776d..f3259d4133b 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -49,7 +49,7 @@ static struct tipc_media * const media_info_array[] = {
NULL
};
-struct tipc_bearer *bearer_list[MAX_BEARERS + 1];
+struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1];
static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down);
@@ -178,7 +178,7 @@ struct tipc_bearer *tipc_bearer_find(const char *name)
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- b_ptr = bearer_list[i];
+ b_ptr = rtnl_dereference(bearer_list[i]);
if (b_ptr && (!strcmp(b_ptr->name, name)))
return b_ptr;
}
@@ -198,10 +198,9 @@ struct sk_buff *tipc_bearer_get_names(void)
if (!buf)
return NULL;
- read_lock_bh(&tipc_net_lock);
for (i = 0; media_info_array[i] != NULL; i++) {
for (j = 0; j < MAX_BEARERS; j++) {
- b = bearer_list[j];
+ b = rtnl_dereference(bearer_list[j]);
if (!b)
continue;
if (b->media == media_info_array[i]) {
@@ -211,22 +210,33 @@ struct sk_buff *tipc_bearer_get_names(void)
}
}
}
- read_unlock_bh(&tipc_net_lock);
return buf;
}
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_add_dest(u32 bearer_id, u32 dest)
{
- tipc_nmap_add(&b_ptr->nodes, dest);
- tipc_bcbearer_sort();
- tipc_disc_add_dest(b_ptr->link_req);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+ if (b_ptr) {
+ tipc_bcbearer_sort(&b_ptr->nodes, dest, true);
+ tipc_disc_add_dest(b_ptr->link_req);
+ }
+ rcu_read_unlock();
}
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest)
+void tipc_bearer_remove_dest(u32 bearer_id, u32 dest)
{
- tipc_nmap_remove(&b_ptr->nodes, dest);
- tipc_bcbearer_sort();
- tipc_disc_remove_dest(b_ptr->link_req);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+ if (b_ptr) {
+ tipc_bcbearer_sort(&b_ptr->nodes, dest, false);
+ tipc_disc_remove_dest(b_ptr->link_req);
+ }
+ rcu_read_unlock();
}
/**
@@ -271,13 +281,11 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
return -EINVAL;
}
- write_lock_bh(&tipc_net_lock);
-
m_ptr = tipc_media_find(b_names.media_name);
if (!m_ptr) {
pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
name, b_names.media_name);
- goto exit;
+ return -EINVAL;
}
if (priority == TIPC_MEDIA_LINK_PRI)
@@ -287,7 +295,7 @@ restart:
bearer_id = MAX_BEARERS;
with_this_prio = 1;
for (i = MAX_BEARERS; i-- != 0; ) {
- b_ptr = bearer_list[i];
+ b_ptr = rtnl_dereference(bearer_list[i]);
if (!b_ptr) {
bearer_id = i;
continue;
@@ -295,14 +303,14 @@ restart:
if (!strcmp(name, b_ptr->name)) {
pr_warn("Bearer <%s> rejected, already enabled\n",
name);
- goto exit;
+ return -EINVAL;
}
if ((b_ptr->priority == priority) &&
(++with_this_prio > 2)) {
if (priority-- == 0) {
pr_warn("Bearer <%s> rejected, duplicate priority\n",
name);
- goto exit;
+ return -EINVAL;
}
pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
name, priority + 1, priority);
@@ -312,21 +320,20 @@ restart:
if (bearer_id >= MAX_BEARERS) {
pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
name, MAX_BEARERS);
- goto exit;
+ return -EINVAL;
}
b_ptr = kzalloc(sizeof(*b_ptr), GFP_ATOMIC);
- if (!b_ptr) {
- res = -ENOMEM;
- goto exit;
- }
+ if (!b_ptr)
+ return -ENOMEM;
+
strcpy(b_ptr->name, name);
b_ptr->media = m_ptr;
res = m_ptr->enable_media(b_ptr);
if (res) {
pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
name, -res);
- goto exit;
+ return -EINVAL;
}
b_ptr->identity = bearer_id;
@@ -341,16 +348,14 @@ restart:
bearer_disable(b_ptr, false);
pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
name);
- goto exit;
+ return -EINVAL;
}
- bearer_list[bearer_id] = b_ptr;
+ rcu_assign_pointer(bearer_list[bearer_id], b_ptr);
pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
name,
tipc_addr_string_fill(addr_string, disc_domain), priority);
-exit:
- write_unlock_bh(&tipc_net_lock);
return res;
}
@@ -359,19 +364,16 @@ exit:
*/
static int tipc_reset_bearer(struct tipc_bearer *b_ptr)
{
- read_lock_bh(&tipc_net_lock);
pr_info("Resetting bearer <%s>\n", b_ptr->name);
- tipc_disc_delete(b_ptr->link_req);
tipc_link_reset_list(b_ptr->identity);
- tipc_disc_create(b_ptr, &b_ptr->bcast_addr);
- read_unlock_bh(&tipc_net_lock);
+ tipc_disc_reset(b_ptr);
return 0;
}
/**
* bearer_disable
*
- * Note: This routine assumes caller holds tipc_net_lock.
+ * Note: This routine assumes caller holds RTNL lock.
*/
static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down)
{
@@ -385,12 +387,12 @@ static void bearer_disable(struct tipc_bearer *b_ptr, bool shutting_down)
tipc_disc_delete(b_ptr->link_req);
for (i = 0; i < MAX_BEARERS; i++) {
- if (b_ptr == bearer_list[i]) {
- bearer_list[i] = NULL;
+ if (b_ptr == rtnl_dereference(bearer_list[i])) {
+ RCU_INIT_POINTER(bearer_list[i], NULL);
break;
}
}
- kfree(b_ptr);
+ kfree_rcu(b_ptr, rcu);
}
int tipc_disable_bearer(const char *name)
@@ -398,7 +400,6 @@ int tipc_disable_bearer(const char *name)
struct tipc_bearer *b_ptr;
int res;
- write_lock_bh(&tipc_net_lock);
b_ptr = tipc_bearer_find(name);
if (b_ptr == NULL) {
pr_warn("Attempt to disable unknown bearer <%s>\n", name);
@@ -407,7 +408,6 @@ int tipc_disable_bearer(const char *name)
bearer_disable(b_ptr, false);
res = 0;
}
- write_unlock_bh(&tipc_net_lock);
return res;
}
@@ -444,7 +444,7 @@ int tipc_enable_l2_media(struct tipc_bearer *b)
return -ENODEV;
/* Associate TIPC bearer with Ethernet bearer */
- b->media_ptr = dev;
+ rcu_assign_pointer(b->media_ptr, dev);
memset(b->bcast_addr.value, 0, sizeof(b->bcast_addr.value));
memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
@@ -463,8 +463,12 @@ int tipc_enable_l2_media(struct tipc_bearer *b)
*/
void tipc_disable_l2_media(struct tipc_bearer *b)
{
- struct net_device *dev = (struct net_device *)b->media_ptr;
+ struct net_device *dev;
+
+ dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+ RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
+ synchronize_net();
dev_put(dev);
}
@@ -478,8 +482,12 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
struct tipc_media_addr *dest)
{
struct sk_buff *clone;
+ struct net_device *dev;
int delta;
- struct net_device *dev = (struct net_device *)b->media_ptr;
+
+ dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
+ if (!dev)
+ return 0;
clone = skb_clone(buf, GFP_ATOMIC);
if (!clone)
@@ -507,10 +515,16 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
* The media send routine must not alter the buffer being passed in
* as it may be needed for later retransmission!
*/
-void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
+void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf,
struct tipc_media_addr *dest)
{
- b->media->send_msg(buf, b, dest);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[bearer_id]);
+ if (likely(b_ptr))
+ b_ptr->media->send_msg(buf, b_ptr, dest);
+ rcu_read_unlock();
}
/**
@@ -535,7 +549,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev,
}
rcu_read_lock();
- b_ptr = rcu_dereference(dev->tipc_ptr);
+ b_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
if (likely(b_ptr)) {
if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
buf->next = NULL;
@@ -568,12 +582,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
- rcu_read_lock();
- b_ptr = rcu_dereference(dev->tipc_ptr);
- if (!b_ptr) {
- rcu_read_unlock();
+ b_ptr = rtnl_dereference(dev->tipc_ptr);
+ if (!b_ptr)
return NOTIFY_DONE;
- }
b_ptr->mtu = dev->mtu;
@@ -592,11 +603,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
break;
case NETDEV_UNREGISTER:
case NETDEV_CHANGENAME:
- tipc_disable_bearer(b_ptr->name);
+ bearer_disable(b_ptr, false);
break;
}
- rcu_read_unlock();
-
return NOTIFY_OK;
}
@@ -633,7 +642,7 @@ void tipc_bearer_stop(void)
u32 i;
for (i = 0; i < MAX_BEARERS; i++) {
- b_ptr = bearer_list[i];
+ b_ptr = rtnl_dereference(bearer_list[i]);
if (b_ptr) {
bearer_disable(b_ptr, true);
bearer_list[i] = NULL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ba48145e871..a983b3005e7 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -113,6 +113,7 @@ struct tipc_media {
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
* @bcast_addr: media address used in broadcasting
+ * @rcu: rcu struct for tipc_bearer
* @priority: default link priority for bearer
* @window: default window size for bearer
* @tolerance: default link tolerance for bearer
@@ -127,12 +128,13 @@ struct tipc_media {
* care of initializing all other fields.
*/
struct tipc_bearer {
- void *media_ptr; /* initalized by media */
+ void __rcu *media_ptr; /* initalized by media */
u32 mtu; /* initalized by media */
struct tipc_media_addr addr; /* initalized by media */
char name[TIPC_MAX_BEARER_NAME];
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
+ struct rcu_head rcu;
u32 priority;
u32 window;
u32 tolerance;
@@ -150,7 +152,7 @@ struct tipc_bearer_names {
struct tipc_link;
-extern struct tipc_bearer *bearer_list[];
+extern struct tipc_bearer __rcu *bearer_list[];
/*
* TIPC routines available to supported media types
@@ -181,14 +183,14 @@ int tipc_l2_send_msg(struct sk_buff *buf, struct tipc_bearer *b,
struct tipc_media_addr *dest);
struct sk_buff *tipc_bearer_get_names(void);
-void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest);
-void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest);
+void tipc_bearer_add_dest(u32 bearer_id, u32 dest);
+void tipc_bearer_remove_dest(u32 bearer_id, u32 dest);
struct tipc_bearer *tipc_bearer_find(const char *name);
struct tipc_media *tipc_media_find(const char *name);
int tipc_bearer_setup(void);
void tipc_bearer_cleanup(void);
void tipc_bearer_stop(void);
-void tipc_bearer_send(struct tipc_bearer *b, struct sk_buff *buf,
+void tipc_bearer_send(u32 bearer_id, struct sk_buff *buf,
struct tipc_media_addr *dest);
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 4b981c05382..251f5a2028e 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -42,8 +42,6 @@
#define REPLY_TRUNCATED "<truncated>\n"
-static DEFINE_MUTEX(config_mutex);
-
static const void *req_tlv_area; /* request message TLV area */
static int req_tlv_space; /* request message TLV area size */
static int rep_headroom; /* reply message headroom to use */
@@ -223,7 +221,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
{
struct sk_buff *rep_tlv_buf;
- mutex_lock(&config_mutex);
+ rtnl_lock();
/* Save request and reply details in a well-known location */
req_tlv_area = request_area;
@@ -337,6 +335,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
/* Return reply buffer */
exit:
- mutex_unlock(&config_mutex);
+ rtnl_unlock();
return rep_tlv_buf;
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 8985bbcb942..36cbf158845 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -56,7 +56,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-
+#include <linux/rtnetlink.h>
#define TIPC_MOD_VER "2.0.0"
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 542fe3413dc..ada42e436f5 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -46,8 +46,9 @@
/**
* struct tipc_link_req - information about an ongoing link setup request
- * @bearer: bearer issuing requests
+ * @bearer_id: identity of bearer issuing requests
* @dest: destination address for request messages
+ * @domain: network domain to which links can be established
* @num_nodes: number of nodes currently discovered (i.e. with an active link)
* @lock: spinlock for controlling access to requests
* @buf: request message to be (repeatedly) sent
@@ -55,8 +56,9 @@
* @timer_intv: current interval between requests (in ms)
*/
struct tipc_link_req {
- struct tipc_bearer *bearer;
+ u32 bearer_id;
struct tipc_media_addr dest;
+ u32 domain;
int num_nodes;
spinlock_t lock;
struct sk_buff *buf;
@@ -69,22 +71,19 @@ struct tipc_link_req {
* @type: message type (request or response)
* @b_ptr: ptr to bearer issuing message
*/
-static struct sk_buff *tipc_disc_init_msg(u32 type, struct tipc_bearer *b_ptr)
+static void tipc_disc_init_msg(struct sk_buff *buf, u32 type,
+ struct tipc_bearer *b_ptr)
{
- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE);
struct tipc_msg *msg;
u32 dest_domain = b_ptr->domain;
- if (buf) {
- msg = buf_msg(buf);
- tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
- msg_set_non_seq(msg, 1);
- msg_set_node_sig(msg, tipc_random);
- msg_set_dest_domain(msg, dest_domain);
- msg_set_bc_netid(msg, tipc_net_id);
- b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg));
- }
- return buf;
+ msg = buf_msg(buf);
+ tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain);
+ msg_set_non_seq(msg, 1);
+ msg_set_node_sig(msg, tipc_random);
+ msg_set_dest_domain(msg, dest_domain);
+ msg_set_bc_netid(msg, tipc_net_id);
+ b_ptr->media->addr2msg(&b_ptr->addr, msg_media_addr(msg));
}
/**
@@ -239,9 +238,10 @@ void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr)
link_fully_up = link_working_working(link);
if ((type == DSC_REQ_MSG) && !link_fully_up) {
- rbuf = tipc_disc_init_msg(DSC_RESP_MSG, b_ptr);
+ rbuf = tipc_buf_acquire(INT_H_SIZE);
if (rbuf) {
- tipc_bearer_send(b_ptr, rbuf, &media_addr);
+ tipc_disc_init_msg(rbuf, DSC_RESP_MSG, b_ptr);
+ tipc_bearer_send(b_ptr->identity, rbuf, &media_addr);
kfree_skb(rbuf);
}
}
@@ -303,7 +303,7 @@ static void disc_timeout(struct tipc_link_req *req)
spin_lock_bh(&req->lock);
/* Stop searching if only desired node has been found */
- if (tipc_node(req->bearer->domain) && req->num_nodes) {
+ if (tipc_node(req->domain) && req->num_nodes) {
req->timer_intv = TIPC_LINK_REQ_INACTIVE;
goto exit;
}
@@ -315,7 +315,7 @@ static void disc_timeout(struct tipc_link_req *req)
* hold at fast polling rate if don't have any associated nodes,
* otherwise hold at slow polling rate
*/
- tipc_bearer_send(req->bearer, req->buf, &req->dest);
+ tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
req->timer_intv *= 2;
@@ -347,21 +347,21 @@ int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest)
if (!req)
return -ENOMEM;
- req->buf = tipc_disc_init_msg(DSC_REQ_MSG, b_ptr);
- if (!req->buf) {
- kfree(req);
- return -ENOMSG;
- }
+ req->buf = tipc_buf_acquire(INT_H_SIZE);
+ if (!req->buf)
+ return -ENOMEM;
+ tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr);
memcpy(&req->dest, dest, sizeof(*dest));
- req->bearer = b_ptr;
+ req->bearer_id = b_ptr->identity;
+ req->domain = b_ptr->domain;
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
spin_lock_init(&req->lock);
k_init_timer(&req->timer, (Handler)disc_timeout, (unsigned long)req);
k_start_timer(&req->timer, req->timer_intv);
b_ptr->link_req = req;
- tipc_bearer_send(req->bearer, req->buf, &req->dest);
+ tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
return 0;
}
@@ -376,3 +376,23 @@ void tipc_disc_delete(struct tipc_link_req *req)
kfree_skb(req->buf);
kfree(req);
}
+
+/**
+ * tipc_disc_reset - reset object to send periodic link setup requests
+ * @b_ptr: ptr to bearer issuing requests
+ * @dest_domain: network domain to which links can be established
+ */
+void tipc_disc_reset(struct tipc_bearer *b_ptr)
+{
+ struct tipc_link_req *req = b_ptr->link_req;
+
+ spin_lock_bh(&req->lock);
+ tipc_disc_init_msg(req->buf, DSC_REQ_MSG, b_ptr);
+ req->bearer_id = b_ptr->identity;
+ req->domain = b_ptr->domain;
+ req->num_nodes = 0;
+ req->timer_intv = TIPC_LINK_REQ_INIT;
+ k_start_timer(&req->timer, req->timer_intv);
+ tipc_bearer_send(req->bearer_id, req->buf, &req->dest);
+ spin_unlock_bh(&req->lock);
+}
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index 07f34729459..515b57392f4 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -41,6 +41,7 @@ struct tipc_link_req;
int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest);
void tipc_disc_delete(struct tipc_link_req *req);
+void tipc_disc_reset(struct tipc_bearer *b_ptr);
void tipc_disc_add_dest(struct tipc_link_req *req);
void tipc_disc_remove_dest(struct tipc_link_req *req);
void tipc_disc_rcv(struct sk_buff *buf, struct tipc_bearer *b_ptr);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index c5190ab7529..c723ee90219 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -101,9 +101,18 @@ static unsigned int align(unsigned int i)
static void link_init_max_pkt(struct tipc_link *l_ptr)
{
+ struct tipc_bearer *b_ptr;
u32 max_pkt;
- max_pkt = (l_ptr->b_ptr->mtu & ~3);
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]);
+ if (!b_ptr) {
+ rcu_read_unlock();
+ return;
+ }
+ max_pkt = (b_ptr->mtu & ~3);
+ rcu_read_unlock();
+
if (max_pkt > MAX_MSG_SIZE)
max_pkt = MAX_MSG_SIZE;
@@ -248,7 +257,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->owner = n_ptr;
l_ptr->checkpoint = 1;
l_ptr->peer_session = INVALID_SESSION;
- l_ptr->b_ptr = b_ptr;
+ l_ptr->bearer_id = b_ptr->identity;
link_set_supervision_props(l_ptr, b_ptr->tolerance);
l_ptr->state = RESET_UNKNOWN;
@@ -263,6 +272,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
l_ptr->priority = b_ptr->priority;
tipc_link_set_queue_limits(l_ptr, b_ptr->window);
+ l_ptr->net_plane = b_ptr->net_plane;
link_init_max_pkt(l_ptr);
l_ptr->next_out_no = 1;
@@ -426,7 +436,7 @@ void tipc_link_reset(struct tipc_link *l_ptr)
return;
tipc_node_link_down(l_ptr->owner, l_ptr);
- tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
+ tipc_bearer_remove_dest(l_ptr->bearer_id, l_ptr->addr);
if (was_active_link && tipc_node_active_links(l_ptr->owner)) {
l_ptr->reset_checkpoint = checkpoint;
@@ -477,7 +487,7 @@ static void link_activate(struct tipc_link *l_ptr)
{
l_ptr->next_in_no = l_ptr->stats.recv_info = 1;
tipc_node_link_up(l_ptr->owner, l_ptr);
- tipc_bearer_add_dest(l_ptr->b_ptr, l_ptr->addr);
+ tipc_bearer_add_dest(l_ptr->bearer_id, l_ptr->addr);
}
/**
@@ -777,7 +787,7 @@ int __tipc_link_xmit(struct tipc_link *l_ptr, struct sk_buff *buf)
if (likely(!link_congested(l_ptr))) {
link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
return dsz;
}
@@ -825,7 +835,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector)
struct tipc_node *n_ptr;
int res = -ELINKCONG;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(dest);
if (n_ptr) {
tipc_node_lock(n_ptr);
@@ -838,7 +847,6 @@ int tipc_link_xmit(struct sk_buff *buf, u32 dest, u32 selector)
} else {
kfree_skb(buf);
}
- read_unlock_bh(&tipc_net_lock);
return res;
}
@@ -902,7 +910,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest)
if (list_empty(message_list))
return;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(dest);
if (n_ptr) {
tipc_node_lock(n_ptr);
@@ -917,7 +924,6 @@ void tipc_link_names_xmit(struct list_head *message_list, u32 dest)
}
tipc_node_unlock(n_ptr);
}
- read_unlock_bh(&tipc_net_lock);
/* discard the messages if they couldn't be sent */
list_for_each_safe(buf, temp_buf, ((struct sk_buff *)message_list)) {
@@ -941,7 +947,7 @@ static int tipc_link_xmit_fast(struct tipc_link *l_ptr, struct sk_buff *buf,
if (likely(!link_congested(l_ptr))) {
if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
link_add_to_outqueue(l_ptr, buf, msg);
- tipc_bearer_send(l_ptr->b_ptr, buf,
+ tipc_bearer_send(l_ptr->bearer_id, buf,
&l_ptr->media_addr);
l_ptr->unacked_window = 0;
return res;
@@ -979,7 +985,6 @@ again:
if (unlikely(res < 0))
return res;
- read_lock_bh(&tipc_net_lock);
node = tipc_node_find(destaddr);
if (likely(node)) {
tipc_node_lock(node);
@@ -990,7 +995,6 @@ again:
&sender->max_pkt);
exit:
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return res;
}
@@ -1007,7 +1011,6 @@ exit:
*/
sender->max_pkt = l_ptr->max_pkt;
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
@@ -1018,7 +1021,6 @@ exit:
}
tipc_node_unlock(node);
}
- read_unlock_bh(&tipc_net_lock);
/* Couldn't find a link to the destination node */
kfree_skb(buf);
@@ -1204,7 +1206,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (r_q_size && buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->retransm_queue_head = mod(++r_q_head);
l_ptr->retransm_queue_size = --r_q_size;
l_ptr->stats.retransmitted++;
@@ -1216,7 +1218,7 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (buf) {
msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
kfree_skb(buf);
l_ptr->proto_msg_queue = NULL;
@@ -1233,7 +1235,8 @@ static u32 tipc_link_push_packet(struct tipc_link *l_ptr)
if (mod(next - first) < l_ptr->queue_limit[0]) {
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf,
+ &l_ptr->media_addr);
if (msg_user(msg) == MSG_BUNDLER)
msg_set_type(msg, CLOSED_MSG);
l_ptr->next_out = buf->next;
@@ -1262,12 +1265,9 @@ static void link_reset_all(unsigned long addr)
char addr_string[16];
u32 i;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find((u32)addr);
- if (!n_ptr) {
- read_unlock_bh(&tipc_net_lock);
+ if (!n_ptr)
return; /* node no longer exists */
- }
tipc_node_lock(n_ptr);
@@ -1282,7 +1282,6 @@ static void link_reset_all(unsigned long addr)
}
tipc_node_unlock(n_ptr);
- read_unlock_bh(&tipc_net_lock);
}
static void link_retransmit_failure(struct tipc_link *l_ptr,
@@ -1352,7 +1351,7 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
msg = buf_msg(buf);
msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
buf = buf->next;
retransmits--;
l_ptr->stats.retransmitted++;
@@ -1440,14 +1439,13 @@ static int link_recv_buf_validate(struct sk_buff *buf)
/**
* tipc_rcv - process TIPC packets/messages arriving from off-node
* @head: pointer to message buffer chain
- * @tb_ptr: pointer to bearer message arrived on
+ * @b_ptr: pointer to bearer message arrived on
*
* Invoked with no locks held. Bearer pointer must point to a valid bearer
* structure (i.e. cannot be NULL), but bearer can be inactive.
*/
void tipc_rcv(struct sk_buff *head, struct tipc_bearer *b_ptr)
{
- read_lock_bh(&tipc_net_lock);
while (head) {
struct tipc_node *n_ptr;
struct tipc_link *l_ptr;
@@ -1635,7 +1633,6 @@ unlock_discard:
discard:
kfree_skb(buf);
}
- read_unlock_bh(&tipc_net_lock);
}
/**
@@ -1752,7 +1749,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
/* Create protocol message with "out-of-sequence" sequence number */
msg_set_type(msg, msg_typ);
- msg_set_net_plane(msg, l_ptr->b_ptr->net_plane);
+ msg_set_net_plane(msg, l_ptr->net_plane);
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
msg_set_last_bcast(msg, tipc_bclink_get_last_sent());
@@ -1818,7 +1815,7 @@ void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
buf->priority = TC_PRIO_CONTROL;
- tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr);
+ tipc_bearer_send(l_ptr->bearer_id, buf, &l_ptr->media_addr);
l_ptr->unacked_window = 0;
kfree_skb(buf);
}
@@ -1843,9 +1840,9 @@ static void tipc_link_proto_rcv(struct tipc_link *l_ptr, struct sk_buff *buf)
/* record unnumbered packet arrival (force mismatch on next timeout) */
l_ptr->checkpoint--;
- if (l_ptr->b_ptr->net_plane != msg_net_plane(msg))
+ if (l_ptr->net_plane != msg_net_plane(msg))
if (tipc_own_addr > msg_prevnode(msg))
- l_ptr->b_ptr->net_plane = msg_net_plane(msg);
+ l_ptr->net_plane = msg_net_plane(msg);
switch (msg_type(msg)) {
@@ -2397,8 +2394,6 @@ void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window)
/* tipc_link_find_owner - locate owner node of link by link's name
* @name: pointer to link name string
* @bearer_id: pointer to index in 'node->links' array where the link was found.
- * Caller must hold 'tipc_net_lock' to ensure node and bearer are not deleted;
- * this also prevents link deletion.
*
* Returns pointer to node owning the link, or 0 if no matching link is found.
*/
@@ -2460,7 +2455,7 @@ static int link_value_is_valid(u16 cmd, u32 new_value)
* @new_value: new value of link, bearer, or media setting
* @cmd: which link, bearer, or media attribute to set (TIPC_CMD_SET_LINK_*)
*
- * Caller must hold 'tipc_net_lock' to ensure link/bearer/media is not deleted.
+ * Caller must hold RTNL lock to ensure link/bearer/media is not deleted.
*
* Returns 0 if value updated and negative value on error.
*/
@@ -2566,9 +2561,7 @@ struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space
" (cannot change setting on broadcast link)");
}
- read_lock_bh(&tipc_net_lock);
res = link_cmd_set_value(args->name, new_value, cmd);
- read_unlock_bh(&tipc_net_lock);
if (res)
return tipc_cfg_reply_error_string("cannot change link setting");
@@ -2602,22 +2595,18 @@ struct sk_buff *tipc_link_cmd_reset_stats(const void *req_tlv_area, int req_tlv_
return tipc_cfg_reply_error_string("link not found");
return tipc_cfg_reply_none();
}
- read_lock_bh(&tipc_net_lock);
node = tipc_link_find_owner(link_name, &bearer_id);
- if (!node) {
- read_unlock_bh(&tipc_net_lock);
+ if (!node)
return tipc_cfg_reply_error_string("link not found");
- }
+
tipc_node_lock(node);
l_ptr = node->links[bearer_id];
if (!l_ptr) {
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_error_string("link not found");
}
link_reset_statistics(l_ptr);
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return tipc_cfg_reply_none();
}
@@ -2650,18 +2639,15 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
if (!strcmp(name, tipc_bclink_name))
return tipc_bclink_stats(buf, buf_size);
- read_lock_bh(&tipc_net_lock);
node = tipc_link_find_owner(name, &bearer_id);
- if (!node) {
- read_unlock_bh(&tipc_net_lock);
+ if (!node)
return 0;
- }
+
tipc_node_lock(node);
l = node->links[bearer_id];
if (!l) {
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return 0;
}
@@ -2727,7 +2713,6 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
(s->accu_queue_sz / s->queue_sz_counts) : 0);
tipc_node_unlock(node);
- read_unlock_bh(&tipc_net_lock);
return ret;
}
@@ -2778,7 +2763,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
if (dest == tipc_own_addr)
return MAX_MSG_SIZE;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(dest);
if (n_ptr) {
tipc_node_lock(n_ptr);
@@ -2787,13 +2771,18 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
res = l_ptr->max_pkt;
tipc_node_unlock(n_ptr);
}
- read_unlock_bh(&tipc_net_lock);
return res;
}
static void link_print(struct tipc_link *l_ptr, const char *str)
{
- pr_info("%s Link %x<%s>:", str, l_ptr->addr, l_ptr->b_ptr->name);
+ struct tipc_bearer *b_ptr;
+
+ rcu_read_lock();
+ b_ptr = rcu_dereference_rtnl(bearer_list[l_ptr->bearer_id]);
+ if (b_ptr)
+ pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
+ rcu_read_unlock();
if (link_working_unknown(l_ptr))
pr_cont(":WU\n");
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 8c0b49b5b2e..4b556c181ba 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -107,7 +107,7 @@ struct tipc_stats {
* @checkpoint: reference point for triggering link continuity checking
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
- * @b_ptr: pointer to bearer used by link
+ * @bearer_id: local bearer id used by link
* @tolerance: minimum link continuity loss needed to reset link [in ms]
* @continuity_interval: link continuity testing interval [in ms]
* @abort_limit: # of unacknowledged continuity probes needed to reset link
@@ -116,6 +116,7 @@ struct tipc_stats {
* @proto_msg: template for control messages generated by link
* @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
+ * @net_plane: current link network plane ('A' through 'H')
* @queue_limit: outbound message queue congestion thresholds (indexed by user)
* @exp_msg_count: # of tunnelled messages expected during link changeover
* @reset_checkpoint: seq # of last acknowledged message at time of link reset
@@ -155,7 +156,7 @@ struct tipc_link {
u32 checkpoint;
u32 peer_session;
u32 peer_bearer_id;
- struct tipc_bearer *b_ptr;
+ u32 bearer_id;
u32 tolerance;
u32 continuity_interval;
u32 abort_limit;
@@ -167,6 +168,7 @@ struct tipc_link {
} proto_msg;
struct tipc_msg *pmsg;
u32 priority;
+ char net_plane;
u32 queue_limit[15]; /* queue_limit[0]==window limit */
/* Changeover */
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index aff8041dc15..36a72822601 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -248,7 +248,6 @@ void tipc_named_node_up(unsigned long nodearg)
u32 max_item_buf = 0;
/* compute maximum amount of publication data to send per message */
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(node);
if (n_ptr) {
tipc_node_lock(n_ptr);
@@ -258,7 +257,6 @@ void tipc_named_node_up(unsigned long nodearg)
ITEM_SIZE) * ITEM_SIZE;
tipc_node_unlock(n_ptr);
}
- read_unlock_bh(&tipc_net_lock);
if (!max_item_buf)
return;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 4c564eb69e1..75bb39025d5 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -45,39 +45,34 @@
/*
* The TIPC locking policy is designed to ensure a very fine locking
* granularity, permitting complete parallel access to individual
- * port and node/link instances. The code consists of three major
+ * port and node/link instances. The code consists of four major
* locking domains, each protected with their own disjunct set of locks.
*
- * 1: The routing hierarchy.
- * Comprises the structures 'zone', 'cluster', 'node', 'link'
- * and 'bearer'. The whole hierarchy is protected by a big
- * read/write lock, tipc_net_lock, to enssure that nothing is added
- * or removed while code is accessing any of these structures.
- * This layer must not be called from the two others while they
- * hold any of their own locks.
- * Neither must it itself do any upcalls to the other two before
- * it has released tipc_net_lock and other protective locks.
+ * 1: The bearer level.
+ * RTNL lock is used to serialize the process of configuring bearer
+ * on update side, and RCU lock is applied on read side to make
+ * bearer instance valid on both paths of message transmission and
+ * reception.
*
- * Within the tipc_net_lock domain there are two sub-domains;'node' and
- * 'bearer', where local write operations are permitted,
- * provided that those are protected by individual spin_locks
- * per instance. Code holding tipc_net_lock(read) and a node spin_lock
- * is permitted to poke around in both the node itself and its
- * subordinate links. I.e, it can update link counters and queues,
- * change link state, send protocol messages, and alter the
- * "active_links" array in the node; but it can _not_ remove a link
- * or a node from the overall structure.
- * Correspondingly, individual bearers may change status within a
- * tipc_net_lock(read), protected by an individual spin_lock ber bearer
- * instance, but it needs tipc_net_lock(write) to remove/add any bearers.
+ * 2: The node and link level.
+ * All node instances are saved into two tipc_node_list and node_htable
+ * lists. The two lists are protected by node_list_lock on write side,
+ * and they are guarded with RCU lock on read side. Especially node
+ * instance is destroyed only when TIPC module is removed, and we can
+ * confirm that there has no any user who is accessing the node at the
+ * moment. Therefore, Except for iterating the two lists within RCU
+ * protection, it's no needed to hold RCU that we access node instance
+ * in other places.
*
+ * In addition, all members in node structure including link instances
+ * are protected by node spin lock.
*
- * 2: The transport level of the protocol.
- * This consists of the structures port, (and its user level
- * representations, such as user_port and tipc_sock), reference and
- * tipc_user (port.c, reg.c, socket.c).
+ * 3: The transport level of the protocol.
+ * This consists of the structures port, (and its user level
+ * representations, such as user_port and tipc_sock), reference and
+ * tipc_user (port.c, reg.c, socket.c).
*
- * This layer has four different locks:
+ * This layer has four different locks:
* - The tipc_port spin_lock. This is protecting each port instance
* from parallel data access and removal. Since we can not place
* this lock in the port itself, it has been placed in the
@@ -96,7 +91,7 @@
* There are two such lists; 'port_list', which is used for management,
* and 'wait_list', which is used to queue ports during congestion.
*
- * 3: The name table (name_table.c, name_distr.c, subscription.c)
+ * 4: The name table (name_table.c, name_distr.c, subscription.c)
* - There is one big read/write-lock (tipc_nametbl_lock) protecting the
* overall name table structure. Nothing must be added/removed to
* this structure without holding write access to it.
@@ -108,8 +103,6 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
-DEFINE_RWLOCK(tipc_net_lock);
-
static void net_route_named_msg(struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
@@ -175,15 +168,13 @@ void tipc_net_start(u32 addr)
{
char addr_string[16];
- write_lock_bh(&tipc_net_lock);
tipc_own_addr = addr;
tipc_named_reinit();
tipc_port_reinit();
tipc_bclink_init();
- write_unlock_bh(&tipc_net_lock);
-
tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr,
TIPC_ZONE_SCOPE, 0, tipc_own_addr);
+
pr_info("Started in network mode\n");
pr_info("Own node address %s, network identity %u\n",
tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
@@ -195,11 +186,11 @@ void tipc_net_stop(void)
return;
tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr);
- write_lock_bh(&tipc_net_lock);
+ rtnl_lock();
tipc_bearer_stop();
tipc_bclink_stop();
tipc_node_stop();
- write_unlock_bh(&tipc_net_lock);
+ rtnl_unlock();
pr_info("Left network mode\n");
}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 079daadb3f7..f781cae8df4 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -37,8 +37,6 @@
#ifndef _TIPC_NET_H
#define _TIPC_NET_H
-extern rwlock_t tipc_net_lock;
-
void tipc_net_route_msg(struct sk_buff *buf);
void tipc_net_start(u32 addr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 1d3a4999a70..be90115cda1 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -148,7 +148,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
n_ptr->working_links++;
pr_info("Established link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ l_ptr->name, l_ptr->net_plane);
if (!active[0]) {
active[0] = active[1] = l_ptr;
@@ -208,11 +208,11 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
if (!tipc_link_is_active(l_ptr)) {
pr_info("Lost standby link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ l_ptr->name, l_ptr->net_plane);
return;
}
pr_info("Lost link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ l_ptr->name, l_ptr->net_plane);
active = &n_ptr->active_links[0];
if (active[0] == l_ptr)
@@ -239,7 +239,7 @@ int tipc_node_is_up(struct tipc_node *n_ptr)
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
{
- n_ptr->links[l_ptr->b_ptr->identity] = l_ptr;
+ n_ptr->links[l_ptr->bearer_id] = l_ptr;
spin_lock_bh(&node_list_lock);
tipc_num_links++;
spin_unlock_bh(&node_list_lock);
@@ -273,14 +273,12 @@ static void node_name_purge_complete(unsigned long node_addr)
{
struct tipc_node *n_ptr;
- read_lock_bh(&tipc_net_lock);
n_ptr = tipc_node_find(node_addr);
if (n_ptr) {
tipc_node_lock(n_ptr);
n_ptr->block_setup &= ~WAIT_NAMES_GONE;
tipc_node_unlock(n_ptr);
}
- read_unlock_bh(&tipc_net_lock);
}
static void node_lost_contact(struct tipc_node *n_ptr)
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 14d04e63b1f..be491a74c1e 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -147,7 +147,7 @@ struct security_class_mapping secclass_map[] = {
{ "peer", { "recv", NULL } },
{ "capability2",
{ "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend",
- NULL } },
+ "audit_read", NULL } },
{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
{ "tun_socket",
{ COMMON_SOCK_PERMS, "attach_queue", NULL } },
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index bf7be77ddd6..833a96611da 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l
@@ -92,6 +92,7 @@ extern void yyerror(const char *str);
"#"?("cpu") { return K_CPU; }
"#"?("vlan_tci") { return K_VLANT; }
"#"?("vlan_pr") { return K_VLANP; }
+"#"?("rand") { return K_RAND; }
":" { return ':'; }
"," { return ','; }
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index d15efc989ef..e6306c51c26 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y
@@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type);
%token OP_LDXI
%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
%token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
@@ -164,6 +164,9 @@ ldb
| OP_LDB K_POFF {
bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+ | OP_LDB K_RAND {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RANDOM); }
;
ldh
@@ -212,6 +215,9 @@ ldh
| OP_LDH K_POFF {
bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+ | OP_LDH K_RAND {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RANDOM); }
;
ldi
@@ -265,6 +271,9 @@ ld
| OP_LD K_POFF {
bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+ | OP_LD K_RAND {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RANDOM); }
| OP_LD 'M' '[' number ']' {
bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
| OP_LD '[' 'x' '+' number ']' {