diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-09-26 13:13:19 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-09-26 13:13:19 -0400 |
commit | c226951b93f7cd7c3a10b17384535b617bd43fd0 (patch) | |
tree | 07b8796a5c99fbbf587b8d0dbcbc173cfe5e381e /net | |
parent | b0df3bd1e553e901ec7297267611a5db88240b38 (diff) | |
parent | e8216dee838c09776680a6f1a2e54d81f3cdfa14 (diff) |
Merge branch 'master' into upstream
Diffstat (limited to 'net')
-rw-r--r-- | net/dccp/Kconfig | 16 | ||||
-rw-r--r-- | net/dccp/Makefile | 2 | ||||
-rw-r--r-- | net/dccp/ccids/ccid2.c | 2 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 2 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 3 | ||||
-rw-r--r-- | net/dccp/probe.c | 198 | ||||
-rw-r--r-- | net/dccp/proto.c | 11 | ||||
-rw-r--r-- | net/ipv4/Kconfig | 53 | ||||
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 267 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 2 | ||||
-rw-r--r-- | net/netlabel/Kconfig | 5 | ||||
-rw-r--r-- | net/netlabel/netlabel_cipso_v4.c | 628 | ||||
-rw-r--r-- | net/netlabel/netlabel_cipso_v4.h | 225 | ||||
-rw-r--r-- | net/netlabel/netlabel_domainhash.c | 183 | ||||
-rw-r--r-- | net/netlabel/netlabel_domainhash.h | 6 | ||||
-rw-r--r-- | net/netlabel/netlabel_kapi.c | 23 | ||||
-rw-r--r-- | net/netlabel/netlabel_mgmt.c | 541 | ||||
-rw-r--r-- | net/netlabel/netlabel_mgmt.h | 211 | ||||
-rw-r--r-- | net/netlabel/netlabel_unlabeled.c | 79 | ||||
-rw-r--r-- | net/netlabel/netlabel_unlabeled.h | 41 | ||||
-rw-r--r-- | net/netlabel/netlabel_user.c | 82 | ||||
-rw-r--r-- | net/netlabel/netlabel_user.h | 141 |
23 files changed, 1300 insertions, 1427 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 859e3359fcd..e2a095d0fd8 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -40,6 +40,22 @@ config IP_DCCP_DEBUG Just say N. +config NET_DCCPPROBE + tristate "DCCP connection probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to DCCP connection + state in response to incoming packets. It is used for debugging + DCCP congestion avoidance modules. If you don't understand + what was just said, you don't need it: say N. + + Documentation on how to use the packet generator can be found + at http://linux-net.osdl.org/index.php/DccpProbe + + To compile this code as a module, choose M here: the + module will be called dccp_probe. + + endmenu endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 7696e219b05..17ed99c4661 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -11,9 +11,11 @@ dccp_ipv4-y := ipv4.o dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o +obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o +dccp_probe-y := probe.o obj-y += ccids/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 457dd3db7f4..2efb505aeb3 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -808,7 +808,7 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } static struct ccid_operations ccid2 = { - .ccid_id = 2, + .ccid_id = DCCPC_CCID2, .ccid_name = "ccid2", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 195aa956622..67d2dc0e7c6 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -1240,7 +1240,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, } static struct ccid_operations ccid3 = { - .ccid_id = 3, + .ccid_id = DCCPC_CCID3, .ccid_name = "ccid3", .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9a1a76a7dc4..66be29b6f50 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -56,9 +56,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; diff --git a/net/dccp/probe.c b/net/dccp/probe.c new file mode 100644 index 00000000000..146496fce2e --- /dev/null +++ b/net/dccp/probe.c @@ -0,0 +1,198 @@ +/* + * dccp_probe - Observe the DCCP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org> + * + * Modified for DCCP from Stephen Hemminger's code + * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/socket.h> +#include <linux/dccp.h> +#include <linux/proc_fs.h> +#include <linux/module.h> +#include <linux/kfifo.h> +#include <linux/vmalloc.h> + +#include "dccp.h" +#include "ccid.h" +#include "ccids/ccid3.h" + +static int port; + +static int bufsize = 64 * 1024; + +static const char procname[] = "dccpprobe"; + +struct { + struct kfifo *fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timeval tstart; +} dccpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + struct timeval now; + char tbuf[256]; + + va_start(args, fmt); + do_gettimeofday(&now); + + now.tv_sec -= dccpw.tstart.tv_sec; + now.tv_usec -= dccpw.tstart.tv_usec; + if (now.tv_usec < 0) { + --now.tv_sec; + now.tv_usec += 1000000; + } + + len = sprintf(tbuf, "%lu.%06lu ", + (unsigned long) now.tv_sec, + (unsigned long) now.tv_usec); + len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args); + va_end(args); + + kfifo_put(dccpw.fifo, tbuf, len); + wake_up(&dccpw.wait); +} + +static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size) +{ + const struct dccp_minisock *dmsk = dccp_msk(sk); + const struct inet_sock *inet = inet_sk(sk); + const struct ccid3_hc_tx_sock *hctx; + + if (dmsk->dccpms_tx_ccid == DCCPC_CCID3) + hctx = ccid3_hc_tx_sk(sk); + else + hctx = NULL; + + if (port == 0 || ntohs(inet->dport) == port || + ntohs(inet->sport) == port) { + if (hctx) + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size, + hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi); + else + printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport), size); + } + + jprobe_return(); + return 0; +} + +static struct jprobe dccp_send_probe = { + .kp = { .addr = (kprobe_opcode_t *)&dccp_sendmsg, }, + .entry = (kprobe_opcode_t *)&jdccp_sendmsg, +}; + +static int dccpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(dccpw.fifo); + do_gettimeofday(&dccpw.tstart); + return 0; +} + +static ssize_t dccpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf || len < 0) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(dccpw.wait, + __kfifo_len(dccpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_get(dccpw.fifo, tbuf, len); + error = copy_to_user(buf, tbuf, cnt); + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static struct file_operations dccpprobe_fops = { + .owner = THIS_MODULE, + .open = dccpprobe_open, + .read = dccpprobe_read, +}; + +static __init int dccpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&dccpw.wait); + spin_lock_init(&dccpw.lock); + dccpw.fifo = kfifo_alloc(bufsize, GFP_KERNEL, &dccpw.lock); + + if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) + goto err0; + + ret = register_jprobe(&dccp_send_probe); + if (ret) + goto err1; + + pr_info("DCCP watch registered (port=%d)\n", port); + return 0; +err1: + proc_net_remove(procname); +err0: + kfifo_free(dccpw.fifo); + return ret; +} +module_init(dccpprobe_init); + +static __exit void dccpprobe_exit(void) +{ + kfifo_free(dccpw.fifo); + proc_net_remove(procname); + unregister_jprobe(&dccp_send_probe); + +} +module_exit(dccpprobe_exit); + +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>"); +MODULE_DESCRIPTION("DCCP snooper"); +MODULE_LICENSE("GPL"); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 962df0ea31a..72cbdcfc2c6 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -217,7 +217,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; + dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; return 0; @@ -267,12 +267,6 @@ static inline int dccp_listen_start(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; - /* - * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) - * before calling listen() - */ - if (dccp_service_not_initialized(sk)) - return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -540,9 +534,6 @@ static int dccp_getsockopt_service(struct sock *sk, int len, int err = -ENOENT, slen = 0, total_len = sizeof(u32); lock_sock(sk); - if (dccp_service_not_initialized(sk)) - goto out; - if ((sl = dp->dccps_service_list) != NULL) { slen = sl->dccpsl_nr * sizeof(u32); total_len += slen; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 1650b64415a..30af4a4dfcc 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -448,24 +448,22 @@ config INET_TCP_DIAG depends on INET_DIAG def_tristate INET_DIAG -config TCP_CONG_ADVANCED +menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- Support for selection of various TCP congestion control modules. Nearly all users can safely say no here, and a safe default - selection will be made (BIC-TCP with new Reno as a fallback). + selection will be made (CUBIC with new Reno as a fallback). If unsure, say N. -# TCP Reno is builtin (required as fallback) -menu "TCP congestion control" - depends on TCP_CONG_ADVANCED +if TCP_CONG_ADVANCED config TCP_CONG_BIC tristate "Binary Increase Congestion (BIC) control" - default y + default m ---help--- BIC-TCP is a sender-side only change that ensures a linear RTT fairness under large windows while offering both scalability and @@ -479,7 +477,7 @@ config TCP_CONG_BIC config TCP_CONG_CUBIC tristate "CUBIC TCP" - default m + default y ---help--- This is version 2.0 of BIC-TCP which uses a cubic growth function among other techniques. @@ -574,12 +572,49 @@ config TCP_CONG_VENO loss packets. See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf -endmenu +choice + prompt "Default TCP congestion control" + default DEFAULT_CUBIC + help + Select the TCP congestion control that will be used by default + for all connections. -config TCP_CONG_BIC + config DEFAULT_BIC + bool "Bic" if TCP_CONG_BIC=y + + config DEFAULT_CUBIC + bool "Cubic" if TCP_CONG_CUBIC=y + + config DEFAULT_HTCP + bool "Htcp" if TCP_CONG_HTCP=y + + config DEFAULT_VEGAS + bool "Vegas" if TCP_CONG_VEGAS=y + + config DEFAULT_WESTWOOD + bool "Westwood" if TCP_CONG_WESTWOOD=y + + config DEFAULT_RENO + bool "Reno" + +endchoice + +endif + +config TCP_CONG_CUBIC tristate depends on !TCP_CONG_ADVANCED default y +config DEFAULT_TCP_CONG + string + default "bic" if DEFAULT_BIC + default "cubic" if DEFAULT_CUBIC + default "htcp" if DEFAULT_HTCP + default "vegas" if DEFAULT_VEGAS + default "westwood" if DEFAULT_WESTWOOD + default "reno" if DEFAULT_RENO + default "cubic" + source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 80a2a0911b4..e6ce0b3ba62 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -259,7 +259,7 @@ void cipso_v4_cache_invalidate(void) u32 iter; for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { - spin_lock(&cipso_v4_cache[iter].lock); + spin_lock_bh(&cipso_v4_cache[iter].lock); list_for_each_entry_safe(entry, tmp_entry, &cipso_v4_cache[iter].list, list) { @@ -267,7 +267,7 @@ void cipso_v4_cache_invalidate(void) cipso_v4_cache_entry_free(entry); } cipso_v4_cache[iter].size = 0; - spin_unlock(&cipso_v4_cache[iter].lock); + spin_unlock_bh(&cipso_v4_cache[iter].lock); } return; @@ -309,7 +309,7 @@ static int cipso_v4_cache_check(const unsigned char *key, hash = cipso_v4_map_cache_hash(key, key_len); bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); + spin_lock_bh(&cipso_v4_cache[bkt].lock); list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { if (entry->hash == hash && entry->key_len == key_len && @@ -318,7 +318,7 @@ static int cipso_v4_cache_check(const unsigned char *key, secattr->cache.free = entry->lsm_data.free; secattr->cache.data = entry->lsm_data.data; if (prev_entry == NULL) { - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } @@ -333,12 +333,12 @@ static int cipso_v4_cache_check(const unsigned char *key, &prev_entry->list); } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; } prev_entry = entry; } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return -ENOENT; } @@ -387,7 +387,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, entry->lsm_data.data = secattr->cache.data; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); + spin_lock_bh(&cipso_v4_cache[bkt].lock); if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache[bkt].size += 1; @@ -398,7 +398,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, list_add(&entry->list, &cipso_v4_cache[bkt].list); cipso_v4_cache_entry_free(old_entry); } - spin_unlock(&cipso_v4_cache[bkt].lock); + spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -530,197 +530,42 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) } /** - * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff - * @headroom: the amount of headroom to allocate for the sk_buff + * cipso_v4_doi_walk - Iterate through the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function * * Description: - * Dump a list of all the configured DOI values into a sk_buff. The returned - * sk_buff has room at the front of the sk_buff for @headroom bytes. See - * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This - * function may fail if another process is changing the DOI list at the same - * time. Returns a pointer to a sk_buff on success, NULL on error. + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. * */ -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; + int ret_val = -ENOENT; u32 doi_cnt = 0; - ssize_t buf_len; + struct cipso_v4_doi *iter_doi; - buf_len = NETLBL_LEN_U32; rcu_read_lock(); - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - doi_cnt += 1; - buf_len += 2 * NETLBL_LEN_U32; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_all_failure; - - if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) - goto doi_dump_all_failure; - buf_len -= NETLBL_LEN_U32; - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - if (buf_len < 2 * NETLBL_LEN_U32) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_all_failure; - buf_len -= 2 * NETLBL_LEN_U32; - } - rcu_read_unlock(); - - return skb; - -doi_dump_all_failure: - rcu_read_unlock(); - kfree(skb); - return NULL; -} - -/** - * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff - * @doi: the DOI value - * @headroom: the amount of headroom to allocate for the sk_buff - * - * Description: - * Lookup the DOI definition matching @doi and dump it's contents into a - * sk_buff. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message - * format. This function may fail if another process is changing the DOI list - * at the same time. Returns a pointer to a sk_buff on success, NULL on error. - * - */ -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; - u32 tag_cnt = 0; - u32 lvl_cnt = 0; - u32 cat_cnt = 0; - ssize_t buf_len; - ssize_t tmp; - - rcu_read_lock(); - iter = cipso_v4_doi_getdef(doi); - if (iter == NULL) - goto doi_dump_failure; - buf_len = NETLBL_LEN_U32; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - buf_len += NETLBL_LEN_U32; - while(tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - break; - case CIPSO_V4_MAP_STD: - buf_len += 3 * NETLBL_LEN_U32; - while (tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - lvl_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - cat_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; + list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) + if (iter_doi->valid) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; } - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_failure; - - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - if (iter != cipso_v4_doi_getdef(doi)) - goto doi_dump_failure; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; } - break; - case CIPSO_V4_MAP_STD: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) - goto doi_dump_failure; - buf_len -= 3 * NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u8(skb, - NLA_U8, - iter->map.std->lvl.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u16(skb, - NLA_U16, - iter->map.std->cat.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - rcu_read_unlock(); - - return skb; -doi_dump_failure: +doi_walk_return: rcu_read_unlock(); - kfree(skb); - return NULL; + *skip_cnt = doi_cnt; + return ret_val; } /** @@ -1486,43 +1331,40 @@ socket_setattr_failure: } /** - * cipso_v4_socket_getattr - Get the security attributes from a socket - * @sock: the socket + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock * @secattr: the security attributes * * Description: - * Query @sock to see if there is a CIPSO option attached to the socket and if - * there is return the CIPSO security attributes in @secattr. Returns zero on - * success and negative values on failure. + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. * */ -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; - struct sock *sk; struct inet_sock *sk_inet; unsigned char *cipso_ptr; u32 doi; struct cipso_v4_doi *doi_def; - sk = sock->sk; - lock_sock(sk); sk_inet = inet_sk(sk); if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - goto socket_getattr_return; + return -ENOMSG; cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - sizeof(struct iphdr); ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); if (ret_val == 0) - goto socket_getattr_return; + return ret_val; doi = ntohl(*(u32 *)&cipso_ptr[2]); rcu_read_lock(); doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { rcu_read_unlock(); - goto socket_getattr_return; + return -ENOMSG; } switch (cipso_ptr[6]) { case CIPSO_V4_TAG_RBITMAP: @@ -1533,8 +1375,29 @@ int cipso_v4_socket_getattr(const struct socket *sock, } rcu_read_unlock(); -socket_getattr_return: - release_sock(sk); + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + lock_sock(sock->sk); + ret_val = cipso_v4_sock_getattr(sock->sk, secattr); + release_sock(sock->sk); + return ret_val; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 19b2071ff31..e82a5be894b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -129,6 +129,12 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, return ret; } +static int __init tcp_congestion_default(void) +{ + return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); +} + +late_initcall(tcp_congestion_default); ctl_table ipv4_table[] = { { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 7ff2e4273a7..af0aca1e6be 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -48,7 +48,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) printk(KERN_NOTICE "TCP %s already registered\n", ca->name); ret = -EEXIST; } else { - list_add_rcu(&ca->list, &tcp_cong_list); + list_add_tail_rcu(&ca->list, &tcp_cong_list); printk(KERN_INFO "TCP %s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig index fe23cb7f1e8..9f7121ae13e 100644 --- a/net/netlabel/Kconfig +++ b/net/netlabel/Kconfig @@ -9,6 +9,9 @@ config NETLABEL ---help--- NetLabel provides support for explicit network packet labeling protocols such as CIPSO and RIPSO. For more information see - Documentation/netlabel. + Documentation/netlabel as well as the NetLabel SourceForge project + for configuration tools and additional documentation. + + * http://netlabel.sf.net If you are unsure, say N. diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index a4f40adc447..4125a55f469 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -41,15 +41,37 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" +/* Argument struct for cipso_v4_doi_walk() */ +struct netlbl_cipsov4_doiwalk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, - .maxattr = 0, + .maxattr = NLBL_CIPSOV4_A_MAX, }; +/* NetLabel Netlink attribute policy */ +static struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { + [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MTYPE] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_TAG] = { .type = NLA_U8 }, + [NLBL_CIPSOV4_A_TAGLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVLREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSLVL] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSLVLLST] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLOC] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCATREM] = { .type = NLA_U32 }, + [NLBL_CIPSOV4_A_MLSCAT] = { .type = NLA_NESTED }, + [NLBL_CIPSOV4_A_MLSCATLST] = { .type = NLA_NESTED }, +}; /* * Helper Functions @@ -81,6 +103,41 @@ static void netlbl_cipsov4_doi_free(struct rcu_head *entry) kfree(ptr); } +/** + * netlbl_cipsov4_add_common - Parse the common sections of a ADD message + * @info: the Generic NETLINK info block + * @doi_def: the CIPSO V4 DOI definition + * + * Description: + * Parse the common sections of a ADD message and fill in the related values + * in @doi_def. Returns zero on success, negative values on failure. + * + */ +static int netlbl_cipsov4_add_common(struct genl_info *info, + struct cipso_v4_doi *doi_def) +{ + struct nlattr *nla; + int nla_rem; + u32 iter = 0; + + doi_def->doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + + if (nla_validate_nested(info->attrs[NLBL_CIPSOV4_A_TAGLST], + NLBL_CIPSOV4_A_MAX, + netlbl_cipsov4_genl_policy) != 0) + return -EINVAL; + + nla_for_each_nested(nla, info->attrs[NLBL_CIPSOV4_A_TAGLST], nla_rem) + if (nla->nla_type == NLBL_CIPSOV4_A_TAG) { + i |