diff options
Diffstat (limited to 'net/dccp')
-rw-r--r-- | net/dccp/Kconfig | 50 | ||||
-rw-r--r-- | net/dccp/Makefile | 10 | ||||
-rw-r--r-- | net/dccp/ccid.c | 139 | ||||
-rw-r--r-- | net/dccp/ccid.h | 180 | ||||
-rw-r--r-- | net/dccp/ccids/Kconfig | 29 | ||||
-rw-r--r-- | net/dccp/ccids/Makefile | 5 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.c | 1221 | ||||
-rw-r--r-- | net/dccp/ccids/ccid3.h | 137 | ||||
-rw-r--r-- | net/dccp/ccids/lib/Makefile | 3 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 144 | ||||
-rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 61 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.c | 398 | ||||
-rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 199 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 22 | ||||
-rw-r--r-- | net/dccp/ccids/lib/tfrc_equation.c | 644 | ||||
-rw-r--r-- | net/dccp/dccp.h | 493 | ||||
-rw-r--r-- | net/dccp/diag.c | 71 | ||||
-rw-r--r-- | net/dccp/input.c | 600 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 1356 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 264 | ||||
-rw-r--r-- | net/dccp/options.c | 855 | ||||
-rw-r--r-- | net/dccp/output.c | 528 | ||||
-rw-r--r-- | net/dccp/proto.c | 826 | ||||
-rw-r--r-- | net/dccp/timer.c | 255 |
24 files changed, 8490 insertions, 0 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 00000000000..187ac182e24 --- /dev/null +++ b/net/dccp/Kconfig @@ -0,0 +1,50 @@ +menu "DCCP Configuration (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL + +config IP_DCCP + tristate "The DCCP Protocol (EXPERIMENTAL)" + ---help--- + Datagram Congestion Control Protocol + + From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>. + + The Datagram Congestion Control Protocol (DCCP) is a transport + protocol that implements bidirectional, unicast connections of + congestion-controlled, unreliable datagrams. It should be suitable + for use by applications such as streaming media, Internet telephony, + and on-line games + + To compile this protocol support as a module, choose M here: the + module will be called dccp. + + If in doubt, say N. + +config INET_DCCP_DIAG + depends on IP_DCCP && INET_DIAG + def_tristate y if (IP_DCCP = y && INET_DIAG = y) + def_tristate m + +source "net/dccp/ccids/Kconfig" + +menu "DCCP Kernel Hacking" + depends on IP_DCCP && DEBUG_KERNEL=y + +config IP_DCCP_DEBUG + bool "DCCP debug messages" + ---help--- + Only use this if you're hacking DCCP. + + Just say N. + +config IP_DCCP_UNLOAD_HACK + depends on IP_DCCP=m && IP_DCCP_CCID3=m + bool "DCCP control sock unload hack" + ---help--- + Enable this to be able to unload the dccp module when the it + has only one refcount held, the control sock one. Just execute + "rmmod dccp_ccid3 dccp" + + Just say N. +endmenu + +endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 00000000000..fb97bb04245 --- /dev/null +++ b/net/dccp/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_IP_DCCP) += dccp.o + +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ + timer.o + +obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o + +dccp_diag-y := diag.o + +obj-y += ccids/ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 00000000000..9d8fc0e289e --- /dev/null +++ b/net/dccp/ccid.c @@ -0,0 +1,139 @@ +/* + * net/dccp/ccid.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "ccid.h" + +static struct ccid *ccids[CCID_MAX]; +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +static atomic_t ccids_lockct = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(ccids_lock); + +/* + * The strategy is: modifications ccids vector are short, do not sleep and + * veeery rare, but read access should be free of any exclusive locks. + */ +static void ccids_write_lock(void) +{ + spin_lock(&ccids_lock); + while (atomic_read(&ccids_lockct) != 0) { + spin_unlock(&ccids_lock); + yield(); + spin_lock(&ccids_lock); + } +} + +static inline void ccids_write_unlock(void) +{ + spin_unlock(&ccids_lock); +} + +static inline void ccids_read_lock(void) +{ + atomic_inc(&ccids_lockct); + spin_unlock_wait(&ccids_lock); +} + +static inline void ccids_read_unlock(void) +{ + atomic_dec(&ccids_lockct); +} + +#else +#define ccids_write_lock() do { } while(0) +#define ccids_write_unlock() do { } while(0) +#define ccids_read_lock() do { } while(0) +#define ccids_read_unlock() do { } while(0) +#endif + +int ccid_register(struct ccid *ccid) +{ + int err; + + if (ccid->ccid_init == NULL) + return -1; + + ccids_write_lock(); + err = -EEXIST; + if (ccids[ccid->ccid_id] == NULL) { + ccids[ccid->ccid_id] = ccid; + err = 0; + } + ccids_write_unlock(); + if (err == 0) + pr_info("CCID: Registered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return err; +} + +EXPORT_SYMBOL_GPL(ccid_register); + +int ccid_unregister(struct ccid *ccid) +{ + ccids_write_lock(); + ccids[ccid->ccid_id] = NULL; + ccids_write_unlock(); + pr_info("CCID: Unregistered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return 0; +} + +EXPORT_SYMBOL_GPL(ccid_unregister); + +struct ccid *ccid_init(unsigned char id, struct sock *sk) +{ + struct ccid *ccid; + +#ifdef CONFIG_KMOD + if (ccids[id] == NULL) + request_module("net-dccp-ccid-%d", id); +#endif + ccids_read_lock(); + + ccid = ccids[id]; + if (ccid == NULL) + goto out; + + if (!try_module_get(ccid->ccid_owner)) + goto out_err; + + if (ccid->ccid_init(sk) != 0) + goto out_module_put; +out: + ccids_read_unlock(); + return ccid; +out_module_put: + module_put(ccid->ccid_owner); +out_err: + ccid = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(ccid_init); + +void ccid_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid == NULL) + return; + + ccids_read_lock(); + + if (ccids[ccid->ccid_id] != NULL) { + if (ccid->ccid_exit != NULL) + ccid->ccid_exit(sk); + module_put(ccid->ccid_owner); + } + + ccids_read_unlock(); +} + +EXPORT_SYMBOL_GPL(ccid_exit); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 00000000000..962f1e9e2f7 --- /dev/null +++ b/net/dccp/ccid.h @@ -0,0 +1,180 @@ +#ifndef _CCID_H +#define _CCID_H +/* + * net/dccp/ccid.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <net/sock.h> +#include <linux/dccp.h> +#include <linux/list.h> +#include <linux/module.h> + +#define CCID_MAX 255 + +struct ccid { + unsigned char ccid_id; + const char *ccid_name; + struct module *ccid_owner; + int (*ccid_init)(struct sock *sk); + void (*ccid_exit)(struct sock *sk); + int (*ccid_hc_rx_init)(struct sock *sk); + int (*ccid_hc_tx_init)(struct sock *sk); + void (*ccid_hc_rx_exit)(struct sock *sk); + void (*ccid_hc_tx_exit)(struct sock *sk); + void (*ccid_hc_rx_packet_recv)(struct sock *sk, + struct sk_buff *skb); + int (*ccid_hc_rx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + void (*ccid_hc_rx_insert_options)(struct sock *sk, + struct sk_buff *skb); + void (*ccid_hc_tx_insert_options)(struct sock *sk, + struct sk_buff *skb); + void (*ccid_hc_tx_packet_recv)(struct sock *sk, + struct sk_buff *skb); + int (*ccid_hc_tx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + int (*ccid_hc_tx_send_packet)(struct sock *sk, + struct sk_buff *skb, int len); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, + int len); + void (*ccid_hc_rx_get_info)(struct sock *sk, + struct tcp_info *info); + void (*ccid_hc_tx_get_info)(struct sock *sk, + struct tcp_info *info); +}; + +extern int ccid_register(struct ccid *ccid); +extern int ccid_unregister(struct ccid *ccid); + +extern struct ccid *ccid_init(unsigned char id, struct sock *sk); +extern void ccid_exit(struct ccid *ccid, struct sock *sk); + +static inline void __ccid_get(struct ccid *ccid) +{ + __module_get(ccid->ccid_owner); +} + +static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb, int len) +{ + int rc = 0; + if (ccid->ccid_hc_tx_send_packet != NULL) + rc = ccid->ccid_hc_tx_send_packet(sk, skb, len); + return rc; +} + +static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, + int more, int len) +{ + if (ccid->ccid_hc_tx_packet_sent != NULL) + ccid->ccid_hc_tx_packet_sent(sk, more, len); +} + +static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_rx_init != NULL) + rc = ccid->ccid_hc_rx_init(sk); + return rc; +} + +static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_tx_init != NULL) + rc = ccid->ccid_hc_tx_init(sk); + return rc; +} + +static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_rx_exit != NULL && + dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL) + ccid->ccid_hc_rx_exit(sk); +} + +static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_tx_exit != NULL && + dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL) + ccid->ccid_hc_tx_exit(sk); +} + +static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_packet_recv != NULL) + ccid->ccid_hc_rx_packet_recv(sk, skb); +} + +static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_packet_recv != NULL) + ccid->ccid_hc_tx_packet_recv(sk, skb); +} + +static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_tx_parse_options != NULL) + rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, + value); + return rc; +} + +static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_rx_parse_options != NULL) + rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_insert_options != NULL) + ccid->ccid_hc_tx_insert_options(sk, skb); +} + +static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_insert_options != NULL) + ccid->ccid_hc_rx_insert_options(sk, skb); +} + +static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk, + struct tcp_info *info) +{ + if (ccid->ccid_hc_rx_get_info != NULL) + ccid->ccid_hc_rx_get_info(sk, info); +} + +static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, + struct tcp_info *info) +{ + if (ccid->ccid_hc_tx_get_info != NULL) + ccid->ccid_hc_tx_get_info(sk, info); +} +#endif /* _CCID_H */ diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 00000000000..7684d83946a --- /dev/null +++ b/net/dccp/ccids/Kconfig @@ -0,0 +1,29 @@ +menu "DCCP CCIDs Configuration (EXPERIMENTAL)" + depends on IP_DCCP && EXPERIMENTAL + +config IP_DCCP_CCID3 + tristate "CCID3 (TFRC) (EXPERIMENTAL)" + depends on IP_DCCP + ---help--- + CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + rate-controlled congestion control mechanism. TFRC is designed to + be reasonably fair when competing for bandwidth with TCP-like flows, + where a flow is "reasonably fair" if its sending rate is generally + within a factor of two of the sending rate of a TCP flow under the + same conditions. However, TFRC has a much lower variation of + throughput over time compared with TCP, which makes CCID 3 more + suitable than CCID 2 for applications such streaming media where a + relatively smooth sending rate is of importance. + + CCID 3 is further described in [CCID 3 PROFILE]. The TFRC + congestion control algorithms were initially described in RFC 3448. + + This text was extracted from draft-ietf-dccp-spec-11.txt. + + If in doubt, say M. + +config IP_DCCP_TFRC_LIB + depends on IP_DCCP_CCID3 + def_tristate IP_DCCP_CCID3 + +endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 00000000000..956f79f5074 --- /dev/null +++ b/net/dccp/ccids/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o + +dccp_ccid3-y := ccid3.o + +obj-y += lib/ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 00000000000..7bf3b3a91e9 --- /dev/null +++ b/net/dccp/ccids/ccid3.c @@ -0,0 +1,1221 @@ +/* + * net/dccp/ccids/ccid3.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/config.h> +#include "../ccid.h" +#include "../dccp.h" +#include "lib/packet_history.h" +#include "lib/loss_interval.h" +#include "lib/tfrc.h" +#include "ccid3.h" + +/* + * Reason for maths with 10 here is to avoid 32 bit overflow when a is big. + */ +static inline u32 usecs_div(const u32 a, const u32 b) +{ + const u32 tmp = a * (USEC_PER_SEC / 10); + return b > 20 ? tmp / (b / 10) : tmp; +} + +static int ccid3_debug; + +#ifdef CCID3_DEBUG +#define ccid3_pr_debug(format, a...) \ + do { if (ccid3_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ + } while (0) +#else +#define ccid3_pr_debug(format, a...) +#endif + +static struct dccp_tx_hist *ccid3_tx_hist; +static struct dccp_rx_hist *ccid3_rx_hist; +static struct dccp_li_hist *ccid3_li_hist; + +static int ccid3_init(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + return 0; +} + +static void ccid3_exit(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); +} + +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) +{ + static char *ccid3_state_names[] = { + [TFRC_SSTATE_NO_SENT] = "NO_SENT", + [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", + [TFRC_SSTATE_FBACK] = "FBACK", + [TFRC_SSTATE_TERM] = "TERM", + }; + + return ccid3_state_names[state]; +} +#endif + +static inline void ccid3_hc_tx_set_state(struct sock *sk, + enum ccid3_hc_tx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), + ccid3_tx_state_name(state)); + WARN_ON(state == oldstate); + hctx->ccid3hctx_state = state; +} + +/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ +static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) +{ + /* + * If no feedback spec says t_ipi is 1 second (set elsewhere and then + * doubles after every no feedback timer (separate function) + */ + if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) + hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x); +} + +/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ +static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) +{ + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, + TFRC_OPSYS_HALF_TIME_GRAN); +} + +/* + * Update X by + * If (p > 0) + * x_calc = calcX(s, R, p); + * X = max(min(X_calc, 2 * X_recv), s / t_mbi); + * Else + * If (now - tld >= R) + * X = max(min(2 * X, 2 * X_recv), s / R); + * tld = now; + */ +static void ccid3_hc_tx_update_x(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + /* To avoid large error in calcX */ + if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { + hctx->ccid3hctx_x_calc = tfrc_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, + 2 * hctx->ccid3hctx_x_recv), + (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME)); + } else { + struct timeval now; + + do_gettimeofday(&now); + if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= + hctx->ccid3hctx_rtt) { + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, + hctx->ccid3hctx_x) * 2, + usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt)); + hctx->ccid3hctx_t_ld = now; + } + } +} + +static void ccid3_hc_tx_no_feedback_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + unsigned long next_tmout = 0; + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + /* XXX: set some sensible MIB */ + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + HZ / 5); + goto out; + } + + ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_TERM: + goto out; + case TFRC_SSTATE_NO_FBACK: + /* Halve send rate */ + hctx->ccid3hctx_x /= 2; + if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME)) + hctx->ccid3hctx_x = (hctx->ccid3hctx_s / + TFRC_MAX_BACK_OFF_TIME); + + ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d " + "bytes/s\n", + dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state), + hctx->ccid3hctx_x); + next_tmout = max_t(u32, 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x), + TFRC_INITIAL_TIMEOUT); + /* + * FIXME - not sure above calculation is correct. See section + * 5 of CCID3 11 should adjust tx_t_ipi and double that to + * achieve it really + */ + break; + case TFRC_SSTATE_FBACK: + /* + * Check if IDLE since last timeout and recv rate is less than + * 4 packets per RTT + */ + if (!hctx->ccid3hctx_idle || + (hctx->ccid3hctx_x_recv >= + 4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) { + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", + dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + /* Halve sending rate */ + + /* If (X_calc > 2 * X_recv) + * X_recv = max(X_recv / 2, s / (2 * t_mbi)); + * Else + * X_recv = X_calc / 4; + */ + BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && + hctx->ccid3hctx_x_calc == 0); + + /* check also if p is zero -> x_calc is infinity? */ + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || + hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) + hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, + hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); + else + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + } + /* + * Schedule no feedback timer to expire in + * max(4 * R, 2 * s / X) + */ + next_tmout = max_t(u32, hctx->ccid3hctx_t_rto, + 2 * usecs_div(hctx->ccid3hctx_s, + hctx->ccid3hctx_x)); + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + goto out; + } + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + hctx->ccid3hctx_idle = 1; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +static int ccid3_hc_tx_send_packet(struct sock *sk, + struct sk_buff *skb, int len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct dccp_tx_hist_entry *new_packet; + struct timeval now; + long delay; + int rc = -ENOTCONN; + + /* Check if pure ACK or Terminating*/ + + /* + * XXX: We only call this function for DATA and DATAACK, on, these + * packets can have zero length, but why the comment about "pure ACK"? + */ + if (hctx == NULL || len == 0 || + hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + goto out; + + /* See if last packet allocated was not sent */ + new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (new_packet == NULL || new_packet->dccphtx_sent) { + new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, + SLAB_ATOMIC); + + rc = -ENOBUFS; + if (new_packet == NULL) { + ccid3_pr_debug("%s, sk=%p, not enough mem to add " + "to history, send refused\n", + dccp_role(sk), sk); + goto out; + } + + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); + } + + do_gettimeofday(&now); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", + dccp_role(sk), sk, dp->dccps_gss); + + hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + hctx->ccid3hctx_last_win_count = 0; + hctx->ccid3hctx_t_last_win_count = now; + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; + + /* Set nominal send time for initial packet */ + hctx->ccid3hctx_t_nom = now; + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); + ccid3_calc_new_delta(hctx); + rc = 0; + break; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + delay = (timeval_delta(&now, &hctx->ccid3hctx_t_nom) - + hctx->ccid3hctx_delta); + ccid3_pr_debug("send_packet delay=%ld\n", delay); + delay /= -1000; + /* divide by -1000 is to convert to ms and get sign right */ + rc = delay > 0 ? delay : 0; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + rc = -EINVAL; + break; + } + + /* Can we send? if so add options and add to packet history */ + if (rc == 0) + new_packet->dccphtx_ccval = + DCCP_SKB_CB(skb)->dccpd_ccval = + hctx->ccid3hctx_last_win_count; +out: + return rc; +} + +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct timeval now; + + BUG_ON(hctx == NULL); + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", + dccp_role(sk), sk); + return; + } + + do_gettimeofday(&now); + + /* check if we have sent a data packet */ + if (len > 0) { + unsigned long quarter_rtt; + struct dccp_tx_hist_entry *packet; + + packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (packet == NULL) { + printk(KERN_CRIT "%s: packet doesn't exists in " + "history!\n", __FUNCTION__); + return; + } + if (packet->dccphtx_sent) { + printk(KERN_CRIT "%s: no unsent packet in history!\n", + __FUNCTION__); + return; + } + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dp->dccps_gss; + /* + * Check if win_count have changed + * Algorithm in "8.1. Window Counter Valuer" in + * draft-ietf-dccp-ccid3-11.txt + */ + quarter_rtt = timeval_delta(&now, &hctx->ccid3hctx_t_last_win_count); + if (likely(hctx->ccid3hctx_rtt > 8)) + quarter_rtt /= hctx->ccid3hctx_rtt / 4; + + if (quarter_rtt > 0) { + hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + + min_t(unsigned long, quarter_rtt, 5)) % 16; + ccid3_pr_debug("%s, sk=%p, window changed from " + "%u to %u!\n", + dccp_role(sk), sk, + packet->dccphtx_ccval, + hctx->ccid3hctx_last_win_count); + } + + hctx->ccid3hctx_idle = 0; + packet->dccphtx_rtt = hctx->ccid3hctx_rtt; + packet->dccphtx_sent = 1; + } else + ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", + dccp_role(sk), sk, dp->dccps_gss); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* if first wasn't pure ack */ + if (len != 0) + printk(KERN_CRIT "%s: %s, First packet sent is noted " + "as a data packet\n", + __FUNCTION__, dccp_role(sk)); + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + if (len > 0) { + hctx->ccid3hctx_t_nom = now; + ccid3_calc_new_t_ipi(hctx); + ccid3_calc_new_delta(hctx); + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + struct dccp_tx_hist_entry *packet; + unsigned long next_tmout; + u32 t_elapsed; + u32 pinv; + u32 x_recv; + u32 r_sample; + + if (hctx == NULL) + return; + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, received a packet when " + "terminating!\n", dccp_role(sk), sk); + return; + } + + /* we are only interested in ACKs */ + if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || + DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) + return; + + opt_recv = &hctx->ccid3hctx_options_received; + + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + x_recv = opt_recv->ccid3or_receive_rate; + pinv = opt_recv->ccid3or_loss_event_rate; + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* FIXME: what to do here? */ + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + /* Calculate new round trip sample by + * R_sample = (now - t_recvdata) - t_delay */ + /* get t_recvdata from history */ + packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, + DCCP_SKB_CB(skb)->dccpd_ack_seq); + if (packet == NULL) { + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't " + "exist in history!\n", + dccp_role(sk), sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + return; + } + + /* Update RTT */ + r_sample = timeval_now_delta(&packet->dccphtx_tstamp); + /* FIXME: */ + // r_sample -= usecs_to_jiffies(t_elapsed * 10); + + /* Update RTT estimate by + * If (No feedback recv) + * R = R_sample; + * Else + * R = q * R + (1 - q) * R_sample; + * + * q is a constant, RFC 3448 recomments 0.9 + */ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); + hctx->ccid3hctx_rtt = r_sample; + } else + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + + r_sample / 10; + + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, " + "r_sample=%us\n", dccp_role(sk), sk, + hctx->ccid3hctx_rtt, r_sample); + + /* Update timeout interval */ + hctx->ccid3hctx_t_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, + USEC_PER_SEC); + + /* Update receive rate */ + hctx->ccid3hctx_x_recv = x_recv;/* X_recv in bytes per sec */ + |