aboutsummaryrefslogtreecommitdiff
path: root/net/sctp
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-06 14:23:39 +0200
committerIngo Molnar <mingo@elte.hu>2008-07-06 14:23:39 +0200
commit68083e05d72d94f347293d8cc0067050ba904bfa (patch)
tree842e71365bd90866be7add181661a4039d891564 /net/sctp
parent7baac8b91f9871ba8cb09af84de4ae1d86d07812 (diff)
parentb7279469d66b55119784b8b9529c99c1955fe747 (diff)
Merge commit 'v2.6.26-rc9' into cpus4096
Diffstat (limited to 'net/sctp')
-rw-r--r--net/sctp/associola.c34
-rw-r--r--net/sctp/ipv6.c11
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c120
-rw-r--r--net/sctp/protocol.c26
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/sctp/transport.c50
7 files changed, 169 insertions, 78 deletions
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b4cd2b71953..024c3ebd966 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -474,6 +474,15 @@ static void sctp_association_destroy(struct sctp_association *asoc)
void sctp_assoc_set_primary(struct sctp_association *asoc,
struct sctp_transport *transport)
{
+ int changeover = 0;
+
+ /* it's a changeover only if we already have a primary path
+ * that we are changing
+ */
+ if (asoc->peer.primary_path != NULL &&
+ asoc->peer.primary_path != transport)
+ changeover = 1 ;
+
asoc->peer.primary_path = transport;
/* Set a default msg_name for events. */
@@ -499,12 +508,12 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
* double switch to the same destination address.
*/
if (transport->cacc.changeover_active)
- transport->cacc.cycling_changeover = 1;
+ transport->cacc.cycling_changeover = changeover;
/* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that
* a changeover has occurred.
*/
- transport->cacc.changeover_active = 1;
+ transport->cacc.changeover_active = changeover;
/* 3) The sender MUST store the next TSN to be sent in
* next_tsn_at_change.
@@ -1203,6 +1212,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
struct list_head *head = &asoc->peer.transport_addr_list;
struct list_head *pos;
+ if (asoc->peer.transport_count == 1)
+ return;
+
/* Find the next transport in a round-robin fashion. */
t = asoc->peer.retran_path;
pos = &t->transports;
@@ -1217,6 +1229,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
t = list_entry(pos, struct sctp_transport, transports);
+ /* We have exhausted the list, but didn't find any
+ * other active transports. If so, use the next
+ * transport.
+ */
+ if (t == asoc->peer.retran_path) {
+ t = next;
+ break;
+ }
+
/* Try to find an active transport. */
if ((t->state == SCTP_ACTIVE) ||
@@ -1229,15 +1250,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
if (!next)
next = t;
}
-
- /* We have exhausted the list, but didn't find any
- * other active transports. If so, use the next
- * transport.
- */
- if (t == asoc->peer.retran_path) {
- t = next;
- break;
- }
}
asoc->peer.retran_path = t;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e45e44c6063..a2f4d4d5159 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
/* Fills in the source address(saddr) based on the destination address(daddr)
* and asoc's bind address list.
*/
-static void sctp_v6_get_saddr(struct sctp_association *asoc,
+static void sctp_v6_get_saddr(struct sctp_sock *sk,
+ struct sctp_association *asoc,
struct dst_entry *dst,
union sctp_addr *daddr,
union sctp_addr *saddr)
@@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
if (!asoc) {
ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
&daddr->v6.sin6_addr,
- inet6_sk(asoc->base.sk)->srcprefs,
+ inet6_sk(&sk->inet.sk)->srcprefs,
&saddr->v6.sin6_addr);
SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
NIP6(saddr->v6.sin6_addr));
@@ -726,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
}
+static void sctp_v6_ecn_capable(struct sock *sk)
+{
+ inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+}
+
/* Initialize a PF_INET6 socket msg_name. */
static void sctp_inet6_msgname(char *msgname, int *addr_len)
{
@@ -996,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = {
.skb_iif = sctp_v6_skb_iif,
.is_ce = sctp_v6_is_ce,
.seq_dump_addr = sctp_v6_seq_dump_addr,
+ .ecn_capable = sctp_v6_ecn_capable,
.net_header_len = sizeof(struct ipv6hdr),
.sockaddr_len = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cf4f9fb6819..6d45bae93b4 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
* Note: The works for IPv6 layer checks this bit too later
* in transmission. See IP6_ECN_flow_xmit().
*/
- INET_ECN_xmit(nskb->sk);
+ (*tp->af_specific->ecn_capable)(nskb->sk);
/* Set up the IP options. */
/* BUG: not implemented
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 59edfd25a19..ace6770e904 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
+ q->fast_rtx = 0;
q->outstanding_bytes = 0;
q->empty = 1;
q->cork = 0;
@@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
case SCTP_RTXR_FAST_RTX:
SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
+ q->fast_rtx = 1;
break;
case SCTP_RTXR_PMTUD:
SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
@@ -518,9 +520,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
* the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by
* following the procedures outlined in C1 - C5.
*/
- sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
+ if (reason == SCTP_RTXR_T3_RTX)
+ sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);
- error = sctp_outq_flush(q, /* rtx_timeout */ 1);
+ /* Flush the queues only on timeout, since fast_rtx is only
+ * triggered during sack processing and the queue
+ * will be flushed at the end.
+ */
+ if (reason != SCTP_RTXR_FAST_RTX)
+ error = sctp_outq_flush(q, /* rtx_timeout */ 1);
if (error)
q->asoc->base.sk->sk_err = -error;
@@ -538,17 +546,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
int rtx_timeout, int *start_timer)
{
struct list_head *lqueue;
- struct list_head *lchunk;
struct sctp_transport *transport = pkt->transport;
sctp_xmit_t status;
struct sctp_chunk *chunk, *chunk1;
struct sctp_association *asoc;
+ int fast_rtx;
int error = 0;
+ int timer = 0;
+ int done = 0;
asoc = q->asoc;
lqueue = &q->retransmit;
+ fast_rtx = q->fast_rtx;
- /* RFC 2960 6.3.3 Handle T3-rtx Expiration
+ /* This loop handles time-out retransmissions, fast retransmissions,
+ * and retransmissions due to opening of whindow.
+ *
+ * RFC 2960 6.3.3 Handle T3-rtx Expiration
*
* E3) Determine how many of the earliest (i.e., lowest TSN)
* outstanding DATA chunks for the address for which the
@@ -563,12 +577,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
* [Just to be painfully clear, if we are retransmitting
* because a timeout just happened, we should send only ONE
* packet of retransmitted data.]
+ *
+ * For fast retransmissions we also send only ONE packet. However,
+ * if we are just flushing the queue due to open window, we'll
+ * try to send as much as possible.
*/
- lchunk = sctp_list_dequeue(lqueue);
-
- while (lchunk) {
- chunk = list_entry(lchunk, struct sctp_chunk,
- transmitted_list);
+ list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) {
/* Make sure that Gap Acked TSNs are not retransmitted. A
* simple approach is just to move such TSNs out of the
@@ -576,58 +590,60 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
* next chunk.
*/
if (chunk->tsn_gap_acked) {
- list_add_tail(lchunk, &transport->transmitted);
- lchunk = sctp_list_dequeue(lqueue);
+ list_del(&chunk->transmitted_list);
+ list_add_tail(&chunk->transmitted_list,
+ &transport->transmitted);
continue;
}
+ /* If we are doing fast retransmit, ignore non-fast_rtransmit
+ * chunks
+ */
+ if (fast_rtx && !chunk->fast_retransmit)
+ continue;
+
/* Attempt to append this chunk to the packet. */
status = sctp_packet_append_chunk(pkt, chunk);
switch (status) {
case SCTP_XMIT_PMTU_FULL:
/* Send this packet. */
- if ((error = sctp_packet_transmit(pkt)) == 0)
- *start_timer = 1;
+ error = sctp_packet_transmit(pkt);
/* If we are retransmitting, we should only
* send a single packet.
*/
- if (rtx_timeout) {
- list_add(lchunk, lqueue);
- lchunk = NULL;
- }
+ if (rtx_timeout || fast_rtx)
+ done = 1;
- /* Bundle lchunk in the next round. */
+ /* Bundle next chunk in the next round. */
break;
case SCTP_XMIT_RWND_FULL:
/* Send this packet. */
- if ((error = sctp_packet_transmit(pkt)) == 0)
- *start_timer = 1;
+ error = sctp_packet_transmit(pkt);
/* Stop sending DATA as there is no more room
* at the receiver.
*/
- list_add(lchunk, lqueue);
- lchunk = NULL;
+ done = 1;
break;
case SCTP_XMIT_NAGLE_DELAY:
/* Send this packet. */
- if ((error = sctp_packet_transmit(pkt)) == 0)
- *start_timer = 1;
+ error = sctp_packet_transmit(pkt);
/* Stop sending DATA because of nagle delay. */
- list_add(lchunk, lqueue);
- lchunk = NULL;
+ done = 1;
break;
default:
/* The append was successful, so add this chunk to
* the transmitted list.
*/
- list_add_tail(lchunk, &transport->transmitted);
+ list_del(&chunk->transmitted_list);
+ list_add_tail(&chunk->transmitted_list,
+ &transport->transmitted);
/* Mark the chunk as ineligible for fast retransmit
* after it is retransmitted.
@@ -635,27 +651,44 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
if (chunk->fast_retransmit > 0)
chunk->fast_retransmit = -1;
- *start_timer = 1;
- q->empty = 0;
+ /* Force start T3-rtx timer when fast retransmitting
+ * the earliest outstanding TSN
+ */
+ if (!timer && fast_rtx &&
+ ntohl(chunk->subh.data_hdr->tsn) ==
+ asoc->ctsn_ack_point + 1)
+ timer = 2;
- /* Retrieve a new chunk to bundle. */
- lchunk = sctp_list_dequeue(lqueue);
+ q->empty = 0;
break;
}
- /* If we are here due to a retransmit timeout or a fast
- * retransmit and if there are any chunks left in the retransmit
- * queue that could not fit in the PMTU sized packet, they need
- * to be marked as ineligible for a subsequent fast retransmit.
- */
- if (rtx_timeout && !lchunk) {
- list_for_each_entry(chunk1, lqueue, transmitted_list) {
- if (chunk1->fast_retransmit > 0)
- chunk1->fast_retransmit = -1;
- }
+ /* Set the timer if there were no errors */
+ if (!error && !timer)
+ timer = 1;
+
+ if (done)
+ break;
+ }
+
+ /* If we are here due to a retransmit timeout or a fast
+ * retransmit and if there are any chunks left in the retransmit
+ * queue that could not fit in the PMTU sized packet, they need
+ * to be marked as ineligible for a subsequent fast retransmit.
+ */
+ if (rtx_timeout || fast_rtx) {
+ list_for_each_entry(chunk1, lqueue, transmitted_list) {
+ if (chunk1->fast_retransmit > 0)
+ chunk1->fast_retransmit = -1;
}
}
+ *start_timer = timer;
+
+ /* Clear fast retransmit hint */
+ if (fast_rtx)
+ q->fast_rtx = 0;
+
return error;
}
@@ -862,7 +895,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
rtx_timeout, &start_timer);
if (start_timer)
- sctp_transport_reset_timers(transport);
+ sctp_transport_reset_timers(transport,
+ start_timer-1);
/* This can happen on COOKIE-ECHO resend. Only
* one chunk can get bundled with a COOKIE-ECHO.
@@ -977,7 +1011,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
list_add_tail(&chunk->transmitted_list,
&transport->transmitted);
- sctp_transport_reset_timers(transport);
+ sctp_transport_reset_timers(transport, start_timer-1);
q->empty = 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0ec234b762c..9258dfe784a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -108,14 +108,23 @@ static __init int sctp_proc_init(void)
}
if (sctp_snmp_proc_init())
- goto out_nomem;
+ goto out_snmp_proc_init;
if (sctp_eps_proc_init())
- goto out_nomem;
+ goto out_eps_proc_init;
if (sctp_assocs_proc_init())
- goto out_nomem;
+ goto out_assocs_proc_init;
return 0;
+out_assocs_proc_init:
+ sctp_eps_proc_exit();
+out_eps_proc_init:
+ sctp_snmp_proc_exit();
+out_snmp_proc_init:
+ if (proc_net_sctp) {
+ proc_net_sctp = NULL;
+ remove_proc_entry("sctp", init_net.proc_net);
+ }
out_nomem:
return -ENOMEM;
}
@@ -470,11 +479,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
/* Walk through the bind address list and look for a bind
* address that matches the source address of the returned dst.
*/
+ sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) {
if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC))
continue;
- sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
}
@@ -519,7 +528,8 @@ out:
/* For v4, the source address is cached in the route entry(dst). So no need
* to cache it separately and hence this is an empty routine.
*/
-static void sctp_v4_get_saddr(struct sctp_association *asoc,
+static void sctp_v4_get_saddr(struct sctp_sock *sk,
+ struct sctp_association *asoc,
struct dst_entry *dst,
union sctp_addr *daddr,
union sctp_addr *saddr)
@@ -616,6 +626,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
}
+static void sctp_v4_ecn_capable(struct sock *sk)
+{
+ INET_ECN_xmit(sk);
+}
+
/* Event handler for inet address addition/deletion events.
* The sctp_local_addr_list needs to be protocted by a spin lock since
* multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -934,6 +949,7 @@ static struct sctp_af sctp_af_inet = {
.skb_iif = sctp_v4_skb_iif,
.is_ce = sctp_v4_is_ce,
.seq_dump_addr = sctp_v4_seq_dump_addr,
+ .ecn_capable = sctp_v4_ecn_capable,
.net_header_len = sizeof(struct iphdr),
.sockaddr_len = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e7e3baf7009..0dbcde6758e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
if (copy_from_user(&getaddrs, optval, len))
return -EFAULT;
- if (getaddrs.addr_num <= 0) return -EINVAL;
+ if (getaddrs.addr_num <= 0 ||
+ getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
+ return -EINVAL;
/*
* For UDP-style sockets, id specifies the association to query.
* If the id field is set to the value '0' then the locally bound
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f4938f6c5ab..3f34f61221e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
peer->rttvar = 0;
peer->srtt = 0;
peer->rto_pending = 0;
+ peer->fast_recovery = 0;
peer->last_time_heard = jiffies;
peer->last_time_used = jiffies;
@@ -190,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
/* Start T3_rtx timer if it is not already running and update the heartbeat
* timer. This routine is called every time a DATA chunk is sent.
*/
-void sctp_transport_reset_timers(struct sctp_transport *transport)
+void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
{
/* RFC 2960 6.3.2 Retransmission Timer Rules
*
@@ -200,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
* address.
*/
- if (!timer_pending(&transport->T3_rtx_timer))
+ if (force || !timer_pending(&transport->T3_rtx_timer))
if (!mod_timer(&transport->T3_rtx_timer,
jiffies + transport->rto))
sctp_transport_hold(transport);
@@ -291,7 +292,7 @@ void sctp_transport_route(struct sctp_transport *transport,
if (saddr)
memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
else
- af->get_saddr(asoc, dst, daddr, &transport->saddr);
+ af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
transport->dst = dst;
if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
@@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
cwnd = transport->cwnd;
flight_size = transport->flight_size;
+ /* See if we need to exit Fast Recovery first */
+ if (transport->fast_recovery &&
+ TSN_lte(transport->fast_recovery_exit, sack_ctsn))
+ transport->fast_recovery = 0;
+
/* The appropriate cwnd increase algorithm is performed if, and only
- * if the cumulative TSN has advanced and the congestion window is
+ * if the cumulative TSN whould advanced and the congestion window is
* being fully utilized.
*/
- if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
+ if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
(flight_size < cwnd))
return;
@@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
pmtu = transport->asoc->pathmtu;
if (cwnd <= ssthresh) {
- /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
- * than or equal to ssthresh an SCTP endpoint MUST use the
- * slow start algorithm to increase cwnd only if the current
- * congestion window is being fully utilized and an incoming
- * SACK advances the Cumulative TSN Ack Point. Only when these
- * two conditions are met can the cwnd be increased otherwise
- * the cwnd MUST not be increased. If these conditions are met
- * then cwnd MUST be increased by at most the lesser of
- * 1) the total size of the previously outstanding DATA
- * chunk(s) acknowledged, and 2) the destination's path MTU.
+ /* RFC 4960 7.2.1
+ * o When cwnd is less than or equal to ssthresh, an SCTP
+ * endpoint MUST use the slow-start algorithm to increase
+ * cwnd only if the current congestion window is being fully
+ * utilized, an incoming SACK advances the Cumulative TSN
+ * Ack Point, and the data sender is not in Fast Recovery.
+ * Only when these three conditions are met can the cwnd be
+ * increased; otherwise, the cwnd MUST not be increased.
+ * If these conditions are met, then cwnd MUST be increased
+ * by, at most, the lesser of 1) the total size of the
+ * previously outstanding DATA chunk(s) acknowledged, and
+ * 2) the destination's path MTU. This upper bound protects
+ * against the ACK-Splitting attack outlined in [SAVAGE99].
*/
+ if (transport->fast_recovery)
+ return;
+
if (bytes_acked > pmtu)
cwnd += pmtu;
else
@@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
* cwnd = ssthresh
* partial_bytes_acked = 0
*/
+ if (transport->fast_recovery)
+ return;
+
+ /* Mark Fast recovery */
+ transport->fast_recovery = 1;
+ transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+
transport->ssthresh = max(transport->cwnd/2,
4*transport->asoc->pathmtu);
transport->cwnd = transport->ssthresh;
@@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t)
t->flight_size = 0;
t->error_count = 0;
t->rto_pending = 0;
+ t->fast_recovery = 0;
/* Initialize the state information for SFR-CACC */
t->cacc.changeover_active = 0;