diff options
Diffstat (limited to 'net/dccp/output.c')
| -rw-r--r-- | net/dccp/output.c | 280 |
1 files changed, 164 insertions, 116 deletions
diff --git a/net/dccp/output.c b/net/dccp/output.c index aadbdb58758..0248e8a3460 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -27,11 +27,13 @@ static inline void dccp_event_ack_sent(struct sock *sk) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } -static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) +/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ +static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb) { skb_set_owner_w(skb, sk); WARN_ON(sk->sk_send_head); sk->sk_send_head = skb; + return skb_clone(sk->sk_send_head, gfp_any()); } /* @@ -43,7 +45,7 @@ static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { - const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -136,14 +138,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; } /** - * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits + * dccp_determine_ccmps - Find out about CCID-specific packet-size limits * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), * since the RX CCID is restricted to feedback packets (Acks), which are small * in comparison with the data traffic. A value of 0 means "no current CCMPS". @@ -209,108 +211,159 @@ void dccp_write_space(struct sock *sk) } /** - * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet + * dccp_wait_for_ccid - Await CCID send permission * @sk: socket to wait for - * @skb: current skb to pass on for waiting - * @delay: sleep timeout in milliseconds (> 0) - * This function is called by default when the socket is closed, and - * when a non-zero linger time is set on the socket. For consistency + * @delay: timeout in jiffies + * + * This is used by CCIDs which need to delay the send time in process context. */ -static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) +static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) { - struct dccp_sock *dp = dccp_sk(sk); DEFINE_WAIT(wait); - unsigned long jiffdelay; - int rc; + long remaining; - do { - dccp_pr_debug("delayed send by %d msec\n", delay); - jiffdelay = msecs_to_jiffies(delay); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + sk->sk_write_pending++; + release_sock(sk); - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + remaining = schedule_timeout(delay); - sk->sk_write_pending++; - release_sock(sk); - schedule_timeout(jiffdelay); - lock_sock(sk); - sk->sk_write_pending--; + lock_sock(sk); + sk->sk_write_pending--; + finish_wait(sk_sleep(sk), &wait); - if (sk->sk_err) - goto do_error; - if (signal_pending(current)) - goto do_interrupted; + if (signal_pending(current) || sk->sk_err) + return -1; + return remaining; +} - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - } while ((delay = rc) > 0); -out: - finish_wait(sk_sleep(sk), &wait); - return rc; - -do_error: - rc = -EPIPE; - goto out; -do_interrupted: - rc = -EINTR; - goto out; +/** + * dccp_xmit_packet - Send data packet under control of CCID + * Transmits next-queued payload and informs CCID to account for the packet. + */ +static void dccp_xmit_packet(struct sock *sk) +{ + int err, len; + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb = dccp_qpolicy_pop(sk); + + if (unlikely(skb == NULL)) + return; + len = skb->len; + + if (sk->sk_state == DCCP_PARTOPEN) { + const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; + /* + * See 8.1.5 - Handshake Completion. + * + * For robustness we resend Confirm options until the client has + * entered OPEN. During the initial feature negotiation, the MPS + * is smaller than usual, reduced by the Change/Confirm options. + */ + if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { + DCCP_WARN("Payload too large (%d) for featneg.\n", len); + dccp_send_ack(sk); + dccp_feat_list_purge(&dp->dccps_featneg); + } + + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + inet_csk(sk)->icsk_rto, + DCCP_RTO_MAX); + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) { + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK; + } else { + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA; + } + + err = dccp_transmit_skb(sk, skb); + if (err) + dccp_pr_debug("transmit_skb() returned err=%d\n", err); + /* + * Register this one as sent even if an error occurred. To the remote + * end a local packet drop is indistinguishable from network loss, i.e. + * any local drop will eventually be reported via receiver feedback. + */ + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len); + + /* + * If the CCID needs to transfer additional header options out-of-band + * (e.g. Ack Vectors or feature-negotiation options), it activates this + * flag to schedule a Sync. The Sync will automatically incorporate all + * currently pending header options, thus clearing the backlog. + */ + if (dp->dccps_sync_scheduled) + dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } -void dccp_write_xmit(struct sock *sk, int block) +/** + * dccp_flush_write_queue - Drain queue at end of connection + * Since dccp_sendmsg queues packets without waiting for them to be sent, it may + * happen that the TX queue is not empty at the end of a connection. We give the + * HC-sender CCID a grace period of up to @time_budget jiffies. If this function + * returns with a non-empty write queue, it will be purged later. + */ +void dccp_flush_write_queue(struct sock *sk, long *time_budget) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; + long delay, rc; - while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - - if (err > 0) { - if (!block) { - sk_reset_timer(sk, &dp->dccps_xmit_timer, - msecs_to_jiffies(err)+jiffies); - break; - } else - err = dccp_wait_for_ccid(sk, skb, err); - if (err && err != -EINTR) - DCCP_BUG("err=%d after dccp_wait_for_ccid", err); - } + while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) { + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); - skb_dequeue(&sk->sk_write_queue); - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; - - if (sk->sk_state == DCCP_PARTOPEN) { - const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; - /* - * See 8.1.5 - Handshake Completion. - * - * For robustness we resend Confirm options until the client has - * entered OPEN. During the initial feature negotiation, the MPS - * is smaller than usual, reduced by the Change/Confirm options. - */ - if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { - DCCP_WARN("Payload too large (%d) for featneg.\n", len); - dccp_send_ack(sk); - dccp_feat_list_purge(&dp->dccps_featneg); - } - - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - inet_csk(sk)->icsk_rto, - DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - if (err) - DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", - err); - } else { - dccp_pr_debug("packet discarded due to err=%d\n", err); + switch (ccid_packet_dequeue_eval(rc)) { + case CCID_PACKET_WILL_DEQUEUE_LATER: + /* + * If the CCID determines when to send, the next sending + * time is unknown or the CCID may not even send again + * (e.g. remote host crashes or lost Ack packets). + */ + DCCP_WARN("CCID did not manage to send all packets\n"); + return; + case CCID_PACKET_DELAY: + delay = msecs_to_jiffies(rc); + if (delay > *time_budget) + return; + rc = dccp_wait_for_ccid(sk, delay); + if (rc < 0) + return; + *time_budget -= (delay - rc); + /* check again if we can send now */ + break; + case CCID_PACKET_SEND_AT_ONCE: + dccp_xmit_packet(sk); + break; + case CCID_PACKET_ERR: + skb_dequeue(&sk->sk_write_queue); kfree_skb(skb); + dccp_pr_debug("packet discarded due to err=%ld\n", rc); + } + } +} + +void dccp_write_xmit(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + + while ((skb = dccp_qpolicy_top(sk))) { + int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); + + switch (ccid_packet_dequeue_eval(rc)) { + case CCID_PACKET_WILL_DEQUEUE_LATER: + return; + case CCID_PACKET_DELAY: + sk_reset_timer(sk, &dp->dccps_xmit_timer, + jiffies + msecs_to_jiffies(rc)); + return; + case CCID_PACKET_SEND_AT_ONCE: + dccp_xmit_packet(sk); + break; + case CCID_PACKET_ERR: + dccp_qpolicy_drop(sk, skb); + dccp_pr_debug("packet discarded due to err=%d\n", rc); } } } @@ -356,10 +409,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb_dst_set(skb, dst_clone(dst)); dreq = dccp_rsk(req); - if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ - dccp_inc_seqno(&dreq->dreq_iss); + if (inet_rsk(req)->acked) /* increase GSS upon retransmission */ + dccp_inc_seqno(&dreq->dreq_gss); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_gss; /* Resolve feature dependencies resulting from choice of CCID */ if (dccp_feat_server_ccid_dependencies(dreq)) @@ -371,14 +424,14 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_sport = inet_rsk(req)->loc_port; - dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_sport = htons(inet_rsk(req)->ir_num); + dh->dccph_dport = inet_rsk(req)->ir_rmt_port; dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dreq->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_gss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_gsr); dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dccp_csum_outgoing(skb); @@ -474,8 +527,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) /* * Do all connect socket setups that can be done AF independent. */ -static inline void dccp_connect_init(struct sock *sk) +int dccp_connect(struct sock *sk) { + struct sk_buff *skb; struct dccp_sock *dp = dccp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -485,22 +539,12 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ - dp->dccps_gar = dp->dccps_iss; - - icsk->icsk_retransmits = 0; -} - -int dccp_connect(struct sock *sk) -{ - struct sk_buff *skb; - struct inet_connection_sock *icsk = inet_csk(sk); - /* do not connect if feature negotiation setup fails */ if (dccp_feat_finalise_settings(dccp_sk(sk))) return -EPROTO; - dccp_connect_init(sk); + /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */ + dp->dccps_gar = dp->dccps_iss; skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); if (unlikely(skb == NULL)) @@ -511,11 +555,11 @@ int dccp_connect(struct sock *sk) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ + icsk->icsk_retransmits = 0; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, DCCP_RTO_MAX); return 0; @@ -602,6 +646,12 @@ void dccp_send_sync(struct sock *sk, const u64 ackno, DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno; + /* + * Clear the flag in case the Sync was scheduled for out-of-band data, + * such as carrying a long Ack Vector. + */ + dccp_sk(sk)->dccps_sync_scheduled = 0; + dccp_transmit_skb(sk, skb); } @@ -630,9 +680,7 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; if (active) { - dccp_write_xmit(sk, 1); - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, prio)); + skb = dccp_skb_entail(sk, skb); /* * Retransmission timer for active-close: RFC 4340, 8.3 requires * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ @@ -645,6 +693,6 @@ void dccp_send_close(struct sock *sk, const int active) */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); - } else - dccp_transmit_skb(sk, skb); + } + dccp_transmit_skb(sk, skb); } |
