[3.16.y-ckt stable] Patch "tcp: make connect() mem charging friendly" has been added to staging queue

Luis Henriques luis.henriques at canonical.com
Tue Mar 24 15:35:50 UTC 2015


This is a note to let you know that I have just added a patch titled

    tcp: make connect() mem charging friendly

to the linux-3.16.y-queue branch of the 3.16.y-ckt extended stable tree 
which can be found at:

 http://kernel.ubuntu.com/git?p=ubuntu/linux.git;a=shortlog;h=refs/heads/linux-3.16.y-queue

This patch is scheduled to be released in version 3.16.7-ckt9.

If you, or anyone else, feels it should not be added to this tree, please 
reply to this email.

For more information about the 3.16.y-ckt tree, see
https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable

Thanks.
-Luis

------

From bea5f6ef9fcb34e77d2d0426c5a8b576e17edb1f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet at google.com>
Date: Mon, 17 Nov 2014 23:06:20 -0800
Subject: tcp: make connect() mem charging friendly

commit 355a901e6cf1b2b763ec85caa2a9f04fbcc4ab4a upstream.

While working on sk_forward_alloc problems reported by Denys
Fedoryshchenko, we found that tcp connect() (and fastopen) do not call
sk_wmem_schedule() for the SYN packet (and/or the SYN/DATA packet), so
sk_forward_alloc is negative while connect() is in progress.

We can fix this by calling the regular sk_stream_alloc_skb() helper both
for the SYN packet (in tcp_connect()) and for the syn_data packet (in
tcp_send_syn_data()).
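
For readers unfamiliar with the send-memory accounting involved, the
sketch below is a simplified userspace illustration (not kernel code) of
why charging before queueing keeps sk_forward_alloc non-negative; the
toy_sock/toy_charge names are hypothetical, and it only mimics the
pattern sk_stream_alloc_skb() follows by calling sk_wmem_schedule()
before the skb is queued.

/* Conceptual userspace sketch, not kernel code: it only illustrates the
 * accounting idea behind sk_wmem_schedule()/sk_stream_alloc_skb().  The
 * names toy_sock and toy_charge are hypothetical.
 */
#include <stdio.h>

#define TOY_PAGE_SIZE 4096L

struct toy_sock {
	long forward_alloc;	/* bytes reserved but not yet consumed */
};

/* Reserve memory in page-sized chunks before consuming it, as
 * sk_wmem_schedule() does; here we assume the global quota always
 * allows the reservation.
 */
static void toy_charge(struct toy_sock *sk, long truesize)
{
	if (sk->forward_alloc < truesize) {
		long need = truesize - sk->forward_alloc;
		long pages = (need + TOY_PAGE_SIZE - 1) / TOY_PAGE_SIZE;

		sk->forward_alloc += pages * TOY_PAGE_SIZE;
	}
	sk->forward_alloc -= truesize;
}

int main(void)
{
	struct toy_sock sk = { .forward_alloc = 0 };

	/* Old connect() path: the SYN skb is queued without charging
	 * first, so the counter goes negative until it is reconciled.
	 */
	sk.forward_alloc -= 512;
	printf("uncharged queueing: forward_alloc = %ld\n", sk.forward_alloc);

	/* Patched path: charge before queueing, so the counter stays >= 0. */
	sk.forward_alloc = 0;
	toy_charge(&sk, 512);
	printf("charged queueing:   forward_alloc = %ld\n", sk.forward_alloc);

	return 0;
}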

Then, tcp_send_syn_data() can avoid copying syn_data, as we can simply
manipulate syn_data->cb[] to remove the SYN flag (and increment seq).

Instead of open-coding the copy from the iovec, we simply use the
memcpy_fromiovecend() helper.

This leaves clean fast-clone skbs in the socket write queue.

This was tested against our fastopen packetdrill tests.

Reported-by: Denys Fedoryshchenko <nuclearcat at nuclearcat.com>
Signed-off-by: Eric Dumazet <edumazet at google.com>
Acked-by: Yuchung Cheng <ycheng at google.com>
Signed-off-by: David S. Miller <davem at davemloft.net>
Signed-off-by: Luis Henriques <luis.henriques at canonical.com>
---
 net/ipv4/tcp_output.c | 68 +++++++++++++++++++++------------------------------
 1 file changed, 28 insertions(+), 40 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index faf54abdb4d7..d5457e40f5be 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2954,9 +2954,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_fastopen_request *fo = tp->fastopen_req;
-	int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
-	struct sk_buff *syn_data = NULL, *data;
+	int syn_loss = 0, space, err = 0;
 	unsigned long last_syn_loss = 0;
+	struct sk_buff *syn_data;

 	tp->rx_opt.mss_clamp = tp->advmss;  /* If MSS is not cached */
 	tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
@@ -2987,48 +2987,40 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	/* limit to order-0 allocations */
 	space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));

-	syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
-				   sk->sk_allocation);
-	if (syn_data == NULL)
+	syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
+	if (!syn_data)
 		goto fallback;
+	syn_data->ip_summed = CHECKSUM_PARTIAL;
+	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
+	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
+					 fo->data->msg_iov, 0, space))) {
+		kfree_skb(syn_data);
+		goto fallback;
+	}

-	for (i = 0; i < iovlen && syn_data->len < space; ++i) {
-		struct iovec *iov = &fo->data->msg_iov[i];
-		unsigned char __user *from = iov->iov_base;
-		int len = iov->iov_len;
+	/* No more data pending in inet_wait_for_connect() */
+	if (space == fo->size)
+		fo->data = NULL;
+	fo->copied = space;

-		if (syn_data->len + len > space)
-			len = space - syn_data->len;
-		else if (i + 1 == iovlen)
-			/* No more data pending in inet_wait_for_connect() */
-			fo->data = NULL;
+	tcp_connect_queue_skb(sk, syn_data);

-		if (skb_add_data(syn_data, from, len))
-			goto fallback;
-	}
+	err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);

-	/* Queue a data-only packet after the regular SYN for retransmission */
-	data = pskb_copy(syn_data, sk->sk_allocation);
-	if (data == NULL)
-		goto fallback;
-	TCP_SKB_CB(data)->seq++;
-	TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
-	TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
-	tcp_connect_queue_skb(sk, data);
-	fo->copied = data->len;
-
-	/* syn_data is about to be sent, we need to take current time stamps
-	 * for the packets that are in write queue : SYN packet and DATA
-	 */
-	skb_mstamp_get(&syn->skb_mstamp);
-	data->skb_mstamp = syn->skb_mstamp;
+	syn->skb_mstamp = syn_data->skb_mstamp;

-	if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
+	/* Now full SYN+DATA was cloned and sent (or not),
+	 * remove the SYN from the original skb (syn_data)
+	 * we keep in write queue in case of a retransmit, as we
+	 * also have the SYN packet (with no data) in the same queue.
+	 */
+	TCP_SKB_CB(syn_data)->seq++;
+	TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
+	if (!err) {
 		tp->syn_data = (fo->copied > 0);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
 		goto done;
 	}
-	syn_data = NULL;

 fallback:
 	/* Send a regular SYN with Fast Open cookie request option */
@@ -3037,7 +3029,6 @@ fallback:
 	err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
 	if (err)
 		tp->syn_fastopen = 0;
-	kfree_skb(syn_data);
 done:
 	fo->cookie.len = -1;  /* Exclude Fast Open option for SYN retries */
 	return err;
@@ -3057,13 +3048,10 @@ int tcp_connect(struct sock *sk)
 		return 0;
 	}

-	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
-	if (unlikely(buff == NULL))
+	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+	if (unlikely(!buff))
 		return -ENOBUFS;

-	/* Reserve space for headers. */
-	skb_reserve(buff, MAX_TCP_HEADER);
-
 	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tcp_connect_queue_skb(sk, buff);



