[RFC SRU Trusty] tcp: make challenge acks less predictable
Tim Gardner
tim.gardner at canonical.com
Fri Aug 12 13:14:04 UTC 2016
On 08/12/2016 03:32 AM, Stefan Bader wrote:
> This is a rather quick (and not the most elegant) backport of the
> upstream fix for Trusty (Precise will need the same or even more).
> The problem is that neither WRITE_ONCE nor prandom_u32_max exists
> yet. We might be able to simply replace prandom_u32_max with its
> actual function body, but I am not sure I would trust myself enough
> to try the same for the WRITE_ONCE part.
>
> -Stefan
>
>
> From 16961a6e38afb30c0afb41d766b3fa487fb017f4 Mon Sep 17 00:00:00 2001
> From: Eric Dumazet <edumazet at google.com>
> Date: Sun, 10 Jul 2016 10:04:02 +0200
> Subject: [PATCH] tcp: make challenge acks less predictable
>
> Yue Cao claims that current host rate limiting of challenge ACKS
> (RFC 5961) could leak enough information to allow a patient attacker
> to hijack TCP sessions. He will soon provide details in an academic
> paper.
>
> This patch increases the default limit from 100 to 1000, and adds
> some randomization so that the attacker can no longer hijack
> sessions without spending a considerable amount of probes.
>
> Based on initial analysis and patch from Linus.
>
> Note that we also have per socket rate limiting, so it is tempting
> to remove the host limit in the future.
>
> v2: randomize the count of challenge acks per second, not the period.
>
> Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
> Reported-by: Yue Cao <ycao009 at ucr.edu>
> Signed-off-by: Eric Dumazet <edumazet at google.com>
> Suggested-by: Linus Torvalds <torvalds at linux-foundation.org>
> Cc: Yuchung Cheng <ycheng at google.com>
> Cc: Neal Cardwell <ncardwell at google.com>
> Acked-by: Neal Cardwell <ncardwell at google.com>
> Acked-by: Yuchung Cheng <ycheng at google.com>
> Signed-off-by: David S. Miller <davem at davemloft.net>
>
> CVE-2016-5696
>
> (backported from commit 75ff39ccc1bd5d3c455b6822ab09e533c551f758 upstream)
> Signed-off-by: Stefan Bader <stefan.bader at canonical.com>
> ---
> net/ipv4/tcp_input.c | 43 +++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 39 insertions(+), 4 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 2cc1313..4c244cd 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -86,7 +86,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
>  EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
>
>  /* rfc5961 challenge ack rate limiting */
> -int sysctl_tcp_challenge_ack_limit = 100;
> +int sysctl_tcp_challenge_ack_limit = 1000;
>
>  int sysctl_tcp_stdurg __read_mostly;
>  int sysctl_tcp_rfc1337 __read_mostly;
> @@ -3282,23 +3282,58 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
>  	return flag;
>  }
>
> +static __always_inline void __write_once_size(volatile void *p, void *res, int size)
> +{
> +	switch (size) {
> +	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
> +	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
> +	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
> +	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
> +	default:
> +		barrier();
> +		__builtin_memcpy((void *)p, (const void *)res, size);
> +		barrier();
> +	}
> +}
> +
> +#define WRITE_ONCE(x, val) \
> +({							\
> +	union { typeof(x) __val; char __c[1]; } __u =	\
> +		{ .__val = (__force typeof(x)) (val) }; \
> +	__write_once_size(&(x), __u.__c, sizeof(x));	\
> +	__u.__val;					\
> +})
> +
> +static inline u32 prandom_u32_max(u32 ep_ro)
> +{
> +	return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
> +}
> +
>  /* RFC 5961 7 [ACK Throttling] */
>  static void tcp_send_challenge_ack(struct sock *sk)
>  {
>  	/* unprotected vars, we dont care of overwrites */
>  	static u32 challenge_timestamp;
>  	static unsigned int challenge_count;
> -	u32 now = jiffies / HZ;
> +	u32 count, now = jiffies / HZ;
>
> +	/* Check host-wide RFC 5961 rate limit. */
>  	if (now != challenge_timestamp) {
> +		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
> +
>  		challenge_timestamp = now;
> -		challenge_count = 0;
> +		*((volatile unsigned int *) &challenge_count) =
> +			WRITE_ONCE(challenge_count, half +
> +				   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
>  	}
> -	if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
> +	count = ACCESS_ONCE(challenge_count);
> +	if (count > 0) {
> +		WRITE_ONCE(challenge_count, count - 1);
>  		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
>  		tcp_send_ack(sk);
>  	}
>  }
> +#undef WRITE_ONCE
>
>  static void tcp_store_ts_recent(struct tcp_sock *tp)
>  {
>
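The prandom_u32_max() part Stefan mentions looks easy to open-code, since
the upstream helper is just a one-line wrapper around prandom_u32(), which
Trusty already has. A rough sketch of the call site with the helper's body
inlined (untested, not part of the posted patch):

	/* Inline expansion of prandom_u32_max(sysctl_tcp_challenge_ack_limit):
	 * scale a 32-bit random value into [0, limit) without a modulo.
	 */
	WRITE_ONCE(challenge_count, half +
		   (u32)(((u64) prandom_u32() *
			  sysctl_tcp_challenge_ack_limit) >> 32));
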
commit 002e47cc2aec702e1eb850816b44e12611a0eda1 ('kernel: Provide
READ_ONCE and ASSIGN_ONCE') and commit
76858ed8581706c776c56ee872fd5259e40b750d ('kernel: Change
ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)') are clean cherry-picks.
Picking those up first would at least avoid having to embed WRITE_ONCE()
in the backport.
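
With those two commits applied, the backport could use the WRITE_ONCE()
from <linux/compiler.h> directly, and only the local prandom_u32_max()
helper would remain. A sketch of how the tcp_send_challenge_ack() hunk
might then look (untested, essentially the upstream code):

	/* Check host-wide RFC 5961 rate limit. */
	if (now != challenge_timestamp) {
		/* Pick a random budget in [half, half + limit) once per second. */
		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;

		challenge_timestamp = now;
		WRITE_ONCE(challenge_count, half +
			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
	}
	count = ACCESS_ONCE(challenge_count);
	if (count > 0) {
		/* Consume one token from this second's budget. */
		WRITE_ONCE(challenge_count, count - 1);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
		tcp_send_ack(sk);
	}
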
rtg
--
Tim Gardner tim.gardner at canonical.com