[3.8.y.z extended stable] Patch "packet: fix use after free race in send path when dev is released" has been added to staging queue

Kamal Mostafa kamal at canonical.com
Wed Dec 11 20:09:39 UTC 2013


This is a note to let you know that I have just added a patch titled

    packet: fix use after free race in send path when dev is released

to the linux-3.8.y-queue branch of the 3.8.y.z extended stable tree 
which can be found at:

 http://kernel.ubuntu.com/git?p=ubuntu/linux.git;a=shortlog;h=refs/heads/linux-3.8.y-queue

This patch is scheduled to be released in version 3.8.13.15.

If you, or anyone else, feels it should not be added to this tree, please 
reply to this email.

For more information about the 3.8.y.z tree, see
https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable

Thanks.
-Kamal

------

>From 21cdf8835ff9fdf8b4db5f27e05ba823a4cc2904 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman at redhat.com>
Date: Thu, 21 Nov 2013 16:50:58 +0100
Subject: packet: fix use after free race in send path when dev is released

[ Upstream commit e40526cb20b5ee53419452e1f03d97092f144418 ]

Salam reported a use after free bug in PF_PACKET that occurs when
we're sending out frames on a socket bound device and suddenly the
net device is being unregistered. It appears that commit 827d9780
introduced a possible race condition between {t,}packet_snd() and
packet_notifier(). In the case of a bound socket, packet_notifier()
can drop the last reference to the net_device and {t,}packet_snd()
might end up suddenly sending a packet over a freed net_device.

To avoid reverting 827d9780 and thus introducing a performance
regression compared to the current state of things, we decided to
hold a cached RCU protected pointer to the net device and maintain
it on write side via bind spin_lock protected register_prot_hook()
and __unregister_prot_hook() calls.

In {t,}packet_snd() path, we access this pointer under rcu_read_lock
through packet_cached_dev_get() that holds reference to the device
to prevent it from being freed through packet_notifier() while
we're in send path. This is okay to do as dev_put()/dev_hold() are
per-cpu counters, so this should not be a performance issue. Also,
the code simplifies a bit as we don't need need_rls_dev anymore.

Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.")
Reported-by: Salam Noureddine <noureddine at aristanetworks.com>
Signed-off-by: Daniel Borkmann <dborkman at redhat.com>
Signed-off-by: Salam Noureddine <noureddine at aristanetworks.com>
Cc: Ben Greear <greearb at candelatech.com>
Cc: Eric Dumazet <eric.dumazet at gmail.com>
Signed-off-by: David S. Miller <davem at davemloft.net>
Signed-off-by: Kamal Mostafa <kamal at canonical.com>
---
 net/packet/af_packet.c | 59 ++++++++++++++++++++++++++++++--------------------
 net/packet/internal.h  |  1 +
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 517d154..2d5b24a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -236,11 +236,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
 static void register_prot_hook(struct sock *sk)
 {
 	struct packet_sock *po = pkt_sk(sk);
+
 	if (!po->running) {
-		if (po->fanout)
+		if (po->fanout) {
 			__fanout_link(sk, po);
-		else
+		} else {
 			dev_add_pack(&po->prot_hook);
+			rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+		}
+
 		sock_hold(sk);
 		po->running = 1;
 	}
@@ -258,10 +262,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
 	struct packet_sock *po = pkt_sk(sk);

 	po->running = 0;
-	if (po->fanout)
+	if (po->fanout) {
 		__fanout_unlink(sk, po);
-	else
+	} else {
 		__dev_remove_pack(&po->prot_hook);
+		RCU_INIT_POINTER(po->cached_dev, NULL);
+	}
+
 	__sock_put(sk);

 	if (sync) {
@@ -1960,12 +1967,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	return tp_len;
 }

+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = rcu_dereference(po->cached_dev);
+	if (dev)
+		dev_hold(dev);
+	rcu_read_unlock();
+
+	return dev;
+}
+
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	int err, reserve = 0;
 	void *ph;
 	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
@@ -1978,7 +1997,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	mutex_lock(&po->pg_vec_lock);

 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -1992,19 +2011,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}

 	err = -ENXIO;
 	if (unlikely(dev == NULL))
 		goto out;
-
-	reserve = dev->hard_header_len;
-
 	err = -ENETDOWN;
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;

+	reserve = dev->hard_header_len;
+
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

@@ -2081,8 +2098,7 @@ out_status:
 	__packet_set_status(po, ph, status);
 	kfree_skb(skb);
 out_put:
-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);
 out:
 	mutex_unlock(&po->pg_vec_lock);
 	return err;
@@ -2120,7 +2136,6 @@ static int packet_snd(struct socket *sock,
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
-	bool need_rls_dev = false;
 	unsigned char *addr;
 	int err, reserve = 0;
 	struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2136,7 +2151,7 @@ static int packet_snd(struct socket *sock,
 	 */

 	if (saddr == NULL) {
-		dev = po->prot_hook.dev;
+		dev	= packet_cached_dev_get(po);
 		proto	= po->num;
 		addr	= NULL;
 	} else {
@@ -2148,19 +2163,17 @@ static int packet_snd(struct socket *sock,
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
 		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
-		need_rls_dev = true;
 	}

 	err = -ENXIO;
-	if (dev == NULL)
+	if (unlikely(dev == NULL))
 		goto out_unlock;
-	if (sock->type == SOCK_RAW)
-		reserve = dev->hard_header_len;
-
 	err = -ENETDOWN;
-	if (!(dev->flags & IFF_UP))
+	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;

+	if (sock->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	if (po->has_vnet_hdr) {
 		vnet_hdr_len = sizeof(vnet_hdr);

@@ -2293,15 +2306,14 @@ static int packet_snd(struct socket *sock,
 	if (err > 0 && (err = net_xmit_errno(err)) != 0)
 		goto out_unlock;

-	if (need_rls_dev)
-		dev_put(dev);
+	dev_put(dev);

 	return len;

 out_free:
 	kfree_skb(skb);
 out_unlock:
-	if (dev && need_rls_dev)
+	if (dev)
 		dev_put(dev);
 out:
 	return err;
@@ -2521,6 +2533,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	po = pkt_sk(sk);
 	sk->sk_family = PF_PACKET;
 	po->num = proto;
+	RCU_INIT_POINTER(po->cached_dev, NULL);

 	sk->sk_destruct = packet_sock_destruct;
 	sk_refcnt_debug_inc(sk);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e84cab8..821dfee 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -111,6 +111,7 @@ struct packet_sock {
 	unsigned int		tp_loss:1;
 	unsigned int		tp_tx_has_off:1;
 	unsigned int		tp_tstamp;
+	struct net_device __rcu	*cached_dev;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };

--
1.8.3.2





More information about the kernel-team mailing list