[PATCH 1/2][Focal] Revert "netfilter: x_tables: Switch synchronization to RCU"
Tim Gardner
tim.gardner at canonical.com
Thu Apr 8 18:40:47 UTC 2021
From: Mark Tomlinson <mark.tomlinson at alliedtelesis.co.nz>
CVE-2021-29650
This reverts commit cc00bcaa589914096edef7fb87ca5cee4a166b5c.
This (and the preceding) patch basically re-implemented the RCU
mechanisms of patch 784544739a25. That patch was replaced because of the
performance problems that it created when replacing tables. Now, we have
the same issue: the call to synchronize_rcu() makes replacing tables
slower by as much as an order of magnitude.
Prior to using RCU a script calling "iptables" approx. 200 times was
taking 1.16s. With RCU this increased to 11.59s.
Revert these patches and fix the issue in a different way.
Signed-off-by: Mark Tomlinson <mark.tomlinson at alliedtelesis.co.nz>
Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>
(cherry picked from commit d3d40f237480abf3268956daf18cdc56edd32834)
Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
---
include/linux/netfilter/x_tables.h | 5 +--
net/ipv4/netfilter/arp_tables.c | 14 ++++-----
net/ipv4/netfilter/ip_tables.c | 14 ++++-----
net/ipv6/netfilter/ip6_tables.c | 14 ++++-----
net/netfilter/x_tables.c | 49 +++++++++++++++++++++---------
5 files changed, 56 insertions(+), 40 deletions(-)
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index f5c21b7d2974..1b261c51b3a3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -227,7 +227,7 @@ struct xt_table {
unsigned int valid_hooks;
/* Man behind the curtain... */
- struct xt_table_info __rcu *private;
+ struct xt_table_info *private;
/* Set this to THIS_MODULE if you are a module, otherwise NULL */
struct module *me;
@@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
-struct xt_table_info
-*xt_table_get_private_protected(const struct xt_table *table);
-
#ifdef CONFIG_COMPAT
#include <net/compat.h>
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8394afcb2c8f..22491532c41b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
- private = rcu_access_pointer(table->private);
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
@@ -666,7 +666,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
@@ -690,7 +690,7 @@ static int copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
const struct arpt_entry *e;
struct xt_counters *counters;
- struct xt_table_info *private = xt_table_get_private_protected(table);
+ struct xt_table_info *private = table->private;
int ret = 0;
void *loc_cpu_entry;
@@ -825,7 +825,7 @@ static int get_info(struct net *net, void __user *user,
t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
if (!IS_ERR(t)) {
struct arpt_getinfo info;
- const struct xt_table_info *private = xt_table_get_private_protected(t);
+ const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
struct xt_table_info tmp;
@@ -878,7 +878,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
if (!IS_ERR(t)) {
- const struct xt_table_info *private = xt_table_get_private_protected(t);
+ const struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1037,7 +1037,7 @@ static int do_add_counters(struct net *net, const void __user *user,
}
local_bh_disable();
- private = xt_table_get_private_protected(t);
+ private = t->private;
if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1374,7 +1374,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
void __user *userptr)
{
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 62db61532163..f3df4be7af7b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb,
WARN_ON(!(table->valid_hooks & (1 << hook)));
local_bh_disable();
addend = xt_write_recseq_begin();
- private = rcu_access_pointer(table->private);
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
@@ -808,7 +808,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -832,7 +832,7 @@ copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
const struct ipt_entry *e;
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
int ret = 0;
const void *loc_cpu_entry;
@@ -982,7 +982,7 @@ static int get_info(struct net *net, void __user *user,
t = xt_request_find_table_lock(net, AF_INET, name);
if (!IS_ERR(t)) {
struct ipt_getinfo info;
- const struct xt_table_info *private = xt_table_get_private_protected(t);
+ const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
struct xt_table_info tmp;
@@ -1036,7 +1036,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
t = xt_find_table_lock(net, AF_INET, get.name);
if (!IS_ERR(t)) {
- const struct xt_table_info *private = xt_table_get_private_protected(t);
+ const struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
@@ -1192,7 +1192,7 @@ do_add_counters(struct net *net, const void __user *user,
}
local_bh_disable();
- private = xt_table_get_private_protected(t);
+ private = t->private;
if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1587,7 +1587,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *userptr)
{
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 30eb8307582f..7a875be17c3b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
- private = rcu_access_pointer(table->private);
+ private = READ_ONCE(table->private); /* Address dependency. */
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -824,7 +824,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -848,7 +848,7 @@ copy_entries_to_user(unsigned int total_size,
unsigned int off, num;
const struct ip6t_entry *e;
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
int ret = 0;
const void *loc_cpu_entry;
@@ -998,7 +998,7 @@ static int get_info(struct net *net, void __user *user,
t = xt_request_find_table_lock(net, AF_INET6, name);
if (!IS_ERR(t)) {
struct ip6t_getinfo info;
- const struct xt_table_info *private = xt_table_get_private_protected(t);
+ const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
struct xt_table_info tmp;
@@ -1053,7 +1053,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
t = xt_find_table_lock(net, AF_INET6, get.name);
if (!IS_ERR(t)) {
- struct xt_table_info *private = xt_table_get_private_protected(t);
+ struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
@@ -1208,7 +1208,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
}
local_bh_disable();
- private = xt_table_get_private_protected(t);
+ private = t->private;
if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
@@ -1596,7 +1596,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
void __user *userptr)
{
struct xt_counters *counters;
- const struct xt_table_info *private = xt_table_get_private_protected(table);
+ const struct xt_table_info *private = table->private;
void __user *pos;
unsigned int size;
int ret = 0;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8b60fc04c67c..ef6d51a3798b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1351,14 +1351,6 @@ struct xt_counters *xt_counters_alloc(unsigned int counters)
}
EXPORT_SYMBOL(xt_counters_alloc);
-struct xt_table_info
-*xt_table_get_private_protected(const struct xt_table *table)
-{
- return rcu_dereference_protected(table->private,
- mutex_is_locked(&xt[table->af].mutex));
-}
-EXPORT_SYMBOL(xt_table_get_private_protected);
-
struct xt_table_info *
xt_replace_table(struct xt_table *table,
unsigned int num_counters,
@@ -1366,6 +1358,7 @@ xt_replace_table(struct xt_table *table,
int *error)
{
struct xt_table_info *private;
+ unsigned int cpu;
int ret;
ret = xt_jumpstack_alloc(newinfo);
@@ -1375,20 +1368,47 @@ xt_replace_table(struct xt_table *table,
}
/* Do the substitution. */
- private = xt_table_get_private_protected(table);
+ local_bh_disable();
+ private = table->private;
/* Check inside lock: is the old number correct? */
if (num_counters != private->number) {
pr_debug("num_counters != table->private->number (%u/%u)\n",
num_counters, private->number);
+ local_bh_enable();
*error = -EAGAIN;
return NULL;
}
newinfo->initial_entries = private->initial_entries;
+ /*
+ * Ensure contents of newinfo are visible before assigning to
+ * private.
+ */
+ smp_wmb();
+ table->private = newinfo;
+
+ /* make sure all cpus see new ->private value */
+ smp_wmb();
- rcu_assign_pointer(table->private, newinfo);
- synchronize_rcu();
+ /*
+ * Even though table entries have now been swapped, other CPU's
+ * may still be using the old entries...
+ */
+ local_bh_enable();
+
+ /* ... so wait for even xt_recseq on all cpus */
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+ u32 seq = raw_read_seqcount(s);
+
+ if (seq & 1) {
+ do {
+ cond_resched();
+ cpu_relax();
+ } while (seq == raw_read_seqcount(s));
+ }
+ }
#ifdef CONFIG_AUDIT
if (audit_enabled) {
@@ -1429,12 +1449,12 @@ struct xt_table *xt_register_table(struct net *net,
}
/* Simplifies replace_table code. */
- rcu_assign_pointer(table->private, bootstrap);
+ table->private = bootstrap;
if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock;
- private = xt_table_get_private_protected(table);
+ private = table->private;
pr_debug("table->private->number = %u\n", private->number);
/* save number of initial entries */
@@ -1457,8 +1477,7 @@ void *xt_unregister_table(struct xt_table *table)
struct xt_table_info *private;
mutex_lock(&xt[table->af].mutex);
- private = xt_table_get_private_protected(table);
- RCU_INIT_POINTER(table->private, NULL);
+ private = table->private;
list_del(&table->list);
mutex_unlock(&xt[table->af].mutex);
kfree(table);
--
2.17.1
More information about the kernel-team
mailing list