[SRU][F:linux-bluefield][PATCH 04/10] netfilter: conntrack: convert to refcount_t api
Bodong Wang
bodong at nvidia.com
Thu Oct 27 21:26:53 UTC 2022
From: Florian Westphal <fw at strlen.de>
BugLink: https://bugs.launchpad.net/bugs/1995004
Convert nf_conn reference counting from atomic_t to refcount_t based api.
refcount_t api provides more runtime sanity checks and will warn on
certain constructs, e.g. refcount_inc() on a zero reference count, which
usually indicates use-after-free.
For this reason template allocation is changed to init the refcount to
1, the subsequenct add operations are removed.
Likewise, init_conntrack() is changed to set the initial refcount to 1
instead refcount_inc().
This is safe because the new entry is not (yet) visible to other cpus.
Signed-off-by: Florian Westphal <fw at strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>
(Cherry-picked from upstream 719774377622bc4025d2a74f551b5dc2158c6c30)
Signed-off-by: Bodong Wang <bodong at nvidia.com>
---
include/linux/netfilter/nf_conntrack_common.h | 8 ++++----
net/netfilter/nf_conntrack_core.c | 26 +++++++++++++-------------
net/netfilter/nf_conntrack_expect.c | 4 ++--
net/netfilter/nf_conntrack_netlink.c | 6 +++---
net/netfilter/nf_conntrack_standalone.c | 4 ++--
net/netfilter/nf_flow_table_core.c | 2 +-
net/netfilter/nf_synproxy_core.c | 1 -
net/netfilter/nft_ct.c | 4 +---
net/netfilter/xt_CT.c | 3 +--
net/openvswitch/conntrack.c | 1 -
net/sched/act_ct.c | 1 -
11 files changed, 27 insertions(+), 33 deletions(-)
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 1db83c9..5230316 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -2,7 +2,7 @@
#ifndef _NF_CONNTRACK_COMMON_H
#define _NF_CONNTRACK_COMMON_H
-#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <uapi/linux/netfilter/nf_conntrack_common.h>
struct ip_conntrack_stat {
@@ -24,19 +24,19 @@ struct ip_conntrack_stat {
#define NFCT_PTRMASK ~(NFCT_INFOMASK)
struct nf_conntrack {
- atomic_t use;
+ refcount_t use;
};
void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{
- if (nfct && atomic_dec_and_test(&nfct->use))
+ if (nfct && refcount_dec_and_test(&nfct->use))
nf_conntrack_destroy(nfct);
}
static inline void nf_conntrack_get(struct nf_conntrack *nfct)
{
if (nfct)
- atomic_inc(&nfct->use);
+ refcount_inc(&nfct->use);
}
#endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f8213dc..b993618 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -560,7 +560,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
nf_ct_zone_add(tmpl, zone);
- atomic_set(&tmpl->ct_general.use, 0);
+ refcount_set(&tmpl->ct_general.use, 1);
return tmpl;
}
@@ -594,7 +594,7 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
struct nf_conn *ct = (struct nf_conn *)nfct;
pr_debug("destroy_conntrack(%p)\n", ct);
- WARN_ON(atomic_read(&nfct->use) != 0);
+ WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -713,7 +713,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
- if (!atomic_inc_not_zero(&ct->ct_general.use))
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
if (nf_ct_should_gc(ct))
@@ -781,7 +781,7 @@ static void nf_ct_gc_expired(struct nf_conn *ct)
* in, try to obtain a reference and re-check tuple
*/
ct = nf_ct_tuplehash_to_ctrack(h);
- if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+ if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
@@ -850,7 +850,7 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
smp_wmb();
/* The caller holds a reference to this object */
- atomic_set(&ct->ct_general.use, 2);
+ refcount_set(&ct->ct_general.use, 2);
__nf_conntrack_hash_insert(ct, hash, reply_hash);
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
@@ -900,7 +900,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
{
struct nf_conn_tstamp *tstamp;
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
/* set conntrack timestamp, if enabled. */
@@ -1277,7 +1277,7 @@ static unsigned int early_drop_list(struct net *net,
nf_ct_is_dying(tmp))
continue;
- if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
/* kill only if still in same netns -- might have moved due to
@@ -1393,7 +1393,7 @@ static void gc_worker(struct work_struct *work)
continue;
/* need to take reference to avoid possible races */
- if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
if (gc_worker_skip_ct(tmp)) {
@@ -1494,7 +1494,7 @@ static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
- atomic_set(&ct->ct_general.use, 0);
+ refcount_set(&ct->ct_general.use, 0);
return ct;
out:
atomic_dec(&net->ct.count);
@@ -1518,7 +1518,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- WARN_ON(atomic_read(&ct->ct_general.use) != 0);
+ WARN_ON(refcount_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
@@ -1607,8 +1607,8 @@ void nf_conntrack_free(struct nf_conn *ct)
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
- /* Now it is inserted into the unconfirmed list, bump refcount */
- nf_conntrack_get(&ct->ct_general);
+ /* Now it is inserted into the unconfirmed list, set refcount to 1. */
+ refcount_set(&ct->ct_general.use, 1);
nf_ct_add_to_unconfirmed_list(ct);
local_bh_enable();
@@ -2201,7 +2201,7 @@ static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
return NULL;
found:
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
spin_unlock(lockp);
local_bh_enable();
return ct;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 42557d2..516a9f0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -187,12 +187,12 @@ struct nf_conntrack_expect *
* about to invoke ->destroy(), or nf_ct_delete() via timeout
* or early_drop().
*
- * The atomic_inc_not_zero() check tells: If that fails, we
+ * The refcount_inc_not_zero() check tells: If that fails, we
* know that the ct is being destroyed. If it succeeds, we
* can be sure the ct cannot disappear underneath.
*/
if (unlikely(nf_ct_is_dying(exp->master) ||
- !atomic_inc_not_zero(&exp->master->ct_general.use)))
+ !refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0a9dff3..d6339db 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -501,7 +501,7 @@ static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
{
- if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use))))
+ if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use))))
goto nla_put_failure;
return 0;
@@ -940,7 +940,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_is_expired(ct)) {
if (i < ARRAY_SIZE(nf_ct_evict) &&
- atomic_inc_not_zero(&ct->ct_general.use))
+ refcount_inc_not_zero(&ct->ct_general.use))
nf_ct_evict[i++] = ct;
continue;
}
@@ -1441,7 +1441,7 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
ct);
rcu_read_unlock();
if (res < 0) {
- if (!atomic_inc_not_zero(&ct->ct_general.use))
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
continue;
cb->args[0] = cpu;
cb->args[1] = (unsigned long)ct;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 1e78ad8..0d2b3f8 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -300,7 +300,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
int ret = 0;
WARN_ON(!ct);
- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
return 0;
if (nf_ct_should_gc(ct)) {
@@ -367,7 +367,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
- seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
+ seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use));
if (seq_has_overflowed(s))
goto release;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index c7d4416..dd29f4f 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
struct flow_offload *flow;
if (unlikely(nf_ct_is_dying(ct) ||
- !atomic_inc_not_zero(&ct->ct_general.use)))
+ !refcount_inc_not_zero(&ct->ct_general.use)))
return NULL;
flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c6c0d27..df94f72 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -349,7 +349,6 @@ static int __net_init synproxy_net_init(struct net *net)
goto err2;
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
- nf_conntrack_get(&ct->ct_general);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 2899173..38cb892 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -258,7 +258,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
- if (likely(atomic_read(&ct->ct_general.use) == 1)) {
+ if (likely(refcount_read(&ct->ct_general.use) == 1)) {
nf_ct_zone_add(ct, &zone);
} else {
/* previous skb got queued to userspace */
@@ -269,7 +269,6 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
}
}
- atomic_inc(&ct->ct_general.use);
nf_ct_set(skb, ct, IP_CT_NEW);
}
#endif
@@ -374,7 +373,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
- atomic_set(&tmp->ct_general.use, 1);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index d4deee39..ffff1e1 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
return XT_CONTINUE;
if (ct) {
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
nf_ct_set(skb, ct, IP_CT_NEW);
} else {
nf_ct_set(skb, ct, IP_CT_UNTRACKED);
@@ -202,7 +202,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
- nf_conntrack_get(&ct->ct_general);
out:
info->ct = ct;
return 0;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 62ab962..6e30440 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1723,7 +1723,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
goto err_free_ct;
__set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
- nf_conntrack_get(&ct_info.ct->ct_general);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 0f44608..32a720e 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1255,7 +1255,6 @@ static int tcf_ct_fill_params(struct net *net,
return -ENOMEM;
}
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
- nf_conntrack_get(&tmpl->ct_general);
p->tmpl = tmpl;
return 0;
--
1.8.3.1
More information about the kernel-team
mailing list