[SRU][J][PATCH 5/5] cgroup: Reorganize css_set_lock and kernfs path processing
Philip Cox
philip.cox at canonical.com
Thu Feb 13 19:55:53 UTC 2025
From: Michal Koutný <mkoutny at suse.com>
Commit 74e4b956eb1c incorrectly called kernfs_walk_and_get() (which
might sleep) while holding css_set_lock (a spinlock). css_set_lock is
needed by __cset_cgroup_from_root() to keep cset->cgrp_links stable, but
not by kernfs_walk_and_get().
We only need to make sure that the returned root_cgrp won't be freed
under us. This holds for the global root because it is static
(cgrp_dfl_root.cgrp). When the root_cgrp is lower in the hierarchy, it
is pinned by cgroup_ns->root_cset (and the `current` task cannot switch
namespaces asynchronously, so its nsproxy pins cgroup_ns).
Note that this reasoning does not hold for root cgroups in v1
hierarchies; therefore, create a special-cased helper function just for
the default hierarchy.
Fixes: 74e4b956eb1c ("cgroup: Honor caller's cgroup NS when resolving path")
Reported-by: Dan Carpenter <dan.carpenter at oracle.com>
Signed-off-by: Michal Koutný <mkoutny at suse.com>
Signed-off-by: Tejun Heo <tj at kernel.org>
(backported from commit 46307fd6e27a3f678a1678b02e667678c22aa8cc)
[philcox: context changes in cset_cgroup_from_root()]
Signed-off-by: Philip Cox <philip.cox at canonical.com>
---
kernel/cgroup/cgroup.c | 39 +++++++++++++++++++++++++++------------
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index da2b61f60bfb8..142fd9bd6bc25 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1378,6 +1378,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
}
+/*
+ * Returned cgroup is without refcount but it's valid as long as cset pins it.
+ */
static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
@@ -1389,6 +1392,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
res_cgroup = cset->dfl_cgrp;
} else {
struct cgrp_cset_link *link;
+ lockdep_assert_held(&css_set_lock);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
@@ -1400,6 +1404,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}
}
+ BUG_ON(!res_cgroup);
return res_cgroup;
}
@@ -1425,18 +1430,32 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
return res;
}
+/*
+ * Look up the cgroup associated with the current task's cgroup namespace on
+ * the default hierarchy.
+ *
+ * Unlike current_cgns_cgroup_from_root(), this doesn't need locks:
+ * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu
+ *   pointers.
+ * - css_set_lock is not needed because we just read cset->dfl_cgrp.
+ * - As a bonus, the returned cgrp stays pinned via current, because current
+ *   cannot switch cgroup_ns asynchronously.
+ */
+static struct cgroup *current_cgns_cgroup_dfl(void)
+{
+ struct css_set *cset;
+
+ cset = current->nsproxy->cgroup_ns->root_cset;
+ return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+}
+
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
- struct cgroup *res = NULL;
-
lockdep_assert_held(&css_set_lock);
- res = __cset_cgroup_from_root(cset, root);
-
- BUG_ON(!res);
- return res;
+ return __cset_cgroup_from_root(cset, root);
}
/*
@@ -6057,9 +6076,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!cgrp)
return ERR_PTR(-ENOENT);
- spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
- spin_unlock_irq(&css_set_lock);
+ root_cgrp = current_cgns_cgroup_dfl();
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
cgroup_put(cgrp);
return ERR_PTR(-ENOENT);
@@ -6629,10 +6646,8 @@ struct cgroup *cgroup_get_from_path(const char *path)
struct cgroup *cgrp = ERR_PTR(-ENOENT);
struct cgroup *root_cgrp;
- spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
+ root_cgrp = current_cgns_cgroup_dfl();
kn = kernfs_walk_and_get(root_cgrp->kn, path);
- spin_unlock_irq(&css_set_lock);
if (!kn)
goto out;
--
2.43.0
More information about the kernel-team
mailing list