[Xenial][SRU][CVE-2018-20784][PATCH 6/6] sched/fair: Fix O(nr_cgroups) in the load balancing path
Connor Kuehl
connor.kuehl at canonical.com
Thu Oct 17 17:51:33 UTC 2019
From: Vincent Guittot <vincent.guittot at linaro.org>
CVE-2018-20784
This re-applies the commit reverted here:
commit c40f7d74c741 ("sched/fair: Fix infinite loop in update_blocked_averages() by reverting a9e7f6544b9c")
I.e. now that a cfs_rq can be safely removed from / added to the list, we can re-apply:
commit a9e7f6544b9c ("sched/fair: Fix O(nr_cgroups) in load balance path")
Signed-off-by: Vincent Guittot <vincent.guittot at linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
Cc: Linus Torvalds <torvalds at linux-foundation.org>
Cc: Peter Zijlstra <peterz at infradead.org>
Cc: Thomas Gleixner <tglx at linutronix.de>
Cc: sargun at sargun.me
Cc: tj at kernel.org
Cc: xiexiuqi at huawei.com
Cc: xiezhipeng1 at huawei.com
Link: https://lkml.kernel.org/r/1549469662-13614-3-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo at kernel.org>
(backported from commit 039ae8bcf7a5f4476f4487e6bf816885fb3fb617)
[ Connor Kuehl: In 'cfs_rq_is_decayed' the data member
'runnable_load_sum' belongs to struct cfs_rq and not sched_avg, so
update that. Some instances of 'for_each_leaf_cfs_rq' required manual
updating to the new 'for_each_leaf_cfs_rq_safe' and the last hunk for
'update_blocked_averages' required manual placement. ]
Signed-off-by: Connor Kuehl <connor.kuehl at canonical.com>
---
kernel/sched/fair.c | 48 +++++++++++++++++++++++++++++++++------------
1 file changed, 36 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8bc9a5e24380..d16eec4b8d76 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -376,9 +376,10 @@ static inline void assert_list_leaf_cfs_rq(struct rq *rq)
WARN_ON_ONCE((rq->tmp_alone_branch != &rq->leaf_cfs_rq_list));
}
-/* Iterate through all cfs_rq's on a runqueue in bottom-up order */
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
- list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+/* Iterate thr' all leaf cfs_rq's on a runqueue */
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \
+ list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list, \
+ leaf_cfs_rq_list)
/* Do the two (enqueued) entities belong to the same group ? */
static inline struct cfs_rq *
@@ -476,8 +477,8 @@ static inline void assert_list_leaf_cfs_rq(struct rq *rq)
{
}
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
- for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \
+ for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
@@ -4203,9 +4204,9 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
static void __maybe_unused update_runtime_enabled(struct rq *rq)
{
- struct cfs_rq *cfs_rq;
+ struct cfs_rq *cfs_rq, *pos;
- for_each_leaf_cfs_rq(rq, cfs_rq) {
+ for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth;
raw_spin_lock(&cfs_b->lock);
@@ -4218,7 +4219,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
{
struct cfs_rq *cfs_rq, *pos;
- for_each_leaf_cfs_rq(rq, cfs_rq) {
+ for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
if (!cfs_rq->runtime_enabled)
continue;
@@ -6132,10 +6133,27 @@ static void attach_tasks(struct lb_env *env)
#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+ if (cfs_rq->load.weight)
+ return false;
+
+ if (cfs_rq->avg.load_sum)
+ return false;
+
+ if (cfs_rq->avg.util_sum)
+ return false;
+
+ if (cfs_rq->runnable_load_sum)
+ return false;
+
+ return true;
+}
+
static void update_blocked_averages(int cpu)
{
struct rq *rq = cpu_rq(cpu);
- struct cfs_rq *cfs_rq;
+ struct cfs_rq *cfs_rq, *pos;
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
@@ -6145,9 +6163,15 @@ static void update_blocked_averages(int cpu)
* Iterates the task_group tree in a bottom up fashion, see
* list_add_leaf_cfs_rq() for details.
*/
- for_each_leaf_cfs_rq(rq, cfs_rq) {
+ for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
update_tg_load_avg(cfs_rq, 0);
+ /*
+ * There can be a lot of idle CPU cgroups. Don't let fully
+ * decayed cfs_rqs linger on the list.
+ */
+ if (cfs_rq_is_decayed(cfs_rq))
+ list_del_leaf_cfs_rq(cfs_rq);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -8554,10 +8578,10 @@ const struct sched_class fair_sched_class = {
#ifdef CONFIG_SCHED_DEBUG
void print_cfs_stats(struct seq_file *m, int cpu)
{
- struct cfs_rq *cfs_rq;
+ struct cfs_rq *cfs_rq, *pos;
rcu_read_lock();
- for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+ for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
print_cfs_rq(m, cpu, cfs_rq);
rcu_read_unlock();
}
--
2.17.1
More information about the kernel-team
mailing list