[SRU][J][PATCH 9/9] ipc/msg: mitigate the lock contention with percpu counter
Thibault Ferrante
thibault.ferrante at canonical.com
Thu Mar 21 12:14:57 UTC 2024
From: Jiebin Sun <jiebin.sun at intel.com>
BugLink: https://bugs.launchpad.net/bugs/2058485
The msg_bytes and msg_hdrs atomic counters are frequently updated when IPC
msg queue is in heavy use, causing heavy cache bounce and overhead.
Change them to percpu_counter greatly improve the performance. Since
there is one percpu struct per namespace, additional memory cost is
minimal. Reading of the count done in msgctl call, which is infrequent.
So the need to sum up the counts in each CPU is infrequent.
Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).
Score gain: 3.99x
CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)
[akpm at linux-foundation.org: coding-style cleanups]
[jiebin.sun at intel.com: avoid negative value by overflow in msginfo]
Link: https://lkml.kernel.org/r/20220920150809.4014944-1-jiebin.sun@intel.com
[akpm at linux-foundation.org: fix min() warnings]
Link: https://lkml.kernel.org/r/20220913192538.3023708-3-jiebin.sun@intel.com
Signed-off-by: Jiebin Sun <jiebin.sun at intel.com>
Reviewed-by: Tim Chen <tim.c.chen at linux.intel.com>
Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
Cc: Alexey Gladkov <legion at kernel.org>
Cc: Christoph Lameter <cl at linux.com>
Cc: Davidlohr Bueso <dave at stgolabs.net>
Cc: Dennis Zhou <dennis at kernel.org>
Cc: "Eric W . Biederman" <ebiederm at xmission.com>
Cc: Manfred Spraul <manfred at colorfullife.com>
Cc: Shakeel Butt <shakeelb at google.com>
Cc: Tejun Heo <tj at kernel.org>
Cc: Vasily Averin <vasily.averin at linux.dev>
Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
(cherry picked from commit 72d1e611082eda18689106a0c192f2827072713c)
Signed-off-by: Thibault Ferrante <thibault.ferrante at canonical.com>
---
include/linux/ipc_namespace.h | 5 ++--
ipc/msg.c | 48 +++++++++++++++++++++++++----------
ipc/namespace.c | 5 +++-
ipc/util.h | 4 +--
4 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 2c4ef98ae436c..e02b71e3fd6aa 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
struct user_namespace;
@@ -36,8 +37,8 @@ struct ipc_namespace {
unsigned int msg_ctlmax;
unsigned int msg_ctlmnb;
unsigned int msg_ctlmni;
- atomic_t msg_bytes;
- atomic_t msg_hdrs;
+ struct percpu_counter percpu_msg_bytes;
+ struct percpu_counter percpu_msg_hdrs;
size_t shm_ctlmax;
size_t shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c5..e4e0990e08f75 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
#include <asm/current.h>
#include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();
list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,22 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = min_t(int,
+ percpu_counter_sum(&ns->percpu_msg_hdrs),
+ INT_MAX);
+ msginfo->msgtql = min_t(int,
+ percpu_counter_sum(&ns->percpu_msg_bytes),
+ INT_MAX);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}
@@ -935,8 +941,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}
err = 0;
@@ -1159,8 +1165,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
ss_wakeup(msq, &wake_q, false);
goto out_unlock0;
@@ -1297,20 +1303,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;
- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+fail_msg_bytes:
+ return ret;
}
#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index dae5f8affd7ca..556178d2a4ecf 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (!setup_ipc_sysctls(ns))
goto fail_put;
+ err = msg_init_ns(ns);
+ if (err)
+ goto fail_put;
+
sem_init_ns(ns);
- msg_init_ns(ns);
shm_init_ns(ns);
return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8e..b2906e3665394 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
#ifdef CONFIG_SYSVIPC
void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; }
static inline void shm_init_ns(struct ipc_namespace *ns) { }
static inline void sem_exit_ns(struct ipc_namespace *ns) { }
--
2.43.0
More information about the kernel-team
mailing list