[SRU Focal] io_uring: add a sysctl to disable io_uring system-wide
Thadeu Lima de Souza Cascardo
cascardo at canonical.com
Mon Sep 11 18:43:07 UTC 2023
From: Matteo Rizzo <matteorizzo at google.com>
Introduce a new sysctl (io_uring_disabled) which can be either 0, 1, or
2. When 0 (the default), all processes are allowed to create io_uring
instances, which is the current behavior. When 1, io_uring creation is
disabled (io_uring_setup() will fail with -EPERM) for unprivileged
processes not in the kernel.io_uring_group group. When 2, calls to
io_uring_setup() fail with -EPERM regardless of privilege.
Signed-off-by: Matteo Rizzo <matteorizzo at google.com>
[JEM: modified to add io_uring_group]
Signed-off-by: Jeff Moyer <jmoyer at redhat.com>
Link: https://lore.kernel.org/r/x49y1i42j1z.fsf@segfault.boston.devel.redhat.com
Signed-off-by: Jens Axboe <axboe at kernel.dk>
(backported from commit 76d3ccecfa186af3120e206d62f03db1a94a535f)
[cascardo: Documentation conflict, position options description in the
right alphabetical order and add the new options to the list at the
beginning of the file]
[cascardo: move code from io_uring/io_uring.c to fs/io_uring.c, use
pointer to new variable two instead of SYSCTL_TWO]
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo at canonical.com>
---
Documentation/admin-guide/sysctl/kernel.rst | 32 ++++++++++++-
fs/io_uring.c | 52 +++++++++++++++++++++
2 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 568c24ff00a7..861154616637 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -48,6 +48,8 @@ show up in /proc/sys/kernel:
- hyperv_record_panic_msg
- kexec_load_disabled
- kptr_restrict
+- io_uring_disabled
+- io_uring_group
- l2cr [ PPC only ]
- modprobe ==> Documentation/debugging-modules.txt
- modules_disabled
@@ -415,7 +417,6 @@ with the "modules_disabled" sysctl.
kptr_restrict:
==============
-
This toggle indicates whether restrictions are placed on
exposing kernel addresses via /proc and other interfaces.
@@ -438,6 +439,35 @@ When kptr_restrict is set to (2), kernel pointers printed using
%pK will be replaced with 0's regardless of privileges.
+io_uring_disabled:
+==================
+
+Controls which processes may create new io_uring instances. Restricting
+creation shrinks the kernel's attack surface.
+
+= ======================================================================
+0 All processes can create io_uring instances as normal. This is the
+ default setting.
+1 io_uring creation is disabled (io_uring_setup() will fail with
+ -EPERM) for unprivileged processes not in the io_uring_group group.
+ Existing io_uring instances can still be used. See the
+ documentation for io_uring_group for more information.
+2 io_uring creation is disabled for all processes. io_uring_setup()
+ always fails with -EPERM. Existing io_uring instances can still be
+ used.
+= ======================================================================
+
+
+io_uring_group:
+===============
+
+When io_uring_disabled is set to 1, a process must either be
+privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
+to create an io_uring instance. If io_uring_group is set to -1 (the
+default), only processes with the CAP_SYS_ADMIN capability may create
+io_uring instances.
+
+
l2cr: (PPC only)
================
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0d374d7ab14f..eeb64bd42b0f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -381,6 +381,32 @@ static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops;
+static int __read_mostly sysctl_io_uring_disabled;
+static int __read_mostly sysctl_io_uring_group = -1;
+
+#ifdef CONFIG_SYSCTL
+static int two = 2;
+static struct ctl_table kernel_io_uring_disabled_table[] = {
+ {
+ .procname = "io_uring_disabled",
+ .data = &sysctl_io_uring_disabled,
+ .maxlen = sizeof(sysctl_io_uring_disabled),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
+ },
+ {
+ .procname = "io_uring_group",
+ .data = &sysctl_io_uring_group,
+ .maxlen = sizeof(gid_t),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {},
+};
+#endif
+
struct sock *io_uring_get_socket(struct file *file)
{
#if defined(CONFIG_UNIX)
@@ -4102,9 +4128,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
return ret;
}
+static inline bool io_uring_allowed(void)
+{
+ int disabled = READ_ONCE(sysctl_io_uring_disabled);
+ kgid_t io_uring_group;
+
+ if (disabled == 2)
+ return false;
+
+ if (disabled == 0 || capable(CAP_SYS_ADMIN))
+ return true;
+
+ io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group);
+ if (!gid_valid(io_uring_group))
+ return false;
+
+ return in_group_p(io_uring_group);
+}
+
SYSCALL_DEFINE2(io_uring_setup, u32, entries,
struct io_uring_params __user *, params)
{
+ if (!io_uring_allowed())
+ return -EPERM;
+
return io_uring_setup(entries, params);
}
@@ -4206,6 +4253,11 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
static int __init io_uring_init(void)
{
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+
+#ifdef CONFIG_SYSCTL
+ register_sysctl_init("kernel", kernel_io_uring_disabled_table);
+#endif
+
return 0;
};
__initcall(io_uring_init);
--
2.34.1
More information about the kernel-team
mailing list