[PATCH v2 48/57][X] debugfs: prevent access to possibly dead file_operations at file open

Seth Forshee seth.forshee at canonical.com
Fri Jun 19 16:50:01 UTC 2020


From: Nicolai Stange <nicstange at gmail.com>

BugLink: https://bugs.launchpad.net/bugs/1884159

Nothing prevents a dentry found by path lookup before a return of
__debugfs_remove() to actually get opened after that return. Now, after
the return of __debugfs_remove(), there are no guarantees whatsoever
regarding the memory the corresponding inode's file_operations object
had been kept in.

Since __debugfs_remove() is seldomly invoked, usually from module exit
handlers only, the race is hard to trigger and the impact is very low.

A discussion of the problem outlined above as well as a suggested
solution can be found in the (sub-)thread rooted at

  http://lkml.kernel.org/g/20130401203445.GA20862@ZenIV.linux.org.uk
  ("Yet another pipe related oops.")

Basically, Greg KH suggests to introduce an intermediate fops and
Al Viro points out that a pointer to the original ones may be stored in
->d_fsdata.

Follow this line of reasoning:
- Add SRCU as a reverse dependency of DEBUG_FS.
- Introduce a srcu_struct object for the debugfs subsystem.
- In debugfs_create_file(), store a pointer to the original
  file_operations object in ->d_fsdata.
- Make debugfs_remove() and debugfs_remove_recursive() wait for a
  SRCU grace period after the dentry has been delete()'d and before they
  return to their callers.
- Introduce an intermediate file_operations object named
  "debugfs_open_proxy_file_operations". It's ->open() functions checks,
  under the protection of a SRCU read lock, whether the dentry is still
  alive, i.e. has not been d_delete()'d and if so, tries to acquire a
  reference on the owning module.
  On success, it sets the file object's ->f_op to the original
  file_operations and forwards the ongoing open() call to the original
  ->open().
- For clarity, rename the former debugfs_file_operations to
  debugfs_noop_file_operations -- they are in no way canonical.

The choice of SRCU over "normal" RCU is justified by the fact, that the
former may also be used to protect ->i_private data from going away
during the execution of a file's readers and writers which may (and do)
sleep.

Finally, introduce the fs/debugfs/internal.h header containing some
declarations internal to the debugfs implementation.

Signed-off-by: Nicolai Stange <nicstange at gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
(backported from commit 9fd4dcece43a53e5a9e65a973df5693702ee6401)
Signed-off-by: Seth Forshee <seth.forshee at canonical.com>
---
 fs/debugfs/file.c       | 91 ++++++++++++++++++++++++++++++++++++++++-
 fs/debugfs/inode.c      | 13 +++++-
 fs/debugfs/internal.h   | 24 +++++++++++
 include/linux/debugfs.h |  3 --
 lib/Kconfig.debug       |  1 +
 5 files changed, 127 insertions(+), 5 deletions(-)
 create mode 100644 fs/debugfs/internal.h

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index d2ba12e23ed9..736ab3c988f2 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -22,6 +22,9 @@
 #include <linux/slab.h>
 #include <linux/atomic.h>
 #include <linux/device.h>
+#include <linux/srcu.h>
+
+#include "internal.h"
 
 static ssize_t default_read_file(struct file *file, char __user *buf,
 				 size_t count, loff_t *ppos)
@@ -35,13 +38,99 @@ static ssize_t default_write_file(struct file *file, const char __user *buf,
 	return count;
 }
 
-const struct file_operations debugfs_file_operations = {
+const struct file_operations debugfs_noop_file_operations = {
 	.read =		default_read_file,
 	.write =	default_write_file,
 	.open =		simple_open,
 	.llseek =	noop_llseek,
 };
 
+/**
+ * debugfs_use_file_start - mark the beginning of file data access
+ * @dentry: the dentry object whose data is being accessed.
+ * @srcu_idx: a pointer to some memory to store a SRCU index in.
+ *
+ * Up to a matching call to debugfs_use_file_finish(), any
+ * successive call into the file removing functions debugfs_remove()
+ * and debugfs_remove_recursive() will block. Since associated private
+ * file data may only get freed after a successful return of any of
+ * the removal functions, you may safely access it after a successful
+ * call to debugfs_use_file_start() without worrying about
+ * lifetime issues.
+ *
+ * If -%EIO is returned, the file has already been removed and thus,
+ * it is not safe to access any of its data. If, on the other hand,
+ * it is allowed to access the file data, zero is returned.
+ *
+ * Regardless of the return code, any call to
+ * debugfs_use_file_start() must be followed by a matching call
+ * to debugfs_use_file_finish().
+ */
+static int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx)
+	__acquires(&debugfs_srcu)
+{
+	*srcu_idx = srcu_read_lock(&debugfs_srcu);
+	barrier();
+	if (d_unlinked(dentry))
+		return -EIO;
+	return 0;
+}
+
+/**
+ * debugfs_use_file_finish - mark the end of file data access
+ * @srcu_idx: the SRCU index "created" by a former call to
+ *            debugfs_use_file_start().
+ *
+ * Allow any ongoing concurrent call into debugfs_remove() or
+ * debugfs_remove_recursive() blocked by a former call to
+ * debugfs_use_file_start() to proceed and return to its caller.
+ */
+static void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu)
+{
+	srcu_read_unlock(&debugfs_srcu, srcu_idx);
+}
+
+#define F_DENTRY(filp) ((filp)->f_path.dentry)
+
+#define REAL_FOPS_DEREF(dentry)					\
+	((const struct file_operations *)(dentry)->d_fsdata)
+
+static int open_proxy_open(struct inode *inode, struct file *filp)
+{
+	const struct dentry *dentry = F_DENTRY(filp);
+	const struct file_operations *real_fops = NULL;
+	int srcu_idx, r;
+
+	r = debugfs_use_file_start(dentry, &srcu_idx);
+	if (r) {
+		r = -ENOENT;
+		goto out;
+	}
+
+	real_fops = REAL_FOPS_DEREF(dentry);
+	real_fops = fops_get(real_fops);
+	if (!real_fops) {
+		/* Huh? Module did not clean up after itself at exit? */
+		WARN(1, "debugfs file owner did not clean up at exit: %pd",
+			dentry);
+		r = -ENXIO;
+		goto out;
+	}
+	replace_fops(filp, real_fops);
+
+	if (real_fops->open)
+		r = real_fops->open(inode, filp);
+
+out:
+	fops_put(real_fops);
+	debugfs_use_file_finish(srcu_idx);
+	return r;
+}
+
+const struct file_operations debugfs_open_proxy_file_operations = {
+	.open = open_proxy_open,
+};
+
 static struct dentry *debugfs_create_mode(const char *name, umode_t mode,
 					  struct dentry *parent, void *value,
 				          const struct file_operations *fops,
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d7111b8ce36a..f01e61932747 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -27,9 +27,14 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
+
+#include "internal.h"
 
 #define DEBUGFS_DEFAULT_MODE	0700
 
+DEFINE_SRCU(debugfs_srcu);
+
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
@@ -345,8 +350,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
 		return failed_creating(dentry);
 
 	inode->i_mode = mode;
-	inode->i_fop = fops ? fops : &debugfs_file_operations;
 	inode->i_private = data;
+
+	inode->i_fop = fops ? &debugfs_open_proxy_file_operations
+		: &debugfs_noop_file_operations;
+	dentry->d_fsdata = (void *)fops;
+
 	d_instantiate(dentry, inode);
 	fsnotify_create(d_inode(dentry->d_parent), dentry);
 	return end_creating(dentry);
@@ -570,6 +579,7 @@ void debugfs_remove(struct dentry *dentry)
 	mutex_unlock(&d_inode(parent)->i_mutex);
 	if (!ret)
 		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+	synchronize_srcu(&debugfs_srcu);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
 
@@ -647,6 +657,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 	if (!__debugfs_remove(child, parent))
 		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 	mutex_unlock(&d_inode(parent)->i_mutex);
+	synchronize_srcu(&debugfs_srcu);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
new file mode 100644
index 000000000000..c7aaa5cb6685
--- /dev/null
+++ b/fs/debugfs/internal.h
@@ -0,0 +1,24 @@
+/*
+ *  internal.h - declarations internal to debugfs
+ *
+ *  Copyright (C) 2016 Nicolai Stange <nicstange at gmail.com>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License version
+ *	2 as published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _DEBUGFS_INTERNAL_H_
+#define _DEBUGFS_INTERNAL_H_
+
+struct file_operations;
+struct srcu_struct;
+
+/* declared over in file.c */
+extern const struct file_operations debugfs_noop_file_operations;
+extern const struct file_operations debugfs_open_proxy_file_operations;
+
+extern struct srcu_struct debugfs_srcu;
+
+#endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 51362ff607fc..6215d2852a3e 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -43,9 +43,6 @@ extern struct dentry *arch_debugfs_dir;
 
 #if defined(CONFIG_DEBUG_FS)
 
-/* declared over in file.c */
-extern const struct file_operations debugfs_file_operations;
-
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1f2d6e38dc30..8f27ef6f84e9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -257,6 +257,7 @@ config PAGE_OWNER
 
 config DEBUG_FS
 	bool "Debug Filesystem"
+	select SRCU
 	help
 	  debugfs is a virtual file system that kernel developers use to put
 	  debugging files into.  Enable this option to be able to read and
-- 
2.27.0




More information about the kernel-team mailing list