[PATCH 19/23][SRU][OEM-5.14] block: drain file system I/O on del_gendisk
Koba Ko
koba.ko at canonical.com
Fri Jan 28 03:26:06 UTC 2022
From: Christoph Hellwig <hch at lst.de>
BugLink: https://bugs.launchpad.net/bugs/1958850
Instead of delaying draining of file system I/O related items like the
blk-qos queues, the integrity read workqueue and timeouts only when the
request_queue is removed, do that when del_gendisk is called. This is
important for SCSI where the upper level drivers that control the gendisk
are separate entities, and the disk can be freed much earlier than the
request_queue, or can even be unbound without tearing down the queue.
Fixes: edb0872f44ec ("block: move the bdi from the request_queue to the gendisk")
Reported-by: Ming Lei <ming.lei at redhat.com>
Signed-off-by: Christoph Hellwig <hch at lst.de>
Tested-by: Darrick J. Wong <djwong at kernel.org>
Link: https://lore.kernel.org/r/20210929071241.934472-5-hch@lst.de
Tested-by: Yi Zhang <yi.zhang at redhat.com>
Signed-off-by: Jens Axboe <axboe at kernel.dk>
(cherry picked from commit 8e141f9eb803e209714a80aa6ec073893f94c526)
Signed-off-by: Koba Ko <koba.ko at canonical.com>
---
block/blk-core.c | 54 +++++++++++++++++++++++++++----------------
block/blk.h | 1 +
block/genhd.c | 22 +++++++++++++++++-
include/linux/genhd.h | 1 +
4 files changed, 57 insertions(+), 21 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 4ec5511e6cb59..80bb11140344e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -49,7 +49,6 @@
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
-#include "blk-rq-qos.h"
struct dentry *blk_debugfs_root;
@@ -337,23 +336,25 @@ void blk_put_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_put_queue);
-void blk_set_queue_dying(struct request_queue *q)
+void blk_queue_start_drain(struct request_queue *q)
{
- blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-
/*
* When queue DYING flag is set, we need to block new req
* entering queue, so we call blk_freeze_queue_start() to
* prevent I/O from crossing blk_queue_enter().
*/
blk_freeze_queue_start(q);
-
if (queue_is_mq(q))
blk_mq_wake_waiters(q);
-
/* Make blk_queue_enter() reexamine the DYING flag. */
wake_up_all(&q->mq_freeze_wq);
}
+
+void blk_set_queue_dying(struct request_queue *q)
+{
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+ blk_queue_start_drain(q);
+}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);
/**
@@ -385,13 +386,8 @@ void blk_cleanup_queue(struct request_queue *q)
*/
blk_freeze_queue(q);
- rq_qos_exit(q);
-
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
- /* for synchronous bio-based driver finish in-flight integrity i/o */
- blk_flush_integrity();
-
blk_sync_queue(q);
if (queue_is_mq(q)) {
@@ -477,19 +473,37 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
static inline int bio_queue_enter(struct bio *bio)
{
- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
- bool nowait = bio->bi_opf & REQ_NOWAIT;
- int ret;
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+ struct request_queue *q = disk->queue;
- ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
- if (unlikely(ret)) {
- if (nowait && !blk_queue_dying(q))
+ while (!blk_try_enter_queue(q, false)) {
+ if (bio->bi_opf & REQ_NOWAIT) {
+ if (test_bit(GD_DEAD, &disk->state))
+ goto dead;
bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
+ return -EBUSY;
+ }
+
+ /*
+ * read pair of barrier in blk_freeze_queue_start(), we need to
+ * order reading __PERCPU_REF_DEAD flag of .q_usage_counter and
+ * reading .mq_freeze_depth or queue dying flag, otherwise the
+ * following wait may never return if the two reads are
+ * reordered.
+ */
+ smp_rmb();
+ wait_event(q->mq_freeze_wq,
+ (!q->mq_freeze_depth &&
+ blk_pm_resume_queue(false, q)) ||
+ test_bit(GD_DEAD, &disk->state));
+ if (test_bit(GD_DEAD, &disk->state))
+ goto dead;
}
- return ret;
+ return 0;
+dead:
+ bio_io_error(bio);
+ return -ENODEV;
}
void blk_queue_exit(struct request_queue *q)
diff --git a/block/blk.h b/block/blk.h
index 1af7c13ccc708..bb2193fe8aea9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,6 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
+void blk_queue_start_drain(struct request_queue *q);
#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
diff --git a/block/genhd.c b/block/genhd.c
index a02960681a151..f8cc2476f8ee6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -26,6 +26,7 @@
#include <linux/badblocks.h>
#include "blk.h"
+#include "blk-rq-qos.h"
static struct kobject *block_depr;
@@ -587,6 +588,8 @@ EXPORT_SYMBOL(device_add_disk_no_queue_reg);
*/
void del_gendisk(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
+
might_sleep();
if (WARN_ON_ONCE(!disk->queue))
@@ -608,9 +611,26 @@ void del_gendisk(struct gendisk *disk)
* up any more even if openers still hold references to it.
*/
remove_inode_hash(disk->part0->bd_inode);
-
+ /*
+ * Fail any new I/O.
+ */
+ set_bit(GD_DEAD, &disk->state);
set_capacity(disk, 0);
+ /*
+ * Prevent new I/O from crossing bio_queue_enter().
+ */
+ blk_queue_start_drain(q);
+ blk_mq_freeze_queue_wait(q);
+
+ rq_qos_exit(q);
+ blk_sync_queue(q);
+ blk_flush_integrity();
+ /*
+ * Allow using passthrough request again after the queue is torn down.
+ */
+ blk_mq_unfreeze_queue(q);
+
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 522bc090e2945..82d5ef6ea78c3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -153,6 +153,7 @@ struct gendisk {
unsigned long state;
#define GD_NEED_PART_SCAN 0
#define GD_READ_ONLY 1
+#define GD_DEAD 2
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
--
2.25.1
More information about the kernel-team
mailing list