[SRU X] [PATCH 1/1] nvme/pci: Poll CQ on timeout

Guilherme G. Piccoli gpiccoli at canonical.com
Fri Dec 7 21:28:11 UTC 2018

From: Keith Busch <keith.busch at intel.com>

BugLink: https://launchpad.net/bugs/1807393

If an IO timeout occurs, it's helpful to know if the controller did not
post a completion or the driver missed an interrupt. While we never expect
the latter, this patch will make it possible to tell the difference so
we don't have to guess.

Signed-off-by: Keith Busch <keith.busch at intel.com>
Signed-off-by: Christoph Hellwig <hch at lst.de>
Tested-by: Johannes Thumshirn <jthumshirn at suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>
(backported from 7776db1ccc123d5944a8c170c9c45f7e91d49643 upstream)
[gpiccoli: context adjustment, fixed struct member access that changed]
Signed-off-by: Guilherme G. Piccoli <gpiccoli at canonical.com>
---
 drivers/nvme/host/pci.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3752052ae20a..5d2a7ee2f922 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -929,10 +929,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 	return IRQ_NONE;
-static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
-	struct nvme_queue *nvmeq = hctx->driver_data;
 	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		__nvme_process_cq(nvmeq, &tag);
@@ -945,6 +943,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 	return 0;
 }
 
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+{
+	struct nvme_queue *nvmeq = hctx->driver_data;
+
+	return __nvme_poll(nvmeq, tag);
+}
+
 static void nvme_async_event_work(struct work_struct *work)
 {
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, async_work);
@@ -1045,6 +1050,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct request *abort_req;
 	struct nvme_command cmd;
 
+	/*
+	 * Did we miss an interrupt?
+	 */
+	if (__nvme_poll(nvmeq, req->tag)) {
+		dev_warn(dev->dev,
+			"I/O %d QID %d timeout, completion polled\n",
+			req->tag, nvmeq->qid);
+		return BLK_EH_HANDLED;
+	}
+
 	/*
 	 * Shutdown immediately if controller times out while starting. The
 	 * reset work will see the pci device disabled when it gets the forced

