[PATCH 2/3 Xenial SRU] NVMe: Move error handling to failed reset handler

Christopher Arges chris.j.arges at canonical.com
Thu May 26 13:05:12 UTC 2016


On Wed, May 25, 2016 at 08:47:32AM -0600, Tim Gardner wrote:
> From: Keith Busch <keith.busch at intel.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1581034
> 
> This moves failed queue handling out of the namespace removal path and
> into the reset failure path, fixing a hang if the controller fails or
> the link goes down during del_gendisk. Previously the driver had to see
> the controller as degraded before calling del_gendisk in order to set
> the queues up to fail. But if the controller happened to fail after
> this, there was no task left to end outstanding requests.
> 
> On failure, every namespace is marked dead. Revalidation then sets the
> capacity to 0, and all new requests are ended with an error status.
> 
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> Reviewed-by: Christoph Hellwig <hch at lst.de>
> Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>
> Signed-off-by: Jens Axboe <axboe at fb.com>
> (back ported from commit 69d9a99c258eb1d6478fd9608a2070890797eed7)
> Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
> 
> Conflicts:
> 	drivers/nvme/host/pci.c
> ---
>  drivers/nvme/host/core.c | 53 ++++++++++++++++++++++++++++++++----------------
>  drivers/nvme/host/nvme.h |  2 ++
>  drivers/nvme/host/pci.c  | 33 ++++++++++++++++++++++--------
>  3 files changed, 63 insertions(+), 25 deletions(-)
> 
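For anyone reviewing the backport without the upstream history, here is a
minimal, self-contained C sketch of the ordering the commit message
describes: mark each namespace dead first so revalidation drops its
capacity to 0, then end whatever is still queued with an error. This is a
userspace toy model only, not driver code; the toy_ns/toy_revalidate/
toy_kill_queues names are made up for illustration and merely stand in for
the NVME_NS_DEAD bit, nvme_revalidate_disk() and nvme_kill_queues() in the
patch below.

#include <stdbool.h>
#include <stdio.h>

/* Toy userspace model of the reset-failure path; not kernel code. */
struct toy_ns {
	const char *name;
	bool dead;              /* stands in for the NVME_NS_DEAD bit */
	unsigned long capacity;
	int queued;             /* requests still waiting on the queue */
};

/* Stands in for nvme_revalidate_disk(): a dead namespace reports size 0. */
static int toy_revalidate(struct toy_ns *ns)
{
	if (ns->dead) {
		ns->capacity = 0;
		return -1;      /* the real driver returns -ENODEV here */
	}
	return 0;
}

/* Stands in for nvme_kill_queues(): mark dead, revalidate, fail queued IO. */
static void toy_kill_queues(struct toy_ns *nss, int count)
{
	for (int i = 0; i < count; i++) {
		nss[i].dead = true;
		toy_revalidate(&nss[i]);        /* capacity -> 0 */
		printf("%s: ending %d queued request(s) with an error\n",
		       nss[i].name, nss[i].queued);
		nss[i].queued = 0;
	}
}

int main(void)
{
	struct toy_ns nss[] = {
		{ "nvme0n1", false, 1048576, 3 },
		{ "nvme0n2", false,  262144, 0 },
	};

	toy_kill_queues(nss, 2);
	for (int i = 0; i < 2; i++)
		printf("%s: capacity=%lu dead=%d\n",
		       nss[i].name, nss[i].capacity, (int)nss[i].dead);
	return 0;
}

The point of the ordering is the same as in the real patch: once the
capacity reads as 0, buffered writers stop dirtying pages that can never
be synced, so del_gendisk no longer waits forever on them.
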
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8a21897a..a87320c 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -556,6 +556,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
>  	u16 old_ms;
>  	unsigned short bs;
>  
> +	if (test_bit(NVME_NS_DEAD, &ns->flags)) {
> +		set_capacity(disk, 0);
> +		return -ENODEV;
> +	}
>  	if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
>  		dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
>  				__func__, ns->ctrl->instance, ns->ns_id);
> @@ -1180,32 +1184,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  
>  static void nvme_ns_remove(struct nvme_ns *ns)
>  {
> -	bool kill;
> -
>  	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
>  		return;
>  
> -	kill = nvme_io_incapable(ns->ctrl) &&
> -			!blk_queue_dying(ns->queue);
> -	if (kill) {
> -		blk_set_queue_dying(ns->queue);
> -
> -		/*
> -		 * The controller was shutdown first if we got here through
> -		 * device removal. The shutdown may requeue outstanding
> -		 * requests. These need to be aborted immediately so
> -		 * del_gendisk doesn't block indefinitely for their completion.
> -		 */
> -		blk_mq_abort_requeue_list(ns->queue);
> -	}
>  	if (ns->disk->flags & GENHD_FL_UP) {
>  		if (blk_get_integrity(ns->disk))
>  			blk_integrity_unregister(ns->disk);
>  		sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
>  					&nvme_ns_attr_group);
>  		del_gendisk(ns->disk);
> -	}
> -	if (kill || !blk_queue_dying(ns->queue)) {
>  		blk_mq_abort_requeue_list(ns->queue);
>  		blk_cleanup_queue(ns->queue);
>  	}
> @@ -1405,6 +1392,38 @@ out:
>  	return ret;
>  }
>  
> +/**
> + * nvme_kill_queues(): Ends all namespace queues
> + * @ctrl: the dead controller that needs to end
> + *
> + * Call this function when the driver determines it is unable to get the
> + * controller in a state capable of servicing IO.
> + */
> +void nvme_kill_queues(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	mutex_lock(&ctrl->namespaces_mutex);
> +	list_for_each_entry(ns, &ctrl->namespaces, list) {
> +		if (!kref_get_unless_zero(&ns->kref))
> +			continue;
> +
> +		/*
> +		 * Revalidating a dead namespace sets capacity to 0. This will
> +		 * end buffered writers dirtying pages that can't be synced.
> +		 */
> +		if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
> +			revalidate_disk(ns->disk);
> +
> +		blk_set_queue_dying(ns->queue);
> +		blk_mq_abort_requeue_list(ns->queue);
> +		blk_mq_start_stopped_hw_queues(ns->queue, true);
> +
> +		nvme_put_ns(ns);
> +	}
> +	mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +
>  void nvme_stop_queues(struct nvme_ctrl *ctrl)
>  {
>  	struct nvme_ns *ns;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 784fba8..4d74625 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -115,6 +115,7 @@ struct nvme_ns {
>  	unsigned long flags;
>  
>  #define NVME_NS_REMOVING 0
> +#define NVME_NS_DEAD     1
>  
>  	u64 mode_select_num_blocks;
>  	u32 mode_select_block_len;
> @@ -244,6 +245,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
>  
>  void nvme_stop_queues(struct nvme_ctrl *ctrl);
>  void nvme_start_queues(struct nvme_ctrl *ctrl);
> +void nvme_kill_queues(struct nvme_ctrl *ctrl);
>  
>  struct request *nvme_alloc_request(struct request_queue *q,
>  		struct nvme_command *cmd, unsigned int flags);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 2b2211e..2ecd712 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -690,6 +690,14 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	blk_mq_start_request(req);
>  
>  	spin_lock_irq(&nvmeq->q_lock);
> +	if (unlikely(nvmeq->cq_vector < 0)) {
> +		if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
> +			ret = BLK_MQ_RQ_QUEUE_BUSY;
> +		else
> +			ret = BLK_MQ_RQ_QUEUE_ERROR;
> +		spin_unlock_irq(&nvmeq->q_lock);
> +		goto out;
> +	}

Are there supporting patches that this change depends on that would be
worth backporting too?
--chris


>  	__nvme_submit_cmd(nvmeq, &cmnd);
>  	nvme_process_cq(nvmeq);
>  	spin_unlock_irq(&nvmeq->q_lock);
> @@ -1257,6 +1265,12 @@ static struct blk_mq_ops nvme_mq_ops = {
>  static void nvme_dev_remove_admin(struct nvme_dev *dev)
>  {
>  	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
> +		/*
> +		 * If the controller was reset during removal, it's possible
> +		 * user requests may be waiting on a stopped queue. Start the
> +		 * queue to flush these to completion.
> +		 */
> +		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
>  		blk_cleanup_queue(dev->ctrl.admin_q);
>  		blk_mq_free_tag_set(&dev->admin_tagset);
>  	}
> @@ -1906,6 +1920,16 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
>  	kfree(dev);
>  }
>  
> +static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
> +{
> +	dev_warn(dev->dev, "Removing after probe failure\n");
> +
> +	kref_get(&dev->ctrl.kref);
> +	nvme_dev_disable(dev, false);
> +	if (!schedule_work(&dev->remove_work))
> +		nvme_put_ctrl(&dev->ctrl);
> +}
> +
>  static void nvme_reset_work(struct work_struct *work)
>  {
>  	struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
> @@ -1985,19 +2009,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
>  	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
>  	struct pci_dev *pdev = to_pci_dev(dev->dev);
>  
> +	nvme_kill_queues(&dev->ctrl);
>  	if (pci_get_drvdata(pdev))
>  		pci_stop_and_remove_bus_device_locked(pdev);
>  	nvme_put_ctrl(&dev->ctrl);
>  }
>  
> -static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
> -{
> -	dev_warn(dev->dev, "Removing after probe failure\n");
> -	kref_get(&dev->ctrl.kref);
> -	if (!schedule_work(&dev->remove_work))
> -		nvme_put_ctrl(&dev->ctrl);
> -}
> -
>  static int nvme_reset(struct nvme_dev *dev)
>  {
>  	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
> -- 
> 1.9.1
> 
> 
> -- 
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team



