[PATCH 2/3 Xenial SRU] NVMe: Move error handling to failed reset handler
Christopher Arges
chris.j.arges at canonical.com
Thu May 26 13:05:12 UTC 2016
On Wed, May 25, 2016 at 08:47:32AM -0600, Tim Gardner wrote:
> From: Keith Busch <keith.busch at intel.com>
>
> BugLink: http://bugs.launchpad.net/bugs/1581034
>
> This moves failed queue handling out of the namespace removal path and
> into the reset failure path, fixing a hang when the controller fails or
> the link goes down during del_gendisk. Previously the driver had to see
> the controller as degraded before calling del_gendisk in order to set
> the queues up to fail. But if the controller happened to fail after that
> point, there was no task left to end outstanding requests.
>
> On failure, every namespace is marked dead. Revalidating a dead namespace
> sets its capacity to 0, and all new requests are ended with error status.
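
To make the flow above concrete, here is a condensed sketch of the
failed-reset path as it is wired up in the hunks below. This is
illustrative pseudocode only, boiled down from the patch itself, not
literal driver code.

    /* reset_work fails -> nvme_remove_dead_ctrl(dev) */
    nvme_dev_disable(dev, false);          /* quiesce the dead controller       */
    schedule_work(&dev->remove_work);      /* defer teardown to process context */

    /* remove_work -> nvme_remove_dead_ctrl_work(work) */
    nvme_kill_queues(&dev->ctrl);          /* mark each namespace NVME_NS_DEAD:  */
                                           /* capacity revalidates to 0 and new  */
                                           /* requests end with error status     */
    pci_stop_and_remove_bus_device_locked(pdev);
                                           /* normal removal follows; del_gendisk
                                            * no longer waits on requests that
                                            * can never complete */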
>
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> Reviewed-by: Christoph Hellwig <hch at lst.de>
> Reviewed-by: Johannes Thumshirn <jthumshirn at suse.de>
> Signed-off-by: Jens Axboe <axboe at fb.com>
> (back ported from commit 69d9a99c258eb1d6478fd9608a2070890797eed7)
> Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
>
> Conflicts:
> drivers/nvme/host/pci.c
> ---
> drivers/nvme/host/core.c | 53 ++++++++++++++++++++++++++++++++----------------
> drivers/nvme/host/nvme.h | 2 ++
> drivers/nvme/host/pci.c | 33 ++++++++++++++++++++++--------
> 3 files changed, 63 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8a21897a..a87320c 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -556,6 +556,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
> u16 old_ms;
> unsigned short bs;
>
> + if (test_bit(NVME_NS_DEAD, &ns->flags)) {
> + set_capacity(disk, 0);
> + return -ENODEV;
> + }
> if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
> dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
> __func__, ns->ctrl->instance, ns->ns_id);
> @@ -1180,32 +1184,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>
> static void nvme_ns_remove(struct nvme_ns *ns)
> {
> - bool kill;
> -
> if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
> return;
>
> - kill = nvme_io_incapable(ns->ctrl) &&
> - !blk_queue_dying(ns->queue);
> - if (kill) {
> - blk_set_queue_dying(ns->queue);
> -
> - /*
> - * The controller was shutdown first if we got here through
> - * device removal. The shutdown may requeue outstanding
> - * requests. These need to be aborted immediately so
> - * del_gendisk doesn't block indefinitely for their completion.
> - */
> - blk_mq_abort_requeue_list(ns->queue);
> - }
> if (ns->disk->flags & GENHD_FL_UP) {
> if (blk_get_integrity(ns->disk))
> blk_integrity_unregister(ns->disk);
> sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
> &nvme_ns_attr_group);
> del_gendisk(ns->disk);
> - }
> - if (kill || !blk_queue_dying(ns->queue)) {
> blk_mq_abort_requeue_list(ns->queue);
> blk_cleanup_queue(ns->queue);
> }
> @@ -1405,6 +1392,38 @@ out:
> return ret;
> }
>
> +/**
> + * nvme_kill_queues(): Ends all namespace queues
> + * @ctrl: the dead controller that needs to end
> + *
> + * Call this function when the driver determines it is unable to get the
> + * controller in a state capable of servicing IO.
> + */
> +void nvme_kill_queues(struct nvme_ctrl *ctrl)
> +{
> + struct nvme_ns *ns;
> +
> + mutex_lock(&ctrl->namespaces_mutex);
> + list_for_each_entry(ns, &ctrl->namespaces, list) {
> + if (!kref_get_unless_zero(&ns->kref))
> + continue;
> +
> + /*
> + * Revalidating a dead namespace sets capacity to 0. This will
> + * end buffered writers dirtying pages that can't be synced.
> + */
> + if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
> + revalidate_disk(ns->disk);
> +
> + blk_set_queue_dying(ns->queue);
> + blk_mq_abort_requeue_list(ns->queue);
> + blk_mq_start_stopped_hw_queues(ns->queue, true);
> +
> + nvme_put_ns(ns);
> + }
> + mutex_unlock(&ctrl->namespaces_mutex);
> +}
> +
> void nvme_stop_queues(struct nvme_ctrl *ctrl)
> {
> struct nvme_ns *ns;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 784fba8..4d74625 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -115,6 +115,7 @@ struct nvme_ns {
> unsigned long flags;
>
> #define NVME_NS_REMOVING 0
> +#define NVME_NS_DEAD 1
>
> u64 mode_select_num_blocks;
> u32 mode_select_block_len;
> @@ -244,6 +245,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
>
> void nvme_stop_queues(struct nvme_ctrl *ctrl);
> void nvme_start_queues(struct nvme_ctrl *ctrl);
> +void nvme_kill_queues(struct nvme_ctrl *ctrl);
>
> struct request *nvme_alloc_request(struct request_queue *q,
> struct nvme_command *cmd, unsigned int flags);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 2b2211e..2ecd712 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -690,6 +690,14 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
> blk_mq_start_request(req);
>
> spin_lock_irq(&nvmeq->q_lock);
> + if (unlikely(nvmeq->cq_vector < 0)) {
> + if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
> + ret = BLK_MQ_RQ_QUEUE_BUSY;
> + else
> + ret = BLK_MQ_RQ_QUEUE_ERROR;
> + spin_unlock_irq(&nvmeq->q_lock);
> + goto out;
> + }
Are there supporting patches that lead up to this change that would be
worth backporting too?
--chris
> __nvme_submit_cmd(nvmeq, &cmnd);
> nvme_process_cq(nvmeq);
> spin_unlock_irq(&nvmeq->q_lock);
> @@ -1257,6 +1265,12 @@ static struct blk_mq_ops nvme_mq_ops = {
> static void nvme_dev_remove_admin(struct nvme_dev *dev)
> {
> if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
> + /*
> + * If the controller was reset during removal, it's possible
> + * user requests may be waiting on a stopped queue. Start the
> + * queue to flush these to completion.
> + */
> + blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
> blk_cleanup_queue(dev->ctrl.admin_q);
> blk_mq_free_tag_set(&dev->admin_tagset);
> }
> @@ -1906,6 +1920,16 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
> kfree(dev);
> }
>
> +static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
> +{
> + dev_warn(dev->dev, "Removing after probe failure\n");
> +
> + kref_get(&dev->ctrl.kref);
> + nvme_dev_disable(dev, false);
> + if (!schedule_work(&dev->remove_work))
> + nvme_put_ctrl(&dev->ctrl);
> +}
> +
> static void nvme_reset_work(struct work_struct *work)
> {
> struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
> @@ -1985,19 +2009,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
> struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
> struct pci_dev *pdev = to_pci_dev(dev->dev);
>
> + nvme_kill_queues(&dev->ctrl);
> if (pci_get_drvdata(pdev))
> pci_stop_and_remove_bus_device_locked(pdev);
> nvme_put_ctrl(&dev->ctrl);
> }
>
> -static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
> -{
> - dev_warn(dev->dev, "Removing after probe failure\n");
> - kref_get(&dev->ctrl.kref);
> - if (!schedule_work(&dev->remove_work))
> - nvme_put_ctrl(&dev->ctrl);
> -}
> -
> static int nvme_reset(struct nvme_dev *dev)
> {
> if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
> --
> 1.9.1
>
>
> --
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team