ACK: [PATCH][SRU Bionic] scsi: libsas: defer ata device eh commands to libata
Stefan Bader
stefan.bader at canonical.com
Tue May 22 17:06:10 UTC 2018
On 03.05.2018 23:49, dann frazier wrote:
> From: Jason Yan <yanaijie at huawei.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1768971
>
> When an ata device is doing EH, some commands still attached to tasks
> are not passed to libata when abort or recovery fails, so libata does
> not handle these commands. After these commands are done, the sas task
> is freed, but the ata qc is not. This causes an ata qc leak and triggers
> a warning like the one below:
>
> WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037
> ata_eh_finish+0xb4/0xcc
> CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G W OE 4.14.0#1
> ......
> Call trace:
> [<ffff0000088b7bd0>] ata_eh_finish+0xb4/0xcc
> [<ffff0000088b8420>] ata_do_eh+0xc4/0xd8
> [<ffff0000088b8478>] ata_std_error_handler+0x44/0x8c
> [<ffff0000088b8068>] ata_scsi_port_error_handler+0x480/0x694
> [<ffff000008875fc4>] async_sas_ata_eh+0x4c/0x80
> [<ffff0000080f6be8>] async_run_entry_fn+0x4c/0x170
> [<ffff0000080ebd70>] process_one_work+0x144/0x390
> [<ffff0000080ec100>] worker_thread+0x144/0x418
> [<ffff0000080f2c98>] kthread+0x10c/0x138
> [<ffff0000080855dc>] ret_from_fork+0x10/0x18
>
> If too many ata qcs are leaked, ata tag allocation will fail and io
> will be blocked forever.
>
> As suggested by Dan Williams, defer ata device commands to libata and
> merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle
> ata qcs correctly after this.
>
> Signed-off-by: Jason Yan <yanaijie at huawei.com>
> CC: Xiaofei Tan <tanxiaofei at huawei.com>
> CC: John Garry <john.garry at huawei.com>
> CC: Dan Williams <dan.j.williams at intel.com>
> Reviewed-by: Dan Williams <dan.j.williams at intel.com>
> Signed-off-by: Martin K. Petersen <martin.petersen at oracle.com>
> (cherry picked from commit 318aaf34f1179b39fa9c30fa0f3288b645beee39)
> Signed-off-by: dann frazier <dann.frazier at canonical.com>
Acked-by: Stefan Bader <stefan.bader at canonical.com>
> ---
> drivers/scsi/libsas/sas_scsi_host.c | 33 ++++++++++++-----------------
> 1 file changed, 13 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
> index a68caa0d3fb5..00902c9a34b8 100644
> --- a/drivers/scsi/libsas/sas_scsi_host.c
> +++ b/drivers/scsi/libsas/sas_scsi_host.c
> @@ -222,6 +222,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
> static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
> {
> struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
> + struct domain_device *dev = cmd_to_domain_dev(cmd);
> struct sas_task *task = TO_SAS_TASK(cmd);
>
> /* At this point, we only get called following an actual abort
> @@ -230,6 +231,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
> */
> sas_end_task(cmd, task);
>
> + if (dev_is_sata(dev)) {
> + /* defer commands to libata so that libata EH can
> + * handle ata qcs correctly
> + */
> + list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
> + return;
> + }
> +
> /* now finish the command and move it on to the error
> * handler done list, this also takes it off the
> * error handler pending list.
> @@ -237,22 +246,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
> scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
> }
>
> -static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
> -{
> - struct domain_device *dev = cmd_to_domain_dev(cmd);
> - struct sas_ha_struct *ha = dev->port->ha;
> - struct sas_task *task = TO_SAS_TASK(cmd);
> -
> - if (!dev_is_sata(dev)) {
> - sas_eh_finish_cmd(cmd);
> - return;
> - }
> -
> - /* report the timeout to libata */
> - sas_end_task(cmd, task);
> - list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
> -}
> -
> static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
> {
> struct scsi_cmnd *cmd, *n;
> @@ -260,7 +253,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
> list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
> if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
> cmd->device->lun == my_cmd->device->lun)
> - sas_eh_defer_cmd(cmd);
> + sas_eh_finish_cmd(cmd);
> }
> }
>
> @@ -633,12 +626,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
> case TASK_IS_DONE:
> SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
> task);
> - sas_eh_defer_cmd(cmd);
> + sas_eh_finish_cmd(cmd);
> continue;
> case TASK_IS_ABORTED:
> SAS_DPRINTK("%s: task 0x%p is aborted\n",
> __func__, task);
> - sas_eh_defer_cmd(cmd);
> + sas_eh_finish_cmd(cmd);
> continue;
> case TASK_IS_AT_LU:
> SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
> @@ -649,7 +642,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
> "recovered\n",
> SAS_ADDR(task->dev),
> cmd->device->lun);
> - sas_eh_defer_cmd(cmd);
> + sas_eh_finish_cmd(cmd);
> sas_scsi_clear_queue_lu(work_q, cmd);
> goto Again;
> }
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20180522/3d38e45c/attachment.sig>
More information about the kernel-team mailing list