ACK: [PATCH][SRU Bionic] scsi: libsas: defer ata device eh commands to libata

Stefan Bader stefan.bader at canonical.com
Tue May 22 17:06:10 UTC 2018


On 03.05.2018 23:49, dann frazier wrote:
> From: Jason Yan <yanaijie at huawei.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1768971
> 
> When an ata device is doing EH, some commands still attached to tasks
> are not passed to libata when the abort or the recovery fails, so libata
> does not handle these commands. After these commands are done, the sas
> task is freed, but the ata qc is not. This causes an ata qc leak and
> triggers a warning like the one below:
> 
> WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037
> ata_eh_finish+0xb4/0xcc
> CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G     W  OE 4.14.0#1
> ......
> Call trace:
> [<ffff0000088b7bd0>] ata_eh_finish+0xb4/0xcc
> [<ffff0000088b8420>] ata_do_eh+0xc4/0xd8
> [<ffff0000088b8478>] ata_std_error_handler+0x44/0x8c
> [<ffff0000088b8068>] ata_scsi_port_error_handler+0x480/0x694
> [<ffff000008875fc4>] async_sas_ata_eh+0x4c/0x80
> [<ffff0000080f6be8>] async_run_entry_fn+0x4c/0x170
> [<ffff0000080ebd70>] process_one_work+0x144/0x390
> [<ffff0000080ec100>] worker_thread+0x144/0x418
> [<ffff0000080f2c98>] kthread+0x10c/0x138
> [<ffff0000080855dc>] ret_from_fork+0x10/0x18
> 
> If too many ata qcs are leaked, ata tag allocation will fail and I/O
> will be blocked forever.
> 
> As suggested by Dan Williams, defer ata device commands to libata and
> merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle
> ata qcs correctly after this.
> 
> Signed-off-by: Jason Yan <yanaijie at huawei.com>
> CC: Xiaofei Tan <tanxiaofei at huawei.com>
> CC: John Garry <john.garry at huawei.com>
> CC: Dan Williams <dan.j.williams at intel.com>
> Reviewed-by: Dan Williams <dan.j.williams at intel.com>
> Signed-off-by: Martin K. Petersen <martin.petersen at oracle.com>
> (cherry picked from commit 318aaf34f1179b39fa9c30fa0f3288b645beee39)
> Signed-off-by: dann frazier <dann.frazier at canonical.com>
Acked-by: Stefan Bader <stefan.bader at canonical.com>

> ---
>  drivers/scsi/libsas/sas_scsi_host.c | 33 ++++++++++++-----------------
>  1 file changed, 13 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
> index a68caa0d3fb5..00902c9a34b8 100644
> --- a/drivers/scsi/libsas/sas_scsi_host.c
> +++ b/drivers/scsi/libsas/sas_scsi_host.c
> @@ -222,6 +222,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
>  {
>  	struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
> +	struct domain_device *dev = cmd_to_domain_dev(cmd);
>  	struct sas_task *task = TO_SAS_TASK(cmd);
>  
>  	/* At this point, we only get called following an actual abort
> @@ -230,6 +231,14 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
>  	 */
>  	sas_end_task(cmd, task);
>  
> +	if (dev_is_sata(dev)) {
> +		/* defer commands to libata so that libata EH can
> +		 * handle ata qcs correctly
> +		 */
> +		list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
> +		return;
> +	}
> +
>  	/* now finish the command and move it on to the error
>  	 * handler done list, this also takes it off the
>  	 * error handler pending list.
> @@ -237,22 +246,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
>  	scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
>  }
>  
> -static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
> -{
> -	struct domain_device *dev = cmd_to_domain_dev(cmd);
> -	struct sas_ha_struct *ha = dev->port->ha;
> -	struct sas_task *task = TO_SAS_TASK(cmd);
> -
> -	if (!dev_is_sata(dev)) {
> -		sas_eh_finish_cmd(cmd);
> -		return;
> -	}
> -
> -	/* report the timeout to libata */
> -	sas_end_task(cmd, task);
> -	list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
> -}
> -
>  static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
>  {
>  	struct scsi_cmnd *cmd, *n;
> @@ -260,7 +253,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
>  	list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
>  		if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
>  		    cmd->device->lun == my_cmd->device->lun)
> -			sas_eh_defer_cmd(cmd);
> +			sas_eh_finish_cmd(cmd);
>  	}
>  }
>  
> @@ -633,12 +626,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
>  		case TASK_IS_DONE:
>  			SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
>  				    task);
> -			sas_eh_defer_cmd(cmd);
> +			sas_eh_finish_cmd(cmd);
>  			continue;
>  		case TASK_IS_ABORTED:
>  			SAS_DPRINTK("%s: task 0x%p is aborted\n",
>  				    __func__, task);
> -			sas_eh_defer_cmd(cmd);
> +			sas_eh_finish_cmd(cmd);
>  			continue;
>  		case TASK_IS_AT_LU:
>  			SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
> @@ -649,7 +642,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
>  					    "recovered\n",
>  					    SAS_ADDR(task->dev),
>  					    cmd->device->lun);
> -				sas_eh_defer_cmd(cmd);
> +				sas_eh_finish_cmd(cmd);
>  				sas_scsi_clear_queue_lu(work_q, cmd);
>  				goto Again;
>  			}
> 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20180522/3d38e45c/attachment.sig>


More information about the kernel-team mailing list