ACK+Applied: [Xenial SRU] Fix scsi_dh_alua races on s390x
Kamal Mostafa
kamal at canonical.com
Thu Jun 30 20:57:00 UTC 2016
On Thu, Jun 30, 2016 at 09:32:14PM +0200, Stefan Bader wrote:
>
> BugLink: http://bugs.launchpad.net/bugs/1567602
>
> IBM reported that with the alua device handler loaded, when running
> automatic LUN detection they run into timeouts and fail to discover
> some disks completely.
>
> Unfortunately the change to get rid of the additional vpd_pg83 probing
> was not enough (that would have been easy).
> The changes that likely do the fix come later and I would expect them
> to depend a lot on the previous changes (at least most of those). So
> I ended up picking almost all changes between v4.4 and v4.6 for
> the alua device handler and hating myself.
>
> The last patch is an addition which I know I need because otherwise
> a ppc64el host was quite unhappy with multipath on v4.6.
> [by now I tried a 4.4 kernel with this set added on ppc64el and could see no
> obvious breakage there]
>
> I am also inlining the combined diff of all changes below to show
> that the changes outside scsi_dh_alua.c are not too bad.
>
> -Stefan
>
> The following changes since commit a6409cad137621e2b43d9f49c757fbc996539e85:
>
> UBUNTU: Ubuntu-4.4.0-28.47
>
> are available in the git repository at:
>
> git://git.launchpad.net/~smb/+git/linux-xenial lp1567602
>
> for you to fetch changes up to a21cddda99e26e2f4527dd9ac56e7538d5afa253:
>
> scsi_dh_alua: do not fail for unknown VPD identification
(from irc) "Acked-by: Brad Figg <brad.figg at canonical.com>"
Applied to Xenial (4.4.0-30.49).
-Kamal
>
> ----------------------------------------------------------------
> Hannes Reinecke (34):
> scsi_dh_alua: Disable ALUA handling for non-disk devices
> scsi_dh_alua: Use vpd_pg83 information
> scsi_dh_alua: improved logging
> scsi_dh_alua: sanitze sense code handling
> scsi_dh_alua: use standard logging functions
> scsi_dh_alua: return standard SCSI return codes in submit_rtpg
> scsi_dh_alua: fixup description of stpg_endio()
> scsi_dh_alua: use flag for RTPG extended header
> scsi_dh_alua: use unaligned access macros
> scsi_dh_alua: rework alua_check_tpgs() to return the tpgs mode
> scsi_dh_alua: simplify sense code handling
> scsi: Add scsi_vpd_lun_id()
> scsi: Add scsi_vpd_tpg_id()
> scsi_dh_alua: use scsi_vpd_tpg_id()
> scsi_dh_alua: Remove stale variables
> scsi_dh_alua: Pass buffer as function argument
> scsi_dh_alua: separate out alua_stpg()
> scsi_dh_alua: Make stpg synchronous
> scsi_dh_alua: call alua_rtpg() if stpg fails
> scsi_dh_alua: switch to scsi_execute_req_flags()
> scsi_dh_alua: allocate RTPG buffer separately
> scsi_dh_alua: Use separate alua_port_group structure
> scsi_dh_alua: use unique device id
> scsi_dh_alua: simplify alua_initialize()
> revert commit a8e5a2d593cb ("[SCSI] scsi_dh_alua: ALUA handler attach
> should succeed while TPG is transitioning")
> scsi_dh_alua: move optimize_stpg evaluation
> scsi_dh_alua: remove 'rel_port' from alua_dh_data structure
> scsi_dh_alua: Use workqueue for RTPG
> scsi_dh_alua: Allow workqueue to run synchronously
> scsi_dh_alua: Add new blacklist flag 'BLIST_SYNC_ALUA'
> scsi_dh_alua: Recheck state on unit attention
> scsi_dh_alua: update all port states
> scsi_dh_alua: Send TEST UNIT READY to poll for transitioning
> scsi_dh_alua: do not fail for unknown VPD identification
>
> drivers/scsi/device_handler/scsi_dh_alua.c | 1100 +++++++++++++++++-----------
> drivers/scsi/scsi_devinfo.c | 2 +
> drivers/scsi/scsi_lib.c | 188 +++++
> drivers/scsi/scsi_scan.c | 3 +
> include/scsi/scsi_device.h | 3 +
> include/scsi/scsi_devinfo.h | 1 +
> include/scsi/scsi_dh.h | 1 +
> 7 files changed, 870 insertions(+), 428 deletions(-)
>
> --
>
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c
> b/drivers/scsi/device_handler/scsi_dh_alua.c
> index cc2773b..952e3b7 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -22,7 +22,9 @@
> #include <linux/slab.h>
> #include <linux/delay.h>
> #include <linux/module.h>
> +#include <asm/unaligned.h>
> #include <scsi/scsi.h>
> +#include <scsi/scsi_dbg.h>
> #include <scsi/scsi_eh.h>
> #include <scsi/scsi_dh.h>
>
> @@ -54,27 +56,60 @@
> #define TPGS_MODE_IMPLICIT 0x1
> #define TPGS_MODE_EXPLICIT 0x2
>
> -#define ALUA_INQUIRY_SIZE 36
> +#define ALUA_RTPG_SIZE 128
> #define ALUA_FAILOVER_TIMEOUT 60
> #define ALUA_FAILOVER_RETRIES 5
> +#define ALUA_RTPG_DELAY_MSECS 5
>
> -/* flags passed from user level */
> -#define ALUA_OPTIMIZE_STPG 1
> +/* device handler flags */
> +#define ALUA_OPTIMIZE_STPG 0x01
> +#define ALUA_RTPG_EXT_HDR_UNSUPP 0x02
> +#define ALUA_SYNC_STPG 0x04
> +/* State machine flags */
> +#define ALUA_PG_RUN_RTPG 0x10
> +#define ALUA_PG_RUN_STPG 0x20
> +#define ALUA_PG_RUNNING 0x40
>
> -struct alua_dh_data {
> +static uint optimize_stpg;
> +module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
> +MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than
> sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
> +
> +static LIST_HEAD(port_group_list);
> +static DEFINE_SPINLOCK(port_group_lock);
> +static struct workqueue_struct *kaluad_wq;
> +static struct workqueue_struct *kaluad_sync_wq;
> +
> +struct alua_port_group {
> + struct kref kref;
> + struct rcu_head rcu;
> + struct list_head node;
> + unsigned char device_id_str[256];
> + int device_id_len;
> int group_id;
> - int rel_port;
> int tpgs;
> int state;
> int pref;
> unsigned flags; /* used for optimizing STPG */
> - unsigned char inq[ALUA_INQUIRY_SIZE];
> - unsigned char *buff;
> - int bufflen;
> unsigned char transition_tmo;
> - unsigned char sense[SCSI_SENSE_BUFFERSIZE];
> - int senselen;
> + unsigned long expiry;
> + unsigned long interval;
> + struct delayed_work rtpg_work;
> + spinlock_t lock;
> + struct list_head rtpg_list;
> + struct scsi_device *rtpg_sdev;
> +};
> +
> +struct alua_dh_data {
> + struct alua_port_group *pg;
> + int group_id;
> + spinlock_t pg_lock;
> struct scsi_device *sdev;
> + int init_error;
> + struct mutex init_mutex;
> +};
> +
> +struct alua_queue_data {
> + struct list_head entry;
> activate_complete callback_fn;
> void *callback_data;
> };
> @@ -82,231 +117,162 @@ struct alua_dh_data {
> #define ALUA_POLICY_SWITCH_CURRENT 0
> #define ALUA_POLICY_SWITCH_ALL 1
>
> -static char print_alua_state(int);
> -static int alua_check_sense(struct scsi_device *, struct scsi_sense_hdr *);
> -
> -static int realloc_buffer(struct alua_dh_data *h, unsigned len)
> -{
> - if (h->buff && h->buff != h->inq)
> - kfree(h->buff);
> -
> - h->buff = kmalloc(len, GFP_NOIO);
> - if (!h->buff) {
> - h->buff = h->inq;
> - h->bufflen = ALUA_INQUIRY_SIZE;
> - return 1;
> - }
> - h->bufflen = len;
> - return 0;
> -}
> +static void alua_rtpg_work(struct work_struct *work);
> +static void alua_rtpg_queue(struct alua_port_group *pg,
> + struct scsi_device *sdev,
> + struct alua_queue_data *qdata, bool force);
> +static void alua_check(struct scsi_device *sdev, bool force);
>
> -static struct request *get_alua_req(struct scsi_device *sdev,
> - void *buffer, unsigned buflen, int rw)
> +static void release_port_group(struct kref *kref)
> {
> - struct request *rq;
> - struct request_queue *q = sdev->request_queue;
> -
> - rq = blk_get_request(q, rw, GFP_NOIO);
> -
> - if (IS_ERR(rq)) {
> - sdev_printk(KERN_INFO, sdev,
> - "%s: blk_get_request failed\n", __func__);
> - return NULL;
> - }
> - blk_rq_set_block_pc(rq);
> -
> - if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
> - blk_put_request(rq);
> - sdev_printk(KERN_INFO, sdev,
> - "%s: blk_rq_map_kern failed\n", __func__);
> - return NULL;
> - }
> -
> - rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
> - REQ_FAILFAST_DRIVER;
> - rq->retries = ALUA_FAILOVER_RETRIES;
> - rq->timeout = ALUA_FAILOVER_TIMEOUT * HZ;
> -
> - return rq;
> + struct alua_port_group *pg;
> +
> + pg = container_of(kref, struct alua_port_group, kref);
> + if (pg->rtpg_sdev)
> + flush_delayed_work(&pg->rtpg_work);
> + spin_lock(&port_group_lock);
> + list_del(&pg->node);
> + spin_unlock(&port_group_lock);
> + kfree_rcu(pg, rcu);
> }
>
> /*
> - * submit_vpd_inquiry - Issue an INQUIRY VPD page 0x83 command
> + * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
> * @sdev: sdev the command should be sent to
> */
> -static int submit_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
> +static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
> + int bufflen, struct scsi_sense_hdr *sshdr, int flags)
> {
> - struct request *rq;
> - int err = SCSI_DH_RES_TEMP_UNAVAIL;
> -
> - rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
> - if (!rq)
> - goto done;
> + u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)];
> + int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
> + REQ_FAILFAST_DRIVER;
>
> /* Prepare the command. */
> - rq->cmd[0] = INQUIRY;
> - rq->cmd[1] = 1;
> - rq->cmd[2] = 0x83;
> - rq->cmd[4] = h->bufflen;
> - rq->cmd_len = COMMAND_SIZE(INQUIRY);
> -
> - rq->sense = h->sense;
> - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
> - rq->sense_len = h->senselen = 0;
> -
> - err = blk_execute_rq(rq->q, NULL, rq, 1);
> - if (err == -EIO) {
> - sdev_printk(KERN_INFO, sdev,
> - "%s: evpd inquiry failed with %x\n",
> - ALUA_DH_NAME, rq->errors);
> - h->senselen = rq->sense_len;
> - err = SCSI_DH_IO;
> - }
> - blk_put_request(rq);
> -done:
> - return err;
> + memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN));
> + cdb[0] = MAINTENANCE_IN;
> + if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
> + cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
> + else
> + cdb[1] = MI_REPORT_TARGET_PGS;
> + put_unaligned_be32(bufflen, &cdb[6]);
> +
> + return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE,
> + buff, bufflen, sshdr,
> + ALUA_FAILOVER_TIMEOUT * HZ,
> + ALUA_FAILOVER_RETRIES, NULL, req_flags);
> }
>
> /*
> - * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
> - * @sdev: sdev the command should be sent to
> + * submit_stpg - Issue a SET TARGET PORT GROUP command
> + *
> + * Currently we're only setting the current target port group state
> + * to 'active/optimized' and let the array firmware figure out
> + * the states of the remaining groups.
> */
> -static unsigned submit_rtpg(struct scsi_device *sdev, struct alua_dh_data *h,
> - bool rtpg_ext_hdr_req)
> +static int submit_stpg(struct scsi_device *sdev, int group_id,
> + struct scsi_sense_hdr *sshdr)
> {
> - struct request *rq;
> - int err = SCSI_DH_RES_TEMP_UNAVAIL;
> + u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)];
> + unsigned char stpg_data[8];
> + int stpg_len = 8;
> + int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
> + REQ_FAILFAST_DRIVER;
>
> - rq = get_alua_req(sdev, h->buff, h->bufflen, READ);
> - if (!rq)
> - goto done;
> + /* Prepare the data buffer */
> + memset(stpg_data, 0, stpg_len);
> + stpg_data[4] = TPGS_STATE_OPTIMIZED & 0x0f;
> + put_unaligned_be16(group_id, &stpg_data[6]);
>
> /* Prepare the command. */
> - rq->cmd[0] = MAINTENANCE_IN;
> - if (rtpg_ext_hdr_req)
> - rq->cmd[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
> - else
> - rq->cmd[1] = MI_REPORT_TARGET_PGS;
> - rq->cmd[6] = (h->bufflen >> 24) & 0xff;
> - rq->cmd[7] = (h->bufflen >> 16) & 0xff;
> - rq->cmd[8] = (h->bufflen >> 8) & 0xff;
> - rq->cmd[9] = h->bufflen & 0xff;
> - rq->cmd_len = COMMAND_SIZE(MAINTENANCE_IN);
> -
> - rq->sense = h->sense;
> - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
> - rq->sense_len = h->senselen = 0;
> -
> - err = blk_execute_rq(rq->q, NULL, rq, 1);
> - if (err == -EIO) {
> - sdev_printk(KERN_INFO, sdev,
> - "%s: rtpg failed with %x\n",
> - ALUA_DH_NAME, rq->errors);
> - h->senselen = rq->sense_len;
> - err = SCSI_DH_IO;
> - }
> - blk_put_request(rq);
> -done:
> - return err;
> + memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT));
> + cdb[0] = MAINTENANCE_OUT;
> + cdb[1] = MO_SET_TARGET_PGS;
> + put_unaligned_be32(stpg_len, &cdb[6]);
> +
> + return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
> + stpg_data, stpg_len,
> + sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
> + ALUA_FAILOVER_RETRIES, NULL, req_flags);
> }
>
> -/*
> - * alua_stpg - Evaluate SET TARGET GROUP STATES
> - * @sdev: the device to be evaluated
> - * @state: the new target group state
> - *
> - * Send a SET TARGET GROUP STATES command to the device.
> - * We only have to test here if we should resubmit the command;
> - * any other error is assumed as a failure.
> - */
> -static void stpg_endio(struct request *req, int error)
> +struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
> + int group_id)
> {
> - struct alua_dh_data *h = req->end_io_data;
> - struct scsi_sense_hdr sense_hdr;
> - unsigned err = SCSI_DH_OK;
> + struct alua_port_group *pg;
> +
> + if (!id_str || !id_size || !strlen(id_str))
> + return NULL;
>
> - if (host_byte(req->errors) != DID_OK ||
> - msg_byte(req->errors) != COMMAND_COMPLETE) {
> - err = SCSI_DH_IO;
> - goto done;
> + list_for_each_entry(pg, &port_group_list, node) {
> + if (pg->group_id != group_id)
> + continue;
> + if (!pg->device_id_len || pg->device_id_len != id_size)
> + continue;
> + if (strncmp(pg->device_id_str, id_str, id_size))
> + continue;
> + if (!kref_get_unless_zero(&pg->kref))
> + continue;
> + return pg;
> }
>
> - if (req->sense_len > 0) {
> - err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
> - &sense_hdr);
> - if (!err) {
> - err = SCSI_DH_IO;
> - goto done;
> - }
> - err = alua_check_sense(h->sdev, &sense_hdr);
> - if (err == ADD_TO_MLQUEUE) {
> - err = SCSI_DH_RETRY;
> - goto done;
> - }
> - sdev_printk(KERN_INFO, h->sdev,
> - "%s: stpg sense code: %02x/%02x/%02x\n",
> - ALUA_DH_NAME, sense_hdr.sense_key,
> - sense_hdr.asc, sense_hdr.ascq);
> - err = SCSI_DH_IO;
> - } else if (error)
> - err = SCSI_DH_IO;
> -
> - if (err == SCSI_DH_OK) {
> - h->state = TPGS_STATE_OPTIMIZED;
> - sdev_printk(KERN_INFO, h->sdev,
> - "%s: port group %02x switched to state %c\n",
> - ALUA_DH_NAME, h->group_id,
> - print_alua_state(h->state));
> - }
> -done:
> - req->end_io_data = NULL;
> - __blk_put_request(req->q, req);
> - if (h->callback_fn) {
> - h->callback_fn(h->callback_data, err);
> - h->callback_fn = h->callback_data = NULL;
> - }
> - return;
> + return NULL;
> }
>
> /*
> - * submit_stpg - Issue a SET TARGET GROUP STATES command
> + * alua_alloc_pg - Allocate a new port_group structure
> + * @sdev: scsi device
> + * @h: alua device_handler data
> + * @group_id: port group id
> *
> - * Currently we're only setting the current target port group state
> - * to 'active/optimized' and let the array firmware figure out
> - * the states of the remaining groups.
> + * Allocate a new port_group structure for a given
> + * device.
> */
> -static unsigned submit_stpg(struct alua_dh_data *h)
> +struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
> + int group_id, int tpgs)
> {
> - struct request *rq;
> - int stpg_len = 8;
> - struct scsi_device *sdev = h->sdev;
> + struct alua_port_group *pg, *tmp_pg;
>
> - /* Prepare the data buffer */
> - memset(h->buff, 0, stpg_len);
> - h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f;
> - h->buff[6] = (h->group_id >> 8) & 0xff;
> - h->buff[7] = h->group_id & 0xff;
> + pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
> + if (!pg)
> + return ERR_PTR(-ENOMEM);
>
> - rq = get_alua_req(sdev, h->buff, stpg_len, WRITE);
> - if (!rq)
> - return SCSI_DH_RES_TEMP_UNAVAIL;
> + pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
> + sizeof(pg->device_id_str));
> + if (pg->device_id_len <= 0) {
> + /*
> + * TPGS supported but no device identification found.
> + * Generate private device identification.
> + */
> + sdev_printk(KERN_INFO, sdev,
> + "%s: No device descriptors found\n",
> + ALUA_DH_NAME);
> + pg->device_id_str[0] = '\0';
> + pg->device_id_len = 0;
> + }
> + pg->group_id = group_id;
> + pg->tpgs = tpgs;
> + pg->state = TPGS_STATE_OPTIMIZED;
> + if (optimize_stpg)
> + pg->flags |= ALUA_OPTIMIZE_STPG;
> + kref_init(&pg->kref);
> + INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
> + INIT_LIST_HEAD(&pg->rtpg_list);
> + INIT_LIST_HEAD(&pg->node);
> + spin_lock_init(&pg->lock);
> +
> + spin_lock(&port_group_lock);
> + tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
> + group_id);
> + if (tmp_pg) {
> + spin_unlock(&port_group_lock);
> + kfree(pg);
> + return tmp_pg;
> + }
>
> - /* Prepare the command. */
> - rq->cmd[0] = MAINTENANCE_OUT;
> - rq->cmd[1] = MO_SET_TARGET_PGS;
> - rq->cmd[6] = (stpg_len >> 24) & 0xff;
> - rq->cmd[7] = (stpg_len >> 16) & 0xff;
> - rq->cmd[8] = (stpg_len >> 8) & 0xff;
> - rq->cmd[9] = stpg_len & 0xff;
> - rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT);
> -
> - rq->sense = h->sense;
> - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
> - rq->sense_len = h->senselen = 0;
> - rq->end_io_data = h;
> -
> - blk_execute_rq_nowait(rq->q, NULL, rq, 1, stpg_endio);
> - return SCSI_DH_OK;
> + list_add(&pg->node, &port_group_list);
> + spin_unlock(&port_group_lock);
> +
> + return pg;
> }
>
> /*
> @@ -316,12 +282,23 @@ static unsigned submit_stpg(struct alua_dh_data *h)
> * Examine the TPGS setting of the sdev to find out if ALUA
> * is supported.
> */
> -static int alua_check_tpgs(struct scsi_device *sdev, struct alua_dh_data *h)
> +static int alua_check_tpgs(struct scsi_device *sdev)
> {
> - int err = SCSI_DH_OK;
> + int tpgs = TPGS_MODE_NONE;
> +
> + /*
> + * ALUA support for non-disk devices is fraught with
> + * difficulties, so disable it for now.
> + */
> + if (sdev->type != TYPE_DISK) {
> + sdev_printk(KERN_INFO, sdev,
> + "%s: disable for non-disk devices\n",
> + ALUA_DH_NAME);
> + return tpgs;
> + }
>
> - h->tpgs = scsi_device_tpgs(sdev);
> - switch (h->tpgs) {
> + tpgs = scsi_device_tpgs(sdev);
> + switch (tpgs) {
> case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
> sdev_printk(KERN_INFO, sdev,
> "%s: supports implicit and explicit TPGS\n",
> @@ -335,71 +312,36 @@ static int alua_check_tpgs(struct scsi_device *sdev,
> struct alua_dh_data *h)
> sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
> ALUA_DH_NAME);
> break;
> - default:
> - h->tpgs = TPGS_MODE_NONE;
> + case TPGS_MODE_NONE:
> sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
> ALUA_DH_NAME);
> - err = SCSI_DH_DEV_UNSUPP;
> + break;
> + default:
> + sdev_printk(KERN_INFO, sdev,
> + "%s: unsupported TPGS setting %d\n",
> + ALUA_DH_NAME, tpgs);
> + tpgs = TPGS_MODE_NONE;
> break;
> }
>
> - return err;
> + return tpgs;
> }
>
> /*
> - * alua_vpd_inquiry - Evaluate INQUIRY vpd page 0x83
> + * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
> * @sdev: device to be checked
> *
> * Extract the relative target port and the target port group
> * descriptor from the list of identificators.
> */
> -static int alua_vpd_inquiry(struct scsi_device *sdev, struct alua_dh_data *h)
> +static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
> + int tpgs)
> {
> - int len;
> - unsigned err;
> - unsigned char *d;
> -
> - retry:
> - err = submit_vpd_inquiry(sdev, h);
> + int rel_port = -1, group_id;
> + struct alua_port_group *pg, *old_pg = NULL;
>
> - if (err != SCSI_DH_OK)
> - return err;
> -
> - /* Check if vpd page exceeds initial buffer */
> - len = (h->buff[2] << 8) + h->buff[3] + 4;
> - if (len > h->bufflen) {
> - /* Resubmit with the correct length */
> - if (realloc_buffer(h, len)) {
> - sdev_printk(KERN_WARNING, sdev,
> - "%s: kmalloc buffer failed\n",
> - ALUA_DH_NAME);
> - /* Temporary failure, bypass */
> - return SCSI_DH_DEV_TEMP_BUSY;
> - }
> - goto retry;
> - }
> -
> - /*
> - * Now look for the correct descriptor.
> - */
> - d = h->buff + 4;
> - while (d < h->buff + len) {
> - switch (d[1] & 0xf) {
> - case 0x4:
> - /* Relative target port */
> - h->rel_port = (d[6] << 8) + d[7];
> - break;
> - case 0x5:
> - /* Target port group */
> - h->group_id = (d[6] << 8) + d[7];
> - break;
> - default:
> - break;
> - }
> - d += d[3] + 4;
> - }
> -
> - if (h->group_id == -1) {
> + group_id = scsi_vpd_tpg_id(sdev, &rel_port);
> + if (group_id < 0) {
> /*
> * Internal error; TPGS supported but required
> * VPD identification descriptors not present.
> @@ -408,16 +350,41 @@ static int alua_vpd_inquiry(struct scsi_device *sdev,
> struct alua_dh_data *h)
> sdev_printk(KERN_INFO, sdev,
> "%s: No target port descriptors found\n",
> ALUA_DH_NAME);
> - h->state = TPGS_STATE_OPTIMIZED;
> - h->tpgs = TPGS_MODE_NONE;
> - err = SCSI_DH_DEV_UNSUPP;
> - } else {
> + return SCSI_DH_DEV_UNSUPP;
> + }
> +
> + pg = alua_alloc_pg(sdev, group_id, tpgs);
> + if (IS_ERR(pg)) {
> + if (PTR_ERR(pg) == -ENOMEM)
> + return SCSI_DH_NOMEM;
> + return SCSI_DH_DEV_UNSUPP;
> + }
> + if (pg->device_id_len)
> sdev_printk(KERN_INFO, sdev,
> - "%s: port group %02x rel port %02x\n",
> - ALUA_DH_NAME, h->group_id, h->rel_port);
> + "%s: device %s port group %x rel port %x\n",
> + ALUA_DH_NAME, pg->device_id_str,
> + group_id, rel_port);
> + else
> + sdev_printk(KERN_INFO, sdev,
> + "%s: port group %x rel port %x\n",
> + ALUA_DH_NAME, group_id, rel_port);
> +
> + /* Check for existing port group references */
> + spin_lock(&h->pg_lock);
> + old_pg = h->pg;
> + if (old_pg != pg) {
> + /* port group has changed. Update to new port group */
> + rcu_assign_pointer(h->pg, pg);
> }
> + if (sdev->synchronous_alua)
> + pg->flags |= ALUA_SYNC_STPG;
> + alua_rtpg_queue(h->pg, sdev, NULL, true);
> + spin_unlock(&h->pg_lock);
>
> - return err;
> + if (old_pg)
> + kref_put(&old_pg->kref, release_port_group);
> +
> + return SCSI_DH_OK;
> }
>
> static char print_alua_state(int state)
> @@ -447,40 +414,24 @@ static int alua_check_sense(struct scsi_device *sdev,
> {
> switch (sense_hdr->sense_key) {
> case NOT_READY:
> - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a)
> + if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
> /*
> * LUN Not Accessible - ALUA state transition
> */
> - return ADD_TO_MLQUEUE;
> - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0b)
> - /*
> - * LUN Not Accessible -- Target port in standby state
> - */
> - return SUCCESS;
> - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0c)
> - /*
> - * LUN Not Accessible -- Target port in unavailable state
> - */
> - return SUCCESS;
> - if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x12)
> - /*
> - * LUN Not Ready -- Offline
> - */
> - return SUCCESS;
> - if (sdev->allow_restart &&
> - sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x02)
> - /*
> - * if the device is not started, we need to wake
> - * the error handler to start the motor
> - */
> - return FAILED;
> + alua_check(sdev, false);
> + return NEEDS_RETRY;
> + }
> break;
> case UNIT_ATTENTION:
> - if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
> + if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
> /*
> - * Power On, Reset, or Bus Device Reset, just retry.
> + * Power On, Reset, or Bus Device Reset.
> + * Might have obscured a state transition,
> + * so schedule a recheck.
> */
> + alua_check(sdev, true);
> return ADD_TO_MLQUEUE;
> + }
> if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
> /*
> * Device internal reset
> @@ -491,16 +442,20 @@ static int alua_check_sense(struct scsi_device *sdev,
> * Mode Parameters Changed
> */
> return ADD_TO_MLQUEUE;
> - if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
> + if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
> /*
> * ALUA state changed
> */
> + alua_check(sdev, true);
> return ADD_TO_MLQUEUE;
> - if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
> + }
> + if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
> /*
> * Implicit ALUA state transition failed
> */
> + alua_check(sdev, true);
> return ADD_TO_MLQUEUE;
> + }
> if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
> /*
> * Inquiry data has changed
> @@ -520,38 +475,74 @@ static int alua_check_sense(struct scsi_device *sdev,
> }
>
> /*
> + * alua_tur - Send a TEST UNIT READY
> + * @sdev: device to which the TEST UNIT READY command should be send
> + *
> + * Send a TEST UNIT READY to @sdev to figure out the device state
> + * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
> + * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
> + */
> +static int alua_tur(struct scsi_device *sdev)
> +{
> + struct scsi_sense_hdr sense_hdr;
> + int retval;
> +
> + retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
> + ALUA_FAILOVER_RETRIES, &sense_hdr);
> + if (sense_hdr.sense_key == NOT_READY &&
> + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> + return SCSI_DH_RETRY;
> + else if (retval)
> + return SCSI_DH_IO;
> + else
> + return SCSI_DH_OK;
> +}
> +
> +/*
> * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
> * @sdev: the device to be evaluated.
> - * @wait_for_transition: if nonzero, wait ALUA_FAILOVER_TIMEOUT seconds for
> device to exit transitioning state
> *
> * Evaluate the Target Port Group State.
> * Returns SCSI_DH_DEV_OFFLINED if the path is
> * found to be unusable.
> */
> -static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int
> wait_for_transition)
> +static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
> {
> struct scsi_sense_hdr sense_hdr;
> - int len, k, off, valid_states = 0;
> - unsigned char *ucp;
> - unsigned err;
> - bool rtpg_ext_hdr_req = 1;
> - unsigned long expiry, interval = 0;
> + struct alua_port_group *tmp_pg;
> + int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE;
> + unsigned char *desc, *buff;
> + unsigned err, retval;
> unsigned int tpg_desc_tbl_off;
> unsigned char orig_transition_tmo;
> + unsigned long flags;
>
> - if (!h->transition_tmo)
> - expiry = round_jiffies_up(jiffies + ALUA_FAILOVER_TIMEOUT * HZ);
> - else
> - expiry = round_jiffies_up(jiffies + h->transition_tmo * HZ);
> + if (!pg->expiry) {
> + unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
>
> - retry:
> - err = submit_rtpg(sdev, h, rtpg_ext_hdr_req);
> + if (pg->transition_tmo)
> + transition_tmo = pg->transition_tmo * HZ;
> +
> + pg->expiry = round_jiffies_up(jiffies + transition_tmo);
> + }
> +
> + buff = kzalloc(bufflen, GFP_KERNEL);
> + if (!buff)
> + return SCSI_DH_DEV_TEMP_BUSY;
>
> - if (err == SCSI_DH_IO && h->senselen > 0) {
> - err = scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
> - &sense_hdr);
> - if (!err)
> + retry:
> + retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
> +
> + if (retval) {
> + if (!scsi_sense_valid(&sense_hdr)) {
> + sdev_printk(KERN_INFO, sdev,
> + "%s: rtpg failed, result %d\n",
> + ALUA_DH_NAME, retval);
> + kfree(buff);
> + if (driver_byte(retval) == DRIVER_ERROR)
> + return SCSI_DH_DEV_TEMP_BUSY;
> return SCSI_DH_IO;
> + }
>
> /*
> * submit_rtpg() has failed on existing arrays
> @@ -561,73 +552,101 @@ static int alua_rtpg(struct scsi_device *sdev, struct
> alua_dh_data *h, int wait_
> * The retry without rtpg_ext_hdr_req set
> * handles this.
> */
> - if (rtpg_ext_hdr_req == 1 &&
> + if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
> sense_hdr.sense_key == ILLEGAL_REQUEST &&
> sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
> - rtpg_ext_hdr_req = 0;
> + pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
> goto retry;
> }
> -
> - err = alua_check_sense(sdev, &sense_hdr);
> - if (err == ADD_TO_MLQUEUE && time_before(jiffies, expiry))
> - goto retry;
> - sdev_printk(KERN_INFO, sdev,
> - "%s: rtpg sense code %02x/%02x/%02x\n",
> - ALUA_DH_NAME, sense_hdr.sense_key,
> - sense_hdr.asc, sense_hdr.ascq);
> - err = SCSI_DH_IO;
> + /*
> + * Retry on ALUA state transition or if any
> + * UNIT ATTENTION occurred.
> + */
> + if (sense_hdr.sense_key == NOT_READY &&
> + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
> + err = SCSI_DH_RETRY;
> + else if (sense_hdr.sense_key == UNIT_ATTENTION)
> + err = SCSI_DH_RETRY;
> + if (err == SCSI_DH_RETRY &&
> + pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
> + sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
> + ALUA_DH_NAME);
> + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> + return err;
> + }
> + sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
> + ALUA_DH_NAME);
> + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> + kfree(buff);
> + pg->expiry = 0;
> + return SCSI_DH_IO;
> }
> - if (err != SCSI_DH_OK)
> - return err;
>
> - len = (h->buff[0] << 24) + (h->buff[1] << 16) +
> - (h->buff[2] << 8) + h->buff[3] + 4;
> + len = get_unaligned_be32(&buff[0]) + 4;
>
> - if (len > h->bufflen) {
> + if (len > bufflen) {
> /* Resubmit with the correct length */
> - if (realloc_buffer(h, len)) {
> + kfree(buff);
> + bufflen = len;
> + buff = kmalloc(bufflen, GFP_KERNEL);
> + if (!buff) {
> sdev_printk(KERN_WARNING, sdev,
> "%s: kmalloc buffer failed\n",__func__);
> /* Temporary failure, bypass */
> + pg->expiry = 0;
> return SCSI_DH_DEV_TEMP_BUSY;
> }
> goto retry;
> }
>
> - orig_transition_tmo = h->transition_tmo;
> - if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && h->buff[5] != 0)
> - h->transition_tmo = h->buff[5];
> + orig_transition_tmo = pg->transition_tmo;
> + if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
> + pg->transition_tmo = buff[5];
> else
> - h->transition_tmo = ALUA_FAILOVER_TIMEOUT;
> + pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
>
> - if (wait_for_transition && (orig_transition_tmo != h->transition_tmo)) {
> + if (orig_transition_tmo != pg->transition_tmo) {
> sdev_printk(KERN_INFO, sdev,
> "%s: transition timeout set to %d seconds\n",
> - ALUA_DH_NAME, h->transition_tmo);
> - expiry = jiffies + h->transition_tmo * HZ;
> + ALUA_DH_NAME, pg->transition_tmo);
> + pg->expiry = jiffies + pg->transition_tmo * HZ;
> }
>
> - if ((h->buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> + if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
> tpg_desc_tbl_off = 8;
> else
> tpg_desc_tbl_off = 4;
>
> - for (k = tpg_desc_tbl_off, ucp = h->buff + tpg_desc_tbl_off;
> + for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
> k < len;
> - k += off, ucp += off) {
> -
> - if (h->group_id == (ucp[2] << 8) + ucp[3]) {
> - h->state = ucp[0] & 0x0f;
> - h->pref = ucp[0] >> 7;
> - valid_states = ucp[1];
> + k += off, desc += off) {
> + u16 group_id = get_unaligned_be16(&desc[2]);
> +
> + spin_lock_irqsave(&port_group_lock, flags);
> + tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
> + group_id);
> + spin_unlock_irqrestore(&port_group_lock, flags);
> + if (tmp_pg) {
> + if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
> + if ((tmp_pg == pg) ||
> + !(tmp_pg->flags & ALUA_PG_RUNNING)) {
> + tmp_pg->state = desc[0] & 0x0f;
> + tmp_pg->pref = desc[0] >> 7;
> + }
> + if (tmp_pg == pg)
> + valid_states = desc[1];
> + spin_unlock_irqrestore(&tmp_pg->lock, flags);
> + }
> + kref_put(&tmp_pg->kref, release_port_group);
> }
> - off = 8 + (ucp[7] * 4);
> + off = 8 + (desc[7] * 4);
> }
>
> + spin_lock_irqsave(&pg->lock, flags);
> sdev_printk(KERN_INFO, sdev,
> "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
> - ALUA_DH_NAME, h->group_id, print_alua_state(h->state),
> - h->pref ? "preferred" : "non-preferred",
> + ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
> + pg->pref ? "preferred" : "non-preferred",
> valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
> valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
> valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
> @@ -636,36 +655,224 @@ static int alua_rtpg(struct scsi_device *sdev, struct
> alua_dh_data *h, int wait_
> valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
> valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
>
> - switch (h->state) {
> + switch (pg->state) {
> case TPGS_STATE_TRANSITIONING:
> - if (wait_for_transition) {
> - if (time_before(jiffies, expiry)) {
> - /* State transition, retry */
> - interval += 2000;
> - msleep(interval);
> - goto retry;
> - }
> + if (time_before(jiffies, pg->expiry)) {
> + /* State transition, retry */
> + pg->interval = 2;
> err = SCSI_DH_RETRY;
> } else {
> - err = SCSI_DH_OK;
> + /* Transitioning time exceeded, set port to standby */
> + err = SCSI_DH_IO;
> + pg->state = TPGS_STATE_STANDBY;
> + pg->expiry = 0;
> }
> -
> - /* Transitioning time exceeded, set port to standby */
> - h->state = TPGS_STATE_STANDBY;
> break;
> case TPGS_STATE_OFFLINE:
> /* Path unusable */
> err = SCSI_DH_DEV_OFFLINED;
> + pg->expiry = 0;
> break;
> default:
> /* Useable path if active */
> err = SCSI_DH_OK;
> + pg->expiry = 0;
> break;
> }
> + spin_unlock_irqrestore(&pg->lock, flags);
> + kfree(buff);
> return err;
> }
>
> /*
> + * alua_stpg - Issue a SET TARGET PORT GROUP command
> + *
> + * Issue a SET TARGET PORT GROUP command and evaluate the
> + * response. Returns SCSI_DH_RETRY per default to trigger
> + * a re-evaluation of the target group state or SCSI_DH_OK
> + * if no further action needs to be taken.
> + */
> +static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
> +{
> + int retval;
> + struct scsi_sense_hdr sense_hdr;
> +
> + if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
> + /* Only implicit ALUA supported, retry */
> + return SCSI_DH_RETRY;
> + }
> + switch (pg->state) {
> + case TPGS_STATE_OPTIMIZED:
> + return SCSI_DH_OK;
> + case TPGS_STATE_NONOPTIMIZED:
> + if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
> + !pg->pref &&
> + (pg->tpgs & TPGS_MODE_IMPLICIT))
> + return SCSI_DH_OK;
> + break;
> + case TPGS_STATE_STANDBY:
> + case TPGS_STATE_UNAVAILABLE:
> + break;
> + case TPGS_STATE_OFFLINE:
> + return SCSI_DH_IO;
> + case TPGS_STATE_TRANSITIONING:
> + break;
> + default:
> + sdev_printk(KERN_INFO, sdev,
> + "%s: stpg failed, unhandled TPGS state %d",
> + ALUA_DH_NAME, pg->state);
> + return SCSI_DH_NOSYS;
> + }
> + retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
> +
> + if (retval) {
> + if (!scsi_sense_valid(&sense_hdr)) {
> + sdev_printk(KERN_INFO, sdev,
> + "%s: stpg failed, result %d",
> + ALUA_DH_NAME, retval);
> + if (driver_byte(retval) == DRIVER_ERROR)
> + return SCSI_DH_DEV_TEMP_BUSY;
> + } else {
> + sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
> + ALUA_DH_NAME);
> + scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
> + }
> + }
> + /* Retry RTPG */
> + return SCSI_DH_RETRY;
> +}
> +
> +static void alua_rtpg_work(struct work_struct *work)
> +{
> + struct alua_port_group *pg =
> + container_of(work, struct alua_port_group, rtpg_work.work);
> + struct scsi_device *sdev;
> + LIST_HEAD(qdata_list);
> + int err = SCSI_DH_OK;
> + struct alua_queue_data *qdata, *tmp;
> + unsigned long flags;
> + struct workqueue_struct *alua_wq = kaluad_wq;
> +
> + spin_lock_irqsave(&pg->lock, flags);
> + sdev = pg->rtpg_sdev;
> + if (!sdev) {
> + WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
> + WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
> + spin_unlock_irqrestore(&pg->lock, flags);
> + return;
> + }
> + if (pg->flags & ALUA_SYNC_STPG)
> + alua_wq = kaluad_sync_wq;
> + pg->flags |= ALUA_PG_RUNNING;
> + if (pg->flags & ALUA_PG_RUN_RTPG) {
> + int state = pg->state;
> +
> + pg->flags &= ~ALUA_PG_RUN_RTPG;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + if (state == TPGS_STATE_TRANSITIONING) {
> + if (alua_tur(sdev) == SCSI_DH_RETRY) {
> + spin_lock_irqsave(&pg->lock, flags);
> + pg->flags &= ~ALUA_PG_RUNNING;
> + pg->flags |= ALUA_PG_RUN_RTPG;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + queue_delayed_work(alua_wq, &pg->rtpg_work,
> + pg->interval * HZ);
> + return;
> + }
> + /* Send RTPG on failure or if TUR indicates SUCCESS */
> + }
> + err = alua_rtpg(sdev, pg);
> + spin_lock_irqsave(&pg->lock, flags);
> + if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
> + pg->flags &= ~ALUA_PG_RUNNING;
> + pg->flags |= ALUA_PG_RUN_RTPG;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + queue_delayed_work(alua_wq, &pg->rtpg_work,
> + pg->interval * HZ);
> + return;
> + }
> + if (err != SCSI_DH_OK)
> + pg->flags &= ~ALUA_PG_RUN_STPG;
> + }
> + if (pg->flags & ALUA_PG_RUN_STPG) {
> + pg->flags &= ~ALUA_PG_RUN_STPG;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + err = alua_stpg(sdev, pg);
> + spin_lock_irqsave(&pg->lock, flags);
> + if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
> + pg->flags |= ALUA_PG_RUN_RTPG;
> + pg->interval = 0;
> + pg->flags &= ~ALUA_PG_RUNNING;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + queue_delayed_work(alua_wq, &pg->rtpg_work,
> + pg->interval * HZ);
> + return;
> + }
> + }
> +
> + list_splice_init(&pg->rtpg_list, &qdata_list);
> + pg->rtpg_sdev = NULL;
> + spin_unlock_irqrestore(&pg->lock, flags);
> +
> + list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
> + list_del(&qdata->entry);
> + if (qdata->callback_fn)
> + qdata->callback_fn(qdata->callback_data, err);
> + kfree(qdata);
> + }
> + spin_lock_irqsave(&pg->lock, flags);
> + pg->flags &= ~ALUA_PG_RUNNING;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + scsi_device_put(sdev);
> + kref_put(&pg->kref, release_port_group);
> +}
> +
> +static void alua_rtpg_queue(struct alua_port_group *pg,
> + struct scsi_device *sdev,
> + struct alua_queue_data *qdata, bool force)
> +{
> + int start_queue = 0;
> + unsigned long flags;
> + struct workqueue_struct *alua_wq = kaluad_wq;
> +
> + if (!pg)
> + return;
> +
> + spin_lock_irqsave(&pg->lock, flags);
> + if (qdata) {
> + list_add_tail(&qdata->entry, &pg->rtpg_list);
> + pg->flags |= ALUA_PG_RUN_STPG;
> + force = true;
> + }
> + if (pg->rtpg_sdev == NULL) {
> + pg->interval = 0;
> + pg->flags |= ALUA_PG_RUN_RTPG;
> + kref_get(&pg->kref);
> + pg->rtpg_sdev = sdev;
> + scsi_device_get(sdev);
> + start_queue = 1;
> + } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
> + pg->flags |= ALUA_PG_RUN_RTPG;
> + /* Do not queue if the worker is already running */
> + if (!(pg->flags & ALUA_PG_RUNNING)) {
> + kref_get(&pg->kref);
> + start_queue = 1;
> + }
> + }
> +
> + if (pg->flags & ALUA_SYNC_STPG)
> + alua_wq = kaluad_sync_wq;
> + spin_unlock_irqrestore(&pg->lock, flags);
> +
> + if (start_queue &&
> + !queue_delayed_work(alua_wq, &pg->rtpg_work,
> + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
> + scsi_device_put(sdev);
> + kref_put(&pg->kref, release_port_group);
> + }
> +}
> +
> +/*
> * alua_initialize - Initialize ALUA state
> * @sdev: the device to be initialized
> *
> @@ -674,21 +881,14 @@ static int alua_rtpg(struct scsi_device *sdev, struct
> alua_dh_data *h, int wait_
> */
> static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
> {
> - int err;
> -
> - err = alua_check_tpgs(sdev, h);
> - if (err != SCSI_DH_OK)
> - goto out;
> -
> - err = alua_vpd_inquiry(sdev, h);
> - if (err != SCSI_DH_OK)
> - goto out;
> -
> - err = alua_rtpg(sdev, h, 0);
> - if (err != SCSI_DH_OK)
> - goto out;
> -
> -out:
> + int err = SCSI_DH_DEV_UNSUPP, tpgs;
> +
> + mutex_lock(&h->init_mutex);
> + tpgs = alua_check_tpgs(sdev);
> + if (tpgs != TPGS_MODE_NONE)
> + err = alua_check_vpd(sdev, h, tpgs);
> + h->init_error = err;
> + mutex_unlock(&h->init_mutex);
> return err;
> }
> /*
> @@ -703,9 +903,11 @@ out:
> static int alua_set_params(struct scsi_device *sdev, const char *params)
> {
> struct alua_dh_data *h = sdev->handler_data;
> + struct alua_port_group __rcu *pg = NULL;
> unsigned int optimize = 0, argc;
> const char *p = params;
> int result = SCSI_DH_OK;
> + unsigned long flags;
>
> if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
> return -EINVAL;
> @@ -715,18 +917,23 @@ static int alua_set_params(struct scsi_device *sdev, const
> char *params)
> if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
> return -EINVAL;
>
> + rcu_read_lock();
> + pg = rcu_dereference(h->pg);
> + if (!pg) {
> + rcu_read_unlock();
> + return -ENXIO;
> + }
> + spin_lock_irqsave(&pg->lock, flags);
> if (optimize)
> - h->flags |= ALUA_OPTIMIZE_STPG;
> + pg->flags |= ALUA_OPTIMIZE_STPG;
> else
> - h->flags &= ~ALUA_OPTIMIZE_STPG;
> + pg->flags &= ~ALUA_OPTIMIZE_STPG;
> + spin_unlock_irqrestore(&pg->lock, flags);
> + rcu_read_unlock();
>
> return result;
> }
>
> -static uint optimize_stpg;
> -module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
> -MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than
> sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
> -
> /*
> * alua_activate - activate a path
> * @sdev: device on the path to be activated
> @@ -742,48 +949,33 @@ static int alua_activate(struct scsi_device *sdev,
> {
> struct alua_dh_data *h = sdev->handler_data;
> int err = SCSI_DH_OK;
> - int stpg = 0;
> + struct alua_queue_data *qdata;
> + struct alua_port_group __rcu *pg;
>
> - err = alua_rtpg(sdev, h, 1);
> - if (err != SCSI_DH_OK)
> + qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
> + if (!qdata) {
> + err = SCSI_DH_RES_TEMP_UNAVAIL;
> goto out;
> -
> - if (optimize_stpg)
> - h->flags |= ALUA_OPTIMIZE_STPG;
> -
> - if (h->tpgs & TPGS_MODE_EXPLICIT) {
> - switch (h->state) {
> - case TPGS_STATE_NONOPTIMIZED:
> - stpg = 1;
> - if ((h->flags & ALUA_OPTIMIZE_STPG) &&
> - (!h->pref) &&
> - (h->tpgs & TPGS_MODE_IMPLICIT))
> - stpg = 0;
> - break;
> - case TPGS_STATE_STANDBY:
> - case TPGS_STATE_UNAVAILABLE:
> - stpg = 1;
> - break;
> - case TPGS_STATE_OFFLINE:
> - err = SCSI_DH_IO;
> - break;
> - case TPGS_STATE_TRANSITIONING:
> - err = SCSI_DH_RETRY;
> - break;
> - default:
> - break;
> - }
> }
> -
> - if (stpg) {
> - h->callback_fn = fn;
> - h->callback_data = data;
> - err = submit_stpg(h);
> - if (err == SCSI_DH_OK)
> - return 0;
> - h->callback_fn = h->callback_data = NULL;
> + qdata->callback_fn = fn;
> + qdata->callback_data = data;
> +
> + mutex_lock(&h->init_mutex);
> + rcu_read_lock();
> + pg = rcu_dereference(h->pg);
> + if (!pg || !kref_get_unless_zero(&pg->kref)) {
> + rcu_read_unlock();
> + kfree(qdata);
> + err = h->init_error;
> + mutex_unlock(&h->init_mutex);
> + goto out;
> }
> + fn = NULL;
> + rcu_read_unlock();
> + mutex_unlock(&h->init_mutex);
>
> + alua_rtpg_queue(pg, sdev, qdata, true);
> + kref_put(&pg->kref, release_port_group);
> out:
> if (fn)
> fn(data, err);
> @@ -791,6 +983,29 @@ out:
> }
>
> /*
> + * alua_check - check path status
> + * @sdev: device on the path to be checked
> + *
> + * Check the device status
> + */
> +static void alua_check(struct scsi_device *sdev, bool force)
> +{
> + struct alua_dh_data *h = sdev->handler_data;
> + struct alua_port_group *pg;
> +
> + rcu_read_lock();
> + pg = rcu_dereference(h->pg);
> + if (!pg || !kref_get_unless_zero(&pg->kref)) {
> + rcu_read_unlock();
> + return;
> + }
> + rcu_read_unlock();
> +
> + alua_rtpg_queue(pg, sdev, NULL, force);
> + kref_put(&pg->kref, release_port_group);
> +}
> +
> +/*
> * alua_prep_fn - request callback
> *
> * Fail I/O to all paths not in state
> @@ -799,13 +1014,20 @@ out:
> static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
> {
> struct alua_dh_data *h = sdev->handler_data;
> + struct alua_port_group __rcu *pg;
> + int state = TPGS_STATE_OPTIMIZED;
> int ret = BLKPREP_OK;
>
> - if (h->state == TPGS_STATE_TRANSITIONING)
> + rcu_read_lock();
> + pg = rcu_dereference(h->pg);
> + if (pg)
> + state = pg->state;
> + rcu_read_unlock();
> + if (state == TPGS_STATE_TRANSITIONING)
> ret = BLKPREP_DEFER;
> - else if (h->state != TPGS_STATE_OPTIMIZED &&
> - h->state != TPGS_STATE_NONOPTIMIZED &&
> - h->state != TPGS_STATE_LBA_DEPENDENT) {
> + else if (state != TPGS_STATE_OPTIMIZED &&
> + state != TPGS_STATE_NONOPTIMIZED &&
> + state != TPGS_STATE_LBA_DEPENDENT) {
> ret = BLKPREP_KILL;
> req->cmd_flags |= REQ_QUIET;
> }
> @@ -820,20 +1042,20 @@ static int alua_prep_fn(struct scsi_device *sdev, struct
> request *req)
> static int alua_bus_attach(struct scsi_device *sdev)
> {
> struct alua_dh_data *h;
> - int err;
> + int err, ret = -EINVAL;
>
> h = kzalloc(sizeof(*h) , GFP_KERNEL);
> if (!h)
> return -ENOMEM;
> - h->tpgs = TPGS_MODE_UNINITIALIZED;
> - h->state = TPGS_STATE_OPTIMIZED;
> - h->group_id = -1;
> - h->rel_port = -1;
> - h->buff = h->inq;
> - h->bufflen = ALUA_INQUIRY_SIZE;
> + spin_lock_init(&h->pg_lock);
> + rcu_assign_pointer(h->pg, NULL);
> + h->init_error = SCSI_DH_OK;
> h->sdev = sdev;
>
> + mutex_init(&h->init_mutex);
> err = alua_initialize(sdev, h);
> + if (err == SCSI_DH_NOMEM)
> + ret = -ENOMEM;
> if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
> goto failed;
>
> @@ -841,7 +1063,7 @@ static int alua_bus_attach(struct scsi_device *sdev)
> return 0;
> failed:
> kfree(h);
> - return -EINVAL;
> + return ret;
> }
>
> /*
> @@ -851,9 +1073,16 @@ failed:
> static void alua_bus_detach(struct scsi_device *sdev)
> {
> struct alua_dh_data *h = sdev->handler_data;
> + struct alua_port_group *pg;
> +
> + spin_lock(&h->pg_lock);
> + pg = h->pg;
> + rcu_assign_pointer(h->pg, NULL);
> + h->sdev = NULL;
> + spin_unlock(&h->pg_lock);
> + if (pg)
> + kref_put(&pg->kref, release_port_group);
>
> - if (h->buff && h->inq != h->buff)
> - kfree(h->buff);
> sdev->handler_data = NULL;
> kfree(h);
> }
> @@ -873,16 +1102,31 @@ static int __init alua_init(void)
> {
> int r;
>
> + kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
> + if (!kaluad_wq) {
> + /* Temporary failure, bypass */
> + return SCSI_DH_DEV_TEMP_BUSY;
> + }
> + kaluad_sync_wq = create_workqueue("kaluad_sync");
> + if (!kaluad_sync_wq) {
> + destroy_workqueue(kaluad_wq);
> + return SCSI_DH_DEV_TEMP_BUSY;
> + }
> r = scsi_register_device_handler(&alua_dh);
> - if (r != 0)
> + if (r != 0) {
> printk(KERN_ERR "%s: Failed to register scsi device handler",
> ALUA_DH_NAME);
> + destroy_workqueue(kaluad_sync_wq);
> + destroy_workqueue(kaluad_wq);
> + }
> return r;
> }
>
> static void __exit alua_exit(void)
> {
> scsi_unregister_device_handler(&alua_dh);
> + destroy_workqueue(kaluad_sync_wq);
> + destroy_workqueue(kaluad_wq);
> }
>
> module_init(alua_init);
> diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
> index da2e068..0678535 100644
> --- a/drivers/scsi/scsi_devinfo.c
> +++ b/drivers/scsi/scsi_devinfo.c
> @@ -219,6 +219,8 @@ static struct {
> {"NAKAMICH", "MJ-5.16S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
> {"NEC", "PD-1 ODX654P", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
> {"NEC", "iStorage", NULL, BLIST_REPORTLUN2},
> + {"NETAPP", "LUN C-Mode", NULL, BLIST_SYNC_ALUA},
> + {"NETAPP", "INF-01-00", NULL, BLIST_SYNC_ALUA},
> {"NRC", "MBR-7", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
> {"NRC", "MBR-7.4", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
> {"PIONEER", "CD-ROM DRM-600", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index dd8ad2a..fa6b2c4 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -23,6 +23,7 @@
> #include <linux/scatterlist.h>
> #include <linux/blk-mq.h>
> #include <linux/ratelimit.h>
> +#include <asm/unaligned.h>
>
> #include <scsi/scsi.h>
> #include <scsi/scsi_cmnd.h>
> @@ -3154,3 +3155,190 @@ void sdev_enable_disk_events(struct scsi_device *sdev)
> atomic_dec(&sdev->disk_events_disable_depth);
> }
> EXPORT_SYMBOL(sdev_enable_disk_events);
> +
> +/**
> + * scsi_vpd_lun_id - return a unique device identification
> + * @sdev: SCSI device
> + * @id: buffer for the identification
> + * @id_len: length of the buffer
> + *
> + * Copies a unique device identification into @id based
> + * on the information in the VPD page 0x83 of the device.
> + * The string will be formatted as a SCSI name string.
> + *
> + * Returns the length of the identification or error on failure.
> + * If the identifier is longer than the supplied buffer the actual
> + * identifier length is returned and the buffer is not zero-padded.
> + */
> +int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len)
> +{
> + u8 cur_id_type = 0xff;
> + u8 cur_id_size = 0;
> + unsigned char *d, *cur_id_str;
> + unsigned char __rcu *vpd_pg83;
> + int id_size = -EINVAL;
> +
> + rcu_read_lock();
> + vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
> + if (!vpd_pg83) {
> + rcu_read_unlock();
> + return -ENXIO;
> + }
> +
> + /*
> + * Look for the correct descriptor.
> + * Order of preference for lun descriptor:
> + * - SCSI name string
> + * - NAA IEEE Registered Extended
> + * - EUI-64 based 16-byte
> + * - EUI-64 based 12-byte
> + * - NAA IEEE Registered
> + * - NAA IEEE Extended
> + * as longer descriptors reduce the likelihood
> + * of identification clashes.
> + */
> +
> + /* The id string must be at least 20 bytes + terminating NULL byte */
> + if (id_len < 21) {
> + rcu_read_unlock();
> + return -EINVAL;
> + }
> +
> + memset(id, 0, id_len);
> + d = vpd_pg83 + 4;
> + while (d < vpd_pg83 + sdev->vpd_pg83_len) {
> + /* Skip designators not referring to the LUN */
> + if ((d[1] & 0x30) != 0x00)
> + goto next_desig;
> +
> + switch (d[1] & 0xf) {
> + case 0x2:
> + /* EUI-64 */
> + if (cur_id_size > d[3])
> + break;
> + /* Prefer NAA IEEE Registered Extended */
> + if (cur_id_type == 0x3 &&
> + cur_id_size == d[3])
> + break;
> + cur_id_size = d[3];
> + cur_id_str = d + 4;
> + cur_id_type = d[1] & 0xf;
> + switch (cur_id_size) {
> + case 8:
> + id_size = snprintf(id, id_len,
> + "eui.%8phN",
> + cur_id_str);
> + break;
> + case 12:
> + id_size = snprintf(id, id_len,
> + "eui.%12phN",
> + cur_id_str);
> + break;
> + case 16:
> + id_size = snprintf(id, id_len,
> + "eui.%16phN",
> + cur_id_str);
> + break;
> + default:
> + cur_id_size = 0;
> + break;
> + }
> + break;
> + case 0x3:
> + /* NAA */
> + if (cur_id_size > d[3])
> + break;
> + cur_id_size = d[3];
> + cur_id_str = d + 4;
> + cur_id_type = d[1] & 0xf;
> + switch (cur_id_size) {
> + case 8:
> + id_size = snprintf(id, id_len,
> + "naa.%8phN",
> + cur_id_str);
> + break;
> + case 16:
> + id_size = snprintf(id, id_len,
> + "naa.%16phN",
> + cur_id_str);
> + break;
> + default:
> + cur_id_size = 0;
> + break;
> + }
> + break;
> + case 0x8:
> + /* SCSI name string */
> + if (cur_id_size + 4 > d[3])
> + break;
> + /* Prefer others for truncated descriptor */
> + if (cur_id_size && d[3] > id_len)
> + break;
> + cur_id_size = id_size = d[3];
> + cur_id_str = d + 4;
> + cur_id_type = d[1] & 0xf;
> + if (cur_id_size >= id_len)
> + cur_id_size = id_len - 1;
> + memcpy(id, cur_id_str, cur_id_size);
> + /* Decrease priority for truncated descriptor */
> + if (cur_id_size != id_size)
> + cur_id_size = 6;
> + break;
> + default:
> + break;
> + }
> +next_desig:
> + d += d[3] + 4;
> + }
> + rcu_read_unlock();
> +
> + return id_size;
> +}
> +EXPORT_SYMBOL(scsi_vpd_lun_id);
> +
> +/*
> + * scsi_vpd_tpg_id - return a target port group identifier
> + * @sdev: SCSI device
> + *
> + * Returns the Target Port Group identifier from the information
> + * from VPD page 0x83 of the device.
> + *
> + * Returns the identifier or error on failure.
> + */
> +int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id)
> +{
> + unsigned char *d;
> + unsigned char __rcu *vpd_pg83;
> + int group_id = -EAGAIN, rel_port = -1;
> +
> + rcu_read_lock();
> + vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
> + if (!vpd_pg83) {
> + rcu_read_unlock();
> + return -ENXIO;
> + }
> +
> + d = sdev->vpd_pg83 + 4;
> + while (d < sdev->vpd_pg83 + sdev->vpd_pg83_len) {
> + switch (d[1] & 0xf) {
> + case 0x4:
> + /* Relative target port */
> + rel_port = get_unaligned_be16(&d[6]);
> + break;
> + case 0x5:
> + /* Target port group */
> + group_id = get_unaligned_be16(&d[6]);
> + break;
> + default:
> + break;
> + }
> + d += d[3] + 4;
> + }
> + rcu_read_unlock();
> +
> + if (group_id >= 0 && rel_id && rel_port != -1)
> + *rel_id = rel_port;
> +
> + return group_id;
> +}
> +EXPORT_SYMBOL(scsi_vpd_tpg_id);
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index f0cfaac..d84b2c5 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -962,6 +962,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned
> char *inq_result,
> if (*bflags & BLIST_NO_DIF)
> sdev->no_dif = 1;
>
> + if (*bflags & BLIST_SYNC_ALUA)
> + sdev->synchronous_alua = 1;
> +
> sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT;
>
> if (*bflags & BLIST_TRY_VPD_PAGES)
> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> index 4f6ba34..c7c8876 100644
> --- a/include/scsi/scsi_device.h
> +++ b/include/scsi/scsi_device.h
> @@ -175,6 +175,7 @@ struct scsi_device {
> unsigned no_dif:1; /* T10 PI (DIF) should be disabled */
> unsigned broken_fua:1; /* Don't set FUA bit */
> unsigned lun_in_cdb:1; /* Store LUN bits in CDB[1] */
> + unsigned synchronous_alua:1; /* Synchronous ALUA commands */
>
> atomic_t disk_events_disable_depth; /* disable depth for disk events */
>
> @@ -415,6 +416,8 @@ static inline int scsi_execute_req(struct scsi_device *sdev,
> }
> extern void sdev_disable_disk_events(struct scsi_device *sdev);
> extern void sdev_enable_disk_events(struct scsi_device *sdev);
> +extern int scsi_vpd_lun_id(struct scsi_device *, char *, size_t);
> +extern int scsi_vpd_tpg_id(struct scsi_device *, int *);
>
> #ifdef CONFIG_PM
> extern int scsi_autopm_get_device(struct scsi_device *);
> diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
> index 96e3f56..9f750cb 100644
> --- a/include/scsi/scsi_devinfo.h
> +++ b/include/scsi/scsi_devinfo.h
> @@ -37,5 +37,6 @@
> #define BLIST_TRY_VPD_PAGES 0x10000000 /* Attempt to read VPD pages */
> #define BLIST_NO_RSOC 0x20000000 /* don't try to issue RSOC */
> #define BLIST_MAX_1024 0x40000000 /* maximum 1024 sector cdb length */
> +#define BLIST_SYNC_ALUA 0x80000000 /* Synchronous ALUA commands */
>
> #endif
> diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h
> index 85d7317..7e184c6 100644
> --- a/include/scsi/scsi_dh.h
> +++ b/include/scsi/scsi_dh.h
> @@ -52,6 +52,7 @@ enum {
> SCSI_DH_TIMED_OUT,
> SCSI_DH_RES_TEMP_UNAVAIL,
> SCSI_DH_DEV_OFFLINED,
> + SCSI_DH_NOMEM,
> SCSI_DH_NOSYS,
> SCSI_DH_DRIVER_MAX,
> };
>
> --
> canonical-kernel-team mailing list
> canonical-kernel-team at lists.canonical.com
> Modify settings or unsubscribe at:
> https://lists.canonical.com/mailman/listinfo/canonical-kernel-team
>
>
>
> --
> kernel-team mailing list
> kernel-team at lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
More information about the kernel-team
mailing list