ACK: [SRU][F:linux-bluefield][PATCH v1 1/1] UBUNTU: SAUCE: ipmb_host.c: Fix slow transactions

Kleber Souza kleber.souza at canonical.com
Thu Apr 8 09:17:53 UTC 2021


On 02.04.21 23:40, Asmaa Mnebhi wrote:
> BugLink: https://bugs.launchpad.net/bugs/1922393
> 
> The previous ipmb_host patch broke customers' IPMB tests. They
> requested to make the ipmb_host response time as long as before.
> In the case where the I2C bus is made very busy, the ipmb_host driver
> just drops slow/delayed responses. This fix elongates the timeout of the response. So restore the previous stable ipmb_host patch.
> This patch also fixes a crash which occurs after powercycling certain BlueField-2 systems.
> The crash is due to the handshake which takes too long to wait for a response at boot time.
> 
> Signed-off-by: Asmaa Mnebhi <asmaa at nvidia.com>
> Reviewed-by: David Thompson <davthompson at nvidia.com>
> Signed-off-by: Asmaa Mnebhi <asmaa at nvidia.com>

Acked-by: Kleber Sacilotto de Souza <kleber.souza at canonical.com>

Thanks

> ---
>   drivers/char/ipmi/ipmb_host.c | 131 +++++++++++++++++++++++++++++++++++-------
>   1 file changed, 111 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/char/ipmi/ipmb_host.c b/drivers/char/ipmi/ipmb_host.c
> index a408c1a..5cc9d92 100644
> --- a/drivers/char/ipmi/ipmb_host.c
> +++ b/drivers/char/ipmi/ipmb_host.c
> @@ -3,7 +3,7 @@
>   /*
>    * Copyright 2020, NVIDIA Corporation. All rights reserved.
>    *
> - * This was inspired by Brendan Higgins' bt-i2c driver.
> + * This was inspired by Brendan Higgins' bt-i2c driver.
>    *
>    */
>   
> @@ -24,6 +24,8 @@
>   
>   #define	IPMB_TIMEOUT			(msecs_to_jiffies(20000))
>   
> +static bool handshake_rsp = false;
> +
>   /*
>    * The least we expect in an IPMB message is:
>    * netfn_rs_lun, checksum1, rq_sa, rq_seq_rq_lun,
> @@ -44,7 +46,7 @@
>   #define	IPMB_MAX_SMI_SIZE		125
>   #define	IPMB_SMI_MSG_HEADER_SIZE	2
>   
> -#define	IPMB_SEQ_MAX			1024
> +#define	IPMB_SEQ_MAX			64
>   
>   #define	MAX_BUF_SIZE			122
>   
> @@ -132,6 +134,8 @@ struct ipmb_master {
>   	struct list_head		rsp_queue;
>   	atomic_t			rsp_queue_len;
>   	wait_queue_head_t		wait_queue;
> +
> +	bool				slave_registered;
>   };
>   
>   /* +1 is for the checksum integrated in payload */
> @@ -206,25 +210,20 @@ static int ipmb_handle_response(struct ipmb_master *master)
>    * i2c_master_send
>    */
>   static int ipmb_send_request(struct ipmb_master *master,
> -				struct request *request)
> +				struct request *request, u8 i2c_msg_len)
>   {
>   	struct i2c_client *client = master->client;
>   	unsigned long timeout, read_time;
>   	u8 *buf = (u8 *) request;
>   	int ret;
> -	int msg_len;
>   	union i2c_smbus_data data;
>   
>   	/*
> -	 * subtract netfn_rs_lun payload since it is passed as arg
> +	 * skip netfn_rs_lun payload since it is passed as arg
>   	 * 5 to i2c_smbus_xfer.
>   	 */
> -	msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
> -	if (msg_len > I2C_SMBUS_BLOCK_MAX)
> -		msg_len = I2C_SMBUS_BLOCK_MAX;
> -
> -	data.block[0] = msg_len;
> -	memcpy(&data.block[1], buf + 1, msg_len);
> +	data.block[0] = i2c_msg_len;
> +	memcpy(&data.block[1], buf + 1, i2c_msg_len);
>   
>   	timeout = jiffies + msecs_to_jiffies(WRITE_TIMEOUT);
>   	do {
> @@ -387,11 +386,12 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
>   {
>   	struct ipmb_rsp_elem	*queue_elem;
>   	int			ret = 1;
> +	unsigned long		flags;
>   
> -	spin_lock_irq(&master->lock);
> +	spin_lock_irqsave(&master->lock, flags);
>   
>   	if (list_empty(&master->rsp_queue)) {
> -		spin_unlock_irq(&master->lock);
> +		spin_unlock_irqrestore(&master->lock, flags);
>   
>   		ret = wait_event_interruptible_timeout(master->wait_queue,
>   			!list_empty(&master->rsp_queue), IPMB_TIMEOUT);
> @@ -399,16 +399,17 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
>   		if (ret <= 0)
>   			return ret;
>   
> -		spin_lock_irq(&master->lock);
> +		spin_lock_irqsave(&master->lock, flags);
>   	}
>   
>   	queue_elem = list_first_entry(&master->rsp_queue,
>   			struct ipmb_rsp_elem, list);
> +
>   	memcpy(ipmb_rsp, &queue_elem->rsp, sizeof(struct response));
>   	list_del(&queue_elem->list);
>   	kfree(queue_elem);
>   	atomic_dec(&master->rsp_queue_len);
> -	spin_unlock_irq(&master->lock);
> +	spin_unlock_irqrestore(&master->lock, flags);
>   
>   	return ret;
>   }
> @@ -438,6 +439,8 @@ static void ipmb_send_workfn(struct work_struct *work)
>   	int				rsp_msg_len;
>   	u8 				*buf_rsp;
>   	u8 				verify_checksum;
> +	u8				seq;
> +	u8				i2c_msg_len;
>   
>   	u8 *buf = (u8 *) &ipmb_req_msg;
>   
> @@ -460,11 +463,13 @@ static void ipmb_send_workfn(struct work_struct *work)
>   		return;
>   	}
>   
> -	if (!ipmb_assign_seq(master, req_msg, &ipmb_req_msg.rq_seq_rq_lun)) {
> +	if (!ipmb_assign_seq(master, req_msg, &seq)) {
>   		ipmb_error_reply(master, req_msg, IPMI_NODE_BUSY_ERR);
>   		return;
>   	}
>   
> +	ipmb_req_msg.rq_seq_rq_lun = seq << 2;
> +
>   	msg_len = ipmi_smi_to_ipmb_len(smi_msg_size);
>   
>   	/* Responder  */
> @@ -481,7 +486,15 @@ static void ipmb_send_workfn(struct work_struct *work)
>   	ipmb_req_msg.payload[ipmb_payload_len((size_t)msg_len)] =
>   		ipmb_checksum(buf + 2, msg_len - 2, 0);
>   
> -	if (ipmb_send_request(master, &ipmb_req_msg) < 0) {
> +	/*
> +	 * subtract netfn_rs_lun payload since it is passed as arg
> +	 * 5 to i2c_smbus_xfer.
> +	 */
> +	i2c_msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
> +	if (i2c_msg_len > I2C_SMBUS_BLOCK_MAX)
> +		i2c_msg_len = I2C_SMBUS_BLOCK_MAX;
> +
> +	if (ipmb_send_request(master, &ipmb_req_msg, i2c_msg_len) < 0) {
>   		ipmb_free_seq(master, (GET_SEQ(ipmb_req_msg.rq_seq_rq_lun)));
>   		ipmb_error_reply(master, req_msg, IPMI_BUS_ERR);
>   		spin_lock_irqsave(&master->lock, flags);
> @@ -621,6 +634,11 @@ static int ipmb_slave_cb(struct i2c_client *client,
>   	struct ipmb_master *master = i2c_get_clientdata(client);
>   	u8 *buf;
>   
> +	if (!handshake_rsp) {
> +		handshake_rsp = true;
> +		return 0;
> +	}
> +
>   	spin_lock(&master->lock);
>   
>   	switch (event) {
> @@ -665,6 +683,56 @@ static unsigned short slave_add = 0x0;
>   module_param(slave_add, ushort, 0);
>   MODULE_PARM_DESC(slave_add, "The i2c slave address of the responding device");
>   
> +#define GET_DEVICE_ID_MSG_LEN 7
> +
> +static bool ipmb_detect(struct ipmb_master *master)
> +{
> +	struct ipmb_rsp_elem *q_elem, *tmp_q_elem;
> +	struct request request;
> +	u8 *buf = (u8 *) &request;
> +	struct device dev;
> +	int retry = 2000;
> +	u8 i2c_msg_len;
> +	int ret;
> +
> +	/* Subtract rs sa and netfn */
> +	i2c_msg_len = GET_DEVICE_ID_MSG_LEN - 2;
> +
> +	dev = master->client->dev;
> +
> +	request.netfn_rs_lun = IPMI_NETFN_APP_REQUEST << 2;
> +	request.checksum1 = ipmb_checksum1((u8)(master->rs_sa << 1),
> +						request.netfn_rs_lun);
> +	request.rq_sa = (u8)(master->client->addr << 1);
> +	request.rq_seq_rq_lun = 0;
> +	request.cmd = IPMI_GET_DEVICE_ID_CMD;
> +	request.payload[0] = ipmb_checksum(buf + 2, 3, 0);
> +
> +	ret = ipmb_send_request(master, &request, i2c_msg_len);
> +	if (ret < 0) {
> +		dev_err(&dev, "ERROR: ipmb_send_request failed during ipmb detection\n");
> +		return false;
> +	}
> +
> +	while(!handshake_rsp && (retry > 0)) {
> +		mdelay(10);
> +		retry--;
> +	}
> +
> +	if (!retry) {
> +		dev_err(&dev, "ERROR: Response timed out during ipmb detection\n");
> +		return false;
> +	}
> +
> +	list_for_each_entry_safe(q_elem, tmp_q_elem, &master->rsp_queue, list){
> +		list_del(&q_elem->list);
> +		kfree(q_elem);
> +		atomic_dec(&master->rsp_queue_len);
> +	}
> +
> +	return true;
> +}
> +
>   static int ipmb_probe(struct i2c_client *client,
>   			const struct i2c_device_id *id)
>   {
> @@ -702,12 +770,30 @@ static int ipmb_probe(struct i2c_client *client,
>   	if (ret)
>   		return ret;
>   
> +	master->slave_registered = true;
> +
> +	/*
> +	 * Send a simple message "get device ID" to detect whether the BMC is responsive or not.
> +	 * This is necessary before calling ipmi_register_smi which executes a handshake with the
> +	 * slave device and can hold the lock for a very long if the BMC is not up. This long wait
> +	 * at boot time causes the system to crash.
> +	 */
> +	if (!ipmb_detect(master)) {
> +		dev_err(&client->dev, "Unable to get response from slave device at this time\n");
> +		i2c_slave_unregister(client);
> +		master->slave_registered = false;
> +		return -ENXIO;
> +	}
> +
>   	ret = ipmi_register_smi(&ipmb_smi_handlers, master,
>   				&client->dev,
>   				(unsigned char)master->rs_sa);
>   
> -	if (ret)
> +	if (ret) {
> +		dev_err(&client->dev, "ipmi_register_smi failed with ret = %d\n", ret);
>   		i2c_slave_unregister(client);
> +		master->slave_registered = false;
> +	}
>   
>   	return ret;
>   }
> @@ -717,8 +803,13 @@ static int ipmb_remove(struct i2c_client *client)
>   	struct ipmb_master *master;
>   
>   	master = i2c_get_clientdata(client);
> -	ipmi_unregister_smi(master->intf);
> -	i2c_slave_unregister(client);
> +	if (!master)
> +		return 0;
> +
> +	if (master->slave_registered) {
> +		ipmi_unregister_smi(master->intf);
> +		i2c_slave_unregister(client);
> +	}
>   
>   	return 0;
>   }
> 




More information about the kernel-team mailing list