[SRU][F:linux-bluefield][PATCH v1 1/1] UBUNTU: SAUCE: ipmb_host.c: Fix slow transactions

Asmaa Mnebhi asmaa at nvidia.com
Fri Apr 2 21:40:50 UTC 2021


BugLink: https://bugs.launchpad.net/bugs/1922393

The previous ipmb_host patch broke customers' IPMB tests. They
requested to make the ipmb_host response time as long as before.
In the case where the I2C bus is made very busy, the ipmb_host driver
just drops slow/delayed responses. This fix elongates the timeout of the response. So restore the previous stable ipmb_host patch.
This patch also fixes a crash which occurs after powercycling certain BlueField-2 systems.
The crash is due to the handshake which takes too long to wait for a response at boot time.

Signed-off-by: Asmaa Mnebhi <asmaa at nvidia.com>
Reviewed-by: David Thompson <davthompson at nvidia.com>
Signed-off-by: Asmaa Mnebhi <asmaa at nvidia.com>
---
 drivers/char/ipmi/ipmb_host.c | 131 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 111 insertions(+), 20 deletions(-)

diff --git a/drivers/char/ipmi/ipmb_host.c b/drivers/char/ipmi/ipmb_host.c
index a408c1a..5cc9d92 100644
--- a/drivers/char/ipmi/ipmb_host.c
+++ b/drivers/char/ipmi/ipmb_host.c
@@ -3,7 +3,7 @@
 /*
  * Copyright 2020, NVIDIA Corporation. All rights reserved.
  *
- * This was inspired by Brendan Higgins' bt-i2c driver. 
+ * This was inspired by Brendan Higgins' bt-i2c driver.
  *
  */
 
@@ -24,6 +24,8 @@
 
 #define	IPMB_TIMEOUT			(msecs_to_jiffies(20000))
 
+static bool handshake_rsp = false;
+
 /*
  * The least we expect in an IPMB message is:
  * netfn_rs_lun, checksum1, rq_sa, rq_seq_rq_lun,
@@ -44,7 +46,7 @@
 #define	IPMB_MAX_SMI_SIZE		125
 #define	IPMB_SMI_MSG_HEADER_SIZE	2
 
-#define	IPMB_SEQ_MAX			1024
+#define	IPMB_SEQ_MAX			64
 
 #define	MAX_BUF_SIZE			122
 
@@ -132,6 +134,8 @@ struct ipmb_master {
 	struct list_head		rsp_queue;
 	atomic_t			rsp_queue_len;
 	wait_queue_head_t		wait_queue;
+
+	bool				slave_registered;
 };
 
 /* +1 is for the checksum integrated in payload */
@@ -206,25 +210,20 @@ static int ipmb_handle_response(struct ipmb_master *master)
  * i2c_master_send
  */
 static int ipmb_send_request(struct ipmb_master *master,
-				struct request *request)
+				struct request *request, u8 i2c_msg_len)
 {
 	struct i2c_client *client = master->client;
 	unsigned long timeout, read_time;
 	u8 *buf = (u8 *) request;
 	int ret;
-	int msg_len;
 	union i2c_smbus_data data;
 
 	/*
-	 * subtract netfn_rs_lun payload since it is passed as arg
+	 * skip netfn_rs_lun payload since it is passed as arg
 	 * 5 to i2c_smbus_xfer.
 	 */
-	msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
-	if (msg_len > I2C_SMBUS_BLOCK_MAX)
-		msg_len = I2C_SMBUS_BLOCK_MAX;
-
-	data.block[0] = msg_len;
-	memcpy(&data.block[1], buf + 1, msg_len);
+	data.block[0] = i2c_msg_len;
+	memcpy(&data.block[1], buf + 1, i2c_msg_len);
 
 	timeout = jiffies + msecs_to_jiffies(WRITE_TIMEOUT);
 	do {
@@ -387,11 +386,12 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
 {
 	struct ipmb_rsp_elem	*queue_elem;
 	int			ret = 1;
+	unsigned long		flags;
 
-	spin_lock_irq(&master->lock);
+	spin_lock_irqsave(&master->lock, flags);
 
 	if (list_empty(&master->rsp_queue)) {
-		spin_unlock_irq(&master->lock);
+		spin_unlock_irqrestore(&master->lock, flags);
 
 		ret = wait_event_interruptible_timeout(master->wait_queue,
 			!list_empty(&master->rsp_queue), IPMB_TIMEOUT);
@@ -399,16 +399,17 @@ static int ipmb_receive_rsp(struct ipmb_master *master,
 		if (ret <= 0)
 			return ret;
 
-		spin_lock_irq(&master->lock);
+		spin_lock_irqsave(&master->lock, flags);
 	}
 
 	queue_elem = list_first_entry(&master->rsp_queue,
 			struct ipmb_rsp_elem, list);
+
 	memcpy(ipmb_rsp, &queue_elem->rsp, sizeof(struct response));
 	list_del(&queue_elem->list);
 	kfree(queue_elem);
 	atomic_dec(&master->rsp_queue_len);
-	spin_unlock_irq(&master->lock);
+	spin_unlock_irqrestore(&master->lock, flags);
 
 	return ret;
 }
@@ -438,6 +439,8 @@ static void ipmb_send_workfn(struct work_struct *work)
 	int				rsp_msg_len;
 	u8 				*buf_rsp;
 	u8 				verify_checksum;
+	u8				seq;
+	u8				i2c_msg_len;
 
 	u8 *buf = (u8 *) &ipmb_req_msg;
 
@@ -460,11 +463,13 @@ static void ipmb_send_workfn(struct work_struct *work)
 		return;
 	}
 
-	if (!ipmb_assign_seq(master, req_msg, &ipmb_req_msg.rq_seq_rq_lun)) {
+	if (!ipmb_assign_seq(master, req_msg, &seq)) {
 		ipmb_error_reply(master, req_msg, IPMI_NODE_BUSY_ERR);
 		return;
 	}
 
+	ipmb_req_msg.rq_seq_rq_lun = seq << 2;
+
 	msg_len = ipmi_smi_to_ipmb_len(smi_msg_size);
 
 	/* Responder  */
@@ -481,7 +486,15 @@ static void ipmb_send_workfn(struct work_struct *work)
 	ipmb_req_msg.payload[ipmb_payload_len((size_t)msg_len)] =
 		ipmb_checksum(buf + 2, msg_len - 2, 0);
 
-	if (ipmb_send_request(master, &ipmb_req_msg) < 0) {
+	/*
+	 * subtract netfn_rs_lun payload since it is passed as arg
+	 * 5 to i2c_smbus_xfer.
+	 */
+	i2c_msg_len = ipmi_smi_to_ipmb_len(master->msg_to_send->data_size) - 1;
+	if (i2c_msg_len > I2C_SMBUS_BLOCK_MAX)
+		i2c_msg_len = I2C_SMBUS_BLOCK_MAX;
+
+	if (ipmb_send_request(master, &ipmb_req_msg, i2c_msg_len) < 0) {
 		ipmb_free_seq(master, (GET_SEQ(ipmb_req_msg.rq_seq_rq_lun)));
 		ipmb_error_reply(master, req_msg, IPMI_BUS_ERR);
 		spin_lock_irqsave(&master->lock, flags);
@@ -621,6 +634,11 @@ static int ipmb_slave_cb(struct i2c_client *client,
 	struct ipmb_master *master = i2c_get_clientdata(client);
 	u8 *buf;
 
+	if (!handshake_rsp) {
+		handshake_rsp = true;
+		return 0;
+	}
+
 	spin_lock(&master->lock);
 
 	switch (event) {
@@ -665,6 +683,56 @@ static unsigned short slave_add = 0x0;
 module_param(slave_add, ushort, 0);
 MODULE_PARM_DESC(slave_add, "The i2c slave address of the responding device");
 
+#define GET_DEVICE_ID_MSG_LEN 7
+
+static bool ipmb_detect(struct ipmb_master *master)
+{
+	struct ipmb_rsp_elem *q_elem, *tmp_q_elem;
+	struct request request;
+	u8 *buf = (u8 *) &request;
+	struct device dev;
+	int retry = 2000;
+	u8 i2c_msg_len;
+	int ret;
+
+	/* Subtract rs sa and netfn */
+	i2c_msg_len = GET_DEVICE_ID_MSG_LEN - 2;
+
+	dev = master->client->dev;
+
+	request.netfn_rs_lun = IPMI_NETFN_APP_REQUEST << 2;
+	request.checksum1 = ipmb_checksum1((u8)(master->rs_sa << 1),
+						request.netfn_rs_lun);
+	request.rq_sa = (u8)(master->client->addr << 1);
+	request.rq_seq_rq_lun = 0;
+	request.cmd = IPMI_GET_DEVICE_ID_CMD;
+	request.payload[0] = ipmb_checksum(buf + 2, 3, 0);
+
+	ret = ipmb_send_request(master, &request, i2c_msg_len);
+	if (ret < 0) {
+		dev_err(&dev, "ERROR: ipmb_send_request failed during ipmb detection\n");
+		return false;
+	}
+
+	while(!handshake_rsp && (retry > 0)) {
+		mdelay(10);
+		retry--;
+	}
+
+	if (!retry) {
+		dev_err(&dev, "ERROR: Response timed out during ipmb detection\n");
+		return false;
+	}
+
+	list_for_each_entry_safe(q_elem, tmp_q_elem, &master->rsp_queue, list){
+		list_del(&q_elem->list);
+		kfree(q_elem);
+		atomic_dec(&master->rsp_queue_len);
+	}
+
+	return true;
+}
+
 static int ipmb_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -702,12 +770,30 @@ static int ipmb_probe(struct i2c_client *client,
 	if (ret)
 		return ret;
 
+	master->slave_registered = true;
+
+	/*
+	 * Send a simple message "get device ID" to detect whether the BMC is responsive or not.
+	 * This is necessary before calling ipmi_register_smi which executes a handshake with the
+	 * slave device and can hold the lock for a very long if the BMC is not up. This long wait
+	 * at boot time causes the system to crash.
+	 */
+	if (!ipmb_detect(master)) {
+		dev_err(&client->dev, "Unable to get response from slave device at this time\n");
+		i2c_slave_unregister(client);
+		master->slave_registered = false;
+		return -ENXIO;
+	}
+
 	ret = ipmi_register_smi(&ipmb_smi_handlers, master,
 				&client->dev,
 				(unsigned char)master->rs_sa);
 
-	if (ret)
+	if (ret) {
+		dev_err(&client->dev, "ipmi_register_smi failed with ret = %d\n", ret);
 		i2c_slave_unregister(client);
+		master->slave_registered = false;
+	}
 
 	return ret;
 }
@@ -717,8 +803,13 @@ static int ipmb_remove(struct i2c_client *client)
 	struct ipmb_master *master;
 
 	master = i2c_get_clientdata(client);
-	ipmi_unregister_smi(master->intf);
-	i2c_slave_unregister(client);
+	if (!master)
+		return 0;
+
+	if (master->slave_registered) {
+		ipmi_unregister_smi(master->intf);
+		i2c_slave_unregister(client);
+	}
 
 	return 0;
 }
-- 
2.1.2




More information about the kernel-team mailing list