[PATCH 09/13] crypto: qat - limit heartbeat notifications

Thibault Ferrante thibault.ferrante at canonical.com
Thu Mar 7 22:05:47 UTC 2024


From: Furong Zhou <furong.zhou at intel.com>

BugLink: https://bugs.launchpad.net/bugs/2056354

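When the driver detects a heartbeat failure, it starts the recovery
flow. Rate-limit these notifications so that reading the heartbeat
status too frequently does not generate a flood of events: a new
notification is sent only if at least ADF_CFG_HB_RESET_MS has elapsed
since the previous one.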

Signed-off-by: Furong Zhou <furong.zhou at intel.com>
Reviewed-by: Ahsan Atta <ahsan.atta at intel.com>
Reviewed-by: Markas Rapoportas <markas.rapoportas at intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu at intel.com>
Signed-off-by: Mun Chun Yep <mun.chun.yep at intel.com>
Signed-off-by: Herbert Xu <herbert at gondor.apana.org.au>
(cherry picked from commit 750fa7c20e60926431ec50d63899771ffcd9fd5c linux-next)
Signed-off-by: Thibault Ferrante <thibault.ferrante at canonical.com>
---
 .../crypto/intel/qat/qat_common/adf_heartbeat.c | 17 ++++++++++++++---
 .../crypto/intel/qat/qat_common/adf_heartbeat.h |  3 +++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
index fe8428d4ff39..b19aa1ef8eee 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
@@ -205,6 +205,19 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
 	return ret;
 }
 
+static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
+{
+	u64 curr_time = adf_clock_get_current_time();
+	u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
+
+	if (time_since_reset < ADF_CFG_HB_RESET_MS)
+		return;
+
+	accel_dev->heartbeat->last_hb_reset_time = curr_time;
+	if (adf_notify_fatal_error(accel_dev))
+		dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
+}
+
 void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
 			  enum adf_device_heartbeat_status *hb_status)
 {
@@ -229,9 +242,7 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
 			"Heartbeat ERROR: QAT is not responding.\n");
 		*hb_status = HB_DEV_UNRESPONSIVE;
 		hb->hb_failed_counter++;
-		if (adf_notify_fatal_error(accel_dev))
-			dev_err(&GET_DEV(accel_dev),
-				"Failed to notify fatal error\n");
+		adf_heartbeat_reset(accel_dev);
 		return;
 	}
 
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
index 24c3f4f24c86..16fdfb48b196 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
@@ -13,6 +13,8 @@ struct dentry;
 #define ADF_CFG_HB_TIMER_DEFAULT_MS 500
 #define ADF_CFG_HB_COUNT_THRESHOLD 3
 
+#define ADF_CFG_HB_RESET_MS 5000
+
 enum adf_device_heartbeat_status {
 	HB_DEV_UNRESPONSIVE = 0,
 	HB_DEV_ALIVE,
@@ -30,6 +32,7 @@ struct adf_heartbeat {
 	unsigned int hb_failed_counter;
 	unsigned int hb_timer;
 	u64 last_hb_check_time;
+	u64 last_hb_reset_time;
 	bool ctrs_cnt_checked;
 	struct hb_dma_addr {
 		dma_addr_t phy_addr;
-- 
2.43.0




More information about the kernel-team mailing list