[SRU][N:linux-azure-nvidia][PATCH 10/10] net: mana: Add handler for hardware servicing events
John Cabaj
john.cabaj at canonical.com
Thu Jun 26 21:23:02 UTC 2025
From: Haiyang Zhang <haiyangz at microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2115453
To collaborate with hardware servicing events, upon receiving the special
EQE notification from the HW channel, remove the devices on this bus.
Then, after a waiting period based on the device specs, rescan the parent
bus to recover the devices.
Signed-off-by: Haiyang Zhang <haiyangz at microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta at linux.microsoft.com>
Reviewed-by: Simon Horman <horms at kernel.org>
Link: https://patch.msgid.link/1749834034-18498-1-git-send-email-haiyangz@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba at kernel.org>
(backported from commit 7768c5f417336fa58dbfef9bb7ecd7eeec6d8886 net-next)
[john-cabaj: upstream 7fc45cb: "net: mana: Allow variable size
indirection table" and 2fc8a34: "net: mana: Support holes in
device list reply msg" added changes to GDMA_DRV_CAP_FLAGS1,
which aren't required here]
Signed-off-by: John Cabaj <john.cabaj at canonical.com>
---
.../net/ethernet/microsoft/mana/gdma_main.c | 75 +++++++++++++++++++
include/net/mana/gdma.h | 10 ++-
2 files changed, 83 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 2902c04d119f..63b376e55a24 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -346,11 +346,59 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
}
EXPORT_SYMBOL_NS(mana_gd_ring_cq, NET_MANA);
+#define MANA_SERVICE_PERIOD 10
+
+struct mana_serv_work {
+ struct work_struct serv_work;
+ struct pci_dev *pdev;
+};
+
+static void mana_serv_func(struct work_struct *w)
+{
+ struct mana_serv_work *mns_wk;
+ struct pci_bus *bus, *parent;
+ struct pci_dev *pdev;
+
+ mns_wk = container_of(w, struct mana_serv_work, serv_work);
+ pdev = mns_wk->pdev;
+
+ pci_lock_rescan_remove();
+
+ if (!pdev)
+ goto out;
+
+ bus = pdev->bus;
+ if (!bus) {
+ dev_err(&pdev->dev, "MANA service: no bus\n");
+ goto out;
+ }
+
+ parent = bus->parent;
+ if (!parent) {
+ dev_err(&pdev->dev, "MANA service: no parent bus\n");
+ goto out;
+ }
+
+ pci_stop_and_remove_bus_device(bus->self);
+
+ msleep(MANA_SERVICE_PERIOD * 1000);
+
+ pci_rescan_bus(parent);
+
+out:
+ pci_unlock_rescan_remove();
+
+ pci_dev_put(pdev);
+ kfree(mns_wk);
+ module_put(THIS_MODULE);
+}
+
static void mana_gd_process_eqe(struct gdma_queue *eq)
{
u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
struct gdma_context *gc = eq->gdma_dev->gdma_context;
struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
+ struct mana_serv_work *mns_wk;
union gdma_eqe_info eqe_info;
enum gdma_eqe_type type;
struct gdma_event event;
@@ -395,6 +443,33 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
eq->eq.callback(eq->eq.context, eq, &event);
break;
+ case GDMA_EQE_HWC_FPGA_RECONFIG:
+ dev_info(gc->dev, "Recv MANA service type:%d\n", type);
+
+ if (gc->in_service) {
+ dev_info(gc->dev, "Already in service\n");
+ break;
+ }
+
+ if (!try_module_get(THIS_MODULE)) {
+ dev_info(gc->dev, "Module is unloading\n");
+ break;
+ }
+
+ mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC);
+ if (!mns_wk) {
+ module_put(THIS_MODULE);
+ break;
+ }
+
+ dev_info(gc->dev, "Start MANA service type:%d\n", type);
+ gc->in_service = true;
+ mns_wk->pdev = to_pci_dev(gc->dev);
+ pci_dev_get(mns_wk->pdev);
+ INIT_WORK(&mns_wk->serv_work, mana_serv_func);
+ schedule_work(&mns_wk->serv_work);
+ break;
+
default:
break;
}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 0474bc538b5e..0a0a1a9ca880 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -58,7 +58,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
GDMA_EQE_HWC_INIT_DATA = 130,
GDMA_EQE_HWC_INIT_DONE = 131,
- GDMA_EQE_HWC_SOC_RECONFIG = 132,
+ GDMA_EQE_HWC_FPGA_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
GDMA_EQE_HWC_SOC_SERVICE = 134,
GDMA_EQE_RNIC_QP_FATAL = 176,
@@ -403,6 +403,8 @@ struct gdma_context {
u32 test_event_eq_id;
bool is_pf;
+ bool in_service;
+
phys_addr_t bar0_pa;
void __iomem *bar0_va;
void __iomem *shm_base;
@@ -576,10 +578,14 @@ enum {
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
#define GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB BIT(4)
+/* Driver can self reset on FPGA Reconfig EQE notification */
+#define GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE BIT(17)
+
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
- GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+ GDMA_DRV_CAP_FLAG_1_HANDLE_RECONFIG_EQE)
#define GDMA_DRV_CAP_FLAGS2 0
--
2.43.0
More information about the kernel-team
mailing list