[SRU][o:azure][PATCH 1/2] net: mana: Implement get_ringparam/set_ringparam for mana
Vinicius Peixoto
vinicius.peixoto at canonical.com
Thu Nov 7 23:08:39 UTC 2024
From: Shradha Gupta <shradhagupta at linux.microsoft.com>
BugLink: https://bugs.launchpad.net/bugs/2086863
Currently the values of WQs for RX and TX queues for MANA devices
are hardcoded to default sizes.
Allow configuring these values for MANA devices as ringparam
configuration(get/set) through ethtool_ops.
Pre-allocate buffers at the beginning of this operation, to
prevent complete network loss in low-memory conditions.
Signed-off-by: Shradha Gupta <shradhagupta at linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz at microsoft.com>
Reviewed-by: Saurabh Sengar <ssengar at linux.microsoft.com>
Link: https://patch.msgid.link/1724688461-12203-1-git-send-email-shradhagupta@linux.microsoft.com
Signed-off-by: Jakub Kicinski <kuba at kernel.org>
(cherry-picked from 3410d0e14f9a6856386c6a8eb2310cbc58191777)
Signed-off-by: Vinicius Peixoto <vinicius.peixoto at canonical.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 27 ++++---
.../ethernet/microsoft/mana/mana_ethtool.c | 74 +++++++++++++++++++
include/net/mana/mana.h | 23 +++++-
3 files changed, 110 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 3d151700f658..eeabc8ef3cfb 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -511,7 +511,7 @@ static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
}
/* Release pre-allocated RX buffers */
-static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc)
+void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc)
{
struct device *dev;
int i;
@@ -608,7 +608,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
*datasize = mtu + ETH_HLEN;
}
-static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
+int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
{
struct device *dev;
struct page *page;
@@ -622,7 +622,7 @@ static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
dev = mpc->ac->gdma_dev->gdma_context->dev;
- num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE;
+ num_rxb = mpc->num_queues * mpc->rx_queue_size;
WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
@@ -1911,15 +1911,17 @@ static int mana_create_txq(struct mana_port_context *apc,
return -ENOMEM;
/* The minimum size of the WQE is 32 bytes, hence
- * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
+ * apc->tx_queue_size represents the maximum number of WQEs
* the SQ can store. This value is then used to size other queues
* to prevent overflow.
+ * Also note that the txq_size is always going to be MANA_PAGE_ALIGNED,
+ * as min val of apc->tx_queue_size is 128 and that would make
+ * txq_size 128*32 = 4096 and the other higher values of apc->tx_queue_size
+ * are always power of two
*/
- txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
- BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size));
+ txq_size = apc->tx_queue_size * 32;
- cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
- cq_size = MANA_PAGE_ALIGN(cq_size);
+ cq_size = apc->tx_queue_size * COMP_ENTRY_SIZE;
gc = gd->gdma_context;
@@ -2159,10 +2161,11 @@ static int mana_push_wqe(struct mana_rxq *rxq)
static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
{
+ struct mana_port_context *mpc = netdev_priv(rxq->ndev);
struct page_pool_params pprm = {};
int ret;
- pprm.pool_size = RX_BUFFERS_PER_QUEUE;
+ pprm.pool_size = mpc->rx_queue_size;
pprm.nid = gc->numa_node;
pprm.napi = &rxq->rx_cq.napi;
pprm.netdev = rxq->ndev;
@@ -2194,13 +2197,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
gc = gd->gdma_context;
- rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
+ rxq = kzalloc(struct_size(rxq, rx_oobs, apc->rx_queue_size),
GFP_KERNEL);
if (!rxq)
return NULL;
rxq->ndev = ndev;
- rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
+ rxq->num_rx_buf = apc->rx_queue_size;
rxq->rxq_idx = rxq_idx;
rxq->rxobj = INVALID_MANA_HANDLE;
@@ -2748,6 +2751,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
apc->ndev = ndev;
apc->max_queues = gc->max_num_queues;
apc->num_queues = gc->max_num_queues;
+ apc->tx_queue_size = DEF_TX_BUFFERS_PER_QUEUE;
+ apc->rx_queue_size = DEF_RX_BUFFERS_PER_QUEUE;
apc->port_handle = INVALID_MANA_HANDLE;
apc->pf_filter_handle = INVALID_MANA_HANDLE;
apc->port_idx = port_idx;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 146d5db1792f..d6a35fbda447 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -369,6 +369,78 @@ static int mana_set_channels(struct net_device *ndev,
return err;
}
+static void mana_get_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+
+ ring->rx_pending = apc->rx_queue_size;
+ ring->tx_pending = apc->tx_queue_size;
+ ring->rx_max_pending = MAX_RX_BUFFERS_PER_QUEUE;
+ ring->tx_max_pending = MAX_TX_BUFFERS_PER_QUEUE;
+}
+
+static int mana_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ring,
+ struct kernel_ethtool_ringparam *kernel_ring,
+ struct netlink_ext_ack *extack)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+ u32 new_tx, new_rx;
+ u32 old_tx, old_rx;
+ int err;
+
+ old_tx = apc->tx_queue_size;
+ old_rx = apc->rx_queue_size;
+
+ if (ring->tx_pending < MIN_TX_BUFFERS_PER_QUEUE) {
+ NL_SET_ERR_MSG_FMT(extack, "tx:%d less than the min:%d", ring->tx_pending,
+ MIN_TX_BUFFERS_PER_QUEUE);
+ return -EINVAL;
+ }
+
+ if (ring->rx_pending < MIN_RX_BUFFERS_PER_QUEUE) {
+ NL_SET_ERR_MSG_FMT(extack, "rx:%d less than the min:%d", ring->rx_pending,
+ MIN_RX_BUFFERS_PER_QUEUE);
+ return -EINVAL;
+ }
+
+ new_rx = roundup_pow_of_two(ring->rx_pending);
+ new_tx = roundup_pow_of_two(ring->tx_pending);
+ netdev_info(ndev, "Using nearest power of 2 values for Txq:%d Rxq:%d\n",
+ new_tx, new_rx);
+
+ /* pre-allocating new buffers to prevent failures in mana_attach() later */
+ apc->rx_queue_size = new_rx;
+ err = mana_pre_alloc_rxbufs(apc, ndev->mtu);
+ apc->rx_queue_size = old_rx;
+ if (err) {
+ netdev_err(ndev, "Insufficient memory for new allocations\n");
+ return err;
+ }
+
+ err = mana_detach(ndev, false);
+ if (err) {
+ netdev_err(ndev, "mana_detach failed: %d\n", err);
+ goto out;
+ }
+
+ apc->tx_queue_size = new_tx;
+ apc->rx_queue_size = new_rx;
+
+ err = mana_attach(ndev);
+ if (err) {
+ netdev_err(ndev, "mana_attach failed: %d\n", err);
+ apc->tx_queue_size = old_tx;
+ apc->rx_queue_size = old_rx;
+ }
+out:
+ mana_pre_dealloc_rxbufs(apc);
+ return err;
+}
+
const struct ethtool_ops mana_ethtool_ops = {
.get_ethtool_stats = mana_get_ethtool_stats,
.get_sset_count = mana_get_sset_count,
@@ -380,4 +452,6 @@ const struct ethtool_ops mana_ethtool_ops = {
.set_rxfh = mana_set_rxfh,
.get_channels = mana_get_channels,
.set_channels = mana_set_channels,
+ .get_ringparam = mana_get_ringparam,
+ .set_ringparam = mana_set_ringparam,
};
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index b8a6c7504ee1..59770dbc9f08 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -38,9 +38,21 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
-#define RX_BUFFERS_PER_QUEUE 512
+/* This Max value for RX buffers is derived from __alloc_page()'s max page
+ * allocation calculation. It allows maximum 2^(MAX_ORDER -1) pages. RX buffer
+ * size beyond this value gets rejected by __alloc_page() call.
+ */
+#define MAX_RX_BUFFERS_PER_QUEUE 8192
+#define DEF_RX_BUFFERS_PER_QUEUE 512
+#define MIN_RX_BUFFERS_PER_QUEUE 128
-#define MAX_SEND_BUFFERS_PER_QUEUE 256
+/* This max value for TX buffers is derived as the maximum allocatable
+ * pages supported on host per guest through testing. TX buffer size beyond
+ * this value is rejected by the hardware.
+ */
+#define MAX_TX_BUFFERS_PER_QUEUE 16384
+#define DEF_TX_BUFFERS_PER_QUEUE 256
+#define MIN_TX_BUFFERS_PER_QUEUE 128
#define EQ_SIZE (8 * MANA_PAGE_SIZE)
@@ -288,7 +300,7 @@ struct mana_recv_buf_oob {
void *buf_va;
bool from_pool; /* allocated from a page pool */
- /* SGL of the buffer going to be sent has part of the work request. */
+ /* SGL of the buffer going to be sent as part of the work request. */
u32 num_sge;
struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES];
@@ -440,6 +452,9 @@ struct mana_port_context {
unsigned int max_queues;
unsigned int num_queues;
+ unsigned int rx_queue_size;
+ unsigned int tx_queue_size;
+
mana_handle_t port_handle;
mana_handle_t pf_filter_handle;
@@ -475,6 +490,8 @@ struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
void mana_query_gf_stats(struct mana_port_context *apc);
+int mana_pre_alloc_rxbufs(struct mana_port_context *apc, int mtu);
+void mana_pre_dealloc_rxbufs(struct mana_port_context *apc);
extern const struct ethtool_ops mana_ethtool_ops;
--
2.43.0
More information about the kernel-team
mailing list