ACK: [SRU][Cosmic][PATCH 1/1] net/mlx5: WQ, fixes for fragmented WQ buffers API

Kleber Souza kleber.souza at canonical.com
Tue Nov 6 16:55:48 UTC 2018


On 10/31/18 19:06, Joseph Salisbury wrote:
> From: Tariq Toukan <tariqt at mellanox.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1799393
>
> The mlx5e netdevice used to calculate fragment edges by a call to
> mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
> indication for queues smaller than a PAGE_SIZE (broken by default on
> PowerPC, where PAGE_SIZE == 64KB). Here it is replaced by the correct
> new API calls.
>
> Since (TX/RX) Work Queue buffers are fragmented, here we introduce
> changes to the API in the core driver, so that it takes a stride index
> and returns the index of the last stride on the same fragment, plus an
> additional wrapping function that returns the number of physically
> contiguous strides that can still be written to the work queue.
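
To make the fragment-edge arithmetic above concrete, here is a minimal
standalone sketch (plain userspace C, not driver code) of the calculation the
new helpers perform. The struct and function names are illustrative stand-ins
for mlx5_frag_buf_ctrl and the new mlx5_frag_buf_get_idx_last_contig_stride()
/ mlx5_wq_cyc_get_contig_wqebbs() pair, and the example values (64 WQEBBs per
fragment, an 8192-entry ring) are assumed for illustration only:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for the fields the real mlx5_frag_buf_ctrl carries. */
struct frag_ctrl {
	uint32_t frag_sz_m1;     /* strides per fragment minus one (power of two minus one) */
	uint32_t sz_m1;          /* total strides in the queue minus one */
	uint32_t strides_offset; /* stride offset of this WQ within the buffer */
};

/* Same arithmetic as mlx5_frag_buf_get_idx_last_contig_stride() below:
 * OR-ing with frag_sz_m1 yields the last stride index of the fragment that
 * holds ix; the result is then clamped to the end of the queue. */
static uint32_t last_contig_stride(const struct frag_ctrl *fbc, uint32_t ix)
{
	uint32_t last = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
	uint32_t clamped = last - fbc->strides_offset;

	return clamped < fbc->sz_m1 ? clamped : fbc->sz_m1;
}

/* Same arithmetic as mlx5_wq_cyc_get_contig_wqebbs() below. */
static uint32_t contig_wqebbs(const struct frag_ctrl *fbc, uint32_t ix)
{
	return last_contig_stride(fbc, ix) - ix + 1;
}

int main(void)
{
	/* Assumed example: 64 WQEBBs per fragment, 8192-entry ring, no offset. */
	struct frag_ctrl fbc = { .frag_sz_m1 = 63, .sz_m1 = 8191, .strides_offset = 0 };

	printf("room at index 60: %u\n", (unsigned)contig_wqebbs(&fbc, 60)); /* 4 */
	printf("room at index 64: %u\n", (unsigned)contig_wqebbs(&fbc, 64)); /* 64 */
	return 0;
}

With these assumed values a producer at index 60 has only 4 contiguous WQEBBs
left in its fragment, so any WQE needing more than that must first be preceded
by NOP padding up to index 64; that is exactly the contig_wqebbs_room check
the en_rx.c and en_tx.c hunks below add.
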
>
> This obsoletes the following API functions, and their buggy
> usage in the EN driver:
> * mlx5_wq_cyc_get_frag_size()
> * mlx5_wq_cyc_ctr2fragix()
>
> The new API improves modularity and hides the details of this
> calculation from the mlx5e netdevice and mlx5_ib RDMA drivers.
>
> The new calculation is also more efficient and improves performance
> as follows:
>
> Packet rate test: pktgen, UDP / IPv4, 64-byte packets, single ring, 8K ring size.
>
> Before: 16,477,619 pps
> After:  17,085,793 pps
>
> 3.7% improvement
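
(That is 17,085,793 - 16,477,619 = 608,174 additional pps over the
16,477,619 pps baseline, i.e. 608,174 / 16,477,619 ~ 3.7%.)
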
>
> Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
> Signed-off-by: Tariq Toukan <tariqt at mellanox.com>
> Reviewed-by: Eran Ben Elisha <eranbe at mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm at mellanox.com>
> (cherry picked from commit 37fdffb217a45609edccbb8b407d031143f551c0)
> Signed-off-by: Joseph Salisbury <joseph.salisbury at canonical.com>

Acked-by: Kleber Sacilotto de Souza <kleber.souza at canonical.com>

> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 12 +++++-------
>  drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 22 +++++++++++-----------
>  .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  5 ++---
>  drivers/net/ethernet/mellanox/mlx5/core/wq.c       |  5 -----
>  drivers/net/ethernet/mellanox/mlx5/core/wq.h       | 11 +++++------
>  include/linux/mlx5/driver.h                        |  8 ++++++++
>  6 files changed, 31 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> index d3a1dd2..549e926 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
> @@ -429,10 +429,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
>  
>  static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
>  					      struct mlx5_wq_cyc *wq,
> -					      u16 pi, u16 frag_pi)
> +					      u16 pi, u16 nnops)
>  {
>  	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
> -	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
>  
>  	edge_wi = wi + nnops;
>  
> @@ -451,15 +450,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
>  	struct mlx5_wq_cyc *wq = &sq->wq;
>  	struct mlx5e_umr_wqe *umr_wqe;
>  	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
> -	u16 pi, frag_pi;
> +	u16 pi, contig_wqebbs_room;
>  	int err;
>  	int i;
>  
>  	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -
> -	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
> -		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
> +		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
>  	}
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> index f29deb4..1e774d9 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
> @@ -287,10 +287,9 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  
>  static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
>  					   struct mlx5_wq_cyc *wq,
> -					   u16 pi, u16 frag_pi)
> +					   u16 pi, u16 nnops)
>  {
>  	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
> -	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
>  
>  	edge_wi = wi + nnops;
>  
> @@ -345,8 +344,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	struct mlx5e_tx_wqe_info *wi;
>  
>  	struct mlx5e_sq_stats *stats = sq->stats;
> +	u16 headlen, ihs, contig_wqebbs_room;
>  	u16 ds_cnt, ds_cnt_inl = 0;
> -	u16 headlen, ihs, frag_pi;
>  	u8 num_wqebbs, opcode;
>  	u32 num_bytes;
>  	int num_dma;
> @@ -383,9 +382,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	}
>  
>  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
> -		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
> +		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
>  	}
>  
> @@ -629,7 +628,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	struct mlx5e_tx_wqe_info *wi;
>  
>  	struct mlx5e_sq_stats *stats = sq->stats;
> -	u16 headlen, ihs, pi, frag_pi;
> +	u16 headlen, ihs, pi, contig_wqebbs_room;
>  	u16 ds_cnt, ds_cnt_inl = 0;
>  	u8 num_wqebbs, opcode;
>  	u32 num_bytes;
> @@ -665,13 +664,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
>  	}
>  
>  	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
> -	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
> -	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
> +	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> +	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
> +	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
> +		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
>  		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
>  	}
>  
> -	mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
> +	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
>  
>  	/* fill wqe */
>  	wi       = &sq->db.wqe_info[pi];
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> index 08eac92..0982c57 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
> @@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
>  
>  static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
>  				      struct mlx5i_tx_wqe **wqe,
> -				      u16 *pi)
> +				      u16 pi)
>  {
>  	struct mlx5_wq_cyc *wq = &sq->wq;
>  
> -	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
> -	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
> +	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
>  	memset(*wqe, 0, sizeof(**wqe));
>  }
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> index d838af9..9046475 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
> @@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
>  	return (u32)wq->fbc.sz_m1 + 1;
>  }
>  
> -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
> -{
> -	return wq->fbc.frag_sz_m1 + 1;
> -}
> -
>  u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
>  {
>  	return wq->fbc.sz_m1 + 1;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> index 16476cc..3112565 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
> @@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
>  		       void *wqc, struct mlx5_wq_cyc *wq,
>  		       struct mlx5_wq_ctrl *wq_ctrl);
>  u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
> -u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
>  
>  int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
>  		      void *qpc, struct mlx5_wq_qp *wq,
> @@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
>  	return ctr & wq->fbc.sz_m1;
>  }
>  
> -static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
> -{
> -	return ctr & wq->fbc.frag_sz_m1;
> -}
> -
>  static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
>  {
>  	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
> @@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
>  	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
>  }
>  
> +static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
> +{
> +	return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
> +}
> +
>  static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
>  {
>  	int equal   = (cc1 == cc2);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 64f4505..b49bfc8 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -1022,6 +1022,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
>  		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
>  }
>  
> +static inline u32
> +mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
> +{
> +	u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
> +
> +	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
> +}
> +
>  int mlx5_cmd_init(struct mlx5_core_dev *dev);
>  void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
>  void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
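
One detail worth noting from the hunks above: the NOP count for the fragment
edge fill is now computed at the call sites via mlx5_wq_cyc_get_contig_wqebbs()
and passed in as nnops, instead of being derived inside
mlx5e_fill_sq_frag_edge() / mlx5e_fill_icosq_frag_edge() from
mlx5_wq_cyc_get_frag_size(); likewise mlx5i_sq_fetch_wqe() now takes pi by
value rather than computing it itself.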