ACK: [SRU][F:linux-bluefield][PATCH 1/1] UBUNTU: SAUCE: Syncup with the latest gpio-mlxbf2 and mlxbf-gige drivers

Asmaa Mnebhi asmaa at nvidia.com
Mon Mar 15 13:59:32 UTC 2021


Hi,

Does this mean that this change has been approved, or should I still resubmit a patch addressing Stefan Bader's request to use
the shortened URL https://bugs.launchpad.net/bugs/1918684 instead of "https://bugs.launchpad.net/ubuntu/+source/linux-bluefield/+bug/1918684"?

Thank you.
Asmaa
-----Original Message-----
From: Kleber Souza <kleber.souza at canonical.com> 
Sent: Friday, March 12, 2021 12:05 PM
To: Asmaa Mnebhi <asmaa at nvidia.com>; kernel-team at lists.ubuntu.com
Cc: Asmaa Mnebhi <asmaa at nvidia.com>
Subject: ACK: [SRU][F:linux-bluefield][PATCH 1/1] UBUNTU: SAUCE: Syncup with the latest gpio-mlxbf2 and mlxbf-gige drivers

On 11.03.21 20:13, Asmaa Mnebhi wrote:
> From: Asmaa Mnebhi <asmaa at nvidia.com>
> 
> Buglink: https://bugs.launchpad.net/ubuntu/+source/linux-bluefield/+bug/1918684
> 
> This patch adds the latest changes made to the mlxbf-gige
> and gpio-mlxbf2 drivers. These changes include:
> * moving the GPIO interrupt code from the mlxbf_gige
> driver to the gpio-mlxbf2 driver in preparation for the
> next upstreaming patch.
> * splitting up the mlxbf_gige_main.c file into several
> files for better readability.
> * adding a dependency of the mlxbf_gige driver on the
> gpio-mlxbf2 driver (MLXBF_GIGE now depends on GPIO_MLXBF2).
> 
> Signed-off-by: Asmaa Mnebhi <asmaa at nvidia.com>
> Reviewed-by: David Thompson <davthompson at nvidia.com>
> Signed-off-by: Asmaa Mnebhi <asmaa at nvidia.com>

Acked-by: Kleber Sacilotto de Souza <kleber.souza at canonical.com>

> ---
>   drivers/gpio/gpio-mlxbf2.c                         | 399 ++++++---
>   drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig   |   2 +-
>   drivers/net/ethernet/mellanox/mlxbf_gige/Makefile  |   4 +-
>   .../net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h  |  31 +-
>   .../mellanox/mlxbf_gige/mlxbf_gige_ethtool.c       | 178 ++++
>   .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c | 143 +++
>   .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 989 ++-------------------
>   .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c | 105 +--
>   .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h |   2 +-
>   .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c   | 299 +++++++
>   .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c   | 279 ++++++
>   11 files changed, 1267 insertions(+), 1164 deletions(-)
>   create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
>   create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c
>   create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
>   create mode 100644 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c
> 
> diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c
> index 0dc9747..55876fa 100644
> --- a/drivers/gpio/gpio-mlxbf2.c
> +++ b/drivers/gpio/gpio-mlxbf2.c
> @@ -1,25 +1,26 @@
>   // SPDX-License-Identifier: GPL-2.0-only or BSD-3-Clause
>   
>   /*
> - *  Copyright (c) 2020 NVIDIA Corporation.
> + *  Copyright (c) 2020-2021 NVIDIA Corporation.
>    */
>   
>   #include <linux/acpi.h>
>   #include <linux/bitfield.h>
>   #include <linux/bitops.h>
>   #include <linux/device.h>
> +#include <linux/gpio/consumer.h>
>   #include <linux/gpio/driver.h>
>   #include <linux/io.h>
>   #include <linux/ioport.h>
>   #include <linux/kernel.h>
> -#include <linux/kmod.h>
>   #include <linux/module.h>
>   #include <linux/platform_device.h>
>   #include <linux/pm.h>
>   #include <linux/resource.h>
>   #include <linux/spinlock.h>
>   #include <linux/types.h>
> -#include <linux/version.h>
> +
> +#define DRV_VERSION "1.2"
>   
>   /*
>    * There are 3 YU GPIO blocks:
> @@ -30,6 +31,13 @@
>    */
>   #define MLXBF2_GPIO_MAX_PINS_PER_BLOCK 32
>   
> +typedef enum {
> +	GPIO_BLOCK0 = 0,
> +	GPIO_BLOCK1 = 1,
> +	GPIO_BLOCK2 = 2,
> +	GPIO_BLOCK16 = 16
> +} yu_gpio_block;
> +
>   /*
>    * arm_gpio_lock register:
>    * bit[31]	lock status: active if set
> @@ -42,6 +50,9 @@
>   #define YU_ARM_GPIO_LOCK_ACQUIRE	0xd42f
>   #define YU_ARM_GPIO_LOCK_RELEASE	0x0
>   
> +#define YU_CAUSE_GPIO_ADDR		0x2801530
> +#define YU_CAUSE_GPIO_ADDR_SIZE		0x4
> +
>   /*
>    * gpio[x] block registers and their offset
>    */
> @@ -50,6 +61,8 @@
>   #define YU_GPIO_MODE0			0x0c
>   #define YU_GPIO_DATASET			0x14
>   #define YU_GPIO_DATACLEAR		0x18
> +#define YU_GPIO_FUNCTIONAL_ENABLE1	0x24
> +#define YU_GPIO_FUNCTIONAL_ENABLE0	0x28
>   #define YU_GPIO_CAUSE_RISE_EN		0x44
>   #define YU_GPIO_CAUSE_FALL_EN		0x48
>   #define YU_GPIO_MODE1_CLEAR		0x50
> @@ -59,16 +72,6 @@
>   #define YU_GPIO_CAUSE_OR_CAUSE_EVTEN0	0x80
>   #define YU_GPIO_CAUSE_OR_EVTEN0		0x94
>   #define YU_GPIO_CAUSE_OR_CLRCAUSE	0x98
> -#define YU_GPIO16_LOW_PWR_BIT		0
> -#define YU_GPIO0_RST_BIT		7
> -#define YU_GPIO_CAUSE_OR_CAUSE_EVTEN0_MASK(gpio_bit)	BIT(gpio_bit)
> -#define YU_GPIO_CAUSE_OR_EVTEN0_MASK(gpio_bit)		BIT(gpio_bit)
> -#define YU_GPIO_CAUSE_RISE_EN_MASK(gpio_bit)		BIT(gpio_bit)
> -#define YU_GPIO_CAUSE_FALL_EN_MASK(gpio_bit)		BIT(gpio_bit)
> -#define YU_GPIO_CAUSE_OR_CLRCAUSE_MASK(gpio_bit)	BIT(gpio_bit)
> -#define YU_CAUSE_RSH_COALESCE0_GPIO_CAUSE_MASK	0x10
> -#define YU_GPIO_CAUSE_IRQ_IS_SET(val) \
> -	(val & YU_CAUSE_RSH_COALESCE0_GPIO_CAUSE_MASK)
>   
>   #ifdef CONFIG_PM
>   struct mlxbf2_gpio_context_save_regs {
> @@ -77,28 +80,28 @@ struct mlxbf2_gpio_context_save_regs {
>   };
>   #endif
>   
> -#define RST_GPIO_PIN 7
> -#define LOW_PWR_GPIO_PIN 71
> -#define MAX_HOST_GPIOS LOW_PWR_GPIO_PIN
> -
>   /* BlueField-2 gpio block context structure. */
>   struct mlxbf2_gpio_context {
>   	struct gpio_chip gc;
> +	struct irq_chip irq_chip;
>   
>   	/* YU GPIO blocks address */
>   	void __iomem *gpio_io;
>   
> -	/* GPIO pin responsible for low power mode */
> +	/* YU cause gpio arm coalesce0 address */
> +	void __iomem *cause_gpio_arm_coalesce0_io;
> +
> +	/* YU GPIO pin responsible for low power mode */
>   	unsigned long low_pwr_pin;
>   
> -	/* GPIO pin responsible for soft reset */
> +	/* YU GPIO pin responsible for soft reset */
>   	unsigned long rst_pin;
>   
> -	/*
> -	 * Bit within the YU GPIO block that's conifgued
> -	 * as an interrupt.
> -	 */
> -	u32 gpio_int_bit;
> +	/* YU GPIO pin connected to PHY INT_N signal */
> +	unsigned long phy_int_pin;
> +
> +	/* YU GPIO block interrupt mask */
> +	u32 gpio_int_mask;
>   
>   	/* Worker function */
>   	struct work_struct send_work;
> @@ -128,6 +131,19 @@ static struct mlxbf2_gpio_param yu_arm_gpio_lock_param = {
>   	.lock = &yu_arm_gpio_lock_mutex,
>   };
>   
> +static struct resource yu_cause_gpio_res = {
> +	.start = YU_CAUSE_GPIO_ADDR,
> +	.end   = YU_CAUSE_GPIO_ADDR + YU_CAUSE_GPIO_ADDR_SIZE - 1,
> +	.name  = "YU_CAUSE_GPIO",
> +};
> +
> +static DEFINE_MUTEX(yu_cause_gpio_mutex);
> +
> +static struct mlxbf2_gpio_param yu_cause_gpio_param = {
> +	.res = &yu_cause_gpio_res,
> +	.lock = &yu_cause_gpio_mutex,
> +};
> +
>   /* Request memory region and map yu_arm_gpio_lock resource */
>   static int mlxbf2_gpio_get_lock_res(struct platform_device *pdev)
>   {
> @@ -151,8 +167,8 @@ static int mlxbf2_gpio_get_lock_res(struct platform_device *pdev)
>   	}
>   
>   	yu_arm_gpio_lock_param.io = devm_ioremap(dev, res->start, size);
> -	if (IS_ERR(yu_arm_gpio_lock_param.io))
> -		ret = PTR_ERR(yu_arm_gpio_lock_param.io);
> +	if (!yu_arm_gpio_lock_param.io)
> +		ret = -ENOMEM;
>   
>   exit:
>   	mutex_unlock(yu_arm_gpio_lock_param.lock);
> @@ -160,6 +176,38 @@ static int mlxbf2_gpio_get_lock_res(struct platform_device *pdev)
>   	return ret;
>   }
>   
> +/* Request memory region and map yu cause_gpio_arm.coalesce0 resource */
> +static int mlxbf2_gpio_get_yu_cause_gpio_res(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct resource *res;
> +	resource_size_t size;
> +	int ret = 0;
> +
> +	mutex_lock(yu_cause_gpio_param.lock);
> +
> +	/* Check if the memory map already exists */
> +	if (yu_cause_gpio_param.io)
> +		goto exit;
> +
> +	res = yu_cause_gpio_param.res;
> +	size = resource_size(res);
> +
> +	if (!devm_request_mem_region(dev, res->start, size, res->name)) {
> +		ret = -EFAULT;
> +		goto exit;
> +	}
> +
> +	yu_cause_gpio_param.io = devm_ioremap(dev, res->start, size);
> +	if (!yu_cause_gpio_param.io)
> +		ret = -ENOMEM;
> +
> +exit:
> +	mutex_unlock(yu_cause_gpio_param.lock);
> +
> +	return ret;
> +}
> +
>   /*
>    * Acquire the YU arm_gpio_lock to be able to change the direction
>    * mode. If the lock_active bit is already set, return an error.
> @@ -191,6 +239,8 @@ static int mlxbf2_gpio_lock_acquire(struct mlxbf2_gpio_context *gs)
>    * Release the YU arm_gpio_lock after changing the direction mode.
>    */
>   static void mlxbf2_gpio_lock_release(struct mlxbf2_gpio_context *gs)
> +	__releases(&gs->gc.bgpio_lock)
> +	__releases(yu_arm_gpio_lock_param.lock)
>   {
>   	writel(YU_ARM_GPIO_LOCK_RELEASE, yu_arm_gpio_lock_param.io);
>   	spin_unlock(&gs->gc.bgpio_lock);
> @@ -247,6 +297,7 @@ static int mlxbf2_gpio_direction_output(struct gpio_chip *chip,
>   {
>   	struct mlxbf2_gpio_context *gs = gpiochip_get_data(chip);
>   	int ret = 0;
> +	u32 val;
>   
>   	/*
>   	 * Although the arm_gpio_lock was set in the probe function,
> @@ -260,99 +311,90 @@ static int mlxbf2_gpio_direction_output(struct gpio_chip *chip,
>   	writel(BIT(offset), gs->gpio_io + YU_GPIO_MODE1_CLEAR);
>   	writel(BIT(offset), gs->gpio_io + YU_GPIO_MODE0_SET);
>   
> +	/*
> +	 * Set {functional_enable1,functional_enable0}={0,0}
> +	 * to give control to software over these GPIOs.
> +	 */
> +	val = readl(gs->gpio_io + YU_GPIO_FUNCTIONAL_ENABLE1);
> +	val &= ~BIT(offset);
> +	writel(val, gs->gpio_io + YU_GPIO_FUNCTIONAL_ENABLE1);
> +	val = readl(gs->gpio_io + YU_GPIO_FUNCTIONAL_ENABLE0);
> +	val &= ~BIT(offset);
> +	writel(val, gs->gpio_io + YU_GPIO_FUNCTIONAL_ENABLE0);
> +
>   	mlxbf2_gpio_lock_release(gs);
>   
>   	return ret;
>   }
>   
> -static void mlxbf2_gpio_irq_disable(struct mlxbf2_gpio_context *gs)
> -{
> -	u32 val;
> -
> -	spin_lock(&gs->gc.bgpio_lock);
> -	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> -	if (!val) {
> -		spin_unlock(&gs->gc.bgpio_lock);
> -		/* There is no enabled interrupt */
> -		return;
> -	}
> -
> -	val &= ~YU_GPIO_CAUSE_OR_EVTEN0_MASK(gs->gpio_int_bit);
> -	writel(val, gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> -	spin_unlock(&gs->gc.bgpio_lock);
> -}
> -
> -static void mlxbf2_gpio_irq_set_type(struct mlxbf2_gpio_context *gs)
> +static void mlxbf2_gpio_send_work(struct work_struct *work)
>   {
> -	u32 val;
> -
> -	spin_lock(&gs->gc.bgpio_lock);
> -
> -	/*
> -	 * The power state gpio interrupt should be detected at rising
> -	 * and falling edges.
> -	 *
> -	 * In the case of low power mode interrupt:
> -	 * When it goes from 0 to 1, system should go into low power state
> -	 * When it goes from 1 to 0, system should revert to normal state
> -	 *
> -	 * In the case of soft reset interrupt, trigger interrupt off
> -	 * falling edge since it is active low.
> -	 */
> -	if (gs->low_pwr_pin == LOW_PWR_GPIO_PIN) {
> -		val = readl(gs->gpio_io + YU_GPIO_CAUSE_RISE_EN);
> -		val |= YU_GPIO_CAUSE_RISE_EN_MASK(gs->gpio_int_bit);
> -		writel(val, gs->gpio_io + YU_GPIO_CAUSE_RISE_EN);
> -	}
> +	struct mlxbf2_gpio_context *gs;
>   
> -	val = readl(gs->gpio_io + YU_GPIO_CAUSE_FALL_EN);
> -	val |= YU_GPIO_CAUSE_FALL_EN_MASK(gs->gpio_int_bit);
> -	writel(val, gs->gpio_io + YU_GPIO_CAUSE_FALL_EN);
> +	gs = container_of(work, struct mlxbf2_gpio_context, send_work);
>   
> -	spin_unlock(&gs->gc.bgpio_lock);
> +	acpi_bus_generate_netlink_event("button/power.*", "Power Button",
> +					0x80, 1);
>   }
>   
> -static void mlxbf2_gpio_irq_enable(struct mlxbf2_gpio_context *gs)
> +static u32 mlxbf2_gpio_get_int_mask(struct mlxbf2_gpio_context *gs)
>   {
> -	u32 val;
> -
> -	spin_lock(&gs->gc.bgpio_lock);
> +	u32 gpio_int_mask = 0;
>   
>   	/*
> -	 * Setting the priority for the GPIO interrupt enables the
> -	 * interrupt as well
> +	 * Determine bit mask within the yu gpio block.
>   	 */
> -	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> -	val |= YU_GPIO_CAUSE_OR_EVTEN0_MASK(gs->gpio_int_bit);
> -	writel(val, gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> -
> -	spin_unlock(&gs->gc.bgpio_lock);
> +	if (gs->phy_int_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK)
> +		gpio_int_mask = BIT(gs->phy_int_pin);
> +	if (gs->rst_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK)
> +		gpio_int_mask |= BIT(gs->rst_pin);
> +	if (gs->low_pwr_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK)
> +		gpio_int_mask = BIT(gs->low_pwr_pin);
> +
> +	return gpio_int_mask;
>   }
>   
> -static void mlxbf2_gpio_send_work(struct work_struct *work)
> +static bool mlxbf2_gpio_is_acpi_event(u32 gpio_block, unsigned long gpio_pin,
> +			  struct mlxbf2_gpio_context *gs)
>   {
> -	struct mlxbf2_gpio_context *gs;
> -
> -	gs = container_of(work, struct mlxbf2_gpio_context, send_work);
> +	if (gpio_block & BIT(GPIO_BLOCK0)) {
> +		if (gpio_pin & BIT(gs->rst_pin))
> +			return true;
> +	}
> +	if (gpio_block & BIT(GPIO_BLOCK16)) {
> +		if (gpio_pin & BIT(gs->low_pwr_pin))
> +			return true;
> +	}
>   
> -	acpi_bus_generate_netlink_event("button/power.*", "Power Button", 0x80, 1);
> +	return false;
>   }
>   
>   static irqreturn_t mlxbf2_gpio_irq_handler(int irq, void *ptr)
>   {
>   	struct mlxbf2_gpio_context *gs = ptr;
> +	unsigned long gpio_pin;
> +	u32 gpio_block, val;
>   	unsigned long flags;
> -	u32 val;
>   
>   	spin_lock_irqsave(&gs->gc.bgpio_lock, flags);
>   
>   	/*
> -	 * Check if this interrupt is for bit 0 of yu.gpio[16]
> -	 * or bit 7 of yu.gpio[0].
> -	 * Return if it is not.
> +	 * Determine which yu gpio block this interrupt is for.
> +	 * Return if the interrupt is not for gpio block 0 or
> +	 * gpio block 16.
> +	 */
> +	gpio_block = readl(yu_cause_gpio_param.io);
> +	if (!(gpio_block & BIT(GPIO_BLOCK0)) &&
> +	    !(gpio_block & BIT(GPIO_BLOCK16))) {
> +		spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
> +		return IRQ_NONE;
> +	}
> +
> +	/*
> +	 * Check if the interrupt signaled by this yu gpio block is supported.
>   	 */
> -	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CAUSE_EVTEN0);
> -	if (!(val & YU_GPIO_CAUSE_OR_CAUSE_EVTEN0_MASK(gs->gpio_int_bit))) {
> +	gpio_pin = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CAUSE_EVTEN0);
> +	if (!(gpio_pin & gs->gpio_int_mask)) {
>   		spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
>   		return IRQ_NONE;
>   	}
> @@ -362,28 +404,103 @@ static irqreturn_t mlxbf2_gpio_irq_handler(int irq, void *ptr)
>   	 * will be triggered.
>   	 */
>   	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
> -	val |= YU_GPIO_CAUSE_OR_CLRCAUSE_MASK(gs->gpio_int_bit);
> +	val |= gpio_pin;
>   	writel(val, gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
> +
> +	if ((gpio_block & BIT(GPIO_BLOCK0)) && (gpio_pin & BIT(gs->phy_int_pin)))
> +		generic_handle_irq(irq_find_mapping(gs->gc.irq.domain, gs->phy_int_pin));
> +
>   	spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
>   
> -	schedule_work(&gs->send_work);
> +	if (mlxbf2_gpio_is_acpi_event(gpio_block, gpio_pin, gs))
> +		schedule_work(&gs->send_work);
>   
>   	return IRQ_HANDLED;
>   }
>   
> +static void mlxbf2_gpio_irq_unmask(struct irq_data *data)
> +{
> +}
> +
> +static void mlxbf2_gpio_irq_mask(struct irq_data *data)
> +{
> +}
> +
> +static int mlxbf2_gpio_init_hw(struct gpio_chip *gc)
> +{
> +	struct mlxbf2_gpio_context *gs = gpiochip_get_data(gc);
> +	unsigned long flags;
> +	u32 val;
> +
> +	spin_lock_irqsave(&gs->gc.bgpio_lock, flags);
> +
> +	/* Clear all interrupts */
> +	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
> +	val |= gs->gpio_int_mask;
> +	writel(val, gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
> +
> +	if (gs->low_pwr_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK) {
> +		val = readl(gs->gpio_io + YU_GPIO_CAUSE_RISE_EN);
> +		val |= gs->gpio_int_mask;
> +		writel(val, gs->gpio_io + YU_GPIO_CAUSE_RISE_EN);
> +	}
> +
> +	val = readl(gs->gpio_io + YU_GPIO_CAUSE_FALL_EN);
> +	val |= gs->gpio_int_mask;
> +	writel(val, gs->gpio_io + YU_GPIO_CAUSE_FALL_EN);
> +
> +	/*
> +	 * Setting the priority for the GPIO interrupt enables the
> +	 * interrupt as well
> +	 */
> +	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> +	val |= gs->gpio_int_mask;
> +	writel(val, gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> +
> +	spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
> +
> +	return 0;
> +}
> +
> +static void mlxbf2_gpio_disable_int(struct mlxbf2_gpio_context *gs)
> +{
> +	unsigned long flags;
> +	u32 val;
> +
> +	spin_lock_irqsave(&gs->gc.bgpio_lock, flags);
> +	val = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> +	val &= ~gs->gpio_int_mask;
> +	writel(val, gs->gpio_io + YU_GPIO_CAUSE_OR_EVTEN0);
> +	spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags);
> +}
> +
> +static int mlxbf2_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
> +{
> +	struct mlxbf2_gpio_context *gs;
> +
> +	gs = gpiochip_get_data(chip);
> +
> +	return irq_create_mapping(gs->gc.irq.domain, gpio);
> +}
> +
>   /* BlueField-2 GPIO driver initialization routine. */
>   static int
>   mlxbf2_gpio_probe(struct platform_device *pdev)
>   {
>   	struct mlxbf2_gpio_context *gs;
>   	struct device *dev = &pdev->dev;
> +	struct gpio_irq_chip *girq;
>   	unsigned int low_pwr_pin;
> +	unsigned int phy_int_pin;
>   	unsigned int rst_pin;
>   	struct gpio_chip *gc;
>   	struct resource *res;
>   	unsigned int npins;
> +	const char *name;
>   	int ret, irq;
>   
> +	name = dev_name(dev);
> +
>   	gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL);
>   	if (!gs)
>   		return -ENOMEM;
> @@ -406,6 +523,12 @@ mlxbf2_gpio_probe(struct platform_device *pdev)
>   		return ret;
>   	}
>   
> +	ret = mlxbf2_gpio_get_yu_cause_gpio_res(pdev);
> +	if (ret) {
> +		dev_err(dev, "Failed to get yu cause_gpio_arm.coalesce0 resource\n");
> +		return ret;
> +	}
> +
>   	if (device_property_read_u32(dev, "npins", &npins))
>   		npins = MLXBF2_GPIO_MAX_PINS_PER_BLOCK;
>   
> @@ -419,53 +542,79 @@ mlxbf2_gpio_probe(struct platform_device *pdev)
>   			NULL,
>   			0);
>   
> +	if (ret) {
> +		dev_err(dev, "bgpio_init failed\n");
> +		return ret;
> +	}
> +
>   	gc->direction_input = mlxbf2_gpio_direction_input;
>   	gc->direction_output = mlxbf2_gpio_direction_output;
>   	gc->ngpio = npins;
>   	gc->owner = THIS_MODULE;
> +	gc->to_irq = mlxbf2_gpio_to_irq;
>   
> -	ret = devm_gpiochip_add_data(dev, &gs->gc, gs);
> -	if (ret) {
> -		dev_err(dev, "Failed adding memory mapped gpiochip\n");
> -		return ret;
> -	}
> -	platform_set_drvdata(pdev, gs);
> +	/*
> +	 * PHY interrupt
> +	 */
> +	ret = device_property_read_u32(dev, "phy-int-pin", &phy_int_pin);
> +	if (ret < 0)
> +		phy_int_pin = MLXBF2_GPIO_MAX_PINS_PER_BLOCK;
>   
>   	/*
> -	 * OCP3.0 supports the AUX power mode interrupt on bit 0 of yu.gpio[16].
> -	 * BlueSphere and the PRIS boards support the rebooot interrupt on bit
> -	 * 7 of yu.gpio[0].
> +	 * OCP3.0 supports the low power mode interrupt.
>   	 */
>   	ret = device_property_read_u32(dev, "low-pwr-pin", &low_pwr_pin);
>   	if (ret < 0)
> -		low_pwr_pin = MAX_HOST_GPIOS + 1;
> +		low_pwr_pin = MLXBF2_GPIO_MAX_PINS_PER_BLOCK;
>   
> +	/*
> +	 * BlueSphere and the PRIS boards support the reset interrupt.
> +	 */
>   	ret = device_property_read_u32(dev, "rst-pin", &rst_pin);
>   	if (ret < 0)
> -		rst_pin = MAX_HOST_GPIOS + 1;
> +		rst_pin = MLXBF2_GPIO_MAX_PINS_PER_BLOCK;
>   
> +	gs->phy_int_pin = phy_int_pin;
>   	gs->low_pwr_pin = low_pwr_pin;
>   	gs->rst_pin = rst_pin;
> -
> -	if ((low_pwr_pin == LOW_PWR_GPIO_PIN) || (rst_pin == RST_GPIO_PIN)) {
> -		if (rst_pin == RST_GPIO_PIN)
> -			gs->gpio_int_bit = YU_GPIO0_RST_BIT;
> -		else
> -			gs->gpio_int_bit = YU_GPIO16_LOW_PWR_BIT;
> +	gs->gpio_int_mask = mlxbf2_gpio_get_int_mask(gs);
> +
> +	if (gs->gpio_int_mask) {
> +		gs->irq_chip.name = name;
> +		gs->irq_chip.irq_mask = mlxbf2_gpio_irq_mask;
> +		gs->irq_chip.irq_unmask = mlxbf2_gpio_irq_unmask;
> +
> +		girq = &gs->gc.irq;
> +		girq->chip = &gs->irq_chip;
> +		/* This will let us handle the parent IRQ in the driver */
> +		girq->parent_handler = NULL;
> +		girq->num_parents = 0;
> +		girq->parents = NULL;
> +		girq->default_type = IRQ_TYPE_NONE;
> +		girq->handler = handle_simple_irq;
> +		girq->init_hw = mlxbf2_gpio_init_hw;
>   
>   		irq = platform_get_irq(pdev, 0);
> -		/*
> -		 * For now, no need to check if interrupt was previously allocated
> -		 * by another gpio block.
> -		 */
>   		ret = devm_request_irq(dev, irq, mlxbf2_gpio_irq_handler,
> -			IRQF_ONESHOT | IRQF_SHARED | IRQF_PROBE_SHARED, dev_name(dev), gs);
> +				       IRQF_ONESHOT | IRQF_SHARED, name, gs);
>   		if (ret) {
> -			dev_err(dev, "IRQ handler registering failed (%d)\n", ret);
> +			dev_err(dev, "failed to request IRQ");
>   			return ret;
>   		}
> -		mlxbf2_gpio_irq_set_type(gs);
> -		mlxbf2_gpio_irq_enable(gs);
> +	}
> +
> +	ret = devm_gpiochip_add_data(dev, &gs->gc, gs);
> +	if (ret) {
> +		dev_err(dev, "Failed adding memory mapped gpiochip\n");
> +		return ret;
> +	}
> +	platform_set_drvdata(pdev, gs);
> +
> +	if (phy_int_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK) {
> +		/* Create phy irq mapping */
> +		mlxbf2_gpio_to_irq(&gs->gc, phy_int_pin);
> +		/* Enable sharing the irq domain with the PHY driver */
> +		irq_set_default_host(gs->gc.irq.domain);
>   	}
>   
>   	return 0;
> @@ -477,8 +626,15 @@ mlxbf2_gpio_remove(struct platform_device *pdev)
>   	struct mlxbf2_gpio_context *gs;
>   
>   	gs = platform_get_drvdata(pdev);
> -	if ((gs->low_pwr_pin == LOW_PWR_GPIO_PIN) || (gs->rst_pin == RST_GPIO_PIN)) {
> -		mlxbf2_gpio_irq_disable(gs);
> +
> +	if ((gs->phy_int_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK) ||
> +	    (gs->low_pwr_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK) ||
> +	    (gs->rst_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK)) {
> +		mlxbf2_gpio_disable_int(gs);
> +	}
> +
> +	if ((gs->low_pwr_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK) ||
> +	    (gs->rst_pin != MLXBF2_GPIO_MAX_PINS_PER_BLOCK)) {
>   		flush_work(&gs->send_work);
>   	}
>   
> @@ -512,7 +668,7 @@ static int mlxbf2_gpio_resume(struct platform_device *pdev)
>   }
>   #endif
>   
> -static const struct acpi_device_id mlxbf2_gpio_acpi_match[] = {
> +static const struct acpi_device_id __maybe_unused mlxbf2_gpio_acpi_match[] = {
>   	{ "MLNXBF22", 0 },
>   	{},
>   };
> @@ -534,5 +690,6 @@ static struct platform_driver mlxbf2_gpio_driver = {
>   module_platform_driver(mlxbf2_gpio_driver);
>   
>   MODULE_DESCRIPTION("Mellanox BlueField-2 GPIO Driver");
> -MODULE_AUTHOR("Mellanox Technologies");
> +MODULE_AUTHOR("Asmaa Mnebhi <asmaa at nvidia.com>");
>   MODULE_LICENSE("Dual BSD/GPL");
> +MODULE_VERSION(DRV_VERSION);
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig b/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig
> index 08a4487..0338ba5 100644
> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/Kconfig
> @@ -5,7 +5,7 @@
>   
>   config MLXBF_GIGE
>   	tristate "Mellanox Technologies BlueField Gigabit Ethernet support"
> -	depends on (ARM64 || COMPILE_TEST) && ACPI
> +	depends on (ARM64 || COMPILE_TEST) && ACPI && GPIO_MLXBF2
>   	select PHYLIB
>   	help
>   	  The second generation BlueField SoC from Mellanox Technologies
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile b/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile
> index e99fc19..6caae3c 100644
> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/Makefile
> @@ -2,4 +2,6 @@
>   
>   obj-$(CONFIG_MLXBF_GIGE) += mlxbf_gige.o
>   
> -mlxbf_gige-y := mlxbf_gige_main.o mlxbf_gige_mdio.o
> +mlxbf_gige-y := mlxbf_gige_ethtool.o mlxbf_gige_intr.o \
> +		mlxbf_gige_main.o mlxbf_gige_mdio.o \
> +		mlxbf_gige_rx.o mlxbf_gige_tx.o
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
> index c3cb50e..e8cf26f 100644
> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h
> @@ -4,12 +4,13 @@
>    * - this file contains software data structures and any chip-specific
>    *   data structures (e.g. TX WQE format) that are memory resident.
>    *
> - * Copyright (c) 2020 NVIDIA Corporation.
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
>    */
>   
>   #ifndef __MLXBF_GIGE_H__
>   #define __MLXBF_GIGE_H__
>   
> +#include <linux/io-64-nonatomic-lo-hi.h>
>   #include <linux/irqreturn.h>
>   #include <linux/netdevice.h>
>   
> @@ -53,6 +54,8 @@
>   
>   #define MLXBF_GIGE_MDIO_DEFAULT_PHY_ADDR 0x3
>   
> +#define MLXBF_GIGE_DEFAULT_PHY_INT_GPIO 12
> +
>   struct mlxbf_gige_stats {
>   	u64 hw_access_errors;
>   	u64 tx_invalid_checksums;
> @@ -77,8 +80,6 @@ struct mlxbf_gige {
>   	struct platform_device *pdev;
>   	void __iomem *mdio_io;
>   	struct mii_bus *mdiobus;
> -	void __iomem *gpio_io;
> -	u32 phy_int_gpio_mask;
>   	spinlock_t lock;
>   	spinlock_t gpio_lock;
>   	u16 rx_q_entries;
> @@ -143,7 +144,6 @@ struct mlxbf_gige {
>   enum mlxbf_gige_res {
>   	MLXBF_GIGE_RES_MAC,
>   	MLXBF_GIGE_RES_MDIO9,
> -	MLXBF_GIGE_RES_GPIO0,
>   	MLXBF_GIGE_RES_LLU,
>   	MLXBF_GIGE_RES_PLU
>   };
> @@ -155,5 +155,28 @@ int mlxbf_gige_mdio_probe(struct platform_device *pdev,
>   			  struct mlxbf_gige *priv);
>   void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv);
>   irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(int irq, void *dev_id);
> +void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv);
> +
> +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
> +				  unsigned int index, u64 dmac);
> +void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
> +				  unsigned int index, u64 *dmac);
> +void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv);
> +void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv);
> +int mlxbf_gige_rx_init(struct mlxbf_gige *priv);
> +void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv);
> +int mlxbf_gige_tx_init(struct mlxbf_gige *priv);
> +void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv);
> +bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv);
> +netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
> +				  struct net_device *netdev);
> +struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv,
> +				     dma_addr_t *buf_dma,
> +				     enum dma_data_direction dir);
> +int mlxbf_gige_request_irqs(struct mlxbf_gige *priv);
> +void mlxbf_gige_free_irqs(struct mlxbf_gige *priv);
> +int mlxbf_gige_poll(struct napi_struct *napi, int budget);
> +extern const struct ethtool_ops mlxbf_gige_ethtool_ops;
> +void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv);
>   
>   #endif /* !defined(__MLXBF_GIGE_H__) */
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
> new file mode 100644
> index 0000000..55b5d67
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_ethtool.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
> +
> +/* Ethtool support for Mellanox Gigabit Ethernet driver
> + *
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
> + */
> +
> +#include <linux/phy.h>
> +
> +#include "mlxbf_gige.h"
> +#include "mlxbf_gige_regs.h"
> +
> +/* Start of struct ethtool_ops functions */
> +static int mlxbf_gige_get_regs_len(struct net_device *netdev)
> +{
> +	return MLXBF_GIGE_MMIO_REG_SZ;
> +}
> +
> +static void mlxbf_gige_get_regs(struct net_device *netdev,
> +				struct ethtool_regs *regs, void *p)
> +{
> +	struct mlxbf_gige *priv = netdev_priv(netdev);
> +
> +	regs->version = MLXBF_GIGE_REGS_VERSION;
> +
> +	/* Read entire MMIO register space and store results
> +	 * into the provided buffer. Each 64-bit word is converted
> +	 * to big-endian to make the output more readable.
> +	 *
> +	 * NOTE: by design, a read to an offset without an existing
> +	 *       register will be acknowledged and return zero.
> +	 */
> +	memcpy_fromio(p, priv->base, MLXBF_GIGE_MMIO_REG_SZ);
> +}
> +
> +static void mlxbf_gige_get_ringparam(struct net_device *netdev,
> +				     struct ethtool_ringparam *ering)
> +{
> +	struct mlxbf_gige *priv = netdev_priv(netdev);
> +
> +	ering->rx_max_pending = MLXBF_GIGE_MAX_RXQ_SZ;
> +	ering->tx_max_pending = MLXBF_GIGE_MAX_TXQ_SZ;
> +	ering->rx_pending = priv->rx_q_entries;
> +	ering->tx_pending = priv->tx_q_entries;
> +}
> +
> +static int mlxbf_gige_set_ringparam(struct net_device *netdev,
> +				    struct ethtool_ringparam *ering)
> +{
> +	const struct net_device_ops *ops = netdev->netdev_ops;
> +	struct mlxbf_gige *priv = netdev_priv(netdev);
> +	int new_rx_q_entries, new_tx_q_entries;
> +
> +	/* Device does not have separate queues for small/large frames */
> +	if (ering->rx_mini_pending || ering->rx_jumbo_pending)
> +		return -EINVAL;
> +
> +	/* Round up to supported values */
> +	new_rx_q_entries = roundup_pow_of_two(ering->rx_pending);
> +	new_tx_q_entries = roundup_pow_of_two(ering->tx_pending);
> +
> +	/* Check against min values, core checks against max values */
> +	if (new_tx_q_entries < MLXBF_GIGE_MIN_TXQ_SZ ||
> +	    new_rx_q_entries < MLXBF_GIGE_MIN_RXQ_SZ)
> +		return -EINVAL;
> +
> +	/* If queue sizes did not change, exit now */
> +	if (new_rx_q_entries == priv->rx_q_entries &&
> +	    new_tx_q_entries == priv->tx_q_entries)
> +		return 0;
> +
> +	if (netif_running(netdev))
> +		ops->ndo_stop(netdev);
> +
> +	priv->rx_q_entries = new_rx_q_entries;
> +	priv->tx_q_entries = new_tx_q_entries;
> +
> +	if (netif_running(netdev))
> +		ops->ndo_open(netdev);
> +
> +	return 0;
> +}
> +
> +static const struct {
> +	const char string[ETH_GSTRING_LEN];
> +} mlxbf_gige_ethtool_stats_keys[] = {
> +	{ "hw_access_errors" },
> +	{ "tx_invalid_checksums" },
> +	{ "tx_small_frames" },
> +	{ "tx_index_errors" },
> +	{ "sw_config_errors" },
> +	{ "sw_access_errors" },
> +	{ "rx_truncate_errors" },
> +	{ "rx_mac_errors" },
> +	{ "rx_din_dropped_pkts" },
> +	{ "tx_fifo_full" },
> +	{ "rx_filter_passed_pkts" },
> +	{ "rx_filter_discard_pkts" },
> +};
> +
> +static int mlxbf_gige_get_sset_count(struct net_device *netdev, int stringset)
> +{
> +	if (stringset != ETH_SS_STATS)
> +		return -EOPNOTSUPP;
> +	return ARRAY_SIZE(mlxbf_gige_ethtool_stats_keys);
> +}
> +
> +static void mlxbf_gige_get_strings(struct net_device *netdev, u32 stringset,
> +				   u8 *buf)
> +{
> +	if (stringset != ETH_SS_STATS)
> +		return;
> +	memcpy(buf, &mlxbf_gige_ethtool_stats_keys,
> +	       sizeof(mlxbf_gige_ethtool_stats_keys));
> +}
> +
> +static void mlxbf_gige_get_ethtool_stats(struct net_device *netdev,
> +					 struct ethtool_stats *estats,
> +					 u64 *data)
> +{
> +	struct mlxbf_gige *priv = netdev_priv(netdev);
> +
> +	/* Fill data array with interface statistics
> +	 *
> +	 * NOTE: the data writes must be in
> +	 *       sync with the strings shown in
> +	 *       the mlxbf_gige_ethtool_stats_keys[] array
> +	 *
> +	 * NOTE2: certain statistics below are zeroed upon
> +	 *        port disable, so the calculation below
> +	 *        must include the "cached" value of the stat
> +	 *        plus the value read directly from hardware.
> +	 *        Cached statistics are currently:
> +	 *          rx_din_dropped_pkts
> +	 *          rx_filter_passed_pkts
> +	 *          rx_filter_discard_pkts
> +	 */
> +	*data++ = priv->stats.hw_access_errors;
> +	*data++ = priv->stats.tx_invalid_checksums;
> +	*data++ = priv->stats.tx_small_frames;
> +	*data++ = priv->stats.tx_index_errors;
> +	*data++ = priv->stats.sw_config_errors;
> +	*data++ = priv->stats.sw_access_errors;
> +	*data++ = priv->stats.rx_truncate_errors;
> +	*data++ = priv->stats.rx_mac_errors;
> +	*data++ = (priv->stats.rx_din_dropped_pkts +
> +		   readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER));
> +	*data++ = priv->stats.tx_fifo_full;
> +	*data++ = (priv->stats.rx_filter_passed_pkts +
> +		   readq(priv->base + MLXBF_GIGE_RX_PASS_COUNTER_ALL));
> +	*data++ = (priv->stats.rx_filter_discard_pkts +
> +		   readq(priv->base + MLXBF_GIGE_RX_DISC_COUNTER_ALL));
> +}
> +
> +static void mlxbf_gige_get_pauseparam(struct net_device *netdev,
> +				      struct ethtool_pauseparam *pause)
> +{
> +	struct mlxbf_gige *priv = netdev_priv(netdev);
> +
> +	pause->autoneg = priv->aneg_pause;
> +	pause->rx_pause = priv->tx_pause;
> +	pause->tx_pause = priv->rx_pause;
> +}
> +
> +const struct ethtool_ops mlxbf_gige_ethtool_ops = {
> +	.get_link		= ethtool_op_get_link,
> +	.get_ringparam		= mlxbf_gige_get_ringparam,
> +	.set_ringparam		= mlxbf_gige_set_ringparam,
> +	.get_regs_len           = mlxbf_gige_get_regs_len,
> +	.get_regs               = mlxbf_gige_get_regs,
> +	.get_strings            = mlxbf_gige_get_strings,
> +	.get_sset_count         = mlxbf_gige_get_sset_count,
> +	.get_ethtool_stats      = mlxbf_gige_get_ethtool_stats,
> +	.nway_reset		= phy_ethtool_nway_reset,
> +	.get_pauseparam		= mlxbf_gige_get_pauseparam,
> +	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
> +};
> +
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c
> new file mode 100644
> index 0000000..f67826a
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_intr.c
> @@ -0,0 +1,143 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
> +
> +/* Interrupt related logic for Mellanox Gigabit Ethernet driver
> + *
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
> + */
> +
> +#include <linux/interrupt.h>
> +
> +#include "mlxbf_gige.h"
> +#include "mlxbf_gige_regs.h"
> +
> +static irqreturn_t mlxbf_gige_error_intr(int irq, void *dev_id)
> +{
> +	struct mlxbf_gige *priv;
> +	u64 int_status;
> +
> +	priv = dev_id;
> +
> +	priv->error_intr_count++;
> +
> +	int_status = readq(priv->base + MLXBF_GIGE_INT_STATUS);
> +
> +	if (int_status & MLXBF_GIGE_INT_STATUS_HW_ACCESS_ERROR)
> +		priv->stats.hw_access_errors++;
> +
> +	if (int_status & MLXBF_GIGE_INT_STATUS_TX_CHECKSUM_INPUTS) {
> +		priv->stats.tx_invalid_checksums++;
> +		/* This error condition is latched into MLXBF_GIGE_INT_STATUS
> +		 * when the GigE silicon operates on the offending
> +		 * TX WQE. The write to MLXBF_GIGE_INT_STATUS at the bottom
> +		 * of this routine clears this error condition.
> +		 */
> +	}
> +
> +	if (int_status & MLXBF_GIGE_INT_STATUS_TX_SMALL_FRAME_SIZE) {
> +		priv->stats.tx_small_frames++;
> +		/* This condition happens when the networking stack invokes
> +		 * this driver's "start_xmit()" method with a packet whose
> +		 * size < 60 bytes.  The GigE silicon will automatically pad
> +		 * this small frame up to a minimum-sized frame before it is
> +		 * sent. The "tx_small_frame" condition is latched into the
> +		 * MLXBF_GIGE_INT_STATUS register when the GigE silicon
> +		 * operates on the offending TX WQE. The write to
> +		 * MLXBF_GIGE_INT_STATUS at the bottom of this routine
> +		 * clears this condition.
> +		 */
> +	}
> +
> +	if (int_status & MLXBF_GIGE_INT_STATUS_TX_PI_CI_EXCEED_WQ_SIZE)
> +		priv->stats.tx_index_errors++;
> +
> +	if (int_status & MLXBF_GIGE_INT_STATUS_SW_CONFIG_ERROR)
> +		priv->stats.sw_config_errors++;
> +
> +	if (int_status & MLXBF_GIGE_INT_STATUS_SW_ACCESS_ERROR)
> +		priv->stats.sw_access_errors++;
> +
> +	/* Clear all error interrupts by writing '1' back to
> +	 * all the asserted bits in INT_STATUS.  Do not write
> +	 * '1' back to 'receive packet' bit, since that is
> +	 * managed separately.
> +	 */
> +
> +	int_status &= ~MLXBF_GIGE_INT_STATUS_RX_RECEIVE_PACKET;
> +
> +	writeq(int_status, priv->base + MLXBF_GIGE_INT_STATUS);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t mlxbf_gige_rx_intr(int irq, void *dev_id)
> +{
> +	struct mlxbf_gige *priv;
> +
> +	priv = dev_id;
> +
> +	priv->rx_intr_count++;
> +
> +	/* NOTE: GigE silicon automatically disables "packet rx" interrupt by
> +	 *       setting MLXBF_GIGE_INT_MASK bit0 upon triggering the interrupt
> +	 *       to the ARM cores.  Software needs to re-enable "packet rx"
> +	 *       interrupts by clearing MLXBF_GIGE_INT_MASK bit0.
> +	 */
> +
> +	napi_schedule(&priv->napi);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t mlxbf_gige_llu_plu_intr(int irq, void *dev_id)
> +{
> +	struct mlxbf_gige *priv;
> +
> +	priv = dev_id;
> +	priv->llu_plu_intr_count++;
> +
> +	return IRQ_HANDLED;
> +}
> +
> +int mlxbf_gige_request_irqs(struct mlxbf_gige *priv)
> +{
> +	int err;
> +
> +	err = request_irq(priv->error_irq, mlxbf_gige_error_intr, 0,
> +			  "mlxbf_gige_error", priv);
> +	if (err) {
> +		dev_err(priv->dev, "Request error_irq failure\n");
> +		return err;
> +	}
> +
> +	err = request_irq(priv->rx_irq, mlxbf_gige_rx_intr, 0,
> +			  "mlxbf_gige_rx", priv);
> +	if (err) {
> +		dev_err(priv->dev, "Request rx_irq failure\n");
> +		goto free_error_irq;
> +	}
> +
> +	err = request_irq(priv->llu_plu_irq, mlxbf_gige_llu_plu_intr, 0,
> +			  "mlxbf_gige_llu_plu", priv);
> +	if (err) {
> +		dev_err(priv->dev, "Request llu_plu_irq failure\n");
> +		goto free_rx_irq;
> +	}
> +
> +	return 0;
> +
> +free_rx_irq:
> +	free_irq(priv->rx_irq, priv);
> +
> +free_error_irq:
> +	free_irq(priv->error_irq, priv);
> +
> +	return err;
> +}
> +
> +void mlxbf_gige_free_irqs(struct mlxbf_gige *priv)
> +{
> +	free_irq(priv->error_irq, priv);
> +	free_irq(priv->rx_irq, priv);
> +	free_irq(priv->llu_plu_irq, priv);
> +}
> +
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
> index 85a7ce1..c5ffa68 100644
> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
> @@ -2,15 +2,15 @@
>   
>   /* Gigabit Ethernet driver for Mellanox BlueField SoC
>    *
> - * Copyright (c) 2020 NVIDIA Corporation.
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
>    */
>   
>   #include <linux/acpi.h>
>   #include <linux/device.h>
>   #include <linux/dma-mapping.h>
>   #include <linux/etherdevice.h>
> +#include <linux/irqdomain.h>
>   #include <linux/interrupt.h>
> -#include <linux/io-64-nonatomic-lo-hi.h>
>   #include <linux/iopoll.h>
>   #include <linux/module.h>
>   #include <linux/phy.h>
> @@ -21,65 +21,7 @@
>   #include "mlxbf_gige_regs.h"
>   
>   #define DRV_NAME    "mlxbf_gige"
> -#define DRV_VERSION "1.10"
> -
> -static void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
> -					 unsigned int index, u64 dmac)
> -{
> -	void __iomem *base = priv->base;
> -	u64 control;
> -
> -	/* Write destination MAC to specified MAC RX filter */
> -	writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
> -	       (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
> -
> -	/* Enable MAC receive filter mask for specified index */
> -	control = readq(base + MLXBF_GIGE_CONTROL);
> -	control |= (MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index);
> -	writeq(control, base + MLXBF_GIGE_CONTROL);
> -}
> -
> -static void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
> -					 unsigned int index, u64 *dmac)
> -{
> -	void __iomem *base = priv->base;
> -
> -	/* Read destination MAC from specified MAC RX filter */
> -	*dmac = readq(base + MLXBF_GIGE_RX_MAC_FILTER +
> -		      (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
> -}
> -
> -static void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv)
> -{
> -	void __iomem *base = priv->base;
> -	u64 control;
> -
> -	/* Enable MAC_ID_RANGE match functionality */
> -	control = readq(base + MLXBF_GIGE_CONTROL);
> -	control |= MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN;
> -	writeq(control, base + MLXBF_GIGE_CONTROL);
> -
> -	/* Set start of destination MAC range check to 0 */
> -	writeq(0, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START);
> -
> -	/* Set end of destination MAC range check to all FFs */
> -	writeq(0xFFFFFFFFFFFF, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END);
> -}
> -
> -static void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv)
> -{
> -	void __iomem *base = priv->base;
> -	u64 control;
> -
> -	/* Disable MAC_ID_RANGE match functionality */
> -	control = readq(base + MLXBF_GIGE_CONTROL);
> -	control &= ~MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN;
> -	writeq(control, base + MLXBF_GIGE_CONTROL);
> -
> -	/* NOTE: no need to change DMAC_RANGE_START or END;
> -	 * those values are ignored since MAC_ID_RANGE_EN=0
> -	 */
> -}
> +#define DRV_VERSION 1.19
>   
>   /* Allocate SKB whose payload pointer aligns with the Bluefield
>    * hardware DMA limitation, i.e. DMA operation can't cross
> @@ -88,9 +30,9 @@ static void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv)
>    * and then adjusts the headroom so that the SKB data pointer is
>    * naturally aligned to a 2KB boundary.
>    */
> -static struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv,
> -					    dma_addr_t *buf_dma,
> -					    enum dma_data_direction dir)
> +struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv,
> +				     dma_addr_t *buf_dma,
> +				     enum dma_data_direction dir)
>   {
>   	struct sk_buff *skb;
>   	u64 addr, offset;
> @@ -124,705 +66,27 @@ static struct sk_buff *mlxbf_gige_alloc_skb(struct mlxbf_gige *priv,
>   	return skb;
>   }
>   
> -/* Receive Initialization
> - * 1) Configures RX MAC filters via MMIO registers
> - * 2) Allocates RX WQE array using coherent DMA mapping
> - * 3) Initializes each element of RX WQE array with a receive
> - *    buffer pointer (also using coherent DMA mapping)
> - * 4) Allocates RX CQE array using coherent DMA mapping
> - * 5) Completes other misc receive initialization
> - */
> -static int mlxbf_gige_rx_init(struct mlxbf_gige *priv)
> -{
> -	size_t wq_size, cq_size;
> -	dma_addr_t *rx_wqe_ptr;
> -	dma_addr_t rx_buf_dma;
> -	u64 data;
> -	int i, j;
> -
> -	/* Configure MAC RX filter #0 to allow RX of broadcast pkts */
> -	mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX,
> -				     BCAST_MAC_ADDR);
> -
> -	wq_size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries;
> -	priv->rx_wqe_base = dma_alloc_coherent(priv->dev, wq_size,
> -					       &priv->rx_wqe_base_dma,
> -					       GFP_KERNEL);
> -	if (!priv->rx_wqe_base)
> -		return -ENOMEM;
> -
> -	/* Initialize 'rx_wqe_ptr' to point to first RX WQE in array
> -	 * Each RX WQE is simply a receive buffer pointer, so walk
> -	 * the entire array, allocating a 2KB buffer for each element
> -	 */
> -	rx_wqe_ptr = priv->rx_wqe_base;
> -
> -	for (i = 0; i < priv->rx_q_entries; i++) {
> -		priv->rx_skb[i] = mlxbf_gige_alloc_skb(priv, &rx_buf_dma, DMA_FROM_DEVICE);
> -		if (!priv->rx_skb[i])
> -			goto free_wqe_and_skb;
> -
> -		*rx_wqe_ptr++ = rx_buf_dma;
> -	}
> -
> -	/* Write RX WQE base address into MMIO reg */
> -	writeq(priv->rx_wqe_base_dma, priv->base + MLXBF_GIGE_RX_WQ_BASE);
> -
> -	cq_size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries;
> -	priv->rx_cqe_base = dma_alloc_coherent(priv->dev, cq_size,
> -					       &priv->rx_cqe_base_dma,
> -					       GFP_KERNEL);
> -	if (!priv->rx_cqe_base)
> -		goto free_wqe_and_skb;
> -
> -	/* Write RX CQE base address into MMIO reg */
> -	writeq(priv->rx_cqe_base_dma, priv->base + MLXBF_GIGE_RX_CQ_BASE);
> -
> -	/* Write RX_WQE_PI with current number of replenished buffers */
> -	writeq(priv->rx_q_entries, priv->base + MLXBF_GIGE_RX_WQE_PI);
> -
> -	/* Enable removal of CRC during RX */
> -	data = readq(priv->base + MLXBF_GIGE_RX);
> -	data |= MLXBF_GIGE_RX_STRIP_CRC_EN;
> -	writeq(data, priv->base + MLXBF_GIGE_RX);
> -
> -	/* Enable RX MAC filter pass and discard counters */
> -	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN,
> -	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC);
> -	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN,
> -	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS);
> -
> -	/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
> -	 * indicate readiness to receive interrupts
> -	 */
> -	data = readq(priv->base + MLXBF_GIGE_INT_MASK);
> -	data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET;
> -	writeq(data, priv->base + MLXBF_GIGE_INT_MASK);
> -
> -	/* Enable RX DMA to write new packets to memory */
> -	writeq(MLXBF_GIGE_RX_DMA_EN, priv->base + MLXBF_GIGE_RX_DMA);
> -
> -	writeq(ilog2(priv->rx_q_entries),
> -	       priv->base + MLXBF_GIGE_RX_WQE_SIZE_LOG2);
> -
> -	return 0;
> -
> -free_wqe_and_skb:
> -	rx_wqe_ptr = priv->rx_wqe_base;
> -	for (j = 0; j < i; j++) {
> -		dma_unmap_single(priv->dev, *rx_wqe_ptr,
> -				 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
> -		dev_kfree_skb(priv->rx_skb[j]);
> -		rx_wqe_ptr++;
> -	}
> -	dma_free_coherent(priv->dev, wq_size,
> -			  priv->rx_wqe_base, priv->rx_wqe_base_dma);
> -	return -ENOMEM;
> -}
> -
> -/* Transmit Initialization
> - * 1) Allocates TX WQE array using coherent DMA mapping
> - * 2) Allocates TX completion counter using coherent DMA mapping
> - */
> -static int mlxbf_gige_tx_init(struct mlxbf_gige *priv)
> -{
> -	size_t size;
> -
> -	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
> -	priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size,
> -					       &priv->tx_wqe_base_dma,
> -					       GFP_KERNEL);
> -	if (!priv->tx_wqe_base)
> -		return -ENOMEM;
> -
> -	priv->tx_wqe_next = priv->tx_wqe_base;
> -
> -	/* Write TX WQE base address into MMIO reg */
> -	writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE);
> -
> -	/* Allocate address for TX completion count */
> -	priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
> -					 &priv->tx_cc_dma, GFP_KERNEL);
> -	if (!priv->tx_cc) {
> -		dma_free_coherent(priv->dev, size,
> -				  priv->tx_wqe_base, priv->tx_wqe_base_dma);
> -		return -ENOMEM;
> -	}
> -
> -	/* Write TX CC base address into MMIO reg */
> -	writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
> -
> -	writeq(ilog2(priv->tx_q_entries),
> -	       priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2);
> -
> -	priv->prev_tx_ci = 0;
> -	priv->tx_pi = 0;
> -
> -	return 0;
> -}
> -
> -/* Receive Deinitialization
> - * This routine will free allocations done by mlxbf_gige_rx_init(),
> - * namely the RX WQE and RX CQE arrays, as well as all RX buffers
> - */
> -static void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv)
> -{
> -	dma_addr_t *rx_wqe_ptr;
> -	size_t size;
> -	int i;
> -
> -	rx_wqe_ptr = priv->rx_wqe_base;
> -
> -	for (i = 0; i < priv->rx_q_entries; i++) {
> -		dma_unmap_single(priv->dev, *rx_wqe_ptr, MLXBF_GIGE_DEFAULT_BUF_SZ,
> -				 DMA_FROM_DEVICE);
> -		dev_kfree_skb(priv->rx_skb[i]);
> -		rx_wqe_ptr++;
> -	}
> -
> -	size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries;
> -	dma_free_coherent(priv->dev, size,
> -			  priv->rx_wqe_base, priv->rx_wqe_base_dma);
> -
> -	size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries;
> -	dma_free_coherent(priv->dev, size,
> -			  priv->rx_cqe_base, priv->rx_cqe_base_dma);
> -
> -	priv->rx_wqe_base = NULL;
> -	priv->rx_wqe_base_dma = 0;
> -	priv->rx_cqe_base = NULL;
> -	priv->rx_cqe_base_dma = 0;
> -	writeq(0, priv->base + MLXBF_GIGE_RX_WQ_BASE);
> -	writeq(0, priv->base + MLXBF_GIGE_RX_CQ_BASE);
> -}
> -
> -/* Transmit Deinitialization
> - * This routine will free allocations done by mlxbf_gige_tx_init(),
> - * namely the TX WQE array and the TX completion counter
> - */
> -static void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv)
> -{
> -	u64 *tx_wqe_addr;
> -	size_t size;
> -	int i;
> -
> -	tx_wqe_addr = priv->tx_wqe_base;
> -
> -	for (i = 0; i < priv->tx_q_entries; i++) {
> -		if (priv->tx_skb[i]) {
> -			dma_unmap_single(priv->dev, *tx_wqe_addr,
> -					 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_TO_DEVICE);
> -			dev_kfree_skb(priv->tx_skb[i]);
> -			priv->tx_skb[i] = NULL;
> -		}
> -		tx_wqe_addr += 2;
> -	}
> -
> -	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
> -	dma_free_coherent(priv->dev, size,
> -			  priv->tx_wqe_base, priv->tx_wqe_base_dma);
> -
> -	dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
> -			  priv->tx_cc, priv->tx_cc_dma);
> -
> -	priv->tx_wqe_base = NULL;
> -	priv->tx_wqe_base_dma = 0;
> -	priv->tx_cc = NULL;
> -	priv->tx_cc_dma = 0;
> -	priv->tx_wqe_next = NULL;
> -	writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE);
> -	writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
> -}
> -
> -/* Start of struct ethtool_ops functions */
> -static int mlxbf_gige_get_regs_len(struct net_device *netdev)
> -{
> -	return MLXBF_GIGE_MMIO_REG_SZ;
> -}
> -
> -static void mlxbf_gige_get_regs(struct net_device *netdev,
> -				struct ethtool_regs *regs, void *p)
> -{
> -	struct mlxbf_gige *priv = netdev_priv(netdev);
> -
> -	regs->version = MLXBF_GIGE_REGS_VERSION;
> -
> -	/* Read entire MMIO register space and store results
> -	 * into the provided buffer. Each 64-bit word is converted
> -	 * to big-endian to make the output more readable.
> -	 *
> -	 * NOTE: by design, a read to an offset without an existing
> -	 *       register will be acknowledged and return zero.
> -	 */
> -	memcpy_fromio(p, priv->base, MLXBF_GIGE_MMIO_REG_SZ);
> -}
> -
> -static void mlxbf_gige_get_ringparam(struct net_device *netdev,
> -				     struct ethtool_ringparam *ering)
> -{
> -	struct mlxbf_gige *priv = netdev_priv(netdev);
> -
> -	ering->rx_max_pending = MLXBF_GIGE_MAX_RXQ_SZ;
> -	ering->tx_max_pending = MLXBF_GIGE_MAX_TXQ_SZ;
> -	ering->rx_pending = priv->rx_q_entries;
> -	ering->tx_pending = priv->tx_q_entries;
> -}
> -
> -static int mlxbf_gige_set_ringparam(struct net_device *netdev,
> -				    struct ethtool_ringparam *ering)
> -{
> -	const struct net_device_ops *ops = netdev->netdev_ops;
> -	struct mlxbf_gige *priv = netdev_priv(netdev);
> -	int new_rx_q_entries, new_tx_q_entries;
> -
> -	/* Device does not have separate queues for small/large frames */
> -	if (ering->rx_mini_pending || ering->rx_jumbo_pending)
> -		return -EINVAL;
> -
> -	/* Round up to supported values */
> -	new_rx_q_entries = roundup_pow_of_two(ering->rx_pending);
> -	new_tx_q_entries = roundup_pow_of_two(ering->tx_pending);
> -
> -	/* Check against min values, core checks against max values */
> -	if (new_tx_q_entries < MLXBF_GIGE_MIN_TXQ_SZ ||
> -	    new_rx_q_entries < MLXBF_GIGE_MIN_RXQ_SZ)
> -		return -EINVAL;
> -
> -	/* If queue sizes did not change, exit now */
> -	if (new_rx_q_entries == priv->rx_q_entries &&
> -	    new_tx_q_entries == priv->tx_q_entries)
> -		return 0;
> -
> -	if (netif_running(netdev))
> -		ops->ndo_stop(netdev);
> -
> -	priv->rx_q_entries = new_rx_q_entries;
> -	priv->tx_q_entries = new_tx_q_entries;
> -
> -	if (netif_running(netdev))
> -		ops->ndo_open(netdev);
> -
> -	return 0;
> -}
> -
> -static const struct {
> -	const char string[ETH_GSTRING_LEN];
> -} mlxbf_gige_ethtool_stats_keys[] = {
> -	{ "hw_access_errors" },
> -	{ "tx_invalid_checksums" },
> -	{ "tx_small_frames" },
> -	{ "tx_index_errors" },
> -	{ "sw_config_errors" },
> -	{ "sw_access_errors" },
> -	{ "rx_truncate_errors" },
> -	{ "rx_mac_errors" },
> -	{ "rx_din_dropped_pkts" },
> -	{ "tx_fifo_full" },
> -	{ "rx_filter_passed_pkts" },
> -	{ "rx_filter_discard_pkts" },
> -};
> -
> -static int mlxbf_gige_get_sset_count(struct net_device *netdev, int stringset)
> -{
> -	if (stringset != ETH_SS_STATS)
> -		return -EOPNOTSUPP;
> -	return ARRAY_SIZE(mlxbf_gige_ethtool_stats_keys);
> -}
> -
> -static void mlxbf_gige_get_strings(struct net_device *netdev, u32 stringset,
> -				   u8 *buf)
> -{
> -	if (stringset != ETH_SS_STATS)
> -		return;
> -	memcpy(buf, &mlxbf_gige_ethtool_stats_keys,
> -	       sizeof(mlxbf_gige_ethtool_stats_keys));
> -}
> -
> -static void mlxbf_gige_get_ethtool_stats(struct net_device *netdev,
> -					 struct ethtool_stats *estats,
> -					 u64 *data)
> -{
> -	struct mlxbf_gige *priv = netdev_priv(netdev);
> -
> -	/* Fill data array with interface statistics
> -	 *
> -	 * NOTE: the data writes must be in
> -	 *       sync with the strings shown in
> -	 *       the mlxbf_gige_ethtool_stats_keys[] array
> -	 *
> -	 * NOTE2: certain statistics below are zeroed upon
> -	 *        port disable, so the calculation below
> -	 *        must include the "cached" value of the stat
> -	 *        plus the value read directly from hardware.
> -	 *        Cached statistics are currently:
> -	 *          rx_din_dropped_pkts
> -	 *          rx_filter_passed_pkts
> -	 *          rx_filter_discard_pkts
> -	 */
> -	*data++ = priv->stats.hw_access_errors;
> -	*data++ = priv->stats.tx_invalid_checksums;
> -	*data++ = priv->stats.tx_small_frames;
> -	*data++ = priv->stats.tx_index_errors;
> -	*data++ = priv->stats.sw_config_errors;
> -	*data++ = priv->stats.sw_access_errors;
> -	*data++ = priv->stats.rx_truncate_errors;
> -	*data++ = priv->stats.rx_mac_errors;
> -	*data++ = (priv->stats.rx_din_dropped_pkts +
> -		   readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER));
> -	*data++ = priv->stats.tx_fifo_full;
> -	*data++ = (priv->stats.rx_filter_passed_pkts +
> -		   readq(priv->base + MLXBF_GIGE_RX_PASS_COUNTER_ALL));
> -	*data++ = (priv->stats.rx_filter_discard_pkts +
> -		   readq(priv->base + MLXBF_GIGE_RX_DISC_COUNTER_ALL));
> -}
> -
> -static void mlxbf_gige_get_pauseparam(struct net_device *netdev,
> -				      struct ethtool_pauseparam *pause)
> -{
> -	struct mlxbf_gige *priv = netdev_priv(netdev);
> -
> -	pause->autoneg = priv->aneg_pause;
> -	pause->rx_pause = priv->tx_pause;
> -	pause->tx_pause = priv->rx_pause;
> -}
> -
> -static const struct ethtool_ops mlxbf_gige_ethtool_ops = {
> -	.get_link		= ethtool_op_get_link,
> -	.get_ringparam		= mlxbf_gige_get_ringparam,
> -	.set_ringparam		= mlxbf_gige_set_ringparam,
> -	.get_regs_len           = mlxbf_gige_get_regs_len,
> -	.get_regs               = mlxbf_gige_get_regs,
> -	.get_strings            = mlxbf_gige_get_strings,
> -	.get_sset_count         = mlxbf_gige_get_sset_count,
> -	.get_ethtool_stats      = mlxbf_gige_get_ethtool_stats,
> -	.nway_reset		= phy_ethtool_nway_reset,
> -	.get_pauseparam		= mlxbf_gige_get_pauseparam,
> -	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
> -};
> -
> -/* Start of struct net_device_ops functions */
> -static irqreturn_t mlxbf_gige_error_intr(int irq, void *dev_id)
> -{
> -	struct mlxbf_gige *priv;
> -	u64 int_status;
> -
> -	priv = dev_id;
> -
> -	priv->error_intr_count++;
> -
> -	int_status = readq(priv->base + MLXBF_GIGE_INT_STATUS);
> -
> -	if (int_status & MLXBF_GIGE_INT_STATUS_HW_ACCESS_ERROR)
> -		priv->stats.hw_access_errors++;
> -
> -	if (int_status & MLXBF_GIGE_INT_STATUS_TX_CHECKSUM_INPUTS) {
> -		priv->stats.tx_invalid_checksums++;
> -		/* This error condition is latched into MLXBF_GIGE_INT_STATUS
> -		 * when the GigE silicon operates on the offending
> -		 * TX WQE. The write to MLXBF_GIGE_INT_STATUS at the bottom
> -		 * of this routine clears this error condition.
> -		 */
> -	}
> -
> -	if (int_status & MLXBF_GIGE_INT_STATUS_TX_SMALL_FRAME_SIZE) {
> -		priv->stats.tx_small_frames++;
> -		/* This condition happens when the networking stack invokes
> -		 * this driver's "start_xmit()" method with a packet whose
> -		 * size < 60 bytes.  The GigE silicon will automatically pad
> -		 * this small frame up to a minimum-sized frame before it is
> -		 * sent. The "tx_small_frame" condition is latched into the
> -		 * MLXBF_GIGE_INT_STATUS register when the GigE silicon
> -		 * operates on the offending TX WQE. The write to
> -		 * MLXBF_GIGE_INT_STATUS at the bottom of this routine
> -		 * clears this condition.
> -		 */
> -	}
> -
> -	if (int_status & MLXBF_GIGE_INT_STATUS_TX_PI_CI_EXCEED_WQ_SIZE)
> -		priv->stats.tx_index_errors++;
> -
> -	if (int_status & MLXBF_GIGE_INT_STATUS_SW_CONFIG_ERROR)
> -		priv->stats.sw_config_errors++;
> -
> -	if (int_status & MLXBF_GIGE_INT_STATUS_SW_ACCESS_ERROR)
> -		priv->stats.sw_access_errors++;
> -
> -	/* Clear all error interrupts by writing '1' back to
> -	 * all the asserted bits in INT_STATUS.  Do not write
> -	 * '1' back to 'receive packet' bit, since that is
> -	 * managed separately.
> -	 */
> -
> -	int_status &= ~MLXBF_GIGE_INT_STATUS_RX_RECEIVE_PACKET;
> -
> -	writeq(int_status, priv->base + MLXBF_GIGE_INT_STATUS);
> -
> -	return IRQ_HANDLED;
> -}
> -
> -static irqreturn_t mlxbf_gige_rx_intr(int irq, void *dev_id)
> -{
> -	struct mlxbf_gige *priv;
> -
> -	priv = dev_id;
> -
> -	priv->rx_intr_count++;
> -
> -	/* NOTE: GigE silicon automatically disables "packet rx" interrupt by
> -	 *       setting MLXBF_GIGE_INT_MASK bit0 upon triggering the interrupt
> -	 *       to the ARM cores.  Software needs to re-enable "packet rx"
> -	 *       interrupts by clearing MLXBF_GIGE_INT_MASK bit0.
> -	 */
> -
> -	napi_schedule(&priv->napi);
> -
> -	return IRQ_HANDLED;
> -}
> -
> -static irqreturn_t mlxbf_gige_llu_plu_intr(int irq, void *dev_id)
> -{
> -	struct mlxbf_gige *priv;
> -
> -	priv = dev_id;
> -	priv->llu_plu_intr_count++;
> -
> -	return IRQ_HANDLED;
> -}
> -
> -/* Function that returns status of TX ring:
> - *          0: TX ring is full, i.e. there are no
> - *             available un-used entries in TX ring.
> - *   non-null: TX ring is not full, i.e. there are
> - *             some available entries in TX ring.
> - *             The non-null value is a measure of
> - *             how many TX entries are available, but
> - *             it is not the exact number of available
> - *             entries (see below).
> - *
> - * The algorithm makes the assumption that if
> - * (prev_tx_ci == tx_pi) then the TX ring is empty.
> - * An empty ring actually has (tx_q_entries-1)
> - * entries, which allows the algorithm to differentiate
> - * the case of an empty ring vs. a full ring.
> - */
> -static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv)
> -{
> -	unsigned long flags;
> -	u16 avail;
> -
> -	spin_lock_irqsave(&priv->lock, flags);
> -
> -	if (priv->prev_tx_ci == priv->tx_pi)
> -		avail = priv->tx_q_entries - 1;
> -	else
> -		avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi)
> -			  % priv->tx_q_entries) - 1;
> -
> -	spin_unlock_irqrestore(&priv->lock, flags);
> -
> -	return avail;
> -}
> -
> -static bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv)
> -{
> -	struct net_device_stats *stats;
> -	u16 tx_wqe_index;
> -	u64 *tx_wqe_addr;
> -	u64 tx_status;
> -	u16 tx_ci;
> -
> -	tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS);
> -	if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL)
> -		priv->stats.tx_fifo_full++;
> -	tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX);
> -	stats = &priv->netdev->stats;
> -
> -	/* Transmit completion logic needs to loop until the completion
> -	 * index (in SW) equals TX consumer index (from HW).  These
> -	 * parameters are unsigned 16-bit values and the wrap case needs
> -	 * to be supported, that is TX consumer index wrapped from 0xFFFF
> -	 * to 0 while TX completion index is still < 0xFFFF.
> -	 */
> -	for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) {
> -		tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries;
> -		/* Each TX WQE is 16 bytes. The 8 MSB store the 2KB TX
> -		 * buffer address and the 8 LSB contain information
> -		 * about the TX WQE.
> -		 */
> -		tx_wqe_addr = priv->tx_wqe_base +
> -			       (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS);
> -
> -		stats->tx_packets++;
> -		stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr);
> -
> -		dma_unmap_single(priv->dev, *tx_wqe_addr,
> -				 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_TO_DEVICE);
> -		dev_consume_skb_any(priv->tx_skb[tx_wqe_index]);
> -		priv->tx_skb[tx_wqe_index] = NULL;
> -	}
> -
> -	/* Since the TX ring was likely just drained, check if TX queue
> -	 * had previously been stopped and now that there are TX buffers
> -	 * available the TX queue can be awakened.
> -	 */
> -	if (netif_queue_stopped(priv->netdev) &&
> -	    mlxbf_gige_tx_buffs_avail(priv))
> -		netif_wake_queue(priv->netdev);
> -
> -	return true;
> -}
> -
> -static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
> -{
> -	struct net_device *netdev = priv->netdev;
> -	u16 rx_pi_rem, rx_ci_rem;
> -	dma_addr_t rx_buf_dma;
> -	struct sk_buff *skb;
> -	u64 *rx_cqe_addr;
> -	u64 *rx_wqe_addr;
> -	u64 datalen;
> -	u64 rx_cqe;
> -	u16 rx_ci;
> -	u16 rx_pi;
> -
> -	/* Index into RX buffer array is rx_pi w/wrap based on RX_CQE_SIZE */
> -	rx_pi = readq(priv->base + MLXBF_GIGE_RX_WQE_PI);
> -	rx_pi_rem = rx_pi % priv->rx_q_entries;
> -	rx_wqe_addr = priv->rx_wqe_base + rx_pi_rem;
> -	dma_unmap_single(priv->dev, *rx_wqe_addr,
> -			 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
> -	rx_cqe_addr = priv->rx_cqe_base + rx_pi_rem;
> -	rx_cqe = *rx_cqe_addr;
> -
> -	if ((rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MASK) == 0) {
> -		/* Packet is OK, increment stats */
> -		datalen = rx_cqe & MLXBF_GIGE_RX_CQE_PKT_LEN_MASK;
> -		netdev->stats.rx_packets++;
> -		netdev->stats.rx_bytes += datalen;
> -
> -		skb = priv->rx_skb[rx_pi_rem];
> -
> -		skb_put(skb, datalen);
> -
> -		skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
> -
> -		skb->protocol = eth_type_trans(skb, netdev);
> -		netif_receive_skb(skb);
> -
> -		/* Alloc another RX SKB for this same index */
> -		priv->rx_skb[rx_pi_rem] = mlxbf_gige_alloc_skb(priv, &rx_buf_dma,
> -							       DMA_FROM_DEVICE);
> -		if (!priv->rx_skb[rx_pi_rem]) {
> -			netdev->stats.rx_dropped++;
> -			return false;
> -		}
> -
> -		*rx_wqe_addr = rx_buf_dma;
> -	} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) {
> -		priv->stats.rx_mac_errors++;
> -	} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_TRUNCATED) {
> -		priv->stats.rx_truncate_errors++;
> -	}
> -
> -	/* Let hardware know we've replenished one buffer */
> -	rx_pi++;
> -	writeq(rx_pi, priv->base + MLXBF_GIGE_RX_WQE_PI);
> -
> -	(*rx_pkts)++;
> -
> -	rx_pi_rem = rx_pi % priv->rx_q_entries;
> -	rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI);
> -	rx_ci_rem = rx_ci % priv->rx_q_entries;
> -
> -	return rx_pi_rem != rx_ci_rem;
> -}
> -
> -/* Driver poll() function called by NAPI infrastructure */
> -static int mlxbf_gige_poll(struct napi_struct *napi, int budget)
> +static void mlxbf_gige_initial_mac(struct mlxbf_gige *priv)
>   {
> -	struct mlxbf_gige *priv;
> -	bool remaining_pkts;
> -	int work_done = 0;
> -	u64 data;
> -
> -	priv = container_of(napi, struct mlxbf_gige, napi);
> -
> -	mlxbf_gige_handle_tx_complete(priv);
> +	u8 mac[ETH_ALEN];
> +	u64 local_mac;
>   
> -	do {
> -		remaining_pkts = mlxbf_gige_rx_packet(priv, &work_done);
> -	} while (remaining_pkts && work_done < budget);
> +	mlxbf_gige_get_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX,
> +				     &local_mac);
> +	u64_to_ether_addr(local_mac, mac);
>   
> -	/* If amount of work done < budget, turn off NAPI polling
> -	 * via napi_complete_done(napi, work_done) and then
> -	 * re-enable interrupts.
> -	 */
> -	if (work_done < budget && napi_complete_done(napi, work_done)) {
> -		/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
> -		 * indicate receive readiness
> +	if (is_valid_ether_addr(mac)) {
> +		ether_addr_copy(priv->netdev->dev_addr, mac);
> +	} else {
> +		/* Provide a random MAC if for some reason the device has
> +		 * not been configured with a valid MAC address already.
>   		 */
> -		data = readq(priv->base + MLXBF_GIGE_INT_MASK);
> -		data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET;
> -		writeq(data, priv->base + MLXBF_GIGE_INT_MASK);
> -	}
> -
> -	return work_done;
> -}
> -
> -static int mlxbf_gige_request_irqs(struct mlxbf_gige *priv)
> -{
> -	int err;
> -
> -	err = request_irq(priv->error_irq, mlxbf_gige_error_intr, 0,
> -			  "mlxbf_gige_error", priv);
> -	if (err) {
> -		dev_err(priv->dev, "Request error_irq failure\n");
> -		return err;
> -	}
> -
> -	err = request_irq(priv->rx_irq, mlxbf_gige_rx_intr, 0,
> -			  "mlxbf_gige_rx", priv);
> -	if (err) {
> -		dev_err(priv->dev, "Request rx_irq failure\n");
> -		goto free_error_irq;
> -	}
> -
> -	err = request_irq(priv->llu_plu_irq, mlxbf_gige_llu_plu_intr, 0,
> -			  "mlxbf_gige_llu_plu", priv);
> -	if (err) {
> -		dev_err(priv->dev, "Request llu_plu_irq failure\n");
> -		goto free_rx_irq;
> -	}
> -
> -	err = request_threaded_irq(priv->phy_irq, NULL,
> -				   mlxbf_gige_mdio_handle_phy_interrupt,
> -				   IRQF_ONESHOT | IRQF_SHARED,
> -				   "mlxbf_gige_phy", priv);
> -	if (err) {
> -		dev_err(priv->dev, "Request phy_irq failure\n");
> -		goto free_llu_plu_irq;
> +		eth_hw_addr_random(priv->netdev);
>   	}
>   
> -	return 0;
> -
> -free_llu_plu_irq:
> -	free_irq(priv->llu_plu_irq, priv);
> -
> -free_rx_irq:
> -	free_irq(priv->rx_irq, priv);
> -
> -free_error_irq:
> -	free_irq(priv->error_irq, priv);
> -
> -	return err;
> -}
> -
> -static void mlxbf_gige_free_irqs(struct mlxbf_gige *priv)
> -{
> -	free_irq(priv->error_irq, priv);
> -	free_irq(priv->rx_irq, priv);
> -	free_irq(priv->llu_plu_irq, priv);
> -	free_irq(priv->phy_irq, priv);
> +	local_mac = ether_addr_to_u64(priv->netdev->dev_addr);
> +	mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX,
> +				     local_mac);
>   }
>   
>   static void mlxbf_gige_cache_stats(struct mlxbf_gige *priv)
> @@ -862,38 +126,6 @@ static int mlxbf_gige_clean_port(struct mlxbf_gige *priv)
>   	return err;
>   }
>   
> -static int mlxbf_gige_phy_enable_interrupt(struct phy_device *phydev)
> -{
> -	int err = 0;
> -
> -	if (phydev->drv->ack_interrupt)
> -		err = phydev->drv->ack_interrupt(phydev);
> -	if (err < 0)
> -		return err;
> -
> -	phydev->interrupts = PHY_INTERRUPT_ENABLED;
> -	if (phydev->drv->config_intr)
> -		err = phydev->drv->config_intr(phydev);
> -
> -	return err;
> -}
> -
> -static int mlxbf_gige_phy_disable_interrupt(struct phy_device *phydev)
> -{
> -	int err = 0;
> -
> -	if (phydev->drv->ack_interrupt)
> -		err = phydev->drv->ack_interrupt(phydev);
> -	if (err < 0)
> -		return err;
> -
> -	phydev->interrupts = PHY_INTERRUPT_DISABLED;
> -	if (phydev->drv->config_intr)
> -		err = phydev->drv->config_intr(phydev);
> -
> -	return err;
> -}
> -
>   static int mlxbf_gige_open(struct net_device *netdev)
>   {
>   	struct mlxbf_gige *priv = netdev_priv(netdev);
> @@ -916,14 +148,6 @@ static int mlxbf_gige_open(struct net_device *netdev)
>   		return err;
>   
>   	phy_start(phydev);
> -	/* Always make sure interrupts are enabled since phy_start calls
> -	 * __phy_resume which may reset the PHY interrupt control reg.
> -	 * __phy_resume only reenables the interrupts if
> -	 * phydev->irq != IRQ_IGNORE_INTERRUPT.
> -	 */
> -	err = mlxbf_gige_phy_enable_interrupt(phydev);
> -	if (err)
> -		return err;
>   
>   	netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll, NAPI_POLL_WEIGHT);
>   	napi_enable(&priv->napi);
> @@ -953,7 +177,6 @@ static int mlxbf_gige_stop(struct net_device *netdev)
>   	mlxbf_gige_free_irqs(priv);
>   
>   	phy_stop(netdev->phydev);
> -	mlxbf_gige_phy_disable_interrupt(netdev->phydev);
>   
>   	mlxbf_gige_rx_deinit(priv);
>   	mlxbf_gige_tx_deinit(priv);
> @@ -963,110 +186,6 @@ static int mlxbf_gige_stop(struct net_device *netdev)
>   	return 0;
>   }
>   
> -/* Function to advance the tx_wqe_next pointer to next TX WQE */
> -static void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv)
> -{
> -	/* Advance tx_wqe_next pointer */
> -	priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS;
> -
> -	/* Check if 'next' pointer is beyond end of TX ring */
> -	/* If so, set 'next' back to 'base' pointer of ring */
> -	if (priv->tx_wqe_next == (priv->tx_wqe_base +
> -				  (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS)))
> -		priv->tx_wqe_next = priv->tx_wqe_base;
> -}
> -
> -static netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
> -					 struct net_device *netdev)
> -{
> -	struct mlxbf_gige *priv = netdev_priv(netdev);
> -	u64 buff_addr, start_dma_page, end_dma_page;
> -	struct sk_buff *tx_skb;
> -	dma_addr_t tx_buf_dma;
> -	u64 *tx_wqe_addr;
> -	u64 word2;
> -
> -	/* If needed, linearize TX SKB as hardware DMA expects this */
> -	if (skb_linearize(skb)) {
> -		dev_kfree_skb(skb);
> -		netdev->stats.tx_dropped++;
> -		return NET_XMIT_DROP;
> -	}
> -
> -	buff_addr = (u64)skb->data;
> -	start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
> -	end_dma_page   = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;
> -
> -	/* Verify that payload pointer and data length of SKB to be
> -	 * transmitted does not violate the hardware DMA limitation.
> -	 */
> -	if (start_dma_page != end_dma_page) {
> -		/* DMA operation would fail as-is, alloc new aligned SKB */
> -		tx_skb = mlxbf_gige_alloc_skb(priv, &tx_buf_dma, DMA_TO_DEVICE);
> -		if (!tx_skb) {
> -			/* Free original skb, could not alloc new aligned SKB */
> -			dev_kfree_skb(skb);
> -			netdev->stats.tx_dropped++;
> -			return NET_XMIT_DROP;
> -		}
> -
> -		skb_put_data(tx_skb, skb->data, skb->len);
> -		dev_kfree_skb(skb);
> -	} else {
> -		tx_skb = skb;
> -		tx_buf_dma = dma_map_single(priv->dev, skb->data,
> -					    MLXBF_GIGE_DEFAULT_BUF_SZ,
> -					    DMA_TO_DEVICE);
> -		if (dma_mapping_error(priv->dev, tx_buf_dma)) {
> -			dev_kfree_skb(skb);
> -			netdev->stats.tx_dropped++;
> -			return NET_XMIT_DROP;
> -		}
> -	}
> -
> -	priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb;
> -
> -	/* Get address of TX WQE */
> -	tx_wqe_addr = priv->tx_wqe_next;
> -
> -	mlxbf_gige_update_tx_wqe_next(priv);
> -
> -	/* Put PA of buffer address into first 64-bit word of TX WQE */
> -	*tx_wqe_addr = tx_buf_dma;
> -
> -	/* Set TX WQE pkt_len appropriately
> -	 * NOTE: GigE silicon will automatically pad up to
> -	 *       minimum packet length if needed.
> -	 */
> -	word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK;
> -
> -	/* Write entire 2nd word of TX WQE */
> -	*(tx_wqe_addr + 1) = word2;
> -
> -	priv->tx_pi++;
> -
> -	if (!netdev_xmit_more()) {
> -		/* Create memory barrier before write to TX PI */
> -		wmb();
> -		writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX);
> -	}
> -
> -	/* Check if the last TX entry was just used */
> -	if (!mlxbf_gige_tx_buffs_avail(priv)) {
> -		/* TX ring is full, inform stack */
> -		netif_stop_queue(netdev);
> -
> -		/* Since there is no separate "TX complete" interrupt, need
> -		 * to explicitly schedule NAPI poll.  This will trigger logic
> -		 * which processes TX completions, and will hopefully drain
> -		 * the TX ring allowing the TX queue to be awakened.
> -		 */
> -		napi_schedule(&priv->napi);
> -	}
> -
> -	return NETDEV_TX_OK;
> -}
> -
>   static int mlxbf_gige_do_ioctl(struct net_device *netdev,
>   			       struct ifreq *ifr, int cmd)
>   {
> @@ -1093,8 +212,8 @@ static void mlxbf_gige_set_rx_mode(struct net_device *netdev)
>   			mlxbf_gige_enable_promisc(priv);
>   		else
>   			mlxbf_gige_disable_promisc(priv);
> -		}
> -	}
> +        }
> +}
>   
>   static void mlxbf_gige_get_stats64(struct net_device *netdev,
>   				   struct rtnl_link_stats64 *stats)
> @@ -1104,7 +223,8 @@ static void mlxbf_gige_get_stats64(struct net_device *netdev,
>   	netdev_stats_to_stats64(stats, &netdev->stats);
>   
>   	stats->rx_length_errors = priv->stats.rx_truncate_errors;
> -	stats->rx_fifo_errors = priv->stats.rx_din_dropped_pkts;
> +	stats->rx_fifo_errors = priv->stats.rx_din_dropped_pkts +
> +		                readq(priv->base + MLXBF_GIGE_RX_DIN_DROP_COUNTER);
>   	stats->rx_crc_errors = priv->stats.rx_mac_errors;
>   	stats->rx_errors = stats->rx_length_errors +
>   			   stats->rx_fifo_errors +
> @@ -1125,29 +245,6 @@ static const struct net_device_ops mlxbf_gige_netdev_ops = {
>   	.ndo_get_stats64        = mlxbf_gige_get_stats64,
>   };
>   
> -static void mlxbf_gige_initial_mac(struct mlxbf_gige *priv)
> -{
> -	u8 mac[ETH_ALEN];
> -	u64 local_mac;
> -
> -	mlxbf_gige_get_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX,
> -				     &local_mac);
> -	u64_to_ether_addr(local_mac, mac);
> -
> -	if (is_valid_ether_addr(mac)) {
> -		ether_addr_copy(priv->netdev->dev_addr, mac);
> -	} else {
> -		/* Provide a random MAC if for some reason the device has
> -		 * not been configured with a valid MAC address already.
> -		 */
> -		eth_hw_addr_random(priv->netdev);
> -	}
> -
> -	local_mac = ether_addr_to_u64(priv->netdev->dev_addr);
> -	mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX,
> -				     local_mac);
> -}
> -
>   static void mlxbf_gige_adjust_link(struct net_device *netdev)
>   {
>   	/* Only one speed and one duplex supported, simply return */
> @@ -1155,6 +252,7 @@ static void mlxbf_gige_adjust_link(struct net_device *netdev)
>   
>   static int mlxbf_gige_probe(struct platform_device *pdev)
>   {
> +	unsigned int phy_int_gpio;
>   	struct phy_device *phydev;
>   	struct net_device *netdev;
>   	struct resource *mac_res;
> @@ -1164,9 +262,20 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
>   	void __iomem *llu_base;
>   	void __iomem *plu_base;
>   	void __iomem *base;
> +	int addr, version;
>   	u64 control;
>   	int err = 0;
> -	int addr;
> +
> +	if (device_property_read_u32(&pdev->dev, "version", &version)) {
> +		dev_err(&pdev->dev, "Version Info not found\n");
> +		return -EINVAL;
> +	}
> +
> +	if (version != (int)DRV_VERSION) {
> +		dev_err(&pdev->dev, "Version Mismatch. Expected %d Returned %d\n",
> +			(int)DRV_VERSION, version);
> +		return -EINVAL;
> +	}
>   
>   	mac_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MAC);
>   	if (!mac_res)
> @@ -1232,20 +341,31 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
>   	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
>   	if (err) {
>   		dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err);
> +		mlxbf_gige_mdio_remove(priv);
>   		return err;
>   	}
>   
>   	priv->error_irq = platform_get_irq(pdev, MLXBF_GIGE_ERROR_INTR_IDX);
>   	priv->rx_irq = platform_get_irq(pdev, MLXBF_GIGE_RECEIVE_PKT_INTR_IDX);
>   	priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX);
> -	priv->phy_irq = platform_get_irq(pdev, MLXBF_GIGE_PHY_INT_N);
>   
> +	err = device_property_read_u32(&pdev->dev, "phy-int-gpio", &phy_int_gpio);
> +	if (err < 0)
> +		phy_int_gpio = MLXBF_GIGE_DEFAULT_PHY_INT_GPIO;
> +
> +	priv->phy_irq = irq_find_mapping(NULL, phy_int_gpio);
> +	if (priv->phy_irq == 0) {
> +		mlxbf_gige_mdio_remove(priv);
> +		return -ENODEV;
> +	}
>   	phydev = phy_find_first(priv->mdiobus);
> -	if (!phydev)
> +	if (!phydev) {
> +		mlxbf_gige_mdio_remove(priv);
>   		return -ENODEV;
> +	}
>   
>   	addr = phydev->mdio.addr;
> -	phydev->irq = priv->mdiobus->irq[addr] = PHY_IGNORE_INTERRUPT;
> +	phydev->irq = priv->mdiobus->irq[addr] = priv->phy_irq;
>   
>   	/* Sets netdev->phydev to phydev; which will eventually
>   	 * be used in ioctl calls.
> @@ -1256,6 +376,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
>   				 PHY_INTERFACE_MODE_GMII);
>   	if (err) {
>   		dev_err(&pdev->dev, "Could not attach to PHY\n");
> +		mlxbf_gige_mdio_remove(priv);
>   		return err;
>   	}
>   
> @@ -1281,6 +402,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
>   	if (err) {
>   		dev_err(&pdev->dev, "Failed to register netdev\n");
>   		phy_disconnect(phydev);
> +		mlxbf_gige_mdio_remove(priv);
>   		return err;
>   	}
>   
> @@ -1325,8 +447,9 @@ static struct platform_driver mlxbf_gige_driver = {
>   
>   module_platform_driver(mlxbf_gige_driver);
>   
> +MODULE_SOFTDEP("pre: gpio_mlxbf2");
>   MODULE_DESCRIPTION("Mellanox BlueField SoC Gigabit Ethernet Driver");
>   MODULE_AUTHOR("David Thompson <davthompson at nvidia.com>");
>   MODULE_AUTHOR("Asmaa Mnebhi <asmaa at nvidia.com>");
>   MODULE_LICENSE("Dual BSD/GPL");
> -MODULE_VERSION(DRV_VERSION);
> +MODULE_VERSION(__stringify(DRV_VERSION));
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
> index 636e19c..af4a754 100644
> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
> @@ -2,7 +2,7 @@
>   
>   /* MDIO support for Mellanox Gigabit Ethernet driver
>    *
> - * Copyright (c) 2020 NVIDIA Corporation.
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
>    */
>   
>   #include <linux/acpi.h>
> @@ -68,17 +68,10 @@
>   				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO3_3_MASK, 1) | \
>   				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_FULL_DRIVE_MASK, 1) | \
>   				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDC_PERIOD_MASK, \
> -					    MLXBF_GIGE_MDIO_PERIOD) |   \
> +					    MLXBF_GIGE_MDIO_PERIOD) | \
>   				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_IN_SAMP_MASK, 6) | \
>   				 FIELD_PREP(MLXBF_GIGE_MDIO_CFG_MDIO_OUT_SAMP_MASK, 13))
>   
> -#define MLXBF_GIGE_GPIO_CAUSE_FALL_EN		0x48
> -#define MLXBF_GIGE_GPIO_CAUSE_OR_CAUSE_EVTEN0	0x80
> -#define MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0		0x94
> -#define MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE	0x98
> -
> -#define MLXBF_GIGE_GPIO12_BIT			12
> -
>   static u32 mlxbf_gige_mdio_create_cmd(u16 data, int phy_add,
>   				      int phy_reg, u32 opcode)
>   {
> @@ -149,88 +142,10 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add,
>   	return ret;
>   }
>   
> -static void mlxbf_gige_mdio_disable_phy_int(struct mlxbf_gige *priv)
> -{
> -	unsigned long flags;
> -	u32 val;
> -
> -	spin_lock_irqsave(&priv->gpio_lock, flags);
> -	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> -	val &= ~priv->phy_int_gpio_mask;
> -	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> -	spin_unlock_irqrestore(&priv->gpio_lock, flags);
> -}
> -
> -static void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv)
> -{
> -	unsigned long flags;
> -	u32 val;
> -
> -	spin_lock_irqsave(&priv->gpio_lock, flags);
> -	/* The INT_N interrupt level is active low.
> -	 * So enable cause fall bit to detect when GPIO
> -	 * state goes low.
> -	 */
> -	val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_FALL_EN);
> -	val |= priv->phy_int_gpio_mask;
> -	writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_FALL_EN);
> -
> -	/* Enable PHY interrupt by setting the priority level */
> -	val = readl(priv->gpio_io +
> -			MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> -	val |= priv->phy_int_gpio_mask;
> -	writel(val, priv->gpio_io +
> -			MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> -	spin_unlock_irqrestore(&priv->gpio_lock, flags);
> -}
> -
> -/* Interrupt handler is called from mlxbf_gige_main.c
> - * driver whenever a phy interrupt is received.
> - */
> -irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(int irq, void *dev_id)
> -{
> -	struct phy_device *phydev;
> -	struct mlxbf_gige *priv;
> -	u32 val;
> -
> -	priv = dev_id;
> -	phydev = priv->netdev->phydev;
> -
> -	/* Check if this interrupt is from PHY device.
> -	 * Return if it is not.
> -	 */
> -	val = readl(priv->gpio_io +
> -			MLXBF_GIGE_GPIO_CAUSE_OR_CAUSE_EVTEN0);
> -	if (!(val & priv->phy_int_gpio_mask))
> -		return IRQ_NONE;
> -
> -	phy_mac_interrupt(phydev);
> -
> -	/* Clear interrupt when done, otherwise, no further interrupt
> -	 * will be triggered.
> -	 */
> -	val = readl(priv->gpio_io +
> -			MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
> -	val |= priv->phy_int_gpio_mask;
> -	writel(val, priv->gpio_io +
> -			MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
> -
> -	/* Make sure to clear the PHY device interrupt */
> -	if (phydev->drv->ack_interrupt)
> -		phydev->drv->ack_interrupt(phydev);
> -
> -	phydev->interrupts = PHY_INTERRUPT_ENABLED;
> -	if (phydev->drv->config_intr)
> -		phydev->drv->config_intr(phydev);
> -
> -	return IRQ_HANDLED;
> -}
> -
>   int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
>   {
>   	struct device *dev = &pdev->dev;
>   	struct resource *res;
> -	u32 phy_int_gpio;
>   	int ret;
>   
>   	res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MDIO9);
> @@ -241,25 +156,10 @@ int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
>   	if (IS_ERR(priv->mdio_io))
>   		return PTR_ERR(priv->mdio_io);
>   
> -	res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_GPIO0);
> -	if (!res)
> -		return -ENODEV;
> -
> -	priv->gpio_io = devm_ioremap(dev, res->start, resource_size(res));
> -	if (!priv->gpio_io)
> -		return -ENOMEM;
> -
>   	/* Configure mdio parameters */
>   	writel(MLXBF_GIGE_MDIO_CFG_VAL,
>   	       priv->mdio_io + MLXBF_GIGE_MDIO_CFG_OFFSET);
>   
> -	ret = device_property_read_u32(dev, "phy-int-gpio", &phy_int_gpio);
> -	if (ret < 0)
> -		phy_int_gpio = MLXBF_GIGE_GPIO12_BIT;
> -	priv->phy_int_gpio_mask = BIT(phy_int_gpio);
> -
> -	mlxbf_gige_mdio_enable_phy_int(priv);
> -
>   	priv->mdiobus = devm_mdiobus_alloc(dev);
>   	if (!priv->mdiobus) {
>   		dev_err(dev, "Failed to alloc MDIO bus\n");
> @@ -283,6 +183,5 @@ int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
>   
>   void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv)
>   {
> -	mlxbf_gige_mdio_disable_phy_int(priv);
>   	mdiobus_unregister(priv->mdiobus);
>   }
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
> index 128e128..30ad896 100644
> --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h
> @@ -2,7 +2,7 @@
>   
>   /* Header file for Mellanox BlueField GigE register defines
>    *
> - * Copyright (c) 2020 NVIDIA Corporation.
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
>    */
>   
>   #ifndef __MLXBF_GIGE_REGS_H__
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
> new file mode 100644
> index 0000000..1cf8be2
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c
> @@ -0,0 +1,299 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
> +
> +/* Packet receive logic for Mellanox Gigabit Ethernet driver
> + *
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
> + */
> +
> +#include <linux/etherdevice.h>
> +#include <linux/skbuff.h>
> +
> +#include "mlxbf_gige.h"
> +#include "mlxbf_gige_regs.h"
> +
> +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv,
> +				  unsigned int index, u64 dmac)
> +{
> +	void __iomem *base = priv->base;
> +	u64 control;
> +
> +	/* Write destination MAC to specified MAC RX filter */
> +	writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER +
> +	       (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
> +
> +	/* Enable MAC receive filter mask for specified index */
> +	control = readq(base + MLXBF_GIGE_CONTROL);
> +	control |= (MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index);
> +	writeq(control, base + MLXBF_GIGE_CONTROL);
> +}
> +
> +void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv,
> +				  unsigned int index, u64 *dmac)
> +{
> +	void __iomem *base = priv->base;
> +
> +	/* Read destination MAC from specified MAC RX filter */
> +	*dmac = readq(base + MLXBF_GIGE_RX_MAC_FILTER +
> +		      (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE));
> +}
> +
> +void mlxbf_gige_enable_promisc(struct mlxbf_gige *priv)
> +{
> +	void __iomem *base = priv->base;
> +	u64 control;
> +
> +	/* Enable MAC_ID_RANGE match functionality */
> +	control = readq(base + MLXBF_GIGE_CONTROL);
> +	control |= MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN;
> +	writeq(control, base + MLXBF_GIGE_CONTROL);
> +
> +	/* Set start of destination MAC range check to 0 */
> +	writeq(0, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START);
> +
> +	/* Set end of destination MAC range check to all FFs */
> +	writeq(0xFFFFFFFFFFFF, base + MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END);
> +}
> +
> +void mlxbf_gige_disable_promisc(struct mlxbf_gige *priv)
> +{
> +	void __iomem *base = priv->base;
> +	u64 control;
> +
> +	/* Disable MAC_ID_RANGE match functionality */
> +	control = readq(base + MLXBF_GIGE_CONTROL);
> +	control &= ~MLXBF_GIGE_CONTROL_MAC_ID_RANGE_EN;
> +	writeq(control, base + MLXBF_GIGE_CONTROL);
> +
> +	/* NOTE: no need to change DMAC_RANGE_START or END;
> +	 * those values are ignored since MAC_ID_RANGE_EN=0
> +	 */
> +}
> +
> +/* Receive Initialization
> + * 1) Configures RX MAC filters via MMIO registers
> + * 2) Allocates RX WQE array using coherent DMA mapping
> + * 3) Initializes each element of RX WQE array with a receive
> + *    buffer pointer (using streaming DMA mapping)
> + * 4) Allocates RX CQE array using coherent DMA mapping
> + * 5) Completes other misc receive initialization
> + */
> +int mlxbf_gige_rx_init(struct mlxbf_gige *priv)
> +{
> +	size_t wq_size, cq_size;
> +	dma_addr_t *rx_wqe_ptr;
> +	dma_addr_t rx_buf_dma;
> +	u64 data;
> +	int i, j;
> +
> +	/* Configure MAC RX filter #0 to allow RX of broadcast pkts */
> +	mlxbf_gige_set_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX,
> +				     BCAST_MAC_ADDR);
> +
> +	wq_size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries;
> +	priv->rx_wqe_base = dma_alloc_coherent(priv->dev, wq_size,
> +					       &priv->rx_wqe_base_dma,
> +					       GFP_KERNEL);
> +	if (!priv->rx_wqe_base)
> +		return -ENOMEM;
> +
> +	/* Initialize 'rx_wqe_ptr' to point to first RX WQE in array
> +	 * Each RX WQE is simply a receive buffer pointer, so walk
> +	 * the entire array, allocating a 2KB buffer for each element
> +	 */
> +	rx_wqe_ptr = priv->rx_wqe_base;
> +
> +	for (i = 0; i < priv->rx_q_entries; i++) {
> +		priv->rx_skb[i] = mlxbf_gige_alloc_skb(priv, &rx_buf_dma, DMA_FROM_DEVICE);
> +		if (!priv->rx_skb[i])
> +			goto free_wqe_and_skb;
> +		*rx_wqe_ptr++ = rx_buf_dma;
> +	}
> +
> +	/* Write RX WQE base address into MMIO reg */
> +	writeq(priv->rx_wqe_base_dma, priv->base + MLXBF_GIGE_RX_WQ_BASE);
> +
> +	cq_size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries;
> +	priv->rx_cqe_base = dma_alloc_coherent(priv->dev, cq_size,
> +					       &priv->rx_cqe_base_dma,
> +					       GFP_KERNEL);
> +	if (!priv->rx_cqe_base)
> +		goto free_wqe_and_skb;
> +
> +	/* Write RX CQE base address into MMIO reg */
> +	writeq(priv->rx_cqe_base_dma, priv->base + MLXBF_GIGE_RX_CQ_BASE);
> +
> +	/* Write RX_WQE_PI with current number of replenished buffers */
> +	writeq(priv->rx_q_entries, priv->base + MLXBF_GIGE_RX_WQE_PI);
> +
> +	/* Enable removal of CRC during RX */
> +	data = readq(priv->base + MLXBF_GIGE_RX);
> +	data |= MLXBF_GIGE_RX_STRIP_CRC_EN;
> +	writeq(data, priv->base + MLXBF_GIGE_RX);
> +
> +	/* Enable RX MAC filter pass and discard counters */
> +	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN,
> +	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC);
> +	writeq(MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS_EN,
> +	       priv->base + MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS);
> +
> +	/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
> +	 * indicate readiness to receive interrupts
> +	 */
> +	data = readq(priv->base + MLXBF_GIGE_INT_MASK);
> +	data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET;
> +	writeq(data, priv->base + MLXBF_GIGE_INT_MASK);
> +
> +	/* Enable RX DMA to write new packets to memory */
> +	writeq(MLXBF_GIGE_RX_DMA_EN, priv->base + MLXBF_GIGE_RX_DMA);
> +
> +	writeq(ilog2(priv->rx_q_entries),
> +	       priv->base + MLXBF_GIGE_RX_WQE_SIZE_LOG2);
> +
> +	return 0;
> +
> +free_wqe_and_skb:
> +	rx_wqe_ptr = priv->rx_wqe_base;
> +	for (j = 0; j < i; j++) {
> +		dma_unmap_single(priv->dev, *rx_wqe_ptr,
> +				 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
> +		dev_kfree_skb(priv->rx_skb[j]);
> +		rx_wqe_ptr++;
> +	}
> +	dma_free_coherent(priv->dev, wq_size,
> +			  priv->rx_wqe_base, priv->rx_wqe_base_dma);
> +	return -ENOMEM;
> +}
> +
> +/* Receive Deinitialization
> + * This routine will free allocations done by mlxbf_gige_rx_init(),
> + * namely the RX WQE and RX CQE arrays, as well as all RX buffers
> + */
> +void mlxbf_gige_rx_deinit(struct mlxbf_gige *priv)
> +{
> +	dma_addr_t *rx_wqe_ptr;
> +	size_t size;
> +	int i;
> +
> +	rx_wqe_ptr = priv->rx_wqe_base;
> +
> +	for (i = 0; i < priv->rx_q_entries; i++) {
> +		dma_unmap_single(priv->dev, *rx_wqe_ptr, MLXBF_GIGE_DEFAULT_BUF_SZ,
> +				 DMA_FROM_DEVICE);
> +		dev_kfree_skb(priv->rx_skb[i]);
> +		rx_wqe_ptr++;
> +	}
> +
> +	size = MLXBF_GIGE_RX_WQE_SZ * priv->rx_q_entries;
> +	dma_free_coherent(priv->dev, size,
> +			  priv->rx_wqe_base, priv->rx_wqe_base_dma);
> +
> +	size = MLXBF_GIGE_RX_CQE_SZ * priv->rx_q_entries;
> +	dma_free_coherent(priv->dev, size,
> +			  priv->rx_cqe_base, priv->rx_cqe_base_dma);
> +
> +	priv->rx_wqe_base = NULL;
> +	priv->rx_wqe_base_dma = 0;
> +	priv->rx_cqe_base = NULL;
> +	priv->rx_cqe_base_dma = 0;
> +	writeq(0, priv->base + MLXBF_GIGE_RX_WQ_BASE);
> +	writeq(0, priv->base + MLXBF_GIGE_RX_CQ_BASE);
> +}
> +
> +static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts)
> +{
> +	struct net_device *netdev = priv->netdev;
> +	u16 rx_pi_rem, rx_ci_rem;
> +	dma_addr_t *rx_wqe_addr;
> +	dma_addr_t rx_buf_dma;
> +	struct sk_buff *skb;
> +	u64 *rx_cqe_addr;
> +	u64 datalen;
> +	u64 rx_cqe;
> +	u16 rx_ci;
> +	u16 rx_pi;
> +
> +	/* Index into RX buffer array is rx_pi w/wrap based on rx_q_entries */
> +	rx_pi = readq(priv->base + MLXBF_GIGE_RX_WQE_PI);
> +	rx_pi_rem = rx_pi % priv->rx_q_entries;
> +	rx_wqe_addr = priv->rx_wqe_base + rx_pi_rem;
> +
> +	dma_unmap_single(priv->dev, *rx_wqe_addr,
> +			 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE);
> +
> +	rx_cqe_addr = priv->rx_cqe_base + rx_pi_rem;
> +	rx_cqe = *rx_cqe_addr;
> +
> +	if ((rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MASK) == 0) {
> +		/* Packet is OK, increment stats */
> +		datalen = rx_cqe & MLXBF_GIGE_RX_CQE_PKT_LEN_MASK;
> +		netdev->stats.rx_packets++;
> +		netdev->stats.rx_bytes += datalen;
> +
> +		skb = priv->rx_skb[rx_pi_rem];
> +
> +		skb_put(skb, datalen);
> +
> +		skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */
> +
> +		skb->protocol = eth_type_trans(skb, netdev);
> +		netif_receive_skb(skb);
> +
> +		/* Alloc another RX SKB for this same index */
> +		priv->rx_skb[rx_pi_rem] = mlxbf_gige_alloc_skb(priv, &rx_buf_dma,
> +							       DMA_FROM_DEVICE);
> +		if (!priv->rx_skb[rx_pi_rem]) {
> +			netdev->stats.rx_dropped++;
> +			return false;
> +		}
> +
> +		*rx_wqe_addr = rx_buf_dma;
> +	} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) {
> +		priv->stats.rx_mac_errors++;
> +	} else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_TRUNCATED) {
> +		priv->stats.rx_truncate_errors++;
> +	}
> +
> +	/* Let hardware know we've replenished one buffer */
> +	rx_pi++;
> +	writeq(rx_pi, priv->base + MLXBF_GIGE_RX_WQE_PI);
> +
> +	(*rx_pkts)++;
> +
> +	rx_pi_rem = rx_pi % priv->rx_q_entries;
> +	rx_ci = readq(priv->base + MLXBF_GIGE_RX_CQE_PACKET_CI);
> +	rx_ci_rem = rx_ci % priv->rx_q_entries;
> +
> +	return rx_pi_rem != rx_ci_rem;
> +}
> +
> +/* Driver poll() function called by NAPI infrastructure */
> +int mlxbf_gige_poll(struct napi_struct *napi, int budget)
> +{
> +	struct mlxbf_gige *priv;
> +	bool remaining_pkts;
> +	int work_done = 0;
> +	u64 data;
> +
> +	priv = container_of(napi, struct mlxbf_gige, napi);
> +
> +	mlxbf_gige_handle_tx_complete(priv);
> +
> +	do {
> +		remaining_pkts = mlxbf_gige_rx_packet(priv, &work_done);
> +	} while (remaining_pkts && work_done < budget);
> +
> +	/* If amount of work done < budget, turn off NAPI polling
> +	 * via napi_complete_done(napi, work_done) and then
> +	 * re-enable interrupts.
> +	 */
> +	if (work_done < budget && napi_complete_done(napi, work_done)) {
> +		/* Clear MLXBF_GIGE_INT_MASK 'receive pkt' bit to
> +		 * indicate receive readiness
> +		 */
> +		data = readq(priv->base + MLXBF_GIGE_INT_MASK);
> +		data &= ~MLXBF_GIGE_INT_MASK_RX_RECEIVE_PACKET;
> +		writeq(data, priv->base + MLXBF_GIGE_INT_MASK);
> +	}
> +
> +	return work_done;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c
> new file mode 100644
> index 0000000..257dd02
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_tx.c
> @@ -0,0 +1,279 @@
> +// SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
> +
> +/* Packet transmit logic for Mellanox Gigabit Ethernet driver
> + *
> + * Copyright (c) 2020-2021 NVIDIA Corporation.
> + */
> +
> +#include <linux/skbuff.h>
> +
> +#include "mlxbf_gige.h"
> +#include "mlxbf_gige_regs.h"
> +
> +/* Transmit Initialization
> + * 1) Allocates TX WQE array using coherent DMA mapping
> + * 2) Allocates TX completion counter using coherent DMA mapping
> + */
> +int mlxbf_gige_tx_init(struct mlxbf_gige *priv)
> +{
> +	size_t size;
> +
> +	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
> +	priv->tx_wqe_base = dma_alloc_coherent(priv->dev, size,
> +					       &priv->tx_wqe_base_dma,
> +					       GFP_KERNEL);
> +	if (!priv->tx_wqe_base)
> +		return -ENOMEM;
> +
> +	priv->tx_wqe_next = priv->tx_wqe_base;
> +
> +	/* Write TX WQE base address into MMIO reg */
> +	writeq(priv->tx_wqe_base_dma, priv->base + MLXBF_GIGE_TX_WQ_BASE);
> +
> +	/* Allocate address for TX completion count */
> +	priv->tx_cc = dma_alloc_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
> +					 &priv->tx_cc_dma, GFP_KERNEL);
> +	if (!priv->tx_cc) {
> +		dma_free_coherent(priv->dev, size,
> +				  priv->tx_wqe_base, priv->tx_wqe_base_dma);
> +		return -ENOMEM;
> +	}
> +
> +	/* Write TX CC base address into MMIO reg */
> +	writeq(priv->tx_cc_dma, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
> +
> +	writeq(ilog2(priv->tx_q_entries),
> +	       priv->base + MLXBF_GIGE_TX_WQ_SIZE_LOG2);
> +
> +	priv->prev_tx_ci = 0;
> +	priv->tx_pi = 0;
> +
> +	return 0;
> +}
> +
> +/* Transmit Deinitialization
> + * This routine will free allocations done by mlxbf_gige_tx_init(),
> + * namely the TX WQE array and the TX completion counter
> + */
> +void mlxbf_gige_tx_deinit(struct mlxbf_gige *priv)
> +{
> +	u64 *tx_wqe_addr;
> +	size_t size;
> +	int i;
> +
> +	tx_wqe_addr = priv->tx_wqe_base;
> +
> +	for (i = 0; i < priv->tx_q_entries; i++) {
> +		if (priv->tx_skb[i]) {
> +			dma_unmap_single(priv->dev, *tx_wqe_addr,
> +					 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_TO_DEVICE);
> +			dev_kfree_skb(priv->tx_skb[i]);
> +			priv->tx_skb[i] = NULL;
> +		}
> +		tx_wqe_addr += 2;
> +	}
> +
> +	size = MLXBF_GIGE_TX_WQE_SZ * priv->tx_q_entries;
> +	dma_free_coherent(priv->dev, size,
> +			  priv->tx_wqe_base, priv->tx_wqe_base_dma);
> +
> +	dma_free_coherent(priv->dev, MLXBF_GIGE_TX_CC_SZ,
> +			  priv->tx_cc, priv->tx_cc_dma);
> +
> +	priv->tx_wqe_base = NULL;
> +	priv->tx_wqe_base_dma = 0;
> +	priv->tx_cc = NULL;
> +	priv->tx_cc_dma = 0;
> +	priv->tx_wqe_next = NULL;
> +	writeq(0, priv->base + MLXBF_GIGE_TX_WQ_BASE);
> +	writeq(0, priv->base + MLXBF_GIGE_TX_CI_UPDATE_ADDRESS);
> +}
> +
> +/* Function that returns status of TX ring:
> + *          0: TX ring is full, i.e. there are no
> + *             available un-used entries in TX ring.
> + *   non-zero: TX ring is not full, i.e. there are
> + *             some available entries in TX ring.
> + *             The non-zero value is a measure of
> + *             how many TX entries are available, but
> + *             it is not the exact number of available
> + *             entries (see below).
> + *
> + * The algorithm makes the assumption that if
> + * (prev_tx_ci == tx_pi) then the TX ring is empty.
> + * An empty ring actually has (tx_q_entries-1)
> + * entries, which allows the algorithm to differentiate
> + * the case of an empty ring vs. a full ring.
> + */
> +static u16 mlxbf_gige_tx_buffs_avail(struct mlxbf_gige *priv)
> +{
> +	unsigned long flags;
> +	u16 avail;
> +
> +	spin_lock_irqsave(&priv->lock, flags);
> +
> +	if (priv->prev_tx_ci == priv->tx_pi)
> +		avail = priv->tx_q_entries - 1;
> +	else
> +		avail = ((priv->tx_q_entries + priv->prev_tx_ci - priv->tx_pi)
> +			  % priv->tx_q_entries) - 1;
> +
> +	spin_unlock_irqrestore(&priv->lock, flags);
> +
> +	return avail;
> +}
> +
> +bool mlxbf_gige_handle_tx_complete(struct mlxbf_gige *priv)
> +{
> +	struct net_device_stats *stats;
> +	u16 tx_wqe_index;
> +	u64 *tx_wqe_addr;
> +	u64 tx_status;
> +	u16 tx_ci;
> +
> +	tx_status = readq(priv->base + MLXBF_GIGE_TX_STATUS);
> +	if (tx_status & MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL)
> +		priv->stats.tx_fifo_full++;
> +	tx_ci = readq(priv->base + MLXBF_GIGE_TX_CONSUMER_INDEX);
> +	stats = &priv->netdev->stats;
> +
> +	/* Transmit completion logic needs to loop until the completion
> +	 * index (in SW) equals TX consumer index (from HW).  These
> +	 * parameters are unsigned 16-bit values and the wrap case needs
> +	 * to be supported, that is TX consumer index wrapped from 0xFFFF
> +	 * to 0 while TX completion index is still < 0xFFFF.
> +	 */
> +	for (; priv->prev_tx_ci != tx_ci; priv->prev_tx_ci++) {
> +		tx_wqe_index = priv->prev_tx_ci % priv->tx_q_entries;
> +		/* Each TX WQE is 16 bytes. The 8 MSB store the 2KB TX
> +		 * buffer address and the 8 LSB contain information
> +		 * about the TX WQE.
> +		 */
> +		tx_wqe_addr = priv->tx_wqe_base +
> +			       (tx_wqe_index * MLXBF_GIGE_TX_WQE_SZ_QWORDS);
> +
> +		stats->tx_packets++;
> +		stats->tx_bytes += MLXBF_GIGE_TX_WQE_PKT_LEN(tx_wqe_addr);
> +
> +		dma_unmap_single(priv->dev, *tx_wqe_addr,
> +				 MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_TO_DEVICE);
> +		dev_consume_skb_any(priv->tx_skb[tx_wqe_index]);
> +		priv->tx_skb[tx_wqe_index] = NULL;
> +	}
> +
> +	/* Since the TX ring was likely just drained, check if TX queue
> +	 * had previously been stopped and now that there are TX buffers
> +	 * available the TX queue can be awakened.
> +	 */
> +	if (netif_queue_stopped(priv->netdev) &&
> +	    mlxbf_gige_tx_buffs_avail(priv))
> +		netif_wake_queue(priv->netdev);
> +
> +	return true;
> +}
> +
> +/* Function to advance the tx_wqe_next pointer to next TX WQE */
> +void mlxbf_gige_update_tx_wqe_next(struct mlxbf_gige *priv)
> +{
> +	/* Advance tx_wqe_next pointer */
> +	priv->tx_wqe_next += MLXBF_GIGE_TX_WQE_SZ_QWORDS;
> +
> +	/* Check if 'next' pointer is beyond end of TX ring */
> +	/* If so, set 'next' back to 'base' pointer of ring */
> +	if (priv->tx_wqe_next == (priv->tx_wqe_base +
> +				  (priv->tx_q_entries * MLXBF_GIGE_TX_WQE_SZ_QWORDS)))
> +		priv->tx_wqe_next = priv->tx_wqe_base;
> +}
> +
> +netdev_tx_t mlxbf_gige_start_xmit(struct sk_buff *skb,
> +				  struct net_device *netdev)
> +{
> +	struct mlxbf_gige *priv = netdev_priv(netdev);
> +	long buff_addr, start_dma_page, end_dma_page;
> +	struct sk_buff *tx_skb;
> +	dma_addr_t tx_buf_dma;
> +	u64 *tx_wqe_addr;
> +	u64 word2;
> +
> +	/* If needed, linearize TX SKB as hardware DMA expects this */
> +	if (skb_linearize(skb)) {
> +		dev_kfree_skb(skb);
> +		netdev->stats.tx_dropped++;
> +		return NETDEV_TX_OK;
> +	}
> +
> +	buff_addr = (long)skb->data;
> +	start_dma_page = buff_addr >> MLXBF_GIGE_DMA_PAGE_SHIFT;
> +	end_dma_page   = (buff_addr + skb->len - 1) >> MLXBF_GIGE_DMA_PAGE_SHIFT;
> +
> +	/* Verify that payload pointer and data length of SKB to be
> +	 * transmitted do not violate the hardware DMA limitation.
> +	 */
> +	if (start_dma_page != end_dma_page) {
> +		/* DMA operation would fail as-is, alloc new aligned SKB */
> +		tx_skb = mlxbf_gige_alloc_skb(priv, &tx_buf_dma, DMA_TO_DEVICE);
> +		if (!tx_skb) {
> +			/* Free original skb, could not alloc new aligned SKB */
> +			dev_kfree_skb(skb);
> +			netdev->stats.tx_dropped++;
> +			return NETDEV_TX_OK;
> +		}
> +
> +		skb_put_data(tx_skb, skb->data, skb->len);
> +
> +		/* Free the original SKB */
> +		dev_kfree_skb(skb);
> +	} else {
> +		tx_skb = skb;
> +		tx_buf_dma = dma_map_single(priv->dev, skb->data,
> +					    MLXBF_GIGE_DEFAULT_BUF_SZ,
> +					    DMA_TO_DEVICE);
> +		if (dma_mapping_error(priv->dev, tx_buf_dma)) {
> +			dev_kfree_skb(skb);
> +			netdev->stats.tx_dropped++;
> +			return NETDEV_TX_OK;
> +		}
> +	}
> +
> +	priv->tx_skb[priv->tx_pi % priv->tx_q_entries] = tx_skb;
> +
> +	/* Get address of TX WQE */
> +	tx_wqe_addr = priv->tx_wqe_next;
> +
> +	mlxbf_gige_update_tx_wqe_next(priv);
> +
> +	/* Put PA of buffer address into first 64-bit word of TX WQE */
> +	*tx_wqe_addr = tx_buf_dma;
> +
> +	/* Set TX WQE pkt_len appropriately
> +	 * NOTE: GigE silicon will automatically pad up to
> +	 *       minimum packet length if needed.
> +	 */
> +	word2 = tx_skb->len & MLXBF_GIGE_TX_WQE_PKT_LEN_MASK;
> +
> +	/* Write entire 2nd word of TX WQE */
> +	*(tx_wqe_addr + 1) = word2;
> +
> +	priv->tx_pi++;
> +
> +	if (!netdev_xmit_more()) {
> +		/* Create memory barrier before write to TX PI */
> +		wmb();
> +		writeq(priv->tx_pi, priv->base + MLXBF_GIGE_TX_PRODUCER_INDEX);
> +	}
> +
> +	/* Check if the last TX entry was just used */
> +	if (!mlxbf_gige_tx_buffs_avail(priv)) {
> +		/* TX ring is full, inform stack */
> +		netif_stop_queue(netdev);
> +
> +		/* Since there is no separate "TX complete" interrupt, need
> +		 * to explicitly schedule NAPI poll.  This will trigger logic
> +		 * which processes TX completions, and will hopefully drain
> +		 * the TX ring allowing the TX queue to be awakened.
> +		 */
> +		napi_schedule(&priv->napi);
> +	}
> +
> +	return NETDEV_TX_OK;
> +}
> 



More information about the kernel-team mailing list