[SRU][F:linux-bluefield][PATCH 27/32] net/bonding: Implement ndo_sk_get_lower_dev
Daniel Jurgens
danielj at nvidia.com
Mon May 3 19:39:12 UTC 2021
From: Tariq Toukan <tariqt at nvidia.com>
BugLink: https://bugs.launchpad.net/bugs/1926994
Add ndo_sk_get_lower_dev() implementation for bond interfaces.
Support only for the cases where the socket's and SKBs' hash
yields identical value for the whole connection lifetime.
Here we restrict it to L3+4 sockets only, with
xmit_hash_policy==LAYER34 and bond modes xor/802.3ad.
Signed-off-by: Tariq Toukan <tariqt at nvidia.com>
Reviewed-by: Boris Pismenny <borisp at nvidia.com>
Signed-off-by: Jakub Kicinski <kuba at kernel.org>
(cherry picked from commit 007feb87fb15933b5de7135e6bdf57c219b3fbec)
Signed-off-by: Daniel Jurgens <danielj at nvidia.com>
---
drivers/net/bonding/bond_main.c | 93 +++++++++++++++++++++++++++++++++++++++++
include/net/bonding.h | 2 +
2 files changed, 95 insertions(+)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 02b4fe7..0442b67 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -254,6 +254,19 @@ netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
return dev_queue_xmit(skb);
}
+bool bond_sk_check(struct bonding *bond)
+{
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34)
+ return true;
+ fallthrough;
+ default:
+ return false;
+ }
+}
+
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -4459,6 +4472,85 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
return NULL;
}
+static void bond_sk_to_flow(struct sock *sk, struct flow_keys *flow)
+{
+ switch (sk->sk_family) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ if (sk->sk_ipv6only ||
+ ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) {
+ flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ flow->addrs.v6addrs.src = inet6_sk(sk)->saddr;
+ flow->addrs.v6addrs.dst = sk->sk_v6_daddr;
+ break;
+ }
+ fallthrough;
+#endif
+ default: /* AF_INET */
+ flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ flow->addrs.v4addrs.src = inet_sk(sk)->inet_rcv_saddr;
+ flow->addrs.v4addrs.dst = inet_sk(sk)->inet_daddr;
+ break;
+ }
+
+ flow->ports.src = inet_sk(sk)->inet_sport;
+ flow->ports.dst = inet_sk(sk)->inet_dport;
+}
+
+/**
+ * bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields
+ * @sk: socket to use for headers
+ *
+ * This function will extract the necessary field from the socket and use
+ * them to generate a hash based on the LAYER34 xmit_policy.
+ * Assumes that sk is a TCP or UDP socket.
+ */
+static u32 bond_sk_hash_l34(struct sock *sk)
+{
+ struct flow_keys flow;
+ u32 hash;
+
+ bond_sk_to_flow(sk, &flow);
+
+ /* L4 */
+ memcpy(&hash, &flow.ports.ports, sizeof(hash));
+ /* L3 */
+ return bond_ip_hash(hash, &flow);
+}
+
+static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond,
+ struct sock *sk)
+{
+ struct bond_up_slave *slaves;
+ struct slave *slave;
+ unsigned int count;
+ u32 hash;
+
+ slaves = rcu_dereference(bond->usable_slaves);
+ count = slaves ? READ_ONCE(slaves->count) : 0;
+ if (unlikely(!count))
+ return NULL;
+
+ hash = bond_sk_hash_l34(sk);
+ slave = slaves->arr[hash % count];
+
+ return slave->dev;
+}
+
+static struct net_device *bond_sk_get_lower_dev(struct net_device *dev,
+ struct sock *sk)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct net_device *lower = NULL;
+
+ rcu_read_lock();
+ if (bond_sk_check(bond))
+ lower = __bond_sk_get_lower_dev(bond, sk);
+ rcu_read_unlock();
+
+ return lower;
+}
+
static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct bonding *bond = netdev_priv(dev);
@@ -4596,6 +4688,7 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
.ndo_fix_features = bond_fix_features,
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
+ .ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
};
static const struct device_type bond_type = {
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 8603f62..4975064 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -266,6 +266,8 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
+bool bond_sk_check(struct bonding *bond);
+
/**
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
--
1.8.3.1
More information about the kernel-team
mailing list