[SRU][F:linux-bluefield][PATCH v1 2/5] net/sched: cls_api: Support hardware miss to tc action
Bartlomiej Zolnierkiewicz
bartlomiej.zolnierkiewicz at canonical.com
Tue Mar 28 16:39:41 UTC 2023
On Thu, Mar 23, 2023 at 8:18 PM William Tu <witu at nvidia.com> wrote:
>
> From: Paul Blakey <paulb at nvidia.com>
>
> BugLink: https://bugs.launchpad.net/bugs/2012571
>
> For drivers to support partial offload of a filter's action list,
> add support for action miss to specify an action instance to
> continue from in sw.
>
> CT action in particular can't be fully offloaded, as new connections
> need to be handled in software. This imposes other limitations on
> the actions that can be offloaded together with the CT action, such
> as packet modifications.
>
> Assign each action on a filter's action list a unique miss_cookie
> which drivers can then use to fill action_miss part of the tc skb
> extension. On getting back this miss_cookie, find the action
> instance with relevant cookie and continue classifying from there.
>
> Signed-off-by: Paul Blakey <paulb at nvidia.com>
> (Backported from upstream commit 80cd22c35c9001fe72bf614d29439de41933deca)
The preferred format is:
(backported from commit 80cd22c35c9001fe72bf614d29439de41933deca)
The above line should be put before your "Signed-off-by:" line if the
backport has been done by you.
Also, please add a short summary of the changes made during
backporting (this backport looks fine, but it is far from trivial).
> Reviewed-by: Jiri Pirko <jiri at nvidia.com>
> Reviewed-by: Simon Horman <simon.horman at corigine.com>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner at gmail.com>
> Acked-by: Jamal Hadi Salim <jhs at mojatatu.com>
> Signed-off-by: Jakub Kicinski <kuba at kernel.org>
Your "Signed-off-by:" is also needed here.
So, altogether, it should look more like:
Signed-off-by: Paul Blakey <paulb at nvidia.com>
Reviewed-by: Jiri Pirko <jiri at nvidia.com>
Reviewed-by: Simon Horman <simon.horman at corigine.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner at gmail.com>
Acked-by: Jamal Hadi Salim <jhs at mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba at kernel.org>
(backported from commit 80cd22c35c9001fe72bf614d29439de41933deca)
[witu: adjust for context differences and lack of tc_setup_action()]
Signed-off-by: William Tu <witu at nvidia.com>
Best Regards,
Bartlomiej
> ---
> include/linux/skbuff.h | 6 +-
> include/net/flow_offload.h | 1 +
> include/net/pkt_cls.h | 30 ++++--
> include/net/sch_generic.h | 2 +
> net/openvswitch/flow.c | 3 +-
> net/sched/cls_api.c | 216 +++++++++++++++++++++++++++++++++++--
> 6 files changed, 236 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index e246a3f2df35..3f71951b4206 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -282,12 +282,16 @@ struct nf_bridge_info {
> * and read by ovs to recirc_id.
> */
> struct tc_skb_ext {
> - __u32 chain;
> + union {
> + u64 act_miss_cookie;
> + __u32 chain;
> + };
> __u16 mru;
> __u16 zone;
> u8 post_ct:1;
> u8 post_ct_snat:1;
> u8 post_ct_dnat:1;
> + u8 act_miss:1; /* Set if act_miss_cookie is used */
> };
> #endif
>
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index d9e2e5d905ee..305de642e70f 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -167,6 +167,7 @@ typedef void (*action_destr)(void *priv);
> struct flow_action_entry {
> enum flow_action_id id;
> void *act;
> + u64 miss_cookie;
> action_destr destructor;
> void *destructor_priv;
> union {
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index 18e9373caad8..80ae50a248db 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -53,6 +53,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
> void tcf_block_put(struct tcf_block *block);
> void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
> struct tcf_block_ext_info *ei);
> +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
> + int police, struct tcf_proto *tp, u32 handle, bool used_action_miss);
>
> static inline bool tcf_block_shared(struct tcf_block *block)
> {
> @@ -197,6 +199,7 @@ struct tcf_exts {
> int nr_actions;
> struct tc_action **actions;
> struct net *net;
> + struct tcf_exts_miss_cookie_node *miss_cookie_node;
> #endif
> /* Map to export classifier specific extension TLV types to the
> * generic extensions API. Unsupported extensions must be set to 0.
> @@ -208,18 +211,11 @@ struct tcf_exts {
> static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
> int action, int police)
> {
> -#ifdef CONFIG_NET_CLS_ACT
> - exts->type = 0;
> - exts->nr_actions = 0;
> - exts->net = net;
> - exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
> - GFP_KERNEL);
> - if (!exts->actions)
> - return -ENOMEM;
> +#ifdef CONFIG_NET_CLS
> + return tcf_exts_init_ex(exts, net, action, police, NULL, 0, false);
> +#else
> + return -EOPNOTSUPP;
> #endif
> - exts->action = action;
> - exts->police = police;
> - return 0;
> }
>
> /* Return false if the netns is being destroyed in cleanup_net(). Callers
> @@ -307,6 +303,18 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
> return TC_ACT_OK;
> }
>
> +static inline int
> +tcf_exts_exec_ex(struct sk_buff *skb, struct tcf_exts *exts, int act_index,
> + struct tcf_result *res)
> +{
> +#ifdef CONFIG_NET_CLS_ACT
> + return tcf_action_exec(skb, exts->actions + act_index,
> + exts->nr_actions - act_index, res);
> +#else
> + return TC_ACT_OK;
> +#endif
> +}
> +
> int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
> struct nlattr **tb, struct nlattr *rate_tlv,
> struct tcf_exts *exts, bool ovr, bool rtnl_held,
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 295a9078e6cf..477d4b9cd012 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -358,6 +358,8 @@ struct tcf_proto_ops {
> struct nlattr **tca,
> struct netlink_ext_ack *extack);
> void (*tmplt_destroy)(void *tmplt_priv);
> + struct tcf_exts * (*get_exts)(const struct tcf_proto *tp,
> + u32 handle);
>
> /* rtnetlink specific */
> int (*dump)(struct net*, struct tcf_proto*, void *,
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index c8b03cbef897..8fa3ce23ce71 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -884,7 +884,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
> #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> if (static_branch_unlikely(&tc_recirc_sharing_support)) {
> tc_ext = skb_ext_find(skb, TC_SKB_EXT);
> - key->recirc_id = tc_ext ? tc_ext->chain : 0;
> + key->recirc_id = tc_ext && !tc_ext->act_miss ?
> + tc_ext->chain : 0;
> OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
> post_ct = tc_ext ? tc_ext->post_ct : false;
> post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 384a4d815849..2be1c228e6cd 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -23,6 +23,7 @@
> #include <linux/rhashtable.h>
> #include <linux/jhash.h>
> #include <linux/rculist.h>
> +#include <linux/rhashtable.h>
> #include <net/net_namespace.h>
> #include <net/sock.h>
> #include <net/netlink.h>
> @@ -49,6 +50,109 @@ static LIST_HEAD(tcf_proto_base);
> /* Protects list of registered TC modules. It is pure SMP lock. */
> static DEFINE_RWLOCK(cls_mod_lock);
>
> +static struct xarray tcf_exts_miss_cookies_xa;
> +struct tcf_exts_miss_cookie_node {
> + const struct tcf_chain *chain;
> + const struct tcf_proto *tp;
> + const struct tcf_exts *exts;
> + u32 chain_index;
> + u32 tp_prio;
> + u32 handle;
> + u32 miss_cookie_base;
> + struct rcu_head rcu;
> +};
> +
> +/* Each tc action entry cookie will be comprised of 32bit miss_cookie_base +
> + * action index in the exts tc actions array.
> + */
> +union tcf_exts_miss_cookie {
> + struct {
> + u32 miss_cookie_base;
> + u32 act_index;
> + };
> + u64 miss_cookie;
> +};
> +
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +static int
> +tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
> + u32 handle)
> +{
> + struct tcf_exts_miss_cookie_node *n;
> + static u32 next;
> + int err;
> +
> + if (WARN_ON(!handle || !tp->ops->get_exts))
> + return -EINVAL;
> +
> + n = kzalloc(sizeof(*n), GFP_KERNEL);
> + if (!n)
> + return -ENOMEM;
> +
> + n->chain_index = tp->chain->index;
> + n->chain = tp->chain;
> + n->tp_prio = tp->prio;
> + n->tp = tp;
> + n->exts = exts;
> + n->handle = handle;
> +
> + err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
> + n, xa_limit_32b, &next, GFP_KERNEL);
> + if (err)
> + goto err_xa_alloc;
> +
> + exts->miss_cookie_node = n;
> + return 0;
> +
> +err_xa_alloc:
> + kfree(n);
> + return err;
> +}
> +
> +static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
> +{
> + struct tcf_exts_miss_cookie_node *n;
> +
> + if (!exts->miss_cookie_node)
> + return;
> +
> + n = exts->miss_cookie_node;
> + xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base);
> + kfree_rcu(n, rcu);
> +}
> +
> +static struct tcf_exts_miss_cookie_node *
> +tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index)
> +{
> + union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, };
> +
> + *act_index = mc.act_index;
> + return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base);
> +}
> +#else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
> +static int
> +tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
> + u32 handle)
> +{
> + return 0;
> +}
> +
> +static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
> +{
> +}
> +#endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
> +
> +static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index)
> +{
> + union tcf_exts_miss_cookie mc = { .act_index = act_index, };
> +
> + if (!miss_cookie_base)
> + return 0;
> +
> + mc.miss_cookie_base = miss_cookie_base;
> + return mc.miss_cookie;
> +}
> +
> static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
> {
> return jhash_3words(tp->chain->index, tp->prio,
> @@ -1579,6 +1683,8 @@ static inline int __tcf_classify(struct sk_buff *skb,
> const struct tcf_proto *orig_tp,
> struct tcf_result *res,
> bool compat_mode,
> + struct tcf_exts_miss_cookie_node *n,
> + int act_index,
> u32 *last_executed_chain)
> {
> #ifdef CONFIG_NET_CLS_ACT
> @@ -1590,13 +1696,36 @@ static inline int __tcf_classify(struct sk_buff *skb,
> #endif
> for (; tp; tp = rcu_dereference_bh(tp->next)) {
> __be16 protocol = skb_protocol(skb, false);
> - int err;
> + int err = 0;
>
> - if (tp->protocol != protocol &&
> - tp->protocol != htons(ETH_P_ALL))
> - continue;
> + if (n) {
> + struct tcf_exts *exts;
> +
> + if (n->tp_prio != tp->prio)
> + continue;
> +
> + /* We re-lookup the tp and chain based on index instead
> + * of having hard refs and locks to them, so do a sanity
> + * check if any of tp,chain,exts was replaced by the
> + * time we got here with a cookie from hardware.
> + */
> + if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
> + !tp->ops->get_exts))
> + return TC_ACT_SHOT;
> +
> + exts = tp->ops->get_exts(tp, n->handle);
> + if (unlikely(!exts || n->exts != exts))
> + return TC_ACT_SHOT;
> +
> + n = NULL;
> + err = tcf_exts_exec_ex(skb, exts, act_index, res);
> + } else {
> + if (tp->protocol != protocol &&
> + tp->protocol != htons(ETH_P_ALL))
> + continue;
>
> - err = tp->classify(skb, tp, res);
> + err = tp->classify(skb, tp, res);
> + }
> #ifdef CONFIG_NET_CLS_ACT
> if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
> first_tp = orig_tp;
> @@ -1612,6 +1741,9 @@ static inline int __tcf_classify(struct sk_buff *skb,
> return err;
> }
>
> + if (unlikely(n))
> + return TC_ACT_SHOT;
> +
> return TC_ACT_UNSPEC; /* signal: continue lookup */
> #ifdef CONFIG_NET_CLS_ACT
> reset:
> @@ -1636,21 +1768,35 @@ int tcf_classify(struct sk_buff *skb,
> #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> u32 last_executed_chain = 0;
>
> - return __tcf_classify(skb, tp, tp, res, compat_mode,
> + return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0,
> &last_executed_chain);
> #else
> u32 last_executed_chain = tp ? tp->chain->index : 0;
> + struct tcf_exts_miss_cookie_node *n = NULL;
> const struct tcf_proto *orig_tp = tp;
> struct tc_skb_ext *ext;
> + int act_index = 0;
> int ret;
>
> if (block) {
> ext = skb_ext_find(skb, TC_SKB_EXT);
>
> - if (ext && ext->chain) {
> + if (ext && (ext->chain || ext->act_miss)) {
> struct tcf_chain *fchain;
> + u32 chain;
>
> - fchain = tcf_chain_lookup_rcu(block, ext->chain);
> + if (ext->act_miss) {
> + n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
> + &act_index);
> + if (!n)
> + return TC_ACT_SHOT;
> +
> + chain = n->chain_index;
> + } else {
> + chain = ext->chain;
> + }
> +
> + fchain = tcf_chain_lookup_rcu(block, chain);
> if (!fchain)
> return TC_ACT_SHOT;
>
> @@ -1662,7 +1808,7 @@ int tcf_classify(struct sk_buff *skb,
> }
> }
>
> - ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
> + ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index,
> &last_executed_chain);
>
> /* If we missed on some chain */
> @@ -3062,9 +3208,48 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
> return skb->len;
> }
>
> +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
> + int police, struct tcf_proto *tp, u32 handle,
> + bool use_action_miss)
> +{
> + int err = 0;
> +
> +#ifdef CONFIG_NET_CLS_ACT
> + exts->type = 0;
> + exts->nr_actions = 0;
> + /* Note: we do not own yet a reference on net.
> + * This reference might be taken later from tcf_exts_get_net().
> + */
> + exts->net = net;
> + exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
> + GFP_KERNEL);
> + if (!exts->actions)
> + return -ENOMEM;
> +#endif
> +
> + exts->action = action;
> + exts->police = police;
> +
> + if (!use_action_miss)
> + return 0;
> +
> + err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle);
> + if (err)
> + goto err_miss_alloc;
> +
> + return 0;
> +
> +err_miss_alloc:
> + tcf_exts_destroy(exts);
> + return err;
> +}
> +EXPORT_SYMBOL(tcf_exts_init_ex);
> +
> void tcf_exts_destroy(struct tcf_exts *exts)
> {
> #ifdef CONFIG_NET_CLS_ACT
> + tcf_exts_miss_cookie_base_destroy(exts);
> +
> if (exts->actions) {
> tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
> kfree(exts->actions);
> @@ -3505,11 +3690,15 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> const struct tcf_exts *exts, bool rtnl_held)
> {
> struct tc_action *act;
> + u32 miss_cookie_base;
> int i, j, k, err = 0;
>
> if (!exts)
> return 0;
>
> + miss_cookie_base = exts->miss_cookie_node ?
> + exts->miss_cookie_node->miss_cookie_base : 0;
> +
> if (!rtnl_held)
> rtnl_lock();
>
> @@ -3520,6 +3709,10 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> entry = &flow_action->entries[j];
> entry->act = act;
> spin_lock_bh(&act->tcfa_lock);
> +
> + if (!is_tcf_pedit(act))
> + entry->miss_cookie = tcf_exts_miss_cookie_get(miss_cookie_base, i);
> +
> if (is_tcf_gact_ok(act)) {
> entry->id = FLOW_ACTION_ACCEPT;
> } else if (is_tcf_gact_shot(act)) {
> @@ -3586,6 +3779,10 @@ int tc_setup_flow_action(struct flow_action *flow_action,
> entry->mangle.mask = tcf_pedit_mask(act, k);
> entry->mangle.val = tcf_pedit_val(act, k);
> entry->mangle.offset = tcf_pedit_offset(act, k);
> +
> + entry->miss_cookie =
> + tcf_exts_miss_cookie_get(miss_cookie_base, i);
> +
> entry = &flow_action->entries[++j];
> }
> } else if (is_tcf_csum(act)) {
> @@ -3727,6 +3924,7 @@ static int __init tc_filter_init(void)
> goto err_register_pernet_subsys;
>
> flow_indr_add_block_cb(&block_ing_entry);
> + xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1);
>
> rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
> RTNL_FLAG_DOIT_UNLOCKED);
More information about the kernel-team
mailing list