[SRU][F:linux-bluefield][PATCH v1 2/5] net/sched: cls_api: Support hardware miss to tc action

Bartlomiej Zolnierkiewicz bartlomiej.zolnierkiewicz at canonical.com
Tue Mar 28 16:39:41 UTC 2023


On Thu, Mar 23, 2023 at 8:18 PM William Tu <witu at nvidia.com> wrote:
>
> From: Paul Blakey <paulb at nvidia.com>
>
> BugLink: https://bugs.launchpad.net/bugs/2012571
>
> For drivers to support partial offload of a filter's action list,
> add support for action miss to specify an action instance to
> continue from in sw.
>
> CT action in particular can't be fully offloaded, as new connections
> need to be handled in software. This imposes other limitations on
> the actions that can be offloaded together with the CT action, such
> as packet modifications.
>
> Assign each action on a filter's action list a unique miss_cookie
> which drivers can then use to fill action_miss part of the tc skb
> extension. On getting back this miss_cookie, find the action
> instance with relevant cookie and continue classifying from there.
>
> Signed-off-by: Paul Blakey <paulb at nvidia.com>
> (Backported from upstream commit 80cd22c35c9001fe72bf614d29439de41933deca)

The preferred format is:

(backported from commit 80cd22c35c9001fe72bf614d29439de41933deca)

The above line should be placed directly before your own "Signed-off-by:"
line if you did the backport yourself.

Also, please add a short summary of the changes made during
backporting (this backport looks fine, but it is far from trivial).

> Reviewed-by: Jiri Pirko <jiri at nvidia.com>
> Reviewed-by: Simon Horman <simon.horman at corigine.com>
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner at gmail.com>
> Acked-by: Jamal Hadi Salim <jhs at mojatatu.com>
> Signed-off-by: Jakub Kicinski <kuba at kernel.org>

Your "Signed-off-by:" is also needed here.

So, altogether, it should look more like:

Signed-off-by: Paul Blakey <paulb at nvidia.com>
Reviewed-by: Jiri Pirko <jiri at nvidia.com>
Reviewed-by: Simon Horman <simon.horman at corigine.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner at gmail.com>
Acked-by: Jamal Hadi Salim <jhs at mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba at kernel.org>
(backported from commit 80cd22c35c9001fe72bf614d29439de41933deca)
[witu: adjust for context differences and lack of tc_setup_action()]
Signed-off-by: William Tu <witu at nvidia.com>

Best Regards,
Bartlomiej

> ---
>  include/linux/skbuff.h     |   6 +-
>  include/net/flow_offload.h |   1 +
>  include/net/pkt_cls.h      |  30 ++++--
>  include/net/sch_generic.h  |   2 +
>  net/openvswitch/flow.c     |   3 +-
>  net/sched/cls_api.c        | 216 +++++++++++++++++++++++++++++++++++--
>  6 files changed, 236 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index e246a3f2df35..3f71951b4206 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -282,12 +282,16 @@ struct nf_bridge_info {
>   * and read by ovs to recirc_id.
>   */
>  struct tc_skb_ext {
> -       __u32 chain;
> +       union {
> +               u64 act_miss_cookie;
> +               __u32 chain;
> +       };
>         __u16 mru;
>         __u16 zone;
>         u8 post_ct:1;
>         u8 post_ct_snat:1;
>         u8 post_ct_dnat:1;
> +       u8 act_miss:1; /* Set if act_miss_cookie is used */
>  };
>  #endif
>
> diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
> index d9e2e5d905ee..305de642e70f 100644
> --- a/include/net/flow_offload.h
> +++ b/include/net/flow_offload.h
> @@ -167,6 +167,7 @@ typedef void (*action_destr)(void *priv);
>  struct flow_action_entry {
>         enum flow_action_id             id;
>         void                            *act;
> +       u64                             miss_cookie;
>         action_destr                    destructor;
>         void                            *destructor_priv;
>         union {
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index 18e9373caad8..80ae50a248db 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -53,6 +53,8 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
>  void tcf_block_put(struct tcf_block *block);
>  void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
>                        struct tcf_block_ext_info *ei);
> +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
> +                    int police, struct tcf_proto *tp, u32 handle, bool used_action_miss);
>
>  static inline bool tcf_block_shared(struct tcf_block *block)
>  {
> @@ -197,6 +199,7 @@ struct tcf_exts {
>         int nr_actions;
>         struct tc_action **actions;
>         struct net *net;
> +       struct tcf_exts_miss_cookie_node *miss_cookie_node;
>  #endif
>         /* Map to export classifier specific extension TLV types to the
>          * generic extensions API. Unsupported extensions must be set to 0.
> @@ -208,18 +211,11 @@ struct tcf_exts {
>  static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
>                                 int action, int police)
>  {
> -#ifdef CONFIG_NET_CLS_ACT
> -       exts->type = 0;
> -       exts->nr_actions = 0;
> -       exts->net = net;
> -       exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
> -                               GFP_KERNEL);
> -       if (!exts->actions)
> -               return -ENOMEM;
> +#ifdef CONFIG_NET_CLS
> +       return tcf_exts_init_ex(exts, net, action, police, NULL, 0, false);
> +#else
> +       return -EOPNOTSUPP;
>  #endif
> -       exts->action = action;
> -       exts->police = police;
> -       return 0;
>  }
>
>  /* Return false if the netns is being destroyed in cleanup_net(). Callers
> @@ -307,6 +303,18 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
>         return TC_ACT_OK;
>  }
>
> +static inline int
> +tcf_exts_exec_ex(struct sk_buff *skb, struct tcf_exts *exts, int act_index,
> +                struct tcf_result *res)
> +{
> +#ifdef CONFIG_NET_CLS_ACT
> +       return tcf_action_exec(skb, exts->actions + act_index,
> +                              exts->nr_actions - act_index, res);
> +#else
> +       return TC_ACT_OK;
> +#endif
> +}
> +
>  int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
>                       struct nlattr **tb, struct nlattr *rate_tlv,
>                       struct tcf_exts *exts, bool ovr, bool rtnl_held,
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 295a9078e6cf..477d4b9cd012 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -358,6 +358,8 @@ struct tcf_proto_ops {
>                                                 struct nlattr **tca,
>                                                 struct netlink_ext_ack *extack);
>         void                    (*tmplt_destroy)(void *tmplt_priv);
> +       struct tcf_exts *       (*get_exts)(const struct tcf_proto *tp,
> +                                           u32 handle);
>
>         /* rtnetlink specific */
>         int                     (*dump)(struct net*, struct tcf_proto*, void *,
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index c8b03cbef897..8fa3ce23ce71 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -884,7 +884,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>  #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>         if (static_branch_unlikely(&tc_recirc_sharing_support)) {
>                 tc_ext = skb_ext_find(skb, TC_SKB_EXT);
> -               key->recirc_id = tc_ext ? tc_ext->chain : 0;
> +               key->recirc_id = tc_ext && !tc_ext->act_miss ?
> +                                tc_ext->chain : 0;
>                 OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
>                 post_ct = tc_ext ? tc_ext->post_ct : false;
>                 post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index 384a4d815849..2be1c228e6cd 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -23,6 +23,7 @@
>  #include <linux/rhashtable.h>
>  #include <linux/jhash.h>
>  #include <linux/rculist.h>
> +#include <linux/rhashtable.h>
>  #include <net/net_namespace.h>
>  #include <net/sock.h>
>  #include <net/netlink.h>
> @@ -49,6 +50,109 @@ static LIST_HEAD(tcf_proto_base);
>  /* Protects list of registered TC modules. It is pure SMP lock. */
>  static DEFINE_RWLOCK(cls_mod_lock);
>
> +static struct xarray tcf_exts_miss_cookies_xa;
> +struct tcf_exts_miss_cookie_node {
> +       const struct tcf_chain *chain;
> +       const struct tcf_proto *tp;
> +       const struct tcf_exts *exts;
> +       u32 chain_index;
> +       u32 tp_prio;
> +       u32 handle;
> +       u32 miss_cookie_base;
> +       struct rcu_head rcu;
> +};
> +
> +/* Each tc action entry cookie will be comprised of 32bit miss_cookie_base +
> + * action index in the exts tc actions array.
> + */
> +union tcf_exts_miss_cookie {
> +       struct {
> +               u32 miss_cookie_base;
> +               u32 act_index;
> +       };
> +       u64 miss_cookie;
> +};
> +
> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
> +static int
> +tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
> +                               u32 handle)
> +{
> +       struct tcf_exts_miss_cookie_node *n;
> +       static u32 next;
> +       int err;
> +
> +       if (WARN_ON(!handle || !tp->ops->get_exts))
> +               return -EINVAL;
> +
> +       n = kzalloc(sizeof(*n), GFP_KERNEL);
> +       if (!n)
> +               return -ENOMEM;
> +
> +       n->chain_index = tp->chain->index;
> +       n->chain = tp->chain;
> +       n->tp_prio = tp->prio;
> +       n->tp = tp;
> +       n->exts = exts;
> +       n->handle = handle;
> +
> +       err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
> +                             n, xa_limit_32b, &next, GFP_KERNEL);
> +       if (err)
> +               goto err_xa_alloc;
> +
> +       exts->miss_cookie_node = n;
> +       return 0;
> +
> +err_xa_alloc:
> +       kfree(n);
> +       return err;
> +}
> +
> +static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
> +{
> +       struct tcf_exts_miss_cookie_node *n;
> +
> +       if (!exts->miss_cookie_node)
> +               return;
> +
> +       n = exts->miss_cookie_node;
> +       xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base);
> +       kfree_rcu(n, rcu);
> +}
> +
> +static struct tcf_exts_miss_cookie_node *
> +tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index)
> +{
> +       union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, };
> +
> +       *act_index = mc.act_index;
> +       return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base);
> +}
> +#else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
> +static int
> +tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
> +                               u32 handle)
> +{
> +       return 0;
> +}
> +
> +static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
> +{
> +}
> +#endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
> +
> +static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index)
> +{
> +       union tcf_exts_miss_cookie mc = { .act_index = act_index, };
> +
> +       if (!miss_cookie_base)
> +               return 0;
> +
> +       mc.miss_cookie_base = miss_cookie_base;
> +       return mc.miss_cookie;
> +}
> +
>  static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
>  {
>         return jhash_3words(tp->chain->index, tp->prio,
> @@ -1579,6 +1683,8 @@ static inline int __tcf_classify(struct sk_buff *skb,
>                                  const struct tcf_proto *orig_tp,
>                                  struct tcf_result *res,
>                                  bool compat_mode,
> +                                struct tcf_exts_miss_cookie_node *n,
> +                                int act_index,
>                                  u32 *last_executed_chain)
>  {
>  #ifdef CONFIG_NET_CLS_ACT
> @@ -1590,13 +1696,36 @@ static inline int __tcf_classify(struct sk_buff *skb,
>  #endif
>         for (; tp; tp = rcu_dereference_bh(tp->next)) {
>                 __be16 protocol = skb_protocol(skb, false);
> -               int err;
> +               int err = 0;
>
> -               if (tp->protocol != protocol &&
> -                   tp->protocol != htons(ETH_P_ALL))
> -                       continue;
> +               if (n) {
> +                       struct tcf_exts *exts;
> +
> +                       if (n->tp_prio != tp->prio)
> +                               continue;
> +
> +                       /* We re-lookup the tp and chain based on index instead
> +                        * of having hard refs and locks to them, so do a sanity
> +                        * check if any of tp,chain,exts was replaced by the
> +                        * time we got here with a cookie from hardware.
> +                        */
> +                       if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
> +                                    !tp->ops->get_exts))
> +                               return TC_ACT_SHOT;
> +
> +                       exts = tp->ops->get_exts(tp, n->handle);
> +                       if (unlikely(!exts || n->exts != exts))
> +                               return TC_ACT_SHOT;
> +
> +                       n = NULL;
> +                       err = tcf_exts_exec_ex(skb, exts, act_index, res);
> +               } else {
> +                       if (tp->protocol != protocol &&
> +                           tp->protocol != htons(ETH_P_ALL))
> +                               continue;
>
> -               err = tp->classify(skb, tp, res);
> +                       err = tp->classify(skb, tp, res);
> +               }
>  #ifdef CONFIG_NET_CLS_ACT
>                 if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
>                         first_tp = orig_tp;
> @@ -1612,6 +1741,9 @@ static inline int __tcf_classify(struct sk_buff *skb,
>                         return err;
>         }
>
> +       if (unlikely(n))
> +               return TC_ACT_SHOT;
> +
>         return TC_ACT_UNSPEC; /* signal: continue lookup */
>  #ifdef CONFIG_NET_CLS_ACT
>  reset:
> @@ -1636,21 +1768,35 @@ int tcf_classify(struct sk_buff *skb,
>  #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>         u32 last_executed_chain = 0;
>
> -       return __tcf_classify(skb, tp, tp, res, compat_mode,
> +       return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0,
>                               &last_executed_chain);
>  #else
>         u32 last_executed_chain = tp ? tp->chain->index : 0;
> +       struct tcf_exts_miss_cookie_node *n = NULL;
>         const struct tcf_proto *orig_tp = tp;
>         struct tc_skb_ext *ext;
> +       int act_index = 0;
>         int ret;
>
>         if (block) {
>                 ext = skb_ext_find(skb, TC_SKB_EXT);
>
> -               if (ext && ext->chain) {
> +               if (ext && (ext->chain || ext->act_miss)) {
>                         struct tcf_chain *fchain;
> +                       u32 chain;
>
> -                       fchain = tcf_chain_lookup_rcu(block, ext->chain);
> +                       if (ext->act_miss) {
> +                               n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
> +                                                               &act_index);
> +                               if (!n)
> +                                       return TC_ACT_SHOT;
> +
> +                               chain = n->chain_index;
> +                       } else {
> +                               chain = ext->chain;
> +                       }
> +
> +                       fchain = tcf_chain_lookup_rcu(block, chain);
>                         if (!fchain)
>                                 return TC_ACT_SHOT;
>
> @@ -1662,7 +1808,7 @@ int tcf_classify(struct sk_buff *skb,
>                 }
>         }
>
> -       ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
> +       ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index,
>                              &last_executed_chain);
>
>         /* If we missed on some chain */
> @@ -3062,9 +3208,48 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
>         return skb->len;
>  }
>
> +int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
> +                    int police, struct tcf_proto *tp, u32 handle,
> +                    bool use_action_miss)
> +{
> +       int err = 0;
> +
> +#ifdef CONFIG_NET_CLS_ACT
> +       exts->type = 0;
> +       exts->nr_actions = 0;
> +       /* Note: we do not own yet a reference on net.
> +        * This reference might be taken later from tcf_exts_get_net().
> +        */
> +       exts->net = net;
> +       exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
> +                               GFP_KERNEL);
> +       if (!exts->actions)
> +               return -ENOMEM;
> +#endif
> +
> +       exts->action = action;
> +       exts->police = police;
> +
> +       if (!use_action_miss)
> +               return 0;
> +
> +       err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle);
> +       if (err)
> +               goto err_miss_alloc;
> +
> +       return 0;
> +
> +err_miss_alloc:
> +       tcf_exts_destroy(exts);
> +       return err;
> +}
> +EXPORT_SYMBOL(tcf_exts_init_ex);
> +
>  void tcf_exts_destroy(struct tcf_exts *exts)
>  {
>  #ifdef CONFIG_NET_CLS_ACT
> +       tcf_exts_miss_cookie_base_destroy(exts);
> +
>         if (exts->actions) {
>                 tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
>                 kfree(exts->actions);
> @@ -3505,11 +3690,15 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>                          const struct tcf_exts *exts, bool rtnl_held)
>  {
>         struct tc_action *act;
> +       u32 miss_cookie_base;
>         int i, j, k, err = 0;
>
>         if (!exts)
>                 return 0;
>
> +       miss_cookie_base = exts->miss_cookie_node ?
> +                          exts->miss_cookie_node->miss_cookie_base : 0;
> +
>         if (!rtnl_held)
>                 rtnl_lock();
>
> @@ -3520,6 +3709,10 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>                 entry = &flow_action->entries[j];
>                 entry->act = act;
>                 spin_lock_bh(&act->tcfa_lock);
> +
> +               if (!is_tcf_pedit(act))
> +                       entry->miss_cookie = tcf_exts_miss_cookie_get(miss_cookie_base, i);
> +
>                 if (is_tcf_gact_ok(act)) {
>                         entry->id = FLOW_ACTION_ACCEPT;
>                 } else if (is_tcf_gact_shot(act)) {
> @@ -3586,6 +3779,10 @@ int tc_setup_flow_action(struct flow_action *flow_action,
>                                 entry->mangle.mask = tcf_pedit_mask(act, k);
>                                 entry->mangle.val = tcf_pedit_val(act, k);
>                                 entry->mangle.offset = tcf_pedit_offset(act, k);
> +
> +                               entry->miss_cookie =
> +                                       tcf_exts_miss_cookie_get(miss_cookie_base, i);
> +
>                                 entry = &flow_action->entries[++j];
>                         }
>                 } else if (is_tcf_csum(act)) {
> @@ -3727,6 +3924,7 @@ static int __init tc_filter_init(void)
>                 goto err_register_pernet_subsys;
>
>         flow_indr_add_block_cb(&block_ing_entry);
> +       xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1);
>
>         rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
>                       RTNL_FLAG_DOIT_UNLOCKED);



More information about the kernel-team mailing list