[SRU][Artful][PATCH 2/2] xfrm: reuse uncached_list to track xdsts
Joseph Salisbury
joseph.salisbury at canonical.com
Wed Apr 25 16:19:52 UTC 2018
On 04/23/2018 05:28 AM, Stefan Bader wrote:
> On 20.04.2018 18:19, Joseph Salisbury wrote:
>> From: Xin Long <lucien.xin at gmail.com>
>>
>> BugLink: http://bugs.launchpad.net/bugs/1746474
>>
>> In early time, when freeing a xdst, it would be inserted into
>> dst_garbage.list first. Then if it's refcnt was still held
>> somewhere, later it would be put into dst_busy_list in
>> dst_gc_task().
>>
>> When one dev was being unregistered, the dev of these dsts in
>> dst_busy_list would be set with loopback_dev and put this dev.
>> So that this dev's removal wouldn't get blocked, and avoid the
>> kmsg warning:
>>
>> kernel:unregister_netdevice: waiting for veth0 to become \
>> free. Usage count = 2
>>
>> However after Commit 52df157f17e5 ("xfrm: take refcnt of dst
>> when creating struct xfrm_dst bundle"), the xdst will not be
>> freed with dst gc, and this warning happens.
>>
>> To fix it, we need to find these xdsts that are still held by
>> others when removing the dev, and free xdst's dev and set it
>> with loopback_dev.
>>
>> But unfortunately after flow_cache for xfrm was deleted, no
>> list tracks them anymore. So we need to save these xdsts
>> somewhere to release the xdst's dev later.
>>
>> To make this easier, this patch is to reuse uncached_list to
>> track xdsts, so that the dev refcnt can be released in the
>> event NETDEV_UNREGISTER process of fib_netdev_notifier.
>>
>> Thanks to Florian, we could move forward this fix quickly.
>>
>> Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle")
>> Reported-by: Jianlin Shi <jishi at redhat.com>
>> Reported-by: Hangbin Liu <liuhangbin at gmail.com>
>> Tested-by: Eyal Birger <eyal.birger at gmail.com>
>> Signed-off-by: Xin Long <lucien.xin at gmail.com>
>> Signed-off-by: Steffen Klassert <steffen.klassert at secunet.com>
>> (back ported from commit 510c321b557121861601f9d259aadd65aa274f35)
>> Signed-off-by: Joseph Salisbury <joseph.salisbury at canonical.com>
>> ---
>> include/net/ip6_fib.h | 2 ++
>> include/net/ip6_route.h | 13 +++++++++++++
>> include/net/route.h | 3 +++
>> net/ipv4/route.c | 21 +++++++++++++--------
>> net/ipv4/xfrm4_policy.c | 4 +++-
>> net/ipv6/route.c | 4 ++--
>> net/ipv6/xfrm6_policy.c | 5 +++++
>> 7 files changed, 41 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
>> index af509f8..8e1d61b 100644
>> --- a/include/net/ip6_fib.h
>> +++ b/include/net/ip6_fib.h
>> @@ -242,6 +242,8 @@ struct rt6_statistics {
>> __u32 fib_rt_entries; /* rt entries in table */
>> __u32 fib_rt_cache; /* cache routes */
>> __u32 fib_discarded_routes;
>> + /* The following stats are not protected by any lock */
>> + atomic_t fib_rt_uncache; /* rt entries in uncached list */
> The above count seems to be purely statistical and not used for anything else.
> So I wonder whether it would not make more sense to just drop the increment line
> in xfrm6_fill_dst().
>
> -Stefan
Thanks for the feedback, Stefan! I built a test kernel with your
suggestion and had it tested. The test kernel still fixes the bug, so
I'll NAK this one and send a v2 SRU request.
Thanks,
Joe
>
>> };
>>
>> #define RTN_TL_ROOT 0x0001
>> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
>> index 1990569..f37ea3e 100644
>> --- a/include/net/ip6_route.h
>> +++ b/include/net/ip6_route.h
>> @@ -163,6 +163,19 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu);
>> void rt6_remove_prefsrc(struct inet6_ifaddr *ifp);
>> void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
>>
>> +void rt6_uncached_list_add(struct rt6_info *rt);
>> +void rt6_uncached_list_del(struct rt6_info *rt);
>> +
>> +static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
>> +{
>> + const struct dst_entry *dst = skb_dst(skb);
>> + const struct rt6_info *rt6 = NULL;
>> +
>> + if (dst)
>> + rt6 = container_of(dst, struct rt6_info, dst);
>> +
>> + return rt6;
>> +}
>>
>> /*
>> * Store a destination cache entry in a socket
>> diff --git a/include/net/route.h b/include/net/route.h
>> index 5845896..09a3507 100644
>> --- a/include/net/route.h
>> +++ b/include/net/route.h
>> @@ -226,6 +226,9 @@ struct in_ifaddr;
>> void fib_add_ifaddr(struct in_ifaddr *);
>> void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
>>
>> +void rt_add_uncached_list(struct rtable *rt);
>> +void rt_del_uncached_list(struct rtable *rt);
>> +
>> static inline void ip_rt_put(struct rtable *rt)
>> {
>> /* dst_release() accepts a NULL parameter.
>> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
>> index 83a2c494..33db041 100644
>> --- a/net/ipv4/route.c
>> +++ b/net/ipv4/route.c
>> @@ -1382,7 +1382,7 @@ struct uncached_list {
>>
>> static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
>>
>> -static void rt_add_uncached_list(struct rtable *rt)
>> +void rt_add_uncached_list(struct rtable *rt)
>> {
>> struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
>>
>> @@ -1393,14 +1393,8 @@ static void rt_add_uncached_list(struct rtable *rt)
>> spin_unlock_bh(&ul->lock);
>> }
>>
>> -static void ipv4_dst_destroy(struct dst_entry *dst)
>> +void rt_del_uncached_list(struct rtable *rt)
>> {
>> - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
>> - struct rtable *rt = (struct rtable *) dst;
>> -
>> - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
>> - kfree(p);
>> -
>> if (!list_empty(&rt->rt_uncached)) {
>> struct uncached_list *ul = rt->rt_uncached_list;
>>
>> @@ -1410,6 +1404,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
>> }
>> }
>>
>> +static void ipv4_dst_destroy(struct dst_entry *dst)
>> +{
>> + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
>> + struct rtable *rt = (struct rtable *)dst;
>> +
>> + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
>> + kfree(p);
>> +
>> + rt_del_uncached_list(rt);
>> +}
>> +
>> void rt_flush_dev(struct net_device *dev)
>> {
>> struct net *net = dev_net(dev);
>> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
>> index 71b4ecc1..cb9890a 100644
>> --- a/net/ipv4/xfrm4_policy.c
>> +++ b/net/ipv4/xfrm4_policy.c
>> @@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
>> xdst->u.rt.rt_pmtu = rt->rt_pmtu;
>> xdst->u.rt.rt_table_id = rt->rt_table_id;
>> INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
>> + rt_add_uncached_list(&xdst->u.rt);
>>
>> return 0;
>> }
>> @@ -244,7 +245,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst)
>> struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
>>
>> dst_destroy_metrics_generic(dst);
>> -
>> + if (xdst->u.rt.rt_uncached_list)
>> + rt_del_uncached_list(&xdst->u.rt);
>> xfrm_dst_destroy(xdst);
>> }
>>
>> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
>> index d00e41c..6062c65 100644
>> --- a/net/ipv6/route.c
>> +++ b/net/ipv6/route.c
>> @@ -124,7 +124,7 @@ struct uncached_list {
>>
>> static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
>>
>> -static void rt6_uncached_list_add(struct rt6_info *rt)
>> +void rt6_uncached_list_add(struct rt6_info *rt)
>> {
>> struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
>>
>> @@ -135,7 +135,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt)
>> spin_unlock_bh(&ul->lock);
>> }
>>
>> -static void rt6_uncached_list_del(struct rt6_info *rt)
>> +void rt6_uncached_list_del(struct rt6_info *rt)
>> {
>> if (!list_empty(&rt->rt6i_uncached)) {
>> struct uncached_list *ul = rt->rt6i_uncached_list;
>> diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
>> index 79651bc..7a6191c 100644
>> --- a/net/ipv6/xfrm6_policy.c
>> +++ b/net/ipv6/xfrm6_policy.c
>> @@ -109,6 +109,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
>> xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway;
>> xdst->u.rt6.rt6i_dst = rt->rt6i_dst;
>> xdst->u.rt6.rt6i_src = rt->rt6i_src;
>> + INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
>> + rt6_uncached_list_add(&xdst->u.rt6);
>> + atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
>>
>> return 0;
>> }
>> @@ -247,6 +250,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
>> if (likely(xdst->u.rt6.rt6i_idev))
>> in6_dev_put(xdst->u.rt6.rt6i_idev);
>> dst_destroy_metrics_generic(dst);
>> + if (xdst->u.rt6.rt6i_uncached_list)
>> + rt6_uncached_list_del(&xdst->u.rt6);
>> xfrm_dst_destroy(xdst);
>> }
>>
>>
>
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20180425/f9ffe622/attachment.sig>
More information about the kernel-team
mailing list