[SRU][Vivid][PATCH] netfilter: bridge: forward IPv6 fragmented packets

Thu Nov 19 12:27:29 UTC 2015

On 18.11.2015 01:48, Jay Vosburgh wrote:
> 
> From: Bernhard Thaler <bernhard.thaler at wvnet.at>
> 
> BugLink: https://bugs.launchpad.net/nova/+bug/1463911
> 
> IPv6 fragmented packets are not forwarded on an ethernet bridge
> with netfilter ip6_tables loaded. Steps to reproduce, for example:
> 
> 1) create a simple bridge like this
> 
>         modprobe br_netfilter
>         brctl addbr br0
>         brctl addif br0 eth0
>         brctl addif br0 eth2
>         ifconfig eth0 up
>         ifconfig eth2 up
>         ifconfig br0 up
> 
> 2) place a host with an IPv6 address on each side of the bridge
> 
>         set IPv6 address on host A:
>         ip -6 addr add fd01:2345:6789:1::1/64 dev eth0
> 
>         set IPv6 address on host B:
>         ip -6 addr add fd01:2345:6789:1::2/64 dev eth0
> 
> 3) run a simple ping command on host A with packets > MTU
> 
>         ping6 -s 4000 fd01:2345:6789:1::2
> 
> 4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge
> 
> IPv6 fragmented packets traverse the bridge cleanly until somebody runs
> "ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are
> loaded) IPv6 fragmented packets do not traverse the bridge any more (you
> see no more responses in ping's output).
> 
> After applying this patch IPv6 fragmented packets traverse the bridge
> cleanly in the above scenario.
> 
> Signed-off-by: Bernhard Thaler <bernhard.thaler at wvnet.at>
> [pablo at netfilter.org: small changes to br_nf_dev_queue_xmit]
> Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>
> (backported from commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd)
> (backported from commit e70deecbf8e1562cac0b19f23848919e2f5d65aa)
> (backported from commit d7b597421519d6f680eb8e152a0d8447466ee2d6)

Maybe that was already discussed somewhere, but would it not be better to split
the SRU backports into their individual upstream commits?

-Stefan

> Signed-off-by: Jay Vosburgh <jay.vosburgh at canonical.com>
> ---
>  include/linux/netfilter_ipv6.h |   1 +
>  include/linux/skbuff.h         |   1 +
>  net/bridge/br_netfilter.c      | 185 ++++++++++++++++++++++++++++++++---------
>  net/bridge/br_private.h        |   6 +-
>  net/ipv6/netfilter.c           |   1 +
>  5 files changed, 155 insertions(+), 39 deletions(-)
> 
> diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
> index 64dad1cc..7c832ba 100644
> --- a/include/linux/netfilter_ipv6.h
> +++ b/include/linux/netfilter_ipv6.h
> @@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void);
>  struct nf_ipv6_ops {
>  	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
>  			const struct net_device *dev, int strict);
> +	int (*fragment)(struct sk_buff *skb, int (*output)(struct sk_buff *));
>  };
>  
>  extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 5fcc606..db71429 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -163,6 +163,7 @@ struct nf_conntrack {
>  struct nf_bridge_info {
>  	atomic_t		use;
>  	unsigned int		mask;
> +	__u16			frag_max_size;
>  	struct net_device	*physindev;
>  	struct net_device	*physoutdev;
>  	unsigned long		data[32 / sizeof(unsigned long)];
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index ba2bd0a..cb62adf 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -34,6 +34,7 @@
>  
>  #include <net/ip.h>
>  #include <net/ipv6.h>
> +#include <net/addrconf.h>
>  #include <net/route.h>
>  #include <net/netfilter/br_netfilter.h>
>  
> @@ -112,6 +113,71 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
>  	 pppoe_proto(skb) == htons(PPP_IPV6) && \
>  	 brnf_filter_pppoe_tagged)
>  
> +/* largest possible L2 header, see br_nf_dev_queue_xmit() */
> +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
> +
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> +struct brnf_frag_data {
> +	char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
> +	u8 encap_size;
> +	u8 size;
> +	u16 vlan_tci;
> +	__be16 vlan_proto;
> +};
> +
> +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
> +#endif
> +
> +static int check_hbh_len(struct sk_buff *skb);
> +
> +static int br_validate_ipv6(struct sk_buff *skb)
> +{
> +	const struct ipv6hdr *hdr;
> +	struct net_device *dev = skb->dev;
> +	struct inet6_dev *idev = __in6_dev_get(skb->dev);
> +	u32 pkt_len;
> +	u8 ip6h_len = sizeof(struct ipv6hdr);
> +
> +	if (!pskb_may_pull(skb, ip6h_len))
> +		goto inhdr_error;
> +
> +	if (skb->len < ip6h_len)
> +		goto drop;
> +
> +	hdr = ipv6_hdr(skb);
> +
> +	if (hdr->version != 6)
> +		goto inhdr_error;
> +
> +	pkt_len = ntohs(hdr->payload_len);
> +
> +	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
> +		if (pkt_len + ip6h_len > skb->len) {
> +			IP6_INC_STATS_BH(dev_net(dev), idev,
> +					 IPSTATS_MIB_INTRUNCATEDPKTS);
> +			goto drop;
> +		}
> +		if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
> +			IP6_INC_STATS_BH(dev_net(dev), idev,
> +					 IPSTATS_MIB_INDISCARDS);
> +			goto drop;
> +		}
> +	}
> +	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
> +		goto drop;
> +
> +	memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
> +	/* No IP options in IPv6 header; however it should be
> +	 * checked if some next headers need special treatment
> +	 */
> +	return 0;
> +
> +inhdr_error:
> +	IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
> +drop:
> +	return -1;
> +}
> +
>  static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
>  {
>  	struct net_bridge_port *port;
> @@ -247,6 +313,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
>  	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
>  	struct rtable *rt;
>  
> +	nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
> +
>  	if (nf_bridge->mask & BRNF_PKT_TYPE) {
>  		skb->pkt_type = PACKET_OTHERHOST;
>  		nf_bridge->mask ^= BRNF_PKT_TYPE;
> @@ -524,36 +592,15 @@ bad:
>  }
>  
>  /* Replicate the checks that IPv6 does on packet reception and pass the packet
> - * to ip6tables, which doesn't support NAT, so things are fairly simple. */
> + * to ip6tables.
> + */
>  static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
>  					   struct sk_buff *skb,
>  					   const struct net_device *in,
>  					   const struct net_device *out,
>  					   int (*okfn)(struct sk_buff *))
>  {
> -	const struct ipv6hdr *hdr;
> -	u32 pkt_len;
> -
> -	if (skb->len < sizeof(struct ipv6hdr))
> -		return NF_DROP;
> -
> -	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
> -		return NF_DROP;
> -
> -	hdr = ipv6_hdr(skb);
> -
> -	if (hdr->version != 6)
> -		return NF_DROP;
> -
> -	pkt_len = ntohs(hdr->payload_len);
> -
> -	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
> -		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
> -			return NF_DROP;
> -		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
> -			return NF_DROP;
> -	}
> -	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
> +	if (br_validate_ipv6(skb))
>  		return NF_DROP;
>  
>  	nf_bridge_put(skb->nf_bridge);
> @@ -658,6 +705,9 @@ static int br_nf_forward_finish(struct sk_buff *skb)
>  			BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
>  		}
>  
> +		if (skb->protocol == htons(ETH_P_IPV6))
> +			nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
> +
>  		in = nf_bridge->physindev;
>  		if (nf_bridge->mask & BRNF_PKT_TYPE) {
>  			skb->pkt_type = PACKET_OTHERHOST;
> @@ -728,6 +778,13 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
>  
>  	/* The physdev module checks on this */
>  	nf_bridge->mask |= BRNF_BRIDGED;
> +
> +	if (pf == NFPROTO_IPV6) {
> +		if (br_validate_ipv6(skb))
> +			return NF_DROP;
> +		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
> +	}
> +
>  	nf_bridge->physoutdev = skb->dev;
>  	if (pf == NFPROTO_IPV4)
>  		skb->protocol = htons(ETH_P_IP);
> @@ -776,35 +833,87 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
>  	return NF_STOLEN;
>  }
>  
> -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> +static int br_nf_push_frag_xmit(struct sk_buff *skb)
> +{
> +	struct brnf_frag_data *data;
> +	int err;
> +
> +	data = this_cpu_ptr(&brnf_frag_data_storage);
> +	err = skb_cow_head(skb, data->size);
> +
> +	if (err) {
> +		kfree_skb(skb);
> +		return 0;
> +	}
> +
> +	if (data->vlan_tci) {
> +		skb->vlan_tci = data->vlan_tci;
> +		skb->vlan_proto = data->vlan_proto;
> +	}
> +
> +	skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
> +	__skb_push(skb, data->encap_size);
> +
> +	return br_dev_queue_push_xmit(skb);
> +}
> +#endif
> +
>  static int br_nf_dev_queue_xmit(struct sk_buff *skb)
>  {
>  	int ret;
>  	int frag_max_size;
> +	unsigned int mtu_reserved;
> +
> +	mtu_reserved = nf_bridge_mtu_reduction(skb);
> +
> +	if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu)
> +		return br_dev_queue_push_xmit(skb);
>  
>  	/* This is wrong! We should preserve the original fragment
>  	 * boundaries by preserving frag_list rather than refragmenting.
>  	 */
> -	if (skb->protocol == htons(ETH_P_IP) &&
> -	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
> -	    !skb_is_gso(skb)) {
> +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
> +	if (skb->protocol == htons(ETH_P_IP)) {
>  		frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
>  		if (br_parse_ip_options(skb))
>  			/* Drop invalid packet */
>  			return NF_DROP;
>  		IPCB(skb)->frag_max_size = frag_max_size;
> -		ret = ip_fragment(skb, br_dev_queue_push_xmit);
> -	} else
> -		ret = br_dev_queue_push_xmit(skb);
> +		return ip_fragment(skb, br_dev_queue_push_xmit);
> +	}
> +#endif
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> +	if (skb->protocol == htons(ETH_P_IPV6)) {
> +		const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
> +		struct brnf_frag_data *data;
>  
> -	return ret;
> -}
> -#else
> -static int br_nf_dev_queue_xmit(struct sk_buff *skb)
> -{
> -        return br_dev_queue_push_xmit(skb);
> -}
> +		if (br_validate_ipv6(skb))
> +			goto drop;
> +
> +		IP6CB(skb)->frag_max_size = skb->nf_bridge->frag_max_size;
> +
> +		nf_bridge_update_protocol(skb);
> +
> +		data = this_cpu_ptr(&brnf_frag_data_storage);
> +		data->encap_size = nf_bridge_encap_header_len(skb);
> +		data->size = ETH_HLEN + data->encap_size;
> +
> +		skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
> +						 data->size);
> +
> +		if (v6ops)
> +			return v6ops->fragment(skb, br_nf_push_frag_xmit);
> +
> +		kfree_skb(skb);
> +		return -EMSGSIZE;
> +	}
>  #endif
> +	return br_dev_queue_push_xmit(skb);
> +drop:
> +	kfree_skb(skb);
> +	return 0;
> +}
>  
>  /* PF_BRIDGE/POST_ROUTING ********************************************/
>  static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index aea3d13..d9c880d 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -18,6 +18,7 @@
>  #include <linux/netpoll.h>
>  #include <linux/u64_stats_sync.h>
>  #include <net/route.h>
> +#include <net/ip6_fib.h>
>  #include <linux/if_vlan.h>
>  
>  #define BR_HASH_BITS 8
> @@ -214,7 +215,10 @@ struct net_bridge
>  	spinlock_t			hash_lock;
>  	struct hlist_head		hash[BR_HASH_SIZE];
>  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> -	struct rtable 			fake_rtable;
> +	union {
> +		struct rtable		fake_rtable;
> +		struct rt6_info		fake_rt6_info;
> +	};
>  	bool				nf_call_iptables;
>  	bool				nf_call_ip6tables;
>  	bool				nf_call_arptables;
> diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
> index 398377a..3080815 100644
> --- a/net/ipv6/netfilter.c
> +++ b/net/ipv6/netfilter.c
> @@ -191,6 +191,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
>  
>  static const struct nf_ipv6_ops ipv6ops = {
>  	.chk_addr	= ipv6_chk_addr,
> +	.fragment	= ip6_fragment,
>  };
>  
>  static const struct nf_afinfo nf_ip6_afinfo = {
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20151119/d62101cc/attachment.sig>