[SRU][Vivid][PATCH] netfilter: bridge: forward IPv6 fragmented packets
Stefan Bader
stefan.bader at canonical.com
Thu Nov 19 12:27:29 UTC 2015
On 18.11.2015 01:48, Jay Vosburgh wrote:
>
> From: Bernhard Thaler <bernhard.thaler at wvnet.at>
>
> BugLink: https://bugs.launchpad.net/nova/+bug/1463911
>
> IPv6 fragmented packets are not forwarded on an ethernet bridge
> with netfilter ip6_tables loaded. Steps to reproduce:
>
> 1) create a simple bridge like this
>
> modprobe br_netfilter
> brctl addbr br0
> brctl addif br0 eth0
> brctl addif br0 eth2
> ifconfig eth0 up
> ifconfig eth2 up
> ifconfig br0 up
>
> 2) place a host with an IPv6 address on each side of the bridge
>
> set IPv6 address on host A:
> ip -6 addr add fd01:2345:6789:1::1/64 dev eth0
>
> set IPv6 address on host B:
> ip -6 addr add fd01:2345:6789:1::2/64 dev eth0
>
> 3) run a simple ping command on host A with packets > MTU
>
> ping6 -s 4000 fd01:2345:6789:1::2
>
> 4) wait some time and run e.g. "ip6tables -t nat -nvL" on the bridge
>
> IPv6 fragmented packets traverse the bridge cleanly until somebody runs
> "ip6tables -t nat -nvL". As soon as it is run (and netfilter modules are
> loaded), IPv6 fragmented packets do not traverse the bridge any more (you
> see no more responses in ping's output).
>
> After applying this patch, IPv6 fragmented packets traverse the bridge
> cleanly in the above scenario.
>
> Signed-off-by: Bernhard Thaler <bernhard.thaler at wvnet.at>
> [pablo at netfilter.org: small changes to br_nf_dev_queue_xmit]
> Signed-off-by: Pablo Neira Ayuso <pablo at netfilter.org>
> (backported from commit efb6de9b4ba0092b2c55f6a52d16294a8a698edd)
> (backported from commit e70deecbf8e1562cac0b19f23848919e2f5d65aa)
> (backported from commit d7b597421519d6f680eb8e152a0d8447466ee2d6)
Maybe this was already discussed somewhere, but would it not be better to split
the SRU backport into its individual upstream commits?
-Stefan
> Signed-off-by: Jay Vosburgh <jay.vosburgh at canonical.com>
> ---
> include/linux/netfilter_ipv6.h | 1 +
> include/linux/skbuff.h | 1 +
> net/bridge/br_netfilter.c | 185 ++++++++++++++++++++++++++++++++---------
> net/bridge/br_private.h | 6 +-
> net/ipv6/netfilter.c | 1 +
> 5 files changed, 155 insertions(+), 39 deletions(-)
>
> diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
> index 64dad1cc..7c832ba 100644
> --- a/include/linux/netfilter_ipv6.h
> +++ b/include/linux/netfilter_ipv6.h
> @@ -25,6 +25,7 @@ void ipv6_netfilter_fini(void);
> struct nf_ipv6_ops {
> int (*chk_addr)(struct net *net, const struct in6_addr *addr,
> const struct net_device *dev, int strict);
> + int (*fragment)(struct sk_buff *skb, int (*output)(struct sk_buff *));
> };
>
> extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 5fcc606..db71429 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -163,6 +163,7 @@ struct nf_conntrack {
> struct nf_bridge_info {
> atomic_t use;
> unsigned int mask;
> + __u16 frag_max_size;
> struct net_device *physindev;
> struct net_device *physoutdev;
> unsigned long data[32 / sizeof(unsigned long)];
> diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
> index ba2bd0a..cb62adf 100644
> --- a/net/bridge/br_netfilter.c
> +++ b/net/bridge/br_netfilter.c
> @@ -34,6 +34,7 @@
>
> #include <net/ip.h>
> #include <net/ipv6.h>
> +#include <net/addrconf.h>
> #include <net/route.h>
> #include <net/netfilter/br_netfilter.h>
>
> @@ -112,6 +113,71 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
> pppoe_proto(skb) == htons(PPP_IPV6) && \
> brnf_filter_pppoe_tagged)
>
> +/* largest possible L2 header, see br_nf_dev_queue_xmit() */
> +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
> +
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> +struct brnf_frag_data {
> + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
> + u8 encap_size;
> + u8 size;
> + u16 vlan_tci;
> + __be16 vlan_proto;
> +};
> +
> +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
> +#endif
> +
> +static int check_hbh_len(struct sk_buff *skb);
> +
> +static int br_validate_ipv6(struct sk_buff *skb)
> +{
> + const struct ipv6hdr *hdr;
> + struct net_device *dev = skb->dev;
> + struct inet6_dev *idev = __in6_dev_get(skb->dev);
> + u32 pkt_len;
> + u8 ip6h_len = sizeof(struct ipv6hdr);
> +
> + if (!pskb_may_pull(skb, ip6h_len))
> + goto inhdr_error;
> +
> + if (skb->len < ip6h_len)
> + goto drop;
> +
> + hdr = ipv6_hdr(skb);
> +
> + if (hdr->version != 6)
> + goto inhdr_error;
> +
> + pkt_len = ntohs(hdr->payload_len);
> +
> + if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
> + if (pkt_len + ip6h_len > skb->len) {
> + IP6_INC_STATS_BH(dev_net(dev), idev,
> + IPSTATS_MIB_INTRUNCATEDPKTS);
> + goto drop;
> + }
> + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
> + IP6_INC_STATS_BH(dev_net(dev), idev,
> + IPSTATS_MIB_INDISCARDS);
> + goto drop;
> + }
> + }
> + if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
> + goto drop;
> +
> + memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
> + /* No IP options in IPv6 header; however it should be
> + * checked if some next headers need special treatment
> + */
> + return 0;
> +
> +inhdr_error:
> + IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
> +drop:
> + return -1;
> +}
> +
> static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
> {
> struct net_bridge_port *port;
> @@ -247,6 +313,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
> struct nf_bridge_info *nf_bridge = skb->nf_bridge;
> struct rtable *rt;
>
> + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
> +
> if (nf_bridge->mask & BRNF_PKT_TYPE) {
> skb->pkt_type = PACKET_OTHERHOST;
> nf_bridge->mask ^= BRNF_PKT_TYPE;
> @@ -524,36 +592,15 @@ bad:
> }
>
> /* Replicate the checks that IPv6 does on packet reception and pass the packet
> - * to ip6tables, which doesn't support NAT, so things are fairly simple. */
> + * to ip6tables.
> + */
> static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
> struct sk_buff *skb,
> const struct net_device *in,
> const struct net_device *out,
> int (*okfn)(struct sk_buff *))
> {
> - const struct ipv6hdr *hdr;
> - u32 pkt_len;
> -
> - if (skb->len < sizeof(struct ipv6hdr))
> - return NF_DROP;
> -
> - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
> - return NF_DROP;
> -
> - hdr = ipv6_hdr(skb);
> -
> - if (hdr->version != 6)
> - return NF_DROP;
> -
> - pkt_len = ntohs(hdr->payload_len);
> -
> - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
> - if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
> - return NF_DROP;
> - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
> - return NF_DROP;
> - }
> - if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
> + if (br_validate_ipv6(skb))
> return NF_DROP;
>
> nf_bridge_put(skb->nf_bridge);
> @@ -658,6 +705,9 @@ static int br_nf_forward_finish(struct sk_buff *skb)
> BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
> }
>
> + if (skb->protocol == htons(ETH_P_IPV6))
> + nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
> +
> in = nf_bridge->physindev;
> if (nf_bridge->mask & BRNF_PKT_TYPE) {
> skb->pkt_type = PACKET_OTHERHOST;
> @@ -728,6 +778,13 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
>
> /* The physdev module checks on this */
> nf_bridge->mask |= BRNF_BRIDGED;
> +
> + if (pf == NFPROTO_IPV6) {
> + if (br_validate_ipv6(skb))
> + return NF_DROP;
> + IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
> + }
> +
> nf_bridge->physoutdev = skb->dev;
> if (pf == NFPROTO_IPV4)
> skb->protocol = htons(ETH_P_IP);
> @@ -776,35 +833,87 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
> return NF_STOLEN;
> }
>
> -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> +static int br_nf_push_frag_xmit(struct sk_buff *skb)
> +{
> + struct brnf_frag_data *data;
> + int err;
> +
> + data = this_cpu_ptr(&brnf_frag_data_storage);
> + err = skb_cow_head(skb, data->size);
> +
> + if (err) {
> + kfree_skb(skb);
> + return 0;
> + }
> +
> + if (data->vlan_tci) {
> + skb->vlan_tci = data->vlan_tci;
> + skb->vlan_proto = data->vlan_proto;
> + }
> +
> + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
> + __skb_push(skb, data->encap_size);
> +
> + return br_dev_queue_push_xmit(skb);
> +}
> +#endif
> +
> static int br_nf_dev_queue_xmit(struct sk_buff *skb)
> {
> int ret;
> int frag_max_size;
> + unsigned int mtu_reserved;
> +
> + mtu_reserved = nf_bridge_mtu_reduction(skb);
> +
> + if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu)
> + return br_dev_queue_push_xmit(skb);
>
> /* This is wrong! We should preserve the original fragment
> * boundaries by preserving frag_list rather than refragmenting.
> */
> - if (skb->protocol == htons(ETH_P_IP) &&
> - skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
> - !skb_is_gso(skb)) {
> +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
> + if (skb->protocol == htons(ETH_P_IP)) {
> frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
> if (br_parse_ip_options(skb))
> /* Drop invalid packet */
> return NF_DROP;
> IPCB(skb)->frag_max_size = frag_max_size;
> - ret = ip_fragment(skb, br_dev_queue_push_xmit);
> - } else
> - ret = br_dev_queue_push_xmit(skb);
> + return ip_fragment(skb, br_dev_queue_push_xmit);
> + }
> +#endif
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
> + if (skb->protocol == htons(ETH_P_IPV6)) {
> + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
> + struct brnf_frag_data *data;
>
> - return ret;
> -}
> -#else
> -static int br_nf_dev_queue_xmit(struct sk_buff *skb)
> -{
> - return br_dev_queue_push_xmit(skb);
> -}
> + if (br_validate_ipv6(skb))
> + goto drop;
> +
> + IP6CB(skb)->frag_max_size = skb->nf_bridge->frag_max_size;
> +
> + nf_bridge_update_protocol(skb);
> +
> + data = this_cpu_ptr(&brnf_frag_data_storage);
> + data->encap_size = nf_bridge_encap_header_len(skb);
> + data->size = ETH_HLEN + data->encap_size;
> +
> + skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
> + data->size);
> +
> + if (v6ops)
> + return v6ops->fragment(skb, br_nf_push_frag_xmit);
> +
> + kfree_skb(skb);
> + return -EMSGSIZE;
> + }
> #endif
> + return br_dev_queue_push_xmit(skb);
> +drop:
> + kfree_skb(skb);
> + return 0;
> +}
>
> /* PF_BRIDGE/POST_ROUTING ********************************************/
> static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index aea3d13..d9c880d 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -18,6 +18,7 @@
> #include <linux/netpoll.h>
> #include <linux/u64_stats_sync.h>
> #include <net/route.h>
> +#include <net/ip6_fib.h>
> #include <linux/if_vlan.h>
>
> #define BR_HASH_BITS 8
> @@ -214,7 +215,10 @@ struct net_bridge
> spinlock_t hash_lock;
> struct hlist_head hash[BR_HASH_SIZE];
> #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
> - struct rtable fake_rtable;
> + union {
> + struct rtable fake_rtable;
> + struct rt6_info fake_rt6_info;
> + };
> bool nf_call_iptables;
> bool nf_call_ip6tables;
> bool nf_call_arptables;
> diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
> index 398377a..3080815 100644
> --- a/net/ipv6/netfilter.c
> +++ b/net/ipv6/netfilter.c
> @@ -191,6 +191,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
>
> static const struct nf_ipv6_ops ipv6ops = {
> .chk_addr = ipv6_chk_addr,
> + .fragment = ip6_fragment,
> };
>
> static const struct nf_afinfo nf_ip6_afinfo = {
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <https://lists.ubuntu.com/archives/kernel-team/attachments/20151119/d62101cc/attachment.sig>
More information about the kernel-team
mailing list