diff options
| -rw-r--r-- | Documentation/networking/ip-sysctl.txt | 14 | ||||
| -rw-r--r-- | include/net/ip.h | 3 | ||||
| -rw-r--r-- | include/net/ipv6.h | 3 | ||||
| -rw-r--r-- | include/net/netns/ipv4.h | 2 | ||||
| -rw-r--r-- | include/net/netns/ipv6.h | 1 | ||||
| -rw-r--r-- | net/ipv4/icmp.c | 11 | ||||
| -rw-r--r-- | net/ipv4/ip_output.c | 3 | ||||
| -rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 | ||||
| -rw-r--r-- | net/ipv6/icmp.c | 6 | ||||
| -rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 7 | ||||
| -rw-r--r-- | net/ipv6/tcp_ipv6.c | 1 | 
11 files changed, 55 insertions, 3 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 3458d6343e0..77731bba5c6 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN  min_pmtu - INTEGER  	default 552 - minimum discovered Path MTU +fwmark_reflect - BOOLEAN +	Controls the fwmark of kernel-generated IPv4 reply packets that are not +	associated with a socket (for example, TCP RSTs or ICMP echo replies). +	If unset, these packets have a fwmark of zero. If set, they have the +	fwmark of the packet they are replying to. +	Default: 0 +  route/max_size - INTEGER  	Maximum number of routes allowed in the kernel.  Increase  	this when using large numbers of interfaces and/or routes. @@ -1087,6 +1094,13 @@ conf/all/forwarding - BOOLEAN  proxy_ndp - BOOLEAN  	Do proxy ndp. +fwmark_reflect - BOOLEAN +	Controls the fwmark of kernel-generated IPv6 reply packets that are not +	associated with a socket (for example, TCP RSTs or ICMPv6 echo replies). +	If unset, these packets have a fwmark of zero. If set, they have the +	fwmark of the packet they are replying to. +	Default: 0 +  conf/interface/*:  	Change special settings per interface. diff --git a/include/net/ip.h b/include/net/ip.h index a68f838a132..509b8807927 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -225,6 +225,9 @@ extern void ipfrag_init(void);  extern void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ +	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) +  static inline bool ip_is_fragment(const struct iphdr *iph)  {  	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ab47582f6c0..cc344ca9d0a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -111,6 +111,9 @@ struct frag_hdr {  #define	IP6_MF	0x0001 +#define IP6_REPLY_MARK(net, mark) \ +	((net)->ipv6.sysctl.fwmark_reflect ? 
(mark) : 0) +  #include <net/sock.h>  /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2ba9de89e8e..222461a7cc5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -64,6 +64,8 @@ struct netns_ipv4 {  	int sysctl_tcp_ecn; +	int sysctl_fwmark_reflect; +  	kgid_t sysctl_ping_group_range[2];  	long sysctl_tcp_mem[3]; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 005e2c2e39a..4b9f99e3a91 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {  	int ip6_rt_mtu_expires;  	int ip6_rt_min_advmss;  	int icmpv6_time; +	int fwmark_reflect;  };  struct netns_ipv6 { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 562efd91f45..cc38f44306e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)  	struct sock *sk;  	struct inet_sock *inet;  	__be32 daddr, saddr; +	u32 mark = IP4_REPLY_MARK(net, skb->mark);  	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))  		return; @@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)  	icmp_param->data.icmph.checksum = 0;  	inet->tos = ip_hdr(skb)->tos; +	sk->sk_mark = mark;  	daddr = ipc.addr = ip_hdr(skb)->saddr;  	saddr = fib_compute_spec_dst(skb);  	ipc.opt = NULL; @@ -361,6 +363,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)  	memset(&fl4, 0, sizeof(fl4));  	fl4.daddr = daddr;  	fl4.saddr = saddr; +	fl4.flowi4_mark = mark;  	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);  	fl4.flowi4_proto = IPPROTO_ICMP;  	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -379,7 +382,7 @@ static struct rtable *icmp_route_lookup(struct net *net,  					struct flowi4 *fl4,  					struct sk_buff *skb_in,  					const struct iphdr *iph, -					__be32 saddr, u8 tos, +					__be32 saddr, u8 tos, u32 mark,  					int type, int code,  					struct icmp_bxm 
*param)  { @@ -391,6 +394,7 @@ static struct rtable *icmp_route_lookup(struct net *net,  	fl4->daddr = (param->replyopts.opt.opt.srr ?  		      param->replyopts.opt.opt.faddr : iph->saddr);  	fl4->saddr = saddr; +	fl4->flowi4_mark = mark;  	fl4->flowi4_tos = RT_TOS(tos);  	fl4->flowi4_proto = IPPROTO_ICMP;  	fl4->fl4_icmp_type = type; @@ -488,6 +492,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)  	struct flowi4 fl4;  	__be32 saddr;  	u8  tos; +	u32 mark;  	struct net *net;  	struct sock *sk; @@ -584,6 +589,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)  	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |  					   IPTOS_PREC_INTERNETCONTROL) :  					  iph->tos; +	mark = IP4_REPLY_MARK(net, skb_in->mark);  	if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in))  		goto out_unlock; @@ -600,11 +606,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)  	icmp_param.skb	  = skb_in;  	icmp_param.offset = skb_network_offset(skb_in);  	inet_sk(sk)->tos = tos; +	sk->sk_mark = mark;  	ipc.addr = iph->saddr;  	ipc.opt = &icmp_param.replyopts.opt;  	ipc.tx_flags = 0; -	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, +	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,  			       type, code, &icmp_param);  	if (IS_ERR(rt))  		goto out_unlock; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4bcabf3ab4c..c2ee385cecf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1497,7 +1497,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,  			daddr = replyopts.opt.opt.faddr;  	} -	flowi4_init_output(&fl4, arg->bound_dev_if, 0, +	flowi4_init_output(&fl4, arg->bound_dev_if, +			   IP4_REPLY_MARK(net, skb->mark),  			   RT_TOS(arg->tos),  			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,  			   ip_reply_arg_flowi_flags(arg), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 
f9bb5d7488e..e6cdcb32b33 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -859,6 +859,13 @@ static struct ctl_table ipv4_net_table[] = {  		.mode		= 0644,  		.proc_handler	= ipv4_tcp_mem,  	}, +	{ +		.procname	= "fwmark_reflect", +		.data		= &init_net.ipv4.sysctl_fwmark_reflect, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec, +	},  	{ }  }; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 1d2902e6178..28da4003e84 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -397,6 +397,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	int len;  	int hlimit;  	int err = 0; +	u32 mark = IP6_REPLY_MARK(net, skb->mark);  	if ((u8 *)hdr < skb->head ||  	    (skb->network_header + sizeof(*hdr)) > skb->tail) @@ -462,6 +463,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	fl6.daddr = hdr->saddr;  	if (saddr)  		fl6.saddr = *saddr; +	fl6.flowi6_mark = mark;  	fl6.flowi6_oif = iif;  	fl6.fl6_icmp_type = type;  	fl6.fl6_icmp_code = code; @@ -470,6 +472,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	sk = icmpv6_xmit_lock(net);  	if (sk == NULL)  		return; +	sk->sk_mark = mark;  	np = inet6_sk(sk);  	if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)  	struct dst_entry *dst;  	int err = 0;  	int hlimit; +	u32 mark = IP6_REPLY_MARK(net, skb->mark);  	saddr = &ipv6_hdr(skb)->daddr; @@ -567,11 +571,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)  		fl6.saddr = *saddr;  	fl6.flowi6_oif = skb->dev->ifindex;  	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; +	fl6.flowi6_mark = mark;  	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));  	sk = icmpv6_xmit_lock(net);  	if (sk == NULL)  		return; +	sk->sk_mark = mark;  	np = inet6_sk(sk);  	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 
e85c48bd404..53a9f5a6453 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -24,6 +24,13 @@ static ctl_table ipv6_table_template[] = {  		.mode		= 0644,  		.proc_handler	= proc_dointvec  	}, +	{ +		.procname	= "fwmark_reflect", +		.data		= &init_net.ipv6.sysctl.fwmark_reflect, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec +	},  	{ }  }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf3..71545cb17ab 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -791,6 +791,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,  	fl6.flowi6_proto = IPPROTO_TCP;  	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)  		fl6.flowi6_oif = inet6_iif(skb); +	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);  	fl6.fl6_dport = t1->dest;  	fl6.fl6_sport = t1->source;  	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));  |