[<prev] [next>] [day] [month] [year] [list]
Message-Id: <200707081113.10203@auguste.remlab.net>
Date:	Sun, 8 Jul 2007 11:13:09 +0300
From:	Rémi Denis-Courmont <rdenis@...phalempin.com>
To:	David Miller <davem@...emloft.net>, netdev@...r.kernel.org
Subject: [PATCH net-2.6.23 take 3] Per-datagram TTL and TOS via sendmsg()
[Hmm, stupid me. Right this time. Sorry for the line noise.]
This patch adds support for specifying IPv4 Time-To-Live (IP_TTL) and/or 
Type-Of-Service (IP_TOS) values on a per-datagram basis through 
sendmsg() ancillary data. Until now, this only worked for IPv6 sockets 
(using IPV6_HOPLIMIT and IPV6_TCLASS).
Signed-off-by: Rémi Denis-Courmont <rdenis@...phalempin.com>
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 62daf21..7a6dc33 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -140,6 +140,8 @@ struct inet_sock {
 		int			length; /* Total length of all frames */
 		__be32			addr;
 		struct flowi		fl;
+		__s16			ttl;
+		__s16			tos;
 	} cork;
 };
 
diff --git a/include/net/ip.h b/include/net/ip.h
index abf2820..dcfdb41 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -54,6 +54,8 @@ struct ipcm_cookie
 	__be32			addr;
 	int			oif;
 	struct ip_options	*opt;
+	__s16			ttl;
+	__s16			tos;
 };
 
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 02a899b..e10852d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -392,8 +392,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	icmp_param->data.icmph.checksum = 0;
 	icmp_out_count(icmp_param->data.icmph.type);
 
-	inet->tos = ip_hdr(skb)->tos;
 	daddr = ipc.addr = rt->rt_src;
+	ipc.tos = ip_hdr(skb)->tos;
+	ipc.ttl = MULTICAST(daddr) ? inet->mc_ttl : inet->uc_ttl;
 	ipc.opt = NULL;
 	if (icmp_param->replyopts.optlen) {
 		ipc.opt = &icmp_param->replyopts;
@@ -438,7 +439,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	struct rtable *rt = (struct rtable *)skb_in->dst;
 	struct ipcm_cookie ipc;
 	__be32 saddr;
-	u8  tos;
 
 	if (!rt)
 		goto out;
@@ -526,9 +526,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			saddr = 0;
 	}
 
-	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
-					   IPTOS_PREC_INTERNETCONTROL) :
-					  iph->tos;
+	ipc.tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
+					       IPTOS_PREC_INTERNETCONTROL) :
+					      iph->tos;
 
 	if (ip_options_echo(&icmp_param.replyopts, skb_in))
 		goto out_unlock;
@@ -545,7 +545,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.skb	  = skb_in;
 	icmp_param.offset = skb_network_offset(skb_in);
 	icmp_out_count(icmp_param.data.icmph.type);
-	inet_sk(icmp_socket->sk)->tos = tos;
+	ipc.ttl = -1;
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param.replyopts;
 
@@ -557,7 +557,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 						icmp_param.replyopts.faddr :
 						iph->saddr,
 					.saddr = saddr,
-					.tos = RT_TOS(tos)
+					.tos = RT_TOS(ipc.tos)
 				}
 			},
 			.proto = IPPROTO_ICMP,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 34ea454..67ce657 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -806,6 +806,8 @@ int ip_append_data(struct sock *sk,
 					    dst_mtu(rt->u.dst.path);
 		inet->cork.rt = rt;
 		inet->cork.length = 0;
+		inet->cork.ttl = ipc->ttl;
+		inet->cork.tos = ipc->tos;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
 		if ((exthdrlen = rt->u.dst.header_len) != 0) {
@@ -1233,7 +1235,9 @@ int ip_push_pending_frames(struct sock *sk)
 	if (inet->cork.flags & IPCORK_OPT)
 		opt = inet->cork.opt;
 
-	if (rt->rt_type == RTN_MULTICAST)
+	if (inet->cork.ttl != -1)
+		ttl = inet->cork.ttl;
+	else if (rt->rt_type == RTN_MULTICAST)
 		ttl = inet->mc_ttl;
 	else
 		ttl = ip_select_ttl(inet, &rt->u.dst);
@@ -1245,7 +1249,7 @@ int ip_push_pending_frames(struct sock *sk)
 		iph->ihl += opt->optlen>>2;
 		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
 	}
-	iph->tos = inet->tos;
+	iph->tos = (inet->cork.tos != -1) ? inet->cork.tos : inet->tos;
 	iph->tot_len = htons(skb->len);
 	iph->frag_off = df;
 	ip_select_ident(iph, &rt->u.dst, sk);
@@ -1343,6 +1347,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
+	ipc.tos = ip_hdr(skb)->tos;
+	ipc.ttl = inet->uc_ttl;
 
 	if (replyopts.opt.optlen) {
 		ipc.opt = &replyopts.opt;
@@ -1374,7 +1380,6 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	   with locally disabled BH and that sk cannot be already spinlocked.
 	 */
 	bh_lock_sock(sk);
-	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_priority = skb->priority;
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4d54457..02c47ff 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -190,6 +190,16 @@ int ip_cmsg_send(struct msghdr *msg, struct ipcm_cookie *ipc)
 			ipc->addr = info->ipi_spec_dst.s_addr;
 			break;
 		}
+		case IP_TTL:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+				return -EINVAL;
+			ipc->ttl = *(int *)CMSG_DATA(cmsg);
+			break;
+		case IP_TOS:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+				return -EINVAL;
+			ipc->tos = *(int *)CMSG_DATA(cmsg);
+			break;
 		default:
 			return -EINVAL;
 		}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 24d7c9f..035bb37 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -436,6 +436,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	ipc.addr = inet->saddr;
 	ipc.opt = NULL;
 	ipc.oif = sk->sk_bound_dev_if;
+	ipc.ttl = MULTICAST(daddr) ? inet->mc_ttl : inet->uc_ttl;
+	ipc.tos = inet->tos;
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(msg, &ipc);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index facb7e2..d7d6a02 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -581,6 +581,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	}
 	ipc.addr = inet->saddr;
 
+	ipc.ttl = MULTICAST(daddr) ? inet->mc_ttl : inet->uc_ttl;
+	ipc.tos = inet->tos;
 	ipc.oif = sk->sk_bound_dev_if;
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(msg, &ipc);
-- 
Rémi Denis-Courmont
http://www.remlab.net/
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists
 
