[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1166659257.5722.17.camel@w-sridhar2.beaverton.ibm.com>
Date: Wed, 20 Dec 2006 16:00:57 -0800
From: Sridhar Samudrala <sri@...ibm.com>
To: davem@...emloft.net
Cc: netdev@...r.kernel.org, lksctp-developers@...ts.sourceforge.net
Subject: [PATCH 3/4][SCTP]: Implement SCTP_FRAGMENT_INTERLEAVE socket
option.
[SCTP]: Implement SCTP_FRAGMENT_INTERLEAVE socket option.
This option was introduced in draft-ietf-tsvwg-sctpsocket-13. It
prevents head-of-line blocking in the case of one-to-many endpoint.
Applications enabling this option really must enable SCTP_SNDRCV event
so that they would know where the data belongs. Based on an
earlier patch by Ivan Skytte Jørgensen.
Signed-off-by: Vlad Yasevich <vladislav.yasevich@...com>
Signed-off-by: Sridhar Samudrala <sri@...ibm.com>
---
include/net/sctp/structs.h | 1 +
include/net/sctp/user.h | 4 ++
net/sctp/socket.c | 75 +++++++++++++++++++++++++++++++++++++++++++-
net/sctp/ulpqueue.c | 30 ++++++++++++------
4 files changed, 97 insertions(+), 13 deletions(-)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c9075d0..7497cec 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -306,6 +306,7 @@ struct sctp_sock {
__u8 disable_fragments;
__u8 pd_mode;
__u8 v4mapped;
+ __u8 frag_interleave;
__u32 adaptation_ind;
/* Receive to here while partial delivery is in effect. */
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 4116b0d..8c1d68c 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -97,6 +97,8 @@ #define SCTP_GET_PEER_ADDR_INFO SCTP_GET
#define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME
SCTP_CONTEXT, /* Receive Context */
#define SCTP_CONTEXT SCTP_CONTEXT
+ SCTP_FRAGMENT_INTERLEAVE,
+#define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE
/* Internal Socket Options. Some of the sctp library functions are
* implemented using these socket options.
@@ -530,7 +532,7 @@ struct sctp_paddrparams {
__u32 spp_flags;
} __attribute__((packed, aligned(4)));
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
*
* This options will get or set the delayed ack timer. The time is set
* in milliseconds. If the assoc_id is 0, then this sets or gets the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ec70201..91bef1e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2249,7 +2249,7 @@ static int sctp_setsockopt_peer_addr_par
return 0;
}
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
*
* This options will get or set the delayed ack timer. The time is set
* in milliseconds. If the assoc_id is 0, then this sets or gets the
@@ -2786,6 +2786,46 @@ static int sctp_setsockopt_context(struc
return 0;
}
+/*
+ * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ *
+ * This options will at a minimum specify if the implementation is doing
+ * fragmented interleave. Fragmented interleave, for a one to many
+ * socket, is when subsequent calls to receive a message may return
+ * parts of messages from different associations. Some implementations
+ * may allow you to turn this value on or off. If so, when turned off,
+ * no fragment interleave will occur (which will cause a head of line
+ * blocking amongst multiple associations sharing the same one to many
+ * socket). When this option is turned on, then each receive call may
+ * come from a different association (thus the user must receive data
+ * with the extended calls (e.g. sctp_recvmsg) to keep track of which
+ * association each receive belongs to.
+ *
+ * This option takes a boolean value. A non-zero value indicates that
+ * fragmented interleave is on. A value of zero indicates that
+ * fragmented interleave is off.
+ *
+ * Note that it is important that an implementation that allows this
+ * option to be turned on, have it off by default. Otherwise an unaware
+ * application using the one to many model may become confused and act
+ * incorrectly.
+ */
+static int sctp_setsockopt_fragment_interleave(struct sock *sk,
+ char __user *optval,
+ int optlen)
+{
+ int val;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
+
+ sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2900,7 +2940,9 @@ SCTP_STATIC int sctp_setsockopt(struct s
case SCTP_CONTEXT:
retval = sctp_setsockopt_context(sk, optval, optlen);
break;
-
+ case SCTP_FRAGMENT_INTERLEAVE:
+ retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -3130,6 +3172,7 @@ SCTP_STATIC int sctp_init_sock(struct so
/* Control variables for partial data delivery. */
sp->pd_mode = 0;
skb_queue_head_init(&sp->pd_lobby);
+ sp->frag_interleave = 0;
/* Create a per socket endpoint structure. Even if we
* change the data structure relationships, this may still
@@ -4530,6 +4573,30 @@ static int sctp_getsockopt_maxseg(struct
return 0;
}
+/*
+ * 7.1.24. Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
+ */
+static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ int val;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+
+ val = sctp_sk(sk)->frag_interleave;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
+}
+
SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -4642,6 +4709,10 @@ SCTP_STATIC int sctp_getsockopt(struct s
case SCTP_CONTEXT:
retval = sctp_getsockopt_context(sk, len, optval, optlen);
break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index e1d1442..d1e4ebd 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -184,20 +184,30 @@ int sctp_ulpq_tail_event(struct sctp_ulp
/* If we are in partial delivery mode, post to the lobby until
* partial delivery is cleared, unless, of course _this_ is
- * the association the cause of the partial delivery.
+ * the association that caused partial delivery.
*/
-
if (!sctp_sk(sk)->pd_mode) {
queue = &sk->sk_receive_queue;
- } else if (ulpq->pd_mode) {
- if (event->msg_flags & MSG_NOTIFICATION)
- queue = &sctp_sk(sk)->pd_lobby;
- else {
- clear_pd = event->msg_flags & MSG_EOR;
- queue = &sk->sk_receive_queue;
+ } else {
+ if (ulpq->pd_mode) {
+ if (event->msg_flags & MSG_NOTIFICATION)
+ queue = &sctp_sk(sk)->pd_lobby;
+ else {
+ clear_pd = event->msg_flags & MSG_EOR;
+ queue = &sk->sk_receive_queue;
+ }
+ } else {
+ /*
+ * If fragment interleave is enabled, we
+ * can queue this to the recieve queue instead
+ * of the lobby.
+ */
+ if (sctp_sk(sk)->frag_interleave)
+ queue = &sk->sk_receive_queue;
+ else
+ queue = &sctp_sk(sk)->pd_lobby;
}
- } else
- queue = &sctp_sk(sk)->pd_lobby;
+ }
/* If we are harvesting multiple skbs they will be
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists