[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <50072939.5030600@gmail.com>
Date: Wed, 18 Jul 2012 17:23:05 -0400
From: Vlad Yasevich <vyasevich@...il.com>
To: Neil Horman <nhorman@...driver.com>
CC: netdev@...r.kernel.org, Sridhar Samudrala <sri@...ibm.com>,
"David S. Miller" <davem@...emloft.net>, linux-sctp@...r.kernel.org
Subject: Re: [PATCH v2] sctp: Implement quick failover draft from tsvwg
On 07/18/2012 02:01 PM, Neil Horman wrote:
> I've seen several attempts recently made to do quick failover of sctp transports
> by reducing various retransmit timers and counters. While its possible to
> implement a faster failover on multihomed sctp associations, its not
> particularly robust, in that it can lead to unneeded retransmits, as well as
> false connection failures due to intermittent latency on a network.
>
> Instead, lets implement the new ietf quick failover draft found here:
> http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05
>
> This will let the sctp stack identify transports that have had a small number of
> errors, and avoid using them quickly until their reliability can be
> re-established. I've tested this out on two virt guests connected via multiple
> isolated virt networks and believe its in compliance with the above draft and
> works well.
>
> Signed-off-by: Neil Horman <nhorman@...driver.com>
> CC: Vlad Yasevich <vyasevich@...il.com>
> CC: Sridhar Samudrala <sri@...ibm.com>
> CC: "David S. Miller" <davem@...emloft.net>
> CC: linux-sctp@...r.kernel.org
>
> ---
> Change notes:
>
> V2)
> - Added socket option API from section 6.1 of the specification, as per
> request from Vlad. Adding this socket option allows us to alter both the path
> maximum retransmit value and the path partial failure threshold for each
> transport and the association as a whole.
>
> - Added a per transport pf_retrans value, and initialized it from the
> association value. This makes each transport independently configurable as per
> the socket option above, and prevents changes in the sysctl from bleeding into
> an already created association.
> ---
> Documentation/networking/ip-sysctl.txt | 14 +++++
> include/net/sctp/constants.h | 1 +
> include/net/sctp/structs.h | 11 +++-
> include/net/sctp/user.h | 11 ++++
> net/sctp/associola.c | 36 ++++++++++--
> net/sctp/outqueue.c | 6 +-
> net/sctp/sm_sideeffect.c | 33 ++++++++++-
> net/sctp/socket.c | 96 ++++++++++++++++++++++++++++++++
> net/sctp/sysctl.c | 9 +++
> net/sctp/transport.c | 4 +-
> 10 files changed, 206 insertions(+), 15 deletions(-)
>
[ snip ]
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index b3b8a8d..dfffece 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -3470,6 +3470,52 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
> }
>
>
> +/*
> + * SCTP_PEER_ADDR_THLDS
> + *
> + * This option allows us to alter the partially failed threshold for one or all
> + * transports in an association. See Section 6.1 of:
> + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
> + */
> +static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
> + char __user *optval,
> + unsigned int optlen)
> +{
> + struct sctp_paddrthlds val;
> + struct sctp_transport *trans;
> + struct sctp_association *asoc;
> +
> + if (optlen < sizeof(struct sctp_paddrthlds))
> + return -EINVAL;
> + if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval,
> + optlen))
> + return -EFAULT;
What if optlen is bigger? You going to trash the stack.
> +
> + if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
> + asoc = sctp_id2assoc(sk, val.spt_assoc_id);
> + if (!asoc)
> + return -ENOENT;
> + list_for_each_entry(trans, &asoc->peer.transport_addr_list,
> + transports) {
> + trans->pathmaxrxt = val.spt_pathmaxrxt;
> + trans->pf_retrans = val.spt_pathpfthld;
You want to make sure that the values aren't 0. Otherwise, you'll set
the pathmaxrxt to 0 and that would be bad.
> + }
> +
> + asoc->pf_retrans = val.spt_pathpfthld;
> + asoc->pathmaxrxt = val.spt_pathmaxrxt;
Ditto.
> + } else {
> + trans = sctp_addr_id2transport(sk, &val.spt_address,
> + val.spt_assoc_id);
> + if (!trans)
> + return -ENOENT;
> +
> + trans->pathmaxrxt = val.spt_pathmaxrxt;
> + trans->pf_retrans = val.spt_pathpfthld;
Ditto.
> + }
> +
> + return 0;
> +}
> +
> /* API 6.2 setsockopt(), getsockopt()
> *
> * Applications use setsockopt() and getsockopt() to set or retrieve
> @@ -3619,6 +3665,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
> case SCTP_AUTO_ASCONF:
> retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
> break;
> + case SCTP_PEER_ADDR_THLDS:
> + retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
> + break;
> default:
> retval = -ENOPROTOOPT;
> break;
> @@ -5490,6 +5539,50 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
> return 0;
> }
>
> +/*
> + * SCTP_PEER_ADDR_THLDS
> + *
> + * This option allows us to fetch the partially failed threshold for one or all
> + * transports in an association. See Section 6.1 of:
> + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
> + */
> +static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
> + char __user *optval,
> + int optlen)
> +{
> + struct sctp_paddrthlds val;
> + struct sctp_transport *trans;
> + struct sctp_association *asoc;
> +
> + if (optlen < sizeof(struct sctp_paddrthlds))
> + return -EINVAL;
> + if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, optlen))
> + return -EFAULT;
Again, trashing the stack if optlen and optval are bigger.
-vlad
> +
> + if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
> + val.spt_assoc_id);
> + asoc = sctp_id2assoc(sk, val.spt_assoc_id);
> + if (!asoc)
> + return -ENOENT;
> +
> + val.spt_pathpfthld = asoc->pf_retrans;
> + val.spt_pathmaxrxt = asoc->pathmaxrxt;
> + } else {
> + trans = sctp_addr_id2transport(sk, &val.spt_address,
> + val.spt_assoc_id);
> + if (!trans)
> + return -ENOENT;
> +
> + val.spt_pathmaxrxt = trans->pathmaxrxt;
> + val.spt_pathpfthld = trans->pf_retrans;
> + }
> +
> + if (copy_to_user(optval, &val, optlen))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
> char __user *optval, int __user *optlen)
> {
> @@ -5628,6 +5721,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
> case SCTP_AUTO_ASCONF:
> retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen);
> break;
> + case SCTP_PEER_ADDR_THLDS:
> + retval = sctp_getsockopt_paddr_thresholds(sk, optval, len);
> + break;
> default:
> retval = -ENOPROTOOPT;
> break;
> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> index e5fe639..2b2bfe9 100644
> --- a/net/sctp/sysctl.c
> +++ b/net/sctp/sysctl.c
> @@ -141,6 +141,15 @@ static ctl_table sctp_table[] = {
> .extra2 = &int_max
> },
> {
> + .procname = "pf_retrans",
> + .data = &sctp_pf_retrans,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = &zero,
> + .extra2 = &int_max
> + },
> + {
> .procname = "max_init_retransmits",
> .data = &sctp_max_retrans_init,
> .maxlen = sizeof(int),
> diff --git a/net/sctp/transport.c b/net/sctp/transport.c
> index b026ba0..194d0f3 100644
> --- a/net/sctp/transport.c
> +++ b/net/sctp/transport.c
> @@ -85,6 +85,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
>
> /* Initialize the default path max_retrans. */
> peer->pathmaxrxt = sctp_max_retrans_path;
> + peer->pf_retrans = sctp_pf_retrans;
>
> INIT_LIST_HEAD(&peer->transmitted);
> INIT_LIST_HEAD(&peer->send_ready);
> @@ -585,7 +586,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t)
> {
> unsigned long timeout;
> timeout = t->rto + sctp_jitter(t->rto);
> - if (t->state != SCTP_UNCONFIRMED)
> + if ((t->state != SCTP_UNCONFIRMED) &&
> + (t->state != SCTP_PF))
> timeout += t->hbinterval;
> timeout += jiffies;
> return timeout;
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists