netdev - Re: [PATCH v2] sctp: Implement quick failover draft from tsvwg

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <50072939.5030600@gmail.com>
Date:	Wed, 18 Jul 2012 17:23:05 -0400
From:	Vlad Yasevich <vyasevich@...il.com>
To:	Neil Horman <nhorman@...driver.com>
CC:	netdev@...r.kernel.org, Sridhar Samudrala <sri@...ibm.com>,
	"David S. Miller" <davem@...emloft.net>, linux-sctp@...r.kernel.org
Subject: Re: [PATCH v2] sctp: Implement quick failover draft from tsvwg

On 07/18/2012 02:01 PM, Neil Horman wrote:
> I've seen several attempts recently made to do quick failover of sctp transports
> by reducing various retransmit timers and counters.  While its possible to
> implement a faster failover on multihomed sctp associations, its not
> particularly robust, in that it can lead to unneeded retransmits, as well as
> false connection failures due to intermittent latency on a network.
>
> Instead, lets implement the new ietf quick failover draft found here:
> http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05
>
> This will let the sctp stack identify transports that have had a small number of
> errors, and avoid using them quickly until their reliability can be
> re-established.  I've tested this out on two virt guests connected via multiple
> isolated virt networks and believe its in compliance with the above draft and
> works well.
>
> Signed-off-by: Neil Horman <nhorman@...driver.com>
> CC: Vlad Yasevich <vyasevich@...il.com>
> CC: Sridhar Samudrala <sri@...ibm.com>
> CC: "David S. Miller" <davem@...emloft.net>
> CC: linux-sctp@...r.kernel.org
>
> ---
> Change notes:
>
> V2)
> - Added socket option API from section 6.1 of the specification, as per
> request from Vlad. Adding this socket option allows us to alter both the path
> maximum retransmit value and the path partial failure threshold for each
> transport and the association as a whole.
>
> - Added a per transport pf_retrans value, and initialized it from the
> association value.  This makes each transport independently configurable as per
> the socket option above, and prevents changes in the sysctl from bleeding into
> an already created association.
> ---
>   Documentation/networking/ip-sysctl.txt |   14 +++++
>   include/net/sctp/constants.h           |    1 +
>   include/net/sctp/structs.h             |   11 +++-
>   include/net/sctp/user.h                |   11 ++++
>   net/sctp/associola.c                   |   36 ++++++++++--
>   net/sctp/outqueue.c                    |    6 +-
>   net/sctp/sm_sideeffect.c               |   33 ++++++++++-
>   net/sctp/socket.c                      |   96 ++++++++++++++++++++++++++++++++
>   net/sctp/sysctl.c                      |    9 +++
>   net/sctp/transport.c                   |    4 +-
>   10 files changed, 206 insertions(+), 15 deletions(-)
>

[ snip ]

> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index b3b8a8d..dfffece 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -3470,6 +3470,52 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
>   }
>
>
> +/*
> + * SCTP_PEER_ADDR_THLDS
> + *
> + * This option allows us to alter the partially failed threshold for one or all
> + * transports in an association.  See Section 6.1 of:
> + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
> + */
> +static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
> +					    char __user *optval,
> +					    unsigned int optlen)
> +{
> +	struct sctp_paddrthlds val;
> +	struct sctp_transport *trans;
> +	struct sctp_association *asoc;
> +
> +	if (optlen < sizeof(struct sctp_paddrthlds))
> +		return -EINVAL;
> +	if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval,
> +			   optlen))
> +		return -EFAULT;

What if optlen is bigger?  You going to trash the stack.

> +
> +	if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
> +		asoc = sctp_id2assoc(sk, val.spt_assoc_id);
> +		if (!asoc)
> +			return -ENOENT;
> +		list_for_each_entry(trans, &asoc->peer.transport_addr_list,
> +				    transports) {
> +			trans->pathmaxrxt = val.spt_pathmaxrxt;
> +			trans->pf_retrans = val.spt_pathpfthld;

You want to make sure that the values aren't 0.  Otherwise, you'll set 
the pathmaxrxt to 0 and that would be bad.

> +		}
> +
> +		asoc->pf_retrans = val.spt_pathpfthld;
> +		asoc->pathmaxrxt = val.spt_pathmaxrxt;

Ditto.

> +	} else {
> +		trans = sctp_addr_id2transport(sk, &val.spt_address,
> +					       val.spt_assoc_id);
> +		if (!trans)
> +			return -ENOENT;
> +
> +		trans->pathmaxrxt = val.spt_pathmaxrxt;
> +		trans->pf_retrans = val.spt_pathpfthld;

Ditto.

> +	}
> +
> +	return 0;
> +}
> +
>   /* API 6.2 setsockopt(), getsockopt()
>    *
>    * Applications use setsockopt() and getsockopt() to set or retrieve
> @@ -3619,6 +3665,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
>   	case SCTP_AUTO_ASCONF:
>   		retval = sctp_setsockopt_auto_asconf(sk, optval, optlen);
>   		break;
> +	case SCTP_PEER_ADDR_THLDS:
> +		retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
> +		break;
>   	default:
>   		retval = -ENOPROTOOPT;
>   		break;
> @@ -5490,6 +5539,50 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len,
>   	return 0;
>   }
>
> +/*
> + * SCTP_PEER_ADDR_THLDS
> + *
> + * This option allows us to fetch the partially failed threshold for one or all
> + * transports in an association.  See Section 6.1 of:
> + * http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
> + */
> +static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
> +					    char __user *optval,
> +					    int optlen)
> +{
> +	struct sctp_paddrthlds val;
> +	struct sctp_transport *trans;
> +	struct sctp_association *asoc;
> +
> +	if (optlen < sizeof(struct sctp_paddrthlds))
> +		return -EINVAL;
> +	if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, optlen))
> +		return -EFAULT;

Again, trashing the stack if optlen and optval are bigger.

-vlad
> +
> +	if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
> +			val.spt_assoc_id);
> +		asoc = sctp_id2assoc(sk, val.spt_assoc_id);
> +		if (!asoc)
> +			return -ENOENT;
> +
> +		val.spt_pathpfthld = asoc->pf_retrans;
> +		val.spt_pathmaxrxt = asoc->pathmaxrxt;
> +	} else {
> +		trans = sctp_addr_id2transport(sk, &val.spt_address,
> +					       val.spt_assoc_id);
> +		if (!trans)
> +			return -ENOENT;
> +
> +		val.spt_pathmaxrxt = trans->pathmaxrxt;
> +		val.spt_pathpfthld = trans->pf_retrans;
> +	}
> +
> +	if (copy_to_user(optval, &val, optlen))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>   SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
>   				char __user *optval, int __user *optlen)
>   {
> @@ -5628,6 +5721,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
>   	case SCTP_AUTO_ASCONF:
>   		retval = sctp_getsockopt_auto_asconf(sk, len, optval, optlen);
>   		break;
> +	case SCTP_PEER_ADDR_THLDS:
> +		retval = sctp_getsockopt_paddr_thresholds(sk, optval, len);
> +		break;
>   	default:
>   		retval = -ENOPROTOOPT;
>   		break;
> diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
> index e5fe639..2b2bfe9 100644
> --- a/net/sctp/sysctl.c
> +++ b/net/sctp/sysctl.c
> @@ -141,6 +141,15 @@ static ctl_table sctp_table[] = {
>   		.extra2		= &int_max
>   	},
>   	{
> +		.procname	= "pf_retrans",
> +		.data		= &sctp_pf_retrans,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &int_max
> +	},
> +	{
>   		.procname	= "max_init_retransmits",
>   		.data		= &sctp_max_retrans_init,
>   		.maxlen		= sizeof(int),
> diff --git a/net/sctp/transport.c b/net/sctp/transport.c
> index b026ba0..194d0f3 100644
> --- a/net/sctp/transport.c
> +++ b/net/sctp/transport.c
> @@ -85,6 +85,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
>
>   	/* Initialize the default path max_retrans.  */
>   	peer->pathmaxrxt  = sctp_max_retrans_path;
> +	peer->pf_retrans  = sctp_pf_retrans;
>
>   	INIT_LIST_HEAD(&peer->transmitted);
>   	INIT_LIST_HEAD(&peer->send_ready);
> @@ -585,7 +586,8 @@ unsigned long sctp_transport_timeout(struct sctp_transport *t)
>   {
>   	unsigned long timeout;
>   	timeout = t->rto + sctp_jitter(t->rto);
> -	if (t->state != SCTP_UNCONFIRMED)
> +	if ((t->state != SCTP_UNCONFIRMED) &&
> +	    (t->state != SCTP_PF))
>   		timeout += t->hbinterval;
>   	timeout += jiffies;
>   	return timeout;
>


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html