lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 28 Oct 2010 23:40:50 -0700
From:	Linus Torvalds <torvalds@...ux-foundation.org>
To:	David Miller <davem@...emloft.net>
Cc:	netdev@...r.kernel.org, drosenberg@...curity.com,
	jon.maloy@...csson.com, allan.stephens@...driver.com
Subject: Re: [PATCH] net: Limit socket I/O iovec total length to INT_MAX.

Oh, btw, noticed another small detail..

I don't know if this matters, but the regular read/write routines
don't actually use INT_MAX as the limit, but instead a "maximally
page-aligned value that fits in an int":

   #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)

because the code does _not_ want to turn a nice set of huge
page-aligned big writes into a write of an odd number (2GB-1).

This may not make much of a difference to networking - you guys are
already used to working with odd sizes like 1500 bytes of data payload
per packet etc. Most regular filesystems are much more sensitive to
things like block (and particularly page-cache sized) boundaries
because of the vagaries of disk and cache granularities. But MAX_INT
is a _really_ odd size, and things like csum_and_copy still tends to
want to get things at least word-aligned, no? And if nothing else, the
memory copies tend to be better with cacheline boundaries.

It would be sad if a 4GB aligned write turns into
 - one 2GB-1 aligned write
 - one pessimally unaligned 2G-1 write where every read from user
space is unaligned
 - finally a single 2-byte write.

I suspect it would be better off using the same kind of (MAX_INT &
PAGE_CACHE_MASK) logic - that 4GB write would still get split into
three partial writes (and _lots_ of packets ;), but at least they'd
all be word-aligned.

Does it matter? I dunno. Once you do 2GB+ writes, these kinds of small
details may be totally hidden in all the noise.

                    Linus

On Thu, Oct 28, 2010 at 11:22 AM, David Miller <davem@...emloft.net> wrote:
>
> This helps protect us from overflow issues down in the
> individual protocol sendmsg/recvmsg handlers.  Once
> we hit INT_MAX we truncate out the rest of the iovec
> by setting the iov_len members to zero.
>
> This works because:
>
> 1) For SOCK_STREAM and SOCK_SEQPACKET sockets, partial
>   writes are allowed and the application will just continue
>   with another write to send the rest of the data.
>
> 2) For datagram oriented sockets, where there must be a
>   one-to-one correspondance between write() calls and
>   packets on the wire, INT_MAX is going to be far larger
>   than the packet size limit the protocol is going to
>   check for and signal with -EMSGSIZE.
>
> Based upon a patch by Linus Torvalds.
>
> Signed-off-by: David S. Miller <davem@...emloft.net>
> ---
>
> Ok, this is the patch I am testing right now.  It ought to
> plug the TIPC holes wrt. handling iovecs given by the
> user.
>
> I'll look at the recently discovered RDS crap next :-/
>
>  include/linux/socket.h |    2 +-
>  net/compat.c           |   12 +++++++-----
>  net/core/iovec.c       |   19 +++++++++----------
>  3 files changed, 17 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/socket.h b/include/linux/socket.h
> index 5146b50..86b652f 100644
> --- a/include/linux/socket.h
> +++ b/include/linux/socket.h
> @@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
>                                          int offset,
>                                          unsigned int len, __wsum *csump);
>
> -extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
> +extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
>  extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
>  extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
>                             int offset, int len);
> diff --git a/net/compat.c b/net/compat.c
> index 63d260e..71bfd8e 100644
> --- a/net/compat.c
> +++ b/net/compat.c
> @@ -34,17 +34,19 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
>                                          struct compat_iovec __user *uiov32,
>                                          int niov)
>  {
> -       int tot_len = 0;
> +       size_t tot_len = 0;
>
>        while (niov > 0) {
>                compat_uptr_t buf;
>                compat_size_t len;
>
>                if (get_user(len, &uiov32->iov_len) ||
> -                  get_user(buf, &uiov32->iov_base)) {
> -                       tot_len = -EFAULT;
> -                       break;
> -               }
> +                   get_user(buf, &uiov32->iov_base))
> +                       return -EFAULT;
> +
> +               if (len > INT_MAX - tot_len)
> +                       len = INT_MAX - tot_len;
> +
>                tot_len += len;
>                kiov->iov_base = compat_ptr(buf);
>                kiov->iov_len = (__kernel_size_t) len;
> diff --git a/net/core/iovec.c b/net/core/iovec.c
> index 72aceb1..e7f5b29 100644
> --- a/net/core/iovec.c
> +++ b/net/core/iovec.c
> @@ -35,10 +35,10 @@
>  *     in any case.
>  */
>
> -long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
> +int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
>  {
>        int size, ct;
> -       long err;
> +       size_t err;
>
>        if (m->msg_namelen) {
>                if (mode == VERIFY_READ) {
> @@ -62,14 +62,13 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
>        err = 0;
>
>        for (ct = 0; ct < m->msg_iovlen; ct++) {
> -               err += iov[ct].iov_len;
> -               /*
> -                * Goal is not to verify user data, but to prevent returning
> -                * negative value, which is interpreted as errno.
> -                * Overflow is still possible, but it is harmless.
> -                */
> -               if (err < 0)
> -                       return -EMSGSIZE;
> +               size_t len = iov[ct].iov_len;
> +
> +               if (len > INT_MAX - err) {
> +                       len = INT_MAX - err;
> +                       iov[ct].iov_len = len;
> +               }
> +               err += len;
>        }
>
>        return err;
> --
> 1.7.3.2
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ