lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160818081907.GA845@dhcp-128-65.nay.redhat.com>
Date:	Thu, 18 Aug 2016 16:19:46 +0800
From:	Dave Young <dyoung@...hat.com>
To:	Thiago Jung Bauermann <bauerman@...ux.vnet.ibm.com>
Cc:	kexec@...ts.infradead.org,
	Stewart Smith <stewart@...ux.vnet.ibm.com>,
	Mark Rutland <mark.rutland@....com>,
	Benjamin Herrenschmidt <benh@...nel.crashing.org>,
	Arnd Bergmann <arnd@...db.de>, Baoquan He <bhe@...hat.com>,
	linuxppc-dev@...ts.ozlabs.org, linux-kernel@...r.kernel.org,
	Vivek Goyal <vgoyal@...hat.com>,
	AKASHI Takahiro <takahiro.akashi@...aro.org>,
	David Laight <David.Laight@...LAB.COM>,
	Eric Biederman <ebiederm@...ssion.com>,
	Michael Ellerman <mpe@...erman.id.au>,
	Russell King - ARM Linux <linux@...linux.org.uk>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-arm-kernel@...ts.infradead.org
Subject: Re: [PATCH v2 2/2] kexec: extend kexec_file_load system call

Since Eric was objecting the extension, I think you should convince him,
but I will review from code point of view.

On 08/11/16 at 08:03pm, Thiago Jung Bauermann wrote:
> From: AKASHI Takahiro <takahiro.akashi@...aro.org>
> 
> Device tree blob must be passed to a second kernel on DTB-capable
> archs, like powerpc and arm64, but the current kernel interface
> lacks this support.
> 
> This patch extends kexec_file_load system call by adding an extra
> argument to this syscall so that an arbitrary number of file descriptors
> can be handed out from user space to the kernel.
> 
> 	long sys_kexec_file_load(int kernel_fd, int initrd_fd,
> 				 unsigned long cmdline_len,
> 				 const char __user *cmdline_ptr,
> 				 unsigned long flags,
> 				 const struct kexec_fdset __user *ufdset);
> 
> If KEXEC_FILE_EXTRA_FDS is set to the "flags" argument, the "ufdset"
> argument points to the following struct buffer:
> 
> 	struct kexec_fdset {
> 		int nr_fds;
> 		struct kexec_file_fd fds[0];
> 	}
> 
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@...aro.org>
> Signed-off-by: Thiago Jung Bauermann <bauerman@...ux.vnet.ibm.com>
> ---
>  include/linux/fs.h         |  1 +
>  include/linux/kexec.h      |  7 ++--
>  include/linux/syscalls.h   |  4 ++-
>  include/uapi/linux/kexec.h | 22 ++++++++++++
>  kernel/kexec_file.c        | 83 ++++++++++++++++++++++++++++++++++++++++++----
>  5 files changed, 108 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 3523bf62f328..847d9c31f428 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2656,6 +2656,7 @@ extern int do_pipe_flags(int *, int);
>  	id(MODULE, kernel-module)		\
>  	id(KEXEC_IMAGE, kexec-image)		\
>  	id(KEXEC_INITRAMFS, kexec-initramfs)	\
> +	id(KEXEC_PARTIAL_DTB, kexec-partial-dtb)		\
>  	id(POLICY, security-policy)		\
>  	id(MAX_ID, )
>  
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 4f85d284ed0b..29202935055d 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -148,7 +148,10 @@ struct kexec_file_ops {
>  	kexec_verify_sig_t *verify_sig;
>  #endif
>  };
> -#endif
> +
> +int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf,
> +				    unsigned long size);
> +#endif /* CONFIG_KEXEC_FILE */
>  
>  struct kimage {
>  	kimage_entry_t head;
> @@ -280,7 +283,7 @@ extern int kexec_load_disabled;
>  
>  /* List of defined/legal kexec file flags */
>  #define KEXEC_FILE_FLAGS	(KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
> -				 KEXEC_FILE_NO_INITRAMFS)
> +				 KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_EXTRA_FDS)
>  
>  #define VMCOREINFO_BYTES           (4096)
>  #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index d02239022bd0..fc072bdb74e3 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -66,6 +66,7 @@ struct perf_event_attr;
>  struct file_handle;
>  struct sigaltstack;
>  union bpf_attr;
> +struct kexec_fdset;
>  
>  #include <linux/types.h>
>  #include <linux/aio_abi.h>
> @@ -321,7 +322,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
>  asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
>  				    unsigned long cmdline_len,
>  				    const char __user *cmdline_ptr,
> -				    unsigned long flags);
> +				    unsigned long flags,
> +				    const struct kexec_fdset __user *ufdset);
>  
>  asmlinkage long sys_exit(int error_code);
>  asmlinkage long sys_exit_group(int error_code);
> diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
> index aae5ebf2022b..6279be79efba 100644
> --- a/include/uapi/linux/kexec.h
> +++ b/include/uapi/linux/kexec.h
> @@ -23,6 +23,28 @@
>  #define KEXEC_FILE_UNLOAD	0x00000001
>  #define KEXEC_FILE_ON_CRASH	0x00000002
>  #define KEXEC_FILE_NO_INITRAMFS	0x00000004
> +#define KEXEC_FILE_EXTRA_FDS	0x00000008
> +
> +enum kexec_file_type {
> +	KEXEC_FILE_TYPE_KERNEL,
> +	KEXEC_FILE_TYPE_INITRAMFS,
> +
> +	/*
> +	 * Device Tree Blob containing just the nodes and properties that
> +	 * the kexec_file_load caller wants to add or modify.
> +	 */
> +	KEXEC_FILE_TYPE_PARTIAL_DTB,
> +};
> +
> +struct kexec_file_fd {
> +	enum kexec_file_type type;
> +	int fd;
> +};
> +
> +struct kexec_fdset {
> +	int nr_fds;
> +	struct kexec_file_fd fds[0];
> +};
>  
>  /* These values match the ELF architecture values.
>   * Unless there is a good reason that should continue to be the case.
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index 113af2f219b9..d6803dd884e2 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -25,6 +25,9 @@
>  #include <linux/vmalloc.h>
>  #include "kexec_internal.h"
>  
> +#define MAX_FDSET_SIZE	(sizeof(struct kexec_fdset) + \
> +				KEXEC_SEGMENT_MAX * sizeof(struct kexec_file_fd))

How about use KEXEC_EXTRA_FD_MAX = 1, so only allow passing one fd for
now. In the future if there's other requirement we can extend the
internal value.

> +
>  /*
>   * Declare these symbols weak so that if architecture provides a purgatory,
>   * these will be overridden.
> @@ -116,6 +119,22 @@ void kimage_file_post_load_cleanup(struct kimage *image)
>  	image->image_loader_data = NULL;
>  }
>  
> +/**
> + * arch_kexec_verify_buffer() - check that the given kexec file is valid
> + *
> + * Device trees in particular can contain properties that may make the kernel
> + * execute code that it wasn't supposed to (e.g., use the wrong entry point
> + * when calling firmware functions). Because of this, the kernel needs to
> + * verify that it is safe to use the device tree blob passed from userspace.
> + *
> + * Return: 0 on success, negative errno on error.
> + */
> +int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf,
> +				    unsigned long size)
> +{
> +	return -EINVAL;
> +}
> +
>  /*
>   * In file mode list of segments is prepared by kernel. Copy relevant
>   * data from user space, do error checking, prepare segment list
> @@ -123,7 +142,8 @@ void kimage_file_post_load_cleanup(struct kimage *image)
>  static int
>  kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
>  			     const char __user *cmdline_ptr,
> -			     unsigned long cmdline_len, unsigned flags)
> +			     unsigned long cmdline_len, unsigned long flags,
> +			     const struct kexec_fdset __user *ufdset)
>  {
>  	int ret = 0;
>  	void *ldata;
> @@ -160,6 +180,55 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
>  		image->initrd_buf_len = size;
>  	}
>  
> +	if (flags & KEXEC_FILE_EXTRA_FDS) {
> +		int nr_fds, i;
> +		size_t fdset_size;
> +		char fdset_buf[MAX_FDSET_SIZE];
> +		struct kexec_fdset *fdset = (struct kexec_fdset *) fdset_buf;
> +
> +		ret = copy_from_user(&nr_fds, ufdset, sizeof(int));
> +		if (ret) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		if (nr_fds > KEXEC_SEGMENT_MAX) {
> +			ret = -E2BIG;
> +			goto out;
> +		}
> +
> +		fdset_size = sizeof(struct kexec_fdset)
> +				+ nr_fds * sizeof(struct kexec_file_fd);
> +
> +		ret = copy_from_user(fdset, ufdset, fdset_size);
> +		if (ret) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < fdset->nr_fds; i++) {
> +			if (fdset->fds[i].type == KEXEC_FILE_TYPE_PARTIAL_DTB) {
> +				ret = kernel_read_file_from_fd(fdset->fds[i].fd,
> +						&image->dtb_buf, &size, INT_MAX,
> +						READING_KEXEC_PARTIAL_DTB);
> +				if (ret)
> +					goto out;
> +				image->dtb_buf_len = size;
> +
> +				ret = arch_kexec_verify_buffer(KEXEC_FILE_TYPE_PARTIAL_DTB,
> +							       image->dtb_buf,
> +							       image->dtb_buf_len);
> +				if (ret)
> +					goto out;
> +			} else {
> +				pr_debug("unknown file type %d failed.\n",
> +						fdset->fds[i].type);

Should be pr_err

> +				ret = -EINVAL;
> +				goto out;
> +			}
> +		}
> +	}
> +
>  	if (cmdline_len) {
>  		image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
>  		if (!image->cmdline_buf) {
> @@ -202,7 +271,8 @@ out:
>  static int
>  kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
>  		       int initrd_fd, const char __user *cmdline_ptr,
> -		       unsigned long cmdline_len, unsigned long flags)
> +		       unsigned long cmdline_len, unsigned long flags,
> +		       const struct kexec_fdset __user *ufdset)
>  {
>  	int ret;
>  	struct kimage *image;
> @@ -221,7 +291,8 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
>  	}
>  
>  	ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
> -					   cmdline_ptr, cmdline_len, flags);
> +					   cmdline_ptr, cmdline_len, flags,
> +					   ufdset);
>  	if (ret)
>  		goto out_free_image;
>  
> @@ -256,9 +327,9 @@ out_free_image:
>  	return ret;
>  }
>  
> -SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
> +SYSCALL_DEFINE6(kexec_file_load, int, kernel_fd, int, initrd_fd,
>  		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
> -		unsigned long, flags)
> +		unsigned long, flags, const struct kexec_fdset __user *, ufdset)
>  {
>  	int ret = 0, i;
>  	struct kimage **dest_image, *image;
> @@ -295,7 +366,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
>  		kimage_free(xchg(&kexec_crash_image, NULL));
>  
>  	ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
> -				     cmdline_len, flags);
> +				     cmdline_len, flags, ufdset);
>  	if (ret)
>  		goto out;
>  
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@...ts.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec

Thanks
Dave

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ