Message-ID: <20180411080536.sw6tesgi3vqgxxpn@mwanda>
Date:   Wed, 11 Apr 2018 11:05:36 +0300
From:   Dan Carpenter <dan.carpenter@...cle.com>
To:     Alistair Strachan <astrachan@...gle.com>
Cc:     linux-kernel@...r.kernel.org, gregkh@...uxfoundation.org,
        arve@...roid.com, tkjos@...roid.com, maco@...roid.com,
        devel@...verdev.osuosl.org, kernel-team@...roid.com,
        ghartman@...gle.com
Subject: Re: [PATCH] staging: Android: Add 'vsoc' driver for cuttlefish.

On Tue, Apr 10, 2018 at 12:06:47PM -0700, Alistair Strachan wrote:
> +static int do_create_fd_scoped_permission(
> +	struct vsoc_device_region *region_p,
> +	struct fd_scoped_permission_node *np,
> +	struct fd_scoped_permission_arg *__user arg)
> +{
> +	struct file *managed_filp;
> +	s32 managed_fd;
> +	atomic_t *owner_ptr = NULL;
> +	struct vsoc_device_region *managed_region_p;
> +
> +	if (copy_from_user(&np->permission, &arg->perm, sizeof(*np)) ||
> +	    copy_from_user(&managed_fd,
> +			   &arg->managed_region_fd, sizeof(managed_fd))) {
> +		return -EFAULT;
> +	}
> +	managed_filp = fdget(managed_fd).file;
> +	/* Check that it's a valid fd, */
> +	if (!managed_filp || vsoc_validate_filep(managed_filp))
> +		return -EPERM;
> +	/* EEXIST if the given fd already has a permission. */
> +	if (((struct vsoc_private_data *)managed_filp->private_data)->
> +	    fd_scoped_permission_node)
> +		return -EEXIST;
> +	managed_region_p = vsoc_region_from_filep(managed_filp);
> +	/* Check that the provided region is managed by this one */
> +	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p)
> +		return -EPERM;
> +	/* The area must be well formed and have non-zero size */
> +	if (np->permission.begin_offset >= np->permission.end_offset)
> +		return -EINVAL;
> +	/* The area must fit in the memory window */
> +	if (np->permission.end_offset >
> +	    vsoc_device_region_size(managed_region_p))
> +		return -ERANGE;
> +	/* The area must be in the region data section */
> +	if (np->permission.begin_offset <
> +	    managed_region_p->offset_of_region_data)
> +		return -ERANGE;
> +	/* The area must be page aligned */
> +	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
> +	    !PAGE_ALIGNED(np->permission.end_offset))
> +		return -EINVAL;
> +	/* The owner flag must reside in the owner memory */
> +	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
There is a potential integer overflow here since ->owner_offset comes
from the user.

> +	    vsoc_device_region_size(region_p))
> +		return -ERANGE;
> +	/* The owner flag must reside in the data section */
> +	if (np->permission.owner_offset < region_p->offset_of_region_data)
> +		return -EINVAL;
> +	/* Owner offset must be naturally aligned in the window */
> +	if (np->permission.owner_offset &
> +	    (sizeof(np->permission.owner_offset) - 1))
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
but then we prevent it here so in practice it's fine.  But it would be
easier to review if the alignment check came before the overflow-prone
addition.
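
Something like this, with the existing checks just reordered so the
alignment test comes first (untested):

	/* Owner offset must be naturally aligned in the window */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1))
		return -EINVAL;
	/* The owner flag must reside in the owner memory */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p))
		return -ERANGE;
	/* The owner flag must reside in the data section */
	if (np->permission.owner_offset < region_p->offset_of_region_data)
		return -EINVAL;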

> +		return -EINVAL;
> +	/* The owner value must change to claim the memory */
> +	if (np->permission.owned_value == VSOC_REGION_FREE)
> +		return -EINVAL;
> +	owner_ptr =
> +	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
> +						np->permission.owner_offset);
> +	/* We've already verified that this is in the shared memory window, so
> +	 * it should be safe to write to this address.
> +	 */
> +	if (atomic_cmpxchg(owner_ptr,
> +			   VSOC_REGION_FREE,
> +			   np->permission.owned_value) != VSOC_REGION_FREE) {
> +		return -EBUSY;
> +	}
> +	((struct vsoc_private_data *)managed_filp->private_data)->
> +	    fd_scoped_permission_node = np;
> +	/* The file offset needs to be adjusted if the calling
> +	 * process did any read/write operations on the fd
> +	 * before creating the permission.
> +	 */
> +	if (managed_filp->f_pos) {
> +		if (managed_filp->f_pos > np->permission.end_offset) {
> +			/* If the offset is beyond the permission end, set it
> +			 * to the end.
> +			 */
> +			managed_filp->f_pos = np->permission.end_offset;
> +		} else {
> +			/* If the offset is within the permission interval
> +			 * keep it there otherwise reset it to zero.
> +			 */
> +			if (managed_filp->f_pos < np->permission.begin_offset) {
> +				managed_filp->f_pos = 0;
> +			} else {
> +				managed_filp->f_pos -=
> +				    np->permission.begin_offset;
> +			}
> +		}
> +	}
> +	return 0;
> +}
> +

[ snip ]

> +
> +/**
> + * Implements the inner logic of cond_wait. Copies to and from userspace are
> + * done in the helper function below.
> + */
> +static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
> +{
> +	DEFINE_WAIT(wait);
> +	u32 region_number = iminor(file_inode(filp));
> +	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
> +	struct hrtimer_sleeper timeout, *to = NULL;
> +	int ret = 0;
> +	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
> +	atomic_t *address = NULL;
> +	struct timespec ts;
> +
> +	/* Ensure that the offset is aligned */
> +	if (arg->offset & (sizeof(uint32_t) - 1))
> +		return -EADDRNOTAVAIL;
> +	/* Ensure that the offset is within shared memory */
> +	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
> +	    sizeof(uint32_t) > region_p->region_end_offset)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This addition could overflow since arg->offset comes from the user.
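
One way to write it without the possible wrap, assuming the region
offsets were already validated in the probe function (untested):

	if (arg->offset > region_p->region_end_offset -
			  region_p->region_begin_offset - sizeof(uint32_t))
		return -E2BIG;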

> +		return -E2BIG;
> +	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
> +					  arg->offset);
> +
> +	/* Ensure that the type of wait is valid */
> +	switch (arg->wait_type) {
> +	case VSOC_WAIT_IF_EQUAL:
> +		break;
> +	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
> +		to = &timeout;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (to) {
> +		/* Copy the user-supplied timesec into the kernel structure.
> +		 * We do things this way to flatten differences between 32 bit
> +		 * and 64 bit timespecs.
> +		 */
> +		ts.tv_sec = arg->wake_time_sec;
> +		ts.tv_nsec = arg->wake_time_nsec;
> +
> +		if (!timespec_valid(&ts))
> +			return -EINVAL;
> +		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
> +				      HRTIMER_MODE_ABS);
> +		hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts),
> +					     current->timer_slack_ns);
> +
> +		hrtimer_init_sleeper(to, current);
> +	}
> +
> +	while (1) {
> +		prepare_to_wait(&data->futex_wait_queue, &wait,
> +				TASK_INTERRUPTIBLE);
> +		/*
> +		 * Check the sentinel value after prepare_to_wait. If the value
> +		 * changes after this check the writer will call signal,
> +		 * changing the task state from INTERRUPTIBLE to RUNNING. That
> +		 * will ensure that schedule() will eventually schedule this
> +		 * task.
> +		 */
> +		if (atomic_read(address) != arg->value) {
> +			ret = 0;
> +			break;
> +		}
> +		if (to) {
> +			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
> +			if (likely(to->task))
> +				freezable_schedule();
> +			hrtimer_cancel(&to->timer);
> +			if (!to->task) {
> +				ret = -ETIMEDOUT;
> +				break;
> +			}
> +		} else {
> +			freezable_schedule();
> +		}
> +		/* Count the number of times that we woke up. This is useful
> +		 * for unit testing.
> +		 */
> +		++arg->wakes;
> +		if (signal_pending(current)) {
> +			ret = -EINTR;
> +			break;
> +		}
> +	}
> +	finish_wait(&data->futex_wait_queue, &wait);
> +	if (to)
> +		destroy_hrtimer_on_stack(&to->timer);
> +	return ret;
> +}
> +

[ snip ]

> +
> +static int vsoc_probe_device(struct pci_dev *pdev,
> +			     const struct pci_device_id *ent)
> +{
> +	int result;
> +	int i;
> +	resource_size_t reg_size;
> +	dev_t devt;
> +
> +	vsoc_dev.dev = pdev;
> +	result = pci_enable_device(pdev);
> +	if (result) {
> +		dev_err(&pdev->dev,
> +			"pci_enable_device failed %s: error %d\n",
> +			pci_name(pdev), result);
> +		return result;
> +	}
> +	vsoc_dev.enabled_device = 1;
> +	result = pci_request_regions(pdev, "vsoc");
> +	if (result < 0) {
> +		dev_err(&pdev->dev, "pci_request_regions failed\n");
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +	vsoc_dev.requested_regions = 1;
> +	/* Set up the control registers in BAR 0 */
> +	reg_size = pci_resource_len(pdev, REGISTER_BAR);
> +	if (reg_size > MAX_REGISTER_BAR_LEN)
> +		vsoc_dev.regs =
> +		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
> +	else
> +		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);
> +
> +	if (!vsoc_dev.regs) {
> +		dev_err(&pdev->dev,
> +			"cannot ioremap registers of size %zu\n",
> +		       (size_t)reg_size);
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +
> +	/* Map the shared memory in BAR 2 */
> +	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
> +	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);
> +
> +	dev_info(&pdev->dev, "shared memory @ DMA %p size=0x%zx\n",
> +		 (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
> +	/* TODO(ghartman): ioremap_wc should work here */
> +	vsoc_dev.kernel_mapped_shm = ioremap_nocache(
> +			vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
> +	if (!vsoc_dev.kernel_mapped_shm) {
> +		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +
> +	vsoc_dev.layout =
> +	    (struct vsoc_shm_layout_descriptor *)vsoc_dev.kernel_mapped_shm;
> +	dev_info(&pdev->dev, "major_version: %d\n",
> +		 vsoc_dev.layout->major_version);
> +	dev_info(&pdev->dev, "minor_version: %d\n",
> +		 vsoc_dev.layout->minor_version);
> +	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
> +	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
> +	if (vsoc_dev.layout->major_version !=
> +	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
> +		dev_err(&vsoc_dev.dev->dev,
> +			"driver supports only major_version %d\n",
> +			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
> +				     VSOC_DEV_NAME);
> +	if (result) {
> +		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +	vsoc_dev.major = MAJOR(devt);
> +	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
> +	vsoc_dev.cdev.owner = THIS_MODULE;
> +	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
> +	if (result) {
> +		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +	vsoc_dev.cdev_added = 1;
> +	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
> +	if (!vsoc_dev.class) {
            ^^^^^^^^^^^^^^^
This should be "if (IS_ERR(vsoc_dev.class))".  class_create() never
returns NULL on failure, it returns error pointers.  The test in
vsoc_remove_device() needs to be updated as well.
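
On the remove side something like this would work (untested):

	if (!IS_ERR_OR_NULL(vsoc_dev.class)) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class = NULL;
	}

Or just set vsoc_dev.class to NULL in the error path here before
calling vsoc_remove_device().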

> +		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
> +		vsoc_remove_device(pdev);
> +		return -EBUSY;
> +	}
> +	vsoc_dev.regions = (struct vsoc_device_region *)
> +		(vsoc_dev.kernel_mapped_shm +
> +		 vsoc_dev.layout->vsoc_region_desc_offset);
> +	vsoc_dev.msix_entries = kcalloc(
> +			vsoc_dev.layout->region_count,
> +			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
> +	if (!vsoc_dev.msix_entries) {
> +		dev_err(&vsoc_dev.dev->dev,
> +			"unable to allocate msix_entries\n");
> +		vsoc_remove_device(pdev);
> +		return -ENOSPC;
> +	}
> +	vsoc_dev.regions_data = kcalloc(
> +			vsoc_dev.layout->region_count,
> +			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
> +	if (!vsoc_dev.regions_data) {
> +		dev_err(&vsoc_dev.dev->dev,
> +			"unable to allocate regions' data\n");
> +		vsoc_remove_device(pdev);
> +		return -ENOSPC;
> +	}
> +	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
> +		vsoc_dev.msix_entries[i].entry = i;
> +
> +	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
> +				       vsoc_dev.layout->region_count);
> +	if (result) {
> +		dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result);
> +		vsoc_remove_device(pdev);
> +		return -ENOSPC;
> +	}
> +	/* Check that all regions are well formed */
> +	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
> +		const struct vsoc_device_region *region = vsoc_dev.regions + i;
> +
> +		if (!PAGE_ALIGNED(region->region_begin_offset) ||
> +		    !PAGE_ALIGNED(region->region_end_offset)) {
> +			dev_err(&vsoc_dev.dev->dev,
> +				"region %d not aligned (%x:%x)", i,
> +				region->region_begin_offset,
> +				region->region_end_offset);
> +			vsoc_remove_device(pdev);
> +			return -EFAULT;
> +		}
> +		if (region->region_begin_offset >= region->region_end_offset ||
> +		    region->region_end_offset > vsoc_dev.shm_size) {
> +			dev_err(&vsoc_dev.dev->dev,
> +				"region %d offsets are wrong: %x %x %zx",
> +				i, region->region_begin_offset,
> +				region->region_end_offset, vsoc_dev.shm_size);
> +			vsoc_remove_device(pdev);
> +			return -EFAULT;
> +		}
> +		if (region->managed_by >= vsoc_dev.layout->region_count) {
> +			dev_err(&vsoc_dev.dev->dev,
> +				"region %d has invalid owner: %u",
> +				i, region->managed_by);
> +			vsoc_remove_device(pdev);
> +			return -EFAULT;
> +		}
> +	}
> +	vsoc_dev.msix_enabled = 1;
> +	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
> +		const struct vsoc_device_region *region = vsoc_dev.regions + i;
> +		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
> +		const struct vsoc_signal_table_layout *h_to_g_signal_table =
> +			&region->host_to_guest_signal_table;
> +		const struct vsoc_signal_table_layout *g_to_h_signal_table =
> +			&region->guest_to_host_signal_table;
> +
> +		vsoc_dev.regions_data[i].name[name_sz] = '\0';
> +		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
> +		       name_sz);
> +		dev_info(&pdev->dev, "region %d name=%s\n",
> +			 i, vsoc_dev.regions_data[i].name);
> +		init_waitqueue_head(
> +				&vsoc_dev.regions_data[i].interrupt_wait_queue);
> +		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
> +		vsoc_dev.regions_data[i].incoming_signalled =
> +			vsoc_dev.kernel_mapped_shm +
> +			region->region_begin_offset +
> +			h_to_g_signal_table->interrupt_signalled_offset;
> +		vsoc_dev.regions_data[i].outgoing_signalled =
> +			vsoc_dev.kernel_mapped_shm +
> +			region->region_begin_offset +
> +			g_to_h_signal_table->interrupt_signalled_offset;
> +
> +		result = request_irq(
> +				vsoc_dev.msix_entries[i].vector,
> +				vsoc_interrupt, 0,
> +				vsoc_dev.regions_data[i].name,
> +				vsoc_dev.regions_data + i);
> +		if (result) {
> +			dev_info(&pdev->dev,
> +				 "request_irq failed irq=%d vector=%d\n",
> +				i, vsoc_dev.msix_entries[i].vector);
> +			vsoc_remove_device(pdev);
> +			return -ENOSPC;
> +		}
> +		vsoc_dev.regions_data[i].irq_requested = 1;
> +		if (!device_create(vsoc_dev.class, NULL,
> +				   MKDEV(vsoc_dev.major, i),
> +				   NULL, vsoc_dev.regions_data[i].name)) {
> +			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
> +			vsoc_remove_device(pdev);
> +			return -EBUSY;
> +		}
> +		vsoc_dev.regions_data[i].device_created = 1;
> +	}
> +	return 0;
> +}
> +
> +/*
> + * This should undo all of the allocations in the probe function in reverse
> + * order.
> + *
> + * Notes:
> + *
> + *   The device may have been partially initialized, so double check
> + *   that the allocations happened.
> + *
> + *   This function may be called multiple times, so mark resources as freed
> + *   as they are deallocated.
> + */
> +static void vsoc_remove_device(struct pci_dev *pdev)
> +{
> +	int i;
> +	/*
> +	 * pdev is the first thing to be set on probe and the last thing
> +	 * to be cleared here. If it's NULL then there is no cleanup.
> +	 */
> +	if (!pdev || !vsoc_dev.dev)
> +		return;
> +	dev_info(&pdev->dev, "remove_device\n");
> +	if (vsoc_dev.regions_data) {
> +		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
> +			if (vsoc_dev.regions_data[i].device_created) {
> +				device_destroy(vsoc_dev.class,
> +					       MKDEV(vsoc_dev.major, i));
> +				vsoc_dev.regions_data[i].device_created = 0;
> +			}
> +			if (vsoc_dev.regions_data[i].irq_requested)
> +				free_irq(vsoc_dev.msix_entries[i].vector, NULL);
> +			vsoc_dev.regions_data[i].irq_requested = 0;
> +		}
> +		kfree(vsoc_dev.regions_data);
> +		vsoc_dev.regions_data = 0;
> +	}
> +	if (vsoc_dev.msix_enabled) {
> +		pci_disable_msix(pdev);
> +		vsoc_dev.msix_enabled = 0;
> +	}
> +	kfree(vsoc_dev.msix_entries);
> +	vsoc_dev.msix_entries = 0;
> +	vsoc_dev.regions = 0;
> +	if (vsoc_dev.class) {
> +		class_destroy(vsoc_dev.class);
> +		vsoc_dev.class = 0;
> +	}
> +	if (vsoc_dev.cdev_added) {
> +		cdev_del(&vsoc_dev.cdev);
> +		vsoc_dev.cdev_added = 0;
> +	}
> +	if (vsoc_dev.major && vsoc_dev.layout) {
> +		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
> +					 vsoc_dev.layout->region_count);
> +		vsoc_dev.major = 0;
> +	}
> +	vsoc_dev.layout = 0;
> +	if (vsoc_dev.kernel_mapped_shm && pdev) {
                                          ^^^^
These tests can be removed since we checked at the start of the function.
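
In other words (untested):

	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}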

> +		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
> +		vsoc_dev.kernel_mapped_shm = 0;
> +	}
> +	if (vsoc_dev.regs && pdev) {
                             ^^^^
> +		pci_iounmap(pdev, vsoc_dev.regs);
> +		vsoc_dev.regs = 0;
> +	}
> +	if (vsoc_dev.requested_regions && pdev) {
                                          ^^^^
> +		pci_release_regions(pdev);
> +		vsoc_dev.requested_regions = 0;
> +	}
> +	if (vsoc_dev.enabled_device && pdev) {
                                       ^^^^
> +		pci_disable_device(pdev);
> +		vsoc_dev.enabled_device = 0;
> +	}
> +	/* Do this last: it indicates that the device is not initialized. */
> +	vsoc_dev.dev = NULL;
> +}
> +

regards,
dan carpenter
