lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e1d7c2208ea0ec2aa6836bf4db5bf0c2bd9e4b86.camel@infradead.org>
Date: Fri, 28 Nov 2025 13:00:36 +0000
From: David Woodhouse <dwmw2@...radead.org>
To: "Chalios, Babis" <bchalios@...zon.es>, "richardcochran@...il.com"
 <richardcochran@...il.com>, "andrew+netdev@...n.ch"
 <andrew+netdev@...n.ch>,  "davem@...emloft.net" <davem@...emloft.net>,
 "edumazet@...gle.com" <edumazet@...gle.com>,  "kuba@...nel.org"
 <kuba@...nel.org>, "pabeni@...hat.com" <pabeni@...hat.com>, 
 "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
 "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Cc: "Graf (AWS), Alexander" <graf@...zon.de>, "mzxreary@...inter.de"
	 <mzxreary@...inter.de>
Subject: Re: [PATCH 2/2] ptp: vmclock: support device notifications

On Thu, 2025-11-27 at 10:32 +0000, Chalios, Babis wrote:
> Add optional support for device notifications in VMClock. When
> supported, the hypervisor will send a device notification every time it
> updates the seq_count to a new even value.
> 
> Moreover, add support for poll() in VMClock as a means to propagate this
> notification to user space. poll() will return a POLLIN event to
> listeners every time seq_count changes to a value different than the one
> last seen (since open() or last read()/pread()). This means that when
> poll() returns a POLLIN event, listeners need to use read() to observe
> what has changed and update the reader's view of seq_count. In other
> words, after a poll() returned, all subsequent calls to poll() will
> immediately return with a POLLIN event until the listener calls read().
> 
> The device advertises support for the notification mechanism by setting
> flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If
> the flag is not present the driver won't setup the ACPI notification
> handler and poll() will always immediately return POLLHUP.
> 
> Signed-off-by: Babis Chalios <bchalios@...zon.es>

Generally looks sane to me, thanks.

I haven't given much brain power to whether POLLHUP is the right thing
to return when poll() is invalid; I guess you have.

I also haven't looked hard into the locking on fst->seq which is
accessed during poll() and read(). Have you?

Your vmclock_setup_notification() function can return error, but you
ignore that. Which *might* have been intentional, to allow the device
to be used even without notifications if something goes wrong... but
then the condition for poll() returning POLLHUP is wrong, because that
only checks the flag that the hypervisor set, and not whether
notifications are *actually* working.

In open() you simply read seq_count from the vmclock structure but it
might be odd at that point. Do we want to wait for it to be even, like
read() does? Or just initialise fst->seq to zero?

And is there really no devm-like helper which will free your
fp->private_data for you on release()? That seems surprising.

> ---
>  drivers/ptp/ptp_vmclock.c        | 113 +++++++++++++++++++++++++++++--
>  include/uapi/linux/vmclock-abi.h |   5 ++
>  2 files changed, 113 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c
> index b3a83b03d9c1..fa515375d54f 100644
> --- a/drivers/ptp/ptp_vmclock.c
> +++ b/drivers/ptp/ptp_vmclock.c
> @@ -5,6 +5,9 @@
>   * Copyright © 2024 Amazon.com, Inc. or its affiliates.
>   */
>  
> +#include "linux/poll.h"
> +#include "linux/types.h"
> +#include "linux/wait.h"
>  #include <linux/acpi.h>
>  #include <linux/device.h>
>  #include <linux/err.h>
> @@ -39,6 +42,7 @@ struct vmclock_state {
>  	struct resource res;
>  	struct vmclock_abi *clk;
>  	struct miscdevice miscdev;
> +	wait_queue_head_t disrupt_wait;
>  	struct ptp_clock_info ptp_clock_info;
>  	struct ptp_clock *ptp_clock;
>  	enum clocksource_ids cs_id, sys_cs_id;
> @@ -357,10 +361,15 @@ static struct ptp_clock *vmclock_ptp_register(struct device *dev,
>  	return ptp_clock_register(&st->ptp_clock_info, dev);
>  }
>  
> +struct vmclock_file_state {
> +	struct vmclock_state *st;
> +	uint32_t seq;
> +};
> +
>  static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
>  {
> -	struct vmclock_state *st = container_of(fp->private_data,
> -						struct vmclock_state, miscdev);
> +	struct vmclock_file_state *fst = fp->private_data;
> +	struct vmclock_state *st = fst->st;
>  
>  	if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ)
>  		return -EROFS;
> @@ -379,8 +388,9 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
>  static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
>  				    size_t count, loff_t *ppos)
>  {
> -	struct vmclock_state *st = container_of(fp->private_data,
> -						struct vmclock_state, miscdev);
> +	struct vmclock_file_state *fst = fp->private_data;
> +	struct vmclock_state *st = fst->st;
> +
>  	ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
>  	size_t max_count;
>  	uint32_t seq;
> @@ -402,8 +412,10 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
>  
>  		/* Pairs with hypervisor wmb */
>  		virt_rmb();
> -		if (seq == le32_to_cpu(st->clk->seq_count))
> +		if (seq == le32_to_cpu(st->clk->seq_count)) {
> +			fst->seq = seq;
>  			break;
> +		}
>  
>  		if (ktime_after(ktime_get(), deadline))
>  			return -ETIMEDOUT;
> @@ -413,10 +425,58 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
>  	return count;
>  }
>  
> +static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait)
> +{
> +	struct vmclock_file_state *fst = fp->private_data;
> +	struct vmclock_state *st = fst->st;
> +	uint32_t seq;
> +
> +	/*
> +	 * Hypervisor will not send us any notifications, so fail immediately
> +	 * to avoid having caller sleeping for ever.
> +	 */
> +	if (!(st->clk->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
> +		return POLLHUP;
> +
> +	poll_wait(fp, &st->disrupt_wait, wait);
> +
> +	seq = le32_to_cpu(st->clk->seq_count);
> +	if (fst->seq != seq)
> +		return POLLIN | POLLRDNORM;
> +
> +	return 0;
> +}
> +
> +static int vmclock_miscdev_open(struct inode *inode, struct file *fp)
> +{
> +	struct vmclock_state *st = container_of(fp->private_data,
> +						struct vmclock_state, miscdev);
> +	struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL);
> +
> +	if (!fst)
> +		return -ENOMEM;
> +
> +	fst->st = st;
> +	fst->seq = le32_to_cpu(st->clk->seq_count);
> +
> +	fp->private_data = fst;
> +
> +	return 0;
> +}
> +
> +static int vmclock_miscdev_release(struct inode *inode, struct file *fp)
> +{
> +	kfree(fp->private_data);
> +	return 0;
> +}
> +
>  static const struct file_operations vmclock_miscdev_fops = {
>  	.owner = THIS_MODULE,
> +	.open = vmclock_miscdev_open,
> +	.release = vmclock_miscdev_release,
>  	.mmap = vmclock_miscdev_mmap,
>  	.read = vmclock_miscdev_read,
> +	.poll = vmclock_miscdev_poll,
>  };
>  
>  /* module operations */
> @@ -459,6 +519,44 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data
>  	return AE_ERROR;
>  }
>  
> +static void
> +vmclock_acpi_notification_handler(acpi_handle __always_unused handle,
> +				  u32 __always_unused event, void *dev)
> +{
> +	struct device *device = dev;
> +	struct vmclock_state *st = device->driver_data;
> +
> +	wake_up_interruptible(&st->disrupt_wait);
> +}
> +
> +static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st)
> +{
> +	struct acpi_device *adev = ACPI_COMPANION(dev);
> +	acpi_status status;
> +
> +	/*
> +	 * This should never happen as this function is only called when
> +	 * has_acpi_companion(dev) is true, but the logic is sufficiently
> +	 * complex that Coverity can't see the tautology.
> +	 */
> +	if (!adev)
> +		return -ENODEV;
> +
> +	/* The device does not support notifications. Nothing else to do */
> +	if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
> +		return 0;
> +
> +	status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
> +					     vmclock_acpi_notification_handler,
> +					     dev);
> +	if (ACPI_FAILURE(status)) {
> +		dev_err(dev, "failed to install notification handler");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}
> +
>  static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
>  {
>  	struct acpi_device *adev = ACPI_COMPANION(dev);
> @@ -549,6 +647,9 @@ static int vmclock_probe(struct platform_device *pdev)
>  	if (ret)
>  		return ret;
>  
> +	init_waitqueue_head(&st->disrupt_wait);
> +	vmclock_setup_notification(dev, st);
> +
>  	/*
>  	 * If the structure is big enough, it can be mapped to userspace.
>  	 * Theoretically a guest OS even using larger pages could still
> @@ -581,6 +682,8 @@ static int vmclock_probe(struct platform_device *pdev)
>  		return -ENODEV;
>  	}
>  
> +	dev->driver_data = st;
> +
>  	dev_info(dev, "%s: registered %s%s%s\n", st->name,
>  		 st->miscdev.minor ? "miscdev" : "",
>  		 (st->miscdev.minor && st->ptp_clock) ? ", " : "",
> diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h
> index 937fe00e4f33..d320623b0118 100644
> --- a/include/uapi/linux/vmclock-abi.h
> +++ b/include/uapi/linux/vmclock-abi.h
> @@ -121,6 +121,11 @@ struct vmclock_abi {
>  	 * loaded from some save state (restored from a snapshot).
>  	 */
>  #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT     (1 << 8)
> +	/*
> +	 * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send
> +	 * a notification every time it updates seq_count to a new even number.
> +	 */
> +#define VMCLOCK_FLAG_NOTIFICATION_PRESENT       (1 << 9)
>  
>  	__u8 pad[2];
>  	__u8 clock_status;


Download attachment "smime.p7s" of type "application/pkcs7-signature" (5069 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ