lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 1 Jul 2022 07:52:03 -0700
From:   Guenter Roeck <linux@...ck-us.net>
To:     Sebastian Ene <sebastianene@...gle.com>,
        Rob Herring <robh+dt@...nel.org>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Arnd Bergmann <arnd@...db.de>,
        Dragan Cvetic <dragan.cvetic@...inx.com>
Cc:     linux-kernel@...r.kernel.org, devicetree@...r.kernel.org,
        maz@...nel.org, will@...nel.org, vdonnefort@...gle.com
Subject: Re: [PATCH v9 2/2] misc: Add a mechanism to detect stalls on guest
 vCPUs

On 7/1/22 07:40, Sebastian Ene wrote:
> This driver creates per-cpu hrtimers which are required to do the
> periodic 'pet' operation. On a conventional watchdog-core driver, the
> userspace is responsible for delivering the 'pet' events by writing to
> the particular /dev/watchdogN node. In this case we require a strong
> thread affinity to be able to account for lost time on a per-vCPU basis.
> 
> This part of the driver is the 'frontend' which is responsible for
> delivering the periodic 'pet' events, configuring the virtual peripheral
> and listening for cpu hotplug events. The other part of the driver is
> an emulated MMIO device which is part of the KVM virtual machine
> monitor and this part accounts for lost time by looking at the
> /proc/{}/task/{}/stat entries.
> 
> Signed-off-by: Sebastian Ene <sebastianene@...gle.com>

Reviewed-by: Guenter Roeck <linux@...ck-us.net>

> ---
>   drivers/misc/Kconfig               |  13 ++
>   drivers/misc/Makefile              |   1 +
>   drivers/misc/vcpu_stall_detector.c | 212 +++++++++++++++++++++++++++++
>   3 files changed, 226 insertions(+)
>   create mode 100644 drivers/misc/vcpu_stall_detector.c
> 
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index 41d2bb0ae23a..83afb41a85cf 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -483,6 +483,19 @@ config OPEN_DICE
>   
>   	  If unsure, say N.
>   
> +config VCPU_STALL_DETECTOR
> +	tristate "VCPU stall detector"
> +	select LOCKUP_DETECTOR
> +	depends on OF
> +	help
> +	  Detect CPU locks on a kvm virtual machine. This driver relies on
> +	  the hrtimers which are pinned to a CPU to do the 'pet' operation. When a
> +	  vCPU has to do a 'pet', it exits the guest through MMIO write and
> +	  the backend driver takes into account the lost ticks for this
> +	  particular CPU.
> +	  To compile this driver as a module, choose M here: the
> +	  module will be called vcpu_stall_detector.
> +
>   source "drivers/misc/c2port/Kconfig"
>   source "drivers/misc/eeprom/Kconfig"
>   source "drivers/misc/cb710/Kconfig"
> diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> index 70e800e9127f..2be8542616dd 100644
> --- a/drivers/misc/Makefile
> +++ b/drivers/misc/Makefile
> @@ -60,3 +60,4 @@ obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
>   obj-$(CONFIG_HISI_HIKEY_USB)	+= hisi_hikey_usb.o
>   obj-$(CONFIG_HI6421V600_IRQ)	+= hi6421v600-irq.o
>   obj-$(CONFIG_OPEN_DICE)		+= open-dice.o
> +obj-$(CONFIG_VCPU_STALL_DETECTOR)	+= vcpu_stall_detector.o
> \ No newline at end of file
> diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> new file mode 100644
> index 000000000000..039ac54564c1
> --- /dev/null
> +++ b/drivers/misc/vcpu_stall_detector.c
> @@ -0,0 +1,212 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// VCPU stall detector.
> +//  Copyright (C) Google, 2022
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/nmi.h>
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/param.h>
> +#include <linux/percpu.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#define REG_STATUS		(0x00)
> +#define REG_LOAD_CNT		(0x04)
> +#define REG_CURRENT_CNT		(0x08)
> +#define REG_CLOCK_FREQ_HZ	(0x0C)
> +#define REG_LEN			(0x10)
> +
> +#define DEFAULT_CLOCK_HZ	(10)
> +#define DEFAULT_TIMEOT_SEC	(8)
> +
> +struct vm_stall_detect_s {
> +	void __iomem *membase;
> +	u32 clock_freq;
> +	u32 expiration_sec;
> +	u32 ping_timeout_ms;
> +	struct hrtimer per_cpu_hrtimer;
> +	struct platform_device *dev;
> +};
> +
> +#define vcpu_stall_detect_reg_write(stall_detect, reg, value)	\
> +	iowrite32((value), (stall_detect)->membase + (reg))
> +#define vcpu_stall_detect_reg_read(stall_detect, reg)		\
> +	io32read((stall_detect)->membase + (reg))
> +
> +static struct vm_stall_detect_s __percpu *vm_stall_detect;
> +
> +static enum hrtimer_restart
> +vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
> +{
> +	struct vm_stall_detect_s *cpu_stall_detect;
> +	u32 ticks;
> +
> +	cpu_stall_detect = container_of(hrtimer, struct vm_stall_detect_s,
> +					per_cpu_hrtimer);
> +	ticks = cpu_stall_detect->clock_freq * cpu_stall_detect->expiration_sec;
> +	vcpu_stall_detect_reg_write(cpu_stall_detect, REG_LOAD_CNT, ticks);
> +	hrtimer_forward_now(hrtimer,
> +			    ms_to_ktime(cpu_stall_detect->ping_timeout_ms));
> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static void vcpu_stall_detect_start(void *arg)
> +{
> +	u32 ticks;
> +	struct vm_stall_detect_s *cpu_stall_detect = arg;
> +	struct hrtimer *hrtimer = &cpu_stall_detect->per_cpu_hrtimer;
> +
> +	vcpu_stall_detect_reg_write(cpu_stall_detect, REG_CLOCK_FREQ_HZ,
> +				    cpu_stall_detect->clock_freq);
> +
> +	/* Compute the number of ticks required for the stall detector counter
> +	 * register based on the internal clock frequency and the timeout
> +	 * value given from the device tree.
> +	 */
> +	ticks = cpu_stall_detect->clock_freq *
> +		cpu_stall_detect->expiration_sec;
> +	vcpu_stall_detect_reg_write(cpu_stall_detect, REG_LOAD_CNT, ticks);
> +
> +	/* Enable the internal clock and start the stall detector */
> +	vcpu_stall_detect_reg_write(cpu_stall_detect, REG_STATUS, 1);
> +
> +	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	hrtimer->function = vcpu_stall_detect_timer_fn;
> +	hrtimer_start(hrtimer, ms_to_ktime(cpu_stall_detect->ping_timeout_ms),
> +		      HRTIMER_MODE_REL_PINNED);
> +}
> +
> +static void vcpu_stall_detect_stop(void *arg)
> +{
> +	struct vm_stall_detect_s *cpu_stall_detect = arg;
> +	struct hrtimer *hrtimer = &cpu_stall_detect->per_cpu_hrtimer;
> +
> +	hrtimer_cancel(hrtimer);
> +
> +	/* Disable the stall detector */
> +	vcpu_stall_detect_reg_write(cpu_stall_detect, REG_STATUS, 0);
> +}
> +
> +static int start_stall_detector_on_cpu(unsigned int cpu)
> +{
> +	vcpu_stall_detect_start(this_cpu_ptr(vm_stall_detect));
> +	return 0;
> +}
> +
> +static int stop_stall_detector_on_cpu(unsigned int cpu)
> +{
> +	vcpu_stall_detect_stop(this_cpu_ptr(vm_stall_detect));
> +	return 0;
> +}
> +
> +static int vcpu_stall_detect_probe(struct platform_device *dev)
> +{
> +	int cpu, ret, err;
> +	void __iomem *membase;
> +	struct resource *r;
> +	u32 stall_detect_clock, stall_detect_timeout_sec = 0;
> +
> +	r = platform_get_resource(dev, IORESOURCE_MEM, 0);
> +	if (r == NULL)
> +		return -ENODEV;
> +
> +	vm_stall_detect = alloc_percpu(typeof(struct vm_stall_detect_s));
> +	if (!vm_stall_detect)
> +		return -ENOMEM;
> +
> +	membase = ioremap(r->start, resource_size(r));
> +	if (!membase) {
> +		ret = -ENOMEM;
> +		goto err_withmem;
> +	}
> +
> +	if (of_property_read_u32(dev->dev.of_node, "clock-frequency",
> +				 &stall_detect_clock))
> +		stall_detect_clock = DEFAULT_CLOCK_HZ;
> +
> +	if (of_property_read_u32(dev->dev.of_node, "timeout-sec",
> +				 &stall_detect_timeout_sec))
> +		stall_detect_timeout_sec = DEFAULT_TIMEOT_SEC;
> +
> +	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
> +		struct vm_stall_detect_s *cpu_stall_detect;
> +
> +		cpu_stall_detect = per_cpu_ptr(vm_stall_detect, cpu);
> +		cpu_stall_detect->membase = membase + cpu * REG_LEN;
> +		cpu_stall_detect->clock_freq = stall_detect_clock;
> +		cpu_stall_detect->expiration_sec = stall_detect_timeout_sec;
> +
> +		/* Pet the stall detector at half of its expiration timeout
> +		 * to prevent spurious resets.
> +		 */
> +		cpu_stall_detect->ping_timeout_ms = stall_detect_timeout_sec *
> +			MSEC_PER_SEC / 2;
> +		smp_call_function_single(cpu, vcpu_stall_detect_start,
> +					 cpu_stall_detect, true);
> +	}
> +
> +	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> +					"virt/vcpu_stall_detector:online",
> +					start_stall_detector_on_cpu,
> +					stop_stall_detector_on_cpu);
> +	if (err < 0) {
> +		dev_err(&dev->dev, "failed to install cpu hotplug");
> +		ret = err;
> +		goto err_withmem;
> +	}
> +
> +	return 0;
> +
> +err_withmem:
> +	free_percpu(vm_stall_detect);
> +	return ret;
> +}
> +
> +static int vcpu_stall_detect_remove(struct platform_device *dev)
> +{
> +	int cpu;
> +
> +	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
> +		struct vm_stall_detect_s *cpu_stall_detect;
> +
> +		cpu_stall_detect = per_cpu_ptr(vm_stall_detect, cpu);
> +		smp_call_function_single(cpu, vcpu_stall_detect_stop,
> +					 cpu_stall_detect, true);
> +	}
> +
> +	free_percpu(vm_stall_detect);
> +	vm_stall_detect = NULL;
> +	return 0;
> +}
> +
> +static const struct of_device_id vcpu_stall_detect_of_match[] = {
> +	{ .compatible = "qemu,vcpu-stall-detector", },
> +	{}
> +};
> +
> +MODULE_DEVICE_TABLE(of, vcpu_stall_detect_of_match);
> +
> +static struct platform_driver vcpu_stall_detect_driver = {
> +	.probe  = vcpu_stall_detect_probe,
> +	.remove = vcpu_stall_detect_remove,
> +	.driver = {
> +		.name           = KBUILD_MODNAME,
> +		.of_match_table = vcpu_stall_detect_of_match,
> +	},
> +};
> +
> +module_platform_driver(vcpu_stall_detect_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Sebastian Ene <sebastianene@...gle.com>");
> +MODULE_DESCRIPTION("VCPU stall detector");

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ