lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 23 Aug 2019 19:52:32 +0200
From:   Auger Eric <eric.auger@...hat.com>
To:     eric.auger.pro@...il.com, maz@...nel.org,
        linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        kvmarm@...ts.cs.columbia.edu
Cc:     yuzenghui@...wei.com, zhang.zhanghailiang@...wei.com,
        wanghaibin.wang@...wei.com, james.morse@....com,
        qemu-arm@...gnu.org, julien.thierry.kdev@...il.com,
        suzuki.poulose@....com, peter.maydell@...aro.org,
        andre.przywara@....com
Subject: Re: [PATCH] KVM: arm/arm64: vgic: Use a single IO device per
 redistributor

Hi Zenghui, Marc,

On 8/23/19 7:33 PM, Eric Auger wrote:
> At the moment we use 2 IO devices per GICv3 redistributor: one
> one for the RD_base frame and one for the SGI_base frame.
> 
> Instead we can use a single IO device per redistributor (the 2
> frames are contiguous). This saves slots on the KVM_MMIO_BUS
> which is currently limited to NR_IOBUS_DEVS (1000).
> 
> This change allows to instantiate up to 512 redistributors and may
> speed the guest boot with a large number of VCPUs.
> 
> Signed-off-by: Eric Auger <eric.auger@...hat.com>

I tested this patch with below kernel and QEMU branches:
kernel: https://github.com/eauger/linux/tree/256fix-v1
(Marc's patch + this patch)
https://github.com/eauger/qemu/tree/v4.1.0-256fix-rfc1-rc0
(header update + kvm_arm_gic_set_irq modification)

On a machine with 224 pcpus, I was able to boot a 512 vcpu guest.

As expected, qemu outputs warnings:

qemu-system-aarch64: warning: Number of SMP cpus requested (512) exceeds
the recommended cpus supported by KVM (224)
qemu-system-aarch64: warning: Number of hotpluggable cpus requested
(512) exceeds the recommended cpus supported by KVM (224)

on the guest: getconf _NPROCESSORS_ONLN returns 512

Then I have no clue about what can be expected of such overcommit config
and I have not further exercised the guest at the moment. But at least
it seems to boot properly. I also tested without overcommit and it seems
to behave as before (boot, migration).

I still need to look at the migration of > 256vcpu guest at qemu level.

Thanks

Eric


> ---
>  include/kvm/arm_vgic.h           |  1 -
>  virt/kvm/arm/vgic/vgic-init.c    |  1 -
>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 81 ++++++++++----------------------
>  3 files changed, 24 insertions(+), 59 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 7a30524a80ee..004f6e9d3b05 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -311,7 +311,6 @@ struct vgic_cpu {
>  	 * parts of the redistributor.
>  	 */
>  	struct vgic_io_device	rd_iodev;
> -	struct vgic_io_device	sgi_iodev;
>  	struct vgic_redist_region *rdreg;
>  
>  	/* Contains the attributes and gpa of the LPI pending tables. */
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index bdbc297d06fb..eaff7031a089 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -192,7 +192,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>  	int i;
>  
>  	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
> -	vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
>  
>  	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
>  	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index c45e2d7e942f..400067085cab 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -515,7 +515,8 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
>  		VGIC_ACCESS_32bit),
>  };
>  
> -static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
> +static const struct vgic_register_region vgic_v3_rd_registers[] = {
> +	/* RD_base registers */
>  	REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
>  		vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
>  		VGIC_ACCESS_32bit),
> @@ -540,44 +541,42 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
>  	REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
>  		vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
>  		VGIC_ACCESS_32bit),
> -};
> -
> -static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
> -	REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
> +	/* SGI_base registers */
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
>  		vgic_mmio_read_group, vgic_mmio_write_group, 4,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
>  		vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
>  		vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
> +	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
>  		vgic_mmio_read_pending, vgic_mmio_write_spending,
>  		vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
> +	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
>  		vgic_mmio_read_pending, vgic_mmio_write_cpending,
>  		vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
> +	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
>  		vgic_mmio_read_active, vgic_mmio_write_sactive,
>  		NULL, vgic_mmio_uaccess_write_sactive,
>  		4, VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
> +	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
>  		vgic_mmio_read_active, vgic_mmio_write_cactive,
>  		NULL, vgic_mmio_uaccess_write_cactive,
>  		4, VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
>  		vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
>  		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
> -	REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0,
>  		vgic_mmio_read_config, vgic_mmio_write_config, 8,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0,
>  		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
>  		VGIC_ACCESS_32bit),
> -	REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
> +	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
>  		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
>  		VGIC_ACCESS_32bit),
>  };
> @@ -607,9 +606,8 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
>  	struct vgic_dist *vgic = &kvm->arch.vgic;
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
> -	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
>  	struct vgic_redist_region *rdreg;
> -	gpa_t rd_base, sgi_base;
> +	gpa_t rd_base;
>  	int ret;
>  
>  	if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
> @@ -631,52 +629,31 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
>  	vgic_cpu->rdreg = rdreg;
>  
>  	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
> -	sgi_base = rd_base + SZ_64K;
>  
>  	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
>  	rd_dev->base_addr = rd_base;
>  	rd_dev->iodev_type = IODEV_REDIST;
> -	rd_dev->regions = vgic_v3_rdbase_registers;
> -	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
> +	rd_dev->regions = vgic_v3_rd_registers;
> +	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
>  	rd_dev->redist_vcpu = vcpu;
>  
>  	mutex_lock(&kvm->slots_lock);
>  	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
> -				      SZ_64K, &rd_dev->dev);
> +				      2 * SZ_64K, &rd_dev->dev);
>  	mutex_unlock(&kvm->slots_lock);
>  
>  	if (ret)
>  		return ret;
>  
> -	kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
> -	sgi_dev->base_addr = sgi_base;
> -	sgi_dev->iodev_type = IODEV_REDIST;
> -	sgi_dev->regions = vgic_v3_sgibase_registers;
> -	sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
> -	sgi_dev->redist_vcpu = vcpu;
> -
> -	mutex_lock(&kvm->slots_lock);
> -	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
> -				      SZ_64K, &sgi_dev->dev);
> -	if (ret) {
> -		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
> -					  &rd_dev->dev);
> -		goto out;
> -	}
> -
>  	rdreg->free_index++;
> -out:
> -	mutex_unlock(&kvm->slots_lock);
> -	return ret;
> +	return 0;
>  }
>  
>  static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
> -	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
>  
>  	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
> -	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
>  }
>  
>  static int vgic_register_all_redist_iodevs(struct kvm *kvm)
> @@ -826,8 +803,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
>  		iodev.base_addr = 0;
>  		break;
>  	case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
> -		iodev.regions = vgic_v3_rdbase_registers;
> -		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
> +		iodev.regions = vgic_v3_rd_registers;
> +		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
>  		iodev.base_addr = 0;
>  		break;
>  	}
> @@ -985,21 +962,11 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
>  			   int offset, u32 *val)
>  {
>  	struct vgic_io_device rd_dev = {
> -		.regions = vgic_v3_rdbase_registers,
> -		.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
> +		.regions = vgic_v3_rd_registers,
> +		.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers),
>  	};
>  
> -	struct vgic_io_device sgi_dev = {
> -		.regions = vgic_v3_sgibase_registers,
> -		.nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
> -	};
> -
> -	/* SGI_base is the next 64K frame after RD_base */
> -	if (offset >= SZ_64K)
> -		return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
> -				    val);
> -	else
> -		return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
> +	return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
>  }
>  
>  int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ