[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a6df80c4-dcd7-bbf4-394f-278e6416701e@redhat.com>
Date: Thu, 27 Jan 2022 14:48:05 +0100
From: Eric Auger <eauger@...hat.com>
To: Gavin Shan <gshan@...hat.com>, kvmarm@...ts.cs.columbia.edu
Cc: maz@...nel.org, linux-kernel@...r.kernel.org,
Jonathan.Cameron@...wei.com, pbonzini@...hat.com, will@...nel.org
Subject: Re: [PATCH v4 16/21] KVM: arm64: Support SDEI ioctl commands on VM
Hi gavin,
On 1/12/22 8:03 AM, Gavin Shan wrote:
> Hi Eric,
>
> On 11/10/21 9:48 PM, Eric Auger wrote:
>> On 8/15/21 2:13 AM, Gavin Shan wrote:
>>> This supports ioctl commands on VM to manage the various objects.
>>> It's primarily used by VMM to accomplish live migration. The ioctl
>>> commands introduced by this are highlighted as blow:
>> below
>>>
>>> * KVM_SDEI_CMD_GET_VERSION
>>> Retrieve the version of current implementation
>> which implementation, SDEI?
>>> * KVM_SDEI_CMD_SET_EVENT
>>> Add event to be exported from KVM so that guest can register
>>> against it afterwards
>>> * KVM_SDEI_CMD_GET_KEVENT_COUNT
>>> Retrieve number of registered SDEI events
>>> * KVM_SDEI_CMD_GET_KEVENT
>>> Retrieve the state of the registered SDEI event
>>> * KVM_SDEI_CMD_SET_KEVENT
>>> Populate the registered SDEI event
>> I think we really miss the full picture of what you want to achieve with
>> those IOCTLs or at least I fail to get it. Please document the UAPI
>> separately including the structs and IOCTLs.
>
> The commit log will be improved accordingly in next revision. Yep, I will
> add document for UAPI and IOCTLs :)
>
>>>
>>> Signed-off-by: Gavin Shan <gshan@...hat.com>
>>> ---
>>> arch/arm64/include/asm/kvm_sdei.h | 1 +
>>> arch/arm64/include/uapi/asm/kvm_sdei.h | 17 +++
>>> arch/arm64/kvm/arm.c | 3 +
>>> arch/arm64/kvm/sdei.c | 171 +++++++++++++++++++++++++
>>> include/uapi/linux/kvm.h | 3 +
>>> 5 files changed, 195 insertions(+)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_sdei.h
>>> b/arch/arm64/include/asm/kvm_sdei.h
>>> index 19f2d9b91f85..8f5ea947ed0e 100644
>>> --- a/arch/arm64/include/asm/kvm_sdei.h
>>> +++ b/arch/arm64/include/asm/kvm_sdei.h
>>> @@ -125,6 +125,7 @@ int kvm_sdei_hypercall(struct kvm_vcpu *vcpu);
>>> int kvm_sdei_register_notifier(struct kvm *kvm, unsigned long num,
>>> kvm_sdei_notifier notifier);
>>> void kvm_sdei_deliver(struct kvm_vcpu *vcpu);
>>> +long kvm_sdei_vm_ioctl(struct kvm *kvm, unsigned long arg);
>>> void kvm_sdei_destroy_vcpu(struct kvm_vcpu *vcpu);
>>> void kvm_sdei_destroy_vm(struct kvm *kvm);
>>> diff --git a/arch/arm64/include/uapi/asm/kvm_sdei.h
>>> b/arch/arm64/include/uapi/asm/kvm_sdei.h
>>> index 4ef661d106fe..35ff05be3c28 100644
>>> --- a/arch/arm64/include/uapi/asm/kvm_sdei.h
>>> +++ b/arch/arm64/include/uapi/asm/kvm_sdei.h
>>> @@ -57,5 +57,22 @@ struct kvm_sdei_vcpu_state {
>>> struct kvm_sdei_vcpu_regs normal_regs;
>>> };
>>> +#define KVM_SDEI_CMD_GET_VERSION 0
>>> +#define KVM_SDEI_CMD_SET_EVENT 1
>>> +#define KVM_SDEI_CMD_GET_KEVENT_COUNT 2
>>> +#define KVM_SDEI_CMD_GET_KEVENT 3
>>> +#define KVM_SDEI_CMD_SET_KEVENT 4
>>> +
>>> +struct kvm_sdei_cmd {
>>> + __u32 cmd;
>>> + union {
>>> + __u32 version;
>>> + __u32 count;
>>> + __u64 num;
>>> + struct kvm_sdei_event_state kse_state;
>>> + struct kvm_sdei_kvm_event_state kske_state;
>>> + };
>>> +};
>>> +
>>> #endif /* !__ASSEMBLY__ */
>>> #endif /* _UAPI__ASM_KVM_SDEI_H */
>>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>>> index 0c3db1ef1ba9..8d61585124b2 100644
>>> --- a/arch/arm64/kvm/arm.c
>>> +++ b/arch/arm64/kvm/arm.c
>>> @@ -1389,6 +1389,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>> return -EFAULT;
>>> return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags);
>>> }
>>> + case KVM_ARM_SDEI_COMMAND: {
>>> + return kvm_sdei_vm_ioctl(kvm, arg);
>>> + }
>>> default:
>>> return -EINVAL;
>>> }
>>> diff --git a/arch/arm64/kvm/sdei.c b/arch/arm64/kvm/sdei.c
>>> index 5f7a37dcaa77..bdd76c3e5153 100644
>>> --- a/arch/arm64/kvm/sdei.c
>>> +++ b/arch/arm64/kvm/sdei.c
>>> @@ -931,6 +931,177 @@ void kvm_sdei_create_vcpu(struct kvm_vcpu *vcpu)
>>> vcpu->arch.sdei = vsdei;
>>> }
>>> +static long kvm_sdei_set_event(struct kvm *kvm,
>>> + struct kvm_sdei_event_state *kse_state)
>>> +{
>>> + struct kvm_sdei_kvm *ksdei = kvm->arch.sdei;
>>> + struct kvm_sdei_event *kse = NULL;
>>> +
>>> + if (!kvm_sdei_is_valid_event_num(kse_state->num))
>>> + return -EINVAL;
>>> +
>>> + if (!(kse_state->type == SDEI_EVENT_TYPE_SHARED ||
>>> + kse_state->type == SDEI_EVENT_TYPE_PRIVATE))
>>> + return -EINVAL;
>>> +
>>> + if (!(kse_state->priority == SDEI_EVENT_PRIORITY_NORMAL ||
>>> + kse_state->priority == SDEI_EVENT_PRIORITY_CRITICAL))
>>> + return -EINVAL;
>>> +
>>> + kse = kvm_sdei_find_event(kvm, kse_state->num);
>>> + if (kse)
>>> + return -EEXIST;
>>> +
>>> + kse = kzalloc(sizeof(*kse), GFP_KERNEL);
>>> + if (!kse)
>>> + return -ENOMEM;
>> userspace can exhaust the mem since there is no limit. There must be a
>> max.
>>
>
> Ok. I think it's minor or corner case. For now, the number of defined SDEI
> events are only one. I leave it for something to be improved in future.
Hum ok, actually this depends on kvm_sdei_is_valid_event_num's
implementation.
>
>>> +
>>> + kse->state = *kse_state;
>>> + kse->kvm = kvm;
>>> + list_add_tail(&kse->link, &ksdei->events);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static long kvm_sdei_get_kevent_count(struct kvm *kvm, int *count)
>>> +{
>>> + struct kvm_sdei_kvm *ksdei = kvm->arch.sdei;
>>> + struct kvm_sdei_kvm_event *kske = NULL;
>>> + int total = 0;
>>> +
>>> + list_for_each_entry(kske, &ksdei->kvm_events, link) {
>>> + total++;
>>> + }
>>> +
>>> + *count = total;
>>> + return 0;
>>> +}
>>> +
>>> +static long kvm_sdei_get_kevent(struct kvm *kvm,
>>> + struct kvm_sdei_kvm_event_state *kske_state)
shouldn't the function return a int instead?
>>> +{
>>> + struct kvm_sdei_kvm *ksdei = kvm->arch.sdei;
>>> + struct kvm_sdei_kvm_event *kske = NULL;
>>> +
>>> + /*
>>> + * The first entry is fetched if the event number is invalid.
>>> + * Otherwise, the next entry is fetched.
>> why don't we return an error? What is the point returning the next entry?
>
> The SDEI events attached to the KVM are migrated one by one. Thoese
> attached
> SDEI events are linked through a linked list:
>
> (1) on !kvm_sdei_is_valid_event_num(kske_state->num), the first SDEI
> event
> in the linked list is retrieved from source VM and will be
> restored on
> the destination VM.
>
> (2) Otherwise, the next SDEI event in the linked list will be retrieved
> from source VM and restored on the destination VM.
and why not returning NULL if the num is incorrect? Why do return the
1st elem?
Eric
>
> Another option is to introduce additional struct like below. In this
> way, all
> the attached SDEI events are retrieved and restored once. In this way, the
> memory block used for storing @kvm_sdei_kvm_event_state should be allocated
> and released by QEMU. Please let me know your preference:
>
> struct xxx {
> __u64 count;
> struct kvm_sdei_kvm_event_state events;
> }
>
>>> + */
>>> + if (!kvm_sdei_is_valid_event_num(kske_state->num)) {
>>> + kske = list_first_entry_or_null(&ksdei->kvm_events,
>>> + struct kvm_sdei_kvm_event, link);
>>> + } else {
>>> + kske = kvm_sdei_find_kvm_event(kvm, kske_state->num);
>>> + if (kske && !list_is_last(&kske->link, &ksdei->kvm_events))
>>> + kske = list_next_entry(kske, link);
>> Sorry I don't get why we return the next one?
>
> Please refer to the explanation above.
>
>>> + else
>>> + kske = NULL;
>>> + }
>>> +
>>> + if (!kske)
>>> + return -ENOENT;
>>> +
>>> + *kske_state = kske->state;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static long kvm_sdei_set_kevent(struct kvm *kvm,
>>> + struct kvm_sdei_kvm_event_state *kske_state)
>>> +{
>>> + struct kvm_sdei_kvm *ksdei = kvm->arch.sdei;
>>> + struct kvm_sdei_event *kse = NULL;
>>> + struct kvm_sdei_kvm_event *kske = NULL;
>>> +
>>> + /* Sanity check */
>>> + if (!kvm_sdei_is_valid_event_num(kske_state->num))
>>> + return -EINVAL;
>>> +
>>> + if (!(kske_state->route_mode == SDEI_EVENT_REGISTER_RM_ANY ||
>>> + kske_state->route_mode == SDEI_EVENT_REGISTER_RM_PE))
>>> + return -EINVAL;
>>> +
>>> + /* Check if the event number is valid */
>>> + kse = kvm_sdei_find_event(kvm, kske_state->num);
>>> + if (!kse)
>>> + return -ENOENT;
>>> +
>>> + /* Check if the event has been populated */
>>> + kske = kvm_sdei_find_kvm_event(kvm, kske_state->num);
>>> + if (kske)
>>> + return -EEXIST;
>>> +
>>> + kske = kzalloc(sizeof(*kske), GFP_KERNEL);
>> userspace can exhaust the mem since there is no limit
>
> Ok.
>
>>> + if (!kske)
>>> + return -ENOMEM;
>>> +
>>> + kske->state = *kske_state;
>>> + kske->kse = kse;
>>> + kske->kvm = kvm;
>>> + list_add_tail(&kske->link, &ksdei->kvm_events);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +long kvm_sdei_vm_ioctl(struct kvm *kvm, unsigned long arg)
>>> +{
>>> + struct kvm_sdei_kvm *ksdei = kvm->arch.sdei;
>>> + struct kvm_sdei_cmd *cmd = NULL;
>>> + void __user *argp = (void __user *)arg;
>>> + bool copy = false;
>>> + long ret = 0;
>>> +
>>> + /* Sanity check */
>>> + if (!ksdei) {
>>> + ret = -EPERM;
>>> + goto out;
>>> + }
>>> +
>>> + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
>>> + if (!cmd) {
>>> + ret = -ENOMEM;
>>> + goto out;
>>> + }
>>> +
>>> + if (copy_from_user(cmd, argp, sizeof(*cmd))) {
>>> + ret = -EFAULT;
>>> + goto out;
>>> + }
>>> +
>>> + spin_lock(&ksdei->lock);
>>> +
>>> + switch (cmd->cmd) {
>>> + case KVM_SDEI_CMD_GET_VERSION:
>>> + copy = true;
>>> + cmd->version = (1 << 16); /* v1.0.0 */
>>> + break;
>>> + case KVM_SDEI_CMD_SET_EVENT:
>>> + ret = kvm_sdei_set_event(kvm, &cmd->kse_state);
>>> + break;
>>> + case KVM_SDEI_CMD_GET_KEVENT_COUNT:
>>> + copy = true;
>>> + ret = kvm_sdei_get_kevent_count(kvm, &cmd->count);
>>> + break;
>>> + case KVM_SDEI_CMD_GET_KEVENT:
>>> + copy = true;
>>> + ret = kvm_sdei_get_kevent(kvm, &cmd->kske_state);
>>> + break;
>>> + case KVM_SDEI_CMD_SET_KEVENT:
>>> + ret = kvm_sdei_set_kevent(kvm, &cmd->kske_state);
>>> + break;
>>> + default:
>>> + ret = -EINVAL;
>>> + }
>>> +
>>> + spin_unlock(&ksdei->lock);
>>> +out:
>>> + if (!ret && copy && copy_to_user(argp, cmd, sizeof(*cmd)))
>>> + ret = -EFAULT;
>>> +
>>> + kfree(cmd);
>>> + return ret;
>>> +}
>>> +
>>> void kvm_sdei_destroy_vcpu(struct kvm_vcpu *vcpu)
>>> {
>>> struct kvm_sdei_vcpu *vsdei = vcpu->arch.sdei;
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index d9e4aabcb31a..8cf41fd4bf86 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -1679,6 +1679,9 @@ struct kvm_xen_vcpu_attr {
>>> #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
>>> #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
>>> +/* Available with KVM_CAP_ARM_SDEI */
>>> +#define KVM_ARM_SDEI_COMMAND _IOWR(KVMIO, 0xce, struct kvm_sdei_cmd)
>>> +
>>> /* Secure Encrypted Virtualization command */
>>> enum sev_cmd_id {
>>> /* Guest initialization commands */
>>>
>
> Thanks,
> Gavin
>
Powered by blists - more mailing lists