linux-kernel - Re: [V4 6/8] KVM: selftests: add library for creating/interacting with SEV guests

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMkAt6oZQc4jqF7FOXOKkpbP3c4NXxPumVVjX9gXwPCh-zbtYg@mail.gmail.com>
Date:   Mon, 17 Oct 2022 10:32:41 -0600
From:   Peter Gonda <pgonda@...gle.com>
To:     Sean Christopherson <seanjc@...gle.com>
Cc:     kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
        marcorr@...gle.com, michael.roth@....com, thomas.lendacky@....com,
        joro@...tes.org, mizhang@...gle.com, pbonzini@...hat.com,
        andrew.jones@...ux.dev
Subject: Re: [V4 6/8] KVM: selftests: add library for creating/interacting
 with SEV guests

On Thu, Oct 6, 2022 at 12:25 PM Sean Christopherson <seanjc@...gle.com> wrote:
>
> On Mon, Aug 29, 2022, Peter Gonda wrote:
> > Add interfaces to allow tests to create/manage SEV guests. The
> > additional state associated with these guests is encapsulated in a new
> > struct sev_vm, which is a light wrapper around struct kvm_vm. These
> > VMs will use vm_set_memory_encryption() and vm_get_encrypted_phy_pages()
> > under the covers to configure and sync up with the core kvm_util
> > library on what should/shouldn't be treated as encrypted memory.
> >
> > Originally-by: Michael Roth <michael.roth@....com>
> > Signed-off-by: Peter Gonda <pgonda@...gle.com>
> > ---
> >  tools/testing/selftests/kvm/Makefile          |   1 +
> >  .../selftests/kvm/include/kvm_util_base.h     |   3 +
> >  .../selftests/kvm/include/x86_64/sev.h        |  47 ++++
> >  tools/testing/selftests/kvm/lib/x86_64/sev.c  | 232 ++++++++++++++++++
> >  4 files changed, 283 insertions(+)
> >  create mode 100644 tools/testing/selftests/kvm/include/x86_64/sev.h
> >  create mode 100644 tools/testing/selftests/kvm/lib/x86_64/sev.c
> >
> > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> > index 23649c5d42fc..0a70e50f0498 100644
> > --- a/tools/testing/selftests/kvm/Makefile
> > +++ b/tools/testing/selftests/kvm/Makefile
> > @@ -56,6 +56,7 @@ LIBKVM_x86_64 += lib/x86_64/processor.c
> >  LIBKVM_x86_64 += lib/x86_64/svm.c
> >  LIBKVM_x86_64 += lib/x86_64/ucall.c
> >  LIBKVM_x86_64 += lib/x86_64/vmx.c
> > +LIBKVM_x86_64 += lib/x86_64/sev.c
> >
> >  LIBKVM_aarch64 += lib/aarch64/gic.c
> >  LIBKVM_aarch64 += lib/aarch64/gic_v3.c
> > diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
> > index 489e8c833e5f..0927e262623d 100644
> > --- a/tools/testing/selftests/kvm/include/kvm_util_base.h
> > +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
> > @@ -69,6 +69,7 @@ struct userspace_mem_regions {
> >  /* Memory encryption policy/configuration. */
> >  struct vm_memcrypt {
> >       bool enabled;
> > +     bool encrypted;
> >       int8_t enc_by_default;
> >       bool has_enc_bit;
> >       int8_t enc_bit;
> > @@ -831,6 +832,8 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
> >
> >  static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
> >  {
> > +     TEST_ASSERT(!vm->memcrypt.encrypted,
>
> vm->protected
>
> > +                 "Encrypted guests have their page tables encrypted so gva2gpa conversions are not possible.");
>
> Unnecessarily verbose, e.g.
>
>                     "Protected VMs have private, inaccessible page tables");
>
> > +#define CPUID_MEM_ENC_LEAF 0x8000001f
> > +#define CPUID_EBX_CBIT_MASK 0x3f
> > +
> > +/* Common SEV helpers/accessors. */
>
> Please drop this comment and the "Local helpers" and "SEV VM implementation" comments
> below.  There's 0% chance these comments will stay fresh as code is added and moved
> around.  They also add no value IMO, e.g. "static" makes it quite obvious it's a
> local function, and sev_* vs. sev_es_*. vs. sev_snp_* namespacing takes care of the
> rest.
>
> > +void sev_ioctl(int sev_fd, int cmd, void *data)
> > +{
> > +     int ret;
> > +     struct sev_issue_cmd arg;
> > +
> > +     arg.cmd = cmd;
> > +     arg.data = (unsigned long)data;
> > +     ret = ioctl(sev_fd, SEV_ISSUE_CMD, &arg);
> > +     TEST_ASSERT(ret == 0,
> > +                 "SEV ioctl %d failed, error: %d, fw_error: %d",
> > +                 cmd, ret, arg.error);
> > +}
> > +
> > +void kvm_sev_ioctl(struct sev_vm *sev, int cmd, void *data)
> > +{
> > +     struct kvm_sev_cmd arg = {0};
> > +     int ret;
> > +
> > +     arg.id = cmd;
> > +     arg.sev_fd = sev->fd;
> > +     arg.data = (__u64)data;
> > +
> > +     ret = ioctl(sev->vm->fd, KVM_MEMORY_ENCRYPT_OP, &arg);
> > +     TEST_ASSERT(ret == 0,
> > +                 "SEV KVM ioctl %d failed, rc: %i errno: %i (%s), fw_error: %d",
> > +                 cmd, ret, errno, strerror(errno), arg.error);
> > +}
> > +
> > +/* Local helpers. */
> > +
> > +static void sev_register_user_region(struct sev_vm *sev, void *hva, uint64_t size)
> > +{
> > +     struct kvm_enc_region range = {0};
> > +     int ret;
> > +
> > +     pr_debug("%s: hva: %p, size: %lu\n", __func__, hva, size);
> > +
> > +     range.addr = (__u64)hva;
> > +     range.size = size;
> > +
> > +     ret = ioctl(sev->vm->fd, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
> > +     TEST_ASSERT(ret == 0, "failed to register user range, errno: %i\n", errno);
> > +}
> > +
> > +static void sev_encrypt_phy_range(struct sev_vm *sev, vm_paddr_t gpa, uint64_t size)
> > +{
> > +     struct kvm_sev_launch_update_data ksev_update_data = {0};
> > +
> > +     pr_debug("%s: addr: 0x%lx, size: %lu\n", __func__, gpa, size);
> > +
> > +     ksev_update_data.uaddr = (__u64)addr_gpa2hva(sev->vm, gpa);
> > +     ksev_update_data.len = size;
> > +
> > +     kvm_sev_ioctl(sev, KVM_SEV_LAUNCH_UPDATE_DATA, &ksev_update_data);
> > +}
> > +
> > +static void sev_encrypt(struct sev_vm *sev)
> > +{
> > +     const struct sparsebit *enc_phy_pages;
> > +     struct kvm_vm *vm = sev->vm;
> > +     sparsebit_idx_t pg = 0;
> > +     vm_paddr_t gpa_start;
> > +     uint64_t memory_size;
> > +     int ctr;
> > +     struct userspace_mem_region *region;
> > +
> > +     hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
> > +             enc_phy_pages = vm_get_encrypted_phy_pages(
>
> Please don't wrap after the opening paranthesis unless it's really, really necessary.
> More for future reference since I think vm_get_encrypted_phy_pages() should be open
> coded here.  E.g. in this case, the "enc_phy_" prefix doesn't add much value, and
> dropping that makes the code easier to read overall.
>
>                 pages = vm_get_encrypted_phy_pages(sev->vm, region->region.slot,
>                                                    &gpa_start, &memory_size);
>
> > +                     sev->vm, region->region.slot, &gpa_start, &memory_size);
> > +             TEST_ASSERT(enc_phy_pages,
> > +                         "Unable to retrieve encrypted pages bitmap");
> > +             while (pg < (memory_size / vm->page_size)) {
> > +                     sparsebit_idx_t pg_cnt;
>
> s/pg_cnt/nr_pages
>
> > +
> > +                     if (sparsebit_is_clear(enc_phy_pages, pg)) {
> > +                             pg = sparsebit_next_set(enc_phy_pages, pg);
> > +                             if (!pg)
> > +                                     break;
> > +                     }
> > +
> > +                     pg_cnt = sparsebit_next_clear(enc_phy_pages, pg) - pg;
> > +                     if (pg_cnt <= 0)
> > +                             pg_cnt = 1;
> > +
> > +                     sev_encrypt_phy_range(sev,
> > +                                           gpa_start + pg * vm->page_size,
> > +                                           pg_cnt * vm->page_size);
> > +                     pg += pg_cnt;
> > +             }
> > +     }
> > +
> > +     sev->vm->memcrypt.encrypted = true;
> > +}
> > +
> > +/* SEV VM implementation. */
> > +
> > +static struct sev_vm *sev_vm_alloc(struct kvm_vm *vm)
> > +{
> > +     struct sev_user_data_status sev_status;
> > +     uint32_t eax, ebx, ecx, edx;
> > +     struct sev_vm *sev;
> > +     int sev_fd;
> > +
> > +     sev_fd = open(SEV_DEV_PATH, O_RDWR);
> > +     if (sev_fd < 0) {
> > +             pr_info("Failed to open SEV device, path: %s, error: %d, skipping test.\n",
> > +                     SEV_DEV_PATH, sev_fd);
> > +             return NULL;
>
> Printing "skipping test" is wrong as there's no guarantee the caller is going to
> skip the test.  E.g. the sole user in this series asserts, i.e. fails the test.
>
> I also think that waiting until VM allocation to perform these sanity checks is
> flawed.  Rather do these checks every time, add helpers to query SEV and SEV-ES
> support, and then use TEST_REQUIRE() to actually skip tests that require support,
> e.g.
>
>         TEST_REQUIRE(kvm_is_sev_supported());
>
> or
>
>         TEST_REQUIRE(kvm_is_sev_es_supported());
>
> Then this helper can simply assert that opening SEV_DEV_PATH succeeds.
>
> > +     }
> > +
> > +     sev_ioctl(sev_fd, SEV_PLATFORM_STATUS, &sev_status);
> > +
> > +     if (!(sev_status.api_major > SEV_FW_REQ_VER_MAJOR ||
> > +           (sev_status.api_major == SEV_FW_REQ_VER_MAJOR &&
> > +            sev_status.api_minor >= SEV_FW_REQ_VER_MINOR))) {
> > +             pr_info("SEV FW version too old. Have API %d.%d (build: %d), need %d.%d, skipping test.\n",
> > +                     sev_status.api_major, sev_status.api_minor, sev_status.build,
> > +                     SEV_FW_REQ_VER_MAJOR, SEV_FW_REQ_VER_MINOR);
> > +             return NULL;
> > +     }
> > +
> > +     sev = calloc(1, sizeof(*sev));
>
> TEST_ASSERT(sev, ...)
>
> > +     sev->fd = sev_fd;
> > +     sev->vm = vm;
> > +
> > +     /* Get encryption bit via CPUID. */
> > +     cpuid(CPUID_MEM_ENC_LEAF, &eax, &ebx, &ecx, &edx);
> > +     sev->enc_bit = ebx & CPUID_EBX_CBIT_MASK;
>
> Oh hey, another series of mine[*] that you can leverage :-)
>
> [*] https://lore.kernel.org/all/20221006005125.680782-1-seanjc@google.com
>
> > +
> > +     return sev;
> > +}
> > +
> > +void sev_vm_free(struct sev_vm *sev)
> > +{
> > +     kvm_vm_free(sev->vm);
> > +     close(sev->fd);
> > +     free(sev);
> > +}
> > +
> > +struct sev_vm *sev_vm_create(uint32_t policy, uint64_t npages)
>
> The migration test already defines sev_vm_create().  That conflict needs to be
> resolved.
>
> > +{
> > +     struct sev_vm *sev;
> > +     struct kvm_vm *vm;
> > +
> > +     /* Need to handle memslots after init, and after setting memcrypt. */
> > +     vm = vm_create_barebones();
>
> Do not use vm_create_barebones().  That API is only to be used for tests that do
> not intend to run vCPUs.
>
>
>
> > +     sev = sev_vm_alloc(vm);
> > +     if (!sev)
> > +             return NULL;
> > +     sev->sev_policy = policy;
> > +
> > +     kvm_sev_ioctl(sev, KVM_SEV_INIT, NULL);
> > +
> > +     vm->vpages_mapped = sparsebit_alloc();
>
> This is unnecessary and leaks memory, vm->vpages_mapped is allocated by
> ____vm_create().
>
> > +     vm_set_memory_encryption(vm, true, true, sev->enc_bit);
> > +     pr_info("SEV cbit: %d\n", sev->enc_bit);
> > +     vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, npages, 0);
> > +     sev_register_user_region(sev, addr_gpa2hva(vm, 0),
> > +                              npages * vm->page_size);
>
> Burying sev_register_user_region() in here is not going to be maintainble.  I
> think the best away to handle this is to add an arch hook in vm_userspace_mem_region_add()
> and automatically register regions when they're created.
>
> And with that, I believe sev_vm_create() can go away entirely and the SEV encryption
> stuff can be handled via a new vm_guest_mode.  ____vm_create() already has a gross
> __x86_64__ hook that we can tweak, e.g.
>
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 54b8d8825f5d..2d6cbca2c01a 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -238,9 +238,10 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
>         case VM_MODE_P36V47_16K:
>                 vm->pgtable_levels = 3;
>                 break;
> +       case VM_MODE_PXXV48_4K_SEV:
>         case VM_MODE_PXXV48_4K:
>  #ifdef __x86_64__
> -               kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
> +               kvm_init_vm_address_properties(vm);
>                 /*
>                  * Ignore KVM support for 5-level paging (vm->va_bits == 57),
>                  * it doesn't take effect unless a CR4.LA57 is set, which it
>
> Then kvm_init_vm_address_properties() can pivot on vm->mode to deal with SEV
> specific stuff.
>
> > +
> > +     pr_info("SEV guest created, policy: 0x%x, size: %lu KB\n",
> > +             sev->sev_policy, npages * vm->page_size / 1024);
> > +
> > +     return sev;
> > +}
> > +
> > +void sev_vm_launch(struct sev_vm *sev)
> > +{
> > +     struct kvm_sev_launch_start ksev_launch_start = {0};
> > +     struct kvm_sev_guest_status ksev_status;
> > +
> > +     ksev_launch_start.policy = sev->sev_policy;
> > +     kvm_sev_ioctl(sev, KVM_SEV_LAUNCH_START, &ksev_launch_start);
> > +     kvm_sev_ioctl(sev, KVM_SEV_GUEST_STATUS, &ksev_status);
> > +     TEST_ASSERT(ksev_status.policy == sev->sev_policy, "Incorrect guest policy.");
> > +     TEST_ASSERT(ksev_status.state == SEV_GSTATE_LUPDATE,
> > +                 "Unexpected guest state: %d", ksev_status.state);
> > +
> > +     ucall_init(sev->vm, 0);
> > +
> > +     sev_encrypt(sev);
> > +}
> > +
> > +void sev_vm_launch_measure(struct sev_vm *sev, uint8_t *measurement)
> > +{
> > +     struct kvm_sev_launch_measure ksev_launch_measure;
> > +     struct kvm_sev_guest_status ksev_guest_status;
> > +
> > +     ksev_launch_measure.len = 256;
> > +     ksev_launch_measure.uaddr = (__u64)measurement;
> > +     kvm_sev_ioctl(sev, KVM_SEV_LAUNCH_MEASURE, &ksev_launch_measure);
> > +
> > +     /* Measurement causes a state transition, check that. */
> > +     kvm_sev_ioctl(sev, KVM_SEV_GUEST_STATUS, &ksev_guest_status);
> > +     TEST_ASSERT(ksev_guest_status.state == SEV_GSTATE_LSECRET,
> > +                 "Unexpected guest state: %d", ksev_guest_status.state);
> > +}
> > +
> > +void sev_vm_launch_finish(struct sev_vm *sev)
> > +{
> > +     struct kvm_sev_guest_status ksev_status;
> > +
> > +     kvm_sev_ioctl(sev, KVM_SEV_GUEST_STATUS, &ksev_status);
> > +     TEST_ASSERT(ksev_status.state == SEV_GSTATE_LUPDATE ||
> > +                 ksev_status.state == SEV_GSTATE_LSECRET,
> > +                 "Unexpected guest state: %d", ksev_status.state);
> > +
> > +     kvm_sev_ioctl(sev, KVM_SEV_LAUNCH_FINISH, NULL);
> > +
> > +     kvm_sev_ioctl(sev, KVM_SEV_GUEST_STATUS, &ksev_status);
> > +     TEST_ASSERT(ksev_status.state == SEV_GSTATE_RUNNING,
> > +                 "Unexpected guest state: %d", ksev_status.state);
> > +}
>
> Rather than force each test to invoke these via something like setup_test_common(),
> add the same kvm_arch_vm_post_create() hook that Vishal is likely going to add,
> and then automatically do all of the launch+measure+finish stuff for non-barebones
> VMs.  That will let SEV/SEV-ES tests use __vm_create_with_vcpus() and
> __vm_create().
>
> And it'd be a little gross, but I think it'd be wortwhile to add another layer
> to the "one_vcpu" helpers to make things even more convenient, e.g.
>
> struct kvm_vm *____vm_create_with_one_vcpu(enum vm_guest_mode mode,
>                                            struct kvm_vcpu **vcpu,
>                                            uint64_t extra_mem_pages,
>                                            void *guest_code)
> {
>         struct kvm_vcpu *vcpus[1];
>         struct kvm_vm *vm;
>
>         vm = __vm_create_with_vcpus(mode, 1, extra_mem_pages, guest_code, vcpus);
>
>         *vcpu = vcpus[0];
>         return vm;
> }
>
> static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
>                                                        uint64_t extra_mem_pages,
>                                                        void *guest_code)
> {
>         return ____vm_create_with_one_vcpu(VM_MODE_DEFAULT, vcpu,
>                                            extra_mem_pages, guest_code);
> }
>
> static inline struct kvm_vm *____vm_create_with_one_vcpu(enum vm_guest_mode mode,
>                                            struct kvm_vcpu **vcpu,
>                                            uint64_t extra_mem_pages,
>                                            void *guest_code)
> ____vm_create_with_one_vcpu
>
>
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index dafe4471a6c7..593dfadb662e 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -298,9 +298,8 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
>
>         kvm_vm_elf_load(vm, program_invocation_name);
>
> -#ifdef __x86_64__
> -       vm_create_irqchip(vm);
> -#endif
> +       kvm_arch_vm_post_create(vm);
> +
>         return vm;
>  }
>
>
> [*] https://lore.kernel.org/all/YzsC4ibDqGh5qaP9@google.com

This refactor sounds good, working on this with a few changes.

Instead of kvm_init_vm_address_properties() as you suggested I've added this:

@@ -272,6 +275,8 @@ struct kvm_vm *____vm_create(enum vm_guest_mode
 mode, uint64_t nr_pages)
                vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
 #endif

+       kvm_init_vm_arch(vm);
+
        vm_open(vm);

        /* Limit to VA-bit canonical virtual addresses. */

And I need to put kvm_arch_vm_post_create() after the vCPUs are
created because the ordering we need is: KVM_SEV_INIT -> Create vCPUS
-> KVM_SEV_LAUNCH_FINISH.