[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Zcuf1ZUvwhxBobuG@google.com>
Date: Tue, 13 Feb 2024 16:59:01 +0000
From: Sebastian Ene <sebastianene@...gle.com>
To: Oliver Upton <oliver.upton@...ux.dev>
Cc: catalin.marinas@....com, gshan@...hat.com, james.morse@....com,
mark.rutland@....com, maz@...nel.org, rananta@...gle.com,
ricarkol@...gle.com, ryan.roberts@....com, shahuang@...hat.com,
suzuki.poulose@....com, will@...nel.org, yuzenghui@...wei.com,
kvmarm@...ts.linux.dev, linux-arm-kernel@...ts.infradead.org,
linux-kernel@...r.kernel.org, kernel-team@...roid.com,
vdonnefort@...gle.com
Subject: Re: [PATCH v5 4/4] KVM: arm64: Initialize the ptdump parser with
stage-2 attributes
On Tue, Feb 13, 2024 at 12:42:42AM +0000, Oliver Upton wrote:
> On Wed, Feb 07, 2024 at 02:48:33PM +0000, Sebastian Ene wrote:
> > Define a set of attributes used by the ptdump parser to display the
> > properties of a guest memory region covered by a pagetable descriptor.
> > Build a description of the pagetable levels and initialize the parser
> > with this configuration.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@...gle.com>
> > ---
> > arch/arm64/kvm/ptdump.c | 156 ++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 156 insertions(+)
> >
> > diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
> > index a4e984da8aa7..60725d46f17b 100644
> > --- a/arch/arm64/kvm/ptdump.c
> > +++ b/arch/arm64/kvm/ptdump.c
> > @@ -14,6 +14,69 @@
> > #include <kvm_ptdump.h>
> >
> >
> > +#define ADDR_MARKER_LEN (2)
> > +#define MARKER_MSG_LEN (32)
> > +
> > +static const struct prot_bits stage2_pte_bits[] = {
> > + {
> > + .mask = PTE_VALID,
> > + .val = PTE_VALID,
> > + .set = " ",
> > + .clear = "F",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
> > + .set = "XN",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
> > + .set = "R",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
> > + .set = "W",
> > + .clear = " ",
> > + }, {
> > + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
> > + .set = "AF",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_NG,
> > + .val = PTE_NG,
> > + .set = "FnXS",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_CONT | PTE_VALID,
> > + .val = PTE_CONT | PTE_VALID,
> > + .set = "CON",
> > + .clear = " ",
> > + }, {
> > + .mask = PTE_TABLE_BIT,
> > + .val = PTE_TABLE_BIT,
> > + .set = " ",
> > + .clear = "BLK",
>
> <snip>
>
> > + }, {
> > + .mask = KVM_PGTABLE_PROT_SW0,
> > + .val = KVM_PGTABLE_PROT_SW0,
> > + .set = "SW0", /* PKVM_PAGE_SHARED_OWNED */
> > + }, {
> > + .mask = KVM_PGTABLE_PROT_SW1,
> > + .val = KVM_PGTABLE_PROT_SW1,
> > + .set = "SW1", /* PKVM_PAGE_SHARED_BORROWED */
> > + }, {
> > + .mask = KVM_PGTABLE_PROT_SW2,
> > + .val = KVM_PGTABLE_PROT_SW2,
> > + .set = "SW2",
> > + }, {
> > + .mask = KVM_PGTABLE_PROT_SW3,
> > + .val = KVM_PGTABLE_PROT_SW3,
> > + .set = "SW3",
> > + },
>
> </snip>
>
> These bits are never set in a 'normal' stage-2 PTE, does it make sense
> to carry descriptors for them here? In contexts where the SW bits are
> used it might be more useful if the ptdump used the specific meaning of
> the bit (e.g. OWNED, BORROWED, etc) instead of the generic SW%d.
>
> That can all wait for when the pKVM bits come into play though.
>
True, I guess we don't need these bits for now. We can insert them at a
later time when we will have the pKVM support and then we will have to
use their real maning for the state tracking.
> > +};
> > +
> > static int kvm_ptdump_guest_open(struct inode *inode, struct file *file);
> > static int kvm_ptdump_guest_show(struct seq_file *m, void *);
> >
> > @@ -52,6 +115,94 @@ static int kvm_ptdump_show_common(struct seq_file *m,
> > return kvm_pgtable_walk(pgtable, 0, BIT(pgtable->ia_bits), &walker);
> > }
> >
> > +static void kvm_ptdump_build_levels(struct pg_level *level, u32 start_lvl)
> > +{
> > + static const char * const level_names[] = {"PGD", "PUD", "PMD", "PTE"};
> > + u32 i = 0;
> > + u64 mask_lvl = 0;
>
> nit: _lvl adds nothing to this, and actually confused me for a sec as
> to whether the mask changed per level.
>
I will drop it from the name to avoid the confusion.
> > + if (start_lvl > 2) {
> > + pr_err("invalid start_lvl %u\n", start_lvl);
> > + return;
> > + }
>
> Can't we get something like -EINVAL out here and fail initialization?
> Otherwise breadcrumbs like this pr_err() are hard to connect to a
> specific failure.
>
Ok, I will add a return code for this function.
> > + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
> > + mask_lvl |= stage2_pte_bits[i].mask;
> > +
> > + for (i = start_lvl; i <= KVM_PGTABLE_LAST_LEVEL; i++) {
> > + level[i].name = level_names[i];
> > + level[i].num = ARRAY_SIZE(stage2_pte_bits);
> > + level[i].bits = stage2_pte_bits;
> > + level[i].mask = mask_lvl;
> > + }
> > +
> > + if (start_lvl > 0)
> > + level[start_lvl].name = level_names[0];
> > +}
> > +
> > +static int kvm_ptdump_parser_init(struct pg_state *st,
> > + struct kvm_pgtable *pgtable,
> > + struct seq_file *m)
> > +{
> > + struct addr_marker *ipa_addr_marker;
> > + char *marker_msg;
> > + struct pg_level *level_descr;
> > + struct ptdump_range *range;
> > +
> > + ipa_addr_marker = kzalloc(sizeof(struct addr_marker) * ADDR_MARKER_LEN,
> > + GFP_KERNEL_ACCOUNT);
> > + if (!ipa_addr_marker)
> > + return -ENOMEM;
> > +
> > + marker_msg = kzalloc(MARKER_MSG_LEN, GFP_KERNEL_ACCOUNT);
> > + if (!marker_msg)
> > + goto free_with_marker;
> > +
> > + level_descr = kzalloc(sizeof(struct pg_level) * (KVM_PGTABLE_LAST_LEVEL + 1),
> > + GFP_KERNEL_ACCOUNT);
> > + if (!level_descr)
> > + goto free_with_msg;
> > +
> > + range = kzalloc(sizeof(struct ptdump_range) * ADDR_MARKER_LEN,
> > + GFP_KERNEL_ACCOUNT);
> > + if (!range)
> > + goto free_with_level;
> > +
> > + kvm_ptdump_build_levels(level_descr, pgtable->start_level);
> > +
> > + snprintf(marker_msg, MARKER_MSG_LEN, "IPA bits %2u start lvl %1d",
> > + pgtable->ia_bits, pgtable->start_level);
> > +
> > + ipa_addr_marker[0].name = marker_msg;
>
> Is the dynamic name worth the added complexity? I see nothing wrong with
> exposing additional debugfs files for simple attributes like the IPA
> range and page table levels.
>
> I know it isn't *that* much, just looking for every opportunity to
> simplify further.
>
We can keep them separate, I have no strong opinion about this. I think
this was Vincent's, original suggestion to have them so I will check with
him as well.
> > + ipa_addr_marker[1].start_address = BIT(pgtable->ia_bits);
> > + range[0].end = BIT(pgtable->ia_bits);
> > +
> > + st->seq = m;
> > + st->marker = ipa_addr_marker;
> > + st->level = -1,
> > + st->pg_level = level_descr,
> > + st->ptdump.range = range;
> > + return 0;
> > +
> > +free_with_level:
> > + kfree(level_descr);
> > +free_with_msg:
> > + kfree(marker_msg);
> > +free_with_marker:
> > + kfree(ipa_addr_marker);
> > + return -ENOMEM;
> > +}
> > +
> > +static void kvm_ptdump_parser_teardown(struct pg_state *st)
> > +{
> > + const struct addr_marker *ipa_addr_marker = st->marker;
> > +
> > + kfree(ipa_addr_marker[0].name);
> > + kfree(ipa_addr_marker);
> > + kfree(st->pg_level);
> > + kfree(st->ptdump.range);
> > +}
> > +
> > static int kvm_ptdump_guest_show(struct seq_file *m, void *)
> > {
> > struct kvm *guest_kvm = m->private;
> > @@ -59,10 +210,15 @@ static int kvm_ptdump_guest_show(struct seq_file *m, void *)
> > struct pg_state parser_state = {0};
> > int ret;
> >
> > + ret = kvm_ptdump_parser_init(&parser_state, mmu->pgt, m);
> > + if (ret)
> > + return ret;
> > +
>
> Can this be done at open(), or am I missing something?
>
I guess we can do this in open() but then we will have to add again that
struct that wraps some ptdump specific state tracking. It seemed a bit cleaner in
this way. What do you think ?
> > write_lock(&guest_kvm->mmu_lock);
> > ret = kvm_ptdump_show_common(m, mmu->pgt, &parser_state);
> > write_unlock(&guest_kvm->mmu_lock);
> >
> > + kvm_ptdump_parser_teardown(&parser_state);
>
> Same question here, can this happen at close()? I guess you'll need a
> struct to encapsulate pg_state and a pointer to the VM at least.
>
Right, I tried to avoid using a separate struct as we discussed in v4.
> Actually, come to think of it, if you embed all of the data you need for
> the walker into a structure you can just do a single allocation for it
> upfront.
>
> --
> Thanks,
> Oliver
Thanks for the feedback,
Seb
Powered by blists - more mailing lists