[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250520163952.596fedba@p-imbrenda>
Date: Tue, 20 May 2025 16:39:52 +0200
From: Claudio Imbrenda <imbrenda@...ux.ibm.com>
To: Nina Schoetterl-Glausch <nsg@...ux.ibm.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-s390@...r.kernel.org, frankja@...ux.ibm.com,
borntraeger@...ibm.com, seiden@...ux.ibm.com, nrb@...ux.ibm.com,
david@...hat.com, hca@...ux.ibm.com, agordeev@...ux.ibm.com,
svens@...ux.ibm.com, gor@...ux.ibm.com
Subject: Re: [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c
On Tue, 20 May 2025 14:49:55 +0200
Nina Schoetterl-Glausch <nsg@...ux.ibm.com> wrote:
> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > Refactor some functions in priv.c to make them more readable.
> >
> > handle_{iske,rrbe,sske}: move duplicated checks into a single function.
> > handle{pfmf,epsw}: improve readability.
> > handle_lpswe{,y}: merge implementations since they are almost the same.
> >
> > Use u64_replace_bits() where it makes sense.
> >
> > Signed-off-by: Claudio Imbrenda <imbrenda@...ux.ibm.com>
> > ---
> > arch/s390/kvm/kvm-s390.h | 15 ++
> > arch/s390/kvm/priv.c | 288 ++++++++++++++++++---------------------
> > 2 files changed, 148 insertions(+), 155 deletions(-)
> >
> [...]
>
> > diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> > index 758cefb5bac7..1a26aa591c2e 100644
> > --- a/arch/s390/kvm/priv.c
> > +++ b/arch/s390/kvm/priv.c
> > @@ -14,6 +14,7 @@
> > #include <linux/mm_types.h>
> > #include <linux/pgtable.h>
> > #include <linux/io.h>
> > +#include <linux/bitfield.h>
> > #include <asm/asm-offsets.h>
> > #include <asm/facility.h>
> > #include <asm/current.h>
> > @@ -253,29 +254,50 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
> > return 0;
> > }
> >
> > +struct skeys_ops_state {
> > + int reg1;
> > + int reg2;
> > + int rc;
> > + unsigned long gaddr;
> > +};
> > +
> > +static bool skeys_common_checks(struct kvm_vcpu *vcpu, struct skeys_ops_state *state, bool abs)
> > +{
> > + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) {
> > + state->rc = kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > + return true;
> > + }
> > +
> > + state->rc = try_handle_skey(vcpu);
> > + if (state->rc)
> > + return true;
> > +
> > + kvm_s390_get_regs_rre(vcpu, &state->reg1, &state->reg2);
> > +
> > + state->gaddr = vcpu->run->s.regs.gprs[state->reg2] & PAGE_MASK;
> > + state->gaddr = kvm_s390_logical_to_effective(vcpu, state->gaddr);
> > + if (!abs)
> > + state->gaddr = kvm_s390_real_to_abs(vcpu, state->gaddr);
> > +
> > + return false;
> > +}
>
> I don't really like this function, IMO it makes the calling functions harder to read.
> If it was just a chain of checks it be fine, but with the differing control flow
> base on the abs parameter and the complex return value it becomes too complicated.
I'll try to improve it
>
> > +
> > static int handle_iske(struct kvm_vcpu *vcpu)
> > {
> > - unsigned long gaddr, vmaddr;
> > + struct skeys_ops_state state;
> > + unsigned long vmaddr;
> > unsigned char key;
> > - int reg1, reg2;
> > bool unlocked;
> > + u64 *r1;
> > int rc;
> >
> > vcpu->stat.instruction_iske++;
> >
> > - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
>
> How about a macro INJECT_PGM_ON: INJECT_PGM_ON(kvm_s390_problem_state(vcpu), PGM_PRIVILEGED_OP)
no, I would like to avoid hiding control flow in a macro
>
>
> > -
> > - rc = try_handle_skey(vcpu);
> > - if (rc)
> > - return rc != -EAGAIN ? rc : 0;
>
> You are not replicating this behavior, are you?
no, but it's fine, we can afford a useless trip to userspace literally
once in the lifetime of the guest
> > -
> > - kvm_s390_get_regs_rre(vcpu, ®1, ®2);
>
> You could introduce a helper
>
> void _kvm_s390_get_gpr_ptrs_rre(vcpu, u64 **reg1, u64 **reg2)
> {
> int r1, r2;
>
> kvm_s390_get_regs_rre(vcpu, &r1, &r2);
> *reg1 = &vcpu->run->s.regs.gprs[r1];
> *reg2 = &vcpu->run->s.regs.gprs[r2];
> }
>
> which would remove some clutter from the original function implementations.
>
> > + if (skeys_common_checks(vcpu, &state, false))
> > + return state.rc;
> > + r1 = vcpu->run->s.regs.gprs + state.reg1;
> >
> > - gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> > - gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
> > - gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
> > - vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
> > + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
> > if (kvm_is_error_hva(vmaddr))
> > return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> > retry:
> > @@ -296,33 +318,23 @@ static int handle_iske(struct kvm_vcpu *vcpu)
> > return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> > if (rc < 0)
> > return rc;
> > - vcpu->run->s.regs.gprs[reg1] &= ~0xff;
> > - vcpu->run->s.regs.gprs[reg1] |= key;
> > + *r1 = u64_replace_bits(*r1, key, 0xff);
> > return 0;
> > }
> >
> >
> [...]
>
> > retry:
> > @@ -353,40 +365,30 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
> > static int handle_sske(struct kvm_vcpu *vcpu)
> > {
> > unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
> > + struct skeys_ops_state state;
> > unsigned long start, end;
> > unsigned char key, oldkey;
> > - int reg1, reg2;
> > + bool nq, mr, mc, mb;
> > bool unlocked;
> > + u64 *r1, *r2;
> > int rc;
> >
> > vcpu->stat.instruction_sske++;
> >
> > - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > -
> > - rc = try_handle_skey(vcpu);
> > - if (rc)
> > - return rc != -EAGAIN ? rc : 0;
> > -
> > - if (!test_kvm_facility(vcpu->kvm, 8))
> > - m3 &= ~SSKE_MB;
> > - if (!test_kvm_facility(vcpu->kvm, 10))
> > - m3 &= ~(SSKE_MC | SSKE_MR);
> > - if (!test_kvm_facility(vcpu->kvm, 14))
> > - m3 &= ~SSKE_NQ;
> > + mb = test_kvm_facility(vcpu->kvm, 8) && (m3 & SSKE_MB);
> > + mr = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MR);
> > + mc = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MC);
> > + nq = test_kvm_facility(vcpu->kvm, 14) && (m3 & SSKE_NQ);
>
> That is indeed much nicer.
>
> >
> > - kvm_s390_get_regs_rre(vcpu, ®1, ®2);
> > + /* start already designates an absolute address if MB is set */
> > + if (skeys_common_checks(vcpu, &state, mb))
> > + return state.rc;
> >
> > - key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
> > - start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> > - start = kvm_s390_logical_to_effective(vcpu, start);
> > - if (m3 & SSKE_MB) {
> > - /* start already designates an absolute address */
> > - end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
> > - } else {
> > - start = kvm_s390_real_to_abs(vcpu, start);
> > - end = start + PAGE_SIZE;
> > - }
> > + start = state.gaddr;
> > + end = mb ? ALIGN(start + 1, _SEGMENT_SIZE) : start + PAGE_SIZE;
>
> Alternatively you could do ALIGN_DOWN(start, _SEGMENT_SIZE) + _SEGMENT_SIZE,
> which seems a bit easier to read, but it's really minor.
>
> > + r1 = vcpu->run->s.regs.gprs + state.reg1;
> > + r2 = vcpu->run->s.regs.gprs + state.reg2;
> > + key = *r1 & 0xfe;
> >
> > while (start != end) {
> > unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
> > @@ -396,9 +398,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
> > return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >
> > mmap_read_lock(current->mm);
> > - rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
> > - m3 & SSKE_NQ, m3 & SSKE_MR,
> > - m3 & SSKE_MC);
> > + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, nq, mr, mc);
> >
> > if (rc < 0) {
> > rc = fixup_user_fault(current->mm, vmaddr,
> > @@ -415,23 +415,21 @@ static int handle_sske(struct kvm_vcpu *vcpu)
> > start += PAGE_SIZE;
> > }
> >
> > - if (m3 & (SSKE_MC | SSKE_MR)) {
> > - if (m3 & SSKE_MB) {
> > + if (mc || mr) {
> > + if (mb) {
> > /* skey in reg1 is unpredictable */
> > kvm_s390_set_psw_cc(vcpu, 3);
> > } else {
> > kvm_s390_set_psw_cc(vcpu, rc);
> > - vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
> > - vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
> > + *r1 = u64_replace_bits(*r1, oldkey << 8, 0xff00);
>
> Uh, u64_replace_bits does the shift for you, no?
> So it should be u64_replace_bits(*r1, oldkey, 0xff00)
>
> You could also do u64p_replace_bits(r1, oldkey, 0xff00) but I'd actually prefer the assignment
> as you do it.
yeahhhhhh I think I'll completely rewrite those parts using bitfields
and structs / unions
>
> > }
> > }
> > - if (m3 & SSKE_MB) {
> > - if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
> > - vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
> > - else
> > - vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
> > + if (mb) {
> > end = kvm_s390_logical_to_effective(vcpu, end);
> > - vcpu->run->s.regs.gprs[reg2] |= end;
> > + if (kvm_s390_is_amode_64(vcpu))
> > + *r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> > + else
> > + *r2 = u64_replace_bits(*r2, end, 0xfffff000);
>
> This does not work because of the implicit shift.
> So you need to use gpa_to_gfn(end) instead.
> (I think I would prefer using start instead of end, since it better shows
> the interruptible nature of the instruction, but start == end if
> we get here so ...)
>
> > }
> > return 0;
> > }
> > @@ -773,46 +771,28 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
> > return 0;
> > }
> >
> > -static int handle_lpswe(struct kvm_vcpu *vcpu)
> > +static int handle_lpswe_y(struct kvm_vcpu *vcpu, bool lpswey)
> > {
> > psw_t new_psw;
> > u64 addr;
> > int rc;
> > u8 ar;
> >
> > - vcpu->stat.instruction_lpswe++;
> > -
> > - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > -
> > - addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> > - if (addr & 7)
> > - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> > - rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
> > - if (rc)
> > - return kvm_s390_inject_prog_cond(vcpu, rc);
> > - vcpu->arch.sie_block->gpsw = new_psw;
> > - if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
> > - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> > - return 0;
> > -}
> > -
> > -static int handle_lpswey(struct kvm_vcpu *vcpu)
> > -{
> > - psw_t new_psw;
> > - u64 addr;
> > - int rc;
> > - u8 ar;
> > -
> > - vcpu->stat.instruction_lpswey++;
> > + if (lpswey)
> > + vcpu->stat.instruction_lpswey++;
> > + else
> > + vcpu->stat.instruction_lpswe++;
> >
> > - if (!test_kvm_facility(vcpu->kvm, 193))
> > + if (lpswey && !test_kvm_facility(vcpu->kvm, 193))
> > return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
> >
> > if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> >
> > - addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> > + if (!lpswey)
> > + addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> > + else
> > + addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> > if (addr & 7)
> > return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
>
> I'd prefer a helper function _do_lpswe_y_swap(struct kvm_vcpu *vcpu, gpa_t addr)
>
> and then just
>
> static int handle_lpswey(struct kvm_vcpu *vcpu)
> {
> u64 addr;
> u8 ar;
>
> vcpu->stat.instruction_lpswey++;
>
> if (!test_kvm_facility(vcpu->kvm, 193))
> return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
>
> addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> return _do_lpswe_y_swap(vcpu, addr);
> }
>
> Makes it easier to read IMO because of the simpler control flow.
hmmm you have a point
> >
> > @@ -1034,7 +1014,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
> > case 0xb1:
> > return handle_stfl(vcpu);
> > case 0xb2:
> > - return handle_lpswe(vcpu);
> > + return handle_lpswe_y(vcpu, false);
> > default:
> > return -EOPNOTSUPP;
> > }
> > @@ -1043,42 +1023,50 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
> > static int handle_epsw(struct kvm_vcpu *vcpu)
> > {
> > int reg1, reg2;
> > + u64 *r1, *r2;
> >
> > vcpu->stat.instruction_epsw++;
> >
> > kvm_s390_get_regs_rre(vcpu, ®1, ®2);
> > + r1 = vcpu->run->s.regs.gprs + reg1;
> > + r2 = vcpu->run->s.regs.gprs + reg2;
> >
> > /* This basically extracts the mask half of the psw. */
> > - vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
> > - vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
> > - if (reg2) {
> > - vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
> > - vcpu->run->s.regs.gprs[reg2] |=
> > - vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
> > - }
> > + *r1 = u64_replace_bits(*r1, vcpu->arch.sie_block->gpsw.mask >> 32, 0xffffffff);
> > + if (reg2)
> > + *r2 = u64_replace_bits(*r2, vcpu->arch.sie_block->gpsw.mask, 0xffffffff);
>
> LGTM although I don't hate the original implementation, which is very easy to understand
> compared to u64_replace_bits whose implementation is anything but.
yeah I agree
> It would be nice to make gprs a union, which I think should be fine from a backwards
> compatibility point of view. So:
>
> struct kvm_sync_regs {
> __u64 prefix; /* prefix register */
> union {
> __u64 gprs[16]; /* general purpose registers */
> struct { __u32 h; __u32 l} gprs32[16];
> struct { __u16 hh; __u16 hl; ...} gprs16[16];
> ...
> ...
>
> But I don't expect you to do the refactor.
> You could of course also contribute documentation to bitfield.h :)
ehhhhhhh
>
> > return 0;
> > }
>
> [...]
>
> > static int handle_pfmf(struct kvm_vcpu *vcpu)
> > {
>
> [...]
>
> > - if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
> > - if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
> > - vcpu->run->s.regs.gprs[reg2] = end;
> > - } else {
> > - vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
> > - end = kvm_s390_logical_to_effective(vcpu, end);
> > - vcpu->run->s.regs.gprs[reg2] |= end;
> > - }
> > + if (r1.fsc) {
> > + u64 *r2 = vcpu->run->s.regs.gprs + reg2;
> > +
> > + end = kvm_s390_logical_to_effective(vcpu, end);
> > + if (kvm_s390_is_amode_64(vcpu))
> > + *r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> > + else
> > + *r2 = u64_replace_bits(*r2, end, 0xfffff000);
>
> Same issue as above regarding the shift.
>
> > }
> > return 0;
> > }
> > @@ -1361,8 +1338,9 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
> > reg = reg1;
> > nr_regs = 0;
> > do {
> > - vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
> > - vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
> > + u64 *cr = vcpu->arch.sie_block->gcr + reg;
> > +
> > + *cr = u64_replace_bits(*cr, ctl_array[nr_regs++], 0xffffffff);
> > if (reg == reg3)
> > break;
> > reg = (reg + 1) % 16;
> > @@ -1489,7 +1467,7 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
> > case 0x62:
> > return handle_ri(vcpu);
> > case 0x71:
> > - return handle_lpswey(vcpu);
> > + return handle_lpswe_y(vcpu, true);
> > default:
> > return -EOPNOTSUPP;
> > }
>
Powered by blists - more mailing lists