linux-kernel - Re: [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250520163952.596fedba@p-imbrenda>
Date: Tue, 20 May 2025 16:39:52 +0200
From: Claudio Imbrenda <imbrenda@...ux.ibm.com>
To: Nina Schoetterl-Glausch <nsg@...ux.ibm.com>
Cc: linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        linux-s390@...r.kernel.org, frankja@...ux.ibm.com,
        borntraeger@...ibm.com, seiden@...ux.ibm.com, nrb@...ux.ibm.com,
        david@...hat.com, hca@...ux.ibm.com, agordeev@...ux.ibm.com,
        svens@...ux.ibm.com, gor@...ux.ibm.com
Subject: Re: [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c

On Tue, 20 May 2025 14:49:55 +0200
Nina Schoetterl-Glausch <nsg@...ux.ibm.com> wrote:

> On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> > Refactor some functions in priv.c to make them more readable.
> > 
> > handle_{iske,rrbe,sske}: move duplicated checks into a single function.
> > handle_{pfmf,epsw}: improve readability.
> > handle_lpswe{,y}: merge implementations since they are almost the same.
> > 
> > Use u64_replace_bits() where it makes sense.
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@...ux.ibm.com>
> > ---
> >  arch/s390/kvm/kvm-s390.h |  15 ++
> >  arch/s390/kvm/priv.c     | 288 ++++++++++++++++++---------------------
> >  2 files changed, 148 insertions(+), 155 deletions(-)
> >   
> [...]
> 
> > diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> > index 758cefb5bac7..1a26aa591c2e 100644
> > --- a/arch/s390/kvm/priv.c
> > +++ b/arch/s390/kvm/priv.c
> > @@ -14,6 +14,7 @@
> >  #include <linux/mm_types.h>
> >  #include <linux/pgtable.h>
> >  #include <linux/io.h>
> > +#include <linux/bitfield.h>
> >  #include <asm/asm-offsets.h>
> >  #include <asm/facility.h>
> >  #include <asm/current.h>
> > @@ -253,29 +254,50 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
> >  	return 0;
> >  }
> >  
> > +struct skeys_ops_state {
> > +	int reg1;
> > +	int reg2;
> > +	int rc;
> > +	unsigned long gaddr;
> > +};
> > +
> > +static bool skeys_common_checks(struct kvm_vcpu *vcpu, struct skeys_ops_state *state, bool abs)
> > +{
> > +	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) {
> > +		state->rc = kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > +		return true;
> > +	}
> > +
> > +	state->rc = try_handle_skey(vcpu);
> > +	if (state->rc)
> > +		return true;
> > +
> > +	kvm_s390_get_regs_rre(vcpu, &state->reg1, &state->reg2);
> > +
> > +	state->gaddr = vcpu->run->s.regs.gprs[state->reg2] & PAGE_MASK;
> > +	state->gaddr = kvm_s390_logical_to_effective(vcpu, state->gaddr);
> > +	if (!abs)
> > +		state->gaddr = kvm_s390_real_to_abs(vcpu, state->gaddr);
> > +
> > +	return false;
> > +}  
> 
> I don't really like this function, IMO it makes the calling functions harder to read.
> If it were just a chain of checks it would be fine, but with the differing control flow
> based on the abs parameter and the complex return value it becomes too complicated.

I'll try to improve it

> 
> > +
> >  static int handle_iske(struct kvm_vcpu *vcpu)
> >  {
> > -	unsigned long gaddr, vmaddr;
> > +	struct skeys_ops_state state;
> > +	unsigned long vmaddr;
> >  	unsigned char key;
> > -	int reg1, reg2;
> >  	bool unlocked;
> > +	u64 *r1;
> >  	int rc;
> >  
> >  	vcpu->stat.instruction_iske++;
> >  
> > -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);  
> 
> How about a macro INJECT_PGM_ON: INJECT_PGM_ON(kvm_s390_problem_state(vcpu), PGM_PRIVILEGED_OP)

no, I would like to avoid hiding control flow in a macro

> 
> 
> > -
> > -	rc = try_handle_skey(vcpu);
> > -	if (rc)
> > -		return rc != -EAGAIN ? rc : 0;  
> 
> You are not replicating this behavior, are you?

no, but it's fine, we can afford a useless trip to userspace literally
once in the lifetime of the guest

> > -
> > -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);  
> 
> You could introduce a helper
> 
> void _kvm_s390_get_gpr_ptrs_rre(vcpu, u64 **reg1, u64 **reg2)
> {
> 	int r1, r2;
> 
> 	kvm_s390_get_regs_rre(vcpu, &r1, &r2);
> 	*reg1 = &vcpu->run->s.regs.gprs[r1];
> 	*reg2 = &vcpu->run->s.regs.gprs[r2];
> }
> 
> which would remove some clutter from the original function implementations.
> 
> > +	if (skeys_common_checks(vcpu, &state, false))
> > +		return state.rc;
> > +	r1 = vcpu->run->s.regs.gprs + state.reg1;
> >  
> > -	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> > -	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
> > -	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
> > -	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
> > +	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
> >  	if (kvm_is_error_hva(vmaddr))
> >  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >  retry:
> > @@ -296,33 +318,23 @@ static int handle_iske(struct kvm_vcpu *vcpu)
> >  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >  	if (rc < 0)
> >  		return rc;
> > -	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
> > -	vcpu->run->s.regs.gprs[reg1] |= key;
> > +	*r1 = u64_replace_bits(*r1, key, 0xff);
> >  	return 0;
> >  }
> >  
> >   
> [...]
> 
> >  retry:
> > @@ -353,40 +365,30 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
> >  static int handle_sske(struct kvm_vcpu *vcpu)
> >  {
> >  	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
> > +	struct skeys_ops_state state;
> >  	unsigned long start, end;
> >  	unsigned char key, oldkey;
> > -	int reg1, reg2;
> > +	bool nq, mr, mc, mb;
> >  	bool unlocked;
> > +	u64 *r1, *r2;
> >  	int rc;
> >  
> >  	vcpu->stat.instruction_sske++;
> >  
> > -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > -
> > -	rc = try_handle_skey(vcpu);
> > -	if (rc)
> > -		return rc != -EAGAIN ? rc : 0;
> > -
> > -	if (!test_kvm_facility(vcpu->kvm, 8))
> > -		m3 &= ~SSKE_MB;
> > -	if (!test_kvm_facility(vcpu->kvm, 10))
> > -		m3 &= ~(SSKE_MC | SSKE_MR);
> > -	if (!test_kvm_facility(vcpu->kvm, 14))
> > -		m3 &= ~SSKE_NQ;
> > +	mb = test_kvm_facility(vcpu->kvm, 8) && (m3 & SSKE_MB);
> > +	mr = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MR);
> > +	mc = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MC);
> > +	nq = test_kvm_facility(vcpu->kvm, 14) && (m3 & SSKE_NQ);  
> 
> That is indeed much nicer.
> 
> >  
> > -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> > +	/* start already designates an absolute address if MB is set */
> > +	if (skeys_common_checks(vcpu, &state, mb))
> > +		return state.rc;
> >  
> > -	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
> > -	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> > -	start = kvm_s390_logical_to_effective(vcpu, start);
> > -	if (m3 & SSKE_MB) {
> > -		/* start already designates an absolute address */
> > -		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
> > -	} else {
> > -		start = kvm_s390_real_to_abs(vcpu, start);
> > -		end = start + PAGE_SIZE;
> > -	}
> > +	start = state.gaddr;
> > +	end = mb ? ALIGN(start + 1, _SEGMENT_SIZE) : start + PAGE_SIZE;  
> 
> Alternatively you could do ALIGN_DOWN(start, _SEGMENT_SIZE) + _SEGMENT_SIZE,
> which seems a bit easier to read, but it's really minor.
> 
> > +	r1 = vcpu->run->s.regs.gprs + state.reg1;
> > +	r2 = vcpu->run->s.regs.gprs + state.reg2;
> > +	key = *r1 & 0xfe;
> >  
> >  	while (start != end) {
> >  		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
> > @@ -396,9 +398,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
> >  			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
> >  
> >  		mmap_read_lock(current->mm);
> > -		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
> > -						m3 & SSKE_NQ, m3 & SSKE_MR,
> > -						m3 & SSKE_MC);
> > +		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, nq, mr, mc);
> >  
> >  		if (rc < 0) {
> >  			rc = fixup_user_fault(current->mm, vmaddr,
> > @@ -415,23 +415,21 @@ static int handle_sske(struct kvm_vcpu *vcpu)
> >  		start += PAGE_SIZE;
> >  	}
> >  
> > -	if (m3 & (SSKE_MC | SSKE_MR)) {
> > -		if (m3 & SSKE_MB) {
> > +	if (mc || mr) {
> > +		if (mb) {
> >  			/* skey in reg1 is unpredictable */
> >  			kvm_s390_set_psw_cc(vcpu, 3);
> >  		} else {
> >  			kvm_s390_set_psw_cc(vcpu, rc);
> > -			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
> > -			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
> > +			*r1 = u64_replace_bits(*r1, oldkey << 8, 0xff00);  
> 
> Uh, u64_replace_bits does the shift for you, no?
> So it should be u64_replace_bits(*r1, oldkey, 0xff00)
> 
> You could also do u64p_replace_bits(r1, oldkey, 0xff00) but I'd actually prefer the assignment
> as you do it.

yeahhhhhh I think I'll completely rewrite those parts using bitfields
and structs / unions

> 
> >  		}
> >  	}
> > -	if (m3 & SSKE_MB) {
> > -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
> > -			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
> > -		else
> > -			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
> > +	if (mb) {
> >  		end = kvm_s390_logical_to_effective(vcpu, end);
> > -		vcpu->run->s.regs.gprs[reg2] |= end;
> > +		if (kvm_s390_is_amode_64(vcpu))
> > +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> > +		else
> > +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);  
> 
> This does not work because of the implicit shift.
> So you need to use gpa_to_gfn(end) instead.
> (I think I would prefer using start instead of end, since it better shows
> the interruptible nature of the instruction, but start == end if
> we get here so ...)
> 
> >  	}
> >  	return 0;
> >  }
> > @@ -773,46 +771,28 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
> >  	return 0;
> >  }
> >  
> > -static int handle_lpswe(struct kvm_vcpu *vcpu)
> > +static int handle_lpswe_y(struct kvm_vcpu *vcpu, bool lpswey)
> >  {
> >  	psw_t new_psw;
> >  	u64 addr;
> >  	int rc;
> >  	u8 ar;
> >  
> > -	vcpu->stat.instruction_lpswe++;
> > -
> > -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> > -
> > -	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> > -	if (addr & 7)
> > -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> > -	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
> > -	if (rc)
> > -		return kvm_s390_inject_prog_cond(vcpu, rc);
> > -	vcpu->arch.sie_block->gpsw = new_psw;
> > -	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
> > -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> > -	return 0;
> > -}
> > -
> > -static int handle_lpswey(struct kvm_vcpu *vcpu)
> > -{
> > -	psw_t new_psw;
> > -	u64 addr;
> > -	int rc;
> > -	u8 ar;
> > -
> > -	vcpu->stat.instruction_lpswey++;
> > +	if (lpswey)
> > +		vcpu->stat.instruction_lpswey++;
> > +	else
> > +		vcpu->stat.instruction_lpswe++;
> >  
> > -	if (!test_kvm_facility(vcpu->kvm, 193))
> > +	if (lpswey && !test_kvm_facility(vcpu->kvm, 193))
> >  		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
> >  
> >  	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> >  		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> >  
> > -	addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> > +	if (!lpswey)
> > +		addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> > +	else
> > +		addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> >  	if (addr & 7)
> >  		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);  
> 
> I'd prefer a helper function _do_lpswe_y_swap(struct kvm_vcpu *vcpu, gpa_t addr)
> 
> and then just
> 
> static int handle_lpswey(struct kvm_vcpu *vcpu)
> {
>         u64 addr;
>         u8 ar;
> 
>         vcpu->stat.instruction_lpswey++;
> 
>         if (!test_kvm_facility(vcpu->kvm, 193))
>                 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
> 
>         addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> 	return _do_lpswe_y_swap(vcpu, addr);
> }
> 
> Makes it easier to read IMO because of the simpler control flow.

hmmm you have a point

> >  
> > @@ -1034,7 +1014,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
> >  	case 0xb1:
> >  		return handle_stfl(vcpu);
> >  	case 0xb2:
> > -		return handle_lpswe(vcpu);
> > +		return handle_lpswe_y(vcpu, false);
> >  	default:
> >  		return -EOPNOTSUPP;
> >  	}
> > @@ -1043,42 +1023,50 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
> >  static int handle_epsw(struct kvm_vcpu *vcpu)
> >  {
> >  	int reg1, reg2;
> > +	u64 *r1, *r2;
> >  
> >  	vcpu->stat.instruction_epsw++;
> >  
> >  	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> > +	r1 = vcpu->run->s.regs.gprs + reg1;
> > +	r2 = vcpu->run->s.regs.gprs + reg2;
> >  
> >  	/* This basically extracts the mask half of the psw. */
> > -	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
> > -	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
> > -	if (reg2) {
> > -		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
> > -		vcpu->run->s.regs.gprs[reg2] |=
> > -			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
> > -	}
> > +	*r1 = u64_replace_bits(*r1, vcpu->arch.sie_block->gpsw.mask >> 32, 0xffffffff);
> > +	if (reg2)
> > +		*r2 = u64_replace_bits(*r2, vcpu->arch.sie_block->gpsw.mask, 0xffffffff);  
> 
> LGTM although I don't hate the original implementation, which is very easy to understand
> compared to u64_replace_bits whose implementation is anything but.

yeah I agree

> It would be nice to make gprs a union, which I think should be fine from a backwards
> compatibility point of view. So:
> 
> struct kvm_sync_regs {
> 	__u64 prefix;	/* prefix register */
> 	union {
> 		__u64 gprs[16];	/* general purpose registers */
> 		struct { __u32 h; __u32 l} gprs32[16];
> 		struct { __u16 hh; __u16 hl; ...} gprs16[16];
> 		... 
> ...
> 
> But I don't expect you to do the refactor.
> You could of course also contribute documentation to bitfield.h :)

ehhhhhhh

> 
> >  	return 0;
> >  }  
> 
> [...]
> 
> >  static int handle_pfmf(struct kvm_vcpu *vcpu)
> >  {  
> 
> [...]
> 
> > -	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
> > -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
> > -			vcpu->run->s.regs.gprs[reg2] = end;
> > -		} else {
> > -			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
> > -			end = kvm_s390_logical_to_effective(vcpu, end);
> > -			vcpu->run->s.regs.gprs[reg2] |= end;
> > -		}
> > +	if (r1.fsc) {
> > +		u64 *r2 = vcpu->run->s.regs.gprs + reg2;
> > +
> > +		end = kvm_s390_logical_to_effective(vcpu, end);
> > +		if (kvm_s390_is_amode_64(vcpu))
> > +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> > +		else
> > +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);  
> 
> Same issue as above regarding the shift.
> 
> >  	}
> >  	return 0;
> >  }
> > @@ -1361,8 +1338,9 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
> >  	reg = reg1;
> >  	nr_regs = 0;
> >  	do {
> > -		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
> > -		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
> > +		u64 *cr = vcpu->arch.sie_block->gcr + reg;
> > +
> > +		*cr = u64_replace_bits(*cr, ctl_array[nr_regs++], 0xffffffff);
> >  		if (reg == reg3)
> >  			break;
> >  		reg = (reg + 1) % 16;
> > @@ -1489,7 +1467,7 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
> >  	case 0x62:
> >  		return handle_ri(vcpu);
> >  	case 0x71:
> > -		return handle_lpswey(vcpu);
> > +		return handle_lpswe_y(vcpu, true);
> >  	default:
> >  		return -EOPNOTSUPP;
> >  	}  
>