Message-ID: <9fdc24a2288201b966864e11dd39fc3b009a1b93.camel@linux.ibm.com>
Date: Tue, 20 May 2025 14:49:55 +0200
From: Nina Schoetterl-Glausch <nsg@...ux.ibm.com>
To: Claudio Imbrenda <imbrenda@...ux.ibm.com>, linux-kernel@...r.kernel.org
Cc: kvm@...r.kernel.org, linux-s390@...r.kernel.org, frankja@...ux.ibm.com,
        borntraeger@...ibm.com, seiden@...ux.ibm.com, nrb@...ux.ibm.com,
        david@...hat.com, hca@...ux.ibm.com, agordeev@...ux.ibm.com,
        svens@...ux.ibm.com, gor@...ux.ibm.com
Subject: Re: [PATCH v1 3/5] KVM: s390: refactor some functions in priv.c

On Wed, 2025-05-14 at 18:38 +0200, Claudio Imbrenda wrote:
> Refactor some functions in priv.c to make them more readable.
> 
> handle_{iske,rrbe,sske}: move duplicated checks into a single function.
> handle{pfmf,epsw}: improve readability.
> handle_lpswe{,y}: merge implementations since they are almost the same.
> 
> Use u64_replace_bits() where it makes sense.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@...ux.ibm.com>
> ---
>  arch/s390/kvm/kvm-s390.h |  15 ++
>  arch/s390/kvm/priv.c     | 288 ++++++++++++++++++---------------------
>  2 files changed, 148 insertions(+), 155 deletions(-)
> 
[...]

> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 758cefb5bac7..1a26aa591c2e 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -14,6 +14,7 @@
>  #include <linux/mm_types.h>
>  #include <linux/pgtable.h>
>  #include <linux/io.h>
> +#include <linux/bitfield.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/facility.h>
>  #include <asm/current.h>
> @@ -253,29 +254,50 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> +struct skeys_ops_state {
> +	int reg1;
> +	int reg2;
> +	int rc;
> +	unsigned long gaddr;
> +};
> +
> +static bool skeys_common_checks(struct kvm_vcpu *vcpu, struct skeys_ops_state *state, bool abs)
> +{
> +	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) {
> +		state->rc = kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> +		return true;
> +	}
> +
> +	state->rc = try_handle_skey(vcpu);
> +	if (state->rc)
> +		return true;
> +
> +	kvm_s390_get_regs_rre(vcpu, &state->reg1, &state->reg2);
> +
> +	state->gaddr = vcpu->run->s.regs.gprs[state->reg2] & PAGE_MASK;
> +	state->gaddr = kvm_s390_logical_to_effective(vcpu, state->gaddr);
> +	if (!abs)
> +		state->gaddr = kvm_s390_real_to_abs(vcpu, state->gaddr);
> +
> +	return false;
> +}

I don't really like this function, IMO it makes the calling functions harder to read.
If it were just a chain of checks it would be fine, but with the differing control flow
based on the abs parameter and the complex return value it becomes too complicated.
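
For example, just a plain chain of checks (a sketch; the register and
address handling would stay in the callers):

static int skeys_common_checks(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

	return try_handle_skey(vcpu);
}

would read fine to me.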

> +
>  static int handle_iske(struct kvm_vcpu *vcpu)
>  {
> -	unsigned long gaddr, vmaddr;
> +	struct skeys_ops_state state;
> +	unsigned long vmaddr;
>  	unsigned char key;
> -	int reg1, reg2;
>  	bool unlocked;
> +	u64 *r1;
>  	int rc;
>  
>  	vcpu->stat.instruction_iske++;
>  
> -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

How about a macro INJECT_PGM_ON: INJECT_PGM_ON(kvm_s390_problem_state(vcpu), PGM_PRIVILEGED_OP)
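
Something along these lines (untested sketch; it relies on vcpu being in
scope, and kvm_s390_problem_state() would be a new helper wrapping the
PSW_MASK_PSTATE test):

#define INJECT_PGM_ON(cond, code)					\
	do {								\
		if (cond)						\
			return kvm_s390_inject_program_int(vcpu, (code)); \
	} while (0)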


> -
> -	rc = try_handle_skey(vcpu);
> -	if (rc)
> -		return rc != -EAGAIN ? rc : 0;

You are not replicating this behavior, are you?
> -
> -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);

You could introduce a helper

void _kvm_s390_get_gpr_ptrs_rre(struct kvm_vcpu *vcpu, u64 **reg1, u64 **reg2)
{
	int r1, r2;

	kvm_s390_get_regs_rre(vcpu, &r1, &r2);
	*reg1 = &vcpu->run->s.regs.gprs[r1];
	*reg2 = &vcpu->run->s.regs.gprs[r2];
}

which would remove some clutter from the original function implementations.

> +	if (skeys_common_checks(vcpu, &state, false))
> +		return state.rc;
> +	r1 = vcpu->run->s.regs.gprs + state.reg1;
>  
> -	gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> -	gaddr = kvm_s390_logical_to_effective(vcpu, gaddr);
> -	gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
> -	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr));
> +	vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(state.gaddr));
>  	if (kvm_is_error_hva(vmaddr))
>  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
>  retry:
> @@ -296,33 +318,23 @@ static int handle_iske(struct kvm_vcpu *vcpu)
>  		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
>  	if (rc < 0)
>  		return rc;
> -	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
> -	vcpu->run->s.regs.gprs[reg1] |= key;
> +	*r1 = u64_replace_bits(*r1, key, 0xff);
>  	return 0;
>  }
>  
> 
[...]

>  retry:
> @@ -353,40 +365,30 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
>  static int handle_sske(struct kvm_vcpu *vcpu)
>  {
>  	unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
> +	struct skeys_ops_state state;
>  	unsigned long start, end;
>  	unsigned char key, oldkey;
> -	int reg1, reg2;
> +	bool nq, mr, mc, mb;
>  	bool unlocked;
> +	u64 *r1, *r2;
>  	int rc;
>  
>  	vcpu->stat.instruction_sske++;
>  
> -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> -
> -	rc = try_handle_skey(vcpu);
> -	if (rc)
> -		return rc != -EAGAIN ? rc : 0;
> -
> -	if (!test_kvm_facility(vcpu->kvm, 8))
> -		m3 &= ~SSKE_MB;
> -	if (!test_kvm_facility(vcpu->kvm, 10))
> -		m3 &= ~(SSKE_MC | SSKE_MR);
> -	if (!test_kvm_facility(vcpu->kvm, 14))
> -		m3 &= ~SSKE_NQ;
> +	mb = test_kvm_facility(vcpu->kvm, 8) && (m3 & SSKE_MB);
> +	mr = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MR);
> +	mc = test_kvm_facility(vcpu->kvm, 10) && (m3 & SSKE_MC);
> +	nq = test_kvm_facility(vcpu->kvm, 14) && (m3 & SSKE_NQ);

That is indeed much nicer.

>  
> -	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> +	/* start already designates an absolute address if MB is set */
> +	if (skeys_common_checks(vcpu, &state, mb))
> +		return state.rc;
>  
> -	key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
> -	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
> -	start = kvm_s390_logical_to_effective(vcpu, start);
> -	if (m3 & SSKE_MB) {
> -		/* start already designates an absolute address */
> -		end = (start + _SEGMENT_SIZE) & ~(_SEGMENT_SIZE - 1);
> -	} else {
> -		start = kvm_s390_real_to_abs(vcpu, start);
> -		end = start + PAGE_SIZE;
> -	}
> +	start = state.gaddr;
> +	end = mb ? ALIGN(start + 1, _SEGMENT_SIZE) : start + PAGE_SIZE;

Alternatively you could do ALIGN_DOWN(start, _SEGMENT_SIZE) + _SEGMENT_SIZE,
which seems a bit easier to read, but it's really minor.

> +	r1 = vcpu->run->s.regs.gprs + state.reg1;
> +	r2 = vcpu->run->s.regs.gprs + state.reg2;
> +	key = *r1 & 0xfe;
>  
>  	while (start != end) {
>  		unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
> @@ -396,9 +398,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
>  			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
>  
>  		mmap_read_lock(current->mm);
> -		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
> -						m3 & SSKE_NQ, m3 & SSKE_MR,
> -						m3 & SSKE_MC);
> +		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, nq, mr, mc);
>  
>  		if (rc < 0) {
>  			rc = fixup_user_fault(current->mm, vmaddr,
> @@ -415,23 +415,21 @@ static int handle_sske(struct kvm_vcpu *vcpu)
>  		start += PAGE_SIZE;
>  	}
>  
> -	if (m3 & (SSKE_MC | SSKE_MR)) {
> -		if (m3 & SSKE_MB) {
> +	if (mc || mr) {
> +		if (mb) {
>  			/* skey in reg1 is unpredictable */
>  			kvm_s390_set_psw_cc(vcpu, 3);
>  		} else {
>  			kvm_s390_set_psw_cc(vcpu, rc);
> -			vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
> -			vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
> +			*r1 = u64_replace_bits(*r1, oldkey << 8, 0xff00);

Uh, u64_replace_bits does the shift for you, no?
So it should be u64_replace_bits(*r1, oldkey, 0xff00)

You could also do u64p_replace_bits(r1, oldkey, 0xff00) but I'd actually prefer the assignment
as you do it.
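
If I read bitfield.h correctly, the shift is derived from the lowest set
bit of the mask, i.e. conceptually:

	/* u64_replace_bits(old, val, mask) behaves roughly like: */
	old = (old & ~mask) | ((val << __bf_shf(mask)) & mask);

so u64_replace_bits(*r1, oldkey, 0xff00) already places oldkey into bits 15..8.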

>  		}
>  	}
> -	if (m3 & SSKE_MB) {
> -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT)
> -			vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
> -		else
> -			vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
> +	if (mb) {
>  		end = kvm_s390_logical_to_effective(vcpu, end);
> -		vcpu->run->s.regs.gprs[reg2] |= end;
> +		if (kvm_s390_is_amode_64(vcpu))
> +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> +		else
> +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);

This does not work because of the implicit shift.
So you need to use gpa_to_gfn(end) instead.
(I think I would prefer using start instead of end, since it better shows
the interruptible nature of the instruction, but start == end if
we get here so ...)
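
I.e. something like (sketch):

	*r2 = u64_replace_bits(*r2, gpa_to_gfn(end), PAGE_MASK);

and correspondingly gpa_to_gfn(end) with the 0xfffff000 mask, since both
masks start at bit 12.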

>  	}
>  	return 0;
>  }
> @@ -773,46 +771,28 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> -static int handle_lpswe(struct kvm_vcpu *vcpu)
> +static int handle_lpswe_y(struct kvm_vcpu *vcpu, bool lpswey)
>  {
>  	psw_t new_psw;
>  	u64 addr;
>  	int rc;
>  	u8 ar;
>  
> -	vcpu->stat.instruction_lpswe++;
> -
> -	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
> -		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
> -
> -	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> -	if (addr & 7)
> -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> -	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
> -	if (rc)
> -		return kvm_s390_inject_prog_cond(vcpu, rc);
> -	vcpu->arch.sie_block->gpsw = new_psw;
> -	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
> -		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
> -	return 0;
> -}
> -
> -static int handle_lpswey(struct kvm_vcpu *vcpu)
> -{
> -	psw_t new_psw;
> -	u64 addr;
> -	int rc;
> -	u8 ar;
> -
> -	vcpu->stat.instruction_lpswey++;
> +	if (lpswey)
> +		vcpu->stat.instruction_lpswey++;
> +	else
> +		vcpu->stat.instruction_lpswe++;
>  
> -	if (!test_kvm_facility(vcpu->kvm, 193))
> +	if (lpswey && !test_kvm_facility(vcpu->kvm, 193))
>  		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
>  
>  	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
>  		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
>  
> -	addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
> +	if (!lpswey)
> +		addr = kvm_s390_get_base_disp_s(vcpu, &ar);
> +	else
> +		addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
>  	if (addr & 7)
>  		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

I'd prefer a helper function _do_lpswe_y_swap(struct kvm_vcpu *vcpu, gpa_t addr, u8 ar)

and then just

static int handle_lpswey(struct kvm_vcpu *vcpu)
{
        u64 addr;
        u8 ar;

        vcpu->stat.instruction_lpswey++;

        if (!test_kvm_facility(vcpu->kvm, 193))
                return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);

        addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
        return _do_lpswe_y_swap(vcpu, addr, ar);
}

Makes it easier to read IMO because of the simpler control flow.
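
For completeness, the helper could look something like this (untested
sketch, lifting the body from the existing handle_lpswe; the
problem-state check would move in here as well):

static int _do_lpswe_y_swap(struct kvm_vcpu *vcpu, gpa_t addr, u8 ar)
{
	psw_t new_psw;
	int rc;

	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
	if (addr & 7)
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	vcpu->arch.sie_block->gpsw = new_psw;
	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
	return 0;
}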
>  
> @@ -1034,7 +1014,7 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
>  	case 0xb1:
>  		return handle_stfl(vcpu);
>  	case 0xb2:
> -		return handle_lpswe(vcpu);
> +		return handle_lpswe_y(vcpu, false);
>  	default:
>  		return -EOPNOTSUPP;
>  	}
> @@ -1043,42 +1023,50 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
>  static int handle_epsw(struct kvm_vcpu *vcpu)
>  {
>  	int reg1, reg2;
> +	u64 *r1, *r2;
>  
>  	vcpu->stat.instruction_epsw++;
>  
>  	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
> +	r1 = vcpu->run->s.regs.gprs + reg1;
> +	r2 = vcpu->run->s.regs.gprs + reg2;
>  
>  	/* This basically extracts the mask half of the psw. */
> -	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
> -	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
> -	if (reg2) {
> -		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
> -		vcpu->run->s.regs.gprs[reg2] |=
> -			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
> -	}
> +	*r1 = u64_replace_bits(*r1, vcpu->arch.sie_block->gpsw.mask >> 32, 0xffffffff);
> +	if (reg2)
> +		*r2 = u64_replace_bits(*r2, vcpu->arch.sie_block->gpsw.mask, 0xffffffff);

LGTM although I don't hate the original implementation, which is very easy to understand
compared to u64_replace_bits whose implementation is anything but.
It would be nice to make gprs a union, which I think should be fine from a backwards
compatibility point of view. So:

struct kvm_sync_regs {
	__u64 prefix;	/* prefix register */
	union {
		__u64 gprs[16];	/* general purpose registers */
	struct { __u32 h; __u32 l; } gprs32[16];
		struct { __u16 hh; __u16 hl; ...} gprs16[16];
		... 
...

But I don't expect you to do the refactor.
You could of course also contribute documentation to bitfield.h :)

>  	return 0;
>  }

[...]

>  static int handle_pfmf(struct kvm_vcpu *vcpu)
>  {

[...]

> -	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
> -		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
> -			vcpu->run->s.regs.gprs[reg2] = end;
> -		} else {
> -			vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
> -			end = kvm_s390_logical_to_effective(vcpu, end);
> -			vcpu->run->s.regs.gprs[reg2] |= end;
> -		}
> +	if (r1.fsc) {
> +		u64 *r2 = vcpu->run->s.regs.gprs + reg2;
> +
> +		end = kvm_s390_logical_to_effective(vcpu, end);
> +		if (kvm_s390_is_amode_64(vcpu))
> +			*r2 = u64_replace_bits(*r2, end, PAGE_MASK);
> +		else
> +			*r2 = u64_replace_bits(*r2, end, 0xfffff000);

Same issue as above regarding the shift.

>  	}
>  	return 0;
>  }
> @@ -1361,8 +1338,9 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
>  	reg = reg1;
>  	nr_regs = 0;
>  	do {
> -		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
> -		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
> +		u64 *cr = vcpu->arch.sie_block->gcr + reg;
> +
> +		*cr = u64_replace_bits(*cr, ctl_array[nr_regs++], 0xffffffff);
>  		if (reg == reg3)
>  			break;
>  		reg = (reg + 1) % 16;
> @@ -1489,7 +1467,7 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
>  	case 0x62:
>  		return handle_ri(vcpu);
>  	case 0x71:
> -		return handle_lpswey(vcpu);
> +		return handle_lpswe_y(vcpu, true);
>  	default:
>  		return -EOPNOTSUPP;
>  	}

-- 
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Wolfgang Wendt
Geschäftsführung: David Faller
Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
