lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 17 Oct 2022 17:19:18 +0800
From:   WANG Xuerui <kernel@...0n.name>
To:     Rui Wang <wangrui@...ngson.cn>,
        Huacai Chen <chenhuacai@...ngson.cn>
Cc:     Huacai Chen <chenhuacai@...nel.org>, loongarch@...ts.linux.dev,
        Xuefeng Li <lixuefeng@...ngson.cn>,
        Tiezhu Yang <yangtiezhu@...ngson.cn>,
        Guo Ren <guoren@...nel.org>,
        Jiaxun Yang <jiaxun.yang@...goat.com>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH V2] LoongArch: Add unaligned access support

On 2022/10/17 16:59, Rui Wang wrote:
>> +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
>> +{
>> +       bool user = user_mode(regs);
>> +       unsigned int res;
>> +       unsigned long origpc;
>> +       unsigned long origra;
>> +       unsigned long value = 0;
>> +       union loongarch_instruction insn;
>> +
>> +       origpc = (unsigned long)pc;
>> +       origra = regs->regs[1];
>> +
>> +       perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
>> +
>> +       /*
>> +        * This load never faults.
>> +        */
>> +       __get_inst(&insn.word, pc, user);
>> +       if (user && !access_ok(addr, 8))
>> +               goto sigbus;
>> +
>> +       if (insn.reg2i12_format.opcode == ldd_op ||
>> +               insn.reg2i14_format.opcode == ldptrd_op ||
>> +               insn.reg3_format.opcode == ldxd_op) {
>> +               res = unaligned_read(addr, &value, 8, 1);
>> +               if (res)
>> +                       goto fault;
>> +               regs->regs[insn.reg2i12_format.rd] = value;
>> +       } else if (insn.reg2i12_format.opcode == ldw_op ||
>> +               insn.reg2i14_format.opcode == ldptrw_op ||
>> +               insn.reg3_format.opcode == ldxw_op) {
>> +               res = unaligned_read(addr, &value, 4, 1);
>> +               if (res)
>> +                       goto fault;
>> +               regs->regs[insn.reg2i12_format.rd] = value;
>> +       } else if (insn.reg2i12_format.opcode == ldwu_op ||
>> +               insn.reg3_format.opcode == ldxwu_op) {
>> +               res = unaligned_read(addr, &value, 4, 0);
>> +               if (res)
>> +                       goto fault;
>> +               regs->regs[insn.reg2i12_format.rd] = value;
>> +       } else if (insn.reg2i12_format.opcode == ldh_op ||
>> +               insn.reg3_format.opcode == ldxh_op) {
>> +               res = unaligned_read(addr, &value, 2, 1);
>> +               if (res)
>> +                       goto fault;
>> +               regs->regs[insn.reg2i12_format.rd] = value;
>> +       } else if (insn.reg2i12_format.opcode == ldhu_op ||
>> +               insn.reg3_format.opcode == ldxhu_op) {
>> +               res = unaligned_read(addr, &value, 2, 0);
>> +               if (res)
>> +                       goto fault;
>> +               regs->regs[insn.reg2i12_format.rd] = value;
>> +       } else if (insn.reg2i12_format.opcode == std_op ||
>> +               insn.reg2i14_format.opcode == stptrd_op ||
>> +               insn.reg3_format.opcode == stxd_op) {
>> +               value = regs->regs[insn.reg2i12_format.rd];
>> +               res = unaligned_write(addr, value, 8);
>> +               if (res)
>> +                       goto fault;
>> +       } else if (insn.reg2i12_format.opcode == stw_op ||
>> +               insn.reg2i14_format.opcode == stptrw_op ||
>> +               insn.reg3_format.opcode == stxw_op) {
>> +               value = regs->regs[insn.reg2i12_format.rd];
>> +               res = unaligned_write(addr, value, 4);
>> +               if (res)
>> +                       goto fault;
>> +       } else if (insn.reg2i12_format.opcode == sth_op ||
>> +               insn.reg3_format.opcode == stxh_op) {
>> +               value = regs->regs[insn.reg2i12_format.rd];
>> +               res = unaligned_write(addr, value, 2);
>> +               if (res)
>> +                       goto fault;
>> +       } else if (insn.reg2i12_format.opcode == fldd_op ||
>> +               insn.reg3_format.opcode == fldxd_op) {
>> +               res = unaligned_read(addr, &value, 8, 1);
>> +               if (res)
>> +                       goto fault;
>> +               write_fpr(insn.reg2i12_format.rd, value);
>> +       } else if (insn.reg2i12_format.opcode == flds_op ||
>> +               insn.reg3_format.opcode == fldxs_op) {
>> +               res = unaligned_read(addr, &value, 4, 1);
>> +               if (res)
>> +                       goto fault;
>> +               write_fpr(insn.reg2i12_format.rd, value);
>> +       } else if (insn.reg2i12_format.opcode == fstd_op ||
>> +               insn.reg3_format.opcode == fstxd_op) {
>> +               value = read_fpr(insn.reg2i12_format.rd);
>> +               res = unaligned_write(addr, value, 8);
>> +               if (res)
>> +                       goto fault;
>> +       } else if (insn.reg2i12_format.opcode == fsts_op ||
>> +               insn.reg3_format.opcode == fstxs_op) {
>> +               value = read_fpr(insn.reg2i12_format.rd);
>> +               res = unaligned_write(addr, value, 4);
>> +               if (res)
>> +                       goto fault;
>> +       } else
>> +               goto sigbus;
> 
> So many condtional branches for linear instruction matching, use
> switch case is better?
> 
> 0000000000000238 <emulate_load_store_insn>:
> ...
>   35c:    1470180f     lu12i.w         $t3, 229568(0x380c0)
>   360:    580141af     beq             $t1, $t3, 320(0x140)    # 4a0
> <emulate_load_store_insn+0x268>
>   364:    1451000f     lu12i.w         $t3, 165888(0x28800)
>   368:    5801dd8f     beq             $t0, $t3, 476(0x1dc)    # 544
> [snip]

The code structure here is basically several switches intermingled with 
each other, each matching opcodes for one insn format, but sharing much 
code once matched. Quite difficult to express with C without some kind 
of code duplication.

But we can try:

{
	int size;
	bool is_read, is_signed;

	switch (insn.reg2i12_format.opcode) {
	case ldd_op:
		size = 8;
		is_read = true;
		is_signed = true;
		break;

	case std_op:
		size = 8;
		is_read = false;
		value = regs->regs[insn.reg2i12_format.rd];
		break;

	case ldw_op:
		size = 4;
		is_read = true;
		is_signed = true;
		break;

	case ldwu_op:
		size = 4;
		is_read = true;
		is_signed = false;
		break;

	/* other opcodes */

	default:
		/* not 2RI12 */
		break;
	}

	switch (insn.reg2i14_format.opcode) {
	case ldptrd_op:
		size = 8;
		is_read = true;
		is_signed = true;
		break;

	case ldptrw_op:
		size = 4;
		is_read = true;
		is_signed = true;
		break;

	// ...
	}

	// other formats

	if (!size) {
		/* no match */
		goto sigbus;
	}

	if (is_read)
		res = unaligned_read(addr, &value, size, is_signed);
	else
		res = unaligned_write(addr, value, size);

	if (res)
		goto fault;

	// ...
}

This way at least the data flow is clearer, and probably the codegen 
quality would benefit as well due to the clearer switch-case structures. 
(It's okay if this is not the case, it's not performance-critical anyway 
because if we ever hit this at all, performance would have already tanked.)

-- 
WANG "xen0n" Xuerui

Linux/LoongArch mailing list: https://lore.kernel.org/loongarch/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ