lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190523020757.mwbux72pqjbvpqkh@ast-mbp.dhcp.thefacebook.com>
Date:   Wed, 22 May 2019 19:07:59 -0700
From:   Alexei Starovoitov <alexei.starovoitov@...il.com>
To:     Jiong Wang <jiong.wang@...ronome.com>
Cc:     daniel@...earbox.net, bpf@...r.kernel.org, netdev@...r.kernel.org,
        oss-drivers@...ronome.com, davem@...emloft.net,
        paul.burton@...s.com, udknight@...il.com, zlim.lnx@...il.com,
        illusionist.neo@...il.com, naveen.n.rao@...ux.ibm.com,
        sandipan@...ux.ibm.com, schwidefsky@...ibm.com,
        heiko.carstens@...ibm.com, jakub.kicinski@...ronome.com
Subject: Re: [PATCH v7 bpf-next 01/16] bpf: verifier: mark verified-insn with
 sub-register zext flag

On Wed, May 22, 2019 at 07:54:57PM +0100, Jiong Wang wrote:
> eBPF ISA specification requires high 32-bit cleared when low 32-bit
> sub-register is written. This applies to destination register of ALU32 etc.
> JIT back-ends must guarantee this semantic when doing code-gen. x86_64 and
> AArch64 ISA has the same semantics, so the corresponding JIT back-end
> doesn't need to do extra work.
> 
> However, 32-bit arches (arm, x86, nfp etc.) and some other 64-bit arches
> (PowerPC, SPARC etc) need to do explicit zero extension to meet this
> requirement, otherwise code like the following will fail.
> 
>   u64_value = (u64) u32_value
>   ... other uses of u64_value
> 
> This is because compiler could exploit the semantic described above and
> save those zero extensions for extending u32_value to u64_value, these JIT
> back-ends are expected to guarantee this through inserting extra zero
> extensions which however could be a significant increase on the code size.
> Some benchmarks show there could be ~40% sub-register writes out of total
> insns, meaning at least ~40% extra code-gen.
> 
> One observation is these extra zero extensions are not always necessary.
> Take above code snippet for example, it is possible u32_value will never be
> casted into a u64, the value of high 32-bit of u32_value then could be
> ignored and extra zero extension could be eliminated.
> 
> This patch implements this idea, insns defining sub-registers will be
> marked when the high 32-bit of the defined sub-register matters. For
> those unmarked insns, it is safe to eliminate high 32-bit clearnace for
> them.
> 
> Algo:
>  - Split read flags into READ32 and READ64.
> 
>  - Record index of insn that does sub-register write. Keep the index inside
>    reg state and update it during verifier insn walking.
> 
>  - A full register read on a sub-register marks its definition insn as
>    needing zero extension on dst register.
> 
>    A new sub-register write overrides the old one.
> 
>  - When propagating read64 during path pruning, also mark any insn defining
>    a sub-register that is read in the pruned path as full-register.
> 
> Reviewed-by: Jakub Kicinski <jakub.kicinski@...ronome.com>
> Signed-off-by: Jiong Wang <jiong.wang@...ronome.com>
> ---
>  include/linux/bpf_verifier.h |  14 +++-
>  kernel/bpf/verifier.c        | 175 +++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 173 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 1305ccb..60fb54e 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -36,9 +36,11 @@
>   */
>  enum bpf_reg_liveness {
>  	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
> -	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
> -	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
> -	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
> +	REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
> +	REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
> +	REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
> +	REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
> +	REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
>  };
>  
>  struct bpf_reg_state {
> @@ -131,6 +133,11 @@ struct bpf_reg_state {
>  	 * pointing to bpf_func_state.
>  	 */
>  	u32 frameno;
> +	/* Tracks subreg definition. The stored value is the insn_idx of the
> +	 * writing insn. This is safe because subreg_def is used before any insn
> +	 * patching which only happens after main verification finished.
> +	 */
> +	s32 subreg_def;
>  	enum bpf_reg_liveness live;
>  };
>  
> @@ -232,6 +239,7 @@ struct bpf_insn_aux_data {
>  	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
>  	int sanitize_stack_off; /* stack slot to be cleared */
>  	bool seen; /* this insn was processed by the verifier */
> +	bool zext_dst; /* this insn zero extends dst reg */
>  	u8 alu_state; /* used in combination with alu_limit */
>  	unsigned int orig_idx; /* original instruction index */
>  };
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 95f93544..0efccf8 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -981,6 +981,7 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
>  	__mark_reg_not_init(regs + regno);
>  }
>  
> +#define DEF_NOT_SUBREG	(-1)
>  static void init_reg_state(struct bpf_verifier_env *env,
>  			   struct bpf_func_state *state)
>  {
> @@ -991,6 +992,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
>  		mark_reg_not_init(env, regs, i);
>  		regs[i].live = REG_LIVE_NONE;
>  		regs[i].parent = NULL;
> +		regs[i].subreg_def = DEF_NOT_SUBREG;

shouldn't it be moved into __mark_reg_unknown ?
Also what about my old suggestion to use DEF_NOT_SUBREG==0
to be on safer side when we zero things out?

>  	}
>  
>  	/* frame pointer */
> @@ -1136,7 +1138,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
>   */
>  static int mark_reg_read(struct bpf_verifier_env *env,
>  			 const struct bpf_reg_state *state,
> -			 struct bpf_reg_state *parent)
> +			 struct bpf_reg_state *parent, u8 flag)
>  {
>  	bool writes = parent == state->parent; /* Observe write marks */
>  	int cnt = 0;
> @@ -1151,17 +1153,26 @@ static int mark_reg_read(struct bpf_verifier_env *env,
>  				parent->var_off.value, parent->off);
>  			return -EFAULT;
>  		}
> -		if (parent->live & REG_LIVE_READ)
> +		/* The first condition is more likely to be true than the
> +		 * second, checked it first.
> +		 */
> +		if ((parent->live & REG_LIVE_READ) == flag ||
> +		    parent->live & REG_LIVE_READ64)
>  			/* The parentage chain never changes and
>  			 * this parent was already marked as LIVE_READ.
>  			 * There is no need to keep walking the chain again and
>  			 * keep re-marking all parents as LIVE_READ.
>  			 * This case happens when the same register is read
>  			 * multiple times without writes into it in-between.
> +			 * Also, if parent has the stronger REG_LIVE_READ64 set,
> +			 * then no need to set the weak REG_LIVE_READ32.
>  			 */
>  			break;
>  		/* ... then we depend on parent's value */
> -		parent->live |= REG_LIVE_READ;
> +		parent->live |= flag;
> +		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
> +		if (flag == REG_LIVE_READ64)
> +			parent->live &= ~REG_LIVE_READ32;
>  		state = parent;
>  		parent = state->parent;
>  		writes = true;
> @@ -1173,12 +1184,111 @@ static int mark_reg_read(struct bpf_verifier_env *env,
>  	return 0;
>  }
>  
> +/* This function is supposed to be used by the following 32-bit optimization
> + * code only. It returns TRUE if the source or destination register operates
> + * on 64-bit, otherwise return FALSE.
> + */
> +static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
> +		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
> +{
> +	u8 code, class, op;
> +
> +	code = insn->code;
> +	class = BPF_CLASS(code);
> +	op = BPF_OP(code);
> +	if (class == BPF_JMP) {
> +		/* BPF_EXIT for "main" will reach here. Return TRUE
> +		 * conservatively.
> +		 */
> +		if (op == BPF_EXIT)
> +			return true;
> +		if (op == BPF_CALL) {
> +			/* BPF to BPF call will reach here because of marking
> +			 * caller saved clobber with DST_OP_NO_MARK for which we
> +			 * don't care the register def because they are anyway
> +			 * marked as NOT_INIT already.
> +			 */
> +			if (insn->src_reg == BPF_PSEUDO_CALL)
> +				return false;
> +			/* Helper call will reach here because of arg type
> +			 * check, conservatively return TRUE.
> +			 */
> +			if (t == SRC_OP)
> +				return true;
> +
> +			return false;
> +		}
> +	}
> +
> +	if (class == BPF_ALU64 || class == BPF_JMP ||
> +	    /* BPF_END always use BPF_ALU class. */
> +	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
> +		return true;
> +
> +	if (class == BPF_ALU || class == BPF_JMP32)
> +		return false;
> +
> +	if (class == BPF_LDX) {
> +		if (t != SRC_OP)
> +			return BPF_SIZE(code) == BPF_DW;
> +		/* LDX source must be ptr. */
> +		return true;
> +	}
> +
> +	if (class == BPF_STX) {
> +		if (reg->type != SCALAR_VALUE)
> +			return true;
> +		return BPF_SIZE(code) == BPF_DW;
> +	}
> +
> +	if (class == BPF_LD) {
> +		u8 mode = BPF_MODE(code);
> +
> +		/* LD_IMM64 */
> +		if (mode == BPF_IMM)
> +			return true;
> +
> +		/* Both LD_IND and LD_ABS return 32-bit data. */
> +		if (t != SRC_OP)
> +			return  false;
> +
> +		/* Implicit ctx ptr. */
> +		if (regno == BPF_REG_6)
> +			return true;

compiler will optimize above 'if' away.
What's the point of the above stmt?

> +
> +		/* Explicit source could be any width. */
> +		return true;
> +	}
> +
> +	if (class == BPF_ST)
> +		/* The only source register for BPF_ST is a ptr. */
> +		return true;

ditto

> +
> +	/* Conservatively return true at default. */
> +	return true;
> +}
> +
> +static void mark_insn_zext(struct bpf_verifier_env *env,
> +			   struct bpf_reg_state *reg)
> +{
> +	s32 def_idx = reg->subreg_def;
> +
> +	if (def_idx == DEF_NOT_SUBREG)
> +		return;
> +
> +	env->insn_aux_data[def_idx].zext_dst = true;
> +	/* The dst will be zero extended, so won't be sub-register anymore. */
> +	reg->subreg_def = DEF_NOT_SUBREG;
> +}
> +
>  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
>  			 enum reg_arg_type t)
>  {
>  	struct bpf_verifier_state *vstate = env->cur_state;
>  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
> +	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
>  	struct bpf_reg_state *reg, *regs = state->regs;
> +	bool rw64;
>  
>  	if (regno >= MAX_BPF_REG) {
>  		verbose(env, "R%d is invalid\n", regno);
> @@ -1186,6 +1296,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
>  	}
>  
>  	reg = &regs[regno];
> +	rw64 = is_reg64(env, insn, regno, reg, t);
>  	if (t == SRC_OP) {
>  		/* check whether register used as source operand can be read */
>  		if (reg->type == NOT_INIT) {
> @@ -1196,7 +1307,11 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
>  		if (regno == BPF_REG_FP)
>  			return 0;
>  
> -		return mark_reg_read(env, reg, reg->parent);
> +		if (rw64)
> +			mark_insn_zext(env, reg);
> +
> +		return mark_reg_read(env, reg, reg->parent,
> +				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
>  	} else {
>  		/* check whether register used as dest operand can be written to */
>  		if (regno == BPF_REG_FP) {
> @@ -1204,6 +1319,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
>  			return -EACCES;
>  		}
>  		reg->live |= REG_LIVE_WRITTEN;
> +		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx;
>  		if (t == DST_OP)
>  			mark_reg_unknown(env, regs, regno);
>  	}
> @@ -1383,7 +1499,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
>  			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
>  		}
>  		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
> -			      reg_state->stack[spi].spilled_ptr.parent);
> +			      reg_state->stack[spi].spilled_ptr.parent,
> +			      REG_LIVE_READ64);
>  		return 0;
>  	} else {
>  		int zeros = 0;
> @@ -1400,7 +1517,9 @@ static int check_stack_read(struct bpf_verifier_env *env,
>  			return -EACCES;
>  		}
>  		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
> -			      reg_state->stack[spi].spilled_ptr.parent);
> +			      reg_state->stack[spi].spilled_ptr.parent,
> +			      size == BPF_REG_SIZE
> +			      ? REG_LIVE_READ64 : REG_LIVE_READ32);

I don't think that's correct.
It's a parial read from a stack slot that doesn't contain a pointer.
It can be of any size and any offset.
Hence 4-byte read can read upper bits too.

>  		if (value_regno >= 0) {
>  			if (zeros == size) {
>  				/* any size read into register is zero extended,
> @@ -2109,6 +2228,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
>  						    value_regno);
>  				if (reg_type_may_be_null(reg_type))
>  					regs[value_regno].id = ++env->id_gen;
> +				/* A load of ctx field could have different
> +				 * actual load size with the one encoded in the
> +				 * insn. When the dst is PTR, it is for sure not
> +				 * a sub-register.
> +				 */
> +				regs[value_regno].subreg_def = DEF_NOT_SUBREG;

because of cases like above I think that DEF_NOT_SUBREG==0 is a safer choice.
To make any mark_reg_* to set it DEF_NOT_SUBREG and only two places
would have to do dst_reg->subreg_def = env->insn_idx + 1;

>  			}
>  			regs[value_regno].type = reg_type;
>  		}
> @@ -2368,7 +2493,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
>  		 * the whole slot to be marked as 'read'
>  		 */
>  		mark_reg_read(env, &state->stack[spi].spilled_ptr,
> -			      state->stack[spi].spilled_ptr.parent);
> +			      state->stack[spi].spilled_ptr.parent,
> +			      access_size == BPF_REG_SIZE
> +			      ? REG_LIVE_READ64 : REG_LIVE_READ32);
>  	}
>  	return update_stack_depth(env, state, min_off);
>  }
> @@ -3332,6 +3459,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>  		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
>  	}
>  
> +	/* helper call returns 64-bit value. */
> +	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
> +
>  	/* update return register (already marked as written above) */
>  	if (fn->ret_type == RET_INTEGER) {
>  		/* sets type to SCALAR_VALUE */
> @@ -4263,6 +4393,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>  				 */
>  				*dst_reg = *src_reg;
>  				dst_reg->live |= REG_LIVE_WRITTEN;
> +				dst_reg->subreg_def = DEF_NOT_SUBREG;
>  			} else {
>  				/* R1 = (u32) R2 */
>  				if (is_pointer_value(env, insn->src_reg)) {
> @@ -4273,6 +4404,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
>  				} else if (src_reg->type == SCALAR_VALUE) {
>  					*dst_reg = *src_reg;
>  					dst_reg->live |= REG_LIVE_WRITTEN;
> +					dst_reg->subreg_def = env->insn_idx;
>  				} else {
>  					mark_reg_unknown(env, regs,
>  							 insn->dst_reg);
> @@ -5352,6 +5484,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
>  	 * Already marked as written above.
>  	 */
>  	mark_reg_unknown(env, regs, BPF_REG_0);
> +	/* ld_abs load up to 32-bit skb data. */
> +	regs[BPF_REG_0].subreg_def = env->insn_idx;
>  	return 0;
>  }
>  
> @@ -6292,20 +6426,33 @@ static bool states_equal(struct bpf_verifier_env *env,
>  	return true;
>  }
>  
> +/* Return 0 if no propagation happened. Return negative error code if error
> + * happened. Otherwise, return the propagated bit.
> + */
>  static int propagate_liveness_reg(struct bpf_verifier_env *env,
>  				  struct bpf_reg_state *reg,
>  				  struct bpf_reg_state *parent_reg)
>  {
> +	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
> +	u8 flag = reg->live & REG_LIVE_READ;
>  	int err;
>  
> -	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
> +	/* When comes here, read flags of PARENT_REG or REG could be any of
> +	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
> +	 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
> +	 */
> +	if (parent_flag == REG_LIVE_READ64 ||
> +	    /* Or if there is no read flag from REG. */
> +	    !flag ||
> +	    /* Or if the read flag from REG is the same as PARENT_REG. */
> +	    parent_flag == flag)
>  		return 0;
>  
> -	err = mark_reg_read(env, reg, parent_reg);
> +	err = mark_reg_read(env, reg, parent_reg, flag);
>  	if (err)
>  		return err;
>  
> -	return 0;
> +	return flag;
>  }
>  
>  /* A write screens off any subsequent reads; but write marks come from the
> @@ -6339,8 +6486,10 @@ static int propagate_liveness(struct bpf_verifier_env *env,
>  		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
>  			err = propagate_liveness_reg(env, &state_reg[i],
>  						     &parent_reg[i]);
> -			if (err)
> +			if (err < 0)
>  				return err;
> +			if (err == REG_LIVE_READ64)
> +				mark_insn_zext(env, &parent_reg[i]);
>  		}
>  
>  		/* Propagate stack slots. */
> @@ -6350,11 +6499,11 @@ static int propagate_liveness(struct bpf_verifier_env *env,
>  			state_reg = &state->stack[i].spilled_ptr;
>  			err = propagate_liveness_reg(env, state_reg,
>  						     parent_reg);
> -			if (err)
> +			if (err < 0)
>  				return err;
>  		}
>  	}
> -	return err;
> +	return 0;
>  }
>  
>  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
> -- 
> 2.7.4
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ